mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: expose deterministic submit support
Add these bits to the GPU characteristics flags:
NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING - fast
submits with no in-kernel job tracking are supported.
NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL - deterministic
submits are also supported with job tracking, provided num_inflight_jobs is set.
Either kind of submit may still be disabled if the particular channel or
submit requires features that block it.
Make gk20a_channel_sync_needs_sync_framework() take a gk20a pointer
instead of a channel pointer so that it can be called without a channel.
It does not need any per-channel data.
Bug 20029130
Bug 200274674
Change-Id: I5f82510b6d39b53bcf6f1006dd83bdd9053963a0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456845
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
(cherry picked from commit ee9733e587 in
dev-kernel)
Reviewed-on: https://git-master.nvidia.com/r/1558993
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
b79a75517a
commit
42d90e17dd
@@ -3106,7 +3106,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
need_sync_framework = force_need_sync_fence ||
|
need_sync_framework = force_need_sync_fence ||
|
||||||
gk20a_channel_sync_needs_sync_framework(c) ||
|
gk20a_channel_sync_needs_sync_framework(g) ||
|
||||||
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
|
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
|
||||||
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
|
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
|
||||||
flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
|
flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
|
||||||
|
|||||||
@@ -979,10 +979,10 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
|
|||||||
return gk20a_channel_semaphore_create(c);
|
return gk20a_channel_semaphore_create(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c)
|
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_TEGRA_GK20A
|
#ifdef CONFIG_TEGRA_GK20A
|
||||||
if (gk20a_platform_has_syncpoints(c->g->dev))
|
if (gk20a_platform_has_syncpoints(g->dev))
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ struct gk20a_channel_sync {
|
|||||||
|
|
||||||
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
|
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
|
||||||
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
|
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
|
||||||
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
|
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g);
|
||||||
|
|
||||||
#ifdef CONFIG_SYNC
|
#ifdef CONFIG_SYNC
|
||||||
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
|
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
|
||||||
|
|||||||
@@ -2207,6 +2207,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
|
|||||||
gk20a_platform_has_syncpoints(g->dev))
|
gk20a_platform_has_syncpoints(g->dev))
|
||||||
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
|
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Railgating needs job tracking which prevents fast submits. They're
|
||||||
|
* supported otherwise, provided that the user doesn't request anything
|
||||||
|
* that depends on job tracking. (Here, fast means strictly no
|
||||||
|
* metadata, just the gpfifo contents are copied and gp_put updated).
|
||||||
|
*/
|
||||||
|
if (!platform->can_railgate)
|
||||||
|
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Railgating and sync framework require deferred job cleanup which
|
||||||
|
* prevents deterministic submits. They're supported otherwise,
|
||||||
|
* provided that the user doesn't request anything that depends on
|
||||||
|
* deferred cleanup.
|
||||||
|
*/
|
||||||
|
if (!platform->can_railgate
|
||||||
|
&& !gk20a_channel_sync_needs_sync_framework(g))
|
||||||
|
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL;
|
||||||
|
|
||||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
|
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
|
||||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
|
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
|
||||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;
|
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;
|
||||||
|
|||||||
@@ -137,6 +137,10 @@ struct nvgpu_gpu_zbc_query_table_args {
|
|||||||
#define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16)
|
#define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16)
|
||||||
/* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */
|
/* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */
|
||||||
#define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17)
|
#define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17)
|
||||||
|
/* Fast deterministic submits with no job tracking are supported */
|
||||||
|
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING (1ULL << 18)
|
||||||
|
/* Deterministic submits are supported even with job tracking */
|
||||||
|
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
|
||||||
/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
|
/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
|
||||||
#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)
|
#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)
|
||||||
|
|
||||||
@@ -1350,8 +1354,22 @@ struct nvgpu_alloc_gpfifo_args {
|
|||||||
struct nvgpu_alloc_gpfifo_ex_args {
|
struct nvgpu_alloc_gpfifo_ex_args {
|
||||||
__u32 num_entries;
|
__u32 num_entries;
|
||||||
__u32 num_inflight_jobs;
|
__u32 num_inflight_jobs;
|
||||||
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */
|
/* Set owner channel of this gpfifo as a vpr channel. */
|
||||||
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) /* channel shall exhibit deterministic behavior in the submit path */
|
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0)
|
||||||
|
/*
|
||||||
|
* Channel shall exhibit deterministic behavior in the submit path.
|
||||||
|
*
|
||||||
|
* With this flag, any submits with in-kernel job tracking also require that
|
||||||
|
* num_inflight_jobs is nonzero, and additionally that
|
||||||
|
* NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is found in gpu
|
||||||
|
* characteristics.flags.
|
||||||
|
*
|
||||||
|
* Note that fast submits (with no in-kernel job tracking) are also
|
||||||
|
* deterministic and are supported if the characteristics flags contain
|
||||||
|
* NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; this flag or
|
||||||
|
* num_inflight_jobs are not necessary in that case.
|
||||||
|
*/
|
||||||
|
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1)
|
||||||
__u32 flags;
|
__u32 flags;
|
||||||
__u32 reserved[5];
|
__u32 reserved[5];
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user