mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: expose deterministic submit support
Add these bits to the gpu characteristics flags: NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING - fast submits with no in-kernel job tracking are supported; NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL - deterministic submits are supported even with job tracking and num_inflight_jobs set. Either of these may get disabled if the particular channel or submit still requires features that block them. Make gk20a_channel_sync_needs_sync_framework() take a gk20a pointer instead of a channel pointer so that it can be called without a channel. It does not need any per-channel data. Bug 200291300 Change-Id: I5f82510b6d39b53bcf6f1006dd83bdd9053963a0 Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/1456845 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
744e2d202e
commit
ee9733e587
@@ -2468,7 +2468,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
||||
return -EINVAL;
|
||||
|
||||
need_sync_framework = force_need_sync_fence ||
|
||||
gk20a_channel_sync_needs_sync_framework(c) ||
|
||||
gk20a_channel_sync_needs_sync_framework(g) ||
|
||||
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
|
||||
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
|
||||
flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
|
||||
|
||||
@@ -972,9 +972,7 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
|
||||
return gk20a_channel_semaphore_create(c);
|
||||
}
|
||||
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c)
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
|
||||
{
|
||||
if (gk20a_platform_has_syncpoints(c->g))
|
||||
return false;
|
||||
return true;
|
||||
return !gk20a_platform_has_syncpoints(g);
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ struct gk20a_channel_sync {
|
||||
|
||||
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
|
||||
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g);
|
||||
|
||||
#ifdef CONFIG_SYNC
|
||||
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
|
||||
|
||||
@@ -461,6 +461,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
|
||||
if (gk20a_platform_has_syncpoints(g))
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
|
||||
|
||||
/*
|
||||
* Railgating needs job tracking which prevents fast submits. They're
|
||||
* supported otherwise, provided that the user doesn't request anything
|
||||
* that depends on job tracking. (Here, fast means strictly no
|
||||
* metadata: just the gpfifo contents are copied and gp_put is updated.)
|
||||
*/
|
||||
if (!platform->can_railgate)
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
|
||||
|
||||
/*
|
||||
* Railgating and sync framework require deferred job cleanup which
|
||||
* prevents deterministic submits. They're supported otherwise,
|
||||
* provided that the user doesn't request anything that depends on
|
||||
* deferred cleanup.
|
||||
*/
|
||||
if (!platform->can_railgate
|
||||
&& !gk20a_channel_sync_needs_sync_framework(g))
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL;
|
||||
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;
|
||||
|
||||
@@ -137,6 +137,10 @@ struct nvgpu_gpu_zbc_query_table_args {
|
||||
#define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16)
|
||||
/* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */
|
||||
#define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17)
|
||||
/* Fast deterministic submits with no job tracking are supported */
|
||||
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING (1ULL << 18)
|
||||
/* Deterministic submits are supported even with job tracking */
|
||||
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
|
||||
|
||||
struct nvgpu_gpu_characteristics {
|
||||
__u32 arch;
|
||||
@@ -1348,8 +1352,22 @@ struct nvgpu_alloc_gpfifo_args {
|
||||
struct nvgpu_alloc_gpfifo_ex_args {
|
||||
__u32 num_entries;
|
||||
__u32 num_inflight_jobs;
|
||||
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */
|
||||
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) /* channel shall exhibit deterministic behavior in the submit path */
|
||||
/* Set owner channel of this gpfifo as a vpr channel. */
|
||||
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0)
|
||||
/*
|
||||
* Channel shall exhibit deterministic behavior in the submit path.
|
||||
*
|
||||
* With this flag, any submits with in-kernel job tracking also require that
|
||||
* num_inflight_jobs is nonzero, and additionally that
|
||||
* NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is found in gpu
|
||||
* characteristics.flags.
|
||||
*
|
||||
* Note that fast submits (with no in-kernel job tracking) are also
|
||||
* deterministic and are supported if the characteristics flags contain
|
||||
* NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; neither this
|
||||
* flag nor num_inflight_jobs is necessary in that case.
|
||||
*/
|
||||
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1)
|
||||
__u32 flags;
|
||||
__u32 reserved[5];
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user