gpu: nvgpu: expose deterministic submit support

Add these bits to the GPU characteristics flags:

NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING - fast
submits with no in-kernel job tracking are supported.

NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL - deterministic
submits are also supported with job tracking, provided num_inflight_jobs is set.

Even when a flag is set, the corresponding submit mode may still be
unavailable if the particular channel or submit requires features that block it.

Make gk20a_channel_sync_needs_sync_framework() take a gk20a pointer
instead of a channel pointer so that it can be called without a channel.
It does not need any per-channel data.

Bug 200291300

Change-Id: I5f82510b6d39b53bcf6f1006dd83bdd9053963a0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456845
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Holtta
2017-05-02 16:01:51 +03:00
committed by mobile promotions
parent 744e2d202e
commit ee9733e587
5 changed files with 43 additions and 8 deletions

View File

@@ -2468,7 +2468,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
return -EINVAL;
need_sync_framework = force_need_sync_fence ||
gk20a_channel_sync_needs_sync_framework(c) ||
gk20a_channel_sync_needs_sync_framework(g) ||
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));

View File

@@ -972,9 +972,7 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
return gk20a_channel_semaphore_create(c);
}
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c)
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
{
if (gk20a_platform_has_syncpoints(c->g))
return false;
return true;
return !gk20a_platform_has_syncpoints(g);
}

View File

@@ -99,7 +99,7 @@ struct gk20a_channel_sync {
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g);
#ifdef CONFIG_SYNC
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);

View File

@@ -461,6 +461,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
if (gk20a_platform_has_syncpoints(g))
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
/*
* Railgating needs job tracking which prevents fast submits. They're
* supported otherwise, provided that the user doesn't request anything
* that depends on job tracking. (Here, fast means strictly no
* metadata: only the gpfifo contents are copied and gp_put is updated.)
*/
if (!platform->can_railgate)
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
/*
* Railgating and sync framework require deferred job cleanup which
* prevents deterministic submits. They're supported otherwise,
* provided that the user doesn't request anything that depends on
* deferred cleanup.
*/
if (!platform->can_railgate
&& !gk20a_channel_sync_needs_sync_framework(g))
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL;
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;

View File

@@ -137,6 +137,10 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16)
/* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */
#define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17)
/* Fast deterministic submits with no job tracking are supported */
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING (1ULL << 18)
/* Deterministic submits are supported even with job tracking */
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
struct nvgpu_gpu_characteristics {
__u32 arch;
@@ -1348,8 +1352,22 @@ struct nvgpu_alloc_gpfifo_args {
struct nvgpu_alloc_gpfifo_ex_args {
__u32 num_entries;
__u32 num_inflight_jobs;
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) /* channel shall exhibit deterministic behavior in the submit path */
/* Set owner channel of this gpfifo as a vpr channel. */
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0)
/*
* Channel shall exhibit deterministic behavior in the submit path.
*
* With this flag, any submits with in-kernel job tracking also require that
* num_inflight_jobs is nonzero, and additionally that
* NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is found in gpu
* characteristics.flags.
*
* Note that fast submits (with no in-kernel job tracking) are also
* deterministic and are supported if the characteristics flags contain
* NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; neither this
* flag nor num_inflight_jobs is necessary in that case.
*/
#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1)
__u32 flags;
__u32 reserved[5];
};