gpu: nvgpu: expose deterministic submit support

Add these bits in the gpu characteristics flags:

NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING - fast
submits with no in-kernel job tracking are supported.

NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL - deterministic
submits are also supported with job tracking and num_inflight_jobs set.

Either of these may get disabled if the particular channel or submit
still requires features that block them.

Make gk20a_channel_sync_needs_sync_framework() take a gk20a pointer
instead of a channel pointer so that it can be called without a channel.
It does not need any per-channel data.

Bug 20029130
Bug 200274674

Change-Id: I5f82510b6d39b53bcf6f1006dd83bdd9053963a0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456845
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
(cherry picked from commit ee9733e587 in
dev-kernel)
Reviewed-on: https://git-master.nvidia.com/r/1558993
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
This commit is contained in:
Konsta Holtta
2017-05-02 16:01:51 +03:00
committed by mobile promotions
parent b79a75517a
commit 42d90e17dd
5 changed files with 43 additions and 6 deletions

View File

@@ -3106,7 +3106,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
return -EINVAL;
need_sync_framework = force_need_sync_fence ||
gk20a_channel_sync_needs_sync_framework(c) ||
gk20a_channel_sync_needs_sync_framework(g) ||
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));

View File

@@ -979,10 +979,10 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
return gk20a_channel_semaphore_create(c);
}
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c)
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_GK20A
if (gk20a_platform_has_syncpoints(c->g->dev))
if (gk20a_platform_has_syncpoints(g->dev))
return false;
#endif
return true;

View File

@@ -101,7 +101,7 @@ struct gk20a_channel_sync {
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g);
#ifdef CONFIG_SYNC
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);

View File

@@ -2207,6 +2207,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
gk20a_platform_has_syncpoints(g->dev))
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
/*
* Railgating needs job tracking which prevents fast submits. They're
* supported otherwise, provided that the user doesn't request anything
* that depends on job tracking. (Here, fast means strictly no
* metadata: only the gpfifo contents are copied and gp_put is updated.)
*/
if (!platform->can_railgate)
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
/*
* Railgating and sync framework require deferred job cleanup which
* prevents deterministic submits. They're supported otherwise,
* provided that the user doesn't request anything that depends on
* deferred cleanup.
*/
if (!platform->can_railgate
&& !gk20a_channel_sync_needs_sync_framework(g))
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL;
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;