mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: expose deterministic submit support
Add these bits to the GPU characteristics flags:
NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING - fast
submits with no in-kernel job tracking are supported.
NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL - deterministic
submits are also supported with job tracking and num_inflight_jobs set.
Either of these may still get disabled if the particular channel or
submit requires features that block them.
Make gk20a_channel_sync_needs_sync_framework() take a gk20a pointer
instead of a channel pointer so that it can be called without a channel.
It does not need any per-channel data.
Bug 20029130
Bug 200274674
Change-Id: I5f82510b6d39b53bcf6f1006dd83bdd9053963a0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456845
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
(cherry picked from commit ee9733e587 in
dev-kernel)
Reviewed-on: https://git-master.nvidia.com/r/1558993
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
b79a75517a
commit
42d90e17dd
@@ -3106,7 +3106,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
||||
return -EINVAL;
|
||||
|
||||
need_sync_framework = force_need_sync_fence ||
|
||||
gk20a_channel_sync_needs_sync_framework(c) ||
|
||||
gk20a_channel_sync_needs_sync_framework(g) ||
|
||||
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
|
||||
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
|
||||
flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
|
||||
|
||||
@@ -979,10 +979,10 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
|
||||
return gk20a_channel_semaphore_create(c);
|
||||
}
|
||||
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c)
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
|
||||
{
|
||||
#ifdef CONFIG_TEGRA_GK20A
|
||||
if (gk20a_platform_has_syncpoints(c->g->dev))
|
||||
if (gk20a_platform_has_syncpoints(g->dev))
|
||||
return false;
|
||||
#endif
|
||||
return true;
|
||||
|
||||
@@ -101,7 +101,7 @@ struct gk20a_channel_sync {
|
||||
|
||||
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
|
||||
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g);
|
||||
|
||||
#ifdef CONFIG_SYNC
|
||||
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
|
||||
|
||||
@@ -2207,6 +2207,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
|
||||
gk20a_platform_has_syncpoints(g->dev))
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
|
||||
|
||||
/*
|
||||
* Railgating needs job tracking which prevents fast submits. They're
|
||||
* supported otherwise, provided that the user doesn't request anything
|
||||
* that depends on job tracking. (Here, fast means strictly no
|
||||
* metadata, just the gpfifo contents are copied and gp_put updated).
|
||||
*/
|
||||
if (!platform->can_railgate)
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
|
||||
|
||||
/*
|
||||
* Railgating and sync framework require deferred job cleanup which
|
||||
* prevents deterministic submits. They're supported otherwise,
|
||||
* provided that the user doesn't request anything that depends on
|
||||
* deferred cleanup.
|
||||
*/
|
||||
if (!platform->can_railgate
|
||||
&& !gk20a_channel_sync_needs_sync_framework(g))
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL;
|
||||
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
|
||||
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;
|
||||
|
||||
Reference in New Issue
Block a user