From 42d90e17dd98cdd4d41e2b3cb9cdc2eec91c844c Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Tue, 2 May 2017 16:01:51 +0300 Subject: [PATCH] gpu: nvgpu: expose deterministic submit support Add these bits in the gpu characteristics flags: NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING - fast submits with no in-kernel job tracking are supported. NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL - deterministic submits also with job tracking and num_inflight_jobs set are supported. Either of these may get disabled if the particular channel or submit still requires features that block these. Make gk20a_channel_sync_needs_sync_framework() take a gk20a pointer instead of a channel pointer so that it can be called without a channel. It does not need any per-channel data. Bug 20029130 Bug 200274674 Change-Id: I5f82510b6d39b53bcf6f1006dd83bdd9053963a0 Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1456845 Signed-off-by: Debarshi Dutta (cherry picked from commit ee9733e587d977610975435a84e5af7cabba8870 in dev-kernel) Reviewed-on: https://git-master.nvidia.com/r/1558993 GVS: Gerrit_Virtual_Submit Reviewed-by: Bibek Basu --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 4 ++-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/gk20a.c | 19 +++++++++++++++++ include/uapi/linux/nvgpu.h | 22 ++++++++++++++++++-- 5 files changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 26379c623..280e90be1 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -3106,7 +3106,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, return -EINVAL; need_sync_framework = force_need_sync_fence || - gk20a_channel_sync_needs_sync_framework(c) || + gk20a_channel_sync_needs_sync_framework(g) || (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT || flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)); diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 0aa202c5e..c2fb28a53 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -979,10 +979,10 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c) return gk20a_channel_semaphore_create(c); } -bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c) +bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g) { #ifdef CONFIG_TEGRA_GK20A - if (gk20a_platform_has_syncpoints(c->g->dev)) + if (gk20a_platform_has_syncpoints(g->dev)) return false; #endif return true; diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index c31360230..719ea3c18 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -101,7 +101,7 @@ struct gk20a_channel_sync { void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); -bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); +bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g); #ifdef CONFIG_SYNC void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 29a6fc2e0..86f181449 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -2207,6 +2207,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) gk20a_platform_has_syncpoints(g->dev)) gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; + /* + * Railgating needs job tracking which prevents fast submits. They're + * supported otherwise, provided that the user doesn't request anything + * that depends on job tracking. (Here, fast means strictly no + * metadata, just the gpfifo contents are copied and gp_put updated). + */ + if (!platform->can_railgate) + gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; + + /* + * Railgating and sync framework require deferred job cleanup which + * prevents deterministic submits. They're supported otherwise, + * provided that the user doesn't request anything that depends on + * deferred cleanup. + */ + if (!platform->can_railgate + && !gk20a_channel_sync_needs_sync_framework(g)) + gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL; + gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS; gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG; gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS; diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index dcb02a771..fd3c1a578 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -137,6 +137,10 @@ struct nvgpu_gpu_zbc_query_table_args { #define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16) /* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */ #define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17) +/* Fast deterministic submits with no job tracking are supported */ +#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING (1ULL << 18) +/* Deterministic submits are supported even with job tracking */ +#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19) /* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */ #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) @@ -1350,8 +1354,22 @@ struct nvgpu_alloc_gpfifo_args { struct nvgpu_alloc_gpfifo_ex_args { __u32 num_entries; __u32 num_inflight_jobs; -#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */ -#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) /* channel shall exhibit deterministic behavior in the submit path */ +/* Set owner channel of this gpfifo as a vpr channel. */ +#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) +/* + * Channel shall exhibit deterministic behavior in the submit path. + * + * With this flag, any submits with in-kernel job tracking also require that + * num_inflight_jobs is nonzero, and additionally that + * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is found in gpu + * characteristics.flags. + * + * Note that fast submits (with no in-kernel job tracking) are also + * deterministic and are supported if the characteristics flags contain + * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; this flag or + * num_inflight_jobs are not necessary in that case. + */ +#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) __u32 flags; __u32 reserved[5]; };