From 8ec4395e8244df548126394629e8b10e0c6fac1e Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Tue, 10 Mar 2020 15:23:40 -0400 Subject: [PATCH] gpu: nvgpu: build flag for deterministic channel Add CONFIG_NVGPU_DETERMINISTIC_CHANNELS and fix preprocessor #ifdefs to allow compiling kernel mode submit without deterministic feature enabled. Jira NVGPU-4661 Change-Id: I4aa678715824e8981d39bd8db0c5ae61ef3a675c Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2310325 Tested-by: mobile promotions Reviewed-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/Makefile.shared.configs | 1 + drivers/gpu/nvgpu/common/fifo/channel.c | 39 ++++++++++++------- drivers/gpu/nvgpu/common/fifo/submit.c | 28 ++++++++----- .../gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c | 8 ---- drivers/gpu/nvgpu/include/nvgpu/channel.h | 15 +++++-- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 + drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 2 + .../fifo/channel/gv11b/nvgpu-channel-gv11b.c | 2 - userspace/units/fifo/channel/nvgpu-channel.c | 16 ++++---- 10 files changed, 67 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 18d8c369c..753be1b8d 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -22,6 +22,7 @@ ccflags-y += -DCONFIG_NVGPU_DEBUGGER ccflags-y += -DCONFIG_NVGPU_ENGINE_RESET endif +ccflags-y += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS ccflags-y += -DCONFIG_NVGPU_TPC_POWERGATE ccflags-y += -DCONFIG_NVGPU_ACR_LEGACY ccflags-y += -DCONFIG_NVGPU_ENGINE_QUEUE diff --git a/drivers/gpu/nvgpu/Makefile.shared.configs b/drivers/gpu/nvgpu/Makefile.shared.configs index d3e33fd22..2c114f72b 100644 --- a/drivers/gpu/nvgpu/Makefile.shared.configs +++ b/drivers/gpu/nvgpu/Makefile.shared.configs @@ -189,6 +189,7 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CLK_ARB NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_NON_FUSA NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_IOCTL_NON_FUSA +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index b434995e4..8768ebbcc 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -521,15 +521,19 @@ bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c) bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c) { +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; nvgpu_smp_rmb(); return pre_alloc_enabled; +#else + return false; +#endif } -static int channel_prealloc_resources(struct nvgpu_channel *ch, - u32 num_jobs) +static int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs) { +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS unsigned int i; int err; size_t size; @@ -603,10 +607,14 @@ clean_up_joblist: clean_up: (void) memset(&ch->joblist.pre_alloc, 0, sizeof(ch->joblist.pre_alloc)); return err; +#else + return -ENOSYS; +#endif } static void channel_free_prealloc_resources(struct nvgpu_channel *c) { +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd); nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); nvgpu_fence_pool_free(c); @@ -618,6 +626,7 @@ static void channel_free_prealloc_resources(struct nvgpu_channel *c) */ nvgpu_smp_wmb(); c->joblist.pre_alloc.enabled = false; +#endif } int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch) @@ -742,7 +751,8 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c, goto clean_up_sync; } - if (c->deterministic && args->num_inflight_jobs != 0U) { + if (nvgpu_channel_is_deterministic(c) && + args->num_inflight_jobs != 0U) { err = channel_prealloc_resources(c, args->num_inflight_jobs); if (err != 0) { @@ -765,7 +775,8 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c, clean_up_priv_cmd: channel_free_priv_cmd_q(c); clean_up_prealloc: - if (c->deterministic && args->num_inflight_jobs != 0U) { + if (nvgpu_channel_is_deterministic(c) && + args->num_inflight_jobs != 0U) { channel_free_prealloc_resources(c); } clean_up_sync: @@ -1433,7 +1444,7 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, * Deterministic channels have a channel-wide power reference; * for others, there's one per submit. */ - if (!c->deterministic) { + if (!nvgpu_channel_is_deterministic(c)) { gk20a_idle(g); } @@ -1696,7 +1707,7 @@ static void channel_free_wait_for_refs(struct nvgpu_channel *ch, } -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS static void channel_free_put_deterministic_ref_from_init( struct nvgpu_channel *ch) { @@ -1834,7 +1845,7 @@ unbind: g->ops.channel.unbind(ch); g->ops.channel.free_inst(g, ch); -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS channel_free_put_deterministic_ref_from_init(ch); #endif @@ -2276,7 +2287,7 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c, c->vpr = false; #endif -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) { nvgpu_rwsem_down_read(&g->deterministic_busy); /* @@ -2322,8 +2333,8 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c, return 0; clean_up_idle: -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA - if (c->deterministic) { +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS + if (nvgpu_channel_is_deterministic(c)) { nvgpu_rwsem_down_read(&g->deterministic_busy); gk20a_idle(g); c->deterministic = false; @@ -2410,7 +2421,7 @@ void nvgpu_channel_sw_quiesce(struct gk20a *g) } #endif -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS /* * Stop deterministic channel activity for do_idle() when power needs to go off * momentarily but deterministic channels keep power refs for potentially a @@ -2769,7 +2780,7 @@ void nvgpu_channel_semaphore_wakeup(struct gk20a *g, bool post_events) * user-space managed * semaphore. */ - if (!c->deterministic) { + if (!nvgpu_channel_is_deterministic(c)) { nvgpu_channel_update(c); } #endif @@ -2881,9 +2892,7 @@ void nvgpu_channel_debug_dump_all(struct gk20a *g, info->tsgid = ch->tsgid; info->pid = ch->pid; info->refs = nvgpu_atomic_read(&ch->ref_count); -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA - info->deterministic = ch->deterministic; -#endif + info->deterministic = nvgpu_channel_is_deterministic(ch); #ifdef CONFIG_NVGPU_SW_SEMAPHORE if (hw_sema != NULL) { diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 14cfc0550..be6669e0f 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -83,7 +83,8 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, * submission when user requested and the wait hasn't expired. */ if (flag_fence_wait) { - u32 max_wait_cmds = c->deterministic ? 1U : 0U; + u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ? + 1U : 0U; if (!pre_alloc_enabled) { job->wait_cmd = nvgpu_kzalloc(g, @@ -419,7 +420,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, flag_fence_get || ((nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) || nvgpu_is_vpr_resize_enabled()) && - !c->deterministic) || + !nvgpu_channel_is_deterministic(c)) || !skip_buffer_refcounting); #ifdef CONFIG_NVGPU_CHANNEL_WDT @@ -434,7 +435,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, * job tracking is required, the channel must have * pre-allocated resources. Otherwise, we fail the submit here */ - if (c->deterministic && !nvgpu_channel_is_prealloc_enabled(c)) { + if (nvgpu_channel_is_deterministic(c) && + !nvgpu_channel_is_prealloc_enabled(c)) { return -EINVAL; } @@ -456,7 +458,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, * is not required, and we clean-up one job-tracking * resource in the submit path. */ - need_deferred_cleanup = !c->deterministic || + need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) || need_sync_framework || !skip_buffer_refcounting; @@ -468,11 +470,11 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, * For deterministic channels, we don't allow deferred clean_up * processing to occur. In cases we hit this, we fail the submit */ - if (c->deterministic && need_deferred_cleanup) { + if (nvgpu_channel_is_deterministic(c) && need_deferred_cleanup) { return -EINVAL; } - if (!c->deterministic) { + if (!nvgpu_channel_is_deterministic(c)) { /* * Get a power ref unless this is a deterministic * channel that holds them during the channel lifetime. @@ -495,6 +497,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, } +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS /* Grab access to HW to deal with do_idle */ if (c->deterministic) { nvgpu_rwsem_down_read(&g->deterministic_busy); @@ -510,6 +513,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, err = -EINVAL; goto clean_up; } +#endif #ifdef CONFIG_NVGPU_TRACE trace_gk20a_channel_submit_gpfifo(g->name, @@ -592,10 +596,12 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, g->ops.userd.gp_put(g, c); +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS /* No hw access beyond this point */ if (c->deterministic) { nvgpu_rwsem_up_read(&g->deterministic_busy); } +#endif #ifdef CONFIG_NVGPU_TRACE trace_gk20a_channel_submitted_gpfifo(g->name, @@ -619,12 +625,14 @@ clean_up_job: clean_up: nvgpu_log_fn(g, "fail"); nvgpu_fence_put(post_fence); +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS if (c->deterministic) { nvgpu_rwsem_up_read(&g->deterministic_busy); - } else { - if (need_deferred_cleanup) { - gk20a_idle(g); - } + return err; + } +#endif + if (need_deferred_cleanup) { + gk20a_idle(g); } return err; diff --git a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c index c3f2f900d..22ed78ec2 100644 --- a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c @@ -85,21 +85,13 @@ void gv11b_channel_debug_dump(struct gk20a *g, struct nvgpu_debug_context *o, struct nvgpu_channel_dump_info *info) { -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d%s: ", -#else - gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d: ", -#endif info->chid, g->name, info->tsgid, info->pid, -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA info->refs, info->deterministic ? ", deterministic" : ""); -#else - info->refs); -#endif gk20a_debug_output(o, "channel status: %s in use %s %s\n", info->hw_state.enabled ? "" : "not", info->hw_state.status_string, diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index ef4d08ec1..deee40bbd 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -183,10 +183,8 @@ struct nvgpu_channel_dump_info { int pid; /** Number of references to this channel. */ int refs; -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA /** Channel uses deterministic submit (kernel submit only). */ bool deterministic; -#endif /** Channel H/W state */ struct nvgpu_channel_hw_state hw_state; /** Snaphsot of channel instance fields. */ @@ -562,7 +560,7 @@ struct nvgpu_channel { bool referenceable; /** True if VPR support was requested during setup bind */ bool vpr; -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS /** * Channel shall exhibit deterministic behavior in the submit path. * Submit latency shall be consistent (and low). Submits that may cause @@ -570,9 +568,9 @@ struct nvgpu_channel { * sync fds or mapped buffer refcounting are not deterministic). */ bool deterministic; -#endif /** Deterministic, but explicitly idle and submits disallowed. */ bool deterministic_railgate_allowed; +#endif /** Channel uses Color Decompression Engine. */ bool cde; /** @@ -667,6 +665,15 @@ void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g); #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ +static inline bool nvgpu_channel_is_deterministic(struct nvgpu_channel *c) +{ +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS + return c->deterministic; +#else + return false; +#endif +} + /** * @brief Get channel pointer from its node in free channels list. * diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 3f3706d9d..668f098ea 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -710,12 +710,14 @@ struct gk20a { /** Stored HW version info */ struct nvgpu_gpu_params params; +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS /** * Guards access to hardware when usual gk20a_{busy,idle} are skipped * for submits and held for channel lifetime but dropped for an ongoing * gk20a_do_idle(). */ struct nvgpu_rwsem deterministic_busy; +#endif struct nvgpu_netlist_vars *netlist_vars; bool netlist_valid; diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 7df76474d..26bb8af80 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -1617,6 +1617,7 @@ static int nvgpu_gpu_set_deterministic_ch_railgate(struct nvgpu_channel *ch, return err; } +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS static int nvgpu_gpu_set_deterministic_ch(struct nvgpu_channel *ch, u32 flags) { if (!ch->deterministic) @@ -1688,6 +1689,7 @@ out: args->num_channels = i; return err; } +#endif long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { diff --git a/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c b/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c index bbf70cf05..87061bec4 100644 --- a/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c +++ b/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c @@ -284,9 +284,7 @@ int test_gv11b_channel_debug_dump(struct unit_module *m, info->tsgid = ch->tsgid; info->pid = ch->pid; info->refs = nvgpu_atomic_read(&ch->ref_count); -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA info->deterministic = (branches & F_CHANNEL_DUMP_DETERMINISTIC) != 0; -#endif info->hw_state.enabled = (branches & F_CHANNEL_DUMP_ENABLED) != 0; info->hw_state.busy = (branches & F_CHANNEL_DUMP_BUSY) != 0; info->hw_state.status_string = "fake"; diff --git a/userspace/units/fifo/channel/nvgpu-channel.c b/userspace/units/fifo/channel/nvgpu-channel.c index 9149a89f1..6ee4dcc37 100644 --- a/userspace/units/fifo/channel/nvgpu-channel.c +++ b/userspace/units/fifo/channel/nvgpu-channel.c @@ -497,7 +497,7 @@ int test_channel_close(struct unit_module *m, struct gk20a *g, void *vargs) ch->vm = NULL; } -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS if (branches & F_CHANNEL_CLOSE_DETERMINISTIC) { /* Compensate for atomic dec in gk20a_idle() */ nvgpu_atomic_set(&g->usage_count, 1); @@ -603,10 +603,10 @@ int test_channel_close(struct unit_module *m, struct gk20a *g, void *vargs) ch->subctx = NULL; } -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS ch->deterministic = false; -#endif ch->deterministic_railgate_allowed = false; +#endif unit_assert(ch->usermode_submit_enabled == false, goto done); /* we took an extra reference to avoid nvgpu_vm_remove_ref */ @@ -901,7 +901,7 @@ int test_channel_setup_bind(struct unit_module *m, struct gk20a *g, void *vargs) nvgpu_dma_free(g, &ch->usermode_userd); nvgpu_dma_free(g, &ch->usermode_gpfifo); ch->userd_iova = 0U; -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS ch->deterministic = false; #endif nvgpu_atomic_set(&ch->bound, false); @@ -1315,7 +1315,7 @@ done: #define F_CHANNEL_DETERMINISTIC_UNIDLE_GK20ABUSY_FAIL BIT(2) #define F_CHANNEL_DETERMINISTIC_IDLE_LAST BIT(3) -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS static const char *f_channel_deterministic_idle_unidle[] = { "deterministic_channel", "determinstic_railgate_allowed", @@ -1758,7 +1758,7 @@ int test_channel_semaphore_wakeup(struct unit_module *m, unit_verbose(m, "%s branches=%s\n", __func__, branches_str(branches, f_channel_semaphore_wakeup)); -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS if (branches & F_CHANNEL_SEMAPHORRE_WAKEUP_DETERMINISTIC_CH) { ch->deterministic = true; } @@ -1777,7 +1777,7 @@ int test_channel_semaphore_wakeup(struct unit_module *m, nvgpu_channel_semaphore_wakeup(g, false); unit_assert(stub[0].count == (global_count - 1U), goto done); -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS ch->deterministic = false; #endif } @@ -2007,7 +2007,7 @@ struct unit_module_test nvgpu_channel_tests[] = { UNIT_TEST(ch_abort, test_channel_abort, &unit_ctx, 0), UNIT_TEST(mark_error, test_channel_mark_error, &unit_ctx, 0), UNIT_TEST(sw_quiesce, test_channel_sw_quiesce, &unit_ctx, 0), -#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS UNIT_TEST(idle_unidle, test_channel_deterministic_idle_unidle, &unit_ctx, 0), #endif UNIT_TEST(suspend_resume, test_channel_suspend_resume_serviceable_chs, &unit_ctx, 0),