gpu: nvgpu: build flag for deterministic channel

Add CONFIG_NVGPU_DETERMINISTIC_CHANNELS and fix
preprocessor #ifdefs to allow compiling kernel mode
submit without deterministic feature enabled.

Jira NVGPU-4661

Change-Id: I4aa678715824e8981d39bd8db0c5ae61ef3a675c
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2310325
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Thomas Fleury
2020-03-10 15:23:40 -04:00
committed by Alex Waterman
parent cc043e1506
commit 8ec4395e82
10 changed files with 67 additions and 47 deletions

View File

@@ -22,6 +22,7 @@ ccflags-y += -DCONFIG_NVGPU_DEBUGGER
ccflags-y += -DCONFIG_NVGPU_ENGINE_RESET
endif
ccflags-y += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS
ccflags-y += -DCONFIG_NVGPU_TPC_POWERGATE
ccflags-y += -DCONFIG_NVGPU_ACR_LEGACY
ccflags-y += -DCONFIG_NVGPU_ENGINE_QUEUE

View File

@@ -189,6 +189,7 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CLK_ARB
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_NON_FUSA
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_IOCTL_NON_FUSA
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS
CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT

View File

@@ -521,15 +521,19 @@ bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c)
bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c)
{
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
nvgpu_smp_rmb();
return pre_alloc_enabled;
#else
return false;
#endif
}
static int channel_prealloc_resources(struct nvgpu_channel *ch,
u32 num_jobs)
static int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs)
{
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
unsigned int i;
int err;
size_t size;
@@ -603,10 +607,14 @@ clean_up_joblist:
clean_up:
(void) memset(&ch->joblist.pre_alloc, 0, sizeof(ch->joblist.pre_alloc));
return err;
#else
return -ENOSYS;
#endif
}
static void channel_free_prealloc_resources(struct nvgpu_channel *c)
{
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd);
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
nvgpu_fence_pool_free(c);
@@ -618,6 +626,7 @@ static void channel_free_prealloc_resources(struct nvgpu_channel *c)
*/
nvgpu_smp_wmb();
c->joblist.pre_alloc.enabled = false;
#endif
}
int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch)
@@ -742,7 +751,8 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
goto clean_up_sync;
}
if (c->deterministic && args->num_inflight_jobs != 0U) {
if (nvgpu_channel_is_deterministic(c) &&
args->num_inflight_jobs != 0U) {
err = channel_prealloc_resources(c,
args->num_inflight_jobs);
if (err != 0) {
@@ -765,7 +775,8 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
clean_up_priv_cmd:
channel_free_priv_cmd_q(c);
clean_up_prealloc:
if (c->deterministic && args->num_inflight_jobs != 0U) {
if (nvgpu_channel_is_deterministic(c) &&
args->num_inflight_jobs != 0U) {
channel_free_prealloc_resources(c);
}
clean_up_sync:
@@ -1433,7 +1444,7 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
* Deterministic channels have a channel-wide power reference;
* for others, there's one per submit.
*/
if (!c->deterministic) {
if (!nvgpu_channel_is_deterministic(c)) {
gk20a_idle(g);
}
@@ -1696,7 +1707,7 @@ static void channel_free_wait_for_refs(struct nvgpu_channel *ch,
}
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static void channel_free_put_deterministic_ref_from_init(
struct nvgpu_channel *ch)
{
@@ -1834,7 +1845,7 @@ unbind:
g->ops.channel.unbind(ch);
g->ops.channel.free_inst(g, ch);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
channel_free_put_deterministic_ref_from_init(ch);
#endif
@@ -2276,7 +2287,7 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c,
c->vpr = false;
#endif
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) {
nvgpu_rwsem_down_read(&g->deterministic_busy);
/*
@@ -2322,8 +2333,8 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c,
return 0;
clean_up_idle:
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
if (c->deterministic) {
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (nvgpu_channel_is_deterministic(c)) {
nvgpu_rwsem_down_read(&g->deterministic_busy);
gk20a_idle(g);
c->deterministic = false;
@@ -2410,7 +2421,7 @@ void nvgpu_channel_sw_quiesce(struct gk20a *g)
}
#endif
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/*
* Stop deterministic channel activity for do_idle() when power needs to go off
* momentarily but deterministic channels keep power refs for potentially a
@@ -2769,7 +2780,7 @@ void nvgpu_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
* user-space managed
* semaphore.
*/
if (!c->deterministic) {
if (!nvgpu_channel_is_deterministic(c)) {
nvgpu_channel_update(c);
}
#endif
@@ -2881,9 +2892,7 @@ void nvgpu_channel_debug_dump_all(struct gk20a *g,
info->tsgid = ch->tsgid;
info->pid = ch->pid;
info->refs = nvgpu_atomic_read(&ch->ref_count);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
info->deterministic = ch->deterministic;
#endif
info->deterministic = nvgpu_channel_is_deterministic(ch);
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
if (hw_sema != NULL) {

View File

@@ -83,7 +83,8 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
* submission when user requested and the wait hasn't expired.
*/
if (flag_fence_wait) {
u32 max_wait_cmds = c->deterministic ? 1U : 0U;
u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ?
1U : 0U;
if (!pre_alloc_enabled) {
job->wait_cmd = nvgpu_kzalloc(g,
@@ -419,7 +420,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
flag_fence_get ||
((nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
nvgpu_is_vpr_resize_enabled()) &&
!c->deterministic) ||
!nvgpu_channel_is_deterministic(c)) ||
!skip_buffer_refcounting);
#ifdef CONFIG_NVGPU_CHANNEL_WDT
@@ -434,7 +435,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
* job tracking is required, the channel must have
* pre-allocated resources. Otherwise, we fail the submit here
*/
if (c->deterministic && !nvgpu_channel_is_prealloc_enabled(c)) {
if (nvgpu_channel_is_deterministic(c) &&
!nvgpu_channel_is_prealloc_enabled(c)) {
return -EINVAL;
}
@@ -456,7 +458,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
* is not required, and we clean-up one job-tracking
* resource in the submit path.
*/
need_deferred_cleanup = !c->deterministic ||
need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) ||
need_sync_framework ||
!skip_buffer_refcounting;
@@ -468,11 +470,11 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
* For deterministic channels, we don't allow deferred clean_up
* processing to occur. In cases we hit this, we fail the submit
*/
if (c->deterministic && need_deferred_cleanup) {
if (nvgpu_channel_is_deterministic(c) && need_deferred_cleanup) {
return -EINVAL;
}
if (!c->deterministic) {
if (!nvgpu_channel_is_deterministic(c)) {
/*
* Get a power ref unless this is a deterministic
* channel that holds them during the channel lifetime.
@@ -495,6 +497,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
}
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/* Grab access to HW to deal with do_idle */
if (c->deterministic) {
nvgpu_rwsem_down_read(&g->deterministic_busy);
@@ -510,6 +513,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
err = -EINVAL;
goto clean_up;
}
#endif
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_channel_submit_gpfifo(g->name,
@@ -592,10 +596,12 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
g->ops.userd.gp_put(g, c);
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/* No hw access beyond this point */
if (c->deterministic) {
nvgpu_rwsem_up_read(&g->deterministic_busy);
}
#endif
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_channel_submitted_gpfifo(g->name,
@@ -619,12 +625,14 @@ clean_up_job:
clean_up:
nvgpu_log_fn(g, "fail");
nvgpu_fence_put(post_fence);
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (c->deterministic) {
nvgpu_rwsem_up_read(&g->deterministic_busy);
} else {
if (need_deferred_cleanup) {
gk20a_idle(g);
}
return err;
}
#endif
if (need_deferred_cleanup) {
gk20a_idle(g);
}
return err;

View File

@@ -85,21 +85,13 @@ void gv11b_channel_debug_dump(struct gk20a *g,
struct nvgpu_debug_context *o,
struct nvgpu_channel_dump_info *info)
{
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d%s: ",
#else
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d: ",
#endif
info->chid,
g->name,
info->tsgid,
info->pid,
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
info->refs,
info->deterministic ? ", deterministic" : "");
#else
info->refs);
#endif
gk20a_debug_output(o, "channel status: %s in use %s %s\n",
info->hw_state.enabled ? "" : "not",
info->hw_state.status_string,

View File

@@ -183,10 +183,8 @@ struct nvgpu_channel_dump_info {
int pid;
/** Number of references to this channel. */
int refs;
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
/** Channel uses deterministic submit (kernel submit only). */
bool deterministic;
#endif
/** Channel H/W state */
struct nvgpu_channel_hw_state hw_state;
/** Snapshot of channel instance fields. */
@@ -562,7 +560,7 @@ struct nvgpu_channel {
bool referenceable;
/** True if VPR support was requested during setup bind */
bool vpr;
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/**
* Channel shall exhibit deterministic behavior in the submit path.
* Submit latency shall be consistent (and low). Submits that may cause
@@ -570,9 +568,9 @@ struct nvgpu_channel {
* sync fds or mapped buffer refcounting are not deterministic).
*/
bool deterministic;
#endif
/** Deterministic, but explicitly idle and submits disallowed. */
bool deterministic_railgate_allowed;
#endif
/** Channel uses Color Decompression Engine. */
bool cde;
/**
@@ -667,6 +665,15 @@ void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
static inline bool nvgpu_channel_is_deterministic(struct nvgpu_channel *c)
{
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
return c->deterministic;
#else
return false;
#endif
}
/**
* @brief Get channel pointer from its node in free channels list.
*

View File

@@ -710,12 +710,14 @@ struct gk20a {
/** Stored HW version info */
struct nvgpu_gpu_params params;
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/**
* Guards access to hardware when usual gk20a_{busy,idle} are skipped
* for submits and held for channel lifetime but dropped for an ongoing
* gk20a_do_idle().
*/
struct nvgpu_rwsem deterministic_busy;
#endif
struct nvgpu_netlist_vars *netlist_vars;
bool netlist_valid;

View File

@@ -1617,6 +1617,7 @@ static int nvgpu_gpu_set_deterministic_ch_railgate(struct nvgpu_channel *ch,
return err;
}
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static int nvgpu_gpu_set_deterministic_ch(struct nvgpu_channel *ch, u32 flags)
{
if (!ch->deterministic)
@@ -1688,6 +1689,7 @@ out:
args->num_channels = i;
return err;
}
#endif
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{

View File

@@ -284,9 +284,7 @@ int test_gv11b_channel_debug_dump(struct unit_module *m,
info->tsgid = ch->tsgid;
info->pid = ch->pid;
info->refs = nvgpu_atomic_read(&ch->ref_count);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
info->deterministic = (branches & F_CHANNEL_DUMP_DETERMINISTIC) != 0;
#endif
info->hw_state.enabled = (branches & F_CHANNEL_DUMP_ENABLED) != 0;
info->hw_state.busy = (branches & F_CHANNEL_DUMP_BUSY) != 0;
info->hw_state.status_string = "fake";

View File

@@ -497,7 +497,7 @@ int test_channel_close(struct unit_module *m, struct gk20a *g, void *vargs)
ch->vm = NULL;
}
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (branches & F_CHANNEL_CLOSE_DETERMINISTIC) {
/* Compensate for atomic dec in gk20a_idle() */
nvgpu_atomic_set(&g->usage_count, 1);
@@ -603,10 +603,10 @@ int test_channel_close(struct unit_module *m, struct gk20a *g, void *vargs)
ch->subctx = NULL;
}
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
ch->deterministic = false;
#endif
ch->deterministic_railgate_allowed = false;
#endif
unit_assert(ch->usermode_submit_enabled == false, goto done);
/* we took an extra reference to avoid nvgpu_vm_remove_ref */
@@ -901,7 +901,7 @@ int test_channel_setup_bind(struct unit_module *m, struct gk20a *g, void *vargs)
nvgpu_dma_free(g, &ch->usermode_userd);
nvgpu_dma_free(g, &ch->usermode_gpfifo);
ch->userd_iova = 0U;
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
ch->deterministic = false;
#endif
nvgpu_atomic_set(&ch->bound, false);
@@ -1315,7 +1315,7 @@ done:
#define F_CHANNEL_DETERMINISTIC_UNIDLE_GK20ABUSY_FAIL BIT(2)
#define F_CHANNEL_DETERMINISTIC_IDLE_LAST BIT(3)
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static const char *f_channel_deterministic_idle_unidle[] = {
"deterministic_channel",
"determinstic_railgate_allowed",
@@ -1758,7 +1758,7 @@ int test_channel_semaphore_wakeup(struct unit_module *m,
unit_verbose(m, "%s branches=%s\n", __func__,
branches_str(branches, f_channel_semaphore_wakeup));
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (branches & F_CHANNEL_SEMAPHORRE_WAKEUP_DETERMINISTIC_CH) {
ch->deterministic = true;
}
@@ -1777,7 +1777,7 @@ int test_channel_semaphore_wakeup(struct unit_module *m,
nvgpu_channel_semaphore_wakeup(g, false);
unit_assert(stub[0].count == (global_count - 1U), goto done);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
ch->deterministic = false;
#endif
}
@@ -2007,7 +2007,7 @@ struct unit_module_test nvgpu_channel_tests[] = {
UNIT_TEST(ch_abort, test_channel_abort, &unit_ctx, 0),
UNIT_TEST(mark_error, test_channel_mark_error, &unit_ctx, 0),
UNIT_TEST(sw_quiesce, test_channel_sw_quiesce, &unit_ctx, 0),
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
UNIT_TEST(idle_unidle, test_channel_deterministic_idle_unidle, &unit_ctx, 0),
#endif
UNIT_TEST(suspend_resume, test_channel_suspend_resume_serviceable_chs, &unit_ctx, 0),