gpu: nvgpu: support tuning per-ch deterministic opts

Add a new ioctl NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS to adjust deterministic options on a per-channel basis. Currently, the only supported option is to relax the no-railgating requirement on open deterministic channels. This also disallows submits on such channels, until the railgate option is reset.

Bug 200327089

Change-Id: If4f0f51fd1d40ad7407d13638150d7402479aff0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1554563
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2017-11-06 14:25:47 +02:00
parent 1480afeb01
commit 8bdce5337e
7 changed files with 179 additions and 3 deletions
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -537,6 +537,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (c->deterministic)
 		nvgpu_rwsem_down_read(&g->deterministic_busy);

+	if (c->deterministic && c->deterministic_railgate_allowed) {
+		/*
+		 * Nope - this channel has dropped its own power ref. As
+		 * deterministic submits don't hold power on per each submitted
+		 * job like normal ones do, the GPU might railgate any time now
+		 * and thus submit is disallowed.
+		 */
+		err = -EINVAL;
+		goto clean_up;
+	}
+
 	trace_gk20a_channel_submit_gpfifo(g->name,
 					  c->chid,
 					  num_entries,
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -161,6 +161,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
 	{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
 		NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
+	{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS,
+		NVGPU_SUPPORT_DETERMINISTIC_OPTS},
 	{NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
 		NVGPU_SUPPORT_IO_COHERENCE},
 	{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
@@ -1319,6 +1321,114 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g,
 	return err;
 }

+static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch,
+		u32 flags)
+{
+	int err = 0;
+	bool allow;
+	bool disallow;

+	allow = flags &
+		NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING;

+	disallow = flags &
+		NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING;

+	/* ALLOW and DISALLOW are mutually exclusive */
+	if (allow && disallow)
+		return -EINVAL;

+	/* Neither flag set: leave the channel state as is */
+	if (!allow && !disallow)
+		return 0;

+	/*
+	 * Allowing railgating drops this channel's power ref (gk20a_idle);
+	 * disallowing retakes it (gk20a_busy). No state change is a no-op.
+	 */
+	if (!ch->deterministic_railgate_allowed &&
+			allow) {
+		gk20a_idle(ch->g);
+	} else if (ch->deterministic_railgate_allowed &&
+			!allow) {
+		err = gk20a_busy(ch->g);
+		if (err) {
+			nvgpu_warn(ch->g,
+				"cannot busy to restore deterministic ch");
+			return err;	/* state is left unchanged */
+		}
+	}
+	ch->deterministic_railgate_allowed = allow; /* true blocks submits */

+	return err;
+}
+
+static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags)
+{
+	if (!ch->deterministic)
+		return -EINVAL;	/* not a deterministic channel */

+	return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags);
+}
+
+static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
+		struct nvgpu_gpu_set_deterministic_opts_args *args)
+{
+	int __user *user_channels;
+	u32 i = 0;	/* channels updated so far; reported back at out: */
+	int err = 0;

+	gk20a_dbg_fn("");

+	user_channels = (int __user *)(uintptr_t)args->channels;

+	/* Cap the count; bounds how long deterministic_busy is held */
+	if (args->num_channels > g->fifo.num_channels) {
+		err = -EINVAL;
+		goto out;
+	}

+	/* Cheap range check of the whole fd array before taking the lock */
+	if (!access_ok(VERIFY_READ, user_channels,
+				args->num_channels * sizeof(int))) {
+		err = -EFAULT;
+		goto out;
+	}

+	nvgpu_rwsem_down_read(&g->deterministic_busy);

+	/* Stop at the first failure; i then counts the updated channels */
+	for (; i < args->num_channels; i++) {
+		int ch_fd = 0;
+		struct channel_gk20a *ch;

+		if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) {
+			/* User raced with the access_ok check above */
+			err = -EFAULT;
+			break;
+		}

+		ch = gk20a_get_channel_from_file(ch_fd);
+		if (!ch) {
+			err = -EINVAL;
+			break;
+		}

+		err = nvgpu_gpu_set_deterministic_ch(ch, args->flags);

+		gk20a_channel_put(ch);

+		if (err)
+			break;
+	}

+	nvgpu_rwsem_up_read(&g->deterministic_busy);

+out:
+	args->num_channels = i;	/* out: channels updated successfully */
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -1633,6 +1743,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
 		break;

+	case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS:
+		err = nvgpu_gpu_set_deterministic_opts(g,
+			(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
+		break;
+
 	default:
 		gk20a_dbg_info("unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -578,7 +578,10 @@ unbind:
 	if (ch->deterministic) {
 		nvgpu_rwsem_down_read(&g->deterministic_busy);
 		ch->deterministic = false;
+		if (!ch->deterministic_railgate_allowed)
 			gk20a_idle(g);
+		ch->deterministic_railgate_allowed = false;
+
 		nvgpu_rwsem_up_read(&g->deterministic_busy);
 	}

--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -208,6 +208,8 @@ struct channel_gk20a {
 	bool first_init;
 	bool vpr;
 	bool deterministic;
+	/* deterministic, but explicitly idle and submits disallowed */
+	bool deterministic_railgate_allowed;
 	bool cde;
 	pid_t pid;
 	pid_t tgid;
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -439,6 +439,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 				NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
 				true);

+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
+
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);

--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -91,6 +91,9 @@ struct gk20a;
 /* FECS context switch tracing is available */
 #define NVGPU_SUPPORT_FECS_CTXSW_TRACE		38

+/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
+#define NVGPU_SUPPORT_DETERMINISTIC_OPTS	39
+
 /*
 * Security flags
 */
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -148,7 +148,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST	(1ULL << 21)
 /* Direct PTE kind control is supported (map_buffer_ex) */
 #define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL	(1ULL << 23)
-
+/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
+#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS	(1ULL << 24)

 struct nvgpu_gpu_characteristics {
 	__u32 arch;
@@ -801,6 +802,42 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
 	__s32 temp_f24_8;
 };

+/*
+ * Adjust options of deterministic channels in channel batches.
+ *
+ * This supports only one option currently: relax railgate blocking by
+ * "disabling" the channel.
+ *
+ * Open deterministic channels do not allow the GPU to railgate by default. It
+ * may be preferable to hold preopened channel contexts open and idle and still
+ * railgate the GPU, taking the channels back into use dynamically in userspace
+ * as an optimization. This ioctl allows dropping or reacquiring the
+ * requirement to hold GPU power on for individual channels. While railgating
+ * is allowed on a channel, no work can be submitted to it.
+ *
+ * num_channels is updated to signify how many channels were updated
+ * successfully. It can be used to test which was the first update to fail.
+ */
+struct nvgpu_gpu_set_deterministic_opts_args {
+	__u32 num_channels; /* in: array size, out: channels updated */
+/*
+ * Drop (ALLOW) or retake (DISALLOW) the railgating reference held by a
+ * deterministic channel. If the channel is already in the requested state,
+ * this is a no-op. Setting both flags at once is an error (-EINVAL). If
+ * neither is set, the state is left as is.
+ */
+#define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING    (1 << 0)
+#define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING (1 << 1)
+	__u32 flags;        /* in */
+	/*
+	 * Userspace pointer to an array of num_channels channel fds (int).
+	 *
+	 * Each fd has to be a valid channel that was previously set as
+	 * deterministic; otherwise the update stops there with -EINVAL.
+	 */
+	__u64 channels; /* in */
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -885,8 +922,11 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
 #define NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT \
 		_IOWR(NVGPU_GPU_IOCTL_MAGIC, 39, \
 			struct nvgpu_gpu_set_therm_alert_limit_args)
+#define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
+			struct nvgpu_gpu_set_deterministic_opts_args)
 #define NVGPU_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT)
+	_IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE	\
 	sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)