gpu: nvgpu: support tuning per-ch deterministic opts

Add a new ioctl NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS to adjust deterministic options on a per-channel basis. Currently, the only supported option is to relax the no-railgating requirement on open deterministic channels. This also disallows submits on such channels, until the railgate option is reset. Bug 200327089 Change-Id: If4f0f51fd1d40ad7407d13638150d7402479aff0 Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1554563 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2017-11-06 14:25:47 +02:00
parent 1480afeb01
commit 8bdce5337e
7 changed files with 179 additions and 3 deletions
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -537,6 +537,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (c->deterministic)
 		nvgpu_rwsem_down_read(&g->deterministic_busy);
 	if (c->deterministic && c->deterministic_railgate_allowed) {
 		/*
 		 * Nope - this channel has dropped its own power ref. Unlike
 		 * normal submits, deterministic submits do not take a power
 		 * reference for each submitted job, so the GPU might railgate
 		 * at any time now; the submit is therefore disallowed.
 		 */
 		err = -EINVAL;
 		goto clean_up;
 	}
 	trace_gk20a_channel_submit_gpfifo(g->name,
 					  c->chid,
 					  num_entries,
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -161,6 +161,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
 	{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
 		NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
 	{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS,
 		NVGPU_SUPPORT_DETERMINISTIC_OPTS},
 	{NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
 		NVGPU_SUPPORT_IO_COHERENCE},
 	{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
@@ -1319,6 +1321,114 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g,
 	return err;
 }
 /*
 * Apply the railgating bits of a SET_DETERMINISTIC_OPTS request to one
 * channel.
 *
 * @ch:    channel to update; the caller has already checked ch->deterministic
 * @flags: NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_* bits from userspace
 *
 * Returns 0 on success (including when there is nothing to change), -EINVAL
 * when both ALLOW and DISALLOW are requested, or the error from gk20a_busy()
 * when the channel cannot be taken back into use.
 */
 static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch,
 		u32 flags)
 {
 	const bool want_allow = (flags &
 		NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING) != 0;
 	const bool want_disallow = (flags &
 		NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING) != 0;
 	int ret;
 	/* The two requests contradict each other */
 	if (want_allow && want_disallow)
 		return -EINVAL;
 	/* Neither requested: keep the current state */
 	if (!want_allow && !want_disallow)
 		return 0;
 	/*
 	 * Exactly one bit is set from here on, so want_allow alone names the
 	 * target state. Only an actual state change touches the busy/idle
 	 * reference; a repeated request falls straight through.
 	 */
 	if (want_allow != ch->deterministic_railgate_allowed) {
 		if (want_allow) {
 			/* entering explicit idle: give the reference back */
 			gk20a_idle(ch->g);
 		} else {
 			/* leaving explicit idle: take the reference again */
 			ret = gk20a_busy(ch->g);
 			if (ret) {
 				nvgpu_warn(ch->g,
 					"cannot busy to restore deterministic ch");
 				return ret;
 			}
 		}
 	}
 	ch->deterministic_railgate_allowed = want_allow;
 	return 0;
 }
 /*
 * Apply deterministic options to a single channel.
 *
 * Only channels marked deterministic are accepted; anything else yields
 * -EINVAL. Otherwise the result of the railgate option update is returned.
 */
 static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags)
 {
 	if (ch->deterministic)
 		return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags);
 	return -EINVAL;
 }
 static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
 		struct nvgpu_gpu_set_deterministic_opts_args *args)
 {
 	int __user *user_channels;
 	u32 i = 0;
 	int err = 0;
 	gk20a_dbg_fn("");
 	user_channels = (int __user *)(uintptr_t)args->channels;
 	/* Upper limit; prevent holding deterministic_busy for long */
 	if (args->num_channels > g->fifo.num_channels) {
 		err = -EINVAL;
 		goto out;
 	}
 	/* Trivial sanity check first */
 	if (!access_ok(VERIFY_READ, user_channels,
 				args->num_channels * sizeof(int))) {
 		err = -EFAULT;
 		goto out;
 	}
 	nvgpu_rwsem_down_read(&g->deterministic_busy);
 	/* note: we exit at the first failure */
 	for (; i < args->num_channels; i++) {
 		int ch_fd = 0;
 		struct channel_gk20a *ch;
 		if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) {
 			/* User raced with above access_ok */
 			err = -EFAULT;
 			break;
 		}
 		ch = gk20a_get_channel_from_file(ch_fd);
 		if (!ch) {
 			err = -EINVAL;
 			break;
 		}
 		err = nvgpu_gpu_set_deterministic_ch(ch, args->flags);
 		gk20a_channel_put(ch);
 		if (err)
 			break;
 	}
 	nvgpu_rwsem_up_read(&g->deterministic_busy);
 out:
 	args->num_channels = i;
 	return err;
 }
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -1633,6 +1743,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
 		break;
 	case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS:
 		err = nvgpu_gpu_set_deterministic_opts(g,
 			(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
 		break;
 	default:
 		gk20a_dbg_info("unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -578,7 +578,10 @@ unbind:
 	if (ch->deterministic) {
 		nvgpu_rwsem_down_read(&g->deterministic_busy);
 		ch->deterministic = false;
 		if (!ch->deterministic_railgate_allowed)
 			gk20a_idle(g);
 		ch->deterministic_railgate_allowed = false;
 		nvgpu_rwsem_up_read(&g->deterministic_busy);
 	}
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -208,6 +208,8 @@ struct channel_gk20a {
 	bool first_init;
 	bool vpr;
 	bool deterministic;
 	/* deterministic, but explicitly idle and submits disallowed */
 	bool deterministic_railgate_allowed;
 	bool cde;
 	pid_t pid;
 	pid_t tgid;
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -439,6 +439,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 				NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
 				true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -91,6 +91,9 @@ struct gk20a;
 /* FECS context switch tracing is available */
 #define NVGPU_SUPPORT_FECS_CTXSW_TRACE		38
 /* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
 #define NVGPU_SUPPORT_DETERMINISTIC_OPTS	39
 /*
 * Security flags
 */
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -148,7 +148,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST	(1ULL << 21)
 /* Direct PTE kind control is supported (map_buffer_ex) */
 #define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL	(1ULL << 23)
-
+/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS	(1ULL << 24)
 struct nvgpu_gpu_characteristics {
 	__u32 arch;
@@ -801,6 +802,42 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
 	__s32 temp_f24_8;
 };
 /*
 * Adjust options of deterministic channels, one batch of channels per call.
 *
 * Only one option is supported currently: relaxing the railgate blocking of
 * a channel by "disabling" the channel.
 *
 * Open deterministic channels do not allow the GPU to railgate by default. It
 * may be preferable to keep preopened channel contexts open and idle and still
 * let the GPU railgate, taking the channels back into use dynamically from
 * userspace as an optimization. This ioctl allows dropping or reacquiring the
 * requirement to hold GPU power on for individual channels. While
 * NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING is in effect on a
 * channel, no work can be submitted to it.
 *
 * Channels are processed in array order and processing stops at the first
 * failure. num_channels is updated to signify how many channels were updated
 * successfully, so on error it is the index of the first update that failed.
 */
 struct nvgpu_gpu_set_deterministic_opts_args {
 	__u32 num_channels; /* in/out */
 /*
 * Set or unset the railgating reference held by deterministic channels. If
 * the channel status is already the same as the flag, this is a no-op. The
 * two flags cannot be set at the same time (the call fails with EINVAL). If
 * neither is set, the state is left as is.
 */
 #define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING    (1 << 0)
 #define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING (1 << 1)
 	__u32 flags;        /* in */
 	/*
 	 * This is a user pointer to an array of num_channels channel fds,
 	 * each the size of an int.
 	 *
 	 * The channels have to be valid fds and must have been set as
 	 * deterministic previously; otherwise the call fails with EINVAL.
 	 */
 	__u64 channels; /* in */
 };
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -885,8 +922,11 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
 #define NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT \
 		_IOWR(NVGPU_GPU_IOCTL_MAGIC, 39, \
 			struct nvgpu_gpu_set_therm_alert_limit_args)
 #define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
 			struct nvgpu_gpu_set_deterministic_opts_args)
 #define NVGPU_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT)
+	_IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE	\
 	sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)