gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST

Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the
runlist and, optionally, check host and FECS status to preempt a pending
load of a context not belonging to the calling channel on the GR engine
during a context switch.

This ioctl should be called immediately after a submit to decrease the
worst-case submit-to-start latency for a high-interleave channel.
There is a less than 0.002% chance that the ioctl blocks for up to a
couple of milliseconds, due to a race where the FECS status changes
while it is being read.
On GV11B the ioctl always preempts the pending load of an unwanted
context, since there is no race that can cause the ioctl to block.

Also fix a bug in host reschedule for multiple runlists, which needs to
write both runlist registers.
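
For reference, a minimal userspace sketch of the intended call pattern
(not part of this change): submit, then immediately issue the new ioctl
on the same channel fd. The ioctl number, args struct and PREEMPT_NEXT
flag come from the uapi additions below; the header path and the way the
channel fd is obtained are assumptions, and the caller needs
CAP_SYS_NICE.

    #include <string.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>  /* assumed uapi header exposing the channel ioctls */

    /* Call right after NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO on the same channel fd. */
    static int reschedule_after_submit(int channel_fd)
    {
            struct nvgpu_reschedule_runlist_args args;

            memset(&args, 0, sizeof(args));
            /* also preempt a pending load of another context on the GR engine */
            args.flags = NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT;

            if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST, &args) < 0) {
                    perror("NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST");
                    return -1;
            }
            return 0;
    }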

Bug 1987640
Bug 1924808
Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549050
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>


@@ -64,9 +64,6 @@ u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING)
flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING;
if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST)
flags |= NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST;
return flags;
}
@@ -1008,10 +1005,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
g->ops.fifo.userd_gp_put(g, c);
if ((NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST & flags) &&
g->ops.fifo.reschedule_runlist)
g->ops.fifo.reschedule_runlist(g, c->runlist_id);
/* No hw access beyond this point */
if (c->deterministic)
nvgpu_rwsem_up_read(&g->deterministic_busy);


@@ -789,10 +789,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
if (ch->has_timedout)
return -ETIMEDOUT;
if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & args->flags) &&
!capable(CAP_SYS_NICE))
return -EPERM;
nvgpu_get_fence_args(&args->fence, &fence);
submit_flags =
nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
@@ -1291,6 +1287,27 @@ long gk20a_channel_ioctl(struct file *filp,
err = gk20a_fifo_preempt(ch->g, ch);
gk20a_idle(ch->g);
break;
case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST:
if (!capable(CAP_SYS_NICE)) {
err = -EPERM;
break;
}
if (!ch->g->ops.fifo.reschedule_runlist) {
err = -ENOSYS;
break;
}
err = gk20a_busy(ch->g);
if (err) {
dev_err(dev,
"%s: failed to host gk20a for ioctl cmd: 0x%x",
__func__, cmd);
break;
}
err = ch->g->ops.fifo.reschedule_runlist(ch,
NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT &
((struct nvgpu_reschedule_runlist_args *)buf)->flags);
gk20a_idle(ch->g);
break;
case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
err = gk20a_busy(ch->g);
if (err) {


@@ -54,7 +54,6 @@ struct fifo_profile_gk20a;
#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE (1 << 3)
#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI (1 << 4)
#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5)
#define NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST (1 << 6)
/*
* The binary format of 'struct nvgpu_channel_fence' introduced here


@@ -53,6 +53,7 @@
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#define FECS_METHOD_WFI_RESTORE 0x80000
#define FECS_MAILBOX_0_ACK_RESTORE 0x4
static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
u32 chid, bool add,
@@ -3282,7 +3283,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
u32 new_buf;
struct channel_gk20a *ch = NULL;
struct tsg_gk20a *tsg = NULL;
u32 count = 0;
u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
runlist = &f->runlist_info[runlist_id];
@@ -3345,12 +3345,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
ret = -E2BIG;
goto clean_up;
}
count = (runlist_end - runlist_entry_base) / runlist_entry_words;
WARN_ON(count > f->num_runlist_entries);
runlist->count = (runlist_end - runlist_entry_base) /
runlist_entry_words;
WARN_ON(runlist->count > f->num_runlist_entries);
} else /* suspend to remove all channels */
count = 0;
runlist->count = 0;
g->ops.fifo.runlist_hw_submit(g, runlist_id, count, new_buf);
g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
if (wait_for_finish) {
ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
@@ -3406,31 +3407,98 @@ end:
return ret;
}
/* trigger host to expire current timeslice and reschedule runlist from front */
int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
bool wait_preempt)
{
struct gk20a *g = ch->g;
struct fifo_runlist_info_gk20a *runlist =
&g->fifo.runlist_info[ch->runlist_id];
int ret = 0;
u32 gr_eng_id = 0;
u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
s32 preempt_id = -1;
u32 preempt_type = 0;
if (1 != gk20a_fifo_get_engine_ids(
g, &gr_eng_id, 1, ENGINE_GR_GK20A))
return ret;
if (!(runlist->eng_bitmask & (1 << gr_eng_id)))
return ret;
if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
fifo_preempt_pending_true_f())
return ret;
fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
ctxstat = fifo_engine_status_ctx_status_v(engstat);
if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
/* host switching to next context, preempt that if needed */
preempt_id = fifo_engine_status_next_id_v(engstat);
preempt_type = fifo_engine_status_next_id_type_v(engstat);
} else
return ret;
if (preempt_id == ch->tsgid && preempt_type)
return ret;
fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
/* preempt useless if FECS acked save and started restore */
return ret;
}
gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
#ifdef TRACEPOINTS_ENABLED
trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
gk20a_readl(g, fifo_preempt_r()));
#endif
if (wait_preempt) {
g->ops.fifo.is_preempt_pending(
g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC);
}
#ifdef TRACEPOINTS_ENABLED
trace_gk20a_reschedule_preempted_next(ch->chid);
#endif
return ret;
}
int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
{
return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
}
/* trigger host to expire current timeslice and reschedule runlist from front */
int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
bool wait_preempt)
{
struct gk20a *g = ch->g;
struct fifo_runlist_info_gk20a *runlist;
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
u32 mutex_ret;
int ret = 0;
runlist = &g->fifo.runlist_info[runlist_id];
if (nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
runlist = &g->fifo.runlist_info[ch->runlist_id];
if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock))
return -EBUSY;
mutex_ret = nvgpu_pmu_mutex_acquire(
&g->pmu, PMU_MUTEX_ID_FIFO, &token);
gk20a_writel(g, fifo_runlist_r(),
gk20a_readl(g, fifo_runlist_r()));
gk20a_fifo_runlist_wait_pending(g, runlist_id);
g->ops.fifo.runlist_hw_submit(
g, ch->runlist_id, runlist->count, runlist->cur_buffer);
if (preempt_next)
__locked_fifo_reschedule_preempt_next(ch, wait_preempt);
gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
if (!mutex_ret)
nvgpu_pmu_mutex_release(
&g->pmu, PMU_MUTEX_ID_FIFO, &token);
nvgpu_mutex_release(&runlist->runlist_lock);
} else {
/* someone else is writing fifo_runlist_r so not needed here */
ret = -EBUSY;
}
return ret;
}


@@ -95,6 +95,7 @@ struct fifo_runlist_info_gk20a {
u32 pbdma_bitmask; /* pbdmas supported for this runlist*/
u32 eng_bitmask; /* engines using this runlist */
u32 reset_eng_bitmask; /* engines to be reset during recovery */
u32 count; /* cached runlist_hw_submit parameter */
bool stopped;
bool support_tsg;
/* protect ch/tsg/runlist preempt & runlist update */
@@ -249,7 +250,9 @@ void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg);
u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 chid);
int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id);
int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
bool wait_preempt);
int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
bool add, bool wait_for_finish);


@@ -566,7 +566,8 @@ struct gpu_ops {
int (*tsg_verify_channel_status)(struct channel_gk20a *ch);
void (*tsg_verify_status_ctx_reload)(struct channel_gk20a *ch);
void (*tsg_verify_status_faulted)(struct channel_gk20a *ch);
int (*reschedule_runlist)(struct gk20a *g, u32 runlist_id);
int (*reschedule_runlist)(struct channel_gk20a *ch,
bool preempt_next);
int (*update_runlist)(struct gk20a *g, u32 runlist_id,
u32 chid, bool add,
bool wait_for_finish);


@@ -664,6 +664,13 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
return runlists_mask;
}
int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
{
/* gv11b allows multiple outstanding preempts,
so always preempt next for best reschedule effect */
return nvgpu_fifo_reschedule_runlist(ch, true, false);
}
static void gv11b_fifo_issue_runlist_preempt(struct gk20a *g,
u32 runlists_mask)
{
@@ -842,7 +849,6 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
return ret;
}
static int gv11b_fifo_preempt_runlists(struct gk20a *g, u32 runlists_mask)
{
int ret = 0;


@@ -76,6 +76,7 @@ void gv11b_dump_channel_status_ramfc(struct gk20a *g,
void gv11b_dump_eng_status(struct gk20a *g,
struct gk20a_debug_output *o);
u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g);
int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
unsigned int id_type, unsigned int timeout_rc_type);
int gv11b_fifo_preempt_channel(struct gk20a *g, u32 chid);


@@ -154,6 +154,7 @@ int gv11b_init_gpu_characteristics(struct gk20a *g)
__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_SCG, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true);


@@ -503,6 +503,7 @@ static const struct gpu_ops gv11b_ops = {
.tsg_verify_channel_status = gk20a_fifo_tsg_unbind_channel_verify_status,
.tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
.tsg_verify_status_faulted = gv11b_fifo_tsg_verify_status_faulted,
.reschedule_runlist = gv11b_fifo_reschedule_runlist,
.update_runlist = gk20a_fifo_update_runlist,
.trigger_mmu_fault = NULL,
.get_mmu_fault_info = NULL,


@@ -96,7 +96,7 @@ struct gk20a;
#define NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING 35
/* Deterministic submits are supported even with job tracking */
#define NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL 36
/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
#define NVGPU_SUPPORT_RESCHEDULE_RUNLIST 37
/* NVGPU_GPU_IOCTL_GET_EVENT_FD is available */


@@ -1,7 +1,7 @@
/*
* gk20a event logging to ftrace.
*
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -346,6 +346,51 @@ TRACE_EVENT(gk20a_channel_submitted_gpfifo,
__entry->flags, __entry->incr_id, __entry->incr_value)
);
TRACE_EVENT(gk20a_reschedule_preempt_next,
TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2,
u32 preempt),
TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt),
TP_STRUCT__entry(
__field(u32, chid)
__field(u32, fecs0)
__field(u32, engstat)
__field(u32, fecs1)
__field(u32, fecs2)
__field(u32, preempt)
),
TP_fast_assign(
__entry->chid = chid;
__entry->fecs0 = fecs0;
__entry->engstat = engstat;
__entry->fecs1 = fecs1;
__entry->fecs2 = fecs2;
__entry->preempt = preempt;
),
TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x,"
" preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat,
__entry->fecs1, __entry->fecs2, __entry->preempt)
);
TRACE_EVENT(gk20a_reschedule_preempted_next,
TP_PROTO(u32 chid),
TP_ARGS(chid),
TP_STRUCT__entry(
__field(u32, chid)
),
TP_fast_assign(
__entry->chid = chid;
),
TP_printk("chid=%d", __entry->chid)
);
TRACE_EVENT(gk20a_channel_reset,
TP_PROTO(u32 chid, u32 tsgid),


@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
/* IO coherence support is available */
#define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20)
/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)
/* subcontexts are available */
#define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS (1ULL << 22)
@@ -1477,8 +1477,6 @@ struct nvgpu_fence {
#define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI (1 << 4)
/* skip buffer refcounting during submit */
#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5)
/* expire current timeslice and reschedule runlist from front */
#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6)
struct nvgpu_submit_gpfifo_args {
__u64 gpfifo;
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args {
__u32 syncpoint_max; /* out */
};
struct nvgpu_reschedule_runlist_args {
#define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT (1 << 0)
__u32 flags;
};
#define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \
_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
#define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args {
_IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
#define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \
_IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
#define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST \
_IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args)
#define NVGPU_IOCTL_CHANNEL_LAST \
_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT)
_IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST)
#define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)
/*