mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: add NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST
NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST causes host to expire the
current timeslice and reschedule from the front of the runlist. This can
be used together with NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH to make a
channel start sooner after submit, rather than waiting for natural
timeslice expiration or for the currently running channel to block or
finish.

Bug 1968813

Change-Id: I632e87c5f583a09ec8bf521dc73f595150abebb0
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: http://git-master/r/#/c/1537198
Reviewed-on: https://git-master.nvidia.com/r/1537198
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
committed by mobile promotions
parent 4995389f68
commit a199baede7
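As context for the change below, here is a hedged userspace sketch of how a client might use the new submit flag. It is not part of the commit: ch_fd (an open nvgpu channel fd), entries/n (a prepared gpfifo segment), and char_flags (pre-queried GPU characteristics flags) are assumptions, and error handling is elided. The uapi names come from the hunks in this diff.

/*
 * Hedged sketch (not part of this change): submit a gpfifo segment and,
 * when the device reports support, ask host to reschedule the runlist
 * from the front so this channel starts sooner. Most effective when the
 * channel runs at NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int submit_front_of_runlist(int ch_fd, __u64 entries, __u32 n,
				   __u64 char_flags)
{
	struct nvgpu_submit_gpfifo_args args;

	memset(&args, 0, sizeof(args));
	args.gpfifo = entries;		/* userspace VA of gpfifo entries */
	args.num_entries = n;
	/* only set the flag when the characteristics report support */
	if (char_flags & NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST)
		args.flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST;

	return ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO, &args);
}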
@@ -2668,6 +2668,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	g->ops.fifo.userd_gp_put(g, c);
 
+	if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & flags) &&
+			g->ops.fifo.reschedule_runlist)
+		g->ops.fifo.reschedule_runlist(g, c->runlist_id);
+
 	/* No hw access beyond this point */
 	if (c->deterministic)
 		up_read(&g->deterministic_busy);
@@ -3211,6 +3211,34 @@ end:
 	return ret;
 }
 
+/* trigger host to expire current timeslice and reschedule runlist from front */
+int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
+{
+	struct fifo_runlist_info_gk20a *runlist;
+	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+	u32 mutex_ret;
+	int ret = 0;
+
+	runlist = &g->fifo.runlist_info[runlist_id];
+	if (nvgpu_mutex_tryacquire(&runlist->mutex)) {
+		mutex_ret = nvgpu_pmu_mutex_acquire(
+			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+		gk20a_writel(g, fifo_runlist_r(),
+			gk20a_readl(g, fifo_runlist_r()));
+		gk20a_fifo_runlist_wait_pending(g, runlist_id);
+
+		if (!mutex_ret)
+			nvgpu_pmu_mutex_release(
+				&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+		nvgpu_mutex_release(&runlist->mutex);
+	} else {
+		/* someone else is writing fifo_runlist_r so not needed here */
+		ret = -EBUSY;
+	}
+	return ret;
+}
+
 /* add/remove a channel from runlist
    special cases below: runlist->active_channels will NOT be changed.
    (chid == ~0 && !add) means remove all active channels from runlist.
@@ -250,6 +250,8 @@ int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
 		bool wait_for_idle);
 
 u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 chid);
 
+int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id);
+
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
 			bool add, bool wait_for_finish);
@@ -476,6 +476,7 @@ struct gpu_ops {
 		int (*resetup_ramfc)(struct channel_gk20a *c);
 		int (*preempt_channel)(struct gk20a *g, u32 chid);
 		int (*preempt_tsg)(struct gk20a *g, u32 tsgid);
+		int (*reschedule_runlist)(struct gk20a *g, u32 runlist_id);
 		int (*update_runlist)(struct gk20a *g, u32 runlist_id,
 				u32 chid, bool add,
 				bool wait_for_finish);
@@ -1,7 +1,7 @@
 /*
  * GP10B Graphics
  *
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -108,6 +108,7 @@ int gp10b_init_gpu_characteristics(struct gk20a *g)
 {
 	gk20a_init_gpu_characteristics(g);
 	g->gpu_characteristics.flags |= gp10b_detect_ecc_enabled_units(g);
-
+	g->gpu_characteristics.flags |=
+		NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST;
 	return 0;
 }
@@ -386,6 +386,7 @@ static const struct gpu_ops gp10b_ops = {
 		.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
 		.preempt_channel = gk20a_fifo_preempt_channel,
 		.preempt_tsg = gk20a_fifo_preempt_tsg,
+		.reschedule_runlist = gk20a_fifo_reschedule_runlist,
 		.update_runlist = gk20a_fifo_update_runlist,
 		.trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault,
 		.get_mmu_fault_info = gp10b_fifo_get_mmu_fault_info,
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -17,4 +17,5 @@ void vgpu_gp10b_init_fifo_ops(struct gpu_ops *gops)
 {
 	/* syncpoint protection not supported yet */
 	gops->fifo.resetup_ramfc = NULL;
+	gops->fifo.reschedule_runlist = NULL;
 }
@@ -358,6 +358,8 @@ static int vgpu_init_gpu_characteristics(struct gk20a *g)
 	/* features vgpu does not support */
 	g->gpu_characteristics.flags &= ~NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
 	g->gpu_characteristics.flags &= ~NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;
+	g->gpu_characteristics.flags &=
+		~NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST;
 
 	return 0;
 }
@@ -144,6 +144,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL	(1ULL << 19)
 /* IO coherence support is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE		(1ULL << 20)
+/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
+#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST	(1ULL << 21)
 
 struct nvgpu_gpu_characteristics {
 	__u32 arch;
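The new characteristics bit is how userspace detects support before setting the submit flag. A hedged sketch of that capability check follows; it is not part of the commit, the struct and ioctl names follow my reading of include/uapi/linux/nvgpu.h and should be checked against the header, ctrl_fd (an open GPU ctrl-node fd) is an assumption, and error handling is elided.

/*
 * Hedged sketch (not part of this change): read the GPU characteristics
 * from the ctrl node and test the RESCHEDULE_RUNLIST support bit.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int reschedule_runlist_supported(int ctrl_fd)
{
	struct nvgpu_gpu_characteristics ch;
	struct nvgpu_gpu_get_characteristics req;

	memset(&ch, 0, sizeof(ch));
	memset(&req, 0, sizeof(req));
	req.gpu_characteristics_buf_size = sizeof(ch);
	req.gpu_characteristics_buf_addr = (uintptr_t)&ch;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) != 0)
		return 0; /* treat failure as "not supported" */

	return (ch.flags & NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST) != 0;
}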
@@ -1404,6 +1406,8 @@ struct nvgpu_fence {
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI		(1 << 4)
 /* skip buffer refcounting during submit */
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING	(1 << 5)
+/* expire current timeslice and reschedule runlist from front */
+#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST	(1 << 6)
 
 struct nvgpu_submit_gpfifo_args {
 	__u64 gpfifo;