gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST

Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the runlist, and optionally check host and FECS status to preempt a pending load of a context not belonging to the calling channel on the GR engine during a context switch. This should be called immediately after a submit to decrease the worst-case submit-to-start latency for a high-interleave channel. There is a less than 0.002% chance that the ioctl blocks for up to a couple of milliseconds due to a race condition of the FECS status changing while being read. For GV11B it will always preempt a pending load of an unwanted context, since there is no chance that the ioctl blocks due to the race condition. Also fix a bug with host reschedule for multiple runlists, which needs to write both runlist registers. Bug 1987640 Bug 1924808 Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848 Signed-off-by: David Li <davli@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1549050 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2018-04-26 02:00:01 -07:00
parent 8ac538e1b1
commit a807cf2041
13 changed files with 183 additions and 43 deletions
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
 /* IO coherence support is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE		(1ULL << 20)
-/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
+/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST	(1ULL << 21)
 /*  subcontexts are available */
 #define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS         (1ULL << 22)
@@ -1477,8 +1477,6 @@ struct nvgpu_fence {
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI	(1 << 4)
 /* skip buffer refcounting during submit */
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING	(1 << 5)
-/* expire current timeslice and reschedule runlist from front */
-#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST	(1 << 6)

 struct nvgpu_submit_gpfifo_args {
 	__u64 gpfifo;
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args {
 	__u32 syncpoint_max;	/* out */
 };

+struct nvgpu_reschedule_runlist_args {
+#define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT           (1 << 0)
+	__u32 flags;
+};
+
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD	\
 	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT	\
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args {
 	_IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
 #define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \
 	_IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
+#define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST	\
+	_IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args)

 #define NVGPU_IOCTL_CHANNEL_LAST	\
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST)
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)

 /*