mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: modify priority of NVS worker thread
In linux threaded interrupts run with a Realtime priority of 50. This bumps up the priority of bottom-half handlers over regular kernel/User threads even during process context. In the current implementation scheduler thread still runs in normal kernel thread priority. In order to allow a seamless scheduling experience, the worker thread is now created with a Realtime priority of 1. This allows for the Worker thread to work at a priority lower than interrupt handlers but higher than the regular kernel threads. Linux kernel allows setting priority with the help of sched_set_fifo() API. Only two modes are supported i.e. sched_set_fifo() and sched_set_fifo_low(). For more reference, refer to this article https://lwn.net/Articles/818388/. Added an implementation of nvgpu_thread_create_priority() for linux thread using the above two APIs. Jira NVGPU-860 Signed-off-by: Debarshi Dutta <ddutta@nvidia.com> Change-Id: I0a5a611bf0e0a5b9bb51354c6ff0a99e42e76e2f Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2751736 Reviewed-by: Prateek Sethi <prsethi@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
13699c4c15
commit
1d4b7b1c5d
@@ -895,7 +895,6 @@ int nvgpu_finalize_poweron(struct gk20a *g)
|
||||
|
||||
NVGPU_INIT_TABLE_ENTRY(g->ops.mm.init_mm_support, NO_FLAG),
|
||||
NVGPU_INIT_TABLE_ENTRY(g->ops.fifo.fifo_init_support, NO_FLAG),
|
||||
NVGPU_INIT_TABLE_ENTRY(nvgpu_nvs_init, NO_FLAG),
|
||||
NVGPU_INIT_TABLE_ENTRY(g->ops.therm.elcg_init_idle_filters,
|
||||
NO_FLAG),
|
||||
NVGPU_INIT_TABLE_ENTRY(&nvgpu_netlist_init_ctx_vars, NO_FLAG),
|
||||
@@ -982,6 +981,7 @@ int nvgpu_finalize_poweron(struct gk20a *g)
|
||||
NVGPU_INIT_TABLE_ENTRY(g->ops.pmu.pmu_restore_golden_img_state,
|
||||
NO_FLAG),
|
||||
#endif
|
||||
NVGPU_INIT_TABLE_ENTRY(nvgpu_nvs_init, NO_FLAG),
|
||||
NVGPU_INIT_TABLE_ENTRY(g->ops.channel.resume_all_serviceable_ch,
|
||||
NO_FLAG),
|
||||
#ifndef CONFIG_NVGPU_DGPU
|
||||
|
||||
@@ -423,7 +423,19 @@ static int nvgpu_nvs_worker_init(struct gk20a *g)
|
||||
|
||||
nvgpu_worker_init_name(worker, "nvgpu_nvs", g->name);
|
||||
|
||||
#ifdef __KERNEL__
|
||||
/* Schedule a worker thread with an RT (SCHED_FIFO) priority of 1 for Linux.
|
||||
* Linux uses CFS scheduling class by default for all kernel threads.
|
||||
* CFS prioritizes threads that have executed for the least amount
|
||||
* of time and as a result other higher priority kernel threads
|
||||
* can get delayed.
|
||||
* Using a RT priority of 1 for linux, ensures that this thread
|
||||
* always executes before other regular kernel threads.
|
||||
*/
|
||||
err = nvgpu_priority_worker_init(g, worker, 1, &nvs_worker_ops);
|
||||
#else
|
||||
err = nvgpu_worker_init(g, worker, &nvs_worker_ops);
|
||||
#endif
|
||||
if (err != 0) {
|
||||
/* Ensure that scheduler thread is started as soon as possible to handle
|
||||
* minimal uptime for applications.
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/worker.h>
|
||||
#include <nvgpu/string.h>
|
||||
#include <nvgpu/thread.h>
|
||||
|
||||
static void nvgpu_worker_pre_process(struct nvgpu_worker *worker)
|
||||
{
|
||||
@@ -210,6 +211,51 @@ static int nvgpu_worker_start(struct nvgpu_worker *worker)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Start the worker's background poller thread at the given RT priority.
 *
 * Idempotent: if the poll task is already running, returns 0 without
 * taking the lock. The check is repeated under start_lock to close the
 * race between two callers starting the worker concurrently.
 *
 * Returns 0 on success or a negative error code from thread creation.
 */
static s32 nvgpu_worker_priority_start(struct nvgpu_worker *worker,
		int priority)
{
	s32 err = 0;

	/* Fast path: already running, nothing to do. */
	if (nvgpu_thread_is_running(&worker->poll_task)) {
		return err;
	}

	nvgpu_mutex_acquire(&worker->start_lock);

	/*
	 * Mutexes have implicit barriers, so there is no risk of a thread
	 * having a stale copy of the poll_task variable as the call to
	 * thread_is_running is volatile
	 */

	/* Re-check under the lock: another caller may have started it. */
	if (nvgpu_thread_is_running(&worker->poll_task)) {
		nvgpu_mutex_release(&worker->start_lock);
		return err;
	}

	/*
	 * HVRTOS doesn't support dynamic threads; it provides only a
	 * minimal implementation of nvgpu_thread_create(), so use that
	 * there instead of introducing a new priority-aware API.
	 */
#ifndef NVGPU_HVRTOS
	err = nvgpu_thread_create_priority(&worker->poll_task, worker,
			nvgpu_worker_poll_work, priority, worker->thread_name);
#else
	(void)priority;
	err = nvgpu_thread_create(&worker->poll_task, worker,
			nvgpu_worker_poll_work, worker->thread_name);
#endif
	if (err != 0) {
		nvgpu_err(worker->g,
			"failed to create priority worker poller thread %s err %d",
			worker->thread_name, err);
	}

	nvgpu_mutex_release(&worker->start_lock);
	return err;
}
|
||||
|
||||
bool nvgpu_worker_should_stop(struct nvgpu_worker *worker)
|
||||
{
|
||||
return nvgpu_thread_should_stop(&worker->poll_task);
|
||||
@@ -274,11 +320,9 @@ void nvgpu_worker_init_name(struct nvgpu_worker *worker,
|
||||
(void) strncat(worker->thread_name, gpu_name, num_free_chars);
|
||||
}
|
||||
|
||||
int nvgpu_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
|
||||
const struct nvgpu_worker_ops *worker_ops)
|
||||
static void nvgpu_worker_init_common(struct gk20a *g,
|
||||
struct nvgpu_worker *worker, const struct nvgpu_worker_ops *worker_ops)
|
||||
{
|
||||
int err;
|
||||
|
||||
worker->g = g;
|
||||
nvgpu_atomic_set(&worker->put, 0);
|
||||
(void) nvgpu_cond_init(&worker->wq);
|
||||
@@ -287,6 +331,14 @@ int nvgpu_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
|
||||
nvgpu_mutex_init(&worker->start_lock);
|
||||
|
||||
worker->ops = worker_ops;
|
||||
}
|
||||
|
||||
int nvgpu_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
|
||||
const struct nvgpu_worker_ops *worker_ops)
|
||||
{
|
||||
int err;
|
||||
|
||||
nvgpu_worker_init_common(g, worker, worker_ops);
|
||||
|
||||
err = nvgpu_worker_start(worker);
|
||||
if (err != 0) {
|
||||
@@ -297,6 +349,22 @@ int nvgpu_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_priority_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
|
||||
int priority, const struct nvgpu_worker_ops *worker_ops)
|
||||
{
|
||||
int err;
|
||||
|
||||
nvgpu_worker_init_common(g, worker, worker_ops);
|
||||
|
||||
err = nvgpu_worker_priority_start(worker, priority);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to start worker poller thread %s",
|
||||
worker->thread_name);
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_worker_deinit(struct nvgpu_worker *worker)
|
||||
{
|
||||
nvgpu_mutex_acquire(&worker->start_lock);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -28,4 +28,44 @@ struct nvgpu_thread {
|
||||
void *data;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Create a thread with RT priority.
|
||||
*
|
||||
* @param thread [in] Thread to create.
|
||||
* @param data [in] Data to pass to threadfn.
|
||||
* @param threadfn [in] Thread function.
|
||||
* @param priority [in] Priority of the thread to be created.
|
||||
* @param name [in] Name of the thread.
|
||||
*
|
||||
* In Linux, kernel doesn't provide a way to set any arbitrary priority.
|
||||
* There are two main kinds of priority that can be set.
|
||||
* sched_set_fifo() and sched_set_fifo_low().
|
||||
*
|
||||
* sched_set_fifo() is used to set the thread's priority to MAX_RT_PRIO / 2.
|
||||
* This is the priority used by threaded interrupts internally as well.
|
||||
*
|
||||
* sched_set_fifo_low() sets the thread's priority to 1. This sets the
|
||||
* current thread's RT priority to be 1(higher than other regular kernel threads).
|
||||
*
|
||||
* Normally Linux uses a CFS based scheduler which prioritizes processes
|
||||
* based on increasing order of actual time spent. This adds a kind of
|
||||
* non-determinism for processes. In Linux, RT scheduling classes have
|
||||
* a higher rank compared to regular threads with CFS scheduler. Thus,
|
||||
* there is a guarantee to pick RT threads deterministically before
|
||||
* other non-RT threads.
|
||||
*
|
||||
* For more reference, refer to this article
|
||||
* https://lwn.net/Articles/818388/.
|
||||
*
|
||||
*
|
||||
* @return Return 0 on success, else return the error number to indicate the
|
||||
* error. The error numbers returned are generated by the OS APIs invoked
|
||||
* internally by this function.
|
||||
*
|
||||
* start routine provided for thread creation.
|
||||
*/
|
||||
int nvgpu_thread_create_priority(struct nvgpu_thread *thread,
|
||||
void *data, int (*threadfn)(void *data),
|
||||
int priority, const char *name);
|
||||
|
||||
#endif /* __NVGPU_THREAD_LINUX_H__ */
|
||||
|
||||
@@ -311,6 +311,9 @@ void nvgpu_worker_init_name(struct nvgpu_worker *worker,
|
||||
int nvgpu_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
|
||||
const struct nvgpu_worker_ops *worker_ops);
|
||||
|
||||
int nvgpu_priority_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
|
||||
int priority, const struct nvgpu_worker_ops *worker_ops);
|
||||
|
||||
/**
|
||||
* @brief Stop the background thread associated with the worker.
|
||||
*
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -15,6 +15,8 @@
|
||||
*/
|
||||
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
#include <nvgpu/thread.h>
|
||||
#include <nvgpu/timers.h>
|
||||
@@ -56,6 +58,47 @@ int nvgpu_thread_create(struct nvgpu_thread *thread,
|
||||
return 0;
|
||||
};
|
||||
|
||||
/*
 * Create a Linux kernel thread and give it an RT (SCHED_FIFO) priority.
 *
 * @thread    [in] nvgpu thread wrapper to fill in.
 * @data      [in] Opaque pointer handed to threadfn.
 * @threadfn  [in] Thread entry function, invoked through nvgpu_thread_proxy.
 * @priority  [in] Requested priority; > 1 selects the threaded-interrupt
 *                 level (sched_set_fifo / MAX_RT_PRIO / 2), otherwise the
 *                 lowest RT level (sched_set_fifo_low / priority 1).
 *
 * The task is created stopped, its scheduling class set, the wrapper
 * populated, and only then woken — so threadfn never observes a
 * partially-initialized nvgpu_thread.
 *
 * Returns 0 on success, or the PTR_ERR() value from kthread_create().
 */
int nvgpu_thread_create_priority(struct nvgpu_thread *thread,
		void *data, int (*threadfn)(void *data),
		int priority, const char *name)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)
	/* Pre-5.9 kernels take an explicit sched_param instead of the
	 * sched_set_fifo*() helpers. */
	struct sched_param sparam = {0};
#endif

	struct task_struct *task = kthread_create(nvgpu_thread_proxy,
			thread, name);
	if (IS_ERR(task))
		return PTR_ERR(task);

	if (priority > 1) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 9, 0)
		/* Higher priority tasks are run in threaded interrupt priority level */
		sched_set_fifo(task);
#else
		/* Same level sched_set_fifo() uses on newer kernels. */
		sparam.sched_priority = MAX_RT_PRIO / 2;
		/* NOTE(review): return value ignored — confirm failure here
		 * is acceptable (thread would then run at normal priority). */
		sched_setscheduler(task, SCHED_FIFO, &sparam);
#endif
	} else {
		/* Only cares about setting a priority higher than normal,
		 * Lower than threaded interrupt priority but higher than normal.
		 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 9, 0)
		sched_set_fifo_low(task);
#else
		sparam.sched_priority = 1;
		sched_setscheduler(task, SCHED_FIFO, &sparam);
#endif
	}

	/* Fill the wrapper before waking the task so the proxy entry
	 * point sees consistent state. */
	thread->task = task;
	thread->fn = threadfn;
	thread->data = data;
	nvgpu_atomic_set(&thread->running, true);
	wake_up_process(task);
	return 0;
}
|
||||
|
||||
void nvgpu_thread_stop(struct nvgpu_thread *thread)
|
||||
{
|
||||
bool was_running;
|
||||
|
||||
Reference in New Issue
Block a user