gpu: nvgpu: posix: Multithreading for unit tests

Add a -j argument to enable running unit tests on several threads. Also adds
signal handling to prevent a fatal error in one thread from killing the whole
unit test framework.

JIRA NVGPU-1043

Change-Id: I891a547640cd005a50ffa5c06367ed46c54de012
Signed-off-by: Nicolas Benech <nbenech@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1847740
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-24 02:22:34 +03:00 · 2018-09-25 14:37:16 -04:00
parent c2cf2252a9
commit ac87a707b3
6 changed files with 173 additions and 21 deletions
--- a/userspace/src/exec.c
+++ b/userspace/src/exec.c
@@ -21,6 +21,10 @@
 */

 #include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>

 #include <unit/io.h>
 #include <unit/core.h>
@@ -30,20 +34,40 @@

 #include <nvgpu/posix/probe.h>

+/*
+ * Semaphore to limit the number of threads
+ */
+sem_t unit_thread_semaphore;
+
+/*
+ * C11 thread local storage, used to access test context when a signal is
+ * received (e.g. SIGSEGV) in a thread.
+ */
+_Thread_local struct unit_module *thread_local_module;
+_Thread_local struct unit_module_test *thread_local_test;
+
 /*
 * Execute a module and all its subtests. This function builds a gk20a for the
 * test to use by executing nvgpu_posix_probe() and nvgpu_posix_cleanup();
 */
-static int core_exec_module(struct unit_fw *fw,
-			    struct unit_module *module)
+static void *core_exec_module(void *module_param)
 {
 	unsigned int i;
-	struct gk20a *g = fw->nvgpu.nvgpu_posix_probe();
+	struct unit_module *module = (struct unit_module *) module_param;
+	struct gk20a *g;

-	if (!g)
-		return -1;
+	g = module->fw->nvgpu.nvgpu_posix_probe();

-	core_vbs(fw, 1, "Execing module: %s\n", module->name);
+	if (!g) {
+		core_msg_color(module->fw, C_RED,
+				"  nvgpu_posix_probe failed: Module %s\n",
+				module->name);
+		goto thread_exit;
+	}
+
+	core_vbs(module->fw, 1, "Execing module: %s\n", module->name);
+
+	thread_local_module = module;

 	/*
 	 * Execute each test within the module. No reinit is done between tests.
@@ -53,21 +77,86 @@ static int core_exec_module(struct unit_fw *fw,
 	for (i = 0; i < module->nr_tests; i++) {
 		struct unit_module_test *t = module->tests + i;
 		int test_status;
+		thread_local_test = t;

-		core_msg(fw, "Running %s.%s\n", module->name, t->name);
+		core_msg(module->fw, "Running %s.%s\n", module->name,
+			t->name);
 		test_status = t->fn(module, g, t->args);

 		if (test_status != UNIT_SUCCESS)
-			core_msg_color(fw, C_RED,
+			core_msg_color(module->fw, C_RED,
 				       "  Unit error! Test %s.%s FAILED!\n",
 				       module->name, t->name);

-		core_add_test_record(fw, module, t,
+		core_add_test_record(module->fw, module, t,
 				     test_status == UNIT_SUCCESS);
 	}

-	fw->nvgpu.nvgpu_posix_cleanup(g);
+	module->fw->nvgpu.nvgpu_posix_cleanup(g);

+	core_vbs(module->fw, 1, "Module completed: %s\n", module->name);
+thread_exit:
+	sem_post(&unit_thread_semaphore);
+	return NULL;
+}
+
+/*
+ * According to POSIX, "Signals which are generated by some action attributable
+ * to a particular thread, such as a hardware fault, shall be generated for the
+ * thread that caused the signal to be generated."
+ * This custom signal handler will be run from within the thread that caused the
+ * exception. Thanks to the context being saved in thread-local storage, it is
+ * then trivial to report which test case failed, and then terminate the thread.
+ */
+static void thread_error_handler(int sig, siginfo_t *siginfo, void *context)
+{
+	core_msg_color(thread_local_module->fw, C_RED,
+			"  Signal %d in Test: %s.%s!\n", sig,
+			thread_local_module->name, thread_local_test->name);
+	core_add_test_record(thread_local_module->fw, thread_local_module,
+			thread_local_test, false);
+	sem_post(&unit_thread_semaphore);
+	pthread_exit(NULL);
+}
+
+/*
+ * Install a custom signal handler for several signals to be used when running
+ * in a multithreaded environment.
+ */
+static int install_thread_error_handler(void)
+{
+	struct sigaction action;
+	int err;
+
+	memset(&action, 0, sizeof(action));
+	action.sa_sigaction = &thread_error_handler;
+	action.sa_flags = SA_SIGINFO;
+
+	/* SIGSEGV: Invalid memory reference */
+	err = sigaction(SIGSEGV, &action, NULL);
+	if (err < 0) {
+		return err;
+	}
+	/* SIGILL: Illegal Instruction */
+	err = sigaction(SIGILL, &action, NULL);
+	if (err < 0) {
+		return err;
+	}
+	/* SIGFPE: Floating-point exception */
+	err = sigaction(SIGFPE, &action, NULL);
+	if (err < 0) {
+		return err;
+	}
+	/* SIGBUS: Bus error */
+	err = sigaction(SIGBUS, &action, NULL);
+	if (err < 0) {
+		return err;
+	}
+	/* SIGSYS: Bad system call */
+	err = sigaction(SIGSYS, &action, NULL);
+	if (err < 0) {
+		return err;
+	}
 	return 0;
 }

@@ -76,14 +165,39 @@ static int core_exec_module(struct unit_fw *fw,
 */
 int core_exec(struct unit_fw *fw)
 {
-	int ret;
 	struct unit_module **modules;
+	int err = 0;
+
+	core_vbs(fw, 1, "Using %d threads\n", fw->args->thread_count);
+	sem_init(&unit_thread_semaphore, 0, fw->args->thread_count);
+
+	/*
+	 * If running single threaded, keep the default signal handlers to make
+	 * interactive debugging easier, otherwise install the custom one.
+	 */
+	if (fw->args->thread_count > 1) {
+		err = install_thread_error_handler();
+		if (err != 0) {
+			core_msg_color(fw, C_RED,
+			"  Failed to install signal handler!\n");
+			return err;
+		}
+	}

 	for (modules = fw->modules; *modules != NULL; modules++) {
-		ret = core_exec_module(fw, *modules);
+		if (fw->args->thread_count == 1) {
+			core_exec_module(*modules);
+		} else {
+			sem_wait(&unit_thread_semaphore);
+			pthread_create(&((*modules)->thread), NULL,
+				core_exec_module, (void *) *modules);
+		}
+	}

-		if (ret != 0)
-			return ret;
+	if (fw->args->thread_count > 1) {
+		for (modules = fw->modules; *modules != NULL; modules++) {
+			pthread_join((*modules)->thread, NULL);
+		}
 	}

 	return 0;