gpu: nvgpu: posix: Multithreading for unit tests

Add a -j argument to enable running unit tests on several
threads. Also adds signal handling to prevent a fatal
error in one thread from killing the whole unit test
framework.

JIRA NVGPU-1043

Change-Id: I891a547640cd005a50ffa5c06367ed46c54de012
Signed-off-by: Nicolas Benech <nbenech@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1847740
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Nicolas Benech
2018-09-25 14:37:16 -04:00
committed by Abdul Salam
parent c2cf2252a9
commit ac87a707b3
6 changed files with 173 additions and 21 deletions

View File

@@ -42,6 +42,7 @@ struct unit_fw_args {
bool help; bool help;
int verbose_lvl; int verbose_lvl;
bool no_color; bool no_color;
int thread_count;
const char *unit_name; const char *unit_name;
const char *unit_load_path; const char *unit_load_path;

View File

@@ -23,6 +23,8 @@
#ifndef __UNIT_UNIT_H__ #ifndef __UNIT_UNIT_H__
#define __UNIT_UNIT_H__ #define __UNIT_UNIT_H__
#include <pthread.h>
struct gk20a; struct gk20a;
struct unit_module; struct unit_module;
@@ -84,6 +86,8 @@ struct unit_module {
*/ */
void *lib_handle; void *lib_handle;
struct unit_fw *fw; struct unit_fw *fw;
pthread_t thread;
}; };
/* /*

View File

@@ -36,11 +36,12 @@ static struct option core_opts[] = {
{ "no-color", 0, NULL, 'C' }, { "no-color", 0, NULL, 'C' },
{ "unit-load-path", 1, NULL, 'L' }, { "unit-load-path", 1, NULL, 'L' },
{ "num-threads", 1, NULL, 'j' },
{ NULL, 0, NULL, 0 } { NULL, 0, NULL, 0 }
}; };
static const char *core_opts_str = "hvqCL:"; static const char *core_opts_str = "hvqCL:j:";
void core_print_help(struct unit_fw *fw) void core_print_help(struct unit_fw *fw)
{ {
@@ -63,6 +64,8 @@ void core_print_help(struct unit_fw *fw)
" corrupt that file.\n", " corrupt that file.\n",
" -L, --unit-load-path <PATH>\n", " -L, --unit-load-path <PATH>\n",
" Path to where the unit test libraries reside.\n", " Path to where the unit test libraries reside.\n",
" -j, --num-threads <COUNT>\n",
" Number of threads to use while running all tests.\n",
"\n", "\n",
"Note: mandatory arguments to long arguments are mandatory for short\n", "Note: mandatory arguments to long arguments are mandatory for short\n",
"arguments as well.\n", "arguments as well.\n",
@@ -79,6 +82,7 @@ NULL
static void set_arg_defaults(struct unit_fw_args *args) static void set_arg_defaults(struct unit_fw_args *args)
{ {
args->unit_load_path = DEFAULT_ARG_UNIT_LOAD_PATH; args->unit_load_path = DEFAULT_ARG_UNIT_LOAD_PATH;
args->thread_count = 1;
} }
/* /*
@@ -121,6 +125,13 @@ int core_parse_args(struct unit_fw *fw, int argc, char **argv)
case 'L': case 'L':
args->unit_load_path = optarg; args->unit_load_path = optarg;
break; break;
case 'j':
args->thread_count = strtol(optarg, NULL, 10);
if (args->thread_count == 0) {
core_err(fw, "Invalid number of threads\n");
return -1;
}
break;
case '?': case '?':
args->help = true; args->help = true;
return -1; return -1;

View File

@@ -21,6 +21,10 @@
*/ */
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <unit/io.h> #include <unit/io.h>
#include <unit/core.h> #include <unit/core.h>
@@ -30,20 +34,40 @@
#include <nvgpu/posix/probe.h> #include <nvgpu/posix/probe.h>
/*
* Semaphore to limit the number of threads
*/
sem_t unit_thread_semaphore;
/*
* C11 thread local storage, used to access test context when a signal is
* received (ex: SIGSEGV) in a thread.
*/
_Thread_local struct unit_module *thread_local_module;
_Thread_local struct unit_module_test *thread_local_test;
/* /*
* Execute a module and all its subtests. This function builds a gk20a for the * Execute a module and all its subtests. This function builds a gk20a for the
* test to use by executing nvgpu_posix_probe() and nvgpu_posix_cleanup(); * test to use by executing nvgpu_posix_probe() and nvgpu_posix_cleanup();
*/ */
static int core_exec_module(struct unit_fw *fw, static void *core_exec_module(void *module_param)
struct unit_module *module)
{ {
unsigned int i; unsigned int i;
struct gk20a *g = fw->nvgpu.nvgpu_posix_probe(); struct unit_module *module = (struct unit_module *) module_param;
struct gk20a *g;
if (!g) g = module->fw->nvgpu.nvgpu_posix_probe();
return -1;
core_vbs(fw, 1, "Execing module: %s\n", module->name); if (!g) {
core_msg_color(module->fw, C_RED,
" nvgpu_posix_probe failed: Module %s\n",
module->name);
goto thread_exit;
}
core_vbs(module->fw, 1, "Execing module: %s\n", module->name);
thread_local_module = module;
/* /*
* Execute each test within the module. No reinit is done between tests. * Execute each test within the module. No reinit is done between tests.
@@ -53,21 +77,86 @@ static int core_exec_module(struct unit_fw *fw,
for (i = 0; i < module->nr_tests; i++) { for (i = 0; i < module->nr_tests; i++) {
struct unit_module_test *t = module->tests + i; struct unit_module_test *t = module->tests + i;
int test_status; int test_status;
thread_local_test = t;
core_msg(fw, "Running %s.%s\n", module->name, t->name); core_msg(module->fw, "Running %s.%s\n", module->name,
t->name);
test_status = t->fn(module, g, t->args); test_status = t->fn(module, g, t->args);
if (test_status != UNIT_SUCCESS) if (test_status != UNIT_SUCCESS)
core_msg_color(fw, C_RED, core_msg_color(module->fw, C_RED,
" Unit error! Test %s.%s FAILED!\n", " Unit error! Test %s.%s FAILED!\n",
module->name, t->name); module->name, t->name);
core_add_test_record(fw, module, t, core_add_test_record(module->fw, module, t,
test_status == UNIT_SUCCESS); test_status == UNIT_SUCCESS);
} }
fw->nvgpu.nvgpu_posix_cleanup(g); module->fw->nvgpu.nvgpu_posix_cleanup(g);
core_vbs(module->fw, 1, "Module completed: %s\n", module->name);
thread_exit:
sem_post(&unit_thread_semaphore);
return NULL;
}
/*
* According to POSIX, "Signals which are generated by some action attributable
* to a particular thread, such as a hardware fault, shall be generated for the
* thread that caused the signal to be generated."
* This custom signal handler will be run from within the thread that caused the
* exception. Thanks to the context being saved in local thread storage, it is
* then trivial to report which test case failed, and then terminate the thread.
*/
/*
 * Handler installed for fatal signals when running multithreaded. It reports
 * the failing test using the module/test pointers the faulting thread stored
 * in thread-local storage, records the test as failed, releases the thread's
 * slot in unit_thread_semaphore and terminates only that thread.
 *
 * @sig:     number of the signal being handled.
 * @siginfo: unused.
 * @context: unused.
 *
 * The thread-local pointers are only set once a worker has started running a
 * module's tests. If the signal is handled on a thread that never set them
 * (the main thread, or a worker that faulted before reaching its first test),
 * dereferencing them would fault again inside the handler. In that case there
 * is no test to blame, so the default disposition is restored and the signal
 * is re-raised to let the process terminate the normal way.
 *
 * NOTE(review): core_msg_color(), core_add_test_record() (which locks a mutex
 * and calls malloc()) and pthread_exit() are not on the POSIX list of
 * async-signal-safe functions. This works in practice for a test harness but
 * can deadlock if the fault happens while the same thread holds one of those
 * locks - confirm this trade-off is acceptable.
 */
static void thread_error_handler(int sig, siginfo_t *siginfo, void *context)
{
	(void) siginfo;
	(void) context;

	if (thread_local_module == NULL || thread_local_test == NULL) {
		/* No test context on this thread: fall back to default action. */
		signal(sig, SIG_DFL);
		raise(sig);
		return;
	}

	core_msg_color(thread_local_module->fw, C_RED,
		" Signal %d in Test: %s.%s!\n", sig,
		thread_local_module->name, thread_local_test->name);
	core_add_test_record(thread_local_module->fw, thread_local_module,
		thread_local_test, false);

	/* Free this thread's slot so core_exec() can start the next module. */
	sem_post(&unit_thread_semaphore);
	pthread_exit(NULL);
}
/*
* Install a custom signal handler for several signals to be used when running
* in multithreaded environment.
*/
/*
 * Registers thread_error_handler() through sigaction() for SIGSEGV, SIGILL,
 * SIGFPE, SIGBUS and SIGSYS, in that order.
 *
 * Returns 0 when every registration succeeded, otherwise the negative value
 * returned by the first sigaction() call that failed. Handlers installed
 * before the failing call are left in place.
 */
static int install_thread_error_handler(void)
{
struct sigaction action;
int err;
/*
 * Start from an all-zero sigaction so the fields not assigned below
 * (such as sa_mask) are not left uninitialized.
 */
memset(&action, 0, sizeof(action));
action.sa_sigaction = &thread_error_handler;
/* SA_SIGINFO selects the three-argument sa_sigaction form of the handler. */
action.sa_flags = SA_SIGINFO;
/* SIGSEGV: Invalid memory reference */
err = sigaction(SIGSEGV, &action, NULL);
if (err < 0) {
return err;
}
/* SIGILL: Illegal Instruction */
err = sigaction(SIGILL, &action, NULL);
if (err < 0) {
return err;
}
/* SIGFPE: Floating-point exception */
err = sigaction(SIGFPE, &action, NULL);
if (err < 0) {
return err;
}
/* SIGBUS: Bus error */
err = sigaction(SIGBUS, &action, NULL);
if (err < 0) {
return err;
}
/* SIGSYS: Bad system call */
err = sigaction(SIGSYS, &action, NULL);
if (err < 0) {
return err;
}
return 0; return 0;
} }
@@ -76,14 +165,39 @@ static int core_exec_module(struct unit_fw *fw,
*/ */
int core_exec(struct unit_fw *fw) int core_exec(struct unit_fw *fw)
{ {
int ret;
struct unit_module **modules; struct unit_module **modules;
int err = 0;
core_vbs(fw, 1, "Using %d threads\n", fw->args->thread_count);
sem_init(&unit_thread_semaphore, 0, fw->args->thread_count);
/*
* If running single threaded, keep the default SIGSEGV handler to make
* interactive debugging easier, otherwise install the custom one.
*/
if (fw->args->thread_count > 1) {
err = install_thread_error_handler();
if (err != 0) {
core_msg_color(fw, C_RED,
" Failed to install signal handler!\n");
return err;
}
}
for (modules = fw->modules; *modules != NULL; modules++) { for (modules = fw->modules; *modules != NULL; modules++) {
ret = core_exec_module(fw, *modules); if (fw->args->thread_count == 1) {
core_exec_module(*modules);
} else {
sem_wait(&unit_thread_semaphore);
pthread_create(&((*modules)->thread), NULL,
core_exec_module, (void *) *modules);
}
}
if (ret != 0) if (fw->args->thread_count > 1) {
return ret; for (modules = fw->modules; *modules != NULL; modules++) {
pthread_join((*modules)->thread, NULL);
}
} }
return 0; return 0;

View File

@@ -22,12 +22,18 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <pthread.h>
#include <unit/io.h> #include <unit/io.h>
#include <unit/core.h> #include <unit/core.h>
#include <unit/unit.h> #include <unit/unit.h>
#include <unit/results.h> #include <unit/results.h>
/*
* Mutex to ensure core_add_test_record() is thread safe.
*/
pthread_mutex_t mutex_results = PTHREAD_MUTEX_INITIALIZER;
static int __init_results(struct unit_fw *fw) static int __init_results(struct unit_fw *fw)
{ {
struct unit_results *results; struct unit_results *results;
@@ -72,16 +78,22 @@ int core_add_test_record(struct unit_fw *fw,
bool success) bool success)
{ {
struct unit_test_record *tr; struct unit_test_record *tr;
int err = 0;
pthread_mutex_lock(&mutex_results);
/* /*
* Dones nothing if results are already inited. * Does nothing if results are already inited.
*/ */
if (__init_results(fw) != 0) if (__init_results(fw) != 0) {
return -1; err = -1;
goto done;
}
tr = malloc(sizeof(*tr)); tr = malloc(sizeof(*tr));
if (tr == NULL) if (tr == NULL) {
return -1; err = -1;
goto done;
}
tr->mod = mod; tr->mod = mod;
tr->test = test; tr->test = test;
@@ -97,7 +109,9 @@ int core_add_test_record(struct unit_fw *fw,
if (success) if (success)
fw->results->nr_passing += 1; fw->results->nr_passing += 1;
return 0; done:
pthread_mutex_unlock(&mutex_results);
return err;
} }
void core_print_test_status(struct unit_fw *fw) void core_print_test_status(struct unit_fw *fw)

View File

@@ -72,5 +72,13 @@ int main(int argc, char **argv)
core_print_test_status(fw); core_print_test_status(fw);
if (fw->results->nr_tests == 0) {
/* No tests were run */
return -1;
} else if ((fw->results->nr_tests - fw->results->nr_passing) != 0) {
/* Some tests failed */
return -1;
}
return 0; return 0;
} }