diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index ee38748bd..c0068571f 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -163,6 +163,7 @@ static int nvgpu_sw_quiesce_init_support(struct gk20a *g) /* register callback to SW quiesce GPU in case of BUG() */ g->sw_quiesce_bug_cb.cb = nvgpu_sw_quiesce_bug_cb; g->sw_quiesce_bug_cb.arg = g; + g->sw_quiesce_bug_cb.sw_quiesce_data = true; nvgpu_bug_register_cb(&g->sw_quiesce_bug_cb); #ifdef CONFIG_NVGPU_RECOVERY diff --git a/drivers/gpu/nvgpu/include/nvgpu/bug.h b/drivers/gpu/nvgpu/include/nvgpu/bug.h index 395c70c79..518a2df2a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/bug.h +++ b/drivers/gpu/nvgpu/include/nvgpu/bug.h @@ -92,6 +92,7 @@ struct nvgpu_bug_cb void (*cb)(void *arg); void *arg; struct nvgpu_list_node node; + bool sw_quiesce_data; }; static inline struct nvgpu_bug_cb * diff --git a/drivers/gpu/nvgpu/include/nvgpu/posix/bug.h b/drivers/gpu/nvgpu/include/nvgpu/posix/bug.h index 448560884..c3f30d6f1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/posix/bug.h +++ b/drivers/gpu/nvgpu/include/nvgpu/posix/bug.h @@ -99,7 +99,7 @@ void nvgpu_bug_cb_longjmp(void *arg); ({ \ jmp_buf handler; \ volatile bool bug_result = true; \ - struct nvgpu_bug_cb callback; \ + struct nvgpu_bug_cb callback = {0}; \ callback.cb = nvgpu_bug_cb_longjmp; \ callback.arg = &handler; \ nvgpu_bug_register_cb(&callback); \ diff --git a/drivers/gpu/nvgpu/os/posix/bug.c b/drivers/gpu/nvgpu/os/posix/bug.c index 113c98b4f..b716b253b 100644 --- a/drivers/gpu/nvgpu/os/posix/bug.c +++ b/drivers/gpu/nvgpu/os/posix/bug.c @@ -40,14 +40,20 @@ #endif struct nvgpu_bug_desc { +#ifdef __NVGPU_UNIT_TEST__ bool in_use; pthread_once_t once; struct nvgpu_spinlock lock; struct nvgpu_list_node head; +#endif + void (*quiesce_cb)(void *arg); + void *quiesce_arg; }; static struct nvgpu_bug_desc bug_desc = { +#ifdef __NVGPU_UNIT_TEST__ .once = PTHREAD_ONCE_INIT +#endif }; #ifdef __NVGPU_UNIT_TEST__ @@ -84,12 +90,14 @@ void dump_stack(void) nvgpu_posix_dump_stack(frames); } +#ifdef __NVGPU_UNIT_TEST__ static void nvgpu_bug_init(void) { nvgpu_spinlock_init(&bug_desc.lock); nvgpu_init_list_node(&bug_desc.head); bug_desc.in_use = true; } +#endif void nvgpu_bug_exit(int status) { @@ -101,26 +109,39 @@ void nvgpu_bug_exit(int status) void nvgpu_bug_register_cb(struct nvgpu_bug_cb *cb) { +#ifdef __NVGPU_UNIT_TEST__ int err; +#endif - err = pthread_once(&bug_desc.once, nvgpu_bug_init); - nvgpu_assert(err == 0); + if (cb->sw_quiesce_data) { + bug_desc.quiesce_cb = cb->cb; + bug_desc.quiesce_arg = cb->arg; + } else { +#ifdef __NVGPU_UNIT_TEST__ + err = pthread_once(&bug_desc.once, nvgpu_bug_init); + nvgpu_assert(err == 0); - nvgpu_spinlock_acquire(&bug_desc.lock); - nvgpu_list_add_tail(&cb->node, &bug_desc.head); - nvgpu_spinlock_release(&bug_desc.lock); + nvgpu_spinlock_acquire(&bug_desc.lock); + nvgpu_list_add_tail(&cb->node, &bug_desc.head); + nvgpu_spinlock_release(&bug_desc.lock); +#endif + } } void nvgpu_bug_unregister_cb(struct nvgpu_bug_cb *cb) { - int err; - - err = pthread_once(&bug_desc.once, nvgpu_bug_init); - nvgpu_assert(err == 0); - - nvgpu_spinlock_acquire(&bug_desc.lock); - nvgpu_list_del(&cb->node); - nvgpu_spinlock_release(&bug_desc.lock); + if (cb->sw_quiesce_data) { + bug_desc.quiesce_cb = NULL; + bug_desc.quiesce_arg = NULL; + } else { +#ifdef __NVGPU_UNIT_TEST__ + if (bug_desc.in_use) { + nvgpu_spinlock_acquire(&bug_desc.lock); + nvgpu_list_del(&cb->node); + nvgpu_spinlock_release(&bug_desc.lock); + } +#endif + } } /* @@ -129,13 +150,21 @@ void nvgpu_bug_unregister_cb(struct nvgpu_bug_cb *cb) void nvgpu_posix_bug(const char *msg, int line_no) { int err; +#ifdef __NVGPU_UNIT_TEST__ struct nvgpu_bug_cb *cb; +#endif nvgpu_err(NULL, "%s:%d BUG detected!", msg, line_no); + #ifndef __NVGPU_UNIT_TEST__ dump_stack(); #endif + if (bug_desc.quiesce_cb != NULL) { + bug_desc.quiesce_cb(bug_desc.quiesce_arg); + } + +#ifdef __NVGPU_UNIT_TEST__ if (!bug_desc.in_use) { goto done; } @@ -164,6 +193,8 @@ void nvgpu_posix_bug(const char *msg, int line_no) nvgpu_spinlock_release(&bug_desc.lock); done: +#endif + #ifdef __NVGPU_UNIT_TEST__ dump_stack(); #endif diff --git a/userspace/units/posix/bug/posix-bug.c b/userspace/units/posix/bug/posix-bug.c index e8a098e97..8ab99e00c 100644 --- a/userspace/units/posix/bug/posix-bug.c +++ b/userspace/units/posix/bug/posix-bug.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -117,8 +117,8 @@ static void other_bug_cb(void *arg) int test_bug_cb(struct unit_module *m, struct gk20a *g, void *args) { - struct nvgpu_bug_cb callback; - struct nvgpu_bug_cb other_callback; + struct nvgpu_bug_cb callback = {0}; + struct nvgpu_bug_cb other_callback = {0}; jmp_buf handler;