From 1f3f34b9068e1f23afb48577821898503d0e379c Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Thu, 19 Dec 2019 15:00:39 -0500 Subject: [PATCH] gpu: nvgpu: add BUG() callbacks Add support for registering callbacks that will be called on BUG(). Jira NVGPU-4512 Change-Id: I35c9b6c17db3b9fa5d098918223083f0b4aaace4 Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2266391 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/include/nvgpu/bug.h | 22 +++++++ drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 3 +- drivers/gpu/nvgpu/include/nvgpu/posix/bug.h | 31 ++++++++- drivers/gpu/nvgpu/os/posix/bug.c | 70 ++++++++++++++++++++- 4 files changed, 123 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nvgpu/include/nvgpu/bug.h b/drivers/gpu/nvgpu/include/nvgpu/bug.h index 82e666097..477bfaeac 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/bug.h +++ b/drivers/gpu/nvgpu/include/nvgpu/bug.h @@ -28,6 +28,7 @@ #include #endif #include +#include /* * Define an assert macro that code within nvgpu can use. @@ -92,4 +93,25 @@ struct gk20a; nvgpu_do_assert(); \ } while (false) + +struct nvgpu_bug_cb /* descriptor for a callback run on BUG() */ +{ + void (*cb)(void *arg); /* function to invoke */ + void *arg; /* argument handed to cb */ + struct nvgpu_list_node node; /* linkage in the list of callbacks */ +}; + +static inline struct nvgpu_bug_cb * +nvgpu_bug_cb_from_node(struct nvgpu_list_node *node) +{ /* map an embedded list node back to its enclosing nvgpu_bug_cb */ + return (struct nvgpu_bug_cb *) + ((uintptr_t)node - offsetof(struct nvgpu_bug_cb, node)); +} + +#ifdef __KERNEL__ +static inline void nvgpu_bug_exit(void) { } +static inline void nvgpu_bug_register_cb(struct nvgpu_bug_cb *cb) { } +static inline void nvgpu_bug_unregister_cb(struct nvgpu_bug_cb *cb) { } +#endif + #endif /* NVGPU_BUG_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 0d25e6845..39d3f1d7a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. 
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. * * GK20A Graphics * @@ -698,6 +698,7 @@ struct gk20a { struct nvgpu_cond sw_quiesce_cond; struct nvgpu_thread sw_quiesce_thread; #endif + struct nvgpu_list_node bug_node; /** Controls which messages are logged */ u64 log_mask; diff --git a/drivers/gpu/nvgpu/include/nvgpu/posix/bug.h b/drivers/gpu/nvgpu/include/nvgpu/posix/bug.h index e7f433bca..10518652a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/posix/bug.h +++ b/drivers/gpu/nvgpu/include/nvgpu/posix/bug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -134,4 +134,33 @@ void bug_handler_cancel(void); bug_result; \ }) #endif + +struct nvgpu_bug_cb; + +/** + * @brief Register callback to be invoked on BUG() + * + * @param cb [in] Pointer to callback structure + * + * Register a callback to be invoked on BUG(). + * The nvgpu_bug_cb structure contains a function pointer + * and an argument to be passed to this function. + * This mechanism can be used to perform some emergency + * operations on a GPU before exiting the process. + * + * Note: the callback is automatically unregistered before + * it is invoked. + */ +void nvgpu_bug_register_cb(struct nvgpu_bug_cb *cb); + +/** + * @brief Unregister a callback for BUG() + * + * @param cb [in] Pointer to callback structure + * + * Remove a callback from the list of callbacks to be + * invoked on BUG(). 
+ */ +void nvgpu_bug_unregister_cb(struct nvgpu_bug_cb *cb); + #endif /* NVGPU_POSIX_BUG_H */ diff --git a/drivers/gpu/nvgpu/os/posix/bug.c b/drivers/gpu/nvgpu/os/posix/bug.c index 015177a7e..61d490d26 100644 --- a/drivers/gpu/nvgpu/os/posix/bug.c +++ b/drivers/gpu/nvgpu/os/posix/bug.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,6 +21,8 @@ */ #include +#include +#include #include #include #include @@ -75,11 +77,48 @@ void dump_stack(void) nvgpu_posix_dump_stack(2); } +struct nvgpu_bug_desc { + bool in_use; /* set once nvgpu_bug_init() has run */ + pthread_once_t once; /* guards the one-time nvgpu_bug_init() */ + struct nvgpu_spinlock lock; /* held while touching head */ + struct nvgpu_list_node head; /* list of registered nvgpu_bug_cb */ +}; + +static struct nvgpu_bug_desc bug = { /* file-local: only used in this unit */ + .once = PTHREAD_ONCE_INIT +}; + +static void nvgpu_bug_init(void) +{ + nvgpu_err(NULL, "doing init for bug cb"); + nvgpu_spinlock_init(&bug.lock); + nvgpu_init_list_node(&bug.head); + bug.in_use = true; +} + +void nvgpu_bug_register_cb(struct nvgpu_bug_cb *cb) +{ + (void) pthread_once(&bug.once, nvgpu_bug_init); + nvgpu_spinlock_acquire(&bug.lock); + nvgpu_list_add_tail(&cb->node, &bug.head); + nvgpu_spinlock_release(&bug.lock); +} + +void nvgpu_bug_unregister_cb(struct nvgpu_bug_cb *cb) +{ + (void) pthread_once(&bug.once, nvgpu_bug_init); + nvgpu_spinlock_acquire(&bug.lock); + nvgpu_list_del(&cb->node); + nvgpu_spinlock_release(&bug.lock); +} + /* * Ahhh! A bug! */ void nvgpu_posix_bug(const char *fmt, ...) { + struct nvgpu_bug_cb *cb; + #ifdef __NVGPU_UNIT_TEST__ if (expect_bug) { nvgpu_info(NULL, "Expected BUG detected!"); @@ -94,6 +133,35 @@ void nvgpu_posix_bug(const char *fmt, ...) 
*/ nvgpu_err(NULL, "BUG detected!"); dump_stack(); + + if (!bug.in_use) { + goto done; /* nvgpu_bug_init() never ran: lock and list are not set up */ + } + + nvgpu_spinlock_acquire(&bug.lock); + while (!nvgpu_list_empty(&bug.head)) { + /* + * Always process the first entry, to cover the unlikely case + * where a callback unregisters another one. + */ + cb = nvgpu_list_first_entry(&bug.head, + nvgpu_bug_cb, node); + /* Remove callback from list */ + nvgpu_list_del(&cb->node); + /* + * Release the spinlock before invoking the callback. + * This allows a callback to register/unregister other + * callbacks (unlikely). + * It also allows using a longjmp in a callback + * for unit testing. + */ + nvgpu_spinlock_release(&bug.lock); + cb->cb(cb->arg); + nvgpu_spinlock_acquire(&bug.lock); + } + nvgpu_spinlock_release(&bug.lock); + +done: (void) raise(SIGSEGV); pthread_exit(NULL); }