mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: add BUG() callbacks
Add support for registering callbacks that will be called on BUG(). Jira NVGPU-4512 Change-Id: I35c9b6c17db3b9fa5d098918223083f0b4aaace4 Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2266391 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
Alex Waterman
parent
569f34470e
commit
1f3f34b906
@@ -28,6 +28,7 @@
|
||||
#include <nvgpu/posix/bug.h>
|
||||
#endif
|
||||
#include <nvgpu/cov_whitelist.h>
|
||||
#include <nvgpu/list.h>
|
||||
|
||||
/*
|
||||
* Define an assert macro that code within nvgpu can use.
|
||||
@@ -92,4 +93,25 @@ struct gk20a;
|
||||
nvgpu_do_assert(); \
|
||||
} while (false)
|
||||
|
||||
|
||||
struct nvgpu_bug_cb
|
||||
{
|
||||
void (*cb)(void *arg);
|
||||
void *arg;
|
||||
struct nvgpu_list_node node;
|
||||
};
|
||||
|
||||
static inline struct nvgpu_bug_cb *
|
||||
nvgpu_bug_cb_from_node(struct nvgpu_list_node *node)
|
||||
{
|
||||
return (struct nvgpu_bug_cb *)
|
||||
((uintptr_t)node - offsetof(struct nvgpu_bug_cb, node));
|
||||
};
|
||||
|
||||
#ifdef __KERNEL__
/*
 * When __KERNEL__ is defined the BUG() callback entry points are empty
 * inline functions: calls to them compile but do nothing.
 */
static inline void nvgpu_bug_exit(void)
{
}

static inline void nvgpu_bug_register_cb(struct nvgpu_bug_cb *cb)
{
	(void)cb;
}

static inline void nvgpu_bug_unregister_cb(struct nvgpu_bug_cb *cb)
{
	(void)cb;
}
#endif
|
||||
|
||||
#endif /* NVGPU_BUG_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* GK20A Graphics
|
||||
*
|
||||
@@ -698,6 +698,7 @@ struct gk20a {
|
||||
struct nvgpu_cond sw_quiesce_cond;
|
||||
struct nvgpu_thread sw_quiesce_thread;
|
||||
#endif
|
||||
struct nvgpu_list_node bug_node;
|
||||
|
||||
/** Controls which messages are logged */
|
||||
u64 log_mask;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -134,4 +134,33 @@ void bug_handler_cancel(void);
|
||||
bug_result; \
|
||||
})
|
||||
#endif
|
||||
|
||||
struct nvgpu_bug_cb;
|
||||
|
||||
/**
|
||||
* @brief Register callback to be invoked on BUG()
|
||||
*
|
||||
* @param cb [in] Pointer to callback structure
|
||||
*
|
||||
* Register a callback to be invoked on BUG().
|
||||
* The nvgpu_bug_cb structure contains a function pointer
|
||||
* and an argument to be passed to this function.
|
||||
* This mechanism can be used to perform some emergency
|
||||
* operations on a GPU before exiting the process.
|
||||
*
|
||||
* Note: callback is automatically unregistered before
|
||||
* being invoked.
|
||||
*/
|
||||
void nvgpu_bug_register_cb(struct nvgpu_bug_cb *cb);
|
||||
|
||||
/**
|
||||
* @brief Unregister a callback for BUG()
|
||||
*
|
||||
* @param cb [in] Pointer to callback structure
|
||||
*
|
||||
* Remove a callback from the list of callbacks to be
|
||||
* invoked on BUG().
|
||||
*/
|
||||
void nvgpu_bug_unregister_cb(struct nvgpu_bug_cb *cb);
|
||||
|
||||
#endif /* NVGPU_POSIX_BUG_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -21,6 +21,8 @@
|
||||
*/
|
||||
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/list.h>
|
||||
#include <nvgpu/posix/bug.h>
|
||||
#include <pthread.h>
|
||||
#include <stdbool.h>
|
||||
@@ -75,11 +77,48 @@ void dump_stack(void)
|
||||
nvgpu_posix_dump_stack(2);
|
||||
}
|
||||
|
||||
struct nvgpu_bug_desc {
|
||||
bool in_use;
|
||||
pthread_once_t once;
|
||||
struct nvgpu_spinlock lock;
|
||||
struct nvgpu_list_node head;
|
||||
};
|
||||
|
||||
struct nvgpu_bug_desc bug = {
|
||||
.once = PTHREAD_ONCE_INIT
|
||||
};
|
||||
|
||||
static void nvgpu_bug_init(void)
|
||||
{
|
||||
nvgpu_err(NULL, "doing init for bug cb");
|
||||
nvgpu_spinlock_init(&bug.lock);
|
||||
nvgpu_init_list_node(&bug.head);
|
||||
bug.in_use = true;
|
||||
}
|
||||
|
||||
void nvgpu_bug_register_cb(struct nvgpu_bug_cb *cb)
|
||||
{
|
||||
(void) pthread_once(&bug.once, nvgpu_bug_init);
|
||||
nvgpu_spinlock_acquire(&bug.lock);
|
||||
nvgpu_list_add_tail(&cb->node, &bug.head);
|
||||
nvgpu_spinlock_release(&bug.lock);
|
||||
}
|
||||
|
||||
void nvgpu_bug_unregister_cb(struct nvgpu_bug_cb *cb)
|
||||
{
|
||||
(void) pthread_once(&bug.once, nvgpu_bug_init);
|
||||
nvgpu_spinlock_acquire(&bug.lock);
|
||||
nvgpu_list_del(&cb->node);
|
||||
nvgpu_spinlock_release(&bug.lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ahhh! A bug!
|
||||
*/
|
||||
void nvgpu_posix_bug(const char *fmt, ...)
|
||||
{
|
||||
struct nvgpu_bug_cb *cb;
|
||||
|
||||
#ifdef __NVGPU_UNIT_TEST__
|
||||
if (expect_bug) {
|
||||
nvgpu_info(NULL, "Expected BUG detected!");
|
||||
@@ -94,6 +133,35 @@ void nvgpu_posix_bug(const char *fmt, ...)
|
||||
*/
|
||||
nvgpu_err(NULL, "BUG detected!");
|
||||
dump_stack();
|
||||
|
||||
if (!bug.in_use) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
nvgpu_spinlock_acquire(&bug.lock);
|
||||
while (!nvgpu_list_empty(&bug.head)) {
|
||||
/*
|
||||
* Always process first entry, in the -unlikely- case where a
|
||||
* callback would unregister another one.
|
||||
*/
|
||||
cb = nvgpu_list_first_entry(&bug.head,
|
||||
nvgpu_bug_cb, node);
|
||||
/* Remove callback from list */
|
||||
nvgpu_list_del(&cb->node);
|
||||
/*
|
||||
* Release spinlock before invoking callback.
|
||||
* This allows callback to register/unregister other
|
||||
* callbacks (unlikely).
|
||||
* This allows using a longjmp in a callback
|
||||
* for unit testing.
|
||||
*/
|
||||
nvgpu_spinlock_release(&bug.lock);
|
||||
cb->cb(cb->arg);
|
||||
nvgpu_spinlock_acquire(&bug.lock);
|
||||
}
|
||||
nvgpu_spinlock_release(&bug.lock);
|
||||
|
||||
done:
|
||||
(void) raise(SIGSEGV);
|
||||
pthread_exit(NULL);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user