mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: support storing/reading single SM error state
Add support for storing the error state of a single SM before
preprocessing the SM exception.
The error state is stored as:
struct nvgpu_dbg_gpu_sm_error_state_record {
u32 hww_global_esr;
u32 hww_warp_esr;
u64 hww_warp_esr_pc;
u32 hww_global_esr_report_mask;
u32 hww_warp_esr_report_mask;
}
Note that we can safely append new fields to the above
structure in the future if required.
Also, add the IOCTL NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE
to support reading an SM's error state from user space.
Bug 200156699
Change-Id: I9a62cb01e8a35c720b52d5d202986347706c7308
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120329
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Terje Bergstrom
parent
9cf7e23f57
commit
04e45bc943
@@ -676,8 +676,35 @@ struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args {
|
||||
#define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \
|
||||
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args)
|
||||
|
||||
|
||||
struct nvgpu_dbg_gpu_sm_error_state_record {
|
||||
__u32 hww_global_esr;
|
||||
__u32 hww_warp_esr;
|
||||
__u64 hww_warp_esr_pc;
|
||||
__u32 hww_global_esr_report_mask;
|
||||
__u32 hww_warp_esr_report_mask;
|
||||
|
||||
/*
|
||||
* Notes
|
||||
* - This struct can be safely appended with new fields. However, always
|
||||
* keep the structure size multiple of 8 and make sure that the binary
|
||||
* layout does not change between 32-bit and 64-bit architectures.
|
||||
*/
|
||||
};
|
||||
|
||||
struct nvgpu_dbg_gpu_read_single_sm_error_state_args {
|
||||
__u32 sm_id;
|
||||
__u32 padding;
|
||||
__u64 sm_error_state_record_mem;
|
||||
__u64 sm_error_state_record_size;
|
||||
};
|
||||
|
||||
#define NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \
|
||||
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 14, struct nvgpu_dbg_gpu_read_single_sm_error_state_args)
|
||||
|
||||
|
||||
#define NVGPU_DBG_GPU_IOCTL_LAST \
|
||||
_IOC_NR(NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE)
|
||||
_IOC_NR(NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE)
|
||||
|
||||
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
|
||||
sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
|
||||
|
||||
Reference in New Issue
Block a user