gpu: nvgpu: Add Ctrl API to read SM error state

Expose an IOCTL on the ctrl node to read the error state of a
single SM: NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE.

bug 200412642
JIRA NVGPU-700

Change-Id: I3cbcf4d7f23a53dbd2350b38a5e259559d5fd3af
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1728931
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Vinod G
2018-05-23 17:22:03 -07:00
committed by Tejal Kudav
parent 40cefb666f
commit d84e822128
2 changed files with 91 additions and 1 deletions

View File

@@ -864,6 +864,38 @@ struct nvgpu_gpu_set_deterministic_opts_args {
__u64 channels; /* in */
};
/*
 * Error state record of a single SM.
 * This acts upon the currently resident GR context.
 *
 * The record carries, in field order:
 *   global_esr             - Global Error status register
 *   warp_esr               - Warp Error status register
 *   warp_esr_pc            - Warp Error status register PC
 *   global_esr_report_mask - Global Error status register Report Mask
 *   warp_esr_report_mask   - Warp Error status register Report Mask
 *
 * Layout: the two leading __u32 fields place the __u64 warp_esr_pc at
 * offset 8, and the two trailing __u32 fields bring the total to 24 bytes,
 * so no compiler-inserted padding is expected.
 */
struct nvgpu_gpu_sm_error_state_record {
/* Global Error status register */
__u32 global_esr;
/* Warp Error status register */
__u32 warp_esr;
/* Warp Error status register PC */
__u64 warp_esr_pc;
/* Global Error status register Report Mask */
__u32 global_esr_report_mask;
/* Warp Error status register Report Mask */
__u32 warp_esr_report_mask;
};
/*
 * Arguments used to read the error state of one SM.
 *
 * NOTE(review): the in/out direction of each field is not annotated here
 * (other args structs in this header mark fields with "in"/"out") - confirm
 * against the ioctl handler and annotate accordingly.
 */
struct nvgpu_gpu_read_single_sm_error_state_args {
/* Valid SM ID */
__u32 sm_id;
/*
 * Pads sm_id out to 8 bytes so the __u64 fields below start at offset 8.
 * NOTE(review): presumably must be zero - confirm against the handler.
 */
__u32 reserved;
/*
 * Pointer to a struct nvgpu_gpu_sm_error_state_record, carried as a
 * 64-bit integer.
 */
__u64 record_mem;
/*
 * Size of the record to read.
 * NOTE(review): presumably in bytes and bounded by
 * sizeof(struct nvgpu_gpu_sm_error_state_record) - TODO confirm.
 */
__u64 record_size;
};
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -949,8 +981,11 @@ struct nvgpu_gpu_set_deterministic_opts_args {
#define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
struct nvgpu_gpu_set_deterministic_opts_args)
/*
 * Read the error state of a single SM. Command number 41, encoded with
 * _IOWR; the argument is struct nvgpu_gpu_read_single_sm_error_state_args.
 */
#define NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 41, \
struct nvgpu_gpu_read_single_sm_error_state_args)
#define NVGPU_GPU_IOCTL_LAST \
_IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS)
_IOC_NR(NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE)
/*
 * Upper bound on ioctl argument size, defined as the size of
 * struct nvgpu_gpu_get_cpu_time_correlation_info_args.
 * NOTE(review): presumably every args struct in this header must fit within
 * this bound; struct nvgpu_gpu_read_single_sm_error_state_args is 24 bytes -
 * confirm it does not exceed the struct named here.
 */
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)