gpu: nvgpu: add interface to read error state for all SMs

This patch defines the IOCTL NVGPU_TSG_IOCTL_READ_ALL_SM_ERROR_STATES
to read the error states for all the SMs.
The corresponding input parameter is num_sm (the number of SM error states to be read), and the output is a list of error states for all the SMs.

Bug 200468220

Signed-off-by: Jinesh Parakh <jparakh@nvidia.com>
Change-Id: Iaf926b72d900a6c8f978fa034c20d76e482eb13f
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2717313
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-by: Sandarbh Jain <sanjain@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Jinesh Parakh
2022-05-24 13:16:54 +05:30
committed by mobile promotions
parent 646a48ea5a
commit 8ed2431646

View File

@@ -74,13 +74,33 @@ struct nvgpu_tsg_read_single_sm_error_state_args {
__u32 sm_id;
__u32 reserved;
/*
* This is pointer to the struct nvgpu_gpu_sm_error_state_record
* This is pointer to the struct nvgpu_tsg_sm_error_state_record
*/
__u64 record_mem;
/* size of the record size to read */
__u64 record_size;
};
/*
 * Argument block for NVGPU_TSG_IOCTL_READ_ALL_SM_ERROR_STATES: reads the
 * SM error states for all the SMs in one call.
 *
 * Layout is 2 x __u32 followed by 2 x __u64 (24 bytes, no implicit padding).
 */
struct nvgpu_tsg_read_all_sm_error_state_args {
/*
 * in: Number of SM error states to be returned. Must be equal to the
 * number of SMs.
 */
__u32 num_sm;
/*
 * Padding to make KMD UAPI compatible with both 32-bit and 64-bit callers;
 * it places the following __u64 members at an 8-byte offset.
 */
__u32 reserved;
/*
 * out: This points to an array of nvgpu_tsg_read_single_sm_error_state_args.
 * The address is carried as a 64-bit integer.
 * NOTE(review): the single-SM variant documents record_mem as pointing to
 * struct nvgpu_tsg_sm_error_state_record; confirm whether the array
 * elements here are meant to be records rather than *_args structs.
 */
__u64 buffer_mem;
/*
 * in: size of the buffer to store error states
 * (presumably in bytes -- TODO confirm against the handler).
 */
__u64 buffer_size;
};
/*
* This struct is used to read and configure l2 max evict_last
* setting.
@@ -145,11 +165,14 @@ struct nvgpu_tsg_set_l2_sector_promotion_args {
/* TSG ioctl command 16; _IOW declares the argument as user-to-kernel only. */
#define NVGPU_TSG_IOCTL_BIND_SCHEDULING_DOMAIN \
_IOW(NVGPU_TSG_IOCTL_MAGIC, 16, \
struct nvgpu_tsg_bind_scheduling_domain_args)
/*
 * TSG ioctl command 17: read the error states of all the SMs.
 * _IOWR declares the argument struct as transferred in both directions.
 */
#define NVGPU_TSG_IOCTL_READ_ALL_SM_ERROR_STATES \
_IOWR(NVGPU_TSG_IOCTL_MAGIC, 17, \
struct nvgpu_tsg_read_all_sm_error_state_args)
/*
 * NOTE(review): presumably this has to cover the largest TSG ioctl argument
 * struct. struct nvgpu_tsg_read_all_sm_error_state_args is 24 bytes
 * (2 x __u32 + 2 x __u64); confirm that
 * struct nvgpu_tsg_bind_scheduling_domain_args is at least that large.
 */
#define NVGPU_TSG_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_tsg_bind_scheduling_domain_args)
/*
 * Command number (_IOC_NR) of the last TSG ioctl defined above.
 * NOTE(review): two _IOC_NR() expansions follow; they look like the
 * removed (BIND_SCHEDULING_DOMAIN) and added (READ_ALL_SM_ERROR_STATES)
 * lines of the diff. Only the latter is expected to remain in the
 * header -- confirm against the applied file.
 */
#define NVGPU_TSG_IOCTL_LAST \
_IOC_NR(NVGPU_TSG_IOCTL_BIND_SCHEDULING_DOMAIN)
_IOC_NR(NVGPU_TSG_IOCTL_READ_ALL_SM_ERROR_STATES)
/*
* /dev/nvhost-dbg-gpu device