From 8ed243164674b3f839dfc2c75ffd9a1f12e98a66 Mon Sep 17 00:00:00 2001 From: Jinesh Parakh Date: Tue, 24 May 2022 13:16:54 +0530 Subject: [PATCH] gpu: nvgpu: add interface to read error state for all SMs This patch defines the IOCTL NVGPU_TSG_IOCTL_READ_ALL_SM_ERROR_STATES to read the error states for all the SMs. The corresponding input parameter is num_sm (number of SM error states to be read) and the output is a list of error states for all the SMs. Bug 200468220 Signed-off-by: Jinesh Parakh Change-Id: Iaf926b72d900a6c8f978fa034c20d76e482eb13f Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2717313 Reviewed-by: Sagar Kamble Reviewed-by: Sami Kiminki Reviewed-by: Sandarbh Jain Reviewed-by: Vijayakumar Subbu GVS: Gerrit_Virtual_Submit --- include/uapi/linux/nvgpu.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 581cb8c46..56ebbd9af 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -74,13 +74,33 @@ struct nvgpu_tsg_read_single_sm_error_state_args { __u32 sm_id; __u32 reserved; /* - * This is pointer to the struct nvgpu_gpu_sm_error_state_record + * This is a pointer to the struct nvgpu_tsg_sm_error_state_record */ __u64 record_mem; /* size of the record size to read */ __u64 record_size; }; +/* + * This struct helps to read SM error states for all the SMs + */ +struct nvgpu_tsg_read_all_sm_error_state_args { + /* + * in: Number of SM error states to be returned. Must be equal to the number of SMs. + */ + __u32 num_sm; + /* + * Padding to make KMD UAPI compatible with both 32-bit and 64-bit callers. + */ + __u32 reserved; + /* + * out: This points to an array of struct nvgpu_tsg_sm_error_state_record. + */ + __u64 buffer_mem; + /* in: size of the buffer to store error states */ + __u64 buffer_size; +}; + /* * This struct is used to read and configure l2 max evict_last * setting. 
@@ -145,11 +165,14 @@ struct nvgpu_tsg_set_l2_sector_promotion_args { #define NVGPU_TSG_IOCTL_BIND_SCHEDULING_DOMAIN \ _IOW(NVGPU_TSG_IOCTL_MAGIC, 16, \ struct nvgpu_tsg_bind_scheduling_domain_args) +#define NVGPU_TSG_IOCTL_READ_ALL_SM_ERROR_STATES \ + _IOWR(NVGPU_TSG_IOCTL_MAGIC, 17, \ + struct nvgpu_tsg_read_all_sm_error_state_args) #define NVGPU_TSG_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_tsg_bind_scheduling_domain_args) #define NVGPU_TSG_IOCTL_LAST \ - _IOC_NR(NVGPU_TSG_IOCTL_BIND_SCHEDULING_DOMAIN) + _IOC_NR(NVGPU_TSG_IOCTL_READ_ALL_SM_ERROR_STATES) /* * /dev/nvhost-dbg-gpu device