mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: Add Ctrl API to read SM error state
Expose an IOCTL on the Ctrl node to read the error state of a single SM, under NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE. bug 200412642 JIRA NVGPU-700 Change-Id: I3cbcf4d7f23a53dbd2350b38a5e259559d5fd3af Signed-off-by: Vinod G <vinodg@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1728931 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
@@ -1575,6 +1575,56 @@ out:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g,
|
||||||
|
struct nvgpu_gpu_read_single_sm_error_state_args *args)
|
||||||
|
{
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
struct nvgpu_gr_sm_error_state *sm_error_state;
|
||||||
|
struct nvgpu_gpu_sm_error_state_record sm_error_state_record;
|
||||||
|
u32 sm_id;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
sm_id = args->sm_id;
|
||||||
|
if (sm_id >= gr->no_of_sm)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
|
|
||||||
|
sm_error_state = gr->sm_error_states + sm_id;
|
||||||
|
sm_error_state_record.global_esr =
|
||||||
|
sm_error_state->hww_global_esr;
|
||||||
|
sm_error_state_record.warp_esr =
|
||||||
|
sm_error_state->hww_warp_esr;
|
||||||
|
sm_error_state_record.warp_esr_pc =
|
||||||
|
sm_error_state->hww_warp_esr_pc;
|
||||||
|
sm_error_state_record.global_esr_report_mask =
|
||||||
|
sm_error_state->hww_global_esr_report_mask;
|
||||||
|
sm_error_state_record.warp_esr_report_mask =
|
||||||
|
sm_error_state->hww_warp_esr_report_mask;
|
||||||
|
|
||||||
|
if (args->record_size > 0) {
|
||||||
|
size_t write_size = sizeof(*sm_error_state);
|
||||||
|
|
||||||
|
if (write_size > args->record_size)
|
||||||
|
write_size = args->record_size;
|
||||||
|
|
||||||
|
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
|
||||||
|
err = copy_to_user((void __user *)(uintptr_t)
|
||||||
|
args->record_mem,
|
||||||
|
&sm_error_state_record,
|
||||||
|
write_size);
|
||||||
|
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
||||||
|
if (err) {
|
||||||
|
nvgpu_err(g, "copy_to_user failed!");
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
args->record_size = write_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||||
{
|
{
|
||||||
struct gk20a_ctrl_priv *priv = filp->private_data;
|
struct gk20a_ctrl_priv *priv = filp->private_data;
|
||||||
@@ -1887,6 +1937,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
|
|||||||
(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
|
(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
|
||||||
|
err = nvgpu_gpu_read_single_sm_error_state(g,
|
||||||
|
(struct nvgpu_gpu_read_single_sm_error_state_args *)buf);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
|
nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
|
||||||
err = -ENOTTY;
|
err = -ENOTTY;
|
||||||
|
|||||||
@@ -864,6 +864,38 @@ struct nvgpu_gpu_set_deterministic_opts_args {
|
|||||||
__u64 channels; /* in */
|
__u64 channels; /* in */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This struct helps to report the SM error state of a single SM.
|
||||||
|
* This acts upon the currently resident GR context.
|
||||||
|
* Global Error status register
|
||||||
|
* Warp Error status register
|
||||||
|
* Warp Error status register PC
|
||||||
|
* Global Error status register Report Mask
|
||||||
|
* Warp Error status register Report Mask
|
||||||
|
*/
|
||||||
|
struct nvgpu_gpu_sm_error_state_record {
|
||||||
|
__u32 global_esr;
|
||||||
|
__u32 warp_esr;
|
||||||
|
__u64 warp_esr_pc;
|
||||||
|
__u32 global_esr_report_mask;
|
||||||
|
__u32 warp_esr_report_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This struct helps to read the SM error state.
|
||||||
|
*/
|
||||||
|
struct nvgpu_gpu_read_single_sm_error_state_args {
|
||||||
|
/* Valid SM ID */
|
||||||
|
__u32 sm_id;
|
||||||
|
__u32 reserved;
|
||||||
|
/*
|
||||||
|
* This is pointer to the struct nvgpu_gpu_sm_error_state_record
|
||||||
|
*/
|
||||||
|
__u64 record_mem;
|
||||||
|
/* size of the record size to read */
|
||||||
|
__u64 record_size;
|
||||||
|
};
|
||||||
|
|
||||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
|
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
|
||||||
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
|
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
|
||||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
|
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
|
||||||
@@ -949,8 +981,11 @@ struct nvgpu_gpu_set_deterministic_opts_args {
|
|||||||
#define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
|
#define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
|
||||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
|
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
|
||||||
struct nvgpu_gpu_set_deterministic_opts_args)
|
struct nvgpu_gpu_set_deterministic_opts_args)
|
||||||
|
#define NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \
|
||||||
|
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 41, \
|
||||||
|
struct nvgpu_gpu_read_single_sm_error_state_args)
|
||||||
#define NVGPU_GPU_IOCTL_LAST \
|
#define NVGPU_GPU_IOCTL_LAST \
|
||||||
_IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS)
|
_IOC_NR(NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE)
|
||||||
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
|
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
|
||||||
sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
|
sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user