gpu: nvgpu: Add Ctrl API to read SM error state

Expose IOCTL to Ctrl node to read Single SM error
under NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE

bug 200412642
JIRA NVGPU-700

Change-Id: I3cbcf4d7f23a53dbd2350b38a5e259559d5fd3af
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1728931
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Vinod G
2018-05-23 17:22:03 -07:00
committed by Tejal Kudav
parent 40cefb666f
commit d84e822128
2 changed files with 91 additions and 1 deletions

View File

@@ -1575,6 +1575,56 @@ out:
return err;
}
static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g,
struct nvgpu_gpu_read_single_sm_error_state_args *args)
{
struct gr_gk20a *gr = &g->gr;
struct nvgpu_gr_sm_error_state *sm_error_state;
struct nvgpu_gpu_sm_error_state_record sm_error_state_record;
u32 sm_id;
int err = 0;
sm_id = args->sm_id;
if (sm_id >= gr->no_of_sm)
return -EINVAL;
nvgpu_speculation_barrier();
sm_error_state = gr->sm_error_states + sm_id;
sm_error_state_record.global_esr =
sm_error_state->hww_global_esr;
sm_error_state_record.warp_esr =
sm_error_state->hww_warp_esr;
sm_error_state_record.warp_esr_pc =
sm_error_state->hww_warp_esr_pc;
sm_error_state_record.global_esr_report_mask =
sm_error_state->hww_global_esr_report_mask;
sm_error_state_record.warp_esr_report_mask =
sm_error_state->hww_warp_esr_report_mask;
if (args->record_size > 0) {
size_t write_size = sizeof(*sm_error_state);
if (write_size > args->record_size)
write_size = args->record_size;
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
err = copy_to_user((void __user *)(uintptr_t)
args->record_mem,
&sm_error_state_record,
write_size);
nvgpu_mutex_release(&g->dbg_sessions_lock);
if (err) {
nvgpu_err(g, "copy_to_user failed!");
return err;
}
args->record_size = write_size;
}
return 0;
}
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -1887,6 +1937,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
break;
case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
err = nvgpu_gpu_read_single_sm_error_state(g,
(struct nvgpu_gpu_read_single_sm_error_state_args *)buf);
break;
default:
nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
err = -ENOTTY;