gpu: nvgpu: support storing/reading single SM error state

Add support for storing the error state of a single SM before
the SM exception is preprocessed

Error state is stored as :
struct nvgpu_dbg_gpu_sm_error_state_record {
u32 hww_global_esr;
u32 hww_warp_esr;
u64 hww_warp_esr_pc;
u32 hww_global_esr_report_mask;
u32 hww_warp_esr_report_mask;
}

Note that new fields can safely be appended to the above
structure in the future if required

Also, add IOCTL NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE
to support reading an SM's error state from user space

Bug 200156699

Change-Id: I9a62cb01e8a35c720b52d5d202986347706c7308
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120329
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Deepak Nibade
2016-03-09 14:51:43 +05:30
committed by Terje Bergstrom
parent 9cf7e23f57
commit 04e45bc943
8 changed files with 162 additions and 1 deletions

View File

@@ -525,6 +525,45 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
return 0;
}
/*
 * Copy the stored error-state record of a single SM to a user buffer.
 *
 * args->sm_id selects the entry in gr->sm_error_states; an id at or beyond
 * gr->no_of_sm is rejected. When args->sm_error_state_record_size is
 * non-zero, up to that many bytes (capped at the size of the kernel's
 * record) are copied to the user address held in
 * args->sm_error_state_record_mem, and the size field is then updated to
 * the number of bytes actually written. A zero size performs no copy.
 *
 * Returns 0 on success, -EINVAL for an out-of-range sm_id, or -EFAULT if
 * the user buffer could not be written in full.
 */
static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
		struct dbg_session_gk20a *dbg_s,
		struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
{
	struct gk20a *g = get_gk20a(dbg_s->dev);
	struct gr_gk20a *gr = &g->gr;
	struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state;
	u32 sm_id = args->sm_id;
	size_t write_size;
	unsigned long not_copied;

	if (sm_id >= gr->no_of_sm)
		return -EINVAL;

	/* Nothing to copy when the caller supplied an empty buffer. */
	if (!(args->sm_error_state_record_size > 0))
		return 0;

	sm_error_state = gr->sm_error_states + sm_id;

	/* Never write more than the caller's buffer can hold. */
	write_size = sizeof(*sm_error_state);
	if (write_size > args->sm_error_state_record_size)
		write_size = args->sm_error_state_record_size;

	/* The record is read while dbg_sessions_lock is held. */
	mutex_lock(&g->dbg_sessions_lock);
	not_copied = copy_to_user((void __user *)(uintptr_t)
				  args->sm_error_state_record_mem,
				  sm_error_state,
				  write_size);
	mutex_unlock(&g->dbg_sessions_lock);

	/*
	 * copy_to_user() yields the count of bytes left uncopied, not an
	 * errno; translate any shortfall into -EFAULT rather than handing a
	 * positive value back to the ioctl caller.
	 */
	if (not_copied) {
		gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n");
		return -EFAULT;
	}

	/* Report how many bytes were actually written. */
	args->sm_error_state_record_size = write_size;

	return 0;
}
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -622,6 +661,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
(struct nvgpu_dbg_gpu_timeout_args *)buf);
break;
case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
(struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
break;
default:
gk20a_err(dev_from_gk20a(g),
"unrecognized dbg gpu ioctl cmd: 0x%x",