gpu: nvgpu: Read sm error ioctl support for tsg

Add READ_SM_ERROR IOCTL support to TSG level. Moved the struct to save the sm_error details from gr to tsg as the sm_error support is context based, not global. Also corrected MISRA 21.1 error in header file. nvgpu_dbg_gpu_ioctl_write_single_sm_error_state and nvgpu_dbg_gpu_ioctl_read_single_sm_error_state functions are modified to use the tsg struct nvgpu_tsg_sm_error_state. Bug 200412642 Change-Id: I9e334b059078a4bb0e360b945444cc4bf1cc56ec Signed-off-by: Vinod G <vinodg@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1794856 Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-24 10:34:43 +03:00 · 2018-08-07 23:09:30 -07:00
parent 3bd47da095
commit bfe65407bd
15 changed files with 387 additions and 258 deletions
--- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
@@ -536,6 +536,57 @@ static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
 	return 0;
 }

+/*
+ * Copy the saved error state of one SM of a TSG out to userspace.
+ *
+ * @g:    GPU instance; supplies the SM count (g->gr.no_of_sm) and
+ *        dbg_sessions_lock.
+ * @tsg:  TSG whose sm_error_states array is read.
+ * @args: ioctl arguments. sm_id selects the SM, record_mem is the user
+ *        buffer address and record_size its size in bytes. When record_size
+ *        is non-zero and the copy succeeds, record_size is updated to the
+ *        number of bytes actually written. When record_size is zero nothing
+ *        is copied and args is left untouched.
+ *
+ * Returns 0 on success, -EINVAL if sm_id is not below the SM count, or
+ * -EFAULT if the user buffer could not be written.
+ */
+static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
+		struct tsg_gk20a *tsg,
+		struct nvgpu_tsg_read_single_sm_error_state_args *args)
+{
+	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_tsg_sm_error_state *sm_error_state;
+	struct nvgpu_tsg_sm_error_state_record sm_error_state_record;
+	u32 sm_id;
+	int err = 0;

+	sm_id = args->sm_id;
+	if (sm_id >= gr->no_of_sm)
+		return -EINVAL;

+	/* Barrier between the sm_id range check and the indexed access. */
+	nvgpu_speculation_barrier();

+	/*
+	 * Zero the whole record before filling it so that no uninitialized
+	 * stack bytes (e.g. structure padding) can reach userspace.
+	 */
+	memset(&sm_error_state_record, 0, sizeof(sm_error_state_record));

+	sm_error_state = tsg->sm_error_states + sm_id;
+	sm_error_state_record.global_esr =
+		sm_error_state->hww_global_esr;
+	sm_error_state_record.warp_esr =
+		sm_error_state->hww_warp_esr;
+	sm_error_state_record.warp_esr_pc =
+		sm_error_state->hww_warp_esr_pc;
+	sm_error_state_record.global_esr_report_mask =
+		sm_error_state->hww_global_esr_report_mask;
+	sm_error_state_record.warp_esr_report_mask =
+		sm_error_state->hww_warp_esr_report_mask;

+	if (args->record_size > 0) {
+		/*
+		 * Bound the copy by the object that is actually copied (the
+		 * local record), not by the internal TSG structure, so the
+		 * copy can never read past the end of the stack variable.
+		 */
+		size_t write_size = sizeof(sm_error_state_record);

+		if (write_size > args->record_size)
+			write_size = args->record_size;

+		/*
+		 * copy_to_user() returns the number of bytes left uncopied,
+		 * not an errno; translate any failure to -EFAULT.
+		 */
+		nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+		if (copy_to_user((void __user *)(uintptr_t)
+						args->record_mem,
+				 &sm_error_state_record,
+				 write_size))
+			err = -EFAULT;
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		if (err) {
+			nvgpu_err(g, "copy_to_user failed!");
+			return err;
+		}

+		/* Report back how many bytes were written. */
+		args->record_size = write_size;
+	}

+	return 0;
+}
+
 long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg)
 {
@@ -670,6 +721,13 @@ long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 		}

+	case NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE:
+		{
+		err = gk20a_tsg_ioctl_read_single_sm_error_state(g, tsg,
+			(struct nvgpu_tsg_read_single_sm_error_state_args *)buf);
+		break;
+		}
+
 	default:
 		nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
 			   cmd);