gpu: nvgpu: Read sm error ioctl support for tsg

Add READ_SM_ERROR IOCTL support at the TSG level.
Move the struct that holds the sm_error details
from gr to tsg, since sm_error support is
per-context rather than global.
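
As a rough sketch of the relocation (not the verbatim header change:
only the two fields visible in this diff are listed, and any further
members are assumptions), the per-SM error state now hangs off
tsg_gk20a rather than gr_gk20a, one entry per SM indexed by sm_id:

/* Sketch only; the real definition lives in tsg_gk20a.h. */
struct nvgpu_tsg_sm_error_state {
	u32 hww_global_esr;
	u32 hww_warp_esr;
	/* ... further ESR/report-mask fields, if any ... */
};

struct tsg_gk20a {
	/* ... existing members ... */
	/* one entry per SM, indexed by sm_id, bounded by gr->no_of_sm */
	struct nvgpu_tsg_sm_error_state *sm_error_states;
};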

Also correct a MISRA 21.1 violation in the header file.

The nvgpu_dbg_gpu_ioctl_write_single_sm_error_state and
nvgpu_dbg_gpu_ioctl_read_single_sm_error_state functions
are modified to use the TSG-level struct
nvgpu_tsg_sm_error_state.
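
As a rough illustration of the new read path (a sketch only: the
tsg_gk20a_from_ch() helper and the tsg->sm_error_states array appear
in the diff below, while the wrapper function itself is hypothetical),
the per-TSG state is now resolved from the session's channel instead
of the global gr state:

/* Sketch: condensed version of the ioctl flow in the diff below. */
static int read_sm_error_state_sketch(struct gk20a *g,
		struct channel_gk20a *ch, u32 sm_id,
		struct nvgpu_dbg_gpu_sm_error_state_record *rec)
{
	struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);

	if (tsg == NULL || tsg->sm_error_states == NULL) {
		return -EINVAL;
	}
	if (sm_id >= g->gr.no_of_sm) {
		return -EINVAL;
	}

	rec->hww_global_esr = tsg->sm_error_states[sm_id].hww_global_esr;
	rec->hww_warp_esr = tsg->sm_error_states[sm_id].hww_warp_esr;
	return 0;
}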

Bug 200412642

Change-Id: I9e334b059078a4bb0e360b945444cc4bf1cc56ec
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1794856
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit bfe65407bd (parent 3bd47da095)
Author:    Vinod G <vinodg@nvidia.com>
Date:      2018-08-07 23:09:30 -07:00
Committer: mobile promotions
15 changed files with 387 additions and 258 deletions

@@ -35,6 +35,7 @@
#include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h"
#include "gk20a/tsg_gk20a.h"
#include "gk20a/regops_gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
#include "os_linux.h"
@@ -271,20 +272,23 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
 	u32 sm_id;
 	struct channel_gk20a *ch;
 	struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record;
-	struct nvgpu_gr_sm_error_state sm_error_state;
+	struct nvgpu_tsg_sm_error_state sm_error_state;
 	int err = 0;
 
 	/* Not currently supported in the virtual case */
-	if (g->is_virtual)
+	if (g->is_virtual) {
 		return -ENOSYS;
+	}
 
 	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
-	if (!ch)
+	if (ch == NULL) {
 		return -EINVAL;
+	}
 
 	sm_id = args->sm_id;
-	if (sm_id >= gr->no_of_sm)
+	if (sm_id >= gr->no_of_sm) {
 		return -EINVAL;
+	}
 
 	nvgpu_speculation_barrier();
@@ -300,13 +304,15 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
 				args->sm_error_state_record_mem,
 				read_size);
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
-		if (err)
+		if (err != 0) {
 			return -ENOMEM;
+		}
 	}
 
 	err = gk20a_busy(g);
-	if (err)
+	if (err != 0) {
 		return err;
+	}
 
 	sm_error_state.hww_global_esr =
 		sm_error_state_record.hww_global_esr;
@@ -335,18 +341,36 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 {
 	struct gk20a *g = dbg_s->g;
 	struct gr_gk20a *gr = &g->gr;
-	struct nvgpu_gr_sm_error_state *sm_error_state;
+	struct nvgpu_tsg_sm_error_state *sm_error_state;
 	struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record;
+	struct channel_gk20a *ch;
+	struct tsg_gk20a *tsg;
 	u32 sm_id;
 	int err = 0;
 
-	sm_id = args->sm_id;
-	if (sm_id >= gr->no_of_sm)
+	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+	if (ch == NULL) {
 		return -EINVAL;
+	}
+
+	tsg = tsg_gk20a_from_ch(ch);
+	if (tsg == NULL) {
+		nvgpu_err(g, "no valid tsg from ch");
+		return -EINVAL;
+	}
+
+	sm_id = args->sm_id;
+	if (sm_id >= gr->no_of_sm) {
+		return -EINVAL;
+	}
+
+	if (tsg->sm_error_states == NULL) {
+		return -EINVAL;
+	}
 
 	nvgpu_speculation_barrier();
-	sm_error_state = gr->sm_error_states + sm_id;
+	sm_error_state = tsg->sm_error_states + sm_id;
 	sm_error_state_record.hww_global_esr =
 		sm_error_state->hww_global_esr;
 	sm_error_state_record.hww_warp_esr =
@@ -370,7 +394,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 				&sm_error_state_record,
 				write_size);
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
-		if (err) {
+		if (err != 0) {
 			nvgpu_err(g, "copy_to_user failed!");
 			return err;
 		}
@@ -1500,8 +1524,9 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
 	int err = 0;
 
 	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
-	if (!ch)
+	if (ch == NULL) {
 		return -EINVAL;
+	}
 
 	sm_id = args->sm_id;
 	if (sm_id >= gr->no_of_sm)
@@ -1510,8 +1535,9 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
 	nvgpu_speculation_barrier();
 
 	err = gk20a_busy(g);
-	if (err)
+	if (err != 0) {
 		return err;
+	}
 
 	err = gr_gk20a_elpg_protected_call(g,
 			g->ops.gr.clear_sm_error_state(g, ch, sm_id));