gpu: nvgpu: add NVGPU_DEBUGGER flag for SM exception handling

Trap handling and SM exception pre-processing are not needed in the
safety build, i.e. when NVGPU_DEBUGGER is not defined.

Add the NVGPU_DEBUGGER flag around all such unsafe processing.

In the safety build we only report the SM exceptions and return an
error so that recovery is triggered.
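
As a rough sketch, assuming a trimmed-down stand-in for the real handler
(the actual nvgpu_gr_intr_handle_sm_exception() takes struct gk20a and
several more parameters), the resulting control flow is:

/*
 * Illustrative sketch only, not the literal nvgpu code: shows how
 * NVGPU_DEBUGGER gates the unsafe processing. Reporting and the
 * parameter list are simplified; see the diff below for the real change.
 */
#include <errno.h>

int sm_exception_sketch(unsigned int global_esr, unsigned int warp_esr)
{
	int ret = 0;

	/* Exception reporting happens in both build flavours. */
	(void)global_esr;
	(void)warp_esr;

#ifdef NVGPU_DEBUGGER
	/*
	 * Debugger build: debugger-attach check, pre_process_sm_exception,
	 * warp sync and related trap handling run here.
	 */
#else
	/* Safety build: return an error so that recovery is triggered. */
	ret = -EFAULT;
#endif

	return ret;
}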

Also add the flag around gr_intr_post_bpt_events(), since event
handling is not needed in the safety build.
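
The guard is a compound one, so the BPT-event code is built only when
both flags are defined. A minimal stand-alone illustration (the flag
names are the real ones from the diff; the program itself is only for
demonstration):

/*
 * Demonstrates the compound guard used around gr_intr_post_bpt_events():
 * the guarded code is compiled in only when BOTH flags are defined.
 */
#include <stdio.h>

int main(void)
{
#if defined(NVGPU_FEATURE_CHANNEL_TSG_CONTROL) && defined(NVGPU_DEBUGGER)
	printf("BPT event posting compiled in\n");
#else
	printf("BPT event posting compiled out (safety build)\n");
#endif
	return 0;
}

Defining NVGPU_FEATURE_CHANNEL_TSG_CONTROL alone is no longer enough;
the safety build, which leaves NVGPU_DEBUGGER undefined, drops the
BPT-event path entirely.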

Jira NVGPU-3506

Change-Id: I660930fdb185b82c0adb612decbfd3d014ce2524
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2127754
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Deepak Nibade
Date:      2019-05-29 15:02:24 +05:30
Committer: mobile promotions
Parent:    0908547ad2
Commit:    e40994c884


@@ -132,7 +132,7 @@ static int gr_intr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	return ret;
 }
 
-#ifdef NVGPU_FEATURE_CHANNEL_TSG_CONTROL
+#if defined(NVGPU_FEATURE_CHANNEL_TSG_CONTROL) && defined(NVGPU_DEBUGGER)
 static void gr_intr_post_bpt_events(struct gk20a *g, struct nvgpu_tsg *tsg,
 				u32 global_esr)
 {
@@ -347,34 +347,29 @@ int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		u32 *hww_global_esr)
 {
 	int ret = 0;
-	bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
-	bool disable_sm_exceptions = true;
 	u32 offset = nvgpu_safe_add_u32(nvgpu_gr_gpc_offset(g, gpc),
 				nvgpu_gr_tpc_offset(g, tpc));
-	bool sm_debugger_attached;
 	u32 global_esr, warp_esr, global_mask;
 	u64 hww_warp_esr_pc = 0;
+#ifdef NVGPU_DEBUGGER
+	bool sm_debugger_attached;
+	bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
+	bool disable_sm_exceptions = true;
+#endif
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
-	sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);
-
 	global_esr = g->ops.gr.intr.get_sm_hww_global_esr(g, gpc, tpc, sm);
 	*hww_global_esr = global_esr;
 
 	warp_esr = g->ops.gr.intr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
 	global_mask = g->ops.gr.intr.get_sm_no_lock_down_hww_global_esr_mask(g);
-	if (!sm_debugger_attached) {
-		nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x",
-			  global_esr, warp_esr);
-		return -EFAULT;
-	}
 
 	nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
 		"sm hww global 0x%08x warp 0x%08x", global_esr, warp_esr);
 
 	/*
-	 * Check and report any fatal wrap errors.
+	 * Check and report any fatal warp errors.
 	 */
 	if ((global_esr & ~global_mask) != 0U) {
 		if (g->ops.gr.intr.get_sm_hww_warp_esr_pc != NULL) {
@@ -384,9 +379,18 @@ int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 			gr_intr_report_sm_exception(g, gpc, tpc, sm, warp_esr,
 					hww_warp_esr_pc);
 	}
 
 	nvgpu_pg_elpg_protected_call(g,
 		g->ops.gr.intr.record_sm_error_state(g, gpc, tpc, sm, fault_ch));
 
+#ifdef NVGPU_DEBUGGER
+	sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);
+
+	if (!sm_debugger_attached) {
+		nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x",
+			  global_esr, warp_esr);
+		return -EFAULT;
+	}
+
 	if (g->ops.gr.pre_process_sm_exception != NULL) {
 		ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, sm,
 				global_esr, warp_esr,
@@ -445,6 +449,10 @@ int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	} else {
 		*post_event = true;
 	}
+#else
+	/* Return error so that recovery is triggered */
+	ret = -EFAULT;
+#endif
 
 	return ret;
 }
@@ -844,7 +852,7 @@ int nvgpu_gr_intr_stall_isr(struct gk20a *g)
 	/* Enable fifo access */
 	g->ops.gr.init.fifo_access(g, true);
 
-#ifdef NVGPU_FEATURE_CHANNEL_TSG_CONTROL
+#if defined(NVGPU_FEATURE_CHANNEL_TSG_CONTROL) && defined(NVGPU_DEBUGGER)
 	/* Posting of BPT events should be the last thing in this function */
 	if ((global_esr != 0U) && (tsg != NULL) && (need_reset == false)) {
 		gr_intr_post_bpt_events(g, tsg, global_esr);