gpu: nvgpu: add sm lock_down gr ops

Add lock_down_sm and wait_for_sm_lock_down gr ops.
These are required to support multiple SMs per TPC and the t19x
SM register address changes.

JIRA GPUT19X-75

Change-Id: I529babde51d9b2143fe3740a4f67c582b7eb404b
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master/r/1514042
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Author:    Seema Khowala
Date:      2017-06-22 11:53:04 -07:00
Committer: mobile promotions
Parent:    4728761b6c
Commit:    0852c9f1ab

4 changed files with 37 additions and 32 deletions
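The new ops take an explicit SM index because t19x parts carry more than one SM per TPC, so the SM debug registers need a per-SM stride on top of the existing GPC/TPC offset. A minimal sketch of what a chip-specific offset helper could look like, assuming a GPU_LIT_SM_PRI_STRIDE litter value; neither the helper nor the literal name is part of this change:

/*
 * Illustrative only: add a per-SM stride to the usual GPC/TPC offset.
 * The GPU_LIT_SM_PRI_STRIDE litter value and the helper name are assumptions.
 */
static u32 example_gr_sm_offset(struct gk20a *g, u32 sm)
{
	u32 sm_pri_stride = nvgpu_get_litter_value(g, GPU_LIT_SM_PRI_STRIDE);

	return sm * sm_pri_stride;
}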

drivers/gpu/nvgpu/gk20a/gk20a.h

@@ -296,6 +296,10 @@ struct gpu_ops {
 	u32 (*get_sm_hww_global_esr)(struct gk20a *g,
 			u32 gpc, u32 tpc, u32 sm);
 	u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g);
+	int (*lock_down_sm)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
+			u32 global_esr_mask, bool check_errors);
+	int (*wait_for_sm_lock_down)(struct gk20a *g, u32 gpc, u32 tpc,
+			u32 sm, u32 global_esr_mask, bool check_errors);
 	void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc,
 			u32 *esr_sm_sel);
 	int (*handle_sm_exception)(struct gk20a *g,
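With these entries in struct gpu_ops, common code is expected to dispatch through the HAL rather than calling the gk20a functions directly, so a chip with a per-SM register layout can substitute its own handlers. A condensed usage sketch, not code from this patch:

	/* Sketch: lock down one SM through the HAL and report failure. */
	err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm, global_esr_mask, true);
	if (err != 0)
		nvgpu_err(g, "GPC%d TPC%d SM%d: lock down failed",
				gpc, tpc, sm);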

drivers/gpu/nvgpu/gk20a/gr_gk20a.c

@@ -5467,16 +5467,14 @@ unlock:
 }
 int gk20a_gr_lock_down_sm(struct gk20a *g,
-			u32 gpc, u32 tpc, u32 global_esr_mask,
-			bool check_errors)
+		u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
+		bool check_errors)
 {
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
 	u32 dbgr_control0;
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-		"GPC%d TPC%d: locking down SM", gpc, tpc);
+		"GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm);
 	/* assert stop trigger */
 	dbgr_control0 =
@@ -5485,7 +5483,7 @@ int gk20a_gr_lock_down_sm(struct gk20a *g,
 	gk20a_writel(g,
 		gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
-	return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask,
+	return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask,
 			check_errors);
 }
@@ -5598,7 +5596,8 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	}
 	if (do_warp_sync) {
-		ret = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, true);
+		ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
+				global_mask, true);
 		if (ret) {
 			nvgpu_err(g, "sm did not lock down!");
 			return ret;
@@ -7906,25 +7905,21 @@ void gk20a_init_gr(struct gk20a *g)
 	nvgpu_cond_init(&g->gr.init_wq);
 }
-int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		u32 global_esr_mask, bool check_errors)
 {
 	bool locked_down;
 	bool no_error_pending;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset =
-		gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
 	u32 dbgr_status0 = 0, dbgr_control0 = 0;
 	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
 	struct nvgpu_timeout timeout;
 	u32 warp_esr;
-	u32 sm = 0;
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-		"GPC%d TPC%d: locking down SM", gpc, tpc);
+		"GPC%d TPC%d SM%d: locking down SM", gpc, tpc, sm);
 	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
 		NVGPU_TIMER_CPU_TIMER);
@@ -7949,7 +7944,8 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 		if (locked_down || no_error_pending) {
 			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-				"GPC%d TPC%d: locked down SM", gpc, tpc);
+				"GPC%d TPC%d SM%d: locked down SM",
+				gpc, tpc, sm);
 			return 0;
 		}
@@ -7959,7 +7955,7 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 		    (g->ops.mm.mmu_fault_pending(g))) {
 			nvgpu_err(g,
 				"GPC%d TPC%d: mmu fault pending,"
-				" sm will never lock down!", gpc, tpc);
+				" SM%d will never lock down!", gpc, tpc, sm);
 			return -EFAULT;
 		}
@@ -8017,7 +8013,7 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
 	gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
 			dbgr_control0);
-	err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc,
+	err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm,
 			global_esr_mask, check_errors);
 	if (err) {
 		nvgpu_err(g,
@@ -8030,9 +8026,10 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
 		u32 global_esr_mask, bool check_errors)
 {
 	struct gr_gk20a *gr = &g->gr;
-	u32 gpc, tpc;
+	u32 gpc, tpc, sm;
 	int err;
 	u32 dbgr_control0;
+	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 	/* if an SM debugger isn't attached, skip suspend */
 	if (!g->ops.gr.sm_debugger_attached(g)) {
@@ -8055,13 +8052,14 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
 	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
 		for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
-			err =
-			 gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc,
+			for (sm = 0; sm < sm_per_tpc; sm++) {
+				err = g->ops.gr.wait_for_sm_lock_down(g,
+						gpc, tpc, sm,
 					global_esr_mask, check_errors);
-			if (err) {
-				nvgpu_err(g,
-					"SuspendAllSms failed");
-				return;
+				if (err) {
+					nvgpu_err(g, "SuspendAllSms failed");
+					return;
+				}
 			}
 		}
 	}
@@ -8371,7 +8369,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
 {
 	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
-	u32 gpc, tpc, sm_id;
+	u32 gpc, tpc, sm, sm_id;
 	u32 global_mask;
 	/* Wait for the SMs to reach full stop. This condition is:
@@ -8386,9 +8384,10 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
 		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
 		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+		sm = g->gr.sm_to_cluster[sm_id].sm_index;
-		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
+		err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
+				global_mask, false);
 		if (err) {
 			nvgpu_err(g, "sm did not lock down!");
 			return err;

drivers/gpu/nvgpu/gk20a/gr_gk20a.h

@@ -622,9 +622,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
 void gk20a_gr_suspend_all_sms(struct gk20a *g,
 		u32 global_esr_mask, bool check_errors);
 u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
-int gk20a_gr_lock_down_sm(struct gk20a *g,
-		u32 gpc, u32 tpc, u32 global_esr_mask,
-		bool check_errors);
 int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 		struct channel_gk20a *ch, u64 sms, bool enable);
 bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
@@ -681,7 +678,10 @@ static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
 int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
 		struct gr_gk20a_isr_data *isr_data);
-int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+int gk20a_gr_lock_down_sm(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
+		bool check_errors);
+int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		u32 global_esr_mask, bool check_errors);
 void gk20a_gr_clear_sm_hww(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 global_esr);

drivers/gpu/nvgpu/gm20b/gr_gm20b.c

@@ -1638,4 +1638,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
 	gops->gr.get_sm_no_lock_down_hww_global_esr_mask =
 		gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask;
+	gops->gr.lock_down_sm = gk20a_gr_lock_down_sm;
+	gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down;
 }
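For comparison, a t19x chip would be expected to install its own SM-aware implementations here instead of the gk20a defaults, along these lines; the gv11b function names are assumptions for illustration, not part of this change:

	/* Hypothetical t19x hook-up; function names are illustrative only. */
	gops->gr.lock_down_sm = gv11b_gr_lock_down_sm;
	gops->gr.wait_for_sm_lock_down = gv11b_gr_wait_for_sm_lock_down;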