gpu: nvgpu: move gk20a_gr_gpc_offset function

Move gk20a_gr_gpc_offset() and gk20a_gr_tpc_offset() from gr_gk20a.c
to gr.c, renaming them to nvgpu_gr_gpc_offset() and nvgpu_gr_tpc_offset()
respectively.

JIRA NVGPU-1885

Change-Id: Ib05d8870e1c77de8b34e46c04dcd7251b666f897
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2084388
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Vinod G
2019-03-28 17:20:33 -07:00
committed by mobile promotions
parent 897c7263f1
commit a2a676669f
7 changed files with 60 additions and 59 deletions
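For context, the two helpers being moved are plain stride arithmetic, and every call site touched below combines them the same way. A minimal sketch of the caller pattern with the new names (the helper definitions appear in the gr.c hunk below; gk20a_readl() and gr_gpc0_tpc0_sm_hww_global_esr_r() are existing nvgpu accessors visible in this diff; the wrapper function name is illustrative only):

/* Sketch: compute the register offset for a given GPC/TPC with the
 * renamed helpers, then read that TPC's SM HWW global ESR register. */
static u32 example_read_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc)
{
	u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);

	return gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
}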


@@ -79,6 +79,23 @@ static void gr_load_tpc_mask(struct gk20a *g)
g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
}
+u32 nvgpu_gr_gpc_offset(struct gk20a *g, u32 gpc)
+{
+u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+u32 gpc_offset = gpc_stride * gpc;
+return gpc_offset;
+}
+u32 nvgpu_gr_tpc_offset(struct gk20a *g, u32 tpc)
+{
+u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
+GPU_LIT_TPC_IN_GPC_STRIDE);
+u32 tpc_offset = tpc_in_gpc_stride * tpc;
+return tpc_offset;
+}
int nvgpu_gr_suspend(struct gk20a *g)
{
int ret = 0;


@@ -550,23 +550,6 @@ int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
return ret;
}
-u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc)
-{
-u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-u32 gpc_offset = gpc_stride * gpc;
-return gpc_offset;
-}
-u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc)
-{
-u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
-GPU_LIT_TPC_IN_GPC_STRIDE);
-u32 tpc_offset = tpc_in_gpc_stride * tpc;
-return tpc_offset;
-}
int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch)
{
@@ -2427,7 +2410,7 @@ int gk20a_gr_lock_down_sm(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
bool check_errors)
{
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 dbgr_control0;
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
@@ -2466,7 +2449,7 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
int ret = 0;
bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
bool disable_sm_exceptions = true;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
bool sm_debugger_attached;
u32 global_esr, warp_esr, global_mask;
u64 hww_warp_esr_pc = 0;
@@ -2581,7 +2564,7 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
u32 *hww_global_esr)
{
int tmp_ret, ret = 0;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
+ offset);
u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
@@ -2669,7 +2652,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GPC%d exception pending", gpc);
-gpc_offset = gk20a_gr_gpc_offset(g, gpc);
+gpc_offset = nvgpu_gr_gpc_offset(g, gpc);
gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r()
+ gpc_offset);
@@ -4621,7 +4604,7 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
bool no_error_pending;
u32 delay = POLL_DELAY_MIN_US;
bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 dbgr_status0 = 0, dbgr_control0 = 0;
u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
struct nvgpu_timeout timeout;
@@ -4703,7 +4686,7 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
{
int err;
u32 dbgr_control0;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
/* if an SM debugger isn't attached, skip suspend */
if (!g->ops.gr.sm_debugger_attached(g)) {
@@ -4794,7 +4777,7 @@ void gk20a_gr_resume_single_sm(struct gk20a *g,
* effect, before enabling the run trigger.
*/
-offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
/*De-assert stop trigger */
dbgr_control0 =
@@ -5159,7 +5142,7 @@ u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g)
u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
{
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 hww_warp_esr = gk20a_readl(g,
gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
return hww_warp_esr;
@@ -5167,7 +5150,7 @@ u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
{
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 hww_global_esr = gk20a_readl(g,
gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);


@@ -424,8 +424,6 @@ u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
-u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
-u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
u32 *esr_sm_sel);
void gk20a_gr_init_ovr_sm_dsm_perf(void);


@@ -820,7 +820,7 @@ int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
u32 global_esr)
{
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
global_esr);


@@ -38,6 +38,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
@@ -1261,7 +1262,7 @@ clean_up:
u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm)
{
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 hww_warp_esr = gk20a_readl(g,
gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);


@@ -1498,12 +1498,12 @@ static void gr_gv11b_dump_gr_sm_regs(struct gk20a *g,
sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
-gpc_offset = gk20a_gr_gpc_offset(g, gpc);
+gpc_offset = nvgpu_gr_gpc_offset(g, gpc);
for (tpc = 0;
tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc);
tpc++) {
-tpc_offset = gk20a_gr_tpc_offset(g, tpc);
+tpc_offset = nvgpu_gr_tpc_offset(g, tpc);
for (sm = 0; sm < sm_per_tpc; sm++) {
offset = gpc_offset + tpc_offset +
@@ -1719,8 +1719,8 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
}
/* clear interrupt */
-offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
nvgpu_writel(g,
gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);
@@ -1844,8 +1844,8 @@ static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g,
}
/* clear interrupt */
-offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
nvgpu_writel(g,
gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);
@@ -1908,8 +1908,8 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
if (cilp_enabled && sm_debugger_attached) {
u32 global_mask = 0, dbgr_control0, global_esr_copy;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
if ((global_esr &
@@ -2200,7 +2200,7 @@ void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
u32 *esr_sm_sel)
{
u32 reg_val;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
reg_val = gk20a_readl(g, gr_gpc0_tpc0_sm_tpc_esr_sm_sel_r() + offset);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
@@ -2256,8 +2256,8 @@ void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state)
tpc = sm_info->tpc_index;
sm = sm_info->sm_index;
-offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
/* 64 bit read */
@@ -2341,8 +2341,8 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
}
sm = sm_info->sm_index;
-reg_offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+reg_offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
ops[i].op = REGOP(WRITE_32);
@@ -2425,8 +2425,8 @@ int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
-gpc_offset = gk20a_gr_gpc_offset(g, gpc);
-gpc_tpc_offset = gpc_offset + gk20a_gr_tpc_offset(g, tpc);
+gpc_offset = nvgpu_gr_gpc_offset(g, gpc);
+gpc_tpc_offset = gpc_offset + nvgpu_gr_tpc_offset(g, tpc);
tpc_id = gk20a_readl(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset);
sm_id = tpc_id * sm_per_tpc + sm;
@@ -2509,8 +2509,8 @@ void gv11b_gr_suspend_single_sm(struct gk20a *g,
{
int err;
u32 dbgr_control0;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
/* if an SM debugger isn't attached, skip suspend */
@@ -2604,7 +2604,7 @@ void gv11b_gr_resume_single_sm(struct gk20a *g,
* effect, before enabling the run trigger.
*/
-offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) +
+offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
@@ -2741,8 +2741,8 @@ int gv11b_gr_resume_from_pause(struct gk20a *g)
u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm)
{
-u32 offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
u32 hww_warp_esr = gk20a_readl(g,
@@ -2753,8 +2753,8 @@ u32 gv11b_gr_get_sm_hww_warp_esr(struct gk20a *g,
u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm)
{
-u32 offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
u32 hww_global_esr = gk20a_readl(g,
@@ -2835,8 +2835,8 @@ int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g,
u32 warp_esr, global_esr;
struct nvgpu_timeout timeout;
int err;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
@@ -2924,7 +2924,7 @@ int gv11b_gr_lock_down_sm(struct gk20a *g,
bool check_errors)
{
u32 dbgr_control0;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) +
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
@@ -2944,7 +2944,7 @@ int gv11b_gr_lock_down_sm(struct gk20a *g,
void gv11b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
u32 global_esr)
{
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) +
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
gk20a_writel(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset,
@@ -2965,7 +2965,7 @@ int gr_gv11b_handle_tpc_mpc_exception(struct gk20a *g,
u32 gpc, u32 tpc, bool *post_event)
{
u32 esr;
-u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
+ offset);
@@ -3811,8 +3811,8 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g,
}
sm = sm_info->sm_index;
-offset = gk20a_gr_gpc_offset(g, gpc) +
-gk20a_gr_tpc_offset(g, tpc) +
+offset = nvgpu_gr_gpc_offset(g, gpc) +
+nvgpu_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm);
val = gk20a_readl(g, gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset);
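Note that on gv11b the same pattern carries a third, per-SM term, which is why the hunks above keep gv11b_gr_sm_offset() in every sum. A minimal sketch of the combined computation after the rename (the wrapper name is illustrative; the three helpers are exactly those used in the hunks above):

/* Sketch: gv11b SM registers are addressed per GPC, per TPC and per SM,
 * so the SM offset is added on top of the renamed GPC/TPC helpers. */
static u32 example_gv11b_sm_reg_offset(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
{
	return nvgpu_gr_gpc_offset(g, gpc) +
		nvgpu_gr_tpc_offset(g, tpc) +
		gv11b_gr_sm_offset(g, sm);
}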


@@ -28,6 +28,8 @@
#define NVGPU_GR_IDLE_CHECK_DEFAULT_US 10U
#define NVGPU_GR_IDLE_CHECK_MAX_US 200U
+u32 nvgpu_gr_gpc_offset(struct gk20a *g, u32 gpc);
+u32 nvgpu_gr_tpc_offset(struct gk20a *g, u32 tpc);
int nvgpu_gr_suspend(struct gk20a *g);
void nvgpu_gr_flush_channel_tlb(struct gk20a *g);
u32 nvgpu_gr_get_idle_timeout(struct gk20a *g);