From 2adcb51a45c372b5e525a9d4adb829fe3092395a Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 8 Apr 2019 19:30:12 +0530
Subject: [PATCH] gpu: nvgpu: add hal.gr.init hal to detect SM arch

Add a new HAL, g->ops.gr.init.detect_sm_arch(), to the hal.gr.init unit
to read SM arch information, and remove g->ops.gr.detect_sm_arch().

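Move the corresponding gm20b and gv11b implementations to the
hal.gr.init unit and rename them to gm20b_gr_init_detect_sm_arch() and
gv11b_gr_init_detect_sm_arch().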

Remove the unused function declaration for
gr_gv11b_init_sw_veid_bundle().

Jira NVGPU-2961

Change-Id: Idfd5ce19c06978dc31cbcec2cd01cb2912eb3cf9
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2097534
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/init/nvgpu_init.c          |  2 +-
 .../gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c    |  2 +-
 .../gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c    |  2 +-
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c                  | 12 ------------
 drivers/gpu/nvgpu/gm20b/gr_gm20b.h                  |  1 -
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c                 |  2 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c                 |  2 +-
 drivers/gpu/nvgpu/gv100/hal_gv100.c                 |  2 +-
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c                  | 12 ------------
 drivers/gpu/nvgpu/gv11b/gr_gv11b.h                  |  2 --
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c                 |  2 +-
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c       | 13 +++++++++++++
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h       |  1 +
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c       | 12 ++++++++++++
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h       |  1 +
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h             |  2 +-
 drivers/gpu/nvgpu/tu104/hal_tu104.c                 |  2 +-
 17 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index f9deb6f98..ad5fb6ef8 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -547,7 +547,7 @@ void gk20a_init_gpu_characteristics(struct gk20a *g)
 		}
 	}
 
-	g->ops.gr.detect_sm_arch(g);
+	g->ops.gr.init.detect_sm_arch(g);
 
 	if (g->ops.gr.init_cyclestats != NULL) {
 		g->ops.gr.init_cyclestats(g);
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index 8a77e32b6..22b188773 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -134,7 +134,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.set_gpc_tpc_mask = NULL,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
-		.detect_sm_arch = vgpu_gr_detect_sm_arch,
 		.dump_gr_regs = NULL,
 		.update_pc_sampling = vgpu_gr_update_pc_sampling,
 		.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
@@ -357,6 +356,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 			.commit_ctxsw_spill = gp10b_gr_init_commit_ctxsw_spill,
 			.commit_cbes_reserve =
 				gp10b_gr_init_commit_cbes_reserve,
+			.detect_sm_arch = vgpu_gr_detect_sm_arch,
 		},
 	},
 	.class = {
diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
index 29322bec0..8d3fe8117 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -158,7 +158,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.set_gpc_tpc_mask = NULL,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
-		.detect_sm_arch = vgpu_gr_detect_sm_arch,
 		.dump_gr_regs = NULL,
 		.update_pc_sampling = vgpu_gr_update_pc_sampling,
 		.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
@@ -409,6 +408,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 				gv11b_gr_init_commit_cbes_reserve,
 			.gfxp_wfi_timeout =
 				gv11b_gr_init_commit_gfxp_wfi_timeout,
+			.detect_sm_arch = vgpu_gr_detect_sm_arch,
 		},
 		.intr = {
 			.handle_gcc_exception =
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 02fa2fb94..bd327e499 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -341,18 +341,6 @@ u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
 	return 0;
 }
 
-void gr_gm20b_detect_sm_arch(struct gk20a *g)
-{
-	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
-
-	g->params.sm_arch_spa_version =
-		gr_gpc0_tpc0_sm_arch_spa_version_v(v);
-	g->params.sm_arch_sm_version =
-		gr_gpc0_tpc0_sm_arch_sm_version_v(v);
-	g->params.sm_arch_warp_count =
-		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
-}
-
 int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 			   struct gk20a_debug_output *o)
 {
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index 5ec0a0342..b22498f78 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -61,7 +61,6 @@ void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
 	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
 bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
 u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
-void gr_gm20b_detect_sm_arch(struct gk20a *g);
 int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 			   struct gk20a_debug_output *o);
 int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index cf7e2be4b..fc7b7b3ad 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -264,7 +264,6 @@ static const struct gpu_ops gm20b_ops = {
 		.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
-		.detect_sm_arch = gr_gm20b_detect_sm_arch,
 		.dump_gr_regs = gr_gm20b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -469,6 +468,7 @@ static const struct gpu_ops gm20b_ops = {
 				gm20b_gr_init_load_sw_bundle_init,
 			.get_gfxp_rtv_cb_size = NULL,
 			.get_patch_slots = gm20b_gr_init_get_patch_slots,
+			.detect_sm_arch = gm20b_gr_init_detect_sm_arch,
 		},
 		.intr = {
 			.set_shader_exceptions =
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 1f35736e0..a4947f91b 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -298,7 +298,6 @@ static const struct gpu_ops gp10b_ops = {
 		.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
-		.detect_sm_arch = gr_gm20b_detect_sm_arch,
 		.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -564,6 +563,7 @@ static const struct gpu_ops gp10b_ops = {
 			.commit_cbes_reserve =
 				gp10b_gr_init_commit_cbes_reserve,
 			.get_patch_slots = gm20b_gr_init_get_patch_slots,
+			.detect_sm_arch = gm20b_gr_init_detect_sm_arch,
 		},
 		.intr = {
 			.set_shader_exceptions =
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 82c4cb62a..062a022e8 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -401,7 +401,6 @@ static const struct gpu_ops gv100_ops = {
 		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
-		.detect_sm_arch = gr_gv11b_detect_sm_arch,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -696,6 +695,7 @@ static const struct gpu_ops gv100_ops = {
 			.get_max_subctx_count =
 				gv11b_gr_init_get_max_subctx_count,
 			.get_patch_slots = gv11b_gr_init_get_patch_slots,
+			.detect_sm_arch = gv11b_gr_init_detect_sm_arch,
 		},
 		.intr = {
 			.set_shader_exceptions =
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 9683b7084..5b57243c4 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1619,18 +1619,6 @@ int gr_gv11b_handle_fecs_error(struct gk20a *g,
 	return ret;
 }
 
-void gr_gv11b_detect_sm_arch(struct gk20a *g)
-{
-	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
-
-	g->params.sm_arch_spa_version =
-		gr_gpc0_tpc0_sm_arch_spa_version_v(v);
-	g->params.sm_arch_sm_version =
-		gr_gpc0_tpc0_sm_arch_sm_version_v(v);
-	g->params.sm_arch_warp_count =
-		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
-}
-
 void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
 				u32 *esr_sm_sel)
 {
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index a7230a9be..5c6870ce7 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -89,8 +89,6 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 int gr_gv11b_handle_fecs_error(struct gk20a *g,
 				struct channel_gk20a *__ch,
 				struct nvgpu_gr_isr_data *isr_data);
-int gr_gv11b_init_sw_veid_bundle(struct gk20a *g);
-void gr_gv11b_detect_sm_arch(struct gk20a *g);
 void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
 				u32 *esr_sm_sel);
 int gv11b_gr_sm_trigger_suspend(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 1bccb5e9d..e5ed9bc32 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -366,7 +366,6 @@ static const struct gpu_ops gv11b_ops = {
 		.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
-		.detect_sm_arch = gr_gv11b_detect_sm_arch,
 		.powergate_tpc = gr_gv11b_powergate_tpc,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
@@ -672,6 +671,7 @@ static const struct gpu_ops gv11b_ops = {
 			.get_max_subctx_count =
 				gv11b_gr_init_get_max_subctx_count,
 			.get_patch_slots = gv11b_gr_init_get_patch_slots,
+			.detect_sm_arch = gv11b_gr_init_detect_sm_arch,
 		},
 		.intr = {
 			.set_shader_exceptions =
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
index 12d7b1da4..050d71be3 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
@@ -1097,3 +1097,16 @@ u32 gm20b_gr_init_get_patch_slots(struct gk20a *g,
 {
 	return PATCH_CTX_SLOTS_PER_PAGE;
 }
+
+void gm20b_gr_init_detect_sm_arch(struct gk20a *g)
+{
+	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
+
+	g->params.sm_arch_spa_version =
+		gr_gpc0_tpc0_sm_arch_spa_version_v(v);
+	g->params.sm_arch_sm_version =
+		gr_gpc0_tpc0_sm_arch_sm_version_v(v);
+	g->params.sm_arch_warp_count =
+		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
+}
+
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
index bab9c8246..93f7de274 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
@@ -97,5 +97,6 @@ void gm20b_gr_init_commit_global_cb_manager(struct gk20a *g,
 
 u32 gm20b_gr_init_get_patch_slots(struct gk20a *g,
 	struct nvgpu_gr_config *config);
+void gm20b_gr_init_detect_sm_arch(struct gk20a *g);
 
 #endif /* NVGPU_GR_INIT_GM20B_H */
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
index c2129f595..5ac643011 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
@@ -948,3 +948,15 @@ u32 gv11b_gr_init_get_patch_slots(struct gk20a *g,
 	return size;
 }
 
+void gv11b_gr_init_detect_sm_arch(struct gk20a *g)
+{
+	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
+
+	g->params.sm_arch_spa_version =
+		gr_gpc0_tpc0_sm_arch_spa_version_v(v);
+	g->params.sm_arch_sm_version =
+		gr_gpc0_tpc0_sm_arch_sm_version_v(v);
+	g->params.sm_arch_warp_count =
+		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
+}
+
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
index 76ba73e98..2980e4078 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
@@ -81,5 +81,6 @@ void gv11b_gr_init_commit_gfxp_wfi_timeout(struct gk20a *g,
 u32 gv11b_gr_init_get_max_subctx_count(void);
 u32 gv11b_gr_init_get_patch_slots(struct gk20a *g,
 	struct nvgpu_gr_config *config);
+void gv11b_gr_init_detect_sm_arch(struct gk20a *g);
 
 #endif /* NVGPU_GR_INIT_GV11B_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 3c33f1c07..b159566df 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -304,7 +304,6 @@ struct gpu_ops {
 				u32 *gpc_num, u32 *tpc_num);
 		u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
 		u32 (*get_egpc_base)(struct gk20a *g);
-		void (*detect_sm_arch)(struct gk20a *g);
 		void (*powergate_tpc)(struct gk20a *g);
 		int (*init_ctxsw_preemption_mode)(struct gk20a *g,
 			  struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
@@ -789,6 +788,7 @@ struct gpu_ops {
 			u32 (*get_max_subctx_count)(void);
 			u32 (*get_patch_slots)(struct gk20a *g,
 				struct nvgpu_gr_config *config);
+			void (*detect_sm_arch)(struct gk20a *g);
 		} init;
 
 		struct {
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 7016fe311..9e66ad652 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -419,7 +419,6 @@ static const struct gpu_ops tu104_ops = {
 		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
-		.detect_sm_arch = gr_gv11b_detect_sm_arch,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -729,6 +728,7 @@ static const struct gpu_ops tu104_ops = {
 			.get_max_subctx_count =
 				gv11b_gr_init_get_max_subctx_count,
 			.get_patch_slots = gv11b_gr_init_get_patch_slots,
+			.detect_sm_arch = gv11b_gr_init_detect_sm_arch,
 		},
 		.intr = {
 			.set_shader_exceptions =