From 152d7c9edd09a1faa94b6cb9aaa1a4eff904b3bc Mon Sep 17 00:00:00 2001
From: dt <dt@nvidia.com>
Date: Mon, 9 Aug 2021 18:20:53 +0000
Subject: [PATCH] gpu: nvgpu: Fix for pes_tpc_mask programming

This issue was identified after enabling the CONFIG_UBSAN kernel
compilation flag, which reports whether any shift causes an overflow.
The register "gr_fe_tpc_fs_r(gpc_index)" is read-only after
Volta, so the gops in which the index is computed is not needed there.

Bug 200727116

Change-Id: Ib2306103389ba9df77fd59d012ec70e775104989
Signed-off-by: dt <dt@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2573296
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/gr/fs_state.c        | 62 +------------------
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c | 62 ++++++++++++++++++-
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h |  2 +
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h |  1 -
 .../nvgpu/hal/gr/init/gr_init_gv11b_fusa.c    |  5 --
 drivers/gpu/nvgpu/hal/init/hal_ga100.c        |  3 +-
 drivers/gpu/nvgpu/hal/init/hal_ga10b.c        |  3 +-
 drivers/gpu/nvgpu/hal/init/hal_gm20b.c        |  1 +
 drivers/gpu/nvgpu/hal/init/hal_gp10b.c        |  1 +
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |  3 +-
 drivers/gpu/nvgpu/hal/init/hal_tu104.c        |  3 +-
 drivers/gpu/nvgpu/include/nvgpu/gops/gr.h     |  2 +
 12 files changed, 78 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/gr/fs_state.c b/drivers/gpu/nvgpu/common/gr/fs_state.c
index d9ac20984..847a59957 100644
--- a/drivers/gpu/nvgpu/common/gr/fs_state.c
+++ b/drivers/gpu/nvgpu/common/gr/fs_state.c
@@ -49,64 +49,6 @@ static int gr_load_sm_id_config(struct gk20a *g, struct nvgpu_gr_config *config)
 	return err;
 }
 
-static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config)
-{
-	u32 pes_tpc_mask = 0;
-	u32 gpc, pes;
-	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
-						     GPU_LIT_NUM_TPC_PER_GPC);
-#ifdef CONFIG_NVGPU_NON_FUSA
-	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config);
-	u32 fuse_tpc_mask;
-	u32 val;
-	u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
-	u32 gpc_phys_id;
-#endif
-
-	/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
-	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
-		for (pes = 0;
-		     pes < nvgpu_gr_config_get_pe_count_per_gpc(config);
-		     pes++) {
-			pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
-						config, gpc, pes) <<
-				nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc);
-		}
-	}
-
-	nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
-
-#ifdef CONFIG_NVGPU_NON_FUSA
-	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
-		/*
-		 * Fuse registers must be queried with physical gpc-id and not
-		 * the logical ones. For tu104 and before chips logical gpc-id
-		 * is same as physical gpc-id for non-floorswept config but for
-		 * chips after tu104 it may not be true.
-		 */
-		gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
-				cur_gr_instance, 0U);
-		fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
-		if ((g->tpc_fs_mask_user != 0U) &&
-					(g->tpc_fs_mask_user != fuse_tpc_mask)) {
-			if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count),
-									U32(1))) {
-				val = g->tpc_fs_mask_user;
-				val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1));
-				/*
-				 * skip tpc to disable the other tpc cause channel
-				 * timeout
-				 */
-				val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1));
-				pes_tpc_mask = val;
-			}
-		}
-	}
-#endif
-
-	g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
-}
-
 int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
 {
 	u32 tpc_index, gpc_index;
@@ -184,7 +126,9 @@ int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
 
 	g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt);
 
-	gr_load_tpc_mask(g, config);
+	if (g->ops.gr.init.gr_load_tpc_mask != NULL) {
+		g->ops.gr.init.gr_load_tpc_mask(g, config);
+	}
 
 	err = gr_load_sm_id_config(g, config);
 	if (err != 0) {
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
index 00cc2038d..28cd2643d 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -36,6 +36,7 @@
 
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/config.h>
+#include <nvgpu/gr/gr_instances.h>
 
 #include "gr_init_gm20b.h"
 
@@ -388,6 +389,65 @@ void gm20b_gr_init_rop_mapping(struct gk20a *g,
 }
 #endif
 
+void gm20b_gr_init_load_tpc_mask(struct gk20a *g,
+			struct nvgpu_gr_config *config)
+{
+	u32 pes_tpc_mask = 0;
+	u32 gpc, pes;
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
+				GPU_LIT_NUM_TPC_PER_GPC);
+#ifdef CONFIG_NVGPU_NON_FUSA
+	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config);
+	u32 fuse_tpc_mask;
+	u32 val;
+	u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
+	u32 gpc_phys_id;
+#endif
+	/* NOTE(review): the shift below assumes the packed mask fits in u32 */
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
+		for (pes = 0;
+		     pes < nvgpu_gr_config_get_pe_count_per_gpc(config);
+		     pes++) {
+			pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
+					config, gpc, pes) <<
+					nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc);
+		}
+	}
+
+	nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
+
+#ifdef CONFIG_NVGPU_NON_FUSA
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
+		/*
+		 * Fuse registers must be queried with physical gpc-id and not
+		 * the logical ones. For tu104 and before chips logical gpc-id
+		 * is same as physical gpc-id for non-floorswept config but for
+		 * chips after tu104 it may not be true.
+		 */
+		gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
+				cur_gr_instance, 0U);
+		fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
+		if ((g->tpc_fs_mask_user != 0U) &&
+					(g->tpc_fs_mask_user != fuse_tpc_mask)) {
+			if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count),
+									U32(1))) {
+				val = g->tpc_fs_mask_user;
+				val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1));
+				/*
+				 * Keep the TPC count but make the mask contiguous: skipping
+				 * a TPC to disable another one causes channel timeout.
+				 */
+				val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1));
+				pes_tpc_mask = val;
+			}
+		}
+	}
+#endif
+
+	g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
+
+}
+
 void gm20b_gr_init_fs_state(struct gk20a *g)
 {
 	nvgpu_log_fn(g, " ");
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
index 171cf7d32..fc69b5a6b 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
@@ -49,6 +49,8 @@ void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g,
 			      struct nvgpu_gr_config *gr_config);
 void gm20b_gr_init_cwd_gpcs_tpcs_num(struct gk20a *g,
 				     u32 gpc_count, u32 tpc_count);
+void gm20b_gr_init_load_tpc_mask(struct gk20a *g,
+				struct nvgpu_gr_config *gr_config);
 int gm20b_gr_init_wait_idle(struct gk20a *g);
 int gm20b_gr_init_wait_fe_idle(struct gk20a *g);
 int gm20b_gr_init_fe_pwr_mode_force_on(struct gk20a *g, bool force_on);
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
index e04ce1a2b..eebd234cd 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
@@ -53,7 +53,6 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 				struct nvgpu_gr_config *gr_config,
 				struct nvgpu_gr_ctx *gr_ctx,
 				bool patch);
-void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
 void gv11b_gr_init_fs_state(struct gk20a *g);
 
 void gv11b_gr_init_commit_global_timeslice(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c
index abd11ae8d..61cdebfda 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c
@@ -529,11 +529,6 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 	return 0;
 }
 
-void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask)
-{
-	nvgpu_writel(g, gr_fe_tpc_fs_r(gpc_index), pes_tpc_mask);
-}
-
 void gv11b_gr_init_fs_state(struct gk20a *g)
 {
 	u32 data;
diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
index 30d999b09..a4df921a3 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
@@ -597,11 +597,12 @@ static const struct gops_gr_init ga100_ops_gr_init = {
 	.sm_id_config_early = nvgpu_gr_init_sm_id_early_config,
 	.sm_id_config = gv11b_gr_init_sm_id_config,
 	.sm_id_numbering = ga10b_gr_init_sm_id_numbering,
-	.tpc_mask = gv11b_gr_init_tpc_mask,
+	.tpc_mask = NULL,
 	.fs_state = ga10b_gr_init_fs_state,
 	.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 	.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
 	.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
+	.gr_load_tpc_mask = NULL,
 	.wait_empty = ga10b_gr_init_wait_empty,
 	.wait_idle = ga10b_gr_init_wait_idle,
 	.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
index fc4d59c02..72d4993f5 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
@@ -575,11 +575,12 @@ static const struct gops_gr_init ga10b_ops_gr_init = {
 	.sm_id_config_early = nvgpu_gr_init_sm_id_early_config,
 	.sm_id_config = gv11b_gr_init_sm_id_config,
 	.sm_id_numbering = ga10b_gr_init_sm_id_numbering,
-	.tpc_mask = gv11b_gr_init_tpc_mask,
+	.tpc_mask = NULL,
 	.fs_state = ga10b_gr_init_fs_state,
 	.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 	.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
 	.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
+	.gr_load_tpc_mask = NULL,
 	.wait_empty = ga10b_gr_init_wait_empty,
 	.wait_idle = ga10b_gr_init_wait_idle,
 	.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
index 6e23061a6..3bfaa0540 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -327,6 +327,7 @@ static const struct gops_gr_init gm20b_ops_gr_init = {
 	.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 	.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
 	.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
+	.gr_load_tpc_mask = gm20b_gr_init_load_tpc_mask,
 	.wait_empty = gm20b_gr_init_wait_idle,
 	.wait_idle = gm20b_gr_init_wait_idle,
 	.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
index febb6ea46..80047a516 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
@@ -394,6 +394,7 @@ static const struct gops_gr_init gp10b_ops_gr_init = {
 	.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 	.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
 	.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
+	.gr_load_tpc_mask = gm20b_gr_init_load_tpc_mask,
 	.wait_empty = gp10b_gr_init_wait_empty,
 	.wait_idle = gm20b_gr_init_wait_idle,
 	.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index 80a610e4b..360a2e4cb 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -478,11 +478,12 @@ static const struct gops_gr_init gv11b_ops_gr_init = {
 	.get_sm_id_size = gp10b_gr_init_get_sm_id_size,
 	.sm_id_config = gv11b_gr_init_sm_id_config,
 	.sm_id_numbering = gv11b_gr_init_sm_id_numbering,
-	.tpc_mask = gv11b_gr_init_tpc_mask,
+	.tpc_mask = NULL,
 	.fs_state = gv11b_gr_init_fs_state,
 	.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 	.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
 	.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
+	.gr_load_tpc_mask = NULL,
 	.wait_empty = gp10b_gr_init_wait_empty,
 	.wait_idle = gm20b_gr_init_wait_idle,
 	.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index acfe96a5b..c82db8045 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -523,11 +523,12 @@ static const struct gops_gr_init tu104_ops_gr_init = {
 	.get_sm_id_size = gp10b_gr_init_get_sm_id_size,
 	.sm_id_config = gv11b_gr_init_sm_id_config,
 	.sm_id_numbering = gv11b_gr_init_sm_id_numbering,
-	.tpc_mask = gv11b_gr_init_tpc_mask,
+	.tpc_mask = NULL,
 	.fs_state = gv11b_gr_init_fs_state,
 	.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 	.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
 	.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
+	.gr_load_tpc_mask = NULL,
 	.wait_empty = gp10b_gr_init_wait_empty,
 	.wait_idle = gm20b_gr_init_wait_idle,
 	.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
index c59d77167..af5ea5244 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
@@ -752,6 +752,8 @@ struct gops_gr_init {
 				  struct nvgpu_gr_config *gr_config);
 	void (*cwd_gpcs_tpcs_num)(struct gk20a *g,
 				  u32 gpc_count, u32 tpc_count);
+	void (*gr_load_tpc_mask)(struct gk20a *g,
+			struct nvgpu_gr_config *gr_config);
 	int (*wait_empty)(struct gk20a *g);
 	void (*override_context_reset)(struct gk20a *g);
 	void (*fe_go_idle_timeout)(struct gk20a *g, bool enable);