gpu: nvgpu: use nvgpu_gr_config in gr.config unit

Remove use of struct gk20a and struct gr_gk20a from common.gr.config
hal functions.

Since many nvgpu_* ops still need a reference to struct gk20a, a
struct gk20a *g pointer is now stored in struct nvgpu_gr_config. Also,
nvgpu_gr_config is updated to include sm_count_per_tpc.
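
A minimal sketch of the resulting HAL surface, taken from the hunks in
this change (exact call sites vary per chip):

    /* the op now takes the config object instead of struct gk20a */
    int (*init_sm_id_table)(struct nvgpu_gr_config *gr_config);

    /* callers hand over g->gr.config */
    err = g->ops.gr.config.init_sm_id_table(g->gr.config);

    /* sm_count_per_tpc is cached in nvgpu_gr_config and read via a getter */
    u32 sm_per_tpc = nvgpu_gr_config_get_sm_count_per_tpc(gr_config);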

JIRA NVGPU-1884

Change-Id: I874c2b3970d97ef3940b74d8ef121a7261061670
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2075681
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Nitin Kumbhar
Committed: mobile promotions
Date:      2019-03-18 23:00:42 +05:30
Parent:    b5cd0c7956
Commit:    e4a140b7c0
10 changed files with 83 additions and 63 deletions

View File

@@ -39,6 +39,8 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
return NULL;
}
config->g = g;
config->max_gpc_count = g->ops.top.get_max_gpc_count(g);
config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
@@ -65,10 +67,16 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
goto clean_up;
}
config->sm_count_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
if (config->sm_count_per_tpc == 0U) {
nvgpu_err(g, "sm_count_per_tpc==0!");
goto clean_up;
}
/* allocate for max tpc per gpc */
sm_info_size = (size_t)config->gpc_count *
(size_t)config->max_tpc_per_gpc_count *
(size_t)nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC) *
(size_t)config->sm_count_per_tpc *
sizeof(struct sm_info);
if (config->sm_to_cluster == NULL) {
@@ -511,6 +519,11 @@ u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config)
return config->pe_count_per_gpc;
}
u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config)
{
return config->sm_count_per_tpc;
}
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
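
A rough worked example of the sm_info_size computation in the first hunk of
this file, using hypothetical counts that are not taken from this change:
with gpc_count = 2, max_tpc_per_gpc_count = 4 and sm_count_per_tpc = 2, the
table is sized for 2 * 4 * 2 = 16 struct sm_info entries. Sizing by
max_tpc_per_gpc_count rather than the actual per-GPC TPC counts
over-allocates when GPCs are floorswept, which matches the
"allocate for max tpc per gpc" comment.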

View File

@@ -102,7 +102,7 @@ int nvgpu_gr_init_fs_state(struct gk20a *g)
}
if (g->ops.gr.config.init_sm_id_table != NULL) {
err = g->ops.gr.config.init_sm_id_table(g);
err = g->ops.gr.config.init_sm_id_table(g->gr.config);
if (err != 0) {
return err;
}

View File

@@ -333,6 +333,8 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
config = gr->config;
config->g = g;
config->max_gpc_count = priv->constants.max_gpc_count;
config->gpc_count = priv->constants.gpc_count;
config->gpc_mask = priv->constants.gpc_mask;
@@ -1094,27 +1096,26 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
nvgpu_mutex_release(&g->dbg_sessions_lock);
}
int vgpu_gr_init_sm_id_table(struct gk20a *g)
int vgpu_gr_init_sm_id_table(struct nvgpu_gr_config *gr_config)
{
struct tegra_vgpu_cmd_msg msg = {};
struct tegra_vgpu_vsms_mapping_params *p = &msg.params.vsms_mapping;
struct tegra_vgpu_vsms_mapping_entry *entry;
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
struct vgpu_priv_data *priv = vgpu_get_priv_data(gr_config->g);
struct sm_info *sm_info;
int err;
struct gr_gk20a *gr = &g->gr;
struct nvgpu_gr_config *config = gr->config;
size_t oob_size;
void *handle = NULL;
u32 sm_id;
u32 max_sm;
msg.cmd = TEGRA_VGPU_CMD_GET_VSMS_MAPPING;
msg.handle = vgpu_get_handle(g);
msg.handle = vgpu_get_handle(gr_config->g);
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
err = err ? err : msg.ret;
if (err) {
nvgpu_err(g, "get vsms mapping failed err %d", err);
nvgpu_err(gr_config->g,
"get vsms mapping failed err %d", err);
return err;
}
@@ -1125,8 +1126,8 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
return -EINVAL;
}
max_sm = config->gpc_count *
config->max_tpc_per_gpc_count *
max_sm = gr_config->gpc_count *
gr_config->max_tpc_per_gpc_count *
priv->constants.sm_per_tpc;
if (p->num_sm > max_sm) {
return -EINVAL;
@@ -1136,9 +1137,9 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
return -EINVAL;
}
gr->config->no_of_sm = p->num_sm;
gr_config->no_of_sm = p->num_sm;
for (sm_id = 0; sm_id < p->num_sm; sm_id++, entry++) {
sm_info = nvgpu_gr_config_get_sm_info(gr->config, sm_id);
sm_info = nvgpu_gr_config_get_sm_info(gr_config, sm_id);
sm_info->tpc_index = entry->tpc_index;
sm_info->gpc_index = entry->gpc_index;
sm_info->sm_index = entry->sm_index;
@@ -1155,7 +1156,7 @@ int vgpu_gr_init_fs_state(struct gk20a *g)
return -EINVAL;
}
return g->ops.gr.config.init_sm_id_table(g);
return g->ops.gr.config.init_sm_id_table(g->gr.config);
}
int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable)

View File

@@ -76,7 +76,7 @@ int vgpu_gr_resume_contexts(struct gk20a *g,
struct dbg_session_gk20a *dbg_s,
int *ctx_resident_ch_fd);
int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va);
int vgpu_gr_init_sm_id_table(struct gk20a *g);
int vgpu_gr_init_sm_id_table(struct nvgpu_gr_config *gr_config);
int vgpu_gr_init_fs_state(struct gk20a *g);
int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable);
void vgpu_gr_init_cyclestats(struct gk20a *g);

View File

@@ -28,19 +28,19 @@
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
int gm20b_gr_config_init_sm_id_table(struct gk20a *g)
int gm20b_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config)
{
u32 gpc, tpc;
u32 sm_id = 0;
for (tpc = 0;
tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(gr_config);
tpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) {
if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
if (tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) {
struct sm_info *sm_info =
nvgpu_gr_config_get_sm_info(g->gr.config, sm_id);
nvgpu_gr_config_get_sm_info(gr_config, sm_id);
sm_info->tpc_index = tpc;
sm_info->gpc_index = gpc;
sm_info->sm_index = 0;
@@ -49,7 +49,7 @@ int gm20b_gr_config_init_sm_id_table(struct gk20a *g)
}
}
}
g->gr.config->no_of_sm = sm_id;
gr_config->no_of_sm = sm_id;
return 0;
}
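
Assuming a hypothetical configuration of 2 GPCs with 2 TPCs each (numbers
not from this change), the TPC-major loop above assigns sm_ids in the order:

    sm_id 0 -> gpc 0, tpc 0
    sm_id 1 -> gpc 1, tpc 0
    sm_id 2 -> gpc 0, tpc 1
    sm_id 3 -> gpc 1, tpc 1

so SMs are spread across GPCs before a second TPC on any one GPC is used.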

View File

@@ -28,7 +28,7 @@
struct gk20a;
struct nvgpu_gr_config;
int gm20b_gr_config_init_sm_id_table(struct gk20a *g);
int gm20b_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config);
u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,

View File

@@ -30,12 +30,11 @@
* Estimate performance if the given logical TPC in the given logical GPC were
* removed.
*/
static int gr_gv100_scg_estimate_perf(struct gk20a *g,
static int gr_gv100_scg_estimate_perf(struct nvgpu_gr_config *gr_config,
unsigned long *gpc_tpc_mask,
u32 disable_gpc_id, u32 disable_tpc_id,
int *perf)
{
struct gr_gk20a *gr = &g->gr;
int err = 0;
u32 scale_factor = 512U; /* Use fx23.9 */
u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */
@@ -58,8 +57,8 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
bool is_tpc_removed_pes = false;
u32 max_tpc_gpc = 0U;
u32 num_tpc_mask;
u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
u32 *num_tpc_gpc = nvgpu_kzalloc(gr_config->g, sizeof(u32) *
nvgpu_get_litter_value(gr_config->g, GPU_LIT_NUM_GPCS));
if (num_tpc_gpc == NULL) {
return -ENOMEM;
@@ -67,7 +66,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
for (gpc_id = 0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id < nvgpu_gr_config_get_gpc_count(gr_config);
gpc_id++) {
num_tpc_mask = gpc_tpc_mask[gpc_id];
@@ -97,7 +96,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
* ratio represents relative throughput of the GPC
*/
scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id);
nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_id);
if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
min_scg_gpc_pix_perf = scg_gpc_pix_perf;
@@ -105,11 +104,11 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
/* Calculate # of surviving PES */
for (pes_id = 0;
pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id);
pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc_id);
pes_id++) {
/* Count the number of TPC on the set */
num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask(
gr->config, gpc_id, pes_id) &
gr_config, gpc_id, pes_id) &
gpc_tpc_mask[gpc_id];
if ((gpc_id == disable_gpc_id) &&
@@ -140,12 +139,12 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
/* Now calculate perf */
scg_world_perf = (scale_factor * scg_num_pes) /
nvgpu_gr_config_get_ppc_count(gr->config);
nvgpu_gr_config_get_ppc_count(gr_config);
deviation = 0;
average_tpcs = scale_factor * average_tpcs /
nvgpu_gr_config_get_gpc_count(gr->config);
nvgpu_gr_config_get_gpc_count(gr_config);
for (gpc_id = 0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id < nvgpu_gr_config_get_gpc_count(gr_config);
gpc_id++) {
diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
if (diff < 0) {
@@ -154,7 +153,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
deviation += U32(diff);
}
deviation /= nvgpu_gr_config_get_gpc_count(gr->config);
deviation /= nvgpu_gr_config_get_gpc_count(gr_config);
norm_tpc_deviation = deviation / max_tpc_gpc;
@@ -172,62 +171,64 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
(world_scale * scg_world_perf) +
(tpc_scale * tpc_balance);
free_resources:
nvgpu_kfree(g, num_tpc_gpc);
nvgpu_kfree(gr_config->g, num_tpc_gpc);
return err;
}
int gv100_gr_config_init_sm_id_table(struct gk20a *g)
int gv100_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config)
{
unsigned long tpc;
u32 gpc, sm, pes, gtpc;
u32 sm_id = 0;
u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
struct gr_gk20a *gr = &g->gr;
u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config);
u32 sm_per_tpc = nvgpu_gr_config_get_sm_count_per_tpc(gr_config);
u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr_config);
int perf, maxperf;
int err = 0;
unsigned long *gpc_tpc_mask;
u32 *tpc_table, *gpc_table;
if (g->gr.config == NULL) {
if (gr_config == NULL) {
return -ENOMEM;
}
gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
gpc_table = nvgpu_kzalloc(gr_config->g,
nvgpu_gr_config_get_tpc_count(gr_config) *
sizeof(u32));
tpc_table = nvgpu_kzalloc(gr_config->g,
nvgpu_gr_config_get_tpc_count(gr_config) *
sizeof(u32));
gpc_tpc_mask = nvgpu_kzalloc(gr_config->g,
sizeof(unsigned long) *
nvgpu_get_litter_value(gr_config->g, GPU_LIT_NUM_GPCS));
if ((gpc_table == NULL) ||
(tpc_table == NULL) ||
(gpc_tpc_mask == NULL)) {
nvgpu_err(g, "Error allocating memory for sm tables");
nvgpu_err(gr_config->g, "Error allocating memory for sm tables");
err = -ENOMEM;
goto exit_build_table;
}
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) {
for (pes = 0;
pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc);
pes < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc);
pes++) {
gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask(
g->gr.config, gpc, pes);
gr_config, gpc, pes);
}
}
for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) {
for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr_config); gtpc++) {
maxperf = -1;
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) {
for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) {
perf = -1;
err = gr_gv100_scg_estimate_perf(g,
err = gr_gv100_scg_estimate_perf(gr_config,
gpc_tpc_mask, gpc, tpc, &perf);
if (err != 0) {
nvgpu_err(g,
nvgpu_err(gr_config->g,
"Error while estimating perf");
goto exit_build_table;
}
@@ -246,13 +247,13 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g)
for (sm = 0; sm < sm_per_tpc; sm++) {
u32 index = sm_id + sm;
struct sm_info *sm_info =
nvgpu_gr_config_get_sm_info(g->gr.config, index);
nvgpu_gr_config_get_sm_info(gr_config, index);
sm_info->gpc_index = gpc_table[tpc];
sm_info->tpc_index = tpc_table[tpc];
sm_info->sm_index = sm;
sm_info->global_tpc_index = tpc;
nvgpu_log_info(g,
nvgpu_log_info(gr_config->g,
"gpc : %d tpc %d sm_index %d global_index: %d",
sm_info->gpc_index,
sm_info->tpc_index,
@@ -262,11 +263,12 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g)
}
}
g->gr.config->no_of_sm = num_sm;
nvgpu_log_info(g, " total number of sm = %d", g->gr.config->no_of_sm);
gr_config->no_of_sm = num_sm;
nvgpu_log_info(gr_config->g, " total number of sm = %d",
gr_config->no_of_sm);
exit_build_table:
nvgpu_kfree(g, gpc_table);
nvgpu_kfree(g, tpc_table);
nvgpu_kfree(g, gpc_tpc_mask);
nvgpu_kfree(gr_config->g, gpc_table);
nvgpu_kfree(gr_config->g, tpc_table);
nvgpu_kfree(gr_config->g, gpc_tpc_mask);
return err;
}

View File

@@ -27,6 +27,6 @@
struct gk20a;
int gv100_gr_config_init_sm_id_table(struct gk20a *g);
int gv100_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config);
#endif /* NVGPU_GR_CONFIG_GV100_H */

View File

@@ -581,7 +581,7 @@ struct gpu_ops {
struct nvgpu_gr_config *config, u32 gpc_index,
u32 pes_index);
u32 (*get_pd_dist_skip_table_size)(void);
int (*init_sm_id_table)(struct gk20a *g);
int (*init_sm_id_table)(struct nvgpu_gr_config *gr_config);
} config;
#ifdef CONFIG_GK20A_CTXSW_TRACE

View File

@@ -37,6 +37,8 @@ struct sm_info {
};
struct nvgpu_gr_config {
struct gk20a *g;
u32 max_gpc_count;
u32 max_tpc_per_gpc_count;
u32 max_zcull_per_gpc_count;
@@ -48,6 +50,7 @@ struct nvgpu_gr_config {
u32 zcb_count;
u32 pe_count_per_gpc;
u32 sm_count_per_tpc;
u32 *gpc_ppc_count;
u32 *gpc_tpc_count;
@@ -87,6 +90,7 @@ u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
u32 gpc_index);