From d6a543480c3ed7ccf3503f0ea10e6caa35931f58 Mon Sep 17 00:00:00 2001
From: Vinod G
Date: Thu, 18 Jul 2019 22:17:09 -0700
Subject: [PATCH] gpu: nvgpu: reduce code complexity in gr.config unit

Reduce the code complexity of the following functions in the gr.config
unit:

gv100_gr_config_init_sm_id_table (complexity: 13)
gr_gv100_scg_estimate_perf (complexity: 23)

Create sub-functions by moving control statements out of each function
whose complexity exceeds 10.

Create two sub-functions from gv100_gr_config_init_sm_id_table:

gr_gv100_scg_estimate_perf_for_all_gpc_tpc (complexity: 5)
gv100_gr_config_set_sminfo (complexity: 3)

and reduce gv100_gr_config_init_sm_id_table complexity to 8.

Create four sub-functions from gr_gv100_scg_estimate_perf:

gr_gv100_find_max_gpc (complexity: 2)
gr_gv100_remove_logical_tpc (complexity: 4)
gr_gv100_calc_valid_pes (complexity: 6)
gr_gv100_scg_calculate_perf (complexity: 7)

and reduce gr_gv100_scg_estimate_perf complexity to 10.

Jira NVGPU-3661

Change-Id: Iaaef1a98f2c6c55cd7b0a1a57d1c74eb09d43869
Signed-off-by: Vinod G
Reviewed-on: https://git-master.nvidia.com/r/2156744
Reviewed-by: svc-mobile-coverity
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 .../hal/gr/config/gr_config_gv100_fusa.c      | 444 +++++++++++-------
 1 file changed, 268 insertions(+), 176 deletions(-)

diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100_fusa.c b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100_fusa.c
index dbfd75d06..95f5d3168 100644
--- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100_fusa.c
+++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100_fusa.c
@@ -27,134 +27,34 @@
 #include "gr_config_gv100.h"
 
-/*
- * Estimate performance if the given logical TPC in the given logical GPC were
- * removed. 
- */ -static int gr_gv100_scg_estimate_perf(struct gk20a *g, - struct nvgpu_gr_config *gr_config, - u32 *gpc_tpc_mask, - u32 disable_gpc_id, u32 disable_tpc_id, - u32 *perf) +static int gr_gv100_scg_calculate_perf(struct nvgpu_gr_config *gr_config, + u32 scale_factor, u32 scg_num_pes, u32 *num_tpc_gpc, + u32 max_tpc_gpc, u32 min_scg_gpc_pix_perf, + u32 average_tpcs, u32 *perf) { int err = 0; - u32 scale_factor = 512U; /* Use fx23.9 */ + u32 scg_world_perf; + u32 tpc_balance; + u32 diff; + u32 gpc_id; + u32 pix_scale_perf, world_scale_perf, tpc_scale_perf; + u32 pix_world_scale_sum; u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */ u32 world_scale = 1024U; /* World performance in [19:10] */ u32 tpc_scale = 1U; /* TPC balancing in [9:0] */ - u32 scg_num_pes = 0U; - u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */ - u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */ - u32 deviation; /* absolute diff between TPC# and + u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */ + u32 deviation = 0U; /* absolute diff between TPC# and * average_tpcs, averaged across GPCs */ - u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */ - u32 tpc_balance; - u32 scg_gpc_pix_perf = 0U; - u32 scg_world_perf; - u32 gpc_id; - u32 pes_id; - u32 diff; - bool is_tpc_removed_gpc = false; - bool is_tpc_removed_pes = false; - u32 max_tpc_gpc = 0U; - u32 num_tpc_mask; - u32 temp, temp1, temp2, temp3; - u32 tpc_cnt = nvgpu_safe_mult_u32((u32)sizeof(u32), - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); - u32 *num_tpc_gpc = nvgpu_kzalloc(g, tpc_cnt); - if (num_tpc_gpc == NULL) { - return -ENOMEM; - } - - /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */ - for (gpc_id = 0; - gpc_id < nvgpu_gr_config_get_gpc_count(gr_config); - gpc_id++) { - num_tpc_mask = gpc_tpc_mask[gpc_id]; - - if ((gpc_id == disable_gpc_id) && - ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { - /* Safety check if a TPC is removed twice */ - if (is_tpc_removed_gpc) { - err = -EINVAL; - goto free_resources; - } - /* Remove logical TPC from set */ - num_tpc_mask &= ~(BIT32(disable_tpc_id)); - is_tpc_removed_gpc = true; - } - - /* track balancing of tpcs across gpcs */ - num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask); - average_tpcs = nvgpu_safe_add_u32(average_tpcs, - num_tpc_gpc[gpc_id]); - - /* save the maximum numer of gpcs */ - max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ? 
- num_tpc_gpc[gpc_id] : max_tpc_gpc; - - /* - * Calculate ratio between TPC count and post-FS and post-SCG - * - * ratio represents relative throughput of the GPC - */ - tpc_cnt = nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_id); - if (tpc_cnt > 0U) { - scg_gpc_pix_perf = nvgpu_safe_mult_u32(scale_factor, - num_tpc_gpc[gpc_id]) / tpc_cnt; - } - - if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) { - min_scg_gpc_pix_perf = scg_gpc_pix_perf; - } - - /* Calculate # of surviving PES */ - for (pes_id = 0; - pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc_id); - pes_id++) { - /* Count the number of TPC on the set */ - num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask( - gr_config, gpc_id, pes_id) & - gpc_tpc_mask[gpc_id]; - - if ((gpc_id == disable_gpc_id) && - ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { - - if (is_tpc_removed_pes) { - err = -EINVAL; - goto free_resources; - } - num_tpc_mask &= ~(BIT32(disable_tpc_id)); - is_tpc_removed_pes = true; - } - if (hweight32(num_tpc_mask) != 0UL) { - scg_num_pes = nvgpu_safe_add_u32(scg_num_pes, 1U); - } - } - } - - if (!is_tpc_removed_gpc || !is_tpc_removed_pes) { - err = -EINVAL; - goto free_resources; - } - - if (max_tpc_gpc == 0U) { - *perf = 0; - goto free_resources; - } - - /* Now calculate perf */ scg_world_perf = nvgpu_safe_mult_u32(scale_factor, scg_num_pes) / nvgpu_gr_config_get_ppc_count(gr_config); - deviation = 0; - average_tpcs = nvgpu_safe_mult_u32(scale_factor, average_tpcs) / - nvgpu_gr_config_get_gpc_count(gr_config); - for (gpc_id =0; + + for (gpc_id = 0U; gpc_id < nvgpu_gr_config_get_gpc_count(gr_config); gpc_id++) { - temp = nvgpu_safe_mult_u32(scale_factor, num_tpc_gpc[gpc_id]); + u32 temp = nvgpu_safe_mult_u32(scale_factor, + num_tpc_gpc[gpc_id]); if (average_tpcs > temp) { diff = nvgpu_safe_sub_u32(average_tpcs, temp); } else { @@ -173,34 +73,270 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, (scg_world_perf > scale_factor) || (min_scg_gpc_pix_perf > scale_factor) || (norm_tpc_deviation > scale_factor)) { + err = -EINVAL; + goto calc_perf_end; + } + + pix_scale_perf = nvgpu_safe_mult_u32(pix_scale, min_scg_gpc_pix_perf); + world_scale_perf = nvgpu_safe_mult_u32(world_scale, scg_world_perf); + tpc_scale_perf = nvgpu_safe_mult_u32(tpc_scale, tpc_balance); + pix_world_scale_sum = nvgpu_safe_add_u32(pix_scale_perf, world_scale_perf); + *perf = nvgpu_safe_add_u32(pix_world_scale_sum, tpc_scale_perf); + +calc_perf_end: + return err; +} + +static int gr_gv100_calc_valid_pes(struct nvgpu_gr_config *gr_config, + u32 gpc_id, u32 *gpc_tpc_mask, u32 disable_gpc_id, + u32 disable_tpc_id, bool *is_tpc_removed_pes, + u32 *scg_num_pes) +{ + int err = 0; + u32 pes_id; + u32 num_tpc_mask; + + /* Calculate # of surviving PES */ + for (pes_id = 0; + pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc_id); + pes_id++) { + /* Count the number of TPC on the set */ + num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask( + gr_config, gpc_id, pes_id) & + gpc_tpc_mask[gpc_id]; + + if ((gpc_id == disable_gpc_id) && + ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { + + if (*is_tpc_removed_pes) { + err = -EINVAL; + goto calc_pes_err; + } + num_tpc_mask &= ~(BIT32(disable_tpc_id)); + *is_tpc_removed_pes = true; + } + if (hweight32(num_tpc_mask) != 0UL) { + *scg_num_pes = nvgpu_safe_add_u32(*scg_num_pes, 1U); + } + } + +calc_pes_err: + return err; +} + +static int gr_gv100_remove_logical_tpc(struct nvgpu_gr_config *gr_config, + u32 gpc_id, u32 *gpc_tpc_mask, u32 disable_gpc_id, + u32 disable_tpc_id, bool *is_tpc_removed_gpc, + u32 
*num_tpc_gpc)
+{
+	int err = 0;
+	u32 num_tpc_mask = gpc_tpc_mask[gpc_id];
+
+	if ((gpc_id == disable_gpc_id) &&
+		((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
+		/* Safety check if a TPC is removed twice */
+		if (*is_tpc_removed_gpc) {
+			err = -EINVAL;
+			goto remove_tpc_err;
+		}
+		/* Remove logical TPC from set */
+		num_tpc_mask &= ~(BIT32(disable_tpc_id));
+		*is_tpc_removed_gpc = true;
+	}
+
+	/* track balancing of tpcs across gpcs */
+	num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask);
+
+remove_tpc_err:
+	return err;
+}
+
+static u32 gr_gv100_find_max_gpc(u32 *num_tpc_gpc, u32 gpc_id, u32 max_tpc_gpc)
+{
+	return num_tpc_gpc[gpc_id] > max_tpc_gpc ?
+		num_tpc_gpc[gpc_id] : max_tpc_gpc;
+}
+
+/*
+ * Estimate performance if the given logical TPC in the given logical GPC were
+ * removed.
+ */
+static int gr_gv100_scg_estimate_perf(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config,
+		u32 *gpc_tpc_mask,
+		u32 disable_gpc_id, u32 disable_tpc_id,
+		u32 *perf)
+{
+	int err = 0;
+	u32 scale_factor = 512U; /* Use fx23.9 */
+	u32 scg_num_pes = 0U;
+	u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */
+	u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */
+	u32 scg_gpc_pix_perf = 0U;
+	u32 gpc_id;
+	bool is_tpc_removed_gpc = false;
+	bool is_tpc_removed_pes = false;
+	u32 max_tpc_gpc = 0U;
+	u32 tpc_cnt = nvgpu_safe_mult_u32((u32)sizeof(u32),
+			nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
+	u32 *num_tpc_gpc = nvgpu_kzalloc(g, tpc_cnt);
+
+	if (num_tpc_gpc == NULL) {
+		return -ENOMEM;
+	}
+
+	/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
+	for (gpc_id = 0;
+	     gpc_id < nvgpu_gr_config_get_gpc_count(gr_config);
+	     gpc_id++) {
+
+		err = gr_gv100_remove_logical_tpc(gr_config, gpc_id,
+				gpc_tpc_mask, disable_gpc_id, disable_tpc_id,
+				&is_tpc_removed_gpc, num_tpc_gpc);
+		if (err != 0) {
+			goto free_resources;
+		}
+
+		/* track balancing of tpcs across gpcs */
+		average_tpcs = nvgpu_safe_add_u32(average_tpcs,
+				num_tpc_gpc[gpc_id]);
+
+		/* save the maximum number of TPCs per GPC */
+		max_tpc_gpc = gr_gv100_find_max_gpc(num_tpc_gpc,
+				gpc_id, max_tpc_gpc);
+
+		/*
+		 * Calculate the ratio of post-SCG TPC count to post-FS
+		 * TPC count.
+		 *
+		 * The ratio represents the relative throughput of the GPC.
+		 */
+		tpc_cnt = nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_id);
+		if (tpc_cnt > 0U) {
+			scg_gpc_pix_perf = nvgpu_safe_mult_u32(scale_factor,
+					num_tpc_gpc[gpc_id]) / tpc_cnt;
+		}
+
+		if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
+			min_scg_gpc_pix_perf = scg_gpc_pix_perf;
+		}
+
+		/* Calculate # of surviving PES */
+		err = gr_gv100_calc_valid_pes(gr_config, gpc_id, gpc_tpc_mask,
+				disable_gpc_id, disable_tpc_id,
+				&is_tpc_removed_pes, &scg_num_pes);
+		if (err != 0) {
+			goto free_resources;
+		}
+	}
+
+	if (!is_tpc_removed_gpc || !is_tpc_removed_pes) {
 		err = -EINVAL;
 		goto free_resources;
 	}
 
-	temp = nvgpu_safe_mult_u32(pix_scale, min_scg_gpc_pix_perf);
-	temp1 = nvgpu_safe_mult_u32(world_scale, scg_world_perf);
-	temp2 = nvgpu_safe_mult_u32(tpc_scale, tpc_balance);
-	temp3 = nvgpu_safe_add_u32(temp, temp1);
-	*perf = nvgpu_safe_add_u32(temp3, temp2);
+	if (max_tpc_gpc == 0U) {
+		*perf = 0;
+		goto free_resources;
+	}
+
+	/* Now calculate perf */
+	average_tpcs = nvgpu_safe_mult_u32(scale_factor, average_tpcs) /
+			nvgpu_gr_config_get_gpc_count(gr_config);
+
+	err = gr_gv100_scg_calculate_perf(gr_config, scale_factor,
+			scg_num_pes, num_tpc_gpc, max_tpc_gpc,
+			min_scg_gpc_pix_perf, average_tpcs, perf);
+
 free_resources:
 	nvgpu_kfree(g, num_tpc_gpc);
 	return err;
 }
 
+static int 
gr_gv100_scg_estimate_perf_for_all_gpc_tpc(struct gk20a *g, + struct nvgpu_gr_config *gr_config, u32 *gpc_tpc_mask, + u32 *gpc_table, u32 *tpc_table) +{ + unsigned long gpc_tpc_mask_tmp; + unsigned long tpc_tmp; + u32 perf, maxperf; + int err = 0; + u32 gtpc, gpc, tpc; + + for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr_config); gtpc++) { + maxperf = 0U; + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) { + gpc_tpc_mask_tmp = (unsigned long)gpc_tpc_mask[gpc]; + + for_each_set_bit(tpc_tmp, &gpc_tpc_mask_tmp, + nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) { + perf = 0U; + tpc = (u32)tpc_tmp; + + err = gr_gv100_scg_estimate_perf(g, gr_config, + gpc_tpc_mask, gpc, tpc, &perf); + + if (err != 0) { + nvgpu_err(g, + "Error while estimating perf"); + goto exit_perf_err; + } + + if (perf >= maxperf) { + maxperf = perf; + gpc_table[gtpc] = gpc; + tpc_table[gtpc] = tpc; + } + } + } + gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT32(tpc_table[gtpc])); + } + +exit_perf_err: + return err; +} + +static void gv100_gr_config_set_sminfo(struct gk20a *g, + struct nvgpu_gr_config *gr_config, u32 num_sm, + u32 sm_per_tpc, u32 *gpc_table, u32 *tpc_table) +{ + u32 sm; + u32 tpc = 0; + u32 sm_id = 0; + + for (sm_id = 0; sm_id < num_sm; sm_id += sm_per_tpc) { + for (sm = 0; sm < sm_per_tpc; sm++) { + u32 index = nvgpu_safe_add_u32(sm_id, sm); + struct nvgpu_sm_info *sm_info = + nvgpu_gr_config_get_sm_info(gr_config, index); + nvgpu_gr_config_set_sm_info_gpc_index(sm_info, + gpc_table[tpc]); + nvgpu_gr_config_set_sm_info_tpc_index(sm_info, + tpc_table[tpc]); + nvgpu_gr_config_set_sm_info_sm_index(sm_info, sm); + nvgpu_gr_config_set_sm_info_global_tpc_index(sm_info, tpc); + + nvgpu_log_info(g, + "gpc : %d tpc %d sm_index %d global_index: %d", + nvgpu_gr_config_get_sm_info_gpc_index(sm_info), + nvgpu_gr_config_get_sm_info_tpc_index(sm_info), + nvgpu_gr_config_get_sm_info_sm_index(sm_info), + nvgpu_gr_config_get_sm_info_global_tpc_index(sm_info)); + + } + tpc = nvgpu_safe_add_u32(tpc, 1U); + } +} + int gv100_gr_config_init_sm_id_table(struct gk20a *g, struct nvgpu_gr_config *gr_config) { - u32 gpc, tpc, sm, pes, gtpc; - u32 sm_id = 0; + u32 gpc, pes; u32 sm_per_tpc = nvgpu_gr_config_get_sm_count_per_tpc(gr_config); u32 tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config); u32 num_sm = nvgpu_safe_mult_u32(sm_per_tpc, tpc_cnt); - u32 perf, maxperf; int err = 0; u32 *gpc_tpc_mask; u32 *tpc_table, *gpc_table; - unsigned long gpc_tpc_mask_tmp; - unsigned long tpc_tmp; u32 tbl_size = 0U; u32 temp = 0U; @@ -232,58 +368,14 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g, } } - for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr_config); gtpc++) { - maxperf = 0U; - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) { - gpc_tpc_mask_tmp = (unsigned long)gpc_tpc_mask[gpc]; - - for_each_set_bit(tpc_tmp, &gpc_tpc_mask_tmp, - nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) { - perf = 0U; - tpc = (u32)tpc_tmp; - - err = gr_gv100_scg_estimate_perf(g, gr_config, - gpc_tpc_mask, gpc, tpc, &perf); - - if (err != 0) { - nvgpu_err(g, - "Error while estimating perf"); - goto exit_build_table; - } - - if (perf >= maxperf) { - maxperf = perf; - gpc_table[gtpc] = gpc; - tpc_table[gtpc] = tpc; - } - } - } - gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT32(tpc_table[gtpc])); + err = gr_gv100_scg_estimate_perf_for_all_gpc_tpc(g, gr_config, + gpc_tpc_mask, gpc_table, tpc_table); + if (err != 0) { + goto exit_build_table; } - tpc = 0; - for (sm_id = 0; sm_id < num_sm; sm_id += sm_per_tpc) { - for (sm 
= 0; sm < sm_per_tpc; sm++) {
-			u32 index = nvgpu_safe_add_u32(sm_id, sm);
-			struct nvgpu_sm_info *sm_info =
-				nvgpu_gr_config_get_sm_info(gr_config, index);
-			nvgpu_gr_config_set_sm_info_gpc_index(sm_info,
-				gpc_table[tpc]);
-			nvgpu_gr_config_set_sm_info_tpc_index(sm_info,
-				tpc_table[tpc]);
-			nvgpu_gr_config_set_sm_info_sm_index(sm_info, sm);
-			nvgpu_gr_config_set_sm_info_global_tpc_index(sm_info, tpc);
-
-			nvgpu_log_info(g,
-				"gpc : %d tpc %d sm_index %d global_index: %d",
-				nvgpu_gr_config_get_sm_info_gpc_index(sm_info),
-				nvgpu_gr_config_get_sm_info_tpc_index(sm_info),
-				nvgpu_gr_config_get_sm_info_sm_index(sm_info),
-				nvgpu_gr_config_get_sm_info_global_tpc_index(sm_info));
-
-		}
-		tpc = nvgpu_safe_add_u32(tpc, 1U);
-	}
+	gv100_gr_config_set_sminfo(g, gr_config, num_sm, sm_per_tpc,
+			gpc_table, tpc_table);
 
 	nvgpu_gr_config_set_no_of_sm(gr_config, num_sm);
 	nvgpu_log_info(g, " total number of sm = %d", num_sm);
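
For reviewers tracing the scoring math: gr_gv100_scg_calculate_perf() sums
three fx23.9 terms, each already checked to be at most scale_factor (512),
after scaling them into disjoint bit ranges of one u32 (pix perf in [29:20],
world perf in [19:10], TPC balance in [9:0]). The caller's plain
"perf >= maxperf" comparison therefore ranks candidates by minimum GPC pixel
performance first, breaking ties with world performance and then TPC balance.
The sketch below is a minimal standalone illustration of that packing;
pack_perf and the sample inputs are hypothetical, not part of the driver.

#include <stdio.h>
#include <stdint.h>

#define SCALE_FACTOR	512u		/* fx23.9, as in the patch */
#define PIX_SCALE	(1024u * 1024u)	/* Pix perf in [29:20] */
#define WORLD_SCALE	1024u		/* World performance in [19:10] */
#define TPC_SCALE	1u		/* TPC balancing in [9:0] */

/*
 * Mirrors the final sum in gr_gv100_scg_calculate_perf(). Each input is
 * an fx23.9 ratio in [0, SCALE_FACTOR]; the patch rejects anything larger,
 * which keeps the three products in non-overlapping bit fields and the
 * total ((512 << 20) + (512 << 10) + 512) well within a u32.
 */
static uint32_t pack_perf(uint32_t min_gpc_pix_perf, uint32_t world_perf,
			  uint32_t tpc_balance)
{
	return PIX_SCALE * min_gpc_pix_perf +
	       WORLD_SCALE * world_perf +
	       TPC_SCALE * tpc_balance;
}

int main(void)
{
	/* Hypothetical fx23.9 inputs, not real GV100 measurements. */
	uint32_t a = pack_perf(512u, 256u, 100u);
	uint32_t b = pack_perf(511u, 512u, 512u);

	/*
	 * One unit of pix perf (1 << 20) outweighs the largest possible
	 * world + balance contribution ((512 << 10) + 512 = 524800), so
	 * candidate a wins even though it loses both lower-order terms.
	 */
	printf("a = %u, b = %u -> prefer %s\n", a, b, (a > b) ? "a" : "b");
	return 0;
}

This is also why the calculate-perf helper returns -EINVAL when any term
exceeds scale_factor: an oversized term would carry into the next bit field
and scramble the ordering.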