From d6a543480c3ed7ccf3503f0ea10e6caa35931f58 Mon Sep 17 00:00:00 2001
From: Vinod G
Date: Thu, 18 Jul 2019 22:17:09 -0700
Subject: [PATCH] gpu: nvgpu: reduce code complexity in gr.config unit

Reduce the code complexity of the following functions in the gr.config
unit:

gv100_gr_config_init_sm_id_table (complexity: 13)
gr_gv100_scg_estimate_perf (complexity: 23)

Create sub-functions by moving control statements out of each function
whose complexity exceeds 10.

Create two sub-functions from gv100_gr_config_init_sm_id_table:

gr_gv100_scg_estimate_perf_for_all_gpc_tpc (complexity: 5)
gv100_gr_config_set_sminfo (complexity: 3)

and reduce gv100_gr_config_init_sm_id_table complexity to 8.

Create four sub-functions from gr_gv100_scg_estimate_perf:

gr_gv100_find_max_gpc (complexity: 2)
gr_gv100_remove_logical_tpc (complexity: 4)
gr_gv100_calc_valid_pes (complexity: 6)
gr_gv100_scg_calculate_perf (complexity: 7)

and reduce gr_gv100_scg_estimate_perf complexity to 10.

Jira NVGPU-3661

Change-Id: Iaaef1a98f2c6c55cd7b0a1a57d1c74eb09d43869
Signed-off-by: Vinod G
Reviewed-on: https://git-master.nvidia.com/r/2156744
Reviewed-by: svc-mobile-coverity
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 .../hal/gr/config/gr_config_gv100_fusa.c      | 444 +++++++++++-------
 1 file changed, 268 insertions(+), 176 deletions(-)

diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100_fusa.c b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100_fusa.c
index dbfd75d06..95f5d3168 100644
--- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100_fusa.c
+++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100_fusa.c
@@ -27,134 +27,34 @@
 #include "gr_config_gv100.h"
 
-/*
- * Estimate performance if the given logical TPC in the given logical GPC were
- * removed. 
- */ -static int gr_gv100_scg_estimate_perf(struct gk20a *g, - struct nvgpu_gr_config *gr_config, - u32 *gpc_tpc_mask, - u32 disable_gpc_id, u32 disable_tpc_id, - u32 *perf) +static int gr_gv100_scg_calculate_perf(struct nvgpu_gr_config *gr_config, + u32 scale_factor, u32 scg_num_pes, u32 *num_tpc_gpc, + u32 max_tpc_gpc, u32 min_scg_gpc_pix_perf, + u32 average_tpcs, u32 *perf) { int err = 0; - u32 scale_factor = 512U; /* Use fx23.9 */ + u32 scg_world_perf; + u32 tpc_balance; + u32 diff; + u32 gpc_id; + u32 pix_scale_perf, world_scale_perf, tpc_scale_perf; + u32 pix_world_scale_sum; u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */ u32 world_scale = 1024U; /* World performance in [19:10] */ u32 tpc_scale = 1U; /* TPC balancing in [9:0] */ - u32 scg_num_pes = 0U; - u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */ - u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */ - u32 deviation; /* absolute diff between TPC# and + u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */ + u32 deviation = 0U; /* absolute diff between TPC# and * average_tpcs, averaged across GPCs */ - u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */ - u32 tpc_balance; - u32 scg_gpc_pix_perf = 0U; - u32 scg_world_perf; - u32 gpc_id; - u32 pes_id; - u32 diff; - bool is_tpc_removed_gpc = false; - bool is_tpc_removed_pes = false; - u32 max_tpc_gpc = 0U; - u32 num_tpc_mask; - u32 temp, temp1, temp2, temp3; - u32 tpc_cnt = nvgpu_safe_mult_u32((u32)sizeof(u32), - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); - u32 *num_tpc_gpc = nvgpu_kzalloc(g, tpc_cnt); - if (num_tpc_gpc == NULL) { - return -ENOMEM; - } - - /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */ - for (gpc_id = 0; - gpc_id < nvgpu_gr_config_get_gpc_count(gr_config); - gpc_id++) { - num_tpc_mask = gpc_tpc_mask[gpc_id]; - - if ((gpc_id == disable_gpc_id) && - ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { - /* Safety check if a TPC is removed twice */ - if (is_tpc_removed_gpc) { - err = -EINVAL; - goto free_resources; - } - /* Remove logical TPC from set */ - num_tpc_mask &= ~(BIT32(disable_tpc_id)); - is_tpc_removed_gpc = true; - } - - /* track balancing of tpcs across gpcs */ - num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask); - average_tpcs = nvgpu_safe_add_u32(average_tpcs, - num_tpc_gpc[gpc_id]); - - /* save the maximum numer of gpcs */ - max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ? 
- num_tpc_gpc[gpc_id] : max_tpc_gpc; - - /* - * Calculate ratio between TPC count and post-FS and post-SCG - * - * ratio represents relative throughput of the GPC - */ - tpc_cnt = nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_id); - if (tpc_cnt > 0U) { - scg_gpc_pix_perf = nvgpu_safe_mult_u32(scale_factor, - num_tpc_gpc[gpc_id]) / tpc_cnt; - } - - if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) { - min_scg_gpc_pix_perf = scg_gpc_pix_perf; - } - - /* Calculate # of surviving PES */ - for (pes_id = 0; - pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc_id); - pes_id++) { - /* Count the number of TPC on the set */ - num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask( - gr_config, gpc_id, pes_id) & - gpc_tpc_mask[gpc_id]; - - if ((gpc_id == disable_gpc_id) && - ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { - - if (is_tpc_removed_pes) { - err = -EINVAL; - goto free_resources; - } - num_tpc_mask &= ~(BIT32(disable_tpc_id)); - is_tpc_removed_pes = true; - } - if (hweight32(num_tpc_mask) != 0UL) { - scg_num_pes = nvgpu_safe_add_u32(scg_num_pes, 1U); - } - } - } - - if (!is_tpc_removed_gpc || !is_tpc_removed_pes) { - err = -EINVAL; - goto free_resources; - } - - if (max_tpc_gpc == 0U) { - *perf = 0; - goto free_resources; - } - - /* Now calculate perf */ scg_world_perf = nvgpu_safe_mult_u32(scale_factor, scg_num_pes) / nvgpu_gr_config_get_ppc_count(gr_config); - deviation = 0; - average_tpcs = nvgpu_safe_mult_u32(scale_factor, average_tpcs) / - nvgpu_gr_config_get_gpc_count(gr_config); - for (gpc_id =0; + + for (gpc_id = 0U; gpc_id < nvgpu_gr_config_get_gpc_count(gr_config); gpc_id++) { - temp = nvgpu_safe_mult_u32(scale_factor, num_tpc_gpc[gpc_id]); + u32 temp = nvgpu_safe_mult_u32(scale_factor, + num_tpc_gpc[gpc_id]); if (average_tpcs > temp) { diff = nvgpu_safe_sub_u32(average_tpcs, temp); } else { @@ -173,34 +73,270 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, (scg_world_perf > scale_factor) || (min_scg_gpc_pix_perf > scale_factor) || (norm_tpc_deviation > scale_factor)) { + err = -EINVAL; + goto calc_perf_end; + } + + pix_scale_perf = nvgpu_safe_mult_u32(pix_scale, min_scg_gpc_pix_perf); + world_scale_perf = nvgpu_safe_mult_u32(world_scale, scg_world_perf); + tpc_scale_perf = nvgpu_safe_mult_u32(tpc_scale, tpc_balance); + pix_world_scale_sum = nvgpu_safe_add_u32(pix_scale_perf, world_scale_perf); + *perf = nvgpu_safe_add_u32(pix_world_scale_sum, tpc_scale_perf); + +calc_perf_end: + return err; +} + +static int gr_gv100_calc_valid_pes(struct nvgpu_gr_config *gr_config, + u32 gpc_id, u32 *gpc_tpc_mask, u32 disable_gpc_id, + u32 disable_tpc_id, bool *is_tpc_removed_pes, + u32 *scg_num_pes) +{ + int err = 0; + u32 pes_id; + u32 num_tpc_mask; + + /* Calculate # of surviving PES */ + for (pes_id = 0; + pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc_id); + pes_id++) { + /* Count the number of TPC on the set */ + num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask( + gr_config, gpc_id, pes_id) & + gpc_tpc_mask[gpc_id]; + + if ((gpc_id == disable_gpc_id) && + ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { + + if (*is_tpc_removed_pes) { + err = -EINVAL; + goto calc_pes_err; + } + num_tpc_mask &= ~(BIT32(disable_tpc_id)); + *is_tpc_removed_pes = true; + } + if (hweight32(num_tpc_mask) != 0UL) { + *scg_num_pes = nvgpu_safe_add_u32(*scg_num_pes, 1U); + } + } + +calc_pes_err: + return err; +} + +static int gr_gv100_remove_logical_tpc(struct nvgpu_gr_config *gr_config, + u32 gpc_id, u32 *gpc_tpc_mask, u32 disable_gpc_id, + u32 disable_tpc_id, bool *is_tpc_removed_gpc, + u32 
*num_tpc_gpc)
+{
+	int err = 0;
+	u32 num_tpc_mask = gpc_tpc_mask[gpc_id];
+
+	if ((gpc_id == disable_gpc_id) &&
+		((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
+		/* Safety check if a TPC is removed twice */
+		if (*is_tpc_removed_gpc) {
+			err = -EINVAL;
+			goto remove_tpc_err;
+		}
+		/* Remove logical TPC from set */
+		num_tpc_mask &= ~(BIT32(disable_tpc_id));
+		*is_tpc_removed_gpc = true;
+	}
+
+	/* track balancing of tpcs across gpcs */
+	num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask);
+
+remove_tpc_err:
+	return err;
+}
+
+static u32 gr_gv100_find_max_gpc(u32 *num_tpc_gpc, u32 gpc_id, u32 max_tpc_gpc)
+{
+	return num_tpc_gpc[gpc_id] > max_tpc_gpc ?
+		num_tpc_gpc[gpc_id] : max_tpc_gpc;
+}
+
+/*
+ * Estimate performance if the given logical TPC in the given logical GPC were
+ * removed.
+ */
+static int gr_gv100_scg_estimate_perf(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config,
+		u32 *gpc_tpc_mask,
+		u32 disable_gpc_id, u32 disable_tpc_id,
+		u32 *perf)
+{
+	int err = 0;
+	u32 scale_factor = 512U; /* Use fx23.9 */
+	u32 scg_num_pes = 0U;
+	u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */
+	u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */
+	u32 scg_gpc_pix_perf = 0U;
+	u32 gpc_id;
+	bool is_tpc_removed_gpc = false;
+	bool is_tpc_removed_pes = false;
+	u32 max_tpc_gpc = 0U;
+	u32 tpc_cnt = nvgpu_safe_mult_u32((u32)sizeof(u32),
+			nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
+	u32 *num_tpc_gpc = nvgpu_kzalloc(g, tpc_cnt);
+
+	if (num_tpc_gpc == NULL) {
+		return -ENOMEM;
+	}
+
+	/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
+	for (gpc_id = 0;
+	     gpc_id < nvgpu_gr_config_get_gpc_count(gr_config);
+	     gpc_id++) {
+
+		err = gr_gv100_remove_logical_tpc(gr_config, gpc_id,
+				gpc_tpc_mask, disable_gpc_id, disable_tpc_id,
+				&is_tpc_removed_gpc, num_tpc_gpc);
+		if (err != 0) {
+			goto free_resources;
+		}
+
+		/* track balancing of tpcs across gpcs */
+		average_tpcs = nvgpu_safe_add_u32(average_tpcs,
+				num_tpc_gpc[gpc_id]);
+
+		/* save the maximum number of TPCs per GPC */
+		max_tpc_gpc = gr_gv100_find_max_gpc(num_tpc_gpc,
+				gpc_id, max_tpc_gpc);
+
+		/*
+		 * Calculate the ratio of post-SCG TPC count to post-FS
+		 * TPC count.
+		 *
+		 * The ratio represents the relative throughput of the GPC.
+		 */
+		tpc_cnt = nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_id);
+		if (tpc_cnt > 0U) {
+			scg_gpc_pix_perf = nvgpu_safe_mult_u32(scale_factor,
+					num_tpc_gpc[gpc_id]) / tpc_cnt;
+		}
+
+		if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
+			min_scg_gpc_pix_perf = scg_gpc_pix_perf;
+		}
+
+		/* Calculate # of surviving PES */
+		err = gr_gv100_calc_valid_pes(gr_config, gpc_id, gpc_tpc_mask,
+				disable_gpc_id, disable_tpc_id,
+				&is_tpc_removed_pes, &scg_num_pes);
+		if (err != 0) {
+			goto free_resources;
+		}
+	}
+
+	if (!is_tpc_removed_gpc || !is_tpc_removed_pes) {
 		err = -EINVAL;
 		goto free_resources;
 	}
 
-	temp = nvgpu_safe_mult_u32(pix_scale, min_scg_gpc_pix_perf);
-	temp1 = nvgpu_safe_mult_u32(world_scale, scg_world_perf);
-	temp2 = nvgpu_safe_mult_u32(tpc_scale, tpc_balance);
-	temp3 = nvgpu_safe_add_u32(temp, temp1);
-	*perf = nvgpu_safe_add_u32(temp3, temp2);
+	if (max_tpc_gpc == 0U) {
+		*perf = 0;
+		goto free_resources;
+	}
+
+	/* Now calculate perf */
+	average_tpcs = nvgpu_safe_mult_u32(scale_factor, average_tpcs) /
+			nvgpu_gr_config_get_gpc_count(gr_config);
+
+	err = gr_gv100_scg_calculate_perf(gr_config, scale_factor,
+			scg_num_pes, num_tpc_gpc, max_tpc_gpc,
+			min_scg_gpc_pix_perf, average_tpcs, perf);
+
 free_resources:
 	nvgpu_kfree(g, num_tpc_gpc);
 	return err;
 }
 
+static int 
gr_gv100_scg_estimate_perf_for_all_gpc_tpc(struct gk20a *g, + struct nvgpu_gr_config *gr_config, u32 *gpc_tpc_mask, + u32 *gpc_table, u32 *tpc_table) +{ + unsigned long gpc_tpc_mask_tmp; + unsigned long tpc_tmp; + u32 perf, maxperf; + int err = 0; + u32 gtpc, gpc, tpc; + + for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr_config); gtpc++) { + maxperf = 0U; + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) { + gpc_tpc_mask_tmp = (unsigned long)gpc_tpc_mask[gpc]; + + for_each_set_bit(tpc_tmp, &gpc_tpc_mask_tmp, + nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) { + perf = 0U; + tpc = (u32)tpc_tmp; + + err = gr_gv100_scg_estimate_perf(g, gr_config, + gpc_tpc_mask, gpc, tpc, &perf); + + if (err != 0) { + nvgpu_err(g, + "Error while estimating perf"); + goto exit_perf_err; + } + + if (perf >= maxperf) { + maxperf = perf; + gpc_table[gtpc] = gpc; + tpc_table[gtpc] = tpc; + } + } + } + gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT32(tpc_table[gtpc])); + } + +exit_perf_err: + return err; +} + +static void gv100_gr_config_set_sminfo(struct gk20a *g, + struct nvgpu_gr_config *gr_config, u32 num_sm, + u32 sm_per_tpc, u32 *gpc_table, u32 *tpc_table) +{ + u32 sm; + u32 tpc = 0; + u32 sm_id = 0; + + for (sm_id = 0; sm_id < num_sm; sm_id += sm_per_tpc) { + for (sm = 0; sm < sm_per_tpc; sm++) { + u32 index = nvgpu_safe_add_u32(sm_id, sm); + struct nvgpu_sm_info *sm_info = + nvgpu_gr_config_get_sm_info(gr_config, index); + nvgpu_gr_config_set_sm_info_gpc_index(sm_info, + gpc_table[tpc]); + nvgpu_gr_config_set_sm_info_tpc_index(sm_info, + tpc_table[tpc]); + nvgpu_gr_config_set_sm_info_sm_index(sm_info, sm); + nvgpu_gr_config_set_sm_info_global_tpc_index(sm_info, tpc); + + nvgpu_log_info(g, + "gpc : %d tpc %d sm_index %d global_index: %d", + nvgpu_gr_config_get_sm_info_gpc_index(sm_info), + nvgpu_gr_config_get_sm_info_tpc_index(sm_info), + nvgpu_gr_config_get_sm_info_sm_index(sm_info), + nvgpu_gr_config_get_sm_info_global_tpc_index(sm_info)); + + } + tpc = nvgpu_safe_add_u32(tpc, 1U); + } +} + int gv100_gr_config_init_sm_id_table(struct gk20a *g, struct nvgpu_gr_config *gr_config) { - u32 gpc, tpc, sm, pes, gtpc; - u32 sm_id = 0; + u32 gpc, pes; u32 sm_per_tpc = nvgpu_gr_config_get_sm_count_per_tpc(gr_config); u32 tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config); u32 num_sm = nvgpu_safe_mult_u32(sm_per_tpc, tpc_cnt); - u32 perf, maxperf; int err = 0; u32 *gpc_tpc_mask; u32 *tpc_table, *gpc_table; - unsigned long gpc_tpc_mask_tmp; - unsigned long tpc_tmp; u32 tbl_size = 0U; u32 temp = 0U; @@ -232,58 +368,14 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g, } } - for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr_config); gtpc++) { - maxperf = 0U; - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) { - gpc_tpc_mask_tmp = (unsigned long)gpc_tpc_mask[gpc]; - - for_each_set_bit(tpc_tmp, &gpc_tpc_mask_tmp, - nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) { - perf = 0U; - tpc = (u32)tpc_tmp; - - err = gr_gv100_scg_estimate_perf(g, gr_config, - gpc_tpc_mask, gpc, tpc, &perf); - - if (err != 0) { - nvgpu_err(g, - "Error while estimating perf"); - goto exit_build_table; - } - - if (perf >= maxperf) { - maxperf = perf; - gpc_table[gtpc] = gpc; - tpc_table[gtpc] = tpc; - } - } - } - gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT32(tpc_table[gtpc])); + err = gr_gv100_scg_estimate_perf_for_all_gpc_tpc(g, gr_config, + gpc_tpc_mask, gpc_table, tpc_table); + if (err != 0) { + goto exit_build_table; } - tpc = 0; - for (sm_id = 0; sm_id < num_sm; sm_id += sm_per_tpc) { - for (sm 
= 0; sm < sm_per_tpc; sm++) {
-			u32 index = nvgpu_safe_add_u32(sm_id, sm);
-			struct nvgpu_sm_info *sm_info =
-				nvgpu_gr_config_get_sm_info(gr_config, index);
-			nvgpu_gr_config_set_sm_info_gpc_index(sm_info,
-				gpc_table[tpc]);
-			nvgpu_gr_config_set_sm_info_tpc_index(sm_info,
-				tpc_table[tpc]);
-			nvgpu_gr_config_set_sm_info_sm_index(sm_info, sm);
-			nvgpu_gr_config_set_sm_info_global_tpc_index(sm_info, tpc);
-
-			nvgpu_log_info(g,
-				"gpc : %d tpc %d sm_index %d global_index: %d",
-				nvgpu_gr_config_get_sm_info_gpc_index(sm_info),
-				nvgpu_gr_config_get_sm_info_tpc_index(sm_info),
-				nvgpu_gr_config_get_sm_info_sm_index(sm_info),
-				nvgpu_gr_config_get_sm_info_global_tpc_index(sm_info));
-
-		}
-		tpc = nvgpu_safe_add_u32(tpc, 1U);
-	}
+	gv100_gr_config_set_sminfo(g, gr_config, num_sm, sm_per_tpc,
+			gpc_table, tpc_table);
 
 	nvgpu_gr_config_set_no_of_sm(gr_config, num_sm);
 	nvgpu_log_info(g, " total number of sm = %d", num_sm);
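
For reviewers tracing the scoring math: gr_gv100_scg_calculate_perf() sums
three fx23.9 terms, each already checked to be at most scale_factor (512),
after scaling them into disjoint bit ranges of one u32 (pix perf in [29:20],
world perf in [19:10], TPC balance in [9:0]). The caller's plain
"perf >= maxperf" comparison therefore ranks candidates by minimum GPC pixel
performance first, breaking ties with world performance and then TPC balance.
The sketch below is a minimal standalone illustration of that packing;
pack_perf and the sample inputs are hypothetical, not part of the driver.

#include <stdio.h>
#include <stdint.h>

#define SCALE_FACTOR	512u		/* fx23.9, as in the patch */
#define PIX_SCALE	(1024u * 1024u)	/* Pix perf in [29:20] */
#define WORLD_SCALE	1024u		/* World performance in [19:10] */
#define TPC_SCALE	1u		/* TPC balancing in [9:0] */

/*
 * Mirrors the final sum in gr_gv100_scg_calculate_perf(). Each input is
 * an fx23.9 ratio in [0, SCALE_FACTOR]; the patch rejects anything larger,
 * which keeps the three products in non-overlapping bit fields and the
 * total ((512 << 20) + (512 << 10) + 512) well within a u32.
 */
static uint32_t pack_perf(uint32_t min_gpc_pix_perf, uint32_t world_perf,
			  uint32_t tpc_balance)
{
	return PIX_SCALE * min_gpc_pix_perf +
	       WORLD_SCALE * world_perf +
	       TPC_SCALE * tpc_balance;
}

int main(void)
{
	/* Hypothetical fx23.9 inputs, not real GV100 measurements. */
	uint32_t a = pack_perf(512u, 256u, 100u);
	uint32_t b = pack_perf(511u, 512u, 512u);

	/*
	 * One unit of pix perf (1 << 20) outweighs the largest possible
	 * world + balance contribution ((512 << 10) + 512 = 524800), so
	 * candidate a wins even though it loses both lower-order terms.
	 */
	printf("a = %u, b = %u -> prefer %s\n", a, b, (a > b) ? "a" : "b");
	return 0;
}

This is also why the calculate-perf helper returns -EINVAL when any term
exceeds scale_factor: an oversized term would carry into the next bit field
and scramble the ordering.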