From e4a140b7c0b08fc0e72685822fd515a5a780a97e Mon Sep 17 00:00:00 2001 From: Nitin Kumbhar Date: Mon, 18 Mar 2019 23:00:42 +0530 Subject: [PATCH] gpu: nvgpu: use nvgpu_gr_config in gr.config unit Remove use of struct gk20a and struct gr_gk20a from common.gr.config hal functions. Many nvgpu_* ops still need struct gk20a *g, so a reference to it is now stored in struct nvgpu_gr_config. Also, nvgpu_gr_config is updated to include sm_count_per_tpc. JIRA NVGPU-1884 Change-Id: I874c2b3970d97ef3940b74d8ef121a7261061670 Signed-off-by: Nitin Kumbhar Reviewed-on: https://git-master.nvidia.com/r/2075681 Reviewed-by: mobile promotions Tested-by: mobile promotions --- .../gpu/nvgpu/common/gr/config/gr_config.c | 15 +++- drivers/gpu/nvgpu/common/gr/gr.c | 2 +- drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c | 23 +++--- drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h | 2 +- .../gpu/nvgpu/hal/gr/config/gr_config_gm20b.c | 12 +-- .../gpu/nvgpu/hal/gr/config/gr_config_gm20b.h | 2 +- .../gpu/nvgpu/hal/gr/config/gr_config_gv100.c | 82 ++++++++++--------- .../gpu/nvgpu/hal/gr/config/gr_config_gv100.h | 2 +- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 +- drivers/gpu/nvgpu/include/nvgpu/gr/config.h | 4 + 10 files changed, 83 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/config/gr_config.c b/drivers/gpu/nvgpu/common/gr/config/gr_config.c index 1cdceafcf..36e4ddd0a 100644 --- a/drivers/gpu/nvgpu/common/gr/config/gr_config.c +++ b/drivers/gpu/nvgpu/common/gr/config/gr_config.c @@ -39,6 +39,8 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g) return NULL;; } + config->g = g; + config->max_gpc_count = g->ops.top.get_max_gpc_count(g); config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g); @@ -65,10 +67,16 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g) goto clean_up; } + config->sm_count_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + if (config->sm_count_per_tpc == 0U) { + nvgpu_err(g, "sm_count_per_tpc==0!"); + goto clean_up; + } + /* allocate for max 
tpc per gpc */ sm_info_size = (size_t)config->gpc_count * (size_t)config->max_tpc_per_gpc_count * - (size_t)nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC) * + (size_t)config->sm_count_per_tpc * sizeof(struct sm_info); if (config->sm_to_cluster == NULL) { @@ -511,6 +519,11 @@ u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config) return config->pe_count_per_gpc; } +u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config) +{ + return config->sm_count_per_tpc; +} + u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config, u32 gpc_index) { diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index 2bf184cb2..9b86f0c3a 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -102,7 +102,7 @@ int nvgpu_gr_init_fs_state(struct gk20a *g) } if (g->ops.gr.config.init_sm_id_table != NULL) { - err = g->ops.gr.config.init_sm_id_table(g); + err = g->ops.gr.config.init_sm_id_table(g->gr.config); if (err != 0) { return err; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index 4288743c7..8e553dcdf 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -333,6 +333,8 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) config = gr->config; + config->g = g; + config->max_gpc_count = priv->constants.max_gpc_count; config->gpc_count = priv->constants.gpc_count; config->gpc_mask = priv->constants.gpc_mask; @@ -1094,27 +1096,26 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g, nvgpu_mutex_release(&g->dbg_sessions_lock); } -int vgpu_gr_init_sm_id_table(struct gk20a *g) +int vgpu_gr_init_sm_id_table(struct nvgpu_gr_config *gr_config) { struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_vsms_mapping_params *p = &msg.params.vsms_mapping; struct tegra_vgpu_vsms_mapping_entry *entry; - struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct 
vgpu_priv_data *priv = vgpu_get_priv_data(gr_config->g); struct sm_info *sm_info; int err; - struct gr_gk20a *gr = &g->gr; - struct nvgpu_gr_config *config = gr->config; size_t oob_size; void *handle = NULL; u32 sm_id; u32 max_sm; msg.cmd = TEGRA_VGPU_CMD_GET_VSMS_MAPPING; - msg.handle = vgpu_get_handle(g); + msg.handle = vgpu_get_handle(gr_config->g); err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); err = err ? err : msg.ret; if (err) { - nvgpu_err(g, "get vsms mapping failed err %d", err); + nvgpu_err(gr_config->g, + "get vsms mapping failed err %d", err); return err; } @@ -1125,8 +1126,8 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g) return -EINVAL; } - max_sm = config->gpc_count * - config->max_tpc_per_gpc_count * + max_sm = gr_config->gpc_count * + gr_config->max_tpc_per_gpc_count * priv->constants.sm_per_tpc; if (p->num_sm > max_sm) { return -EINVAL; @@ -1136,9 +1137,9 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g) return -EINVAL; } - gr->config->no_of_sm = p->num_sm; + gr_config->no_of_sm = p->num_sm; for (sm_id = 0; sm_id < p->num_sm; sm_id++, entry++) { - sm_info = nvgpu_gr_config_get_sm_info(gr->config, sm_id); + sm_info = nvgpu_gr_config_get_sm_info(gr_config, sm_id); sm_info->tpc_index = entry->tpc_index; sm_info->gpc_index = entry->gpc_index; sm_info->sm_index = entry->sm_index; @@ -1155,7 +1156,7 @@ int vgpu_gr_init_fs_state(struct gk20a *g) return -EINVAL; } - return g->ops.gr.config.init_sm_id_table(g); + return g->ops.gr.config.init_sm_id_table(g->gr.config); } int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable) diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h index c10141077..3548ef367 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h @@ -76,7 +76,7 @@ int vgpu_gr_resume_contexts(struct gk20a *g, struct dbg_session_gk20a *dbg_s, int *ctx_resident_ch_fd); int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 
gpu_va); -int vgpu_gr_init_sm_id_table(struct gk20a *g); +int vgpu_gr_init_sm_id_table(struct nvgpu_gr_config *gr_config); int vgpu_gr_init_fs_state(struct gk20a *g); int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable); void vgpu_gr_init_cyclestats(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c index df9efcdc3..11ee8ef52 100644 --- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c @@ -28,19 +28,19 @@ #include -int gm20b_gr_config_init_sm_id_table(struct gk20a *g) +int gm20b_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config) { u32 gpc, tpc; u32 sm_id = 0; for (tpc = 0; - tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config); + tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(gr_config); tpc++) { - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) { - if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) { + if (tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) { struct sm_info *sm_info = - nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); + nvgpu_gr_config_get_sm_info(gr_config, sm_id); sm_info->tpc_index = tpc; sm_info->gpc_index = gpc; sm_info->sm_index = 0; @@ -49,7 +49,7 @@ int gm20b_gr_config_init_sm_id_table(struct gk20a *g) } } } - g->gr.config->no_of_sm = sm_id; + gr_config->no_of_sm = sm_id; return 0; } diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.h b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.h index 9b7ce53cd..674b6f5be 100644 --- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.h @@ -28,7 +28,7 @@ struct gk20a; struct nvgpu_gr_config; -int gm20b_gr_config_init_sm_id_table(struct gk20a *g); +int gm20b_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config); u32 
gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config, u32 gpc_index); u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c index b6bdcc2d8..dc823de2a 100644 --- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c @@ -30,12 +30,11 @@ * Estimate performance if the given logical TPC in the given logical GPC were * removed. */ -static int gr_gv100_scg_estimate_perf(struct gk20a *g, +static int gr_gv100_scg_estimate_perf(struct nvgpu_gr_config *gr_config, unsigned long *gpc_tpc_mask, u32 disable_gpc_id, u32 disable_tpc_id, int *perf) { - struct gr_gk20a *gr = &g->gr; int err = 0; u32 scale_factor = 512U; /* Use fx23.9 */ u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */ @@ -58,8 +57,8 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, bool is_tpc_removed_pes = false; u32 max_tpc_gpc = 0U; u32 num_tpc_mask; - u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) * - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); + u32 *num_tpc_gpc = nvgpu_kzalloc(gr_config->g, sizeof(u32) * + nvgpu_get_litter_value(gr_config->g, GPU_LIT_NUM_GPCS)); if (num_tpc_gpc == NULL) { return -ENOMEM; @@ -67,7 +66,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */ for (gpc_id = 0; - gpc_id < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_id < nvgpu_gr_config_get_gpc_count(gr_config); gpc_id++) { num_tpc_mask = gpc_tpc_mask[gpc_id]; @@ -97,7 +96,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, * ratio represents relative throughput of the GPC */ scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] / - nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id); + nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_id); if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) { min_scg_gpc_pix_perf = scg_gpc_pix_perf; @@ 
-105,11 +104,11 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, /* Calculate # of surviving PES */ for (pes_id = 0; - pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id); + pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc_id); pes_id++) { /* Count the number of TPC on the set */ num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask( - gr->config, gpc_id, pes_id) & + gr_config, gpc_id, pes_id) & gpc_tpc_mask[gpc_id]; if ((gpc_id == disable_gpc_id) && @@ -140,12 +139,12 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, /* Now calculate perf */ scg_world_perf = (scale_factor * scg_num_pes) / - nvgpu_gr_config_get_ppc_count(gr->config); + nvgpu_gr_config_get_ppc_count(gr_config); deviation = 0; average_tpcs = scale_factor * average_tpcs / - nvgpu_gr_config_get_gpc_count(gr->config); + nvgpu_gr_config_get_gpc_count(gr_config); for (gpc_id =0; - gpc_id < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_id < nvgpu_gr_config_get_gpc_count(gr_config); gpc_id++) { diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id]; if (diff < 0) { @@ -154,7 +153,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, deviation += U32(diff); } - deviation /= nvgpu_gr_config_get_gpc_count(gr->config); + deviation /= nvgpu_gr_config_get_gpc_count(gr_config); norm_tpc_deviation = deviation / max_tpc_gpc; @@ -172,62 +171,64 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, (world_scale * scg_world_perf) + (tpc_scale * tpc_balance); free_resources: - nvgpu_kfree(g, num_tpc_gpc); + nvgpu_kfree(gr_config->g, num_tpc_gpc); return err; } -int gv100_gr_config_init_sm_id_table(struct gk20a *g) +int gv100_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config) { unsigned long tpc; u32 gpc, sm, pes, gtpc; u32 sm_id = 0; - u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - struct gr_gk20a *gr = &g->gr; - u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config); + u32 sm_per_tpc = 
nvgpu_gr_config_get_sm_count_per_tpc(gr_config); + u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr_config); int perf, maxperf; int err = 0; unsigned long *gpc_tpc_mask; u32 *tpc_table, *gpc_table; - if (g->gr.config == NULL) { + if (gr_config == NULL) { return -ENOMEM; } - gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * - sizeof(u32)); - tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * - sizeof(u32)); - gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) * - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); + gpc_table = nvgpu_kzalloc(gr_config->g, + nvgpu_gr_config_get_tpc_count(gr_config) * + sizeof(u32)); + tpc_table = nvgpu_kzalloc(gr_config->g, + nvgpu_gr_config_get_tpc_count(gr_config) * + sizeof(u32)); + gpc_tpc_mask = nvgpu_kzalloc(gr_config->g, + sizeof(unsigned long) * + nvgpu_get_litter_value(gr_config->g, GPU_LIT_NUM_GPCS)); if ((gpc_table == NULL) || (tpc_table == NULL) || (gpc_tpc_mask == NULL)) { - nvgpu_err(g, "Error allocating memory for sm tables"); + nvgpu_err(gr_config->g, "Error allocating memory for sm tables"); err = -ENOMEM; goto exit_build_table; } - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) { for (pes = 0; - pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc); + pes < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc); pes++) { gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask( - g->gr.config, gpc, pes); + gr_config, gpc, pes); } } - for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) { + for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr_config); gtpc++) { maxperf = -1; - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) { for_each_set_bit(tpc, &gpc_tpc_mask[gpc], - nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) { + 
nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) { perf = -1; - err = gr_gv100_scg_estimate_perf(g, + err = gr_gv100_scg_estimate_perf(gr_config, gpc_tpc_mask, gpc, tpc, &perf); if (err != 0) { - nvgpu_err(g, + nvgpu_err(gr_config->g, "Error while estimating perf"); goto exit_build_table; } @@ -246,13 +247,13 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g) for (sm = 0; sm < sm_per_tpc; sm++) { u32 index = sm_id + sm; struct sm_info *sm_info = - nvgpu_gr_config_get_sm_info(g->gr.config, index); + nvgpu_gr_config_get_sm_info(gr_config, index); sm_info->gpc_index = gpc_table[tpc]; sm_info->tpc_index = tpc_table[tpc]; sm_info->sm_index = sm; sm_info->global_tpc_index = tpc; - nvgpu_log_info(g, + nvgpu_log_info(gr_config->g, "gpc : %d tpc %d sm_index %d global_index: %d", sm_info->gpc_index, sm_info->tpc_index, @@ -262,11 +263,12 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g) } } - g->gr.config->no_of_sm = num_sm; - nvgpu_log_info(g, " total number of sm = %d", g->gr.config->no_of_sm); + gr_config->no_of_sm = num_sm; + nvgpu_log_info(gr_config->g, " total number of sm = %d", + gr_config->no_of_sm); exit_build_table: - nvgpu_kfree(g, gpc_table); - nvgpu_kfree(g, tpc_table); - nvgpu_kfree(g, gpc_tpc_mask); + nvgpu_kfree(gr_config->g, gpc_table); + nvgpu_kfree(gr_config->g, tpc_table); + nvgpu_kfree(gr_config->g, gpc_tpc_mask); return err; } diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.h b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.h index 2bb77e30f..8f2e18eee 100644 --- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.h +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.h @@ -27,6 +27,6 @@ struct gk20a; -int gv100_gr_config_init_sm_id_table(struct gk20a *g); +int gv100_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config); #endif /* NVGPU_GR_CONFIG_GV100_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 0bd901469..7fc08fb76 100644 --- 
a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -581,7 +581,7 @@ struct gpu_ops { struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index); u32 (*get_pd_dist_skip_table_size)(void); - int (*init_sm_id_table)(struct gk20a *g); + int (*init_sm_id_table)(struct nvgpu_gr_config *gr_config); } config; #ifdef CONFIG_GK20A_CTXSW_TRACE diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/config.h b/drivers/gpu/nvgpu/include/nvgpu/gr/config.h index b64b71fa9..9f352f225 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/config.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/config.h @@ -37,6 +37,8 @@ struct sm_info { }; struct nvgpu_gr_config { + struct gk20a *g; + u32 max_gpc_count; u32 max_tpc_per_gpc_count; u32 max_zcull_per_gpc_count; @@ -48,6 +50,7 @@ struct nvgpu_gr_config { u32 zcb_count; u32 pe_count_per_gpc; + u32 sm_count_per_tpc; u32 *gpc_ppc_count; u32 *gpc_tpc_count; @@ -87,6 +90,7 @@ u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config); u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config); u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config); +u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config); u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config, u32 gpc_index);