gpu: nvgpu: use nvgpu_gr_config in gr.config unit

Remove use of struct gk20a and struct gr_gk20a from common.gr.config
hal functions.

Since many nvgpu_* ops still need a reference to struct gk20a, a
struct gk20a *g pointer is now stored in struct nvgpu_gr_config. Also,
nvgpu_gr_config is updated to include sm_count_per_tpc.
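
A minimal sketch of the resulting HAL surface, taken from the hunks in
this change (exact call sites vary per chip):

    /* the op now takes the config object instead of struct gk20a */
    int (*init_sm_id_table)(struct nvgpu_gr_config *gr_config);

    /* callers hand over g->gr.config */
    err = g->ops.gr.config.init_sm_id_table(g->gr.config);

    /* sm_count_per_tpc is cached in nvgpu_gr_config and read via a getter */
    u32 sm_per_tpc = nvgpu_gr_config_get_sm_count_per_tpc(gr_config);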

JIRA NVGPU-1884

Change-Id: I874c2b3970d97ef3940b74d8ef121a7261061670
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2075681
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Nitin Kumbhar
Committed: mobile promotions
Date:      2019-03-18 23:00:42 +05:30
Parent:    b5cd0c7956
Commit:    e4a140b7c0
10 changed files with 83 additions and 63 deletions

View File

@@ -39,6 +39,8 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
return NULL;
}
config->g = g;
config->max_gpc_count = g->ops.top.get_max_gpc_count(g);
config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
@@ -65,10 +67,16 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
goto clean_up;
}
config->sm_count_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
if (config->sm_count_per_tpc == 0U) {
nvgpu_err(g, "sm_count_per_tpc==0!");
goto clean_up;
}
/* allocate for max tpc per gpc */
sm_info_size = (size_t)config->gpc_count *
(size_t)config->max_tpc_per_gpc_count *
(size_t)nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC) *
(size_t)config->sm_count_per_tpc *
sizeof(struct sm_info);
if (config->sm_to_cluster == NULL) {
@@ -511,6 +519,11 @@ u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config)
return config->pe_count_per_gpc;
}
u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config)
{
return config->sm_count_per_tpc;
}
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
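
A rough worked example of the sm_info_size computation in the first hunk of
this file, using hypothetical counts that are not taken from this change:
with gpc_count = 2, max_tpc_per_gpc_count = 4 and sm_count_per_tpc = 2, the
table is sized for 2 * 4 * 2 = 16 struct sm_info entries. Sizing by
max_tpc_per_gpc_count rather than the actual per-GPC TPC counts
over-allocates when GPCs are floorswept, which matches the
"allocate for max tpc per gpc" comment.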

View File

@@ -102,7 +102,7 @@ int nvgpu_gr_init_fs_state(struct gk20a *g)
}
if (g->ops.gr.config.init_sm_id_table != NULL) {
err = g->ops.gr.config.init_sm_id_table(g);
err = g->ops.gr.config.init_sm_id_table(g->gr.config);
if (err != 0) {
return err;
}

View File

@@ -333,6 +333,8 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
config = gr->config;
config->g = g;
config->max_gpc_count = priv->constants.max_gpc_count;
config->gpc_count = priv->constants.gpc_count;
config->gpc_mask = priv->constants.gpc_mask;
@@ -1094,27 +1096,26 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
nvgpu_mutex_release(&g->dbg_sessions_lock);
}
int vgpu_gr_init_sm_id_table(struct gk20a *g)
int vgpu_gr_init_sm_id_table(struct nvgpu_gr_config *gr_config)
{
struct tegra_vgpu_cmd_msg msg = {};
struct tegra_vgpu_vsms_mapping_params *p = &msg.params.vsms_mapping;
struct tegra_vgpu_vsms_mapping_entry *entry;
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
struct vgpu_priv_data *priv = vgpu_get_priv_data(gr_config->g);
struct sm_info *sm_info;
int err;
struct gr_gk20a *gr = &g->gr;
struct nvgpu_gr_config *config = gr->config;
size_t oob_size;
void *handle = NULL;
u32 sm_id;
u32 max_sm;
msg.cmd = TEGRA_VGPU_CMD_GET_VSMS_MAPPING;
msg.handle = vgpu_get_handle(g);
msg.handle = vgpu_get_handle(gr_config->g);
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
err = err ? err : msg.ret;
if (err) {
nvgpu_err(g, "get vsms mapping failed err %d", err);
nvgpu_err(gr_config->g,
"get vsms mapping failed err %d", err);
return err;
}
@@ -1125,8 +1126,8 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
return -EINVAL;
}
max_sm = config->gpc_count *
config->max_tpc_per_gpc_count *
max_sm = gr_config->gpc_count *
gr_config->max_tpc_per_gpc_count *
priv->constants.sm_per_tpc;
if (p->num_sm > max_sm) {
return -EINVAL;
@@ -1136,9 +1137,9 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
return -EINVAL;
}
gr->config->no_of_sm = p->num_sm;
gr_config->no_of_sm = p->num_sm;
for (sm_id = 0; sm_id < p->num_sm; sm_id++, entry++) {
sm_info = nvgpu_gr_config_get_sm_info(gr->config, sm_id);
sm_info = nvgpu_gr_config_get_sm_info(gr_config, sm_id);
sm_info->tpc_index = entry->tpc_index;
sm_info->gpc_index = entry->gpc_index;
sm_info->sm_index = entry->sm_index;
@@ -1155,7 +1156,7 @@ int vgpu_gr_init_fs_state(struct gk20a *g)
return -EINVAL;
}
return g->ops.gr.config.init_sm_id_table(g);
return g->ops.gr.config.init_sm_id_table(g->gr.config);
}
int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable)

View File

@@ -76,7 +76,7 @@ int vgpu_gr_resume_contexts(struct gk20a *g,
struct dbg_session_gk20a *dbg_s,
int *ctx_resident_ch_fd);
int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va);
int vgpu_gr_init_sm_id_table(struct gk20a *g);
int vgpu_gr_init_sm_id_table(struct nvgpu_gr_config *gr_config);
int vgpu_gr_init_fs_state(struct gk20a *g);
int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable);
void vgpu_gr_init_cyclestats(struct gk20a *g);

View File

@@ -28,19 +28,19 @@
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
int gm20b_gr_config_init_sm_id_table(struct gk20a *g)
int gm20b_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config)
{
u32 gpc, tpc;
u32 sm_id = 0;
for (tpc = 0;
tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(gr_config);
tpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) {
if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
if (tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) {
struct sm_info *sm_info =
nvgpu_gr_config_get_sm_info(g->gr.config, sm_id);
nvgpu_gr_config_get_sm_info(gr_config, sm_id);
sm_info->tpc_index = tpc;
sm_info->gpc_index = gpc;
sm_info->sm_index = 0;
@@ -49,7 +49,7 @@ int gm20b_gr_config_init_sm_id_table(struct gk20a *g)
}
}
}
g->gr.config->no_of_sm = sm_id;
gr_config->no_of_sm = sm_id;
return 0;
}
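
Assuming a hypothetical configuration of 2 GPCs with 2 TPCs each (numbers
not from this change), the TPC-major loop above assigns sm_ids in the order:

    sm_id 0 -> gpc 0, tpc 0
    sm_id 1 -> gpc 1, tpc 0
    sm_id 2 -> gpc 0, tpc 1
    sm_id 3 -> gpc 1, tpc 1

so SMs are spread across GPCs before a second TPC on any one GPC is used.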

View File

@@ -28,7 +28,7 @@
struct gk20a;
struct nvgpu_gr_config;
int gm20b_gr_config_init_sm_id_table(struct gk20a *g);
int gm20b_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config);
u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,

View File

@@ -30,12 +30,11 @@
* Estimate performance if the given logical TPC in the given logical GPC were
* removed.
*/
static int gr_gv100_scg_estimate_perf(struct gk20a *g,
static int gr_gv100_scg_estimate_perf(struct nvgpu_gr_config *gr_config,
unsigned long *gpc_tpc_mask,
u32 disable_gpc_id, u32 disable_tpc_id,
int *perf)
{
struct gr_gk20a *gr = &g->gr;
int err = 0;
u32 scale_factor = 512U; /* Use fx23.9 */
u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */
@@ -58,8 +57,8 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
bool is_tpc_removed_pes = false;
u32 max_tpc_gpc = 0U;
u32 num_tpc_mask;
u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
u32 *num_tpc_gpc = nvgpu_kzalloc(gr_config->g, sizeof(u32) *
nvgpu_get_litter_value(gr_config->g, GPU_LIT_NUM_GPCS));
if (num_tpc_gpc == NULL) {
return -ENOMEM;
@@ -67,7 +66,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
for (gpc_id = 0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id < nvgpu_gr_config_get_gpc_count(gr_config);
gpc_id++) {
num_tpc_mask = gpc_tpc_mask[gpc_id];
@@ -97,7 +96,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
* ratio represents relative throughput of the GPC
*/
scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id);
nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_id);
if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
min_scg_gpc_pix_perf = scg_gpc_pix_perf;
@@ -105,11 +104,11 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
/* Calculate # of surviving PES */
for (pes_id = 0;
pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id);
pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc_id);
pes_id++) {
/* Count the number of TPC on the set */
num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask(
gr->config, gpc_id, pes_id) &
gr_config, gpc_id, pes_id) &
gpc_tpc_mask[gpc_id];
if ((gpc_id == disable_gpc_id) &&
@@ -140,12 +139,12 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
/* Now calculate perf */
scg_world_perf = (scale_factor * scg_num_pes) /
nvgpu_gr_config_get_ppc_count(gr->config);
nvgpu_gr_config_get_ppc_count(gr_config);
deviation = 0;
average_tpcs = scale_factor * average_tpcs /
nvgpu_gr_config_get_gpc_count(gr->config);
nvgpu_gr_config_get_gpc_count(gr_config);
for (gpc_id = 0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id < nvgpu_gr_config_get_gpc_count(gr_config);
gpc_id++) {
diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
if (diff < 0) {
@@ -154,7 +153,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
deviation += U32(diff);
}
deviation /= nvgpu_gr_config_get_gpc_count(gr->config);
deviation /= nvgpu_gr_config_get_gpc_count(gr_config);
norm_tpc_deviation = deviation / max_tpc_gpc;
@@ -172,62 +171,64 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
(world_scale * scg_world_perf) +
(tpc_scale * tpc_balance);
free_resources:
nvgpu_kfree(g, num_tpc_gpc);
nvgpu_kfree(gr_config->g, num_tpc_gpc);
return err;
}
int gv100_gr_config_init_sm_id_table(struct gk20a *g)
int gv100_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config)
{
unsigned long tpc;
u32 gpc, sm, pes, gtpc;
u32 sm_id = 0;
u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
struct gr_gk20a *gr = &g->gr;
u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config);
u32 sm_per_tpc = nvgpu_gr_config_get_sm_count_per_tpc(gr_config);
u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr_config);
int perf, maxperf;
int err = 0;
unsigned long *gpc_tpc_mask;
u32 *tpc_table, *gpc_table;
if (g->gr.config == NULL) {
if (gr_config == NULL) {
return -ENOMEM;
}
gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
gpc_table = nvgpu_kzalloc(gr_config->g,
nvgpu_gr_config_get_tpc_count(gr_config) *
sizeof(u32));
tpc_table = nvgpu_kzalloc(gr_config->g,
nvgpu_gr_config_get_tpc_count(gr_config) *
sizeof(u32));
gpc_tpc_mask = nvgpu_kzalloc(gr_config->g,
sizeof(unsigned long) *
nvgpu_get_litter_value(gr_config->g, GPU_LIT_NUM_GPCS));
if ((gpc_table == NULL) ||
(tpc_table == NULL) ||
(gpc_tpc_mask == NULL)) {
nvgpu_err(g, "Error allocating memory for sm tables");
nvgpu_err(gr_config->g, "Error allocating memory for sm tables");
err = -ENOMEM;
goto exit_build_table;
}
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) {
for (pes = 0;
pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc);
pes < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc);
pes++) {
gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask(
g->gr.config, gpc, pes);
gr_config, gpc, pes);
}
}
for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) {
for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr_config); gtpc++) {
maxperf = -1;
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) {
for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc)) {
perf = -1;
err = gr_gv100_scg_estimate_perf(g,
err = gr_gv100_scg_estimate_perf(gr_config,
gpc_tpc_mask, gpc, tpc, &perf);
if (err != 0) {
nvgpu_err(g,
nvgpu_err(gr_config->g,
"Error while estimating perf");
goto exit_build_table;
}
@@ -246,13 +247,13 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g)
for (sm = 0; sm < sm_per_tpc; sm++) {
u32 index = sm_id + sm;
struct sm_info *sm_info =
nvgpu_gr_config_get_sm_info(g->gr.config, index);
nvgpu_gr_config_get_sm_info(gr_config, index);
sm_info->gpc_index = gpc_table[tpc];
sm_info->tpc_index = tpc_table[tpc];
sm_info->sm_index = sm;
sm_info->global_tpc_index = tpc;
nvgpu_log_info(g,
nvgpu_log_info(gr_config->g,
"gpc : %d tpc %d sm_index %d global_index: %d",
sm_info->gpc_index,
sm_info->tpc_index,
@@ -262,11 +263,12 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g)
}
}
g->gr.config->no_of_sm = num_sm;
nvgpu_log_info(g, " total number of sm = %d", g->gr.config->no_of_sm);
gr_config->no_of_sm = num_sm;
nvgpu_log_info(gr_config->g, " total number of sm = %d",
gr_config->no_of_sm);
exit_build_table:
nvgpu_kfree(g, gpc_table);
nvgpu_kfree(g, tpc_table);
nvgpu_kfree(g, gpc_tpc_mask);
nvgpu_kfree(gr_config->g, gpc_table);
nvgpu_kfree(gr_config->g, tpc_table);
nvgpu_kfree(gr_config->g, gpc_tpc_mask);
return err;
}

View File

@@ -27,6 +27,6 @@
struct gk20a;
int gv100_gr_config_init_sm_id_table(struct gk20a *g);
int gv100_gr_config_init_sm_id_table(struct nvgpu_gr_config *gr_config);
#endif /* NVGPU_GR_CONFIG_GV100_H */

View File

@@ -581,7 +581,7 @@ struct gpu_ops {
struct nvgpu_gr_config *config, u32 gpc_index,
u32 pes_index);
u32 (*get_pd_dist_skip_table_size)(void);
int (*init_sm_id_table)(struct gk20a *g);
int (*init_sm_id_table)(struct nvgpu_gr_config *gr_config);
} config;
#ifdef CONFIG_GK20A_CTXSW_TRACE

View File

@@ -37,6 +37,8 @@ struct sm_info {
};
struct nvgpu_gr_config {
struct gk20a *g;
u32 max_gpc_count;
u32 max_tpc_per_gpc_count;
u32 max_zcull_per_gpc_count;
@@ -48,6 +50,7 @@ struct nvgpu_gr_config {
u32 zcb_count;
u32 pe_count_per_gpc;
u32 sm_count_per_tpc;
u32 *gpc_ppc_count;
u32 *gpc_tpc_count;
@@ -87,6 +90,7 @@ u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
u32 gpc_index);