diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index e27fbca37..a3106f8cb 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -562,18 +563,18 @@ static struct tsg_gk20a *gk20a_tsg_acquire_unused_tsg(struct fifo_gk20a *f) int gk20a_tsg_open_common(struct gk20a *g, struct tsg_gk20a *tsg) { + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); int err; /* we need to allocate this after g->ops.gr.init_fs_state() since - * we initialize gr->no_of_sm in this function + * we initialize gr.config->no_of_sm in this function */ - if (g->gr.no_of_sm == 0U) { - nvgpu_err(g, "no_of_sm %d not set, failed allocation", - g->gr.no_of_sm); + if (no_of_sm == 0U) { + nvgpu_err(g, "no_of_sm %d not set, failed allocation", no_of_sm); return -EINVAL; } - err = gk20a_tsg_alloc_sm_error_states_mem(g, tsg, g->gr.no_of_sm); + err = gk20a_tsg_alloc_sm_error_states_mem(g, tsg, no_of_sm); if (err != 0) { return err; } diff --git a/drivers/gpu/nvgpu/common/gr/config/gr_config.c b/drivers/gpu/nvgpu/common/gr/config/gr_config.c index 34fa642c5..763063476 100644 --- a/drivers/gpu/nvgpu/common/gr/config/gr_config.c +++ b/drivers/gpu/nvgpu/common/gr/config/gr_config.c @@ -542,3 +542,8 @@ u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config) { return config->gpc_mask; } + +u32 nvgpu_gr_config_get_no_of_sm(struct nvgpu_gr_config *config) +{ + return config->no_of_sm; +} diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index 8664e1db8..6703e3845 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -108,12 +108,13 @@ int nvgpu_gr_init_fs_state(struct gk20a *g) } /* Is table empty ? */ - if (g->gr.no_of_sm == 0U) { + if (nvgpu_gr_config_get_no_of_sm(gr_config) == 0U) { return -EINVAL; } } - for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { + for (sm_id = 0; sm_id < nvgpu_gr_config_get_no_of_sm(gr_config); + sm_id++) { tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index c83f152c4..30c11edd1 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -1061,10 +1061,10 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g, { struct nvgpu_tsg_sm_error_state *sm_error_states; struct tsg_gk20a *tsg; + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); - if (info->sm_id >= g->gr.no_of_sm) { - nvgpu_err(g, "invalid smd_id %d / %d", - info->sm_id, g->gr.no_of_sm); + if (info->sm_id >= no_of_sm) { + nvgpu_err(g, "invalid smd_id %d / %d", info->sm_id, no_of_sm); return; } @@ -1136,7 +1136,7 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g) return -EINVAL; } - gr->no_of_sm = p->num_sm; + gr->config->no_of_sm = p->num_sm; for (sm_id = 0; sm_id < p->num_sm; sm_id++, entry++) { sm_info = &gr->sm_to_cluster[sm_id]; sm_info->tpc_index = entry->tpc_index; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index b6bf5929d..3c492f101 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -1987,7 +1987,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) (size_t)sm_per_tpc * sizeof(struct sm_info)); } - gr->no_of_sm = 0; + gr->config->no_of_sm = 0; nvgpu_log_info(g, "fbps: %d", gr->num_fbps); nvgpu_log_info(g, "max_fbps_count: %d", gr->max_fbps_count); @@ -5581,12 +5581,14 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, int err; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); - ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops)); + ops = nvgpu_kcalloc(g, no_of_sm, sizeof(*ops)); if (ops == NULL) { return -ENOMEM; } - for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { + + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { u32 gpc, tpc; u32 tpc_offset, gpc_offset, reg_offset, reg_mask, reg_val; @@ -5779,6 +5781,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state) struct gr_gk20a *gr = &g->gr; u32 gpc, tpc, sm, sm_id; u32 global_mask; + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(gr->config); /* Wait for the SMs to reach full stop. This condition is: * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE) @@ -5788,7 +5791,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state) global_mask = g->ops.gr.get_sm_no_lock_down_hww_global_esr_mask(g); /* Lock down all SMs */ - for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { gpc = g->gr.sm_to_cluster[sm_id].gpc_index; tpc = g->gr.sm_to_cluster[sm_id].tpc_index; @@ -5867,8 +5870,9 @@ u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) u32 offset, regval, tpc_offset, gpc_offset; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(gr->config); - for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index; gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index e9f75bef6..4ad7e3f98 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -208,7 +208,6 @@ struct gr_gk20a { u32 fbp_en_mask; u32 *fbp_rop_l2_en_mask; - u32 no_of_sm; struct sm_info *sm_to_cluster; #if defined(CONFIG_GK20A_CYCLE_STATS) diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 3ed1c1dfd..f3612467f 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -954,12 +954,13 @@ void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(gr->config); /* for maxwell & kepler */ u32 numSmPerTpc = 1; u32 numWarpPerTpc = g->params.sm_arch_warp_count * numSmPerTpc; - for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { gpc = g->gr.sm_to_cluster[sm_id].gpc_index; tpc = g->gr.sm_to_cluster[sm_id].tpc_index; @@ -1005,7 +1006,7 @@ void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) /* Only for debug purpose */ - for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { nvgpu_log_fn(g, "w_state[%d].valid_warps[0]: %llx\n", sm_id, w_state[sm_id].valid_warps[0]); nvgpu_log_fn(g, "w_state[%d].valid_warps[1]: %llx\n", diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 8ee174536..8899ad750 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2398,8 +2398,9 @@ void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) u32 gpc, tpc, sm, sm_id; u32 offset; u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(gr->config); - for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { gpc = g->gr.sm_to_cluster[sm_id].gpc_index; tpc = g->gr.sm_to_cluster[sm_id].tpc_index; sm = g->gr.sm_to_cluster[sm_id].sm_index; @@ -2439,7 +2440,7 @@ void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) /* Only for debug purpose */ - for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { nvgpu_log_fn(g, "w_state[%d].valid_warps[0]: %llx\n", sm_id, w_state[sm_id].valid_warps[0]); nvgpu_log_fn(g, "w_state[%d].valid_warps[1]: %llx\n", @@ -2462,13 +2463,14 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g, { struct nvgpu_dbg_reg_op *ops; unsigned int i = 0, sm_id; + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); int err; - ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops)); + ops = nvgpu_kcalloc(g, no_of_sm, sizeof(*ops)); if (ops == NULL) { return -ENOMEM; } - for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { u32 gpc, tpc, sm; u32 reg_offset, reg_mask, reg_val; diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c index f5dd8a0a2..d04b6c5ed 100644 --- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c @@ -48,7 +48,7 @@ int gm20b_gr_config_init_sm_id_table(struct gk20a *g) } } } - g->gr.no_of_sm = sm_id; + g->gr.config->no_of_sm = sm_id; return 0; } diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c index 048b1301b..e1eeb3ef6 100644 --- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c @@ -189,6 +189,10 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g) unsigned long *gpc_tpc_mask; u32 *tpc_table, *gpc_table; + if (g->gr.config == NULL) { + return -ENOMEM; + } + gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * sizeof(u32)); tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * @@ -256,8 +260,8 @@ int gv100_gr_config_init_sm_id_table(struct gk20a *g) } } - g->gr.no_of_sm = num_sm; - nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm); + g->gr.config->no_of_sm = num_sm; + nvgpu_log_info(g, " total number of sm = %d", g->gr.config->no_of_sm); exit_build_table: nvgpu_kfree(g, gpc_table); nvgpu_kfree(g, tpc_table); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c index 2f9a3c596..aa27f12df 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c @@ -109,6 +109,7 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, u32 tpc_index, gpc_index, tpc_id; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ for (i = 0U; @@ -125,7 +126,7 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, tpc_id = (i << 2) + j; sm_id = tpc_id * sm_per_tpc; - if (sm_id >= g->gr.no_of_sm) { + if (sm_id >= no_of_sm) { break; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/config.h b/drivers/gpu/nvgpu/include/nvgpu/gr/config.h index 2d8047a91..06ef0b866 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/config.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/config.h @@ -55,6 +55,8 @@ struct nvgpu_gr_config { u8 *map_tiles; u32 map_tile_count; u32 map_row_offset; + + u32 no_of_sm; }; struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g); @@ -94,5 +96,6 @@ u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config, u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index); u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config); +u32 nvgpu_gr_config_get_no_of_sm(struct nvgpu_gr_config *config); #endif /* NVGPU_GR_CONFIG_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index bc375f84e..6e9044872 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -684,7 +684,7 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, int err; struct warpstate *ioctl_w_state; struct nvgpu_warpstate *w_state = NULL; - u32 sm_count, ioctl_size, size, sm_id; + u32 sm_count, ioctl_size, size, sm_id, no_of_sm; sm_count = nvgpu_gr_config_get_gpc_count(g->gr.config) * nvgpu_gr_config_get_tpc_count(g->gr.config); @@ -708,7 +708,9 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, nvgpu_mutex_acquire(&g->dbg_sessions_lock); g->ops.gr.wait_for_pause(g, w_state); - for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { + no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); + + for (sm_id = 0; sm_id < no_of_sm; sm_id++) { ioctl_w_state[sm_id].valid_warps[0] = w_state[sm_id].valid_warps[0]; ioctl_w_state[sm_id].valid_warps[1] = @@ -791,7 +793,7 @@ static int gk20a_ctrl_get_num_vsms(struct gk20a *g, struct nvgpu_gpu_num_vsms *args) { struct gr_gk20a *gr = &g->gr; - args->num_vsms = gr->no_of_sm; + args->num_vsms = nvgpu_gr_config_get_no_of_sm(gr->config); return 0; } @@ -800,8 +802,9 @@ static int gk20a_ctrl_vsm_mapping(struct gk20a *g, { int err = 0; struct gr_gk20a *gr = &g->gr; - size_t write_size = gr->no_of_sm * - sizeof(struct nvgpu_gpu_vsms_mapping_entry); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(gr->config); + size_t write_size = no_of_sm * + sizeof(struct nvgpu_gpu_vsms_mapping_entry); struct nvgpu_gpu_vsms_mapping_entry *vsms_buf; u32 i; @@ -809,7 +812,7 @@ static int gk20a_ctrl_vsm_mapping(struct gk20a *g, if (vsms_buf == NULL) return -ENOMEM; - for (i = 0; i < gr->no_of_sm; i++) { + for (i = 0; i < no_of_sm; i++) { vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index; if (g->ops.gr.get_nonpes_aware_tpc) vsms_buf[i].tpc_index = diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index fc0b39690..5823c8be9 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -284,7 +285,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( } sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) { + if (sm_id >= nvgpu_gr_config_get_no_of_sm(gr->config)) { return -EINVAL; } @@ -1460,7 +1461,7 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( } sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) + if (sm_id >= nvgpu_gr_config_get_no_of_sm(gr->config)) return -EINVAL; nvgpu_speculation_barrier(); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c index 602f02b77..7b65b6d89 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -564,7 +564,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g, int err = 0; sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) + if (sm_id >= nvgpu_gr_config_get_no_of_sm(gr->config)) return -EINVAL; nvgpu_speculation_barrier();