diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 27854644d..7327e3214 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -88,6 +88,8 @@ nvgpu-y += common/bus/bus_gk20a.o \
 	common/gr/global_ctx.o \
 	common/gr/ctx.o \
 	common/gr/subctx.o \
+	common/gr/config/gr_config.o \
+	common/gr/config/gr_config_gm20b.o \
 	common/netlist/netlist.o \
 	common/netlist/netlist_sim.o \
 	common/netlist/netlist_gm20b.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index b66dfa033..3c2aeb104 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -130,6 +130,8 @@ srcs += common/sim.c \
 	common/gr/global_ctx.c \
 	common/gr/subctx.c \
 	common/gr/ctx.c \
+	common/gr/config/gr_config.c \
+	common/gr/config/gr_config_gm20b.c \
 	common/netlist/netlist.c \
 	common/netlist/netlist_sim.c \
 	common/netlist/netlist_gm20b.c \
diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c
index 0ae4336ba..ab5895898 100644
--- a/drivers/gpu/nvgpu/common/ecc.c
+++ b/drivers/gpu/nvgpu/common/ecc.c
@@ -21,6 +21,7 @@
  */
 #include
+#include
 static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
 {
@@ -44,16 +45,17 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
 {
 	struct gr_gk20a *gr = &g->gr;
 	struct nvgpu_ecc_stat **stats;
+	u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
 	u32 gpc, tpc;
 	int err = 0;
-	stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
+	stats = nvgpu_kzalloc(g, sizeof(*stats) * gpc_count);
 	if (stats == NULL) {
 		return -ENOMEM;
 	}
-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-		stats[gpc] = nvgpu_kzalloc(g,
-			sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
+	for (gpc = 0; gpc < gpc_count; gpc++) {
+		stats[gpc] = nvgpu_kzalloc(g, sizeof(*stats[gpc]) *
+			nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc));
 		if (stats[gpc] == NULL) {
 			err = -ENOMEM;
 			break;
@@ -69,8 +71,10 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
 		return err;
 	}
-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-		for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
+	for (gpc = 0; gpc < gpc_count; gpc++) {
+		for (tpc = 0;
+		     tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc);
+		     tpc++) {
 			(void) snprintf(stats[gpc][tpc].name,
 					NVGPU_ECC_STAT_NAME_MAX_SIZE,
 					"gpc%d_tpc%d_%s", gpc, tpc, name);
@@ -87,13 +91,14 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
 {
 	struct gr_gk20a *gr = &g->gr;
 	struct nvgpu_ecc_stat *stats;
+	u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
 	u32 gpc;
-	stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
+	stats = nvgpu_kzalloc(g, sizeof(*stats) * gpc_count);
 	if (stats == NULL) {
 		return -ENOMEM;
 	}
-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+	for (gpc = 0; gpc < gpc_count; gpc++) {
 		(void) snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
 				"gpc%d_%s", gpc, name);
 		nvgpu_ecc_stat_add(g, &stats[gpc]);
@@ -189,9 +194,10 @@ void nvgpu_ecc_free(struct gk20a *g)
 {
 	struct nvgpu_ecc *ecc = &g->ecc;
 	struct gr_gk20a *gr = &g->gr;
+	u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
 	u32 i;
-	for (i = 0; i < gr->gpc_count; i++) {
+	for (i = 0; i < gpc_count; i++) {
 		if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
 			nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
 		}
diff --git a/drivers/gpu/nvgpu/common/gr/config/gr_config.c b/drivers/gpu/nvgpu/common/gr/config/gr_config.c
new file mode 100644
index 000000000..632512722
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/gr/config/gr_config.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright
(c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include +#include + +struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g) +{ + struct nvgpu_gr_config *config; + u32 gpc_index, pes_index; + u32 pes_tpc_mask; + u32 pes_tpc_count; + u32 pes_heavy_index; + u32 gpc_new_skip_mask; + u32 tmp; + + config = nvgpu_kzalloc(g, sizeof(*config)); + if (config == NULL) { + return NULL;; + } + + tmp = nvgpu_readl(g, top_num_gpcs_r()); + config->max_gpc_count = top_num_gpcs_value_v(tmp); + + tmp = nvgpu_readl(g, top_tpc_per_gpc_r()); + config->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp); + + config->max_tpc_count = config->max_gpc_count * + config->max_tpc_per_gpc_count; + + tmp = nvgpu_readl(g, pri_ringmaster_enum_gpc_r()); + config->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); + if (config->gpc_count == 0U) { + nvgpu_err(g, "gpc_count==0!"); + goto clean_up; + } + + config->pe_count_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_PES_PER_GPC); + if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) { + nvgpu_err(g, "too many pes per gpc"); + goto clean_up; + } + + config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, + GPU_LIT_NUM_ZCULL_BANKS); + + config->gpc_tpc_count = nvgpu_kzalloc(g, config->gpc_count * + sizeof(u32)); + config->gpc_tpc_mask = nvgpu_kzalloc(g, config->max_gpc_count * + sizeof(u32)); + config->gpc_zcb_count = nvgpu_kzalloc(g, config->gpc_count * + sizeof(u32)); + config->gpc_ppc_count = nvgpu_kzalloc(g, config->gpc_count * + sizeof(u32)); + config->gpc_skip_mask = nvgpu_kzalloc(g, + (size_t)g->ops.gr.config.get_pd_dist_skip_table_size() * + (size_t)4 * sizeof(u32)); + + if ((config->gpc_tpc_count == NULL) || (config->gpc_tpc_mask == NULL) || + (config->gpc_zcb_count == NULL) || (config->gpc_ppc_count == NULL) || + (config->gpc_skip_mask == NULL)) { + goto clean_up; + } + + for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) { + if (g->ops.gr.config.get_gpc_tpc_mask != NULL) { + config->gpc_tpc_mask[gpc_index] = + g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_index); + } + } + + for (pes_index = 0; pes_index < config->pe_count_per_gpc; pes_index++) { + config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, + config->gpc_count * sizeof(u32)); + config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, + config->gpc_count * sizeof(u32)); + if ((config->pes_tpc_count[pes_index] == NULL) || + (config->pes_tpc_mask[pes_index] == 
NULL)) { + goto clean_up; + } + } + + config->ppc_count = 0; + config->tpc_count = 0; + config->zcb_count = 0; + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + config->gpc_tpc_count[gpc_index] = + g->ops.gr.config.get_tpc_count_in_gpc(g, config, + gpc_index); + config->tpc_count += config->gpc_tpc_count[gpc_index]; + + config->gpc_zcb_count[gpc_index] = + g->ops.gr.config.get_zcull_count_in_gpc(g, config, + gpc_index); + config->zcb_count += config->gpc_zcb_count[gpc_index]; + + for (pes_index = 0; pes_index < config->pe_count_per_gpc; + pes_index++) { + pes_tpc_mask = g->ops.gr.config.get_pes_tpc_mask(g, + config, gpc_index, pes_index); + pes_tpc_count = hweight32(pes_tpc_mask); + + /* detect PES presence by seeing if there are + * TPCs connected to it. + */ + if (pes_tpc_count != 0U) { + config->gpc_ppc_count[gpc_index]++; + } + + config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; + config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; + } + + config->ppc_count += config->gpc_ppc_count[gpc_index]; + + gpc_new_skip_mask = 0; + if (config->pe_count_per_gpc > 1U && + config->pes_tpc_count[0][gpc_index] + + config->pes_tpc_count[1][gpc_index] == 5U) { + pes_heavy_index = + config->pes_tpc_count[0][gpc_index] > + config->pes_tpc_count[1][gpc_index] ? 0U : 1U; + + gpc_new_skip_mask = + config->pes_tpc_mask[pes_heavy_index][gpc_index] ^ + (config->pes_tpc_mask[pes_heavy_index][gpc_index] & + (config->pes_tpc_mask[pes_heavy_index][gpc_index] - 1U)); + + } else if (config->pe_count_per_gpc > 1U && + (config->pes_tpc_count[0][gpc_index] + + config->pes_tpc_count[1][gpc_index] == 4U) && + (config->pes_tpc_count[0][gpc_index] != + config->pes_tpc_count[1][gpc_index])) { + pes_heavy_index = + config->pes_tpc_count[0][gpc_index] > + config->pes_tpc_count[1][gpc_index] ? 
0U : 1U; + + gpc_new_skip_mask = + config->pes_tpc_mask[pes_heavy_index][gpc_index] ^ + (config->pes_tpc_mask[pes_heavy_index][gpc_index] & + (config->pes_tpc_mask[pes_heavy_index][gpc_index] - 1U)); + } + config->gpc_skip_mask[gpc_index] = gpc_new_skip_mask; + } + + nvgpu_log_info(g, "max_gpc_count: %d", config->max_gpc_count); + nvgpu_log_info(g, "max_tpc_per_gpc_count: %d", config->max_tpc_per_gpc_count); + nvgpu_log_info(g, "max_zcull_per_gpc_count: %d", config->max_zcull_per_gpc_count); + nvgpu_log_info(g, "max_tpc_count: %d", config->max_tpc_count); + nvgpu_log_info(g, "gpc_count: %d", config->gpc_count); + nvgpu_log_info(g, "pe_count_per_gpc: %d", config->pe_count_per_gpc); + nvgpu_log_info(g, "tpc_count: %d", config->tpc_count); + nvgpu_log_info(g, "ppc_count: %d", config->ppc_count); + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + nvgpu_log_info(g, "gpc_tpc_count[%d] : %d", + gpc_index, config->gpc_tpc_count[gpc_index]); + } + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + nvgpu_log_info(g, "gpc_zcb_count[%d] : %d", + gpc_index, config->gpc_zcb_count[gpc_index]); + } + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + nvgpu_log_info(g, "gpc_ppc_count[%d] : %d", + gpc_index, config->gpc_ppc_count[gpc_index]); + } + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + nvgpu_log_info(g, "gpc_skip_mask[%d] : %d", + gpc_index, config->gpc_skip_mask[gpc_index]); + } + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + for (pes_index = 0; + pes_index < config->pe_count_per_gpc; + pes_index++) { + nvgpu_log_info(g, "pes_tpc_count[%d][%d] : %d", + pes_index, gpc_index, + config->pes_tpc_count[pes_index][gpc_index]); + } + } + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + for (pes_index = 0; + pes_index < config->pe_count_per_gpc; + pes_index++) { + nvgpu_log_info(g, "pes_tpc_mask[%d][%d] : %d", + pes_index, gpc_index, + config->pes_tpc_mask[pes_index][gpc_index]); + } + } + + return config; + +clean_up: + nvgpu_kfree(g, config); + return NULL; +} + +static u32 prime_set[18] = { + 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 }; + +/* + * Return map tiles count for given index + * Return 0 if index is out-of-bounds + */ +u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, u32 index) +{ + if (index >= config->map_tile_count) { + return 0; + } + + return config->map_tiles[index]; +} + +u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config) +{ + return config->map_row_offset; +} + +int nvgpu_gr_config_init_map_tiles(struct gk20a *g, + struct nvgpu_gr_config *config) +{ + s32 comm_denom; + s32 mul_factor; + s32 *init_frac = NULL; + s32 *init_err = NULL; + s32 *run_err = NULL; + u32 *sorted_num_tpcs = NULL; + u32 *sorted_to_unsorted_gpc_map = NULL; + u32 gpc_index; + u32 gpc_mark = 0; + u32 num_tpc; + u32 max_tpc_count = 0; + u32 swap; + u32 tile_count; + u32 index; + bool delete_map = false; + bool gpc_sorted; + int ret = 0; + u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); + u32 map_tile_count = num_gpcs * num_tpc_per_gpc; + + init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); + init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); + run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); + sorted_num_tpcs = + nvgpu_kzalloc(g, (size_t)num_gpcs * + (size_t)num_tpc_per_gpc * + sizeof(s32)); + sorted_to_unsorted_gpc_map = + nvgpu_kzalloc(g, 
(size_t)num_gpcs * sizeof(s32)); + + if (!((init_frac != NULL) && + (init_err != NULL) && + (run_err != NULL) && + (sorted_num_tpcs != NULL) && + (sorted_to_unsorted_gpc_map != NULL))) { + ret = -ENOMEM; + goto clean_up; + } + + config->map_row_offset = 0xFFFFFFFFU; + + if (config->tpc_count == 3U) { + config->map_row_offset = 2; + } else if (config->tpc_count < 3U) { + config->map_row_offset = 1; + } else { + config->map_row_offset = 3; + + for (index = 1U; index < 18U; index++) { + u32 prime = prime_set[index]; + if ((config->tpc_count % prime) != 0U) { + config->map_row_offset = prime; + break; + } + } + } + + switch (config->tpc_count) { + case 15: + config->map_row_offset = 6; + break; + case 14: + config->map_row_offset = 5; + break; + case 13: + config->map_row_offset = 2; + break; + case 11: + config->map_row_offset = 7; + break; + case 10: + config->map_row_offset = 6; + break; + case 7: + case 5: + config->map_row_offset = 1; + break; + default: + break; + } + + if (config->map_tiles != NULL) { + if (config->map_tile_count != config->tpc_count) { + delete_map = true; + } + + for (tile_count = 0; tile_count < config->map_tile_count; tile_count++) { + if (nvgpu_gr_config_get_map_tile_count(config, tile_count) + >= config->tpc_count) { + delete_map = true; + } + } + + if (delete_map) { + nvgpu_kfree(g, config->map_tiles); + config->map_tiles = NULL; + config->map_tile_count = 0; + } + } + + if (config->map_tiles == NULL) { + config->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8)); + if (config->map_tiles == NULL) { + ret = -ENOMEM; + goto clean_up; + } + config->map_tile_count = map_tile_count; + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + sorted_num_tpcs[gpc_index] = config->gpc_tpc_count[gpc_index]; + sorted_to_unsorted_gpc_map[gpc_index] = gpc_index; + } + + gpc_sorted = false; + while (!gpc_sorted) { + gpc_sorted = true; + for (gpc_index = 0U; gpc_index < config->gpc_count - 1U; gpc_index++) { + if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) { + gpc_sorted = false; + swap = sorted_num_tpcs[gpc_index]; + sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U]; + sorted_num_tpcs[gpc_index + 1U] = swap; + swap = sorted_to_unsorted_gpc_map[gpc_index]; + sorted_to_unsorted_gpc_map[gpc_index] = + sorted_to_unsorted_gpc_map[gpc_index + 1U]; + sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap; + } + } + } + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + if (config->gpc_tpc_count[gpc_index] > max_tpc_count) { + max_tpc_count = config->gpc_tpc_count[gpc_index]; + } + } + + mul_factor = S32(config->gpc_count) * S32(max_tpc_count); + if ((U32(mul_factor) & 0x1U) != 0U) { + mul_factor = 2; + } else { + mul_factor = 1; + } + + comm_denom = S32(config->gpc_count) * S32(max_tpc_count) * mul_factor; + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + num_tpc = sorted_num_tpcs[gpc_index]; + + init_frac[gpc_index] = S32(num_tpc) * S32(config->gpc_count) * mul_factor; + + if (num_tpc != 0U) { + init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2; + } else { + init_err[gpc_index] = 0; + } + + run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index]; + } + + while (gpc_mark < config->tpc_count) { + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + if ((run_err[gpc_index] * 2) >= comm_denom) { + config->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index]; + run_err[gpc_index] += init_frac[gpc_index] - comm_denom; + } else { + 
run_err[gpc_index] += init_frac[gpc_index]; + } + } + } + } + +clean_up: + nvgpu_kfree(g, init_frac); + nvgpu_kfree(g, init_err); + nvgpu_kfree(g, run_err); + nvgpu_kfree(g, sorted_num_tpcs); + nvgpu_kfree(g, sorted_to_unsorted_gpc_map); + + if (ret != 0) { + nvgpu_err(g, "fail"); + } else { + nvgpu_log_fn(g, "done"); + } + + return ret; +} + +void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config) +{ + u32 index; + + nvgpu_kfree(g, config->gpc_tpc_count); + nvgpu_kfree(g, config->gpc_zcb_count); + nvgpu_kfree(g, config->gpc_ppc_count); + nvgpu_kfree(g, config->gpc_skip_mask); + nvgpu_kfree(g, config->gpc_tpc_mask); + nvgpu_kfree(g, config->map_tiles); + for (index = 0U; index < config->pe_count_per_gpc; + index++) { + nvgpu_kfree(g, config->pes_tpc_count[index]); + nvgpu_kfree(g, config->pes_tpc_mask[index]); + } + +} + +u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config) +{ + return config->max_gpc_count; +} + +u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config) +{ + return config->max_tpc_per_gpc_count; +} + +u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config) +{ + return config->max_zcull_per_gpc_count; +} + +u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config) +{ + return config->max_tpc_count; +} + +u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config) +{ + return config->gpc_count; +} + +u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config) +{ + return config->tpc_count; +} + +u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config) +{ + return config->ppc_count; +} + +u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config) +{ + return config->zcb_count; +} + +u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config) +{ + return config->pe_count_per_gpc; +} + +u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + return config->gpc_ppc_count[gpc_index]; +} + +u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + if (gpc_index >= config->gpc_count) { + return 0; + } + return config->gpc_tpc_count[gpc_index]; +} + +u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + return config->gpc_zcb_count[gpc_index]; +} + +u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config, + u32 gpc_index, u32 pes_index) +{ + return config->pes_tpc_count[pes_index][gpc_index]; +} + +u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + return config->gpc_tpc_mask[gpc_index]; +} + +u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + return config->gpc_skip_mask[gpc_index]; +} + +u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config, + u32 gpc_index, u32 pes_index) +{ + return config->pes_tpc_mask[pes_index][gpc_index]; +} diff --git a/drivers/gpu/nvgpu/common/gr/config/gr_config_gm20b.c b/drivers/gpu/nvgpu/common/gr/config/gr_config_gm20b.c new file mode 100644 index 000000000..c176a3c53 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/config/gr_config_gm20b.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
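/*
 * Illustrative sketch, not part of the patch: the tile-map construction in
 * nvgpu_gr_config_init_map_tiles() above is a Bresenham-style error
 * accumulator that interleaves GPC indices so each GPC appears in
 * map_tiles[] in proportion to its TPC count. A self-contained version of
 * the same loop, using hypothetical per-GPC TPC counts (3, 1, 2) in place
 * of the values read from the config:
 */
#include <stdio.h>

#define NUM_GPCS 3U

int main(void)
{
	unsigned int gpc_tpc_count[NUM_GPCS] = { 3U, 1U, 2U };	/* assumed floorsweeping result */
	unsigned int sorted_tpcs[NUM_GPCS], sorted_to_unsorted[NUM_GPCS];
	unsigned int map_tiles[16];
	unsigned int i, j, swap, tpc_count = 0U, max_tpc = 0U, mark = 0U;
	int init_frac[NUM_GPCS], init_err[NUM_GPCS], run_err[NUM_GPCS];
	int mul_factor, comm_denom;

	for (i = 0U; i < NUM_GPCS; i++) {
		sorted_tpcs[i] = gpc_tpc_count[i];
		sorted_to_unsorted[i] = i;
		tpc_count += gpc_tpc_count[i];
		if (gpc_tpc_count[i] > max_tpc)
			max_tpc = gpc_tpc_count[i];
	}

	/* sort GPCs by descending TPC count, remembering the original index */
	for (i = 0U; i < NUM_GPCS; i++) {
		for (j = 0U; j + 1U < NUM_GPCS; j++) {
			if (sorted_tpcs[j + 1U] > sorted_tpcs[j]) {
				swap = sorted_tpcs[j];
				sorted_tpcs[j] = sorted_tpcs[j + 1U];
				sorted_tpcs[j + 1U] = swap;
				swap = sorted_to_unsorted[j];
				sorted_to_unsorted[j] = sorted_to_unsorted[j + 1U];
				sorted_to_unsorted[j + 1U] = swap;
			}
		}
	}

	mul_factor = ((NUM_GPCS * max_tpc) & 1U) != 0U ? 2 : 1;
	comm_denom = (int)(NUM_GPCS * max_tpc) * mul_factor;

	for (i = 0U; i < NUM_GPCS; i++) {
		init_frac[i] = (int)(sorted_tpcs[i] * NUM_GPCS) * mul_factor;
		init_err[i] = sorted_tpcs[i] != 0U ?
			(int)(i * max_tpc) * mul_factor - comm_denom / 2 : 0;
		run_err[i] = init_frac[i] + init_err[i];
	}

	/* emit one GPC index per tile; GPCs with more TPCs trigger more often */
	while (mark < tpc_count) {
		for (i = 0U; i < NUM_GPCS; i++) {
			if (run_err[i] * 2 >= comm_denom) {
				map_tiles[mark++] = sorted_to_unsorted[i];
				run_err[i] += init_frac[i] - comm_denom;
			} else {
				run_err[i] += init_frac[i];
			}
		}
	}

	for (i = 0U; i < tpc_count; i++)
		printf("map_tiles[%u] = %u\n", i, map_tiles[i]);	/* 0 2 1 0 0 2 */
	return 0;
}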
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "gr_config_gm20b.h" + +#include + +u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index) +{ + u32 val; + + /* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */ + val = g->ops.fuse.fuse_status_opt_tpc_gpc(g, gpc_index); + + return (~val) & (BIT32(config->max_tpc_per_gpc_count) - 1U); +} + +u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tmp; + + tmp = nvgpu_readl(g, gr_gpc0_fs_gpc_r() + gpc_stride * gpc_index); + + return gr_gpc0_fs_gpc_num_available_tpcs_v(tmp); +} + +u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tmp; + + tmp = nvgpu_readl(g, gr_gpc0_fs_gpc_r() + gpc_stride * gpc_index); + + return gr_gpc0_fs_gpc_num_available_zculls_v(tmp); +} + +u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tmp; + + tmp = nvgpu_readl(g, gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + + gpc_index * gpc_stride); + + return gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp); +} + +u32 gm20b_gr_config_get_pd_dist_skip_table_size(void) +{ + return gr_pd_dist_skip_table__size_1_v(); +} diff --git a/drivers/gpu/nvgpu/common/gr/config/gr_config_gm20b.h b/drivers/gpu/nvgpu/common/gr/config/gr_config_gm20b.h new file mode 100644 index 000000000..591e1af6c --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/config/gr_config_gm20b.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
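/*
 * Illustrative sketch, not part of the patch: in
 * gm20b_gr_config_get_gpc_tpc_mask() above, the fuse register reports
 * floorswept (disabled) TPCs as set bits, so the usable-TPC mask is the
 * complement clipped to max_tpc_per_gpc_count bits. Standalone arithmetic
 * with assumed values (4 TPCs per GPC, TPC2 fused off):
 */
#include <stdio.h>

#define BIT32(i) (1U << (i))

static unsigned int gpc_tpc_mask(unsigned int fuse_status, unsigned int max_tpc_per_gpc)
{
	return (~fuse_status) & (BIT32(max_tpc_per_gpc) - 1U);
}

int main(void)
{
	unsigned int mask = gpc_tpc_mask(0x4U, 4U);	/* fuse bit 2 set -> TPC2 disabled */
	unsigned int count = 0U, m;

	for (m = mask; m != 0U; m &= m - 1U)	/* popcount, same idea as hweight32() */
		count++;

	printf("usable TPC mask 0x%x, %u TPCs\n", mask, count);	/* 0xb, 3 */
	return 0;
}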
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CONFIG_GM20B_H +#define NVGPU_GR_CONFIG_GM20B_H + +#include + +struct gk20a; +struct nvgpu_gr_config; + +u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index); +u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index); +u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index); +u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index); +u32 gm20b_gr_config_get_pd_dist_skip_table_size(void); + +#endif /* NVGPU_GR_CONFIG_GM20B_H */ diff --git a/drivers/gpu/nvgpu/common/priv_ring/priv_ring_gm20b.c b/drivers/gpu/nvgpu/common/priv_ring/priv_ring_gm20b.c index 111049096..3e9e7434c 100644 --- a/drivers/gpu/nvgpu/common/priv_ring/priv_ring_gm20b.c +++ b/drivers/gpu/nvgpu/common/priv_ring/priv_ring_gm20b.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "priv_ring_gm20b.h" @@ -80,7 +81,7 @@ void gm20b_priv_ring_isr(struct gk20a *g) gk20a_readl(g, pri_ringstation_sys_priv_error_code_r())); } - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { if ((status1 & BIT32(gpc)) != 0U) { nvgpu_log(g, gpu_dbg_intr, "GPC%u write error. 
ADR %08x WRDAT %08x INFO %08x, CODE %08x", gpc, gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_adr_r() + gpc * gpc_priv_stride), diff --git a/drivers/gpu/nvgpu/common/priv_ring/priv_ring_gp10b.c b/drivers/gpu/nvgpu/common/priv_ring/priv_ring_gp10b.c index 44d97a5dc..b8acaaba2 100644 --- a/drivers/gpu/nvgpu/common/priv_ring/priv_ring_gp10b.c +++ b/drivers/gpu/nvgpu/common/priv_ring/priv_ring_gp10b.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -157,7 +158,7 @@ void gp10b_priv_ring_isr(struct gk20a *g) if (status1 != 0U) { gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_PRIV_STRIDE); - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { offset = gpc * gpc_stride; if ((status1 & BIT32(gpc)) != 0U) { error_info = gk20a_readl(g, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 02513bdfb..4e8f82a39 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "gr_gk20a.h" #include "gk20a/fecs_trace_gk20a.h" @@ -819,72 +820,61 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) return 0; } -/* - * Return map tiles count for given index - * Return 0 if index is out-of-bounds - */ -static u32 gr_gk20a_get_map_tile_count(struct gr_gk20a *gr, u32 index) -{ - if (index >= gr->map_tile_count) { - return 0; - } - - return gr->map_tiles[index]; -} - int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) { u32 norm_entries, norm_shift; u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod; u32 map0, map1, map2, map3, map4, map5; - if (gr->map_tiles == NULL) { + if (gr->config->map_tiles == NULL) { return -1; } nvgpu_log_fn(g, " "); gk20a_writel(g, gr_crstr_map_table_cfg_r(), - gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) | - gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count)); + gr_crstr_map_table_cfg_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr->config)) | + gr_crstr_map_table_cfg_num_entries_f( + nvgpu_gr_config_get_tpc_count(gr->config))); - map0 = gr_crstr_gpc_map0_tile0_f(gr_gk20a_get_map_tile_count(gr, 0)) | - gr_crstr_gpc_map0_tile1_f(gr_gk20a_get_map_tile_count(gr, 1)) | - gr_crstr_gpc_map0_tile2_f(gr_gk20a_get_map_tile_count(gr, 2)) | - gr_crstr_gpc_map0_tile3_f(gr_gk20a_get_map_tile_count(gr, 3)) | - gr_crstr_gpc_map0_tile4_f(gr_gk20a_get_map_tile_count(gr, 4)) | - gr_crstr_gpc_map0_tile5_f(gr_gk20a_get_map_tile_count(gr, 5)); + map0 = gr_crstr_gpc_map0_tile0_f(nvgpu_gr_config_get_map_tile_count(gr->config, 0)) | + gr_crstr_gpc_map0_tile1_f(nvgpu_gr_config_get_map_tile_count(gr->config, 1)) | + gr_crstr_gpc_map0_tile2_f(nvgpu_gr_config_get_map_tile_count(gr->config, 2)) | + gr_crstr_gpc_map0_tile3_f(nvgpu_gr_config_get_map_tile_count(gr->config, 3)) | + gr_crstr_gpc_map0_tile4_f(nvgpu_gr_config_get_map_tile_count(gr->config, 4)) | + gr_crstr_gpc_map0_tile5_f(nvgpu_gr_config_get_map_tile_count(gr->config, 5)); - map1 = gr_crstr_gpc_map1_tile6_f(gr_gk20a_get_map_tile_count(gr, 6)) | - gr_crstr_gpc_map1_tile7_f(gr_gk20a_get_map_tile_count(gr, 7)) | - gr_crstr_gpc_map1_tile8_f(gr_gk20a_get_map_tile_count(gr, 8)) | - gr_crstr_gpc_map1_tile9_f(gr_gk20a_get_map_tile_count(gr, 9)) | - gr_crstr_gpc_map1_tile10_f(gr_gk20a_get_map_tile_count(gr, 10)) | - gr_crstr_gpc_map1_tile11_f(gr_gk20a_get_map_tile_count(gr, 11)); + map1 = 
gr_crstr_gpc_map1_tile6_f(nvgpu_gr_config_get_map_tile_count(gr->config, 6)) | + gr_crstr_gpc_map1_tile7_f(nvgpu_gr_config_get_map_tile_count(gr->config, 7)) | + gr_crstr_gpc_map1_tile8_f(nvgpu_gr_config_get_map_tile_count(gr->config, 8)) | + gr_crstr_gpc_map1_tile9_f(nvgpu_gr_config_get_map_tile_count(gr->config, 9)) | + gr_crstr_gpc_map1_tile10_f(nvgpu_gr_config_get_map_tile_count(gr->config, 10)) | + gr_crstr_gpc_map1_tile11_f(nvgpu_gr_config_get_map_tile_count(gr->config, 11)); - map2 = gr_crstr_gpc_map2_tile12_f(gr_gk20a_get_map_tile_count(gr, 12)) | - gr_crstr_gpc_map2_tile13_f(gr_gk20a_get_map_tile_count(gr, 13)) | - gr_crstr_gpc_map2_tile14_f(gr_gk20a_get_map_tile_count(gr, 14)) | - gr_crstr_gpc_map2_tile15_f(gr_gk20a_get_map_tile_count(gr, 15)) | - gr_crstr_gpc_map2_tile16_f(gr_gk20a_get_map_tile_count(gr, 16)) | - gr_crstr_gpc_map2_tile17_f(gr_gk20a_get_map_tile_count(gr, 17)); + map2 = gr_crstr_gpc_map2_tile12_f(nvgpu_gr_config_get_map_tile_count(gr->config, 12)) | + gr_crstr_gpc_map2_tile13_f(nvgpu_gr_config_get_map_tile_count(gr->config, 13)) | + gr_crstr_gpc_map2_tile14_f(nvgpu_gr_config_get_map_tile_count(gr->config, 14)) | + gr_crstr_gpc_map2_tile15_f(nvgpu_gr_config_get_map_tile_count(gr->config, 15)) | + gr_crstr_gpc_map2_tile16_f(nvgpu_gr_config_get_map_tile_count(gr->config, 16)) | + gr_crstr_gpc_map2_tile17_f(nvgpu_gr_config_get_map_tile_count(gr->config, 17)); - map3 = gr_crstr_gpc_map3_tile18_f(gr_gk20a_get_map_tile_count(gr, 18)) | - gr_crstr_gpc_map3_tile19_f(gr_gk20a_get_map_tile_count(gr, 19)) | - gr_crstr_gpc_map3_tile20_f(gr_gk20a_get_map_tile_count(gr, 20)) | - gr_crstr_gpc_map3_tile21_f(gr_gk20a_get_map_tile_count(gr, 21)) | - gr_crstr_gpc_map3_tile22_f(gr_gk20a_get_map_tile_count(gr, 22)) | - gr_crstr_gpc_map3_tile23_f(gr_gk20a_get_map_tile_count(gr, 23)); + map3 = gr_crstr_gpc_map3_tile18_f(nvgpu_gr_config_get_map_tile_count(gr->config, 18)) | + gr_crstr_gpc_map3_tile19_f(nvgpu_gr_config_get_map_tile_count(gr->config, 19)) | + gr_crstr_gpc_map3_tile20_f(nvgpu_gr_config_get_map_tile_count(gr->config, 20)) | + gr_crstr_gpc_map3_tile21_f(nvgpu_gr_config_get_map_tile_count(gr->config, 21)) | + gr_crstr_gpc_map3_tile22_f(nvgpu_gr_config_get_map_tile_count(gr->config, 22)) | + gr_crstr_gpc_map3_tile23_f(nvgpu_gr_config_get_map_tile_count(gr->config, 23)); - map4 = gr_crstr_gpc_map4_tile24_f(gr_gk20a_get_map_tile_count(gr, 24)) | - gr_crstr_gpc_map4_tile25_f(gr_gk20a_get_map_tile_count(gr, 25)) | - gr_crstr_gpc_map4_tile26_f(gr_gk20a_get_map_tile_count(gr, 26)) | - gr_crstr_gpc_map4_tile27_f(gr_gk20a_get_map_tile_count(gr, 27)) | - gr_crstr_gpc_map4_tile28_f(gr_gk20a_get_map_tile_count(gr, 28)) | - gr_crstr_gpc_map4_tile29_f(gr_gk20a_get_map_tile_count(gr, 29)); + map4 = gr_crstr_gpc_map4_tile24_f(nvgpu_gr_config_get_map_tile_count(gr->config, 24)) | + gr_crstr_gpc_map4_tile25_f(nvgpu_gr_config_get_map_tile_count(gr->config, 25)) | + gr_crstr_gpc_map4_tile26_f(nvgpu_gr_config_get_map_tile_count(gr->config, 26)) | + gr_crstr_gpc_map4_tile27_f(nvgpu_gr_config_get_map_tile_count(gr->config, 27)) | + gr_crstr_gpc_map4_tile28_f(nvgpu_gr_config_get_map_tile_count(gr->config, 28)) | + gr_crstr_gpc_map4_tile29_f(nvgpu_gr_config_get_map_tile_count(gr->config, 29)); - map5 = gr_crstr_gpc_map5_tile30_f(gr_gk20a_get_map_tile_count(gr, 30)) | - gr_crstr_gpc_map5_tile31_f(gr_gk20a_get_map_tile_count(gr, 31)) | + map5 = gr_crstr_gpc_map5_tile30_f(nvgpu_gr_config_get_map_tile_count(gr->config, 30)) | + gr_crstr_gpc_map5_tile31_f(nvgpu_gr_config_get_map_tile_count(gr->config, 31)) 
| gr_crstr_gpc_map5_tile32_f(0) | gr_crstr_gpc_map5_tile33_f(0) | gr_crstr_gpc_map5_tile34_f(0) | @@ -897,7 +887,7 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) gk20a_writel(g, gr_crstr_gpc_map4_r(), map4); gk20a_writel(g, gr_crstr_gpc_map5_r(), map5); - switch (gr->tpc_count) { + switch (nvgpu_gr_config_get_tpc_count(gr->config)) { case 1: norm_shift = 4; break; @@ -926,7 +916,7 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) break; } - norm_entries = gr->tpc_count << norm_shift; + norm_entries = nvgpu_gr_config_get_tpc_count(gr->config) << norm_shift; coeff5_mod = BIT32(5) % norm_entries; coeff6_mod = BIT32(6) % norm_entries; coeff7_mod = BIT32(7) % norm_entries; @@ -936,11 +926,13 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) coeff11_mod = BIT32(11) % norm_entries; gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(), - gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) | + gr_ppcs_wwdx_map_table_cfg_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr->config)) | gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) | gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) | gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) | - gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count)); + gr_ppcs_wwdx_map_table_cfg_num_entries_f( + nvgpu_gr_config_get_tpc_count(gr->config))); gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(), gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) | @@ -958,8 +950,10 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5); gk20a_writel(g, gr_rstr2d_map_table_cfg_r(), - gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) | - gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count)); + gr_rstr2d_map_table_cfg_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr->config)) | + gr_rstr2d_map_table_cfg_num_entries_f( + nvgpu_gr_config_get_tpc_count(gr->config))); gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0); gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1); @@ -971,26 +965,17 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) return 0; } -static inline u32 count_bits(u32 mask) -{ - u32 temp = mask; - u32 count; - for (count = 0U; temp != 0U; count++) { - temp &= temp - 1U; - } - - return count; -} - int gr_gk20a_init_sm_id_table(struct gk20a *g) { u32 gpc, tpc; u32 sm_id = 0; - for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) { - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + for (tpc = 0; + tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config); + tpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { - if (tpc < g->gr.gpc_tpc_count[gpc]) { + if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) { g->gr.sm_to_cluster[sm_id].tpc_index = tpc; g->gr.sm_to_cluster[sm_id].gpc_index = gpc; g->gr.sm_to_cluster[sm_id].sm_index = 0; @@ -1004,19 +989,6 @@ int gr_gk20a_init_sm_id_table(struct gk20a *g) return 0; } -/* - * Return number of TPCs in a GPC - * Return 0 if GPC index is invalid i.e. 
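/*
 * Illustrative sketch, not part of the patch: gr_gk20a_setup_rop_mapping()
 * above derives the WWDX table coefficients as powers of two reduced modulo
 * norm_entries, where norm_entries = tpc_count << norm_shift. Standalone
 * arithmetic with assumed values (tpc_count = 3, norm_shift = 3); the real
 * norm_shift comes from the switch on tpc_count, which the hunk mostly
 * elides:
 */
#include <stdio.h>

#define BIT32(i) (1U << (i))

int main(void)
{
	unsigned int tpc_count = 3U;	/* assumed */
	unsigned int norm_shift = 3U;	/* assumed */
	unsigned int norm_entries = tpc_count << norm_shift;
	unsigned int n;

	for (n = 5U; n <= 11U; n++)
		printf("coeff%u_mod = 2^%u %% %u = %u\n",
		       n, n, norm_entries, BIT32(n) % norm_entries);
	return 0;
}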
GPC is disabled - */ -u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index) -{ - if (gpc_index >= gr->gpc_count) { - return 0; - } - - return gr->gpc_tpc_count[gpc_index]; -} - int gr_gk20a_init_fs_state(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -1057,14 +1029,22 @@ int gr_gk20a_init_fs_state(struct gk20a *g) reg_index++, gpc_id += 8U) { tpc_per_gpc = - gr_pd_num_tpc_per_gpc_count0_f(gr_gk20a_get_tpc_count(gr, gpc_id + 0U)) | - gr_pd_num_tpc_per_gpc_count1_f(gr_gk20a_get_tpc_count(gr, gpc_id + 1U)) | - gr_pd_num_tpc_per_gpc_count2_f(gr_gk20a_get_tpc_count(gr, gpc_id + 2U)) | - gr_pd_num_tpc_per_gpc_count3_f(gr_gk20a_get_tpc_count(gr, gpc_id + 3U)) | - gr_pd_num_tpc_per_gpc_count4_f(gr_gk20a_get_tpc_count(gr, gpc_id + 4U)) | - gr_pd_num_tpc_per_gpc_count5_f(gr_gk20a_get_tpc_count(gr, gpc_id + 5U)) | - gr_pd_num_tpc_per_gpc_count6_f(gr_gk20a_get_tpc_count(gr, gpc_id + 6U)) | - gr_pd_num_tpc_per_gpc_count7_f(gr_gk20a_get_tpc_count(gr, gpc_id + 7U)); + gr_pd_num_tpc_per_gpc_count0_f( + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id + 0U)) | + gr_pd_num_tpc_per_gpc_count1_f( + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id + 1U)) | + gr_pd_num_tpc_per_gpc_count2_f( + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id + 2U)) | + gr_pd_num_tpc_per_gpc_count3_f( + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id + 3U)) | + gr_pd_num_tpc_per_gpc_count4_f( + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id + 4U)) | + gr_pd_num_tpc_per_gpc_count5_f( + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id + 5U)) | + gr_pd_num_tpc_per_gpc_count6_f( + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id + 6U)) | + gr_pd_num_tpc_per_gpc_count7_f( + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id + 7U)); gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(reg_index), tpc_per_gpc); gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(reg_index), tpc_per_gpc); @@ -1080,27 +1060,31 @@ int gr_gk20a_init_fs_state(struct gk20a *g) gpc_index < gr_pd_dist_skip_table__size_1_v() * 4U; gpc_index += 4U) { bool skip_mask = - (gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) != 0U) || - (gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1U]) != 0U) || - (gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2U]) != 0U) || - (gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3U]) != 0U); + (gr_pd_dist_skip_table_gpc_4n0_mask_f( + nvgpu_gr_config_get_gpc_skip_mask(gr->config, gpc_index)) != 0U) || + (gr_pd_dist_skip_table_gpc_4n1_mask_f( + nvgpu_gr_config_get_gpc_skip_mask(gr->config, gpc_index + 1U)) != 0U) || + (gr_pd_dist_skip_table_gpc_4n2_mask_f( + nvgpu_gr_config_get_gpc_skip_mask(gr->config, gpc_index + 2U)) != 0U) || + (gr_pd_dist_skip_table_gpc_4n3_mask_f( + nvgpu_gr_config_get_gpc_skip_mask(gr->config, gpc_index + 3U)) != 0U); gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4U), (u32)skip_mask); } - fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); + fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, gr->config, 0); if ((g->tpc_fs_mask_user != 0U) && - (fuse_tpc_mask == BIT32(gr->max_tpc_count) - 1U)) { + (fuse_tpc_mask == BIT32(nvgpu_gr_config_get_max_tpc_count(gr->config)) - 1U)) { u32 val = g->tpc_fs_mask_user; - val &= BIT32(gr->max_tpc_count) - U32(1); + val &= BIT32(nvgpu_gr_config_get_max_tpc_count(gr->config)) - U32(1); gk20a_writel(g, gr_cwd_fs_r(), - gr_cwd_fs_num_gpcs_f(gr->gpc_count) | + gr_cwd_fs_num_gpcs_f(nvgpu_gr_config_get_gpc_count(gr->config)) | gr_cwd_fs_num_tpcs_f((u32)hweight32(val))); } else { 
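/*
 * Illustrative sketch, not part of the patch: the gr_pd_num_tpc_per_gpc
 * writes above pack eight per-GPC TPC counts into one register through the
 * count0..count7 field helpers. A standalone version of that packing,
 * assuming 4-bit fields (the real width comes from the generated hw
 * headers) and hypothetical counts from the config getters:
 */
#include <stdio.h>

/* assumed stand-in for gr_pd_num_tpc_per_gpc_countN_f() */
static unsigned int count_f(unsigned int n, unsigned int tpcs)
{
	return (tpcs & 0xfU) << (4U * n);
}

int main(void)
{
	unsigned int gpc_tpc_count[8] = { 4U, 4U, 3U, 4U, 0U, 0U, 0U, 0U };	/* assumed */
	unsigned int reg = 0U, n;

	for (n = 0U; n < 8U; n++)
		reg |= count_f(n, gpc_tpc_count[n]);

	printf("gr_pd_num_tpc_per_gpc(0) = 0x%08x\n", reg);	/* 0x00004344 */
	return 0;
}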
gk20a_writel(g, gr_cwd_fs_r(), - gr_cwd_fs_num_gpcs_f(gr->gpc_count) | - gr_cwd_fs_num_tpcs_f(gr->tpc_count)); + gr_cwd_fs_num_gpcs_f(nvgpu_gr_config_get_gpc_count(gr->config)) | + gr_cwd_fs_num_tpcs_f(nvgpu_gr_config_get_tpc_count(gr->config))); } gk20a_writel(g, gr_bes_zrop_settings_r(), @@ -2433,26 +2417,10 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) (void) memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); - nvgpu_kfree(g, gr->gpc_tpc_count); - nvgpu_kfree(g, gr->gpc_zcb_count); - nvgpu_kfree(g, gr->gpc_ppc_count); - nvgpu_kfree(g, gr->pes_tpc_count[0]); - nvgpu_kfree(g, gr->pes_tpc_count[1]); - nvgpu_kfree(g, gr->pes_tpc_mask[0]); - nvgpu_kfree(g, gr->pes_tpc_mask[1]); + nvgpu_gr_config_deinit(g, gr->config); + nvgpu_kfree(g, gr->sm_to_cluster); - nvgpu_kfree(g, gr->gpc_skip_mask); - nvgpu_kfree(g, gr->map_tiles); nvgpu_kfree(g, gr->fbp_rop_l2_en_mask); - gr->gpc_tpc_count = NULL; - gr->gpc_zcb_count = NULL; - gr->gpc_ppc_count = NULL; - gr->pes_tpc_count[0] = NULL; - gr->pes_tpc_count[1] = NULL; - gr->pes_tpc_mask[0] = NULL; - gr->pes_tpc_mask[1] = NULL; - gr->gpc_skip_mask = NULL; - gr->map_tiles = NULL; gr->fbp_rop_l2_en_mask = NULL; nvgpu_netlist_deinit_ctx_vars(g); @@ -2476,21 +2444,17 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) { - u32 gpc_index, pes_index; - u32 pes_tpc_mask; - u32 pes_tpc_count; - u32 pes_heavy_index; - u32 gpc_new_skip_mask; - u32 tmp; - u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + u32 tmp; + + gr->config = nvgpu_gr_config_init(g); + if (gr->config == NULL) { + return -ENOMEM; + } tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r()); gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp); - tmp = gk20a_readl(g, top_num_gpcs_r()); - gr->max_gpc_count = top_num_gpcs_value_v(tmp); - tmp = gk20a_readl(g, top_num_fbps_r()); gr->max_fbps_count = top_num_fbps_value_v(tmp); @@ -2507,226 +2471,24 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) sizeof(u32)); } - tmp = gk20a_readl(g, top_tpc_per_gpc_r()); - gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp); - - gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count; - - tmp = gk20a_readl(g, top_num_fbps_r()); - gr->sys_count = top_num_fbps_value_v(tmp); - - tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r()); - gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); - - gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); - if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC, - "too many pes per gpc\n")) { - goto clean_up; - } - - gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS); - - if (gr->gpc_count == 0U) { - nvgpu_err(g, "gpc_count==0!"); - goto clean_up; - } - - if (gr->gpc_tpc_count == NULL) { - gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * - sizeof(u32)); - } else { - (void) memset(gr->gpc_tpc_count, 0, gr->gpc_count * - sizeof(u32)); - } - - if (gr->gpc_tpc_mask == NULL) { - gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->max_gpc_count * - sizeof(u32)); - } else { - (void) memset(gr->gpc_tpc_mask, 0, gr->max_gpc_count * - sizeof(u32)); - } - - if (gr->gpc_zcb_count == NULL) { - gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count * - sizeof(u32)); - } else { - (void) memset(gr->gpc_zcb_count, 0, gr->gpc_count * - sizeof(u32)); - } - - if (gr->gpc_ppc_count == NULL) { - gr->gpc_ppc_count = nvgpu_kzalloc(g, 
gr->gpc_count * - sizeof(u32)); - } else { - (void) memset(gr->gpc_ppc_count, 0, gr->gpc_count * - sizeof(u32)); - } - - if (gr->gpc_skip_mask == NULL) { - gr->gpc_skip_mask = - nvgpu_kzalloc(g, - (size_t)gr_pd_dist_skip_table__size_1_v() * - (size_t)4 * sizeof(u32)); - } else { - (void) memset(gr->gpc_skip_mask, 0, - (size_t)gr_pd_dist_skip_table__size_1_v() * - (size_t)4 * sizeof(u32)); - } - - if ((gr->gpc_tpc_count == NULL) || (gr->gpc_tpc_mask == NULL) || - (gr->gpc_zcb_count == NULL) || (gr->gpc_ppc_count == NULL) || - (gr->gpc_skip_mask == NULL)) { - goto clean_up; - } - - for (gpc_index = 0; gpc_index < gr->max_gpc_count; gpc_index++) { - if (g->ops.gr.get_gpc_tpc_mask != NULL) { - gr->gpc_tpc_mask[gpc_index] = - g->ops.gr.get_gpc_tpc_mask(g, gpc_index); - } - } - - gr->ppc_count = 0; - gr->tpc_count = 0; - gr->zcb_count = 0; - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r() + - gpc_stride * gpc_index); - - gr->gpc_tpc_count[gpc_index] = - gr_gpc0_fs_gpc_num_available_tpcs_v(tmp); - gr->tpc_count += gr->gpc_tpc_count[gpc_index]; - - gr->gpc_zcb_count[gpc_index] = - gr_gpc0_fs_gpc_num_available_zculls_v(tmp); - gr->zcb_count += gr->gpc_zcb_count[gpc_index]; - - for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) { - if (gr->pes_tpc_count[pes_index] == NULL) { - gr->pes_tpc_count[pes_index] = - nvgpu_kzalloc(g, gr->gpc_count * - sizeof(u32)); - gr->pes_tpc_mask[pes_index] = - nvgpu_kzalloc(g, gr->gpc_count * - sizeof(u32)); - if ((gr->pes_tpc_count[pes_index] == NULL) || - (gr->pes_tpc_mask[pes_index] == NULL)) { - goto clean_up; - } - } - - tmp = gk20a_readl(g, - gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + - gpc_index * gpc_stride); - - pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp); - pes_tpc_count = count_bits(pes_tpc_mask); - - /* detect PES presence by seeing if there are - * TPCs connected to it. - */ - if (pes_tpc_count != 0U) { - gr->gpc_ppc_count[gpc_index]++; - } - - gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; - gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; - } - - gr->ppc_count += gr->gpc_ppc_count[gpc_index]; - - gpc_new_skip_mask = 0; - if (gr->pe_count_per_gpc > 1U && - gr->pes_tpc_count[0][gpc_index] + - gr->pes_tpc_count[1][gpc_index] == 5U) { - pes_heavy_index = - gr->pes_tpc_count[0][gpc_index] > - gr->pes_tpc_count[1][gpc_index] ? 0U : 1U; - - gpc_new_skip_mask = - gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^ - (gr->pes_tpc_mask[pes_heavy_index][gpc_index] & - (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1U)); - - } else if (gr->pe_count_per_gpc > 1U && - (gr->pes_tpc_count[0][gpc_index] + - gr->pes_tpc_count[1][gpc_index] == 4U) && - (gr->pes_tpc_count[0][gpc_index] != - gr->pes_tpc_count[1][gpc_index])) { - pes_heavy_index = - gr->pes_tpc_count[0][gpc_index] > - gr->pes_tpc_count[1][gpc_index] ? 
0U : 1U; - - gpc_new_skip_mask = - gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^ - (gr->pes_tpc_mask[pes_heavy_index][gpc_index] & - (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1U)); - } - gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask; - } - /* allocate for max tpc per gpc */ if (gr->sm_to_cluster == NULL) { - gr->sm_to_cluster = nvgpu_kzalloc(g, (size_t)gr->gpc_count * - (size_t)gr->max_tpc_per_gpc_count * - (size_t)sm_per_tpc * - sizeof(struct sm_info)); + gr->sm_to_cluster = nvgpu_kzalloc(g, + (size_t)nvgpu_gr_config_get_gpc_count(gr->config) * + (size_t)nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * + (size_t)sm_per_tpc * + sizeof(struct sm_info)); } else { - (void) memset(gr->sm_to_cluster, 0, (size_t)gr->gpc_count * - (size_t)gr->max_tpc_per_gpc_count * - (size_t)sm_per_tpc * - sizeof(struct sm_info)); + (void) memset(gr->sm_to_cluster, 0, + (size_t)nvgpu_gr_config_get_gpc_count(gr->config) * + (size_t)nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * + (size_t)sm_per_tpc * + sizeof(struct sm_info)); } gr->no_of_sm = 0; nvgpu_log_info(g, "fbps: %d", gr->num_fbps); - nvgpu_log_info(g, "max_gpc_count: %d", gr->max_gpc_count); nvgpu_log_info(g, "max_fbps_count: %d", gr->max_fbps_count); - nvgpu_log_info(g, "max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count); - nvgpu_log_info(g, "max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count); - nvgpu_log_info(g, "max_tpc_count: %d", gr->max_tpc_count); - nvgpu_log_info(g, "sys_count: %d", gr->sys_count); - nvgpu_log_info(g, "gpc_count: %d", gr->gpc_count); - nvgpu_log_info(g, "pe_count_per_gpc: %d", gr->pe_count_per_gpc); - nvgpu_log_info(g, "tpc_count: %d", gr->tpc_count); - nvgpu_log_info(g, "ppc_count: %d", gr->ppc_count); - - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - nvgpu_log_info(g, "gpc_tpc_count[%d] : %d", - gpc_index, gr->gpc_tpc_count[gpc_index]); - } - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - nvgpu_log_info(g, "gpc_zcb_count[%d] : %d", - gpc_index, gr->gpc_zcb_count[gpc_index]); - } - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - nvgpu_log_info(g, "gpc_ppc_count[%d] : %d", - gpc_index, gr->gpc_ppc_count[gpc_index]); - } - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - nvgpu_log_info(g, "gpc_skip_mask[%d] : %d", - gpc_index, gr->gpc_skip_mask[gpc_index]); - } - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - for (pes_index = 0; - pes_index < gr->pe_count_per_gpc; - pes_index++) { - nvgpu_log_info(g, "pes_tpc_count[%d][%d] : %d", - pes_index, gpc_index, - gr->pes_tpc_count[pes_index][gpc_index]); - } - } - - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - for (pes_index = 0; - pes_index < gr->pe_count_per_gpc; - pes_index++) { - nvgpu_log_info(g, "pes_tpc_mask[%d][%d] : %d", - pes_index, gpc_index, - gr->pes_tpc_mask[pes_index][gpc_index]); - } - } g->ops.gr.bundle_cb_defaults(g); g->ops.gr.cb_size_default(g); @@ -2750,207 +2512,14 @@ clean_up: return -ENOMEM; } -static u32 prime_set[18] = { - 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 }; - -static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) -{ - s32 comm_denom; - s32 mul_factor; - s32 *init_frac = NULL; - s32 *init_err = NULL; - s32 *run_err = NULL; - u32 *sorted_num_tpcs = NULL; - u32 *sorted_to_unsorted_gpc_map = NULL; - u32 gpc_index; - u32 gpc_mark = 0; - u32 num_tpc; - u32 max_tpc_count = 0; - u32 swap; - u32 tile_count; - u32 index; - bool delete_map = false; - bool gpc_sorted; - int ret = 0; - 
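/*
 * Illustrative note, not part of the patch: both the new gr_config.c and the
 * init_gr_config() code removed here build gpc_skip_mask with
 * mask ^ (mask & (mask - 1U)), which isolates the lowest set bit of the
 * heavier PES's TPC mask, i.e. it marks a single TPC on that PES to skip so
 * the two PES units stay balanced. A standalone check with an assumed mask:
 */
#include <stdio.h>

int main(void)
{
	unsigned int pes_tpc_mask = 0x7U;	/* assumed: TPC0..TPC2 on the heavier PES */
	unsigned int skip = pes_tpc_mask ^ (pes_tpc_mask & (pes_tpc_mask - 1U));

	printf("gpc_skip_mask 0x%x\n", skip);	/* 0x1: skip the lowest TPC */
	return 0;
}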
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); - u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); - u32 map_tile_count = num_gpcs * num_tpc_per_gpc; - - init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); - init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); - run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); - sorted_num_tpcs = - nvgpu_kzalloc(g, (size_t)num_gpcs * - (size_t)num_tpc_per_gpc * - sizeof(s32)); - sorted_to_unsorted_gpc_map = - nvgpu_kzalloc(g, (size_t)num_gpcs * sizeof(s32)); - - if (!((init_frac != NULL) && - (init_err != NULL) && - (run_err != NULL) && - (sorted_num_tpcs != NULL) && - (sorted_to_unsorted_gpc_map != NULL))) { - ret = -ENOMEM; - goto clean_up; - } - - gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET; - - if (gr->tpc_count == 3U) { - gr->map_row_offset = 2; - } else if (gr->tpc_count < 3U) { - gr->map_row_offset = 1; - } else { - gr->map_row_offset = 3; - - for (index = 1U; index < 18U; index++) { - u32 prime = prime_set[index]; - if ((gr->tpc_count % prime) != 0U) { - gr->map_row_offset = prime; - break; - } - } - } - - switch (gr->tpc_count) { - case 15: - gr->map_row_offset = 6; - break; - case 14: - gr->map_row_offset = 5; - break; - case 13: - gr->map_row_offset = 2; - break; - case 11: - gr->map_row_offset = 7; - break; - case 10: - gr->map_row_offset = 6; - break; - case 7: - case 5: - gr->map_row_offset = 1; - break; - default: - break; - } - - if (gr->map_tiles != NULL) { - if (gr->map_tile_count != gr->tpc_count) { - delete_map = true; - } - - for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) { - if (gr_gk20a_get_map_tile_count(gr, tile_count) - >= gr->tpc_count) { - delete_map = true; - } - } - - if (delete_map) { - nvgpu_kfree(g, gr->map_tiles); - gr->map_tiles = NULL; - gr->map_tile_count = 0; - } - } - - if (gr->map_tiles == NULL) { - gr->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8)); - if (gr->map_tiles == NULL) { - ret = -ENOMEM; - goto clean_up; - } - gr->map_tile_count = map_tile_count; - - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index]; - sorted_to_unsorted_gpc_map[gpc_index] = gpc_index; - } - - gpc_sorted = false; - while (!gpc_sorted) { - gpc_sorted = true; - for (gpc_index = 0U; gpc_index < gr->gpc_count - 1U; gpc_index++) { - if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) { - gpc_sorted = false; - swap = sorted_num_tpcs[gpc_index]; - sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U]; - sorted_num_tpcs[gpc_index + 1U] = swap; - swap = sorted_to_unsorted_gpc_map[gpc_index]; - sorted_to_unsorted_gpc_map[gpc_index] = - sorted_to_unsorted_gpc_map[gpc_index + 1U]; - sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap; - } - } - } - - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - if (gr->gpc_tpc_count[gpc_index] > max_tpc_count) { - max_tpc_count = gr->gpc_tpc_count[gpc_index]; - } - } - - mul_factor = S32(gr->gpc_count) * S32(max_tpc_count); - if ((U32(mul_factor) & 0x1U) != 0U) { - mul_factor = 2; - } else { - mul_factor = 1; - } - - comm_denom = S32(gr->gpc_count) * S32(max_tpc_count) * mul_factor; - - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - num_tpc = sorted_num_tpcs[gpc_index]; - - init_frac[gpc_index] = S32(num_tpc) * S32(gr->gpc_count) * mul_factor; - - if (num_tpc != 0U) { - init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2; - } else { - init_err[gpc_index] = 0; - } - - 
run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index]; - } - - while (gpc_mark < gr->tpc_count) { - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - if ((run_err[gpc_index] * 2) >= comm_denom) { - gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index]; - run_err[gpc_index] += init_frac[gpc_index] - comm_denom; - } else { - run_err[gpc_index] += init_frac[gpc_index]; - } - } - } - } - -clean_up: - nvgpu_kfree(g, init_frac); - nvgpu_kfree(g, init_err); - nvgpu_kfree(g, run_err); - nvgpu_kfree(g, sorted_num_tpcs); - nvgpu_kfree(g, sorted_to_unsorted_gpc_map); - - if (ret != 0) { - nvgpu_err(g, "fail"); - } else { - nvgpu_log_fn(g, "done"); - } - - return ret; -} - static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr) { struct gr_zcull_gk20a *zcull = &gr->zcull; - zcull->aliquot_width = gr->tpc_count * 16U; + zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(gr->config) * 16U; zcull->aliquot_height = 16; - zcull->width_align_pixels = gr->tpc_count * 16U; + zcull->width_align_pixels = nvgpu_gr_config_get_tpc_count(gr->config) * 16U; zcull->height_align_pixels = 32; zcull->aliquot_size = @@ -2958,8 +2527,10 @@ static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr) /* assume no floor sweeping since we only have 1 tpc in 1 gpc */ zcull->pixel_squares_by_aliquots = - gr->zcb_count * 16U * 16U * gr->tpc_count / - (gr->gpc_count * gr->gpc_tpc_count[0]); + nvgpu_gr_config_get_zcb_count(gr->config) * 16U * 16U * + nvgpu_gr_config_get_tpc_count(gr->config) / + (nvgpu_gr_config_get_gpc_count(gr->config) * + nvgpu_gr_config_get_gpc_tpc_count(gr->config, 0U)); zcull->total_aliquots = gr_gpc0_zcull_total_ram_size_num_aliquots_f( @@ -3004,7 +2575,8 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, zcull_params->aliquot_total = zcull->total_aliquots; zcull_params->region_byte_multiplier = - gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v(); + nvgpu_gr_config_get_gpc_count(gr->config) * + gr_zcull_bytes_per_aliquot_per_gpu_v(); zcull_params->region_header_size = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * gr_zcull_save_restore_header_bytes_per_gpc_v(); @@ -3014,7 +2586,8 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); zcull_params->subregion_width_align_pixels = - gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v(); + nvgpu_gr_config_get_tpc_count(gr->config) * + gr_gpc0_zcull_zcsize_width_subregion__multiple_v(); zcull_params->subregion_height_align_pixels = gr_gpc0_zcull_zcsize_height_subregion__multiple_v(); zcull_params->subregion_count = gr_zcull_subregion_qty_v(); @@ -3623,7 +3196,7 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; u32 map_tile_count; - if (gr->map_tiles == NULL) { + if (gr->config->map_tiles == NULL) { return -1; } @@ -3648,8 +3221,10 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) return -ENOMEM; } - for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) { - map_tile_count = gr_gk20a_get_map_tile_count(gr, map_counter); + for (map_counter = 0; + map_counter < nvgpu_gr_config_get_tpc_count(gr->config); + map_counter++) { + map_tile_count = nvgpu_gr_config_get_map_tile_count(gr->config, map_counter); zcull_map_tiles[map_counter] = zcull_bank_counters[map_tile_count]; zcull_bank_counters[map_tile_count]++; @@ -3663,18 +3238,22 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct 
gr_gk20a *gr) nvgpu_kfree(g, zcull_map_tiles); nvgpu_kfree(g, zcull_bank_counters); - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - gpc_tpc_count = gr->gpc_tpc_count[gpc_index]; - gpc_zcull_count = gr->gpc_zcb_count[gpc_index]; + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { + gpc_tpc_count = nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_index); + gpc_zcull_count = nvgpu_gr_config_get_gpc_zcb_count(gr->config, gpc_index); - if (gpc_zcull_count != gr->max_zcull_per_gpc_count && + if (gpc_zcull_count != + nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config) && gpc_zcull_count < gpc_tpc_count) { nvgpu_err(g, "zcull_banks (%d) less than tpcs (%d) for gpc (%d)", gpc_zcull_count, gpc_tpc_count, gpc_index); return -EINVAL; } - if (gpc_zcull_count != gr->max_zcull_per_gpc_count && + if (gpc_zcull_count != + nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config) && gpc_zcull_count != 0U) { floorsweep = true; } @@ -3682,26 +3261,32 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */ rcp_conserv = DIV_ROUND_UP(gr_gpc0_zcull_sm_num_rcp_conservative__max_v(), - gr->gpc_tpc_count[0]); + nvgpu_gr_config_get_gpc_tpc_count(gr->config, 0U)); - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { offset = gpc_index * gpc_stride; if (floorsweep) { gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, - gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) | + gr_gpc0_zcull_ram_addr_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr->config)) | gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f( - gr->max_zcull_per_gpc_count)); + nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config))); } else { gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, - gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) | + gr_gpc0_zcull_ram_addr_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr->config)) | gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f( - gr->gpc_tpc_count[gpc_index])); + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_index))); } gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset, - gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) | - gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count)); + gr_gpc0_zcull_fs_num_active_banks_f( + nvgpu_gr_config_get_gpc_zcb_count(gr->config, gpc_index)) | + gr_gpc0_zcull_fs_num_sms_f( + nvgpu_gr_config_get_tpc_count(gr->config))); gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset, gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv)); @@ -3733,7 +3318,8 @@ void gk20a_gr_enable_gpc_exceptions(struct gk20a *g) gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); tpc_mask = - gr_gpcs_gpccs_gpc_exception_en_tpc_f(BIT32(gr->max_tpc_per_gpc_count) - 1U); + gr_gpcs_gpccs_gpc_exception_en_tpc_f( + BIT32(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config)) - 1U); gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask); } @@ -4136,7 +3722,7 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - err = gr_gk20a_init_map_tiles(g, gr); + err = nvgpu_gr_config_init_map_tiles(g, gr->config); if (err != 0) { goto clean_up; } @@ -5169,7 +4755,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, " "); - for (gpc = 0; gpc < gr->gpc_count; gpc++) { + for (gpc = 0; gpc < 
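/*
 * Illustrative sketch, not part of the patch: gr_gk20a_init_zcull() above
 * keeps the same aliquot arithmetic but reads every count through the
 * config getters. The same computation as standalone code with assumed
 * counts (1 GPC, 2 TPCs, 2 zcull banks):
 */
#include <stdio.h>

int main(void)
{
	unsigned int gpc_count = 1U;		/* assumed */
	unsigned int tpc_count = 2U;		/* assumed */
	unsigned int zcb_count = 2U;		/* assumed */
	unsigned int gpc_tpc_count0 = 2U;	/* TPCs in GPC0, assumed */

	unsigned int aliquot_width = tpc_count * 16U;
	unsigned int aliquot_height = 16U;
	unsigned int pixel_squares_by_aliquots =
		zcb_count * 16U * 16U * tpc_count /
		(gpc_count * gpc_tpc_count0);

	printf("aliquot %ux%u, pixel_squares_by_aliquots %u\n",
	       aliquot_width, aliquot_height, pixel_squares_by_aliquots);	/* 32x16, 512 */
	return 0;
}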
nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { if ((exception1 & BIT32(gpc)) == 0U) { continue; } @@ -5183,7 +4769,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, + gpc_offset); /* check if any tpc has an exception */ - for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) { + for (tpc = 0; tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc); tpc++) { if ((gr_gpc0_gpccs_gpc_exception_tpc_v(gpc_exception) & BIT32(tpc)) == 0U) { continue; @@ -5777,16 +5363,18 @@ int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, u32 gpc_num, u32 *priv_addr_table, u32 *t) { - u32 ppc_num; + u32 ppc_num; - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); - for (ppc_num = 0; ppc_num < g->gr.gpc_ppc_count[gpc_num]; ppc_num++) { - priv_addr_table[(*t)++] = pri_ppc_addr(g, pri_ppccs_addr_mask(addr), - gpc_num, ppc_num); - } + for (ppc_num = 0; + ppc_num < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc_num); + ppc_num++) { + priv_addr_table[(*t)++] = pri_ppc_addr(g, pri_ppccs_addr_mask(addr), + gpc_num, ppc_num); + } - return 0; + return 0; } /* @@ -5841,11 +5429,13 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, * tables. Convert a GPC/TPC broadcast address to unicast addresses so * that we can look up the offsets. */ if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) != 0U) { - for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { + for (gpc_num = 0; + gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config); + gpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { for (tpc_num = 0; - tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); tpc_num++) { priv_addr_table[t++] = pri_tpc_addr(g, pri_tpccs_addr_mask(addr), @@ -5865,7 +5455,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, gpc_addr = pri_gpccs_addr_mask(priv_addr); tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); - if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) { + if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) { continue; } @@ -5891,7 +5481,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, } else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { for (tpc_num = 0; - tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); tpc_num++) { priv_addr_table[t++] = pri_tpc_addr(g, pri_tpccs_addr_mask(addr), @@ -5923,8 +5513,9 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, int err = 0; struct gr_gk20a *gr = &g->gr; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count * - sm_per_tpc; + u32 potential_offsets = nvgpu_gr_config_get_max_gpc_count(gr->config) * + nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * + sm_per_tpc; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); @@ -6008,8 +5599,9 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, int err = 0; struct gr_gk20a *gr = &g->gr; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count * - sm_per_tpc; + u32 potential_offsets = nvgpu_gr_config_get_max_gpc_count(gr->config) * + nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * + sm_per_tpc; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); @@ -6117,7 +5709,7 @@ static int 
gr_gk20a_ctx_patch_smpc(struct gk20a *g, struct nvgpu_mem *mem, struct nvgpu_gr_ctx *gr_ctx) { - u32 num_gpc = g->gr.gpc_count; + u32 num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config); u32 num_tpc; u32 tpc, gpc, reg; u32 chk_addr; @@ -6135,7 +5727,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, for (reg = 0; reg < num_ovr_perf_regs; reg++) { for (gpc = 0; gpc < num_gpc; gpc++) { - num_tpc = g->gr.gpc_tpc_count[gpc]; + num_tpc = nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc); for (tpc = 0; tpc < num_tpc; tpc++) { chk_addr = ((gpc_stride * gpc) + (tpc_in_gpc_stride * tpc) + @@ -6366,7 +5958,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, * max tpc count for the gpcs,in 256b chunks. */ - max_tpc_count = gr->max_tpc_per_gpc_count; + max_tpc_count = nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config); num_ext_gpccs_ext_buffer_segments = (u32)((max_tpc_count + 1U) / 2U); @@ -6962,7 +6554,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, struct ctxsw_buf_offset_map_entry *map, u32 *count, u32 *offset, u32 max_cnt) { - u32 num_gpcs = g->gr.gpc_count; + u32 num_gpcs = nvgpu_gr_config_get_gpc_count(g->gr.config); u32 num_ppcs, num_tpcs, gpc_num, base; u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -6972,7 +6564,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) { - num_tpcs = g->gr.gpc_tpc_count[gpc_num]; + num_tpcs = nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base; if (add_ctxsw_buffer_map_entries_subunits(map, &g->netlist_vars->ctxsw_regs.pm_tpc, @@ -6982,7 +6574,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, return -EINVAL; } - num_ppcs = g->gr.gpc_ppc_count[gpc_num]; + num_ppcs = nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc_num); base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base; if (add_ctxsw_buffer_map_entries_subunits(map, &g->netlist_vars->ctxsw_regs.pm_ppc, @@ -7346,8 +6938,9 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, u32 i, j, offset, v; struct gr_gk20a *gr = &g->gr; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count * - sm_per_tpc; + u32 max_offsets = nvgpu_gr_config_get_max_gpc_count(gr->config) * + nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * + sm_per_tpc; u32 *offsets = NULL; u32 *offset_addrs = NULL; u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops}; @@ -7791,8 +7384,10 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g, gk20a_writel(g, gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); - for (gpc = 0; gpc < gr->gpc_count; gpc++) { - for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc); + tpc++) { for (sm = 0; sm < sm_per_tpc; sm++) { err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, @@ -8134,10 +7729,12 @@ int gr_gk20a_clear_sm_errors(struct gk20a *g) u32 global_esr; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - for (gpc = 0; gpc < gr->gpc_count; gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { /* check if any tpc has an exception */ - for (tpc = 0; tpc < 
gr->gpc_tpc_count[gpc]; tpc++) { + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc); + tpc++) { for (sm = 0; sm < sm_per_tpc; sm++) { global_esr = g->ops.gr.get_sm_hww_global_esr(g, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index d1316d516..fc25ed637 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -36,14 +36,11 @@ #define GR_IDLE_CHECK_MAX 200U /* usec */ #define GR_FECS_POLL_INTERVAL 5U /* usec */ -#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFFU #define INVALID_MAX_WAYS 0xFFFFFFFFU #define GK20A_FECS_UCODE_IMAGE "fecs.bin" #define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin" -#define GK20A_GR_MAX_PES_PER_GPC 3U - #define GK20A_TIMEOUT_FPGA 100000U /* 100 sec */ /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */ @@ -258,6 +255,7 @@ struct gr_gk20a { bool initialized; u32 num_fbps; + u32 max_fbps_count; u32 max_comptag_lines; u32 compbit_backing_size; @@ -266,26 +264,6 @@ struct gr_gk20a { u32 cacheline_size; u32 gobs_per_comptagline_per_slice; - u32 max_gpc_count; - u32 max_fbps_count; - u32 max_tpc_per_gpc_count; - u32 max_zcull_per_gpc_count; - u32 max_tpc_count; - - u32 sys_count; - u32 gpc_count; - u32 pe_count_per_gpc; - u32 ppc_count; - u32 *gpc_ppc_count; - u32 tpc_count; - u32 *gpc_tpc_count; - u32 *gpc_tpc_mask; - u32 zcb_count; - u32 *gpc_zcb_count; - u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC]; - u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC]; - u32 *gpc_skip_mask; - u32 bundle_cb_default_size; u32 min_gpm_fifo_depth; u32 bundle_cb_token_limit; @@ -312,9 +290,7 @@ struct gr_gk20a { struct nvgpu_gr_ctx_desc *gr_ctx_desc; - u8 *map_tiles; - u32 map_tile_count; - u32 map_row_offset; + struct nvgpu_gr_config *config; u32 max_comptag_mem; /* max memory size (MB) for comptag */ struct compbit_store_desc compbit_store; @@ -565,7 +541,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g, u32 global_esr_mask, bool check_errors); void gk20a_gr_suspend_all_sms(struct gk20a *g, u32 global_esr_mask, bool check_errors); -u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index); int gr_gk20a_set_sm_debug_mode(struct gk20a *g, struct channel_gk20a *ch, u64 sms, bool enable); bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 38bc78e74..273dce680 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "gk20a/gr_gk20a.h" @@ -109,11 +110,11 @@ u32 gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) size = gr->attrib_cb_size * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - gr->max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(gr->config); size += gr->alpha_cb_size * gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * - gr->max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(gr->config); return size; } @@ -201,17 +202,23 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); alpha_offset_in_chunk = attrib_offset_in_chunk + - gr->tpc_count * gr->attrib_cb_size; + nvgpu_gr_config_get_tpc_count(gr->config) * gr->attrib_cb_size; - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { u32 temp = gpc_stride * gpc_index; u32 temp2 = num_pes_per_gpc * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; + for (ppc_index = 
0; + ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, + gpc_index); ppc_index++) { cbm_cfg_size1 = gr->attrib_cb_default_size * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); cbm_cfg_size2 = gr->alpha_cb_default_size * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + @@ -224,7 +231,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, attrib_offset_in_chunk, patch); attrib_offset_in_chunk += gr->attrib_cb_size * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + @@ -237,7 +245,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, alpha_offset_in_chunk, patch); alpha_offset_in_chunk += gr->alpha_cb_size * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), @@ -348,11 +357,14 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_init_f()); - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { stride = gpc_stride * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; - ppc_index++) { + for (ppc_index = 0; + ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index); + ppc_index++) { val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + stride + @@ -360,7 +372,8 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * - gr->pes_tpc_count[ppc_index][gpc_index])); + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index))); gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + stride + @@ -388,11 +401,14 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) ~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { stride = gpc_stride * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; - ppc_index++) { + for (ppc_index = 0; + ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index); + ppc_index++) { val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + stride + @@ -401,7 +417,8 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) val = set_field(val, gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * - gr->pes_tpc_count[ppc_index][gpc_index])); + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index))); gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + stride + @@ -413,11 +430,11 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) val = set_field(val, gr_gpcs_swdx_tc_beta_cb_size_v_m(), gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size * - gr->gpc_ppc_count[gpc_index])); + nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index))); val = set_field(val, 
gr_gpcs_swdx_tc_beta_cb_size_div3_m(), gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size * - gr->gpc_ppc_count[gpc_index])/3U)); + nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index))/3U)); gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( ppc_index + gpc_index), val); @@ -554,18 +571,7 @@ u32 gr_gm20b_get_gpc_mask(struct gk20a *g) */ val = g->ops.fuse.fuse_status_opt_gpc(g); - return (~val) & (BIT32(gr->max_gpc_count) - 1U); -} - -u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) -{ - u32 val; - struct gr_gk20a *gr = &g->gr; - - /* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */ - val = g->ops.fuse.fuse_status_opt_tpc_gpc(g, gpc_index); - - return (~val) & (BIT32(gr->max_tpc_per_gpc_count) - 1U); + return (~val) & (BIT32(nvgpu_gr_config_get_max_gpc_count(gr->config)) - 1U); } void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) @@ -573,10 +579,11 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_access_sw(g, 0x0); - if (g->gr.gpc_tpc_mask[gpc_index] == 0x1U) { + if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0x1U) { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x1); - } else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2U) { + } else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == + 0x2U) { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1); nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0); } else { @@ -590,20 +597,24 @@ void gr_gm20b_load_tpc_mask(struct gk20a *g) u32 pes_tpc_mask = 0, fuse_tpc_mask; u32 gpc, pes; u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); + u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config); - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { - for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) { - pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] << + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { + for (pes = 0; + pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config); + pes++) { + pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask( + g->gr.config, gpc, pes) << num_tpc_per_gpc * gpc; } } - fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); + fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, 0); if ((g->tpc_fs_mask_user != 0U) && (g->tpc_fs_mask_user != fuse_tpc_mask) && - (fuse_tpc_mask == BIT32(g->gr.max_tpc_count) - U32(1))) { + (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) { u32 val = g->tpc_fs_mask_user; - val &= BIT32(g->gr.max_tpc_count) - U32(1); + val &= BIT32(max_tpc_count) - U32(1); /* skip tpc to disable the other tpc cause channel timeout */ val = BIT32(hweight32(val)) - U32(1); gk20a_writel(g, gr_fe_tpc_fs_r(), val); @@ -640,7 +651,9 @@ int gr_gm20b_load_smid_config(struct gk20a *g) } /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ - for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) { + for (i = 0U; + i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U); + i++) { u32 reg = 0; u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + gr_cwd_gpc_tpc_id_tpc0_s(); @@ -649,7 +662,7 @@ int gr_gm20b_load_smid_config(struct gk20a *g) u32 sm_id = (i * 4U) + j; u32 bits; - if (sm_id >= g->gr.tpc_count) { + if (sm_id >= nvgpu_gr_config_get_tpc_count(g->gr.config)) { break; } @@ -959,7 +972,7 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, 
gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); - if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { + if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) { gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r())); } @@ -975,7 +988,7 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); - if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { + if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) { gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r())); } diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index ca4ef05da..5d0172f14 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h @@ -79,7 +79,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs, u32 *ctrl_register_stride); -u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); u32 gr_gm20b_get_gpc_mask(struct gk20a *g); void gr_gm20b_load_tpc_mask(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index d8ef4e383..25d2f9a92 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -44,6 +44,7 @@ #include "common/fb/fb_gm20b.h" #include "common/netlist/netlist_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" +#include "common/gr/config/gr_config_gm20b.h" #include "common/therm/therm_gm20b.h" #include "common/ltc/ltc_gm20b.h" #include "common/fuse/fuse_gm20b.h" @@ -241,7 +242,6 @@ static const struct gpu_ops gm20b_ops = { .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, - .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .get_zcull_info = gr_gk20a_get_zcull_info, @@ -404,6 +404,16 @@ static const struct gpu_ops gm20b_ops = { gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + }, + .config = { + .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask, + .get_tpc_count_in_gpc = + gm20b_gr_config_get_tpc_count_in_gpc, + .get_zcull_count_in_gpc = + gm20b_gr_config_get_zcull_count_in_gpc, + .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, + .get_pd_dist_skip_table_size = + gm20b_gr_config_get_pd_dist_skip_table_size, } }, .fb = { @@ -788,6 +798,7 @@ int gm20b_init_hal(struct gk20a *g) gops->ce2 = gm20b_ops.ce2; gops->gr = gm20b_ops.gr; gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog; + gops->gr.config = gm20b_ops.gr.config; gops->fb = gm20b_ops.fb; gops->clock_gating = gm20b_ops.clock_gating; gops->fifo = gm20b_ops.fifo; diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index bf68ce9b7..5032c022f 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "gk20a/gr_gk20a.h" #include 
"gm20b/gr_gm20b.h" @@ -182,7 +183,7 @@ int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - g->gr.max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(g->gr.config); attrib_cb_size = ALIGN(attrib_cb_size, 128); nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size); diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 878025c99..5692c2f3b 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "gk20a/gr_gk20a.h" @@ -437,19 +438,25 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, } attrib_offset_in_chunk = alpha_offset_in_chunk + - gr->tpc_count * gr->alpha_cb_size; + nvgpu_gr_config_get_tpc_count(gr->config) * gr->alpha_cb_size; - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { temp = gpc_stride * gpc_index; temp2 = num_pes_per_gpc * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; + for (ppc_index = 0; + ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index); ppc_index++) { cbm_cfg_size_beta = cb_attrib_cache_size_init * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); cbm_cfg_size_alpha = gr->alpha_cb_default_size * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); cbm_cfg_size_steadystate = gr->attrib_cb_default_size * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + @@ -468,7 +475,8 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, patch); attrib_offset_in_chunk += attrib_size_in_chunk * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + @@ -481,7 +489,8 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, alpha_offset_in_chunk, patch); alpha_offset_in_chunk += gr->alpha_cb_size * - gr->pes_tpc_count[ppc_index][gpc_index]; + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), @@ -594,17 +603,19 @@ u32 gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g) gr->alpha_cb_size = gr->alpha_cb_default_size; gr->attrib_cb_size = min(gr->attrib_cb_size, - gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / g->gr.tpc_count); + gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / + nvgpu_gr_config_get_tpc_count(gr->config)); gr->alpha_cb_size = min(gr->alpha_cb_size, - gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / g->gr.tpc_count); + gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / + nvgpu_gr_config_get_tpc_count(gr->config)); size = gr->attrib_cb_size * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - gr->max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(gr->config); size += gr->alpha_cb_size * gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * - gr->max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(gr->config); size = ALIGN(size, 128); @@ -786,11 +797,14 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 
data) gr_pd_ab_dist_cfg1_max_batches_init_f()); } - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { stride = gpc_stride * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; - ppc_index++) { + for (ppc_index = 0; + ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index); + ppc_index++) { val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + stride + @@ -798,7 +812,8 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * - gr->pes_tpc_count[ppc_index][gpc_index])); + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index))); gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + stride + @@ -835,11 +850,14 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) ~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady)); - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { stride = gpc_stride * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; - ppc_index++) { + for (ppc_index = 0; + ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index); + ppc_index++) { val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + stride + @@ -848,7 +866,8 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) val = set_field(val, gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * - gr->pes_tpc_count[ppc_index][gpc_index])); + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index))); gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + stride + @@ -867,7 +886,7 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) gr_gpcs_swdx_tc_beta_cb_size_v_m(), gr_gpcs_swdx_tc_beta_cb_size_v_f( cb_size_steady * - gr->gpc_ppc_count[gpc_index])); + nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index))); gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( ppc_index + gpc_index), val); @@ -965,7 +984,7 @@ int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - g->gr.max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(g->gr.config); attrib_cb_size = ALIGN(attrib_cb_size, 128); nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size); @@ -1211,7 +1230,7 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); - if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { + if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) { gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r())); } @@ -1227,7 +1246,7 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); - if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { + 
if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) { gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r())); } @@ -1435,7 +1454,9 @@ int gr_gp10b_load_smid_config(struct gk20a *g) } /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ - for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) { + for (i = 0U; + i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U); + i++) { u32 reg = 0; u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + gr_cwd_gpc_tpc_id_tpc0_s(); @@ -1444,7 +1465,7 @@ int gr_gp10b_load_smid_config(struct gk20a *g) u32 sm_id = (i * 4U) + j; u32 bits; - if (sm_id >= g->gr.tpc_count) { + if (sm_id >= nvgpu_gr_config_get_tpc_count(g->gr.config)) { break; } @@ -1500,9 +1521,10 @@ void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_access_sw(g, 0x0); - if (g->gr.gpc_tpc_mask[gpc_index] == 0x1U) { + if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0x1U) { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x2); - } else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2U) { + } else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == + 0x2U) { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1); } else { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index c47ae9b89..0a6edcee0 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -49,6 +49,7 @@ #include "common/netlist/netlist_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" +#include "common/gr/config/gr_config_gm20b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/ltc/ltc_gm20b.h" @@ -261,7 +262,6 @@ static const struct gpu_ops gp10b_ops = { .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, - .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .get_zcull_info = gr_gk20a_get_zcull_info, @@ -451,6 +451,16 @@ static const struct gpu_ops gp10b_ops = { .set_full_preemption_ptr = gp10b_ctxsw_prog_set_full_preemption_ptr, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + }, + .config = { + .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask, + .get_tpc_count_in_gpc = + gm20b_gr_config_get_tpc_count_in_gpc, + .get_zcull_count_in_gpc = + gm20b_gr_config_get_zcull_count_in_gpc, + .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, + .get_pd_dist_skip_table_size = + gm20b_gr_config_get_pd_dist_skip_table_size, } }, .fb = { @@ -869,6 +879,7 @@ int gp10b_init_hal(struct gk20a *g) gops->ce2 = gp10b_ops.ce2; gops->gr = gp10b_ops.gr; gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog; + gops->gr.config = gp10b_ops.gr.config; gops->fb = gp10b_ops.fb; gops->clock_gating = gp10b_ops.clock_gating; gops->fifo = gp10b_ops.fifo; diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 8bbc1694d..fa66b541b 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "gk20a/gr_gk20a.h" #include "gk20a/gr_pri_gk20a.h" @@ -81,7 +82,9 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, } /* Calculate 
pix-perf-reduction-rate per GPC and find bottleneck TPC */ - for (gpc_id = 0; gpc_id < gr->gpc_count; gpc_id++) { + for (gpc_id = 0; + gpc_id < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_id++) { num_tpc_mask = gpc_tpc_mask[gpc_id]; if ((gpc_id == disable_gpc_id) && @@ -110,16 +113,19 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, * ratio represents relative throughput of the GPC */ scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] / - gr->gpc_tpc_count[gpc_id]; + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id); if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) { min_scg_gpc_pix_perf = scg_gpc_pix_perf; } /* Calculate # of surviving PES */ - for (pes_id = 0; pes_id < gr->gpc_ppc_count[gpc_id]; pes_id++) { + for (pes_id = 0; + pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id); + pes_id++) { /* Count the number of TPC on the set */ - num_tpc_mask = gr->pes_tpc_mask[pes_id][gpc_id] & + num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask( + gr->config, gpc_id, pes_id) & gpc_tpc_mask[gpc_id]; if ((gpc_id == disable_gpc_id) && @@ -149,10 +155,14 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, } /* Now calculate perf */ - scg_world_perf = (scale_factor * scg_num_pes) / gr->ppc_count; + scg_world_perf = (scale_factor * scg_num_pes) / + nvgpu_gr_config_get_ppc_count(gr->config); deviation = 0; - average_tpcs = scale_factor * average_tpcs / gr->gpc_count; - for (gpc_id =0; gpc_id < gr->gpc_count; gpc_id++) { + average_tpcs = scale_factor * average_tpcs / + nvgpu_gr_config_get_gpc_count(gr->config); + for (gpc_id =0; + gpc_id < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_id++) { diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id]; if (diff < 0) { diff = -diff; @@ -160,7 +170,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g, deviation += U32(diff); } - deviation /= gr->gpc_count; + deviation /= nvgpu_gr_config_get_gpc_count(gr->config); norm_tpc_deviation = deviation / max_tpc_gpc; @@ -216,14 +226,17 @@ int gr_gv100_init_sm_id_table(struct gk20a *g) u32 gpc, sm, pes, gtpc; u32 sm_id = 0; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - u32 num_sm = sm_per_tpc * g->gr.tpc_count; + struct gr_gk20a *gr = &g->gr; + u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config); int perf, maxperf; int err = 0; unsigned long *gpc_tpc_mask; u32 *tpc_table, *gpc_table; - gpc_table = nvgpu_kzalloc(g, g->gr.tpc_count * sizeof(u32)); - tpc_table = nvgpu_kzalloc(g, g->gr.tpc_count * sizeof(u32)); + gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * + sizeof(u32)); + tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * + sizeof(u32)); gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) * nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); @@ -235,17 +248,20 @@ int gr_gv100_init_sm_id_table(struct gk20a *g) goto exit_build_table; } - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { - for (pes = 0; pes < g->gr.gpc_ppc_count[gpc]; pes++) { - gpc_tpc_mask[gpc] |= g->gr.pes_tpc_mask[pes][gpc]; + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { + for (pes = 0; + pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc); + pes++) { + gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask( + g->gr.config, gpc, pes); } } - for (gtpc = 0; gtpc < g->gr.tpc_count; gtpc++) { + for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) { maxperf = -1; - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { 
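
Note: the hunks above, like the rest of this patch, replace direct reads of the old gr_gk20a fields with nvgpu_gr_config_* accessors. The accessor bodies live in common/gr/config/gr_config.c and are not visible in these hunks, so the following is only an assumed sketch based on the struct nvgpu_gr_config layout declared in include/nvgpu/gr/config.h later in the patch. Note in particular that nvgpu_gr_config_get_pes_tpc_count()/get_pes_tpc_mask() take (config, gpc_index, pes_index) while the backing arrays keep the old [pes][gpc] indexing, matching the gr->pes_tpc_mask[pes][gpc] accesses they replace.

#include <nvgpu/gr/config.h>

/* Assumed implementations: plain field reads with no locking or bounds
 * checks, mirroring the former direct struct accesses. */
u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config)
{
	return config->gpc_count;
}

u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
				      u32 gpc_index)
{
	return config->gpc_tpc_count[gpc_index];
}

u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
				     u32 gpc_index, u32 pes_index)
{
	/* parameter order is (gpc, pes); storage stays [pes][gpc] */
	return config->pes_tpc_mask[pes_index][gpc_index];
}
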
for_each_set_bit(tpc, &gpc_tpc_mask[gpc], - g->gr.gpc_tpc_count[gpc]) { + nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) { perf = -1; err = gr_gv100_scg_estimate_perf(g, gpc_tpc_mask, gpc, tpc, &perf); @@ -308,13 +324,13 @@ u32 gr_gv100_get_patch_slots(struct gk20a *g) * Update PE table contents * for PE table, each patch buffer update writes 32 TPCs */ - size += DIV_ROUND_UP(gr->tpc_count, 32U); + size += DIV_ROUND_UP(nvgpu_gr_config_get_tpc_count(gr->config), 32U); /* * Update the PL table contents * For PL table, each patch buffer update configures 4 TPCs */ - size += DIV_ROUND_UP(gr->tpc_count, 4U); + size += DIV_ROUND_UP(nvgpu_gr_config_get_tpc_count(gr->config), 4U); /* * We need this for all subcontexts @@ -515,5 +531,6 @@ void gr_gv100_init_hwpm_pmm_register(struct gk20a *g) g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0), 0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon); g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0), - 0xFFFFFFFFU, g->gr.gpc_count, num_gpc_perfmon); + 0xFFFFFFFFU, nvgpu_gr_config_get_gpc_count(g->gr.config), + num_gpc_perfmon); } diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 36a65192d..77796f6d9 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -39,6 +39,7 @@ #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" +#include "common/gr/config/gr_config_gm20b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp106.h" #include "common/therm/therm_gp10b.h" @@ -361,7 +362,6 @@ static const struct gpu_ops gv100_ops = { .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, - .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .get_gpc_mask = gr_gm20b_get_gpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, @@ -583,6 +583,16 @@ static const struct gpu_ops gv100_ops = { .set_type_per_veid_header = gv11b_ctxsw_prog_set_type_per_veid_header, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + }, + .config = { + .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask, + .get_tpc_count_in_gpc = + gm20b_gr_config_get_tpc_count_in_gpc, + .get_zcull_count_in_gpc = + gm20b_gr_config_get_zcull_count_in_gpc, + .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, + .get_pd_dist_skip_table_size = + gm20b_gr_config_get_pd_dist_skip_table_size, } }, .fb = { @@ -1139,6 +1149,7 @@ int gv100_init_hal(struct gk20a *g) gops->ce2 = gv100_ops.ce2; gops->gr = gv100_ops.gr; gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog; + gops->gr.config = gv100_ops.gr.config; gops->fb = gv100_ops.fb; gops->nvdec = gv100_ops.nvdec; gops->clock_gating = gv100_ops.clock_gating; diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 126c4d9d7..420a40862 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -438,7 +439,8 @@ void gr_gv11b_enable_exceptions(struct gk20a *g) /* enable exceptions */ gk20a_writel(g, gr_exception2_en_r(), 0x0U); /* BE not enabled */ - gk20a_writel(g, gr_exception1_en_r(), BIT32(gr->gpc_count) - 1U); + gk20a_writel(g, gr_exception1_en_r(), + BIT32(nvgpu_gr_config_get_gpc_count(gr->config)) - 1U); reg_val = gr_exception_en_fe_enabled_f() | gr_exception_en_memfmt_enabled_f() | @@ 
-1122,7 +1124,7 @@ void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) tpc_mask = gr_gpcs_gpccs_gpc_exception_en_tpc_f( - BIT32(gr->max_tpc_per_gpc_count) - 1U); + BIT32(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config)) - 1U); gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) | @@ -1291,17 +1293,19 @@ u32 gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g) gr->alpha_cb_size = gr->alpha_cb_default_size; gr->attrib_cb_size = min(gr->attrib_cb_size, - gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / g->gr.tpc_count); + gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / + nvgpu_gr_config_get_tpc_count(gr->config)); gr->alpha_cb_size = min(gr->alpha_cb_size, - gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / g->gr.tpc_count); + gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / + nvgpu_gr_config_get_tpc_count(gr->config)); size = gr->attrib_cb_size * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - gr->max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(gr->config); size += gr->alpha_cb_size * gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * - gr->max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(gr->config); size = ALIGN(size, 128); @@ -1531,11 +1535,14 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_init_f()); - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { stride = proj_gpc_stride_v() * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; - ppc_index++) { + for (ppc_index = 0; + ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index); + ppc_index++) { val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + stride + @@ -1543,7 +1550,7 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * - gr->pes_tpc_count[ppc_index][gpc_index])); + nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index))); gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + stride + @@ -1578,11 +1585,14 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) ~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady)); - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { stride = proj_gpc_stride_v() * gpc_index; - for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; - ppc_index++) { + for (ppc_index = 0; + ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index); + ppc_index++) { val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + stride + @@ -1591,7 +1601,8 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) val = set_field(val, gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * - gr->pes_tpc_count[ppc_index][gpc_index])); + nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc_index, ppc_index))); gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + stride + @@ -1610,7 +1621,7 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) gr_gpcs_swdx_tc_beta_cb_size_v_m(), gr_gpcs_swdx_tc_beta_cb_size_v_f( cb_size_steady * - gr->gpc_ppc_count[gpc_index])); + nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index))); 
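
Note: gr_gv11b_calc_global_ctx_buffer_size() above follows the same sizing scheme as the gm20b and gp10b versions earlier in the patch: clamp the per-PPC attribute/alpha circular-buffer sizes so a single register field cannot overflow, then scale by the maximum TPC count and align. A condensed "after" view of that hunk with the new getters spelled out (illustrative only, not part of the diff):

	u32 tpc_count = nvgpu_gr_config_get_tpc_count(gr->config);
	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(gr->config);
	u32 size;

	gr->attrib_cb_size = min(gr->attrib_cb_size,
		gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / tpc_count);
	gr->alpha_cb_size = min(gr->alpha_cb_size,
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / tpc_count);

	/* the global buffer holds one attrib CB and one alpha CB per TPC */
	size = gr->attrib_cb_size *
		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc_count;
	size += gr->alpha_cb_size *
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc_count;
	size = ALIGN(size, 128);
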
gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( ppc_index + gpc_index), val); @@ -1671,7 +1682,7 @@ int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - g->gr.max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(g->gr.config); attrib_cb_size = ALIGN(attrib_cb_size, 128); nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size); @@ -1911,10 +1922,12 @@ static void gr_gv11b_dump_gr_sm_regs(struct gk20a *g, gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r())); sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { gpc_offset = gk20a_gr_gpc_offset(g, gpc); - for (tpc = 0; tpc < g->gr.gpc_tpc_count[gpc]; tpc++) { + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc); + tpc++) { tpc_offset = gk20a_gr_tpc_offset(g, tpc); for (sm = 0; sm < sm_per_tpc; sm++) { @@ -1976,7 +1989,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); - if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { + if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) { gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r())); } @@ -1992,7 +2005,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g, gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); - if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { + if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) { gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r())); } @@ -2170,18 +2183,18 @@ void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { u32 fuse_val; - if (g->gr.gpc_tpc_mask[gpc_index] == 0U) { + if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0U) { return; } /* - * For s/w value g->gr.gpc_tpc_mask[gpc_index], bit value 1 indicates + * For s/w value nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index), bit value 1 indicates * corresponding TPC is enabled. But for h/w fuse register, bit value 1 * indicates corresponding TPC is disabled. 
* So we need to flip the bits and ensure we don't write to bits greater * than TPC count */ - fuse_val = g->gr.gpc_tpc_mask[gpc_index]; + fuse_val = nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index); fuse_val = ~fuse_val; fuse_val = fuse_val & 0xfU; /* tpc0_disable fuse is only 4-bit wide */ @@ -2678,13 +2691,15 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) nvgpu_log_fn(g, " "); - if (gr->map_tiles == NULL) { + if (gr->config->map_tiles == NULL) { return -1; } gk20a_writel(g, gr_crstr_map_table_cfg_r(), - gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) | - gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count)); + gr_crstr_map_table_cfg_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr->config)) | + gr_crstr_map_table_cfg_num_entries_f( + nvgpu_gr_config_get_tpc_count(gr->config))); /* * 6 tpc can be stored in one map register. * But number of tpcs are not always multiple of six, @@ -2702,27 +2717,33 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) switch (offset) { case 0: map = map | gr_crstr_gpc_map_tile0_f( - gr->map_tiles[base + offset]); + nvgpu_gr_config_get_map_tile_count( + gr->config, base + offset)); break; case 1: map = map | gr_crstr_gpc_map_tile1_f( - gr->map_tiles[base + offset]); + nvgpu_gr_config_get_map_tile_count( + gr->config, base + offset)); break; case 2: map = map | gr_crstr_gpc_map_tile2_f( - gr->map_tiles[base + offset]); + nvgpu_gr_config_get_map_tile_count( + gr->config, base + offset)); break; case 3: map = map | gr_crstr_gpc_map_tile3_f( - gr->map_tiles[base + offset]); + nvgpu_gr_config_get_map_tile_count( + gr->config, base + offset)); break; case 4: map = map | gr_crstr_gpc_map_tile4_f( - gr->map_tiles[base + offset]); + nvgpu_gr_config_get_map_tile_count( + gr->config, base + offset)); break; case 5: map = map | gr_crstr_gpc_map_tile5_f( - gr->map_tiles[base + offset]); + nvgpu_gr_config_get_map_tile_count( + gr->config, base + offset)); break; default: nvgpu_err(g, "incorrect rop mapping %x", offset); @@ -2736,25 +2757,33 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) } gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(), - gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) | - gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count)); + gr_ppcs_wwdx_map_table_cfg_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr->config)) | + gr_ppcs_wwdx_map_table_cfg_num_entries_f( + nvgpu_gr_config_get_tpc_count(gr->config))); for (i = 0U, j = 1U; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v(); i++, j = j + 4U) { gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i), gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f( - (BIT32(j) % gr->tpc_count)) | + (BIT32(j) % + nvgpu_gr_config_get_tpc_count(gr->config))) | gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f( - (BIT32(j + 1U) % gr->tpc_count)) | + (BIT32(j + 1U) % + nvgpu_gr_config_get_tpc_count(gr->config))) | gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f( - (BIT32(j + 2U) % gr->tpc_count)) | + (BIT32(j + 2U) % + nvgpu_gr_config_get_tpc_count(gr->config))) | gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f( - (BIT32(j + 3U) % gr->tpc_count))); + (BIT32(j + 3U) % + nvgpu_gr_config_get_tpc_count(gr->config)))); } gk20a_writel(g, gr_rstr2d_map_table_cfg_r(), - gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) | - gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count)); + gr_rstr2d_map_table_cfg_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr->config)) | + gr_rstr2d_map_table_cfg_num_entries_f( + 
nvgpu_gr_config_get_tpc_count(gr->config))); return 0; } @@ -2867,13 +2896,18 @@ u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc) u32 pes; struct gr_gk20a *gr = &g->gr; - for (pes = 0U; pes < gr->gpc_ppc_count[gpc]; pes++) { - if ((gr->pes_tpc_mask[pes][gpc] & BIT32(tpc)) != 0U) { + for (pes = 0U; + pes < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc); + pes++) { + if ((nvgpu_gr_config_get_pes_tpc_mask(gr->config, gpc, pes) & + BIT32(tpc)) != 0U) { break; } - tpc_new += gr->pes_tpc_count[pes][gpc]; + tpc_new += nvgpu_gr_config_get_pes_tpc_count(gr->config, + gpc, pes); } - temp = (BIT32(tpc) - 1U) & gr->pes_tpc_mask[pes][gpc]; + temp = (BIT32(tpc) - 1U) & + nvgpu_gr_config_get_pes_tpc_mask(gr->config, gpc, pes); temp = (u32)hweight32(temp); tpc_new += temp; @@ -2916,7 +2950,9 @@ int gr_gv11b_load_smid_config(struct gk20a *g) } /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ - for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) { + for (i = 0U; + i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U); + i++) { u32 reg = 0; u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + gr_cwd_gpc_tpc_id_tpc0_s(); @@ -3039,24 +3075,28 @@ void gr_gv11b_load_tpc_mask(struct gk20a *g) { u32 pes_tpc_mask = 0, fuse_tpc_mask; u32 gpc, pes, val; + u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config); u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */ - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { - for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) { - pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] << + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { + for (pes = 0; + pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config); + pes++) { + pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask( + g->gr.config, gpc, pes) << num_tpc_per_gpc * gpc; } } nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask); - fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, gpc); + fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc); if ((g->tpc_fs_mask_user != 0U) && (g->tpc_fs_mask_user != fuse_tpc_mask) && - (fuse_tpc_mask == BIT32(g->gr.max_tpc_count) - U32(1))) { + (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) { val = g->tpc_fs_mask_user; - val &= BIT32(g->gr.max_tpc_count) - U32(1); + val &= BIT32(max_tpc_count) - U32(1); val = BIT32(hweight32(val)) - U32(1); gk20a_writel(g, gr_fe_tpc_fs_r(0), val); } else { @@ -3506,8 +3546,10 @@ void gv11b_gr_suspend_all_sms(struct gk20a *g, gk20a_writel(g, gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); - for (gpc = 0; gpc < gr->gpc_count; gpc++) { - for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc); + tpc++) { for (sm = 0; sm < sm_per_tpc; sm++) { err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, @@ -4254,12 +4296,14 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, */ if ((broadcast_flags & PRI_BROADCAST_FLAGS_EGPC) != 0U) { nvgpu_log_info(g, "broadcast flags egpc"); - for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { + for (gpc_num = 0; + gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config); + gpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) { nvgpu_log_info(g, "broadcast flags etpc"); for (tpc_num = 0; - tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num < 
nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); tpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) != 0U) { @@ -4289,7 +4333,7 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, gpc_addr = pri_gpccs_addr_mask(priv_addr); tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); - if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) { + if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) { continue; } @@ -4303,7 +4347,7 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) { nvgpu_log_info(g, "broadcast flags etpc but not egpc"); for (tpc_num = 0; - tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); tpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) != 0U) { @@ -4425,10 +4469,12 @@ static int gr_gv11b_ecc_scrub_is_done(struct gk20a *g, return err; } - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { gpc_offset = gk20a_gr_gpc_offset(g, gpc); - for (tpc = 0; tpc < g->gr.gpc_tpc_count[gpc]; tpc++) { + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc); + tpc++) { tpc_offset = gk20a_gr_tpc_offset(g, tpc); do { @@ -4946,11 +4992,13 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, * that we can look up the offsets */ if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) != 0U) { - for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { + for (gpc_num = 0; + gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config); + gpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { for (tpc_num = 0; - tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); tpc_num++) { priv_addr_table[t++] = pri_tpc_addr(g, @@ -4972,7 +5020,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, gpc_addr = pri_gpccs_addr_mask(priv_addr); tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); - if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) { + if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) { continue; } @@ -5010,7 +5058,9 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, return -EINVAL; } - for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { + for (gpc_num = 0; + gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config); + gpc_num++) { for (domain_idx = pmm_domain_start; domain_idx < (pmm_domain_start + num_domains); domain_idx++) { @@ -5063,7 +5113,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, } else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { for (tpc_num = 0; - tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); tpc_num++) { priv_addr_table[t++] = pri_tpc_addr(g, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 996335024..814b5a5f3 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -39,6 +39,7 @@ #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" +#include "common/gr/config/gr_config_gm20b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/therm/therm_gv11b.h" @@ -314,7 +315,6 @@ static const struct gpu_ops gv11b_ops = { .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, 
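
Note: as in the gm20b, gp10b and gv100 HAL hunks earlier in this patch, the per-chip gr.get_gpc_tpc_mask hook is dropped from the gv11b ops table just below and replaced by the new gops->gr.config sub-structure, with every chip reusing the gm20b config implementations. Based on the gr_gm20b_get_gpc_tpc_mask() body removed from gr_gm20b.c above, the gm20b config op presumably keeps the same fuse-inversion logic while taking the config object explicitly; a hedged sketch, not taken from the diff:

	/* Assumed shape of gm20b_gr_config_get_gpc_tpc_mask() in
	 * common/gr/config/gr_config_gm20b.c; mirrors the deleted
	 * gr_gm20b_get_gpc_tpc_mask(). */
	u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
			struct nvgpu_gr_config *config, u32 gpc_index)
	{
		/* In NV_FUSE_STATUS_OPT_TPC_GPC a set bit means the TPC is
		 * disabled, so invert and mask to the max TPC count. */
		u32 val = g->ops.fuse.fuse_status_opt_tpc_gpc(g, gpc_index);

		return (~val) &
			(BIT32(nvgpu_gr_config_get_max_tpc_per_gpc_count(config)) - 1U);
	}

	/* Callers now dispatch through the HAL, e.g. in load_tpc_mask():
	 *   fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, 0);
	 */
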
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, - .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .get_zcull_info = gr_gk20a_get_zcull_info, @@ -544,6 +544,16 @@ static const struct gpu_ops gv11b_ops = { .set_type_per_veid_header = gv11b_ctxsw_prog_set_type_per_veid_header, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + }, + .config = { + .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask, + .get_tpc_count_in_gpc = + gm20b_gr_config_get_tpc_count_in_gpc, + .get_zcull_count_in_gpc = + gm20b_gr_config_get_zcull_count_in_gpc, + .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, + .get_pd_dist_skip_table_size = + gm20b_gr_config_get_pd_dist_skip_table_size, } }, .fb = { @@ -1006,6 +1016,7 @@ int gv11b_init_hal(struct gk20a *g) gops->ce2 = gv11b_ops.ce2; gops->gr = gv11b_ops.gr; gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog; + gops->gr.config = gv11b_ops.gr.config; gops->fb = gv11b_ops.fb; gops->clock_gating = gv11b_ops.clock_gating; gops->fifo = gv11b_ops.fifo; diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index bfbe2d047..4885b8327 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -291,7 +291,6 @@ struct gpu_ops { u32 reg_offset); int (*load_ctxsw_ucode)(struct gk20a *g); u32 (*get_gpc_mask)(struct gk20a *g); - u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); int (*alloc_obj_ctx)(struct channel_gk20a *c, u32 class_num, u32 flags); @@ -622,6 +621,20 @@ struct gpu_ops { void (*dump_ctxsw_stats)(struct gk20a *g, struct nvgpu_mem *ctx_mem); } ctxsw_prog; + + struct { + u32 (*get_gpc_tpc_mask)(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index); + u32 (*get_tpc_count_in_gpc)(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index); + u32 (*get_zcull_count_in_gpc)(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index); + u32 (*get_pes_tpc_mask)(struct gk20a *g, + struct nvgpu_gr_config *config, u32 gpc_index, + u32 pes_index); + u32 (*get_pd_dist_skip_table_size)(void); + } config; + u32 (*fecs_falcon_base_addr)(void); u32 (*gpccs_falcon_base_addr)(void); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/config.h b/drivers/gpu/nvgpu/include/nvgpu/gr/config.h new file mode 100644 index 000000000..96a52a991 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/config.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CONFIG_H +#define NVGPU_GR_CONFIG_H + +#include + +#define GK20A_GR_MAX_PES_PER_GPC 3U + +struct gk20a; + +struct nvgpu_gr_config { + u32 max_gpc_count; + u32 max_tpc_per_gpc_count; + u32 max_zcull_per_gpc_count; + u32 max_tpc_count; + + u32 gpc_count; + u32 tpc_count; + u32 ppc_count; + u32 zcb_count; + + u32 pe_count_per_gpc; + + u32 *gpc_ppc_count; + u32 *gpc_tpc_count; + u32 *gpc_zcb_count; + u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC]; + + u32 *gpc_tpc_mask; + u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC]; + u32 *gpc_skip_mask; + + u8 *map_tiles; + u32 map_tile_count; + u32 map_row_offset; +}; + +struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g); +void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config); +int nvgpu_gr_config_init_map_tiles(struct gk20a *g, + struct nvgpu_gr_config *config); + +u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, + u32 index); +u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config); + +u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config); +u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config); +u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config); +u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config); + +u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config); +u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config); +u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config); +u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config); + +u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config); + +u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config, + u32 gpc_index); +u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config, + u32 gpc_index); +u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config, + u32 gpc_index); +u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config, + u32 gpc_index, u32 pes_index); + +u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config, + u32 gpc_index); +u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config, + u32 gpc_index); +u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config, + u32 gpc_index, u32 pes_index); + +#endif /* NVGPU_GR_CONFIG_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 3b364c7d7..60841b9a5 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -281,10 +282,10 @@ gk20a_ctrl_ioctl_gpu_characteristics( gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); gpu.on_board_video_memory_size = 0; /* integrated GPU */ - gpu.num_gpc = g->gr.gpc_count; - gpu.max_gpc_count = g->gr.max_gpc_count; + gpu.num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config); + gpu.max_gpc_count = nvgpu_gr_config_get_max_gpc_count(g->gr.config); - gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; + gpu.num_tpc_per_gpc = nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config); gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ @@ -293,7 +294,7 @@ 
gk20a_ctrl_ioctl_gpu_characteristics( if (g->ops.gr.get_gpc_mask) { gpu.gpc_mask = g->ops.gr.get_gpc_mask(g); } else { - gpu.gpc_mask = BIT32(g->gr.gpc_count) - 1; + gpu.gpc_mask = BIT32(gpu.num_gpc) - 1; } gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); @@ -553,7 +554,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, { struct gr_gk20a *gr = &g->gr; int err = 0; - const u32 gpc_tpc_mask_size = sizeof(u32) * gr->max_gpc_count; + const u32 gpc_tpc_mask_size = sizeof(u32) * gr->config->max_gpc_count; if (args->mask_buf_size > 0) { size_t write_size = gpc_tpc_mask_size; @@ -564,7 +565,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, err = copy_to_user((void __user *)(uintptr_t) args->mask_buf_addr, - gr->gpc_tpc_mask, write_size); + gr->config->gpc_tpc_mask, write_size); } if (err == 0) @@ -687,7 +688,8 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state = NULL; u32 sm_count, ioctl_size, size, sm_id; - sm_count = g->gr.gpc_count * g->gr.tpc_count; + sm_count = nvgpu_gr_config_get_gpc_count(g->gr.config) * + nvgpu_gr_config_get_tpc_count(g->gr.config); ioctl_size = sm_count * sizeof(struct warpstate); ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c index 38fcf221f..2e49f1421 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -84,8 +85,9 @@ static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, } if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { - if ((arg->num_active_tpcs > gr->max_tpc_count) || - !(arg->num_active_tpcs)) { + if ((arg->num_active_tpcs > + nvgpu_gr_config_get_max_tpc_count(gr->config)) || + !(arg->num_active_tpcs)) { nvgpu_err(g, "Invalid num of active TPCs"); err = -EINVAL; goto ch_put; diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index 009342765..3187b65a5 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "os_linux.h" #include "sysfs.h" @@ -930,16 +931,17 @@ static ssize_t tpc_fs_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct gk20a *g = get_gk20a(dev); + struct nvgpu_gr_config *config = g->gr.config; unsigned long val = 0; if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; - if (!g->gr.gpc_tpc_mask) + if (!config->gpc_tpc_mask) return -ENODEV; - if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { - g->gr.gpc_tpc_mask[0] = val; + if (val && val != config->gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { + config->gpc_tpc_mask[0] = val; g->tpc_fs_mask_user = val; g->ops.gr.set_gpc_tpc_mask(g, 0); @@ -951,6 +953,7 @@ static ssize_t tpc_fs_mask_store(struct device *dev, g->gr.ctx_vars.golden_image_initialized = false; } g->gr.ctx_vars.golden_image_size = 0; + nvgpu_gr_config_deinit(g, g->gr.config); /* Cause next poweron to reinit just gr */ g->gr.sw_ready = false; } @@ -971,11 +974,13 @@ static ssize_t tpc_fs_mask_read(struct device *dev, if (err) return err; - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - if (g->ops.gr.get_gpc_tpc_mask) + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_index++) { + if (g->ops.gr.config.get_gpc_tpc_mask) tpc_fs_mask |= - g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << - 
(gr->max_tpc_per_gpc_count * gpc_index); + g->ops.gr.config.get_gpc_tpc_mask(g, gr->config, gpc_index) << + (nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * gpc_index); } gk20a_idle(g); diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c index 86d5f2ab7..9de1bccb9 100644 --- a/drivers/gpu/nvgpu/tu104/gr_tu104.c +++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "gk20a/gr_gk20a.h" #include "gk20a/gr_pri_gk20a.h" @@ -304,7 +305,8 @@ void gr_tu104_enable_gpc_exceptions(struct gk20a *g) gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); tpc_mask = - gr_gpcs_gpccs_gpc_exception_en_tpc_f(BIT32(gr->max_tpc_per_gpc_count) - 1U); + gr_gpcs_gpccs_gpc_exception_en_tpc_f( + BIT32(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config)) - 1U); gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) | diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index c60740889..80894bfa6 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -41,6 +41,7 @@ #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" +#include "common/gr/config/gr_config_gm20b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/therm/therm_gp106.h" @@ -378,7 +379,6 @@ static const struct gpu_ops tu104_ops = { .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, - .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .get_gpc_mask = gr_gm20b_get_gpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, @@ -608,6 +608,16 @@ static const struct gpu_ops tu104_ops = { .set_type_per_veid_header = gv11b_ctxsw_prog_set_type_per_veid_header, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + }, + .config = { + .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask, + .get_tpc_count_in_gpc = + gm20b_gr_config_get_tpc_count_in_gpc, + .get_zcull_count_in_gpc = + gm20b_gr_config_get_zcull_count_in_gpc, + .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, + .get_pd_dist_skip_table_size = + gm20b_gr_config_get_pd_dist_skip_table_size, } }, .fb = { @@ -1173,6 +1183,7 @@ int tu104_init_hal(struct gk20a *g) gops->ce2 = tu104_ops.ce2; gops->gr = tu104_ops.gr; gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog; + gops->gr.config = tu104_ops.gr.config; gops->fb = tu104_ops.fb; gops->nvdec = tu104_ops.nvdec; gops->clock_gating = tu104_ops.clock_gating; diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c index f641a2c04..1106c414c 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "vgpu/gm20b/vgpu_gr_gm20b.h" @@ -128,7 +129,7 @@ int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - g->gr.max_tpc_count; + nvgpu_gr_config_get_max_tpc_count(g->gr.config); struct nvgpu_mem *desc; attrib_cb_size = ALIGN(attrib_cb_size, 128); diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 
bde0b043c..abed2cd6f 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -30,6 +30,7 @@ #include "common/netlist/netlist_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" +#include "common/gr/config/gr_config_gm20b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/ltc/ltc_gm20b.h" @@ -128,7 +129,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .falcon_load_ucode = NULL, .load_ctxsw_ucode = NULL, .set_gpc_tpc_mask = NULL, - .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, .get_zcull_info = vgpu_gr_get_zcull_info, @@ -304,6 +304,9 @@ static const struct gpu_ops vgpu_gp10b_ops = { .set_full_preemption_ptr = gp10b_ctxsw_prog_set_full_preemption_ptr, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + }, + .config = { + .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, } }, .fb = { @@ -659,6 +662,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g) gops->ce2 = vgpu_gp10b_ops.ce2; gops->gr = vgpu_gp10b_ops.gr; gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog; + gops->gr.config = vgpu_gp10b_ops.gr.config; gops->fb = vgpu_gp10b_ops.fb; gops->clock_gating = vgpu_gp10b_ops.clock_gating; gops->fifo = vgpu_gp10b_ops.fifo; diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 404db9b26..2ae690910 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "gr_vgpu.h" #include "gk20a/fecs_trace_gk20a.h" @@ -582,6 +583,7 @@ out: static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct nvgpu_gr_config *config; u32 gpc_index; u32 sm_per_tpc; u32 pes_index; @@ -589,79 +591,87 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) nvgpu_log_fn(g, " "); - gr->max_gpc_count = priv->constants.max_gpc_count; - gr->gpc_count = priv->constants.gpc_count; - gr->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count; + gr->config = nvgpu_kzalloc(g, sizeof(*gr->config)); + if (gr->config == NULL) { + return -ENOMEM; + } - gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count; + config = gr->config; - gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32)); - if (!gr->gpc_tpc_count) { + config->max_gpc_count = priv->constants.max_gpc_count; + config->gpc_count = priv->constants.gpc_count; + config->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count; + + config->max_tpc_count = config->max_gpc_count * config->max_tpc_per_gpc_count; + + config->gpc_tpc_count = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32)); + if (!config->gpc_tpc_count) { goto cleanup; } - gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32)); - if (!gr->gpc_tpc_mask) { + config->gpc_tpc_mask = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32)); + if (!config->gpc_tpc_mask) { goto cleanup; } sm_per_tpc = priv->constants.sm_per_tpc; - gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count * - gr->max_tpc_per_gpc_count * + gr->sm_to_cluster = nvgpu_kzalloc(g, config->gpc_count * + config->max_tpc_per_gpc_count * sm_per_tpc * sizeof(struct sm_info)); if (!gr->sm_to_cluster) { goto cleanup; } - gr->tpc_count = 0; - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - gr->gpc_tpc_count[gpc_index] = + config->tpc_count = 0; + for 
(gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + config->gpc_tpc_count[gpc_index] = priv->constants.gpc_tpc_count[gpc_index]; - gr->tpc_count += gr->gpc_tpc_count[gpc_index]; + config->tpc_count += config->gpc_tpc_count[gpc_index]; - if (g->ops.gr.get_gpc_tpc_mask) { - gr->gpc_tpc_mask[gpc_index] = - g->ops.gr.get_gpc_tpc_mask(g, gpc_index); + if (g->ops.gr.config.get_gpc_tpc_mask) { + gr->config->gpc_tpc_mask[gpc_index] = + g->ops.gr.config.get_gpc_tpc_mask(g, + g->gr.config, gpc_index); } } - gr->pe_count_per_gpc = + config->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); - if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC, - "too many pes per gpc %u\n", gr->pe_count_per_gpc)) { + if (WARN(config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC, + "too many pes per gpc %u\n", config->pe_count_per_gpc)) { goto cleanup; } - if (gr->pe_count_per_gpc > TEGRA_VGPU_MAX_PES_COUNT_PER_GPC) { + if (config->pe_count_per_gpc > TEGRA_VGPU_MAX_PES_COUNT_PER_GPC) { nvgpu_err(g, "pe_count_per_gpc %d is too big!", - gr->pe_count_per_gpc); + config->pe_count_per_gpc); goto cleanup; } - if (gr->gpc_ppc_count == NULL) { - gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count * + if (config->gpc_ppc_count == NULL) { + config->gpc_ppc_count = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32)); } else { - (void) memset(gr->gpc_ppc_count, 0, gr->gpc_count * + (void) memset(config->gpc_ppc_count, 0, config->gpc_count * sizeof(u32)); } - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - gr->gpc_ppc_count[gpc_index] = + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + config->gpc_ppc_count[gpc_index] = priv->constants.gpc_ppc_count[gpc_index]; - for (pes_index = 0u; pes_index < gr->pe_count_per_gpc; + for (pes_index = 0u; pes_index < config->pe_count_per_gpc; pes_index++) { u32 pes_tpc_count, pes_tpc_mask; - if (gr->pes_tpc_count[pes_index] == NULL) { - gr->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, - gr->gpc_count * sizeof(u32)); - gr->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, - gr->gpc_count * sizeof(u32)); - if (gr->pes_tpc_count[pes_index] == NULL || - gr->pes_tpc_mask[pes_index] == NULL) { + if (config->pes_tpc_count[pes_index] == NULL) { + config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, + config->gpc_count * sizeof(u32)); + config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, + config->gpc_count * sizeof(u32)); + if (config->pes_tpc_count[pes_index] == NULL || + config->pes_tpc_mask[pes_index] == NULL) { goto cleanup; } } @@ -672,8 +682,8 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) pes_tpc_mask = priv->constants. 
pes_tpc_mask[TEGRA_VGPU_MAX_PES_COUNT_PER_GPC * gpc_index + pes_index]; - gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; - gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; + config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; + config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; } } @@ -688,21 +698,21 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) cleanup: nvgpu_err(g, "out of memory"); - for (pes_index = 0u; pes_index < gr->pe_count_per_gpc; pes_index++) { - nvgpu_kfree(g, gr->pes_tpc_count[pes_index]); - gr->pes_tpc_count[pes_index] = NULL; - nvgpu_kfree(g, gr->pes_tpc_mask[pes_index]); - gr->pes_tpc_mask[pes_index] = NULL; + for (pes_index = 0u; pes_index < config->pe_count_per_gpc; pes_index++) { + nvgpu_kfree(g, config->pes_tpc_count[pes_index]); + config->pes_tpc_count[pes_index] = NULL; + nvgpu_kfree(g, config->pes_tpc_mask[pes_index]); + config->pes_tpc_mask[pes_index] = NULL; } - nvgpu_kfree(g, gr->gpc_ppc_count); - gr->gpc_ppc_count = NULL; + nvgpu_kfree(g, config->gpc_ppc_count); + config->gpc_ppc_count = NULL; - nvgpu_kfree(g, gr->gpc_tpc_count); - gr->gpc_tpc_count = NULL; + nvgpu_kfree(g, config->gpc_tpc_count); + config->gpc_tpc_count = NULL; - nvgpu_kfree(g, gr->gpc_tpc_mask); - gr->gpc_tpc_mask = NULL; + nvgpu_kfree(g, config->gpc_tpc_mask); + config->gpc_tpc_mask = NULL; return err; } @@ -759,7 +769,8 @@ int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, return 0; } -u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) +u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config, + u32 gpc_index) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); @@ -907,15 +918,11 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr) gk20a_comptag_allocator_destroy(gr->g, &gr->comp_tags); - nvgpu_kfree(gr->g, gr->gpc_tpc_mask); - gr->gpc_tpc_mask = NULL; + nvgpu_gr_config_deinit(gr->g, gr->config); nvgpu_kfree(gr->g, gr->sm_to_cluster); gr->sm_to_cluster = NULL; - nvgpu_kfree(gr->g, gr->gpc_tpc_count); - gr->gpc_tpc_count = NULL; - nvgpu_kfree(gr->g, gr->fbp_rop_l2_en_mask); gr->fbp_rop_l2_en_mask = NULL; } @@ -1353,6 +1360,7 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g) struct sm_info *sm_info; int err; struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr_config *config = gr->config; size_t oob_size; void *handle = NULL; u32 sm_id; @@ -1374,8 +1382,8 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g) return -EINVAL; } - max_sm = gr->gpc_count * - gr->max_tpc_per_gpc_count * + max_sm = config->gpc_count * + config->max_tpc_per_gpc_count * priv->constants.sm_per_tpc; if (p->num_sm > max_sm) { return -EINVAL; diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/vgpu/gr_vgpu.h index 8a3e56da4..73b9ebc7d 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.h +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.h @@ -43,7 +43,8 @@ int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, u32 mode); int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, struct gr_zcull_info *zcull_params); -u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); +u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config, + u32 gpc_index); u32 vgpu_gr_get_max_fbps_count(struct gk20a *g); u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g); u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 746cbe9a1..15e8dc44b 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ 
b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -143,7 +143,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .falcon_load_ucode = NULL, .load_ctxsw_ucode = NULL, .set_gpc_tpc_mask = NULL, - .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, .get_zcull_info = vgpu_gr_get_zcull_info, @@ -351,6 +350,9 @@ static const struct gpu_ops vgpu_gv11b_ops = { .set_type_per_veid_header = gv11b_ctxsw_prog_set_type_per_veid_header, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + }, + .config = { + .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, } }, .fb = { @@ -736,6 +738,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g) gops->ce2 = vgpu_gv11b_ops.ce2; gops->gr = vgpu_gv11b_ops.gr; gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog; + gops->gr.config = vgpu_gv11b_ops.gr.config; gops->fb = vgpu_gv11b_ops.fb; gops->clock_gating = vgpu_gv11b_ops.clock_gating; gops->fifo = vgpu_gv11b_ops.fifo;
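
For reference, the accessors declared in the new include/nvgpu/gr/config.h are thin getters over struct nvgpu_gr_config; their bodies live in common/gr/config/gr_config.c, which this section shows only in part. A minimal sketch, assuming each getter simply returns the matching field (the real gr_config.c in the patch may differ in detail):

/* Sketch only: mirrors the declarations in include/nvgpu/gr/config.h. */
#include <nvgpu/gr/config.h>

u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config)
{
	return config->gpc_count;
}

u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
				      u32 gpc_index)
{
	return config->gpc_tpc_count[gpc_index];
}

u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config)
{
	return config->max_tpc_per_gpc_count;
}

u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
				     u32 gpc_index, u32 pes_index)
{
	/* pes_tpc_mask is allocated as one per-GPC array per PES. */
	return config->pes_tpc_mask[pes_index][gpc_index];
}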
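
Most hunks above follow one mechanical pattern: loops that used to read gr->gpc_count and gr->gpc_tpc_count[] directly now go through the config accessors. A self-contained illustration of the converted loop shape; the helper name and the per-TPC callback are placeholders, not code from the patch:

#include <nvgpu/gk20a.h>
#include <nvgpu/gr/config.h>

/* Hypothetical walker showing the accessor-based GPC/TPC iteration used
 * throughout this patch; do_per_tpc_work() stands in for whatever a real
 * call site programs per TPC. */
static void walk_gpc_tpc(struct gk20a *g,
			 void (*do_per_tpc_work)(struct gk20a *g,
						 u32 gpc, u32 tpc))
{
	struct nvgpu_gr_config *config = g->gr.config;
	u32 gpc, tpc;

	for (gpc = 0U; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
		for (tpc = 0U;
		     tpc < nvgpu_gr_config_get_gpc_tpc_count(config, gpc);
		     tpc++) {
			do_per_tpc_work(g, gpc, tpc);
		}
	}
}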
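
Per-chip behaviour now hangs off the new gops->gr.config sub-struct rather than the removed gops->gr.get_gpc_tpc_mask hook, and callers such as the sysfs and vgpu hunks above test the hook before dereferencing it. A sketch of that call pattern; the helper name and the all-enabled fallback are assumptions for illustration, not part of the patch:

#include <nvgpu/gk20a.h>
#include <nvgpu/gr/config.h>

/* Hypothetical helper: read the per-GPC TPC floorsweeping mask through the
 * gr.config HAL when the chip installs it, otherwise assume every TPC in
 * that GPC is enabled. */
static u32 query_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{
	struct nvgpu_gr_config *config = g->gr.config;

	if (g->ops.gr.config.get_gpc_tpc_mask != NULL) {
		return g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_index);
	}

	return (1U << nvgpu_gr_config_get_gpc_tpc_count(config, gpc_index)) - 1U;
}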