linux-nvgpu/drivers/gpu/nvgpu/common/gr/gr_config.c
Debarshi Dutta 2e3c3aada6 gpu: nvgpu: fix deinit of GR
The existing implementation of GR de-init doesn't account for multiple
instances of struct nvgpu_gr. As a fix, the following changes are made
(an illustrative sketch follows the list).

1) nvgpu_gr_free is unified for VGPU as well as native.
2) All the GR instances are freed.
3) Appropriate NULL checks are added when freeing GR memories.
4) 2D, 3D, I2M, ZBC etc. are explicitly disabled when MIG is set.
5) In ioctl_ctrl, checks are added so that no error is returned when zbc
   is NULL for VGPU, as those requests are rerouted to the RM server.
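
For illustration, here is a minimal sketch of the unified free path described
in points 1)-3). It is not the actual nvgpu_gr_free() body: the instance
array, the num_instances count and the per-instance "config" member are
assumptions made only for this example; only nvgpu_gr_config_deinit() and
nvgpu_kfree() are real calls from this file.

/* Illustrative sketch only -- not the real nvgpu_gr_free(). */
static void gr_free_all_instances_sketch(struct gk20a *g,
		struct nvgpu_gr *gr_instances, u32 num_instances)
{
	u32 i;

	if (gr_instances == NULL) {
		return;
	}

	for (i = 0U; i < num_instances; i++) {
		/* per-member teardown tolerates NULL pointers */
		nvgpu_gr_config_deinit(g, gr_instances[i].config);
		gr_instances[i].config = NULL;
	}

	nvgpu_kfree(g, gr_instances);
}

The same NULL tolerance is visible in this file: nvgpu_gr_config_deinit()
simply returns when it is handed a NULL config.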

Jira NVGPU-6920

Change-Id: Icaa40f88f523c2cdbfe3a4fd6a55681ea7a83d12
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2578500
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Dinesh T <dt@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: Antony Clince Alex <aalex@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
2021-08-23 05:27:45 -07:00

/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/grmgr.h>
#include "gr_config_priv.h"
static void gr_config_init_pes_tpc(struct gk20a *g,
struct nvgpu_gr_config *config,
u32 gpc_index)
{
u32 pes_index;
u32 pes_tpc_mask;
u32 pes_tpc_count;
for (pes_index = 0; pes_index < config->pe_count_per_gpc;
pes_index++) {
pes_tpc_mask = g->ops.gr.config.get_pes_tpc_mask(g,
config, gpc_index, pes_index);
pes_tpc_count = hweight32(pes_tpc_mask);
/* detect PES presence by seeing if there are
* TPCs connected to it.
*/
if (pes_tpc_count != 0U) {
config->gpc_ppc_count[gpc_index] = nvgpu_safe_add_u32(
config->gpc_ppc_count[gpc_index], 1U);
}
config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
}
}
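/*
 * Compute the per-GPC skip mask stored in gpc_skip_mask[] (whose backing
 * storage is sized from the PD dist skip table). For a GPC with two PES
 * units and an imbalanced TPC split (5 TPCs total, or 4 TPCs split
 * unevenly), the lowest-numbered TPC of the heavier PES is marked;
 * otherwise the mask is left at 0.
 */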
static void gr_config_init_gpc_skip_mask(struct nvgpu_gr_config *config,
u32 gpc_index)
{
u32 pes_heavy_index;
u32 gpc_new_skip_mask = 0U;
u32 pes_tpc_cnt = 0U, pes_tpc_mask = 0U;
if (config->pe_count_per_gpc <= 1U) {
goto skip_mask_end;
}
pes_tpc_cnt = nvgpu_safe_add_u32(
config->pes_tpc_count[0][gpc_index],
config->pes_tpc_count[1][gpc_index]);
pes_heavy_index =
(config->pes_tpc_count[0][gpc_index] >
config->pes_tpc_count[1][gpc_index]) ? 0U : 1U;
if ((pes_tpc_cnt == 5U) || ((pes_tpc_cnt == 4U) &&
(config->pes_tpc_count[0][gpc_index] !=
config->pes_tpc_count[1][gpc_index]))) {
pes_tpc_mask = nvgpu_safe_sub_u32(
config->pes_tpc_mask[pes_heavy_index][gpc_index], 1U);
gpc_new_skip_mask =
config->pes_tpc_mask[pes_heavy_index][gpc_index] ^
(config->pes_tpc_mask[pes_heavy_index][gpc_index] &
pes_tpc_mask);
}
skip_mask_end:
config->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
}
static void gr_config_log_info(struct gk20a *g,
struct nvgpu_gr_config *config)
{
u32 gpc_index, pes_index;
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_gpc_count: %d", config->max_gpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_count: %d", config->gpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_mask: 0x%x", config->gpc_mask);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_per_gpc_count: %d", config->max_tpc_per_gpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_count: %d", config->max_tpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "tpc_count: %d", config->tpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "sm_count_per_tpc: %d", config->sm_count_per_tpc);
#ifdef CONFIG_NVGPU_GRAPHICS
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_zcull_per_gpc_count: %d", config->max_zcull_per_gpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "zcb_count: %d", config->zcb_count);
#endif
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pe_count_per_gpc: %d", config->pe_count_per_gpc);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "ppc_count: %d", config->ppc_count);
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_count[%d] : %d",
gpc_index, config->gpc_tpc_count[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_mask[%d] : 0x%x",
gpc_index, config->gpc_tpc_mask[gpc_index]);
}
#ifdef CONFIG_NVGPU_GRAPHICS
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_zcb_count[%d] : %d",
gpc_index, config->gpc_zcb_count != NULL ?
config->gpc_zcb_count[gpc_index] : 0U);
}
#endif
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_ppc_count[%d] : %d",
gpc_index, config->gpc_ppc_count[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_skip_mask[%d] : 0x%x",
gpc_index, config->gpc_skip_mask[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
for (pes_index = 0;
pes_index < config->pe_count_per_gpc;
pes_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_count[%d][%d] : %d",
pes_index, gpc_index,
config->pes_tpc_count[pes_index][gpc_index]);
}
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
for (pes_index = 0;
pes_index < config->pe_count_per_gpc;
pes_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_mask[%d][%d] : 0x%x",
pes_index, gpc_index,
config->pes_tpc_mask[pes_index][gpc_index]);
}
}
}
static void gr_config_set_gpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config)
{
#ifdef CONFIG_NVGPU_DGPU
if (g->ops.gr.config.get_gpc_mask != NULL) {
config->gpc_mask = g->ops.gr.config.get_gpc_mask(g);
} else
#endif
{
config->gpc_mask = nvgpu_safe_sub_u32(BIT32(config->gpc_count),
1U);
}
}
static bool gr_config_alloc_valid(struct nvgpu_gr_config *config)
{
if ((config->gpc_tpc_count == NULL) || (config->gpc_tpc_mask == NULL) ||
(config->gpc_ppc_count == NULL) ||
(config->gpc_skip_mask == NULL)) {
return false;
}
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(config->g, NVGPU_SUPPORT_MIG) &&
(config->gpc_zcb_count == NULL)) {
return false;
}
#endif
return true;
}
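/*
 * Free the per-PES and per-GPC arrays allocated by
 * gr_config_alloc_struct_mem(). Called both from the allocation error
 * path and from nvgpu_gr_config_deinit().
 */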
static void gr_config_free_mem(struct gk20a *g,
struct nvgpu_gr_config *config)
{
u32 pes_index;
for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
nvgpu_kfree(g, config->pes_tpc_count[pes_index]);
nvgpu_kfree(g, config->pes_tpc_mask[pes_index]);
}
nvgpu_kfree(g, config->gpc_skip_mask);
nvgpu_kfree(g, config->gpc_ppc_count);
#ifdef CONFIG_NVGPU_GRAPHICS
nvgpu_kfree(g, config->gpc_zcb_count);
#endif
nvgpu_kfree(g, config->gpc_tpc_mask);
nvgpu_kfree(g, config->gpc_tpc_count);
}
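/*
 * Allocate the dynamically sized members of nvgpu_gr_config:
 * sm_to_cluster (sized for the maximum possible SM count, i.e.
 * gpc_count * max_tpc_per_gpc_count * sm_count_per_tpc), the per-GPC
 * count/mask arrays, the gpc_skip_mask table and the per-PES TPC
 * count/mask arrays. Returns false on any allocation failure.
 */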
static bool gr_config_alloc_struct_mem(struct gk20a *g,
struct nvgpu_gr_config *config)
{
u32 pes_index;
u32 total_tpc_cnt;
size_t sm_info_size;
size_t gpc_size, sm_size, max_gpc_cnt;
size_t pd_tbl_size;
total_tpc_cnt = nvgpu_safe_mult_u32(config->gpc_count,
config->max_tpc_per_gpc_count);
sm_size = nvgpu_safe_mult_u64((size_t)config->sm_count_per_tpc,
sizeof(struct nvgpu_sm_info));
/* allocate for max tpc per gpc */
sm_info_size = nvgpu_safe_mult_u64((size_t)total_tpc_cnt, sm_size);
config->sm_to_cluster = nvgpu_kzalloc(g, sm_info_size);
if (config->sm_to_cluster == NULL) {
nvgpu_err(g, "sm_to_cluster == NULL");
goto alloc_err;
}
#ifdef CONFIG_NVGPU_SM_DIVERSITY
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) {
config->sm_to_cluster_redex_config =
nvgpu_kzalloc(g, sm_info_size);
if (config->sm_to_cluster_redex_config == NULL) {
nvgpu_err(g, "sm_to_cluster_redex_config == NULL");
goto clean_alloc_mem;
}
}
#endif
config->no_of_sm = 0;
gpc_size = nvgpu_safe_mult_u64((size_t)config->gpc_count, sizeof(u32));
max_gpc_cnt = nvgpu_safe_mult_u64((size_t)config->max_gpc_count, sizeof(u32));
config->gpc_tpc_count = nvgpu_kzalloc(g, gpc_size);
config->gpc_tpc_mask = nvgpu_kzalloc(g, max_gpc_cnt);
config->gpc_tpc_mask_physical = nvgpu_kzalloc(g, max_gpc_cnt);
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g,
GPU_LIT_NUM_ZCULL_BANKS);
config->gpc_zcb_count = nvgpu_kzalloc(g, gpc_size);
}
#endif
config->gpc_ppc_count = nvgpu_kzalloc(g, gpc_size);
pd_tbl_size = nvgpu_safe_mult_u64(
(size_t)g->ops.gr.config.get_pd_dist_skip_table_size(),
sizeof(u32));
pd_tbl_size = nvgpu_safe_mult_u64(pd_tbl_size, 4UL);
config->gpc_skip_mask = nvgpu_kzalloc(g, pd_tbl_size);
if (gr_config_alloc_valid(config) == false) {
goto clean_alloc_mem;
}
for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, gpc_size);
config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, gpc_size);
if ((config->pes_tpc_count[pes_index] == NULL) ||
(config->pes_tpc_mask[pes_index] == NULL)) {
goto clean_alloc_mem;
}
}
return true;
clean_alloc_mem:
nvgpu_kfree(g, config->sm_to_cluster);
config->sm_to_cluster = NULL;
#ifdef CONFIG_NVGPU_SM_DIVERSITY
if (config->sm_to_cluster_redex_config != NULL) {
nvgpu_kfree(g, config->sm_to_cluster_redex_config);
config->sm_to_cluster_redex_config = NULL;
}
#endif
gr_config_free_mem(g, config);
alloc_err:
return false;
}
static int gr_config_init_mig_gpcs(struct nvgpu_gr_config *config)
{
struct gk20a *g = config->g;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
config->max_gpc_count = nvgpu_grmgr_get_max_gpc_count(g);
config->gpc_count = nvgpu_grmgr_get_gr_num_gpcs(g, cur_gr_instance);
if (config->gpc_count == 0U) {
nvgpu_err(g, "gpc_count==0!");
return -EINVAL;
}
config->gpc_mask = nvgpu_grmgr_get_gr_logical_gpc_mask(
g, cur_gr_instance);
return 0;
}
static int gr_config_init_gpcs(struct nvgpu_gr_config *config)
{
struct gk20a *g = config->g;
config->max_gpc_count = g->ops.top.get_max_gpc_count(g);
config->gpc_count = g->ops.priv_ring.get_gpc_count(g);
if (config->gpc_count == 0U) {
nvgpu_err(g, "gpc_count==0!");
return -EINVAL;
}
gr_config_set_gpc_mask(g, config);
return 0;
}
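/*
 * Build the GR configuration for the current GR instance. GPC counts and
 * masks are queried either through the MIG gr manager
 * (gr_config_init_mig_gpcs) or directly from HW (gr_config_init_gpcs),
 * fuse-based TPC masks are read per physical GPC id, and the derived
 * per-GPC TPC/PPC counts and skip masks are filled in.
 */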
struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
{
struct nvgpu_gr_config *config;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_index;
u32 gpc_phys_id;
u32 gpc_id;
int err;
config = nvgpu_kzalloc(g, sizeof(*config));
if (config == NULL) {
return NULL;
}
config->g = g;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
err = gr_config_init_mig_gpcs(config);
if (err < 0) {
nvgpu_err(g, "MIG GPC config init failed");
nvgpu_kfree(g, config);
return NULL;
}
} else {
err = gr_config_init_gpcs(config);
if (err < 0) {
nvgpu_err(g, "GPC config init failed");
nvgpu_kfree(g, config);
return NULL;
}
}
/* Required to read gpc_tpc_mask below */
config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
config->max_tpc_count = nvgpu_safe_mult_u32(config->max_gpc_count,
config->max_tpc_per_gpc_count);
config->pe_count_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_PES_PER_GPC);
if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
nvgpu_err(g, "too many pes per gpc");
goto clean_up_init;
}
config->sm_count_per_tpc =
nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
if (config->sm_count_per_tpc == 0U) {
nvgpu_err(g, "sm_count_per_tpc==0!");
goto clean_up_init;
}
if (gr_config_alloc_struct_mem(g, config) == false) {
goto clean_up_init;
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
/*
* Fuse registers must be queried with physical gpc-id and not
* the logical ones. For tu104 and before chips logical gpc-id
* is same as physical gpc-id for non-floorswept config but for
* chips after tu104 it may not be true.
*/
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
cur_gr_instance, gpc_index);
/*
* The gpc_tpc_mask_physical masks are ordered by gpc_id.
* Where gpc_id = gpc_logical_id when MIG=true, else
* gpc_physical_id.
*/
gpc_id = gpc_phys_id;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
gpc_id = nvgpu_grmgr_get_gr_gpc_logical_id(g,
cur_gr_instance, gpc_index);
}
config->gpc_tpc_mask[gpc_index] =
g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
config->gpc_tpc_mask_physical[gpc_id] =
g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
}
config->ppc_count = 0;
config->tpc_count = 0;
#ifdef CONFIG_NVGPU_GRAPHICS
config->zcb_count = 0;
#endif
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
config->gpc_tpc_count[gpc_index] =
g->ops.gr.config.get_tpc_count_in_gpc(g, config,
gpc_index);
config->tpc_count = nvgpu_safe_add_u32(config->tpc_count,
config->gpc_tpc_count[gpc_index]);
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
config->gpc_zcb_count[gpc_index] =
g->ops.gr.config.get_zcull_count_in_gpc(g, config,
gpc_index);
config->zcb_count = nvgpu_safe_add_u32(config->zcb_count,
config->gpc_zcb_count[gpc_index]);
}
#endif
gr_config_init_pes_tpc(g, config, gpc_index);
config->ppc_count = nvgpu_safe_add_u32(config->ppc_count,
config->gpc_ppc_count[gpc_index]);
gr_config_init_gpc_skip_mask(config, gpc_index);
}
gr_config_log_info(g, config);
return config;
clean_up_init:
nvgpu_kfree(g, config);
return NULL;
}
#ifdef CONFIG_NVGPU_GRAPHICS
static u32 prime_set[18] = {
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
/*
* Return map tiles count for given index
* Return 0 if index is out-of-bounds
*/
u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, u32 index)
{
if (index >= config->map_tile_count) {
return 0;
}
return config->map_tiles[index];
}
u8 *nvgpu_gr_config_get_map_tiles(struct nvgpu_gr_config *config)
{
return config->map_tiles;
}
u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config)
{
return config->map_row_offset;
}
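/*
 * Build the map_tiles table mapping tile slots to GPC ids. For
 * tpc_count >= 4, map_row_offset is picked as the first prime in
 * prime_set[] (from 3 upward) that does not divide tpc_count, and the
 * switch below overrides the value for a few specific TPC counts. When
 * the map needs (re)building, GPCs are bubble-sorted by descending TPC
 * count and the tpc_count tile slots are assigned with a Bresenham-style
 * error accumulator, so each GPC receives a share of tiles proportional
 * to its TPC count.
 */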
int nvgpu_gr_config_init_map_tiles(struct gk20a *g,
struct nvgpu_gr_config *config)
{
s32 comm_denom;
s32 mul_factor;
s32 *init_frac = NULL;
s32 *init_err = NULL;
s32 *run_err = NULL;
u32 *sorted_num_tpcs = NULL;
u32 *sorted_to_unsorted_gpc_map = NULL;
u32 gpc_index;
u32 gpc_mark = 0;
u32 num_tpc;
u32 max_tpc_count = 0;
u32 swap;
u32 tile_count;
u32 index;
bool delete_map = false;
bool gpc_sorted;
int ret = 0;
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
u32 map_tile_count = num_gpcs * num_tpc_per_gpc;
nvgpu_log(g, gpu_dbg_gr, " ");
init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
sorted_num_tpcs =
nvgpu_kzalloc(g, (size_t)num_gpcs *
(size_t)num_tpc_per_gpc *
sizeof(s32));
sorted_to_unsorted_gpc_map =
nvgpu_kzalloc(g, (size_t)num_gpcs * sizeof(s32));
if (!((init_frac != NULL) &&
(init_err != NULL) &&
(run_err != NULL) &&
(sorted_num_tpcs != NULL) &&
(sorted_to_unsorted_gpc_map != NULL))) {
ret = -ENOMEM;
goto clean_up;
}
config->map_row_offset = 0xFFFFFFFFU;
if (config->tpc_count == 3U) {
config->map_row_offset = 2;
} else if (config->tpc_count < 3U) {
config->map_row_offset = 1;
} else {
config->map_row_offset = 3;
for (index = 1U; index < 18U; index++) {
u32 prime = prime_set[index];
if ((config->tpc_count % prime) != 0U) {
config->map_row_offset = prime;
break;
}
}
}
switch (config->tpc_count) {
case 15:
config->map_row_offset = 6;
break;
case 14:
config->map_row_offset = 5;
break;
case 13:
config->map_row_offset = 2;
break;
case 11:
config->map_row_offset = 7;
break;
case 10:
config->map_row_offset = 6;
break;
case 7:
case 5:
config->map_row_offset = 1;
break;
default:
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "unsupported tpc count = %u",
config->tpc_count);
break;
}
if (config->map_tiles != NULL) {
if (config->map_tile_count != config->tpc_count) {
delete_map = true;
}
for (tile_count = 0; tile_count < config->map_tile_count; tile_count++) {
if (nvgpu_gr_config_get_map_tile_count(config, tile_count)
>= config->tpc_count) {
delete_map = true;
}
}
if (delete_map) {
nvgpu_kfree(g, config->map_tiles);
config->map_tiles = NULL;
config->map_tile_count = 0;
}
}
if (config->map_tiles == NULL) {
config->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8));
if (config->map_tiles == NULL) {
ret = -ENOMEM;
goto clean_up;
}
config->map_tile_count = map_tile_count;
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
sorted_num_tpcs[gpc_index] = config->gpc_tpc_count[gpc_index];
sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
}
gpc_sorted = false;
while (!gpc_sorted) {
gpc_sorted = true;
for (gpc_index = 0U; gpc_index < config->gpc_count - 1U; gpc_index++) {
if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) {
gpc_sorted = false;
swap = sorted_num_tpcs[gpc_index];
sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U];
sorted_num_tpcs[gpc_index + 1U] = swap;
swap = sorted_to_unsorted_gpc_map[gpc_index];
sorted_to_unsorted_gpc_map[gpc_index] =
sorted_to_unsorted_gpc_map[gpc_index + 1U];
sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap;
}
}
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
if (config->gpc_tpc_count[gpc_index] > max_tpc_count) {
max_tpc_count = config->gpc_tpc_count[gpc_index];
}
}
mul_factor = S32(config->gpc_count) * S32(max_tpc_count);
if ((U32(mul_factor) & 0x1U) != 0U) {
mul_factor = 2;
} else {
mul_factor = 1;
}
comm_denom = S32(config->gpc_count) * S32(max_tpc_count) * mul_factor;
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
num_tpc = sorted_num_tpcs[gpc_index];
init_frac[gpc_index] = S32(num_tpc) * S32(config->gpc_count) * mul_factor;
if (num_tpc != 0U) {
init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2;
} else {
init_err[gpc_index] = 0;
}
run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
}
while (gpc_mark < config->tpc_count) {
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
if ((run_err[gpc_index] * 2) >= comm_denom) {
config->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
} else {
run_err[gpc_index] += init_frac[gpc_index];
}
}
}
}
clean_up:
nvgpu_kfree(g, init_frac);
nvgpu_kfree(g, init_err);
nvgpu_kfree(g, run_err);
nvgpu_kfree(g, sorted_num_tpcs);
nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
if (ret != 0) {
nvgpu_err(g, "fail");
} else {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
return ret;
}
u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_zcull_per_gpc_count;
}
u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config)
{
return config->zcb_count;
}
u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
return config->gpc_zcb_count[gpc_index];
}
#endif
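/*
 * Release all memory owned by an nvgpu_gr_config and then the struct
 * itself. Safe to call with a NULL config, so the unified GR free path
 * can tear down partially initialized instances.
 */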
void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
{
if (config == NULL) {
return;
}
gr_config_free_mem(g, config);
#ifdef CONFIG_NVGPU_GRAPHICS
nvgpu_kfree(g, config->map_tiles);
#endif
nvgpu_kfree(g, config->sm_to_cluster);
config->sm_to_cluster = NULL;
#ifdef CONFIG_NVGPU_SM_DIVERSITY
if (config->sm_to_cluster_redex_config != NULL) {
nvgpu_kfree(g, config->sm_to_cluster_redex_config);
config->sm_to_cluster_redex_config = NULL;
}
#endif
nvgpu_kfree(g, config);
}
u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_gpc_count;
}
u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_tpc_per_gpc_count;
}
u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config)
{
return config->max_tpc_count;
}
u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config)
{
return config->gpc_count;
}
u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config)
{
return config->tpc_count;
}
u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config)
{
return config->ppc_count;
}
u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config)
{
return config->pe_count_per_gpc;
}
u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config)
{
return config->sm_count_per_tpc;
}
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
return config->gpc_ppc_count[gpc_index];
}
u32 *nvgpu_gr_config_get_base_count_gpc_tpc(struct nvgpu_gr_config *config)
{
return config->gpc_tpc_count;
}
u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
if (gpc_index >= config->gpc_count) {
return 0;
}
return config->gpc_tpc_count[gpc_index];
}
u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config));
return config->pes_tpc_count[pes_index][gpc_index];
}
u32 *nvgpu_gr_config_get_base_mask_gpc_tpc(struct nvgpu_gr_config *config)
{
return config->gpc_tpc_mask;
}
u32 *nvgpu_gr_config_get_gpc_tpc_mask_physical_base(struct nvgpu_gr_config *config)
{
return config->gpc_tpc_mask_physical;
}
u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_max_gpc_count(config));
return config->gpc_tpc_mask[gpc_index];
}
u32 nvgpu_gr_config_get_gpc_tpc_mask_physical(struct nvgpu_gr_config *config,
u32 gpc_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_max_gpc_count(config));
return config->gpc_tpc_mask_physical[gpc_index];
}
void nvgpu_gr_config_set_gpc_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index, u32 val)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
config->gpc_tpc_mask[gpc_index] = val;
}
u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
u32 gpc_index)
{
if (gpc_index >= config->gpc_count) {
return 0;
}
return config->gpc_skip_mask[gpc_index];
}
u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config));
return config->pes_tpc_mask[pes_index][gpc_index];
}
u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config)
{
return config->gpc_mask;
}
u32 nvgpu_gr_config_get_no_of_sm(struct nvgpu_gr_config *config)
{
return config->no_of_sm;
}
void nvgpu_gr_config_set_no_of_sm(struct nvgpu_gr_config *config, u32 no_of_sm)
{
config->no_of_sm = no_of_sm;
}
struct nvgpu_sm_info *nvgpu_gr_config_get_sm_info(struct nvgpu_gr_config *config,
u32 sm_id)
{
if (sm_id < config->no_of_sm) {
return &config->sm_to_cluster[sm_id];
}
return NULL;
}
#ifdef CONFIG_NVGPU_SM_DIVERSITY
struct nvgpu_sm_info *nvgpu_gr_config_get_redex_sm_info(
struct nvgpu_gr_config *config, u32 sm_id)
{
return &config->sm_to_cluster_redex_config[sm_id];
}
#endif
u32 nvgpu_gr_config_get_sm_info_gpc_index(struct nvgpu_sm_info *sm_info)
{
return sm_info->gpc_index;
}
void nvgpu_gr_config_set_sm_info_gpc_index(struct nvgpu_sm_info *sm_info,
u32 gpc_index)
{
sm_info->gpc_index = gpc_index;
}
u32 nvgpu_gr_config_get_sm_info_tpc_index(struct nvgpu_sm_info *sm_info)
{
return sm_info->tpc_index;
}
void nvgpu_gr_config_set_sm_info_tpc_index(struct nvgpu_sm_info *sm_info,
u32 tpc_index)
{
sm_info->tpc_index = tpc_index;
}
u32 nvgpu_gr_config_get_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info)
{
return sm_info->global_tpc_index;
}
void nvgpu_gr_config_set_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info,
u32 global_tpc_index)
{
sm_info->global_tpc_index = global_tpc_index;
}
u32 nvgpu_gr_config_get_sm_info_sm_index(struct nvgpu_sm_info *sm_info)
{
return sm_info->sm_index;
}
void nvgpu_gr_config_set_sm_info_sm_index(struct nvgpu_sm_info *sm_info,
u32 sm_index)
{
sm_info->sm_index = sm_index;
}