gpu: nvgpu: add new gr/config unit to initialize GR configuration

Add a new unit gr/config to initialize GR configuration such as GPC/TPC
counts, max counts, and masks.

Create a new structure nvgpu_gr_config that stores all of this configuration
and is owned by the new unit.

Move the fields below from struct gr_gk20a to struct nvgpu_gr_config in
gr/config.h. struct gr_gk20a now only holds a pointer to struct
nvgpu_gr_config:

u32 max_gpc_count;
u32 max_tpc_per_gpc_count;
u32 max_zcull_per_gpc_count;
u32 max_tpc_count;

u32 gpc_count;
u32 tpc_count;
u32 ppc_count;
u32 zcb_count;

u32 pe_count_per_gpc;

u32 *gpc_tpc_count;
u32 *gpc_ppc_count;
u32 *gpc_zcb_count;
u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];

u32 *gpc_tpc_mask;
u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
u32 *gpc_skip_mask;

u8 *map_tiles;
u32 map_tile_count;
u32 map_row_offset;

Remove gr->sys_count since it was already unused.

The common/gr/config/gr_config.c unit now exposes APIs to initialize the
configuration and to query the configuration values.

nvgpu_gr_config_init() is called from gr_gk20a_init_gr_config() to initialize
the GR configuration, and gr_gk20a_init_map_tiles() is simply renamed to
nvgpu_gr_config_init_map_tiles().
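
As an illustration, a minimal sketch of the caller side (the gr_gk20a.c change
itself is in the suppressed diff below; the exact signature shown here for
gr_gk20a_init_gr_config() is an assumption):

static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{
    /* Reads GPC/TPC/PES counts and masks via the g->ops.gr.config HALs */
    gr->config = nvgpu_gr_config_init(g);
    if (gr->config == NULL) {
        return -ENOMEM;
    }

    /* Formerly gr_gk20a_init_map_tiles() */
    return nvgpu_gr_config_init_map_tiles(g, gr->config);
}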

Expose a new API nvgpu_gr_config_deinit() to deinitialize the configuration.

Expose nvgpu_gr_config_get_*() APIs to query the above configuration fields
stored in the nvgpu_gr_config structure.
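
For example, callers now read counts through these getters instead of
dereferencing gr_gk20a fields directly; a sketch of the common pattern
(mirroring the ECC hunk below, with a placeholder loop body):

u32 gpc, tpc;
u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);

for (gpc = 0U; gpc < gpc_count; gpc++) {
    for (tpc = 0U;
         tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc);
         tpc++) {
        /* per-TPC work, e.g. ECC counter setup */
    }
}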

Update vgpu_gr_init_gr_config() to initialize the configuration in the
gr->config structure.

Chip-specific HALs that access GR registers for initialization are
implemented in common/gr/config/gr_config_gm20b.c.
Set these HALs for all GPUs.

Jira NVGPU-1879

Change-Id: Ided658b43124ea61b9f273b82b73fdde4ed3c8f0
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2012167
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
32 changed files with 1401 additions and 863 deletions


@@ -88,6 +88,8 @@ nvgpu-y += common/bus/bus_gk20a.o \
 	common/gr/global_ctx.o \
 	common/gr/ctx.o \
 	common/gr/subctx.o \
+	common/gr/config/gr_config.o \
+	common/gr/config/gr_config_gm20b.o \
 	common/netlist/netlist.o \
 	common/netlist/netlist_sim.o \
 	common/netlist/netlist_gm20b.o \


@@ -130,6 +130,8 @@ srcs += common/sim.c \
 	common/gr/global_ctx.c \
 	common/gr/subctx.c \
 	common/gr/ctx.c \
+	common/gr/config/gr_config.c \
+	common/gr/config/gr_config_gm20b.c \
 	common/netlist/netlist.c \
 	common/netlist/netlist_sim.c \
 	common/netlist/netlist_gm20b.c \


@@ -21,6 +21,7 @@
  */
 #include <nvgpu/gk20a.h>
+#include <nvgpu/gr/config.h>
 static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
 {
@@ -44,16 +45,17 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
 {
     struct gr_gk20a *gr = &g->gr;
     struct nvgpu_ecc_stat **stats;
+    u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
     u32 gpc, tpc;
     int err = 0;
-    stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
+    stats = nvgpu_kzalloc(g, sizeof(*stats) * gpc_count);
     if (stats == NULL) {
         return -ENOMEM;
     }
-    for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-        stats[gpc] = nvgpu_kzalloc(g,
-            sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
+    for (gpc = 0; gpc < gpc_count; gpc++) {
+        stats[gpc] = nvgpu_kzalloc(g, sizeof(*stats[gpc]) *
+            nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc));
         if (stats[gpc] == NULL) {
             err = -ENOMEM;
             break;
@@ -69,8 +71,10 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
         return err;
     }
-    for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-        for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
+    for (gpc = 0; gpc < gpc_count; gpc++) {
+        for (tpc = 0;
+             tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc);
+             tpc++) {
             (void) snprintf(stats[gpc][tpc].name,
                 NVGPU_ECC_STAT_NAME_MAX_SIZE,
                 "gpc%d_tpc%d_%s", gpc, tpc, name);
@@ -87,13 +91,14 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
 {
     struct gr_gk20a *gr = &g->gr;
     struct nvgpu_ecc_stat *stats;
+    u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
     u32 gpc;
-    stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
+    stats = nvgpu_kzalloc(g, sizeof(*stats) * gpc_count);
     if (stats == NULL) {
         return -ENOMEM;
     }
-    for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+    for (gpc = 0; gpc < gpc_count; gpc++) {
         (void) snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
             "gpc%d_%s", gpc, name);
         nvgpu_ecc_stat_add(g, &stats[gpc]);
@@ -189,9 +194,10 @@ void nvgpu_ecc_free(struct gk20a *g)
 {
     struct nvgpu_ecc *ecc = &g->ecc;
     struct gr_gk20a *gr = &g->gr;
+    u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
     u32 i;
-    for (i = 0; i < gr->gpc_count; i++) {
+    for (i = 0; i < gpc_count; i++) {
         if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
             nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
         }


@@ -0,0 +1,540 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
#include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h>
struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
{
struct nvgpu_gr_config *config;
u32 gpc_index, pes_index;
u32 pes_tpc_mask;
u32 pes_tpc_count;
u32 pes_heavy_index;
u32 gpc_new_skip_mask;
u32 tmp;
config = nvgpu_kzalloc(g, sizeof(*config));
if (config == NULL) {
return NULL;
}
tmp = nvgpu_readl(g, top_num_gpcs_r());
config->max_gpc_count = top_num_gpcs_value_v(tmp);
tmp = nvgpu_readl(g, top_tpc_per_gpc_r());
config->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
config->max_tpc_count = config->max_gpc_count *
config->max_tpc_per_gpc_count;
tmp = nvgpu_readl(g, pri_ringmaster_enum_gpc_r());
config->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
if (config->gpc_count == 0U) {
nvgpu_err(g, "gpc_count==0!");
goto clean_up;
}
config->pe_count_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_PES_PER_GPC);
if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
nvgpu_err(g, "too many pes per gpc");
goto clean_up;
}
config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g,
GPU_LIT_NUM_ZCULL_BANKS);
config->gpc_tpc_count = nvgpu_kzalloc(g, config->gpc_count *
sizeof(u32));
config->gpc_tpc_mask = nvgpu_kzalloc(g, config->max_gpc_count *
sizeof(u32));
config->gpc_zcb_count = nvgpu_kzalloc(g, config->gpc_count *
sizeof(u32));
config->gpc_ppc_count = nvgpu_kzalloc(g, config->gpc_count *
sizeof(u32));
config->gpc_skip_mask = nvgpu_kzalloc(g,
(size_t)g->ops.gr.config.get_pd_dist_skip_table_size() *
(size_t)4 * sizeof(u32));
if ((config->gpc_tpc_count == NULL) || (config->gpc_tpc_mask == NULL) ||
(config->gpc_zcb_count == NULL) || (config->gpc_ppc_count == NULL) ||
(config->gpc_skip_mask == NULL)) {
goto clean_up;
}
for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) {
if (g->ops.gr.config.get_gpc_tpc_mask != NULL) {
config->gpc_tpc_mask[gpc_index] =
g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_index);
}
}
for (pes_index = 0; pes_index < config->pe_count_per_gpc; pes_index++) {
config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g,
config->gpc_count * sizeof(u32));
config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g,
config->gpc_count * sizeof(u32));
if ((config->pes_tpc_count[pes_index] == NULL) ||
(config->pes_tpc_mask[pes_index] == NULL)) {
goto clean_up;
}
}
config->ppc_count = 0;
config->tpc_count = 0;
config->zcb_count = 0;
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
config->gpc_tpc_count[gpc_index] =
g->ops.gr.config.get_tpc_count_in_gpc(g, config,
gpc_index);
config->tpc_count += config->gpc_tpc_count[gpc_index];
config->gpc_zcb_count[gpc_index] =
g->ops.gr.config.get_zcull_count_in_gpc(g, config,
gpc_index);
config->zcb_count += config->gpc_zcb_count[gpc_index];
for (pes_index = 0; pes_index < config->pe_count_per_gpc;
pes_index++) {
pes_tpc_mask = g->ops.gr.config.get_pes_tpc_mask(g,
config, gpc_index, pes_index);
pes_tpc_count = hweight32(pes_tpc_mask);
/* detect PES presence by seeing if there are
* TPCs connected to it.
*/
if (pes_tpc_count != 0U) {
config->gpc_ppc_count[gpc_index]++;
}
config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
}
config->ppc_count += config->gpc_ppc_count[gpc_index];
gpc_new_skip_mask = 0;
if (config->pe_count_per_gpc > 1U &&
config->pes_tpc_count[0][gpc_index] +
config->pes_tpc_count[1][gpc_index] == 5U) {
pes_heavy_index =
config->pes_tpc_count[0][gpc_index] >
config->pes_tpc_count[1][gpc_index] ? 0U : 1U;
gpc_new_skip_mask =
config->pes_tpc_mask[pes_heavy_index][gpc_index] ^
(config->pes_tpc_mask[pes_heavy_index][gpc_index] &
(config->pes_tpc_mask[pes_heavy_index][gpc_index] - 1U));
} else if (config->pe_count_per_gpc > 1U &&
(config->pes_tpc_count[0][gpc_index] +
config->pes_tpc_count[1][gpc_index] == 4U) &&
(config->pes_tpc_count[0][gpc_index] !=
config->pes_tpc_count[1][gpc_index])) {
pes_heavy_index =
config->pes_tpc_count[0][gpc_index] >
config->pes_tpc_count[1][gpc_index] ? 0U : 1U;
gpc_new_skip_mask =
config->pes_tpc_mask[pes_heavy_index][gpc_index] ^
(config->pes_tpc_mask[pes_heavy_index][gpc_index] &
(config->pes_tpc_mask[pes_heavy_index][gpc_index] - 1U));
}
config->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
}
nvgpu_log_info(g, "max_gpc_count: %d", config->max_gpc_count);
nvgpu_log_info(g, "max_tpc_per_gpc_count: %d", config->max_tpc_per_gpc_count);
nvgpu_log_info(g, "max_zcull_per_gpc_count: %d", config->max_zcull_per_gpc_count);
nvgpu_log_info(g, "max_tpc_count: %d", config->max_tpc_count);
nvgpu_log_info(g, "gpc_count: %d", config->gpc_count);
nvgpu_log_info(g, "pe_count_per_gpc: %d", config->pe_count_per_gpc);
nvgpu_log_info(g, "tpc_count: %d", config->tpc_count);
nvgpu_log_info(g, "ppc_count: %d", config->ppc_count);
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log_info(g, "gpc_tpc_count[%d] : %d",
gpc_index, config->gpc_tpc_count[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log_info(g, "gpc_zcb_count[%d] : %d",
gpc_index, config->gpc_zcb_count[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log_info(g, "gpc_ppc_count[%d] : %d",
gpc_index, config->gpc_ppc_count[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log_info(g, "gpc_skip_mask[%d] : %d",
gpc_index, config->gpc_skip_mask[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
for (pes_index = 0;
pes_index < config->pe_count_per_gpc;
pes_index++) {
nvgpu_log_info(g, "pes_tpc_count[%d][%d] : %d",
pes_index, gpc_index,
config->pes_tpc_count[pes_index][gpc_index]);
}
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
for (pes_index = 0;
pes_index < config->pe_count_per_gpc;
pes_index++) {
nvgpu_log_info(g, "pes_tpc_mask[%d][%d] : %d",
pes_index, gpc_index,
config->pes_tpc_mask[pes_index][gpc_index]);
}
}
return config;
clean_up:
nvgpu_kfree(g, config);
return NULL;
}
static u32 prime_set[18] = {
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
/*
* Return map tiles count for given index
* Return 0 if index is out-of-bounds
*/
u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, u32 index)
{
if (index >= config->map_tile_count) {
return 0;
}
return config->map_tiles[index];
}
u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config)
{
return config->map_row_offset;
}
int nvgpu_gr_config_init_map_tiles(struct gk20a *g,
struct nvgpu_gr_config *config)
{
s32 comm_denom;
s32 mul_factor;
s32 *init_frac = NULL;
s32 *init_err = NULL;
s32 *run_err = NULL;
u32 *sorted_num_tpcs = NULL;
u32 *sorted_to_unsorted_gpc_map = NULL;
u32 gpc_index;
u32 gpc_mark = 0;
u32 num_tpc;
u32 max_tpc_count = 0;
u32 swap;
u32 tile_count;
u32 index;
bool delete_map = false;
bool gpc_sorted;
int ret = 0;
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
u32 map_tile_count = num_gpcs * num_tpc_per_gpc;
init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
sorted_num_tpcs =
nvgpu_kzalloc(g, (size_t)num_gpcs *
(size_t)num_tpc_per_gpc *
sizeof(s32));
sorted_to_unsorted_gpc_map =
nvgpu_kzalloc(g, (size_t)num_gpcs * sizeof(s32));
if (!((init_frac != NULL) &&
(init_err != NULL) &&
(run_err != NULL) &&
(sorted_num_tpcs != NULL) &&
(sorted_to_unsorted_gpc_map != NULL))) {
ret = -ENOMEM;
goto clean_up;
}
config->map_row_offset = 0xFFFFFFFFU;
if (config->tpc_count == 3U) {
config->map_row_offset = 2;
} else if (config->tpc_count < 3U) {
config->map_row_offset = 1;
} else {
config->map_row_offset = 3;
for (index = 1U; index < 18U; index++) {
u32 prime = prime_set[index];
if ((config->tpc_count % prime) != 0U) {
config->map_row_offset = prime;
break;
}
}
}
switch (config->tpc_count) {
case 15:
config->map_row_offset = 6;
break;
case 14:
config->map_row_offset = 5;
break;
case 13:
config->map_row_offset = 2;
break;
case 11:
config->map_row_offset = 7;
break;
case 10:
config->map_row_offset = 6;
break;
case 7:
case 5:
config->map_row_offset = 1;
break;
default:
break;
}
if (config->map_tiles != NULL) {
if (config->map_tile_count != config->tpc_count) {
delete_map = true;
}
for (tile_count = 0; tile_count < config->map_tile_count; tile_count++) {
if (nvgpu_gr_config_get_map_tile_count(config, tile_count)
>= config->tpc_count) {
delete_map = true;
}
}
if (delete_map) {
nvgpu_kfree(g, config->map_tiles);
config->map_tiles = NULL;
config->map_tile_count = 0;
}
}
if (config->map_tiles == NULL) {
config->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8));
if (config->map_tiles == NULL) {
ret = -ENOMEM;
goto clean_up;
}
config->map_tile_count = map_tile_count;
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
sorted_num_tpcs[gpc_index] = config->gpc_tpc_count[gpc_index];
sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
}
gpc_sorted = false;
while (!gpc_sorted) {
gpc_sorted = true;
for (gpc_index = 0U; gpc_index < config->gpc_count - 1U; gpc_index++) {
if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) {
gpc_sorted = false;
swap = sorted_num_tpcs[gpc_index];
sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U];
sorted_num_tpcs[gpc_index + 1U] = swap;
swap = sorted_to_unsorted_gpc_map[gpc_index];
sorted_to_unsorted_gpc_map[gpc_index] =
sorted_to_unsorted_gpc_map[gpc_index + 1U];
sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap;
}
}
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
if (config->gpc_tpc_count[gpc_index] > max_tpc_count) {
max_tpc_count = config->gpc_tpc_count[gpc_index];
}
}
mul_factor = S32(config->gpc_count) * S32(max_tpc_count);
if ((U32(mul_factor) & 0x1U) != 0U) {
mul_factor = 2;
} else {
mul_factor = 1;
}
comm_denom = S32(config->gpc_count) * S32(max_tpc_count) * mul_factor;
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
num_tpc = sorted_num_tpcs[gpc_index];
init_frac[gpc_index] = S32(num_tpc) * S32(config->gpc_count) * mul_factor;
if (num_tpc != 0U) {
init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2;
} else {
init_err[gpc_index] = 0;
}
run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
}
while (gpc_mark < config->tpc_count) {
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
if ((run_err[gpc_index] * 2) >= comm_denom) {
config->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
} else {
run_err[gpc_index] += init_frac[gpc_index];
}
}
}
}
clean_up:
nvgpu_kfree(g, init_frac);
nvgpu_kfree(g, init_err);
nvgpu_kfree(g, run_err);
nvgpu_kfree(g, sorted_num_tpcs);
nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
if (ret != 0) {
nvgpu_err(g, "fail");
} else {
nvgpu_log_fn(g, "done");
}
return ret;
}
void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
{
u32 index;
nvgpu_kfree(g, config->gpc_tpc_count);
nvgpu_kfree(g, config->gpc_zcb_count);
nvgpu_kfree(g, config->gpc_ppc_count);
nvgpu_kfree(g, config->gpc_skip_mask);
nvgpu_kfree(g, config->gpc_tpc_mask);
nvgpu_kfree(g, config->map_tiles);
for (index = 0U; index < config->pe_count_per_gpc;
index++) {
nvgpu_kfree(g, config->pes_tpc_count[index]);
nvgpu_kfree(g, config->pes_tpc_mask[index]);
}
}
u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_gpc_count;
}
u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_tpc_per_gpc_count;
}
u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_zcull_per_gpc_count;
}
u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config)
{
return config->max_tpc_count;
}
u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config)
{
return config->gpc_count;
}
u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config)
{
return config->tpc_count;
}
u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config)
{
return config->ppc_count;
}
u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config)
{
return config->zcb_count;
}
u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config)
{
return config->pe_count_per_gpc;
}
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
return config->gpc_ppc_count[gpc_index];
}
u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
if (gpc_index >= config->gpc_count) {
return 0;
}
return config->gpc_tpc_count[gpc_index];
}
u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
return config->gpc_zcb_count[gpc_index];
}
u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index)
{
return config->pes_tpc_count[pes_index][gpc_index];
}
u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index)
{
return config->gpc_tpc_mask[gpc_index];
}
u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
u32 gpc_index)
{
return config->gpc_skip_mask[gpc_index];
}
u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index)
{
return config->pes_tpc_mask[pes_index][gpc_index];
}


@@ -0,0 +1,79 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/gr/config.h>
#include "gr_config_gm20b.h"
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index)
{
u32 val;
/* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */
val = g->ops.fuse.fuse_status_opt_tpc_gpc(g, gpc_index);
return (~val) & (BIT32(config->max_tpc_per_gpc_count) - 1U);
}
u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index)
{
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tmp;
tmp = nvgpu_readl(g, gr_gpc0_fs_gpc_r() + gpc_stride * gpc_index);
return gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
}
u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index)
{
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tmp;
tmp = nvgpu_readl(g, gr_gpc0_fs_gpc_r() + gpc_stride * gpc_index);
return gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
}
u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index)
{
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tmp;
tmp = nvgpu_readl(g, gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
gpc_index * gpc_stride);
return gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
}
u32 gm20b_gr_config_get_pd_dist_skip_table_size(void)
{
return gr_pd_dist_skip_table__size_1_v();
}


@@ -0,0 +1,41 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_CONFIG_GM20B_H
#define NVGPU_GR_CONFIG_GM20B_H
#include <nvgpu/types.h>
struct gk20a;
struct nvgpu_gr_config;
u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index);
u32 gm20b_gr_config_get_pd_dist_skip_table_size(void);
#endif /* NVGPU_GR_CONFIG_GM20B_H */


@@ -26,6 +26,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/io.h>
 #include <nvgpu/utils.h>
+#include <nvgpu/gr/config.h>
 #include "priv_ring_gm20b.h"
@@ -80,7 +81,7 @@ void gm20b_priv_ring_isr(struct gk20a *g)
             gk20a_readl(g, pri_ringstation_sys_priv_error_code_r()));
     }
-    for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+    for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
         if ((status1 & BIT32(gpc)) != 0U) {
             nvgpu_log(g, gpu_dbg_intr, "GPC%u write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x", gpc,
                 gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_adr_r() + gpc * gpc_priv_stride),


@@ -27,6 +27,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/io.h>
 #include <nvgpu/utils.h>
+#include <nvgpu/gr/config.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/hw/gp10b/hw_pri_ringmaster_gp10b.h>
@@ -157,7 +158,7 @@ void gp10b_priv_ring_isr(struct gk20a *g)
     if (status1 != 0U) {
         gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_PRIV_STRIDE);
-        for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+        for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
             offset = gpc * gpc_stride;
             if ((status1 & BIT32(gpc)) != 0U) {
                 error_info = gk20a_readl(g,


File diff suppressed because it is too large


@@ -36,14 +36,11 @@
 #define GR_IDLE_CHECK_MAX 200U /* usec */
 #define GR_FECS_POLL_INTERVAL 5U /* usec */
-#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFFU
 #define INVALID_MAX_WAYS 0xFFFFFFFFU
 #define GK20A_FECS_UCODE_IMAGE "fecs.bin"
 #define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin"
-#define GK20A_GR_MAX_PES_PER_GPC 3U
 #define GK20A_TIMEOUT_FPGA 100000U /* 100 sec */
 /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
@@ -258,6 +255,7 @@ struct gr_gk20a {
     bool initialized;
     u32 num_fbps;
+    u32 max_fbps_count;
     u32 max_comptag_lines;
     u32 compbit_backing_size;
@@ -266,26 +264,6 @@ struct gr_gk20a {
     u32 cacheline_size;
     u32 gobs_per_comptagline_per_slice;
-    u32 max_gpc_count;
-    u32 max_fbps_count;
-    u32 max_tpc_per_gpc_count;
-    u32 max_zcull_per_gpc_count;
-    u32 max_tpc_count;
-    u32 sys_count;
-    u32 gpc_count;
-    u32 pe_count_per_gpc;
-    u32 ppc_count;
-    u32 *gpc_ppc_count;
-    u32 tpc_count;
-    u32 *gpc_tpc_count;
-    u32 *gpc_tpc_mask;
-    u32 zcb_count;
-    u32 *gpc_zcb_count;
-    u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
-    u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
-    u32 *gpc_skip_mask;
     u32 bundle_cb_default_size;
     u32 min_gpm_fifo_depth;
     u32 bundle_cb_token_limit;
@@ -312,9 +290,7 @@ struct gr_gk20a {
     struct nvgpu_gr_ctx_desc *gr_ctx_desc;
-    u8 *map_tiles;
-    u32 map_tile_count;
-    u32 map_row_offset;
+    struct nvgpu_gr_config *config;
     u32 max_comptag_mem; /* max memory size (MB) for comptag */
     struct compbit_store_desc compbit_store;
@@ -565,7 +541,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
         u32 global_esr_mask, bool check_errors);
 void gk20a_gr_suspend_all_sms(struct gk20a *g,
         u32 global_esr_mask, bool check_errors);
-u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
 int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
         struct channel_gk20a *ch, u64 sms, bool enable);
 bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);


@@ -32,6 +32,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 #include "gk20a/gr_gk20a.h"
@@ -109,11 +110,11 @@ u32 gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
     size = gr->attrib_cb_size *
         gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-        gr->max_tpc_count;
+        nvgpu_gr_config_get_max_tpc_count(gr->config);
     size += gr->alpha_cb_size *
         gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
-        gr->max_tpc_count;
+        nvgpu_gr_config_get_max_tpc_count(gr->config);
     return size;
 }
@@ -201,17 +202,23 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
         gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
     alpha_offset_in_chunk = attrib_offset_in_chunk +
-        gr->tpc_count * gr->attrib_cb_size;
+        nvgpu_gr_config_get_tpc_count(gr->config) * gr->attrib_cb_size;
-    for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+    for (gpc_index = 0;
+         gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+         gpc_index++) {
         u32 temp = gpc_stride * gpc_index;
         u32 temp2 = num_pes_per_gpc * gpc_index;
-        for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-                ppc_index++) {
+        for (ppc_index = 0;
+             ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config,
+                gpc_index);
+             ppc_index++) {
             cbm_cfg_size1 = gr->attrib_cb_default_size *
-                gr->pes_tpc_count[ppc_index][gpc_index];
+                nvgpu_gr_config_get_pes_tpc_count(gr->config,
+                    gpc_index, ppc_index);
             cbm_cfg_size2 = gr->alpha_cb_default_size *
-                gr->pes_tpc_count[ppc_index][gpc_index];
+                nvgpu_gr_config_get_pes_tpc_count(gr->config,
+                    gpc_index, ppc_index);
             nvgpu_gr_ctx_patch_write(g, gr_ctx,
                 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
@@ -224,7 +231,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
                 attrib_offset_in_chunk, patch);
             attrib_offset_in_chunk += gr->attrib_cb_size *
-                gr->pes_tpc_count[ppc_index][gpc_index];
+                nvgpu_gr_config_get_pes_tpc_count(gr->config,
+                    gpc_index, ppc_index);
             nvgpu_gr_ctx_patch_write(g, gr_ctx,
                 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
@@ -237,7 +245,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
                 alpha_offset_in_chunk, patch);
             alpha_offset_in_chunk += gr->alpha_cb_size *
-                gr->pes_tpc_count[ppc_index][gpc_index];
+                nvgpu_gr_config_get_pes_tpc_count(gr->config,
+                    gpc_index, ppc_index);
             nvgpu_gr_ctx_patch_write(g, gr_ctx,
                 gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
@@ -348,10 +357,13 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
         gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
         gr_pd_ab_dist_cfg1_max_batches_init_f());
-    for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+    for (gpc_index = 0;
+         gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+         gpc_index++) {
         stride = gpc_stride * gpc_index;
-        for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-                ppc_index++) {
+        for (ppc_index = 0;
+             ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
+             ppc_index++) {
             val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
@@ -360,7 +372,8 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
             val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
                 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
-                    gr->pes_tpc_count[ppc_index][gpc_index]));
+                    nvgpu_gr_config_get_pes_tpc_count(gr->config,
+                        gpc_index, ppc_index)));
             gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
                 stride +
@@ -388,10 +401,13 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
         ~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) |
         gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
-    for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+    for (gpc_index = 0;
+         gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+         gpc_index++) {
         stride = gpc_stride * gpc_index;
-        for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-                ppc_index++) {
+        for (ppc_index = 0;
+             ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
+             ppc_index++) {
             val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
@@ -401,7 +417,8 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
             val = set_field(val,
                 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
                 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
-                    gr->pes_tpc_count[ppc_index][gpc_index]));
+                    nvgpu_gr_config_get_pes_tpc_count(gr->config,
+                        gpc_index, ppc_index)));
             gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
                 stride +
@@ -413,11 +430,11 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
             val = set_field(val,
                 gr_gpcs_swdx_tc_beta_cb_size_v_m(),
                 gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
-                    gr->gpc_ppc_count[gpc_index]));
+                    nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index)));
             val = set_field(val,
                 gr_gpcs_swdx_tc_beta_cb_size_div3_m(),
                 gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size *
-                    gr->gpc_ppc_count[gpc_index])/3U));
+                    nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index))/3U));
             gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
                 ppc_index + gpc_index), val);
@@ -554,18 +571,7 @@ u32 gr_gm20b_get_gpc_mask(struct gk20a *g)
      */
     val = g->ops.fuse.fuse_status_opt_gpc(g);
-    return (~val) & (BIT32(gr->max_gpc_count) - 1U);
-}
-u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
-{
-    u32 val;
-    struct gr_gk20a *gr = &g->gr;
-    /* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */
-    val = g->ops.fuse.fuse_status_opt_tpc_gpc(g, gpc_index);
-    return (~val) & (BIT32(gr->max_tpc_per_gpc_count) - 1U);
+    return (~val) & (BIT32(nvgpu_gr_config_get_max_gpc_count(gr->config)) - 1U);
 }

 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
@@ -573,10 +579,11 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
     nvgpu_tegra_fuse_write_bypass(g, 0x1);
     nvgpu_tegra_fuse_write_access_sw(g, 0x0);
-    if (g->gr.gpc_tpc_mask[gpc_index] == 0x1U) {
+    if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0x1U) {
         nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
         nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x1);
-    } else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2U) {
+    } else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) ==
+            0x2U) {
         nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1);
         nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0);
     } else {
@@ -590,20 +597,24 @@ void gr_gm20b_load_tpc_mask(struct gk20a *g)
     u32 pes_tpc_mask = 0, fuse_tpc_mask;
     u32 gpc, pes;
     u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+    u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
-    for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
-        for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
-            pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] <<
+    for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
+        for (pes = 0;
+             pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
+             pes++) {
+            pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
+                    g->gr.config, gpc, pes) <<
                 num_tpc_per_gpc * gpc;
         }
     }
-    fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
+    fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, 0);
     if ((g->tpc_fs_mask_user != 0U) &&
         (g->tpc_fs_mask_user != fuse_tpc_mask) &&
-        (fuse_tpc_mask == BIT32(g->gr.max_tpc_count) - U32(1))) {
+        (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
         u32 val = g->tpc_fs_mask_user;
-        val &= BIT32(g->gr.max_tpc_count) - U32(1);
+        val &= BIT32(max_tpc_count) - U32(1);
         /* skip tpc to disable the other tpc cause channel timeout */
         val = BIT32(hweight32(val)) - U32(1);
         gk20a_writel(g, gr_fe_tpc_fs_r(), val);
@@ -640,7 +651,9 @@ int gr_gm20b_load_smid_config(struct gk20a *g)
     }
     /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
-    for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) {
+    for (i = 0U;
+         i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
+         i++) {
         u32 reg = 0;
         u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
             gr_cwd_gpc_tpc_id_tpc0_s();
@@ -649,7 +662,7 @@ int gr_gm20b_load_smid_config(struct gk20a *g)
             u32 sm_id = (i * 4U) + j;
             u32 bits;
-            if (sm_id >= g->gr.tpc_count) {
+            if (sm_id >= nvgpu_gr_config_get_tpc_count(g->gr.config)) {
                 break;
             }
@@ -959,7 +972,7 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
         gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
     gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
         gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
-    if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) {
+    if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
         gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
             gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
     }
@@ -975,7 +988,7 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
         gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
     gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
         gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
-    if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) {
+    if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
         gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
             gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
     }


@@ -79,7 +79,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
             u32 *num_sm_dsm_perf_ctrl_regs,
             u32 **sm_dsm_perf_ctrl_regs,
             u32 *ctrl_register_stride);
-u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 u32 gr_gm20b_get_gpc_mask(struct gk20a *g);
 void gr_gm20b_load_tpc_mask(struct gk20a *g);


@@ -44,6 +44,7 @@
 #include "common/fb/fb_gm20b.h"
 #include "common/netlist/netlist_gm20b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
+#include "common/gr/config/gr_config_gm20b.h"
 #include "common/therm/therm_gm20b.h"
 #include "common/ltc/ltc_gm20b.h"
 #include "common/fuse/fuse_gm20b.h"
@@ -241,7 +242,6 @@ static const struct gpu_ops gm20b_ops = {
         .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
         .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
         .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
-        .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
         .alloc_obj_ctx = gk20a_alloc_obj_ctx,
         .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
         .get_zcull_info = gr_gk20a_get_zcull_info,
@@ -404,6 +404,16 @@ static const struct gpu_ops gm20b_ops = {
                 gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
             .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
             .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
+        },
+        .config = {
+            .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
+            .get_tpc_count_in_gpc =
+                gm20b_gr_config_get_tpc_count_in_gpc,
+            .get_zcull_count_in_gpc =
+                gm20b_gr_config_get_zcull_count_in_gpc,
+            .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
+            .get_pd_dist_skip_table_size =
+                gm20b_gr_config_get_pd_dist_skip_table_size,
         }
     },
     .fb = {
@@ -788,6 +798,7 @@ int gm20b_init_hal(struct gk20a *g)
     gops->ce2 = gm20b_ops.ce2;
     gops->gr = gm20b_ops.gr;
     gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog;
+    gops->gr.config = gm20b_ops.gr.config;
     gops->fb = gm20b_ops.fb;
     gops->clock_gating = gm20b_ops.clock_gating;
     gops->fifo = gm20b_ops.fifo;


@@ -25,6 +25,7 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 #include "gk20a/gr_gk20a.h"
 #include "gm20b/gr_gm20b.h"
@@ -182,7 +183,7 @@ int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
             gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
     u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
             gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-            g->gr.max_tpc_count;
+            nvgpu_gr_config_get_max_tpc_count(g->gr.config);
     attrib_cb_size = ALIGN(attrib_cb_size, 128);
     nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);


@@ -38,6 +38,7 @@
#include <nvgpu/regops.h> #include <nvgpu/regops.h>
#include <nvgpu/gr/subctx.h> #include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include "gk20a/gr_gk20a.h" #include "gk20a/gr_gk20a.h"
@@ -437,19 +438,25 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
} }
attrib_offset_in_chunk = alpha_offset_in_chunk + attrib_offset_in_chunk = alpha_offset_in_chunk +
gr->tpc_count * gr->alpha_cb_size; nvgpu_gr_config_get_tpc_count(gr->config) * gr->alpha_cb_size;
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_index++) {
temp = gpc_stride * gpc_index; temp = gpc_stride * gpc_index;
temp2 = num_pes_per_gpc * gpc_index; temp2 = num_pes_per_gpc * gpc_index;
for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; for (ppc_index = 0;
ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
ppc_index++) { ppc_index++) {
cbm_cfg_size_beta = cb_attrib_cache_size_init * cbm_cfg_size_beta = cb_attrib_cache_size_init *
gr->pes_tpc_count[ppc_index][gpc_index]; nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc_index, ppc_index);
cbm_cfg_size_alpha = gr->alpha_cb_default_size * cbm_cfg_size_alpha = gr->alpha_cb_default_size *
gr->pes_tpc_count[ppc_index][gpc_index]; nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc_index, ppc_index);
cbm_cfg_size_steadystate = gr->attrib_cb_default_size * cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
gr->pes_tpc_count[ppc_index][gpc_index]; nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc_index, ppc_index);
nvgpu_gr_ctx_patch_write(g, gr_ctx, nvgpu_gr_ctx_patch_write(g, gr_ctx,
gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
@@ -468,7 +475,8 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
patch); patch);
attrib_offset_in_chunk += attrib_size_in_chunk * attrib_offset_in_chunk += attrib_size_in_chunk *
gr->pes_tpc_count[ppc_index][gpc_index]; nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc_index, ppc_index);
nvgpu_gr_ctx_patch_write(g, gr_ctx, nvgpu_gr_ctx_patch_write(g, gr_ctx,
gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
@@ -481,7 +489,8 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
alpha_offset_in_chunk, patch); alpha_offset_in_chunk, patch);
alpha_offset_in_chunk += gr->alpha_cb_size * alpha_offset_in_chunk += gr->alpha_cb_size *
gr->pes_tpc_count[ppc_index][gpc_index]; nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc_index, ppc_index);
nvgpu_gr_ctx_patch_write(g, gr_ctx, nvgpu_gr_ctx_patch_write(g, gr_ctx,
gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
@@ -594,17 +603,19 @@ u32 gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
gr->alpha_cb_size = gr->alpha_cb_default_size; gr->alpha_cb_size = gr->alpha_cb_default_size;
gr->attrib_cb_size = min(gr->attrib_cb_size, gr->attrib_cb_size = min(gr->attrib_cb_size,
gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / g->gr.tpc_count); gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) /
nvgpu_gr_config_get_tpc_count(gr->config));
gr->alpha_cb_size = min(gr->alpha_cb_size, gr->alpha_cb_size = min(gr->alpha_cb_size,
gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / g->gr.tpc_count); gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) /
nvgpu_gr_config_get_tpc_count(gr->config));
size = gr->attrib_cb_size * size = gr->attrib_cb_size *
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
gr->max_tpc_count; nvgpu_gr_config_get_max_tpc_count(gr->config);
size += gr->alpha_cb_size * size += gr->alpha_cb_size *
gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
gr->max_tpc_count; nvgpu_gr_config_get_max_tpc_count(gr->config);
size = ALIGN(size, 128); size = ALIGN(size, 128);
@@ -786,10 +797,13 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
gr_pd_ab_dist_cfg1_max_batches_init_f()); gr_pd_ab_dist_cfg1_max_batches_init_f());
} }
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_index++) {
stride = gpc_stride * gpc_index; stride = gpc_stride * gpc_index;
for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; for (ppc_index = 0;
ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
ppc_index++) { ppc_index++) {
val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
@@ -798,7 +812,8 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
gr->pes_tpc_count[ppc_index][gpc_index])); nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc_index, ppc_index)));
gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
stride + stride +
@@ -835,10 +850,13 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) | ~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) |
gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady)); gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_index++) {
stride = gpc_stride * gpc_index; stride = gpc_stride * gpc_index;
for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; for (ppc_index = 0;
ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
ppc_index++) { ppc_index++) {
val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
@@ -848,7 +866,8 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
val = set_field(val, val = set_field(val,
gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
gr->pes_tpc_count[ppc_index][gpc_index])); nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc_index, ppc_index)));
gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
stride + stride +
@@ -867,7 +886,7 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
gr_gpcs_swdx_tc_beta_cb_size_v_m(), gr_gpcs_swdx_tc_beta_cb_size_v_m(),
gr_gpcs_swdx_tc_beta_cb_size_v_f( gr_gpcs_swdx_tc_beta_cb_size_v_f(
cb_size_steady * cb_size_steady *
gr->gpc_ppc_count[gpc_index])); nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index)));
gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
ppc_index + gpc_index), val); ppc_index + gpc_index), val);
@@ -965,7 +984,7 @@ int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
g->gr.max_tpc_count; nvgpu_gr_config_get_max_tpc_count(g->gr.config);
attrib_cb_size = ALIGN(attrib_cb_size, 128); attrib_cb_size = ALIGN(attrib_cb_size, 128);
nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size); nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
@@ -1211,7 +1230,7 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r())); gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
} }
@@ -1227,7 +1246,7 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r())); gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
} }
@@ -1435,7 +1454,9 @@ int gr_gp10b_load_smid_config(struct gk20a *g)
} }
/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) { for (i = 0U;
i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
i++) {
u32 reg = 0; u32 reg = 0;
u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
gr_cwd_gpc_tpc_id_tpc0_s(); gr_cwd_gpc_tpc_id_tpc0_s();
@@ -1444,7 +1465,7 @@ int gr_gp10b_load_smid_config(struct gk20a *g)
u32 sm_id = (i * 4U) + j; u32 sm_id = (i * 4U) + j;
u32 bits; u32 bits;
if (sm_id >= g->gr.tpc_count) { if (sm_id >= nvgpu_gr_config_get_tpc_count(g->gr.config)) {
break; break;
} }
@@ -1500,9 +1521,10 @@ void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_bypass(g, 0x1);
nvgpu_tegra_fuse_write_access_sw(g, 0x0); nvgpu_tegra_fuse_write_access_sw(g, 0x0);
if (g->gr.gpc_tpc_mask[gpc_index] == 0x1U) { if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0x1U) {
nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x2); nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x2);
} else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2U) { } else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) ==
0x2U) {
nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1); nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1);
} else { } else {
nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
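
The gp10b hunks above follow the pattern used throughout this change: chip code stops reading topology fields straight off struct gr_gk20a and instead queries the config object owned by the new gr/config unit. A minimal before/after sketch (illustrative only; gpc_index and the surrounding driver context are assumed):

/* Before this change: u32 tpcs = g->gr.gpc_tpc_count[gpc_index]; */
u32 tpcs = nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_index);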

View File

@@ -49,6 +49,7 @@
#include "common/netlist/netlist_gp10b.h" #include "common/netlist/netlist_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/config/gr_config_gm20b.h"
#include "common/therm/therm_gm20b.h" #include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h" #include "common/therm/therm_gp10b.h"
#include "common/ltc/ltc_gm20b.h" #include "common/ltc/ltc_gm20b.h"
@@ -261,7 +262,6 @@ static const struct gpu_ops gp10b_ops = {
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,
@@ -451,6 +451,16 @@ static const struct gpu_ops gp10b_ops = {
.set_full_preemption_ptr = .set_full_preemption_ptr =
gp10b_ctxsw_prog_set_full_preemption_ptr, gp10b_ctxsw_prog_set_full_preemption_ptr,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
},
.config = {
.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
.get_tpc_count_in_gpc =
gm20b_gr_config_get_tpc_count_in_gpc,
.get_zcull_count_in_gpc =
gm20b_gr_config_get_zcull_count_in_gpc,
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
} }
}, },
.fb = { .fb = {
@@ -869,6 +879,7 @@ int gp10b_init_hal(struct gk20a *g)
gops->ce2 = gp10b_ops.ce2; gops->ce2 = gp10b_ops.ce2;
gops->gr = gp10b_ops.gr; gops->gr = gp10b_ops.gr;
gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog; gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog;
gops->gr.config = gp10b_ops.gr.config;
gops->fb = gp10b_ops.fb; gops->fb = gp10b_ops.fb;
gops->clock_gating = gp10b_ops.clock_gating; gops->clock_gating = gp10b_ops.clock_gating;
gops->fifo = gp10b_ops.fifo; gops->fifo = gp10b_ops.fifo;
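
With the per-chip config HALs grouped under gops->gr.config, call sites that previously used the removed g->ops.gr.get_gpc_tpc_mask(g, gpc_index) move to the new sub-struct and pass the config object explicitly. A hedged sketch of the new indirection (gpc_index assumed valid):

u32 fuse_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc_index);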

View File

@@ -28,6 +28,7 @@
#include <nvgpu/io.h> #include <nvgpu/io.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include "gk20a/gr_gk20a.h" #include "gk20a/gr_gk20a.h"
#include "gk20a/gr_pri_gk20a.h" #include "gk20a/gr_pri_gk20a.h"
@@ -81,7 +82,9 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
} }
/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */ /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
for (gpc_id = 0; gpc_id < gr->gpc_count; gpc_id++) { for (gpc_id = 0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id++) {
num_tpc_mask = gpc_tpc_mask[gpc_id]; num_tpc_mask = gpc_tpc_mask[gpc_id];
if ((gpc_id == disable_gpc_id) && if ((gpc_id == disable_gpc_id) &&
@@ -110,16 +113,19 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
* ratio represents relative throughput of the GPC * ratio represents relative throughput of the GPC
*/ */
scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] / scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
gr->gpc_tpc_count[gpc_id]; nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id);
if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) { if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
min_scg_gpc_pix_perf = scg_gpc_pix_perf; min_scg_gpc_pix_perf = scg_gpc_pix_perf;
} }
/* Calculate # of surviving PES */ /* Calculate # of surviving PES */
for (pes_id = 0; pes_id < gr->gpc_ppc_count[gpc_id]; pes_id++) { for (pes_id = 0;
pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id);
pes_id++) {
/* Count the number of TPC on the set */ /* Count the number of TPC on the set */
num_tpc_mask = gr->pes_tpc_mask[pes_id][gpc_id] & num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask(
gr->config, gpc_id, pes_id) &
gpc_tpc_mask[gpc_id]; gpc_tpc_mask[gpc_id];
if ((gpc_id == disable_gpc_id) && if ((gpc_id == disable_gpc_id) &&
@@ -149,10 +155,14 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
} }
/* Now calculate perf */ /* Now calculate perf */
scg_world_perf = (scale_factor * scg_num_pes) / gr->ppc_count; scg_world_perf = (scale_factor * scg_num_pes) /
nvgpu_gr_config_get_ppc_count(gr->config);
deviation = 0; deviation = 0;
average_tpcs = scale_factor * average_tpcs / gr->gpc_count; average_tpcs = scale_factor * average_tpcs /
for (gpc_id =0; gpc_id < gr->gpc_count; gpc_id++) { nvgpu_gr_config_get_gpc_count(gr->config);
for (gpc_id =0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id++) {
diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id]; diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
if (diff < 0) { if (diff < 0) {
diff = -diff; diff = -diff;
@@ -160,7 +170,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
deviation += U32(diff); deviation += U32(diff);
} }
deviation /= gr->gpc_count; deviation /= nvgpu_gr_config_get_gpc_count(gr->config);
norm_tpc_deviation = deviation / max_tpc_gpc; norm_tpc_deviation = deviation / max_tpc_gpc;
@@ -216,14 +226,17 @@ int gr_gv100_init_sm_id_table(struct gk20a *g)
u32 gpc, sm, pes, gtpc; u32 gpc, sm, pes, gtpc;
u32 sm_id = 0; u32 sm_id = 0;
u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
u32 num_sm = sm_per_tpc * g->gr.tpc_count; struct gr_gk20a *gr = &g->gr;
u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config);
int perf, maxperf; int perf, maxperf;
int err = 0; int err = 0;
unsigned long *gpc_tpc_mask; unsigned long *gpc_tpc_mask;
u32 *tpc_table, *gpc_table; u32 *tpc_table, *gpc_table;
gpc_table = nvgpu_kzalloc(g, g->gr.tpc_count * sizeof(u32)); gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
tpc_table = nvgpu_kzalloc(g, g->gr.tpc_count * sizeof(u32)); sizeof(u32));
tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) * gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
@@ -235,17 +248,20 @@ int gr_gv100_init_sm_id_table(struct gk20a *g)
goto exit_build_table; goto exit_build_table;
} }
for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for (pes = 0; pes < g->gr.gpc_ppc_count[gpc]; pes++) { for (pes = 0;
gpc_tpc_mask[gpc] |= g->gr.pes_tpc_mask[pes][gpc]; pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc);
pes++) {
gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask(
g->gr.config, gpc, pes);
} }
} }
for (gtpc = 0; gtpc < g->gr.tpc_count; gtpc++) { for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) {
maxperf = -1; maxperf = -1;
for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for_each_set_bit(tpc, &gpc_tpc_mask[gpc], for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
g->gr.gpc_tpc_count[gpc]) { nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
perf = -1; perf = -1;
err = gr_gv100_scg_estimate_perf(g, err = gr_gv100_scg_estimate_perf(g,
gpc_tpc_mask, gpc, tpc, &perf); gpc_tpc_mask, gpc, tpc, &perf);
@@ -308,13 +324,13 @@ u32 gr_gv100_get_patch_slots(struct gk20a *g)
* Update PE table contents * Update PE table contents
* for PE table, each patch buffer update writes 32 TPCs * for PE table, each patch buffer update writes 32 TPCs
*/ */
size += DIV_ROUND_UP(gr->tpc_count, 32U); size += DIV_ROUND_UP(nvgpu_gr_config_get_tpc_count(gr->config), 32U);
/* /*
* Update the PL table contents * Update the PL table contents
* For PL table, each patch buffer update configures 4 TPCs * For PL table, each patch buffer update configures 4 TPCs
*/ */
size += DIV_ROUND_UP(gr->tpc_count, 4U); size += DIV_ROUND_UP(nvgpu_gr_config_get_tpc_count(gr->config), 4U);
/* /*
* We need this for all subcontexts * We need this for all subcontexts
@@ -515,5 +531,6 @@ void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0), g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0),
0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon); 0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon);
g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0), g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0),
0xFFFFFFFFU, g->gr.gpc_count, num_gpc_perfmon); 0xFFFFFFFFU, nvgpu_gr_config_get_gpc_count(g->gr.config),
num_gpc_perfmon);
} }
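
gr_gv100_get_patch_slots now sizes the PE and PL table updates from the queried TPC count. A quick numeric illustration, assuming a hypothetical configuration with 36 TPCs (not a real chip value):

u32 tpc_count = nvgpu_gr_config_get_tpc_count(gr->config); /* assume 36 */
u32 size = 0U;
size += DIV_ROUND_UP(tpc_count, 32U); /* PE table: 2 patch-buffer updates */
size += DIV_ROUND_UP(tpc_count, 4U);  /* PL table: 9 patch-buffer updates */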

View File

@@ -39,6 +39,7 @@
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
#include "common/gr/config/gr_config_gm20b.h"
#include "common/therm/therm_gm20b.h" #include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp106.h" #include "common/therm/therm_gp106.h"
#include "common/therm/therm_gp10b.h" #include "common/therm/therm_gp10b.h"
@@ -361,7 +362,6 @@ static const struct gpu_ops gv100_ops = {
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.get_gpc_mask = gr_gm20b_get_gpc_mask, .get_gpc_mask = gr_gm20b_get_gpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
@@ -583,6 +583,16 @@ static const struct gpu_ops gv100_ops = {
.set_type_per_veid_header = .set_type_per_veid_header =
gv11b_ctxsw_prog_set_type_per_veid_header, gv11b_ctxsw_prog_set_type_per_veid_header,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
},
.config = {
.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
.get_tpc_count_in_gpc =
gm20b_gr_config_get_tpc_count_in_gpc,
.get_zcull_count_in_gpc =
gm20b_gr_config_get_zcull_count_in_gpc,
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
} }
}, },
.fb = { .fb = {
@@ -1139,6 +1149,7 @@ int gv100_init_hal(struct gk20a *g)
gops->ce2 = gv100_ops.ce2; gops->ce2 = gv100_ops.ce2;
gops->gr = gv100_ops.gr; gops->gr = gv100_ops.gr;
gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog; gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog;
gops->gr.config = gv100_ops.gr.config;
gops->fb = gv100_ops.fb; gops->fb = gv100_ops.fb;
gops->nvdec = gv100_ops.nvdec; gops->nvdec = gv100_ops.nvdec;
gops->clock_gating = gv100_ops.clock_gating; gops->clock_gating = gv100_ops.clock_gating;

View File

@@ -40,6 +40,7 @@
#include <nvgpu/regops.h> #include <nvgpu/regops.h>
#include <nvgpu/gr/subctx.h> #include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/channel.h> #include <nvgpu/channel.h>
#include <nvgpu/nvgpu_err.h> #include <nvgpu/nvgpu_err.h>
@@ -438,7 +439,8 @@ void gr_gv11b_enable_exceptions(struct gk20a *g)
/* enable exceptions */ /* enable exceptions */
gk20a_writel(g, gr_exception2_en_r(), 0x0U); /* BE not enabled */ gk20a_writel(g, gr_exception2_en_r(), 0x0U); /* BE not enabled */
gk20a_writel(g, gr_exception1_en_r(), BIT32(gr->gpc_count) - 1U); gk20a_writel(g, gr_exception1_en_r(),
BIT32(nvgpu_gr_config_get_gpc_count(gr->config)) - 1U);
reg_val = gr_exception_en_fe_enabled_f() | reg_val = gr_exception_en_fe_enabled_f() |
gr_exception_en_memfmt_enabled_f() | gr_exception_en_memfmt_enabled_f() |
@@ -1122,7 +1124,7 @@ void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
tpc_mask = tpc_mask =
gr_gpcs_gpccs_gpc_exception_en_tpc_f( gr_gpcs_gpccs_gpc_exception_en_tpc_f(
BIT32(gr->max_tpc_per_gpc_count) - 1U); BIT32(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config)) - 1U);
gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
(tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) | (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) |
@@ -1291,17 +1293,19 @@ u32 gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g)
gr->alpha_cb_size = gr->alpha_cb_default_size; gr->alpha_cb_size = gr->alpha_cb_default_size;
gr->attrib_cb_size = min(gr->attrib_cb_size, gr->attrib_cb_size = min(gr->attrib_cb_size,
gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / g->gr.tpc_count); gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) /
nvgpu_gr_config_get_tpc_count(gr->config));
gr->alpha_cb_size = min(gr->alpha_cb_size, gr->alpha_cb_size = min(gr->alpha_cb_size,
gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / g->gr.tpc_count); gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) /
nvgpu_gr_config_get_tpc_count(gr->config));
size = gr->attrib_cb_size * size = gr->attrib_cb_size *
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
gr->max_tpc_count; nvgpu_gr_config_get_max_tpc_count(gr->config);
size += gr->alpha_cb_size * size += gr->alpha_cb_size *
gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
gr->max_tpc_count; nvgpu_gr_config_get_max_tpc_count(gr->config);
size = ALIGN(size, 128); size = ALIGN(size, 128);
@@ -1531,10 +1535,13 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
gr_pd_ab_dist_cfg1_max_batches_init_f()); gr_pd_ab_dist_cfg1_max_batches_init_f());
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_index++) {
stride = proj_gpc_stride_v() * gpc_index; stride = proj_gpc_stride_v() * gpc_index;
for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; for (ppc_index = 0;
ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
ppc_index++) { ppc_index++) {
val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
@@ -1543,7 +1550,7 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
gr->pes_tpc_count[ppc_index][gpc_index])); nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index)));
gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
stride + stride +
@@ -1578,10 +1585,13 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) | ~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) |
gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady)); gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_index++) {
stride = proj_gpc_stride_v() * gpc_index; stride = proj_gpc_stride_v() * gpc_index;
for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; for (ppc_index = 0;
ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
ppc_index++) { ppc_index++) {
val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
@@ -1591,7 +1601,8 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
val = set_field(val, val = set_field(val,
gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
gr->pes_tpc_count[ppc_index][gpc_index])); nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc_index, ppc_index)));
gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
stride + stride +
@@ -1610,7 +1621,7 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
gr_gpcs_swdx_tc_beta_cb_size_v_m(), gr_gpcs_swdx_tc_beta_cb_size_v_m(),
gr_gpcs_swdx_tc_beta_cb_size_v_f( gr_gpcs_swdx_tc_beta_cb_size_v_f(
cb_size_steady * cb_size_steady *
gr->gpc_ppc_count[gpc_index])); nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index)));
gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
ppc_index + gpc_index), val); ppc_index + gpc_index), val);
@@ -1671,7 +1682,7 @@ int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
g->gr.max_tpc_count; nvgpu_gr_config_get_max_tpc_count(g->gr.config);
attrib_cb_size = ALIGN(attrib_cb_size, 128); attrib_cb_size = ALIGN(attrib_cb_size, 128);
nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size); nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
@@ -1911,10 +1922,12 @@ static void gr_gv11b_dump_gr_sm_regs(struct gk20a *g,
gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r())); gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r()));
sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
gpc_offset = gk20a_gr_gpc_offset(g, gpc); gpc_offset = gk20a_gr_gpc_offset(g, gpc);
for (tpc = 0; tpc < g->gr.gpc_tpc_count[gpc]; tpc++) { for (tpc = 0;
tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc);
tpc++) {
tpc_offset = gk20a_gr_tpc_offset(g, tpc); tpc_offset = gk20a_gr_tpc_offset(g, tpc);
for (sm = 0; sm < sm_per_tpc; sm++) { for (sm = 0; sm < sm_per_tpc; sm++) {
@@ -1976,7 +1989,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r())); gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
} }
@@ -1992,7 +2005,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) { if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r())); gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
} }
@@ -2170,18 +2183,18 @@ void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{ {
u32 fuse_val; u32 fuse_val;
if (g->gr.gpc_tpc_mask[gpc_index] == 0U) { if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0U) {
return; return;
} }
/* /*
* For s/w value g->gr.gpc_tpc_mask[gpc_index], bit value 1 indicates * For s/w value nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index), bit value 1 indicates
* corresponding TPC is enabled. But for h/w fuse register, bit value 1 * corresponding TPC is enabled. But for h/w fuse register, bit value 1
* indicates corresponding TPC is disabled. * indicates corresponding TPC is disabled.
* So we need to flip the bits and ensure we don't write to bits greater * So we need to flip the bits and ensure we don't write to bits greater
* than TPC count * than TPC count
*/ */
fuse_val = g->gr.gpc_tpc_mask[gpc_index]; fuse_val = nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index);
fuse_val = ~fuse_val; fuse_val = ~fuse_val;
fuse_val = fuse_val & 0xfU; /* tpc0_disable fuse is only 4-bit wide */ fuse_val = fuse_val & 0xfU; /* tpc0_disable fuse is only 4-bit wide */
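
The fuse value written by gr_gv11b_set_gpc_tpc_mask is the bitwise inverse of the s/w mask, clipped to the 4-bit tpc0_disable fuse. With assumed values, a 4-TPC GPC whose s/w mask is 0x7 (TPC0..TPC2 enabled) yields (~0x7) & 0xF == 0x8, i.e. only TPC3 marked disabled:

u32 sw_mask  = nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index); /* 0x7 */
u32 fuse_val = (~sw_mask) & 0xfU;                                         /* 0x8 */
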
@@ -2678,13 +2691,15 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
if (gr->map_tiles == NULL) { if (gr->config->map_tiles == NULL) {
return -1; return -1;
} }
gk20a_writel(g, gr_crstr_map_table_cfg_r(), gk20a_writel(g, gr_crstr_map_table_cfg_r(),
gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) | gr_crstr_map_table_cfg_row_offset_f(
gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count)); nvgpu_gr_config_get_map_row_offset(gr->config)) |
gr_crstr_map_table_cfg_num_entries_f(
nvgpu_gr_config_get_tpc_count(gr->config)));
/* /*
* 6 tpc can be stored in one map register. * 6 tpc can be stored in one map register.
* But number of tpcs are not always multiple of six, * But number of tpcs are not always multiple of six,
@@ -2702,27 +2717,33 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
switch (offset) { switch (offset) {
case 0: case 0:
map = map | gr_crstr_gpc_map_tile0_f( map = map | gr_crstr_gpc_map_tile0_f(
gr->map_tiles[base + offset]); nvgpu_gr_config_get_map_tile_count(
gr->config, base + offset));
break; break;
case 1: case 1:
map = map | gr_crstr_gpc_map_tile1_f( map = map | gr_crstr_gpc_map_tile1_f(
gr->map_tiles[base + offset]); nvgpu_gr_config_get_map_tile_count(
gr->config, base + offset));
break; break;
case 2: case 2:
map = map | gr_crstr_gpc_map_tile2_f( map = map | gr_crstr_gpc_map_tile2_f(
gr->map_tiles[base + offset]); nvgpu_gr_config_get_map_tile_count(
gr->config, base + offset));
break; break;
case 3: case 3:
map = map | gr_crstr_gpc_map_tile3_f( map = map | gr_crstr_gpc_map_tile3_f(
gr->map_tiles[base + offset]); nvgpu_gr_config_get_map_tile_count(
gr->config, base + offset));
break; break;
case 4: case 4:
map = map | gr_crstr_gpc_map_tile4_f( map = map | gr_crstr_gpc_map_tile4_f(
gr->map_tiles[base + offset]); nvgpu_gr_config_get_map_tile_count(
gr->config, base + offset));
break; break;
case 5: case 5:
map = map | gr_crstr_gpc_map_tile5_f( map = map | gr_crstr_gpc_map_tile5_f(
gr->map_tiles[base + offset]); nvgpu_gr_config_get_map_tile_count(
gr->config, base + offset));
break; break;
default: default:
nvgpu_err(g, "incorrect rop mapping %x", offset); nvgpu_err(g, "incorrect rop mapping %x", offset);
@@ -2736,25 +2757,33 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
} }
gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(), gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) | gr_ppcs_wwdx_map_table_cfg_row_offset_f(
gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count)); nvgpu_gr_config_get_map_row_offset(gr->config)) |
gr_ppcs_wwdx_map_table_cfg_num_entries_f(
nvgpu_gr_config_get_tpc_count(gr->config)));
for (i = 0U, j = 1U; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v(); for (i = 0U, j = 1U; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v();
i++, j = j + 4U) { i++, j = j + 4U) {
gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i), gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f( gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
(BIT32(j) % gr->tpc_count)) | (BIT32(j) %
nvgpu_gr_config_get_tpc_count(gr->config))) |
gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f( gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
(BIT32(j + 1U) % gr->tpc_count)) | (BIT32(j + 1U) %
nvgpu_gr_config_get_tpc_count(gr->config))) |
gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f( gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
(BIT32(j + 2U) % gr->tpc_count)) | (BIT32(j + 2U) %
nvgpu_gr_config_get_tpc_count(gr->config))) |
gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f( gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
(BIT32(j + 3U) % gr->tpc_count))); (BIT32(j + 3U) %
nvgpu_gr_config_get_tpc_count(gr->config))));
} }
gk20a_writel(g, gr_rstr2d_map_table_cfg_r(), gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) | gr_rstr2d_map_table_cfg_row_offset_f(
gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count)); nvgpu_gr_config_get_map_row_offset(gr->config)) |
gr_rstr2d_map_table_cfg_num_entries_f(
nvgpu_gr_config_get_tpc_count(gr->config)));
return 0; return 0;
} }
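
The wwdx coefficient registers above encode BIT32(j + k) modulo the TPC count. For a hypothetical tpc_count of 4, the first register (j == 1) holds the mod values 2 % 4 == 2, 4 % 4 == 0, 8 % 4 == 0 and 16 % 4 == 0; the values below are illustrative only:

u32 tpc_count = nvgpu_gr_config_get_tpc_count(gr->config); /* assume 4 */
u32 coeff_0   = BIT32(1U) % tpc_count;                     /* 2 */
u32 coeff_1   = BIT32(2U) % tpc_count;                     /* 0 */
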
@@ -2867,13 +2896,18 @@ u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc)
u32 pes; u32 pes;
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
for (pes = 0U; pes < gr->gpc_ppc_count[gpc]; pes++) { for (pes = 0U;
if ((gr->pes_tpc_mask[pes][gpc] & BIT32(tpc)) != 0U) { pes < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc);
pes++) {
if ((nvgpu_gr_config_get_pes_tpc_mask(gr->config, gpc, pes) &
BIT32(tpc)) != 0U) {
break; break;
} }
tpc_new += gr->pes_tpc_count[pes][gpc]; tpc_new += nvgpu_gr_config_get_pes_tpc_count(gr->config,
gpc, pes);
} }
temp = (BIT32(tpc) - 1U) & gr->pes_tpc_mask[pes][gpc]; temp = (BIT32(tpc) - 1U) &
nvgpu_gr_config_get_pes_tpc_mask(gr->config, gpc, pes);
temp = (u32)hweight32(temp); temp = (u32)hweight32(temp);
tpc_new += temp; tpc_new += temp;
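
gr_gv11b_get_nonpes_aware_tpc counts whole PES units ahead of the requested TPC, then the set bits below it within its own PES mask. A walk-through with assumed values: two PES units with pes_tpc_mask 0x5 (TPCs 0 and 2) and 0xA (TPCs 1 and 3). For tpc == 2 the loop breaks at pes 0 without adding anything, temp == hweight32((BIT32(2U) - 1U) & 0x5U) == 1, so the result is 1: TPC2 is the second TPC owned by PES0.
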
@@ -2916,7 +2950,9 @@ int gr_gv11b_load_smid_config(struct gk20a *g)
} }
/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) { for (i = 0U;
i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
i++) {
u32 reg = 0; u32 reg = 0;
u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
gr_cwd_gpc_tpc_id_tpc0_s(); gr_cwd_gpc_tpc_id_tpc0_s();
@@ -3039,24 +3075,28 @@ void gr_gv11b_load_tpc_mask(struct gk20a *g)
{ {
u32 pes_tpc_mask = 0, fuse_tpc_mask; u32 pes_tpc_mask = 0, fuse_tpc_mask;
u32 gpc, pes, val; u32 gpc, pes, val;
u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_TPC_PER_GPC); GPU_LIT_NUM_TPC_PER_GPC);
/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */ /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) { for (pes = 0;
pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] << pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
pes++) {
pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
g->gr.config, gpc, pes) <<
num_tpc_per_gpc * gpc; num_tpc_per_gpc * gpc;
} }
} }
nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask); nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, gpc); fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc);
if ((g->tpc_fs_mask_user != 0U) && if ((g->tpc_fs_mask_user != 0U) &&
(g->tpc_fs_mask_user != fuse_tpc_mask) && (g->tpc_fs_mask_user != fuse_tpc_mask) &&
(fuse_tpc_mask == BIT32(g->gr.max_tpc_count) - U32(1))) { (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
val = g->tpc_fs_mask_user; val = g->tpc_fs_mask_user;
val &= BIT32(g->gr.max_tpc_count) - U32(1); val &= BIT32(max_tpc_count) - U32(1);
val = BIT32(hweight32(val)) - U32(1); val = BIT32(hweight32(val)) - U32(1);
gk20a_writel(g, gr_fe_tpc_fs_r(0), val); gk20a_writel(g, gr_fe_tpc_fs_r(0), val);
} else { } else {
@@ -3506,8 +3546,10 @@ void gv11b_gr_suspend_all_sms(struct gk20a *g,
gk20a_writel(g, gk20a_writel(g,
gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0); gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0);
for (gpc = 0; gpc < gr->gpc_count; gpc++) { for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { for (tpc = 0;
tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc);
tpc++) {
for (sm = 0; sm < sm_per_tpc; sm++) { for (sm = 0; sm < sm_per_tpc; sm++) {
err = g->ops.gr.wait_for_sm_lock_down(g, err = g->ops.gr.wait_for_sm_lock_down(g,
gpc, tpc, sm, gpc, tpc, sm,
@@ -4254,12 +4296,14 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
*/ */
if ((broadcast_flags & PRI_BROADCAST_FLAGS_EGPC) != 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_EGPC) != 0U) {
nvgpu_log_info(g, "broadcast flags egpc"); nvgpu_log_info(g, "broadcast flags egpc");
for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { for (gpc_num = 0;
gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config);
gpc_num++) {
if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) {
nvgpu_log_info(g, "broadcast flags etpc"); nvgpu_log_info(g, "broadcast flags etpc");
for (tpc_num = 0; for (tpc_num = 0;
tpc_num < g->gr.gpc_tpc_count[gpc_num]; tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num);
tpc_num++) { tpc_num++) {
if ((broadcast_flags & if ((broadcast_flags &
PRI_BROADCAST_FLAGS_SMPC) != 0U) { PRI_BROADCAST_FLAGS_SMPC) != 0U) {
@@ -4289,7 +4333,7 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
gpc_addr = pri_gpccs_addr_mask(priv_addr); gpc_addr = pri_gpccs_addr_mask(priv_addr);
tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) { if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) {
continue; continue;
} }
@@ -4303,7 +4347,7 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) {
nvgpu_log_info(g, "broadcast flags etpc but not egpc"); nvgpu_log_info(g, "broadcast flags etpc but not egpc");
for (tpc_num = 0; for (tpc_num = 0;
tpc_num < g->gr.gpc_tpc_count[gpc_num]; tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num);
tpc_num++) { tpc_num++) {
if ((broadcast_flags & if ((broadcast_flags &
PRI_BROADCAST_FLAGS_SMPC) != 0U) { PRI_BROADCAST_FLAGS_SMPC) != 0U) {
@@ -4425,10 +4469,12 @@ static int gr_gv11b_ecc_scrub_is_done(struct gk20a *g,
return err; return err;
} }
for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
gpc_offset = gk20a_gr_gpc_offset(g, gpc); gpc_offset = gk20a_gr_gpc_offset(g, gpc);
for (tpc = 0; tpc < g->gr.gpc_tpc_count[gpc]; tpc++) { for (tpc = 0;
tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc);
tpc++) {
tpc_offset = gk20a_gr_tpc_offset(g, tpc); tpc_offset = gk20a_gr_tpc_offset(g, tpc);
do { do {
@@ -4946,11 +4992,13 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
* that we can look up the offsets * that we can look up the offsets
*/ */
if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) != 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) != 0U) {
for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { for (gpc_num = 0;
gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config);
gpc_num++) {
if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) {
for (tpc_num = 0; for (tpc_num = 0;
tpc_num < g->gr.gpc_tpc_count[gpc_num]; tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num);
tpc_num++) { tpc_num++) {
priv_addr_table[t++] = priv_addr_table[t++] =
pri_tpc_addr(g, pri_tpc_addr(g,
@@ -4972,7 +5020,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
gpc_addr = pri_gpccs_addr_mask(priv_addr); gpc_addr = pri_gpccs_addr_mask(priv_addr);
tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) { if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) {
continue; continue;
} }
@@ -5010,7 +5058,9 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
return -EINVAL; return -EINVAL;
} }
for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { for (gpc_num = 0;
gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config);
gpc_num++) {
for (domain_idx = pmm_domain_start; for (domain_idx = pmm_domain_start;
domain_idx < (pmm_domain_start + num_domains); domain_idx < (pmm_domain_start + num_domains);
domain_idx++) { domain_idx++) {
@@ -5063,7 +5113,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
} else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) { } else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) {
if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) {
for (tpc_num = 0; for (tpc_num = 0;
tpc_num < g->gr.gpc_tpc_count[gpc_num]; tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num);
tpc_num++) { tpc_num++) {
priv_addr_table[t++] = priv_addr_table[t++] =
pri_tpc_addr(g, pri_tpc_addr(g,

View File

@@ -39,6 +39,7 @@
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
#include "common/gr/config/gr_config_gm20b.h"
#include "common/therm/therm_gm20b.h" #include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h" #include "common/therm/therm_gp10b.h"
#include "common/therm/therm_gv11b.h" #include "common/therm/therm_gv11b.h"
@@ -314,7 +315,6 @@ static const struct gpu_ops gv11b_ops = {
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,
@@ -544,6 +544,16 @@ static const struct gpu_ops gv11b_ops = {
.set_type_per_veid_header = .set_type_per_veid_header =
gv11b_ctxsw_prog_set_type_per_veid_header, gv11b_ctxsw_prog_set_type_per_veid_header,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
},
.config = {
.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
.get_tpc_count_in_gpc =
gm20b_gr_config_get_tpc_count_in_gpc,
.get_zcull_count_in_gpc =
gm20b_gr_config_get_zcull_count_in_gpc,
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
} }
}, },
.fb = { .fb = {
@@ -1006,6 +1016,7 @@ int gv11b_init_hal(struct gk20a *g)
gops->ce2 = gv11b_ops.ce2; gops->ce2 = gv11b_ops.ce2;
gops->gr = gv11b_ops.gr; gops->gr = gv11b_ops.gr;
gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog; gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog;
gops->gr.config = gv11b_ops.gr.config;
gops->fb = gv11b_ops.fb; gops->fb = gv11b_ops.fb;
gops->clock_gating = gv11b_ops.clock_gating; gops->clock_gating = gv11b_ops.clock_gating;
gops->fifo = gv11b_ops.fifo; gops->fifo = gv11b_ops.fifo;

View File

@@ -291,7 +291,6 @@ struct gpu_ops {
u32 reg_offset); u32 reg_offset);
int (*load_ctxsw_ucode)(struct gk20a *g); int (*load_ctxsw_ucode)(struct gk20a *g);
u32 (*get_gpc_mask)(struct gk20a *g); u32 (*get_gpc_mask)(struct gk20a *g);
u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
int (*alloc_obj_ctx)(struct channel_gk20a *c, int (*alloc_obj_ctx)(struct channel_gk20a *c,
u32 class_num, u32 flags); u32 class_num, u32 flags);
@@ -622,6 +621,20 @@ struct gpu_ops {
void (*dump_ctxsw_stats)(struct gk20a *g, void (*dump_ctxsw_stats)(struct gk20a *g,
struct nvgpu_mem *ctx_mem); struct nvgpu_mem *ctx_mem);
} ctxsw_prog; } ctxsw_prog;
struct {
u32 (*get_gpc_tpc_mask)(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 (*get_tpc_count_in_gpc)(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 (*get_zcull_count_in_gpc)(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 (*get_pes_tpc_mask)(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index,
u32 pes_index);
u32 (*get_pd_dist_skip_table_size)(void);
} config;
u32 (*fecs_falcon_base_addr)(void); u32 (*fecs_falcon_base_addr)(void);
u32 (*gpccs_falcon_base_addr)(void); u32 (*gpccs_falcon_base_addr)(void);

View File

@@ -0,0 +1,96 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_CONFIG_H
#define NVGPU_GR_CONFIG_H
#include <nvgpu/types.h>
#define GK20A_GR_MAX_PES_PER_GPC 3U
struct gk20a;
struct nvgpu_gr_config {
u32 max_gpc_count;
u32 max_tpc_per_gpc_count;
u32 max_zcull_per_gpc_count;
u32 max_tpc_count;
u32 gpc_count;
u32 tpc_count;
u32 ppc_count;
u32 zcb_count;
u32 pe_count_per_gpc;
u32 *gpc_ppc_count;
u32 *gpc_tpc_count;
u32 *gpc_zcb_count;
u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
u32 *gpc_tpc_mask;
u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
u32 *gpc_skip_mask;
u8 *map_tiles;
u32 map_tile_count;
u32 map_row_offset;
};
struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g);
void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config);
int nvgpu_gr_config_init_map_tiles(struct gk20a *g,
struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config,
u32 index);
u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config);
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
u32 gpc_index);
u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
u32 gpc_index);
u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
u32 gpc_index);
u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index);
u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index);
u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
u32 gpc_index);
u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index);
#endif /* NVGPU_GR_CONFIG_H */
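
The header above is the whole public surface of the new unit. A sketch of the intended lifecycle, pieced together from these declarations and the commit message (error handling abbreviated; the -ENOMEM return value is an assumption):

struct gr_gk20a *gr = &g->gr;

gr->config = nvgpu_gr_config_init(g);
if (gr->config == NULL) {
	return -ENOMEM;
}

if (nvgpu_gr_config_init_map_tiles(g, gr->config) != 0) {
	nvgpu_gr_config_deinit(g, gr->config);
	return -ENOMEM;
}

nvgpu_log_info(g, "gpc_count %u tpc_count %u",
	nvgpu_gr_config_get_gpc_count(gr->config),
	nvgpu_gr_config_get_tpc_count(gr->config));

/* On teardown, or before forcing a re-init as tpc_fs_mask_store does: */
nvgpu_gr_config_deinit(g, gr->config);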

View File

@@ -32,6 +32,7 @@
#include <nvgpu/list.h> #include <nvgpu/list.h>
#include <nvgpu/clk_arb.h> #include <nvgpu/clk_arb.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/channel.h> #include <nvgpu/channel.h>
#include <nvgpu/pmu/pmgr.h> #include <nvgpu/pmu/pmgr.h>
@@ -281,10 +282,10 @@ gk20a_ctrl_ioctl_gpu_characteristics(
gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
gpu.on_board_video_memory_size = 0; /* integrated GPU */ gpu.on_board_video_memory_size = 0; /* integrated GPU */
gpu.num_gpc = g->gr.gpc_count; gpu.num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config);
gpu.max_gpc_count = g->gr.max_gpc_count; gpu.max_gpc_count = nvgpu_gr_config_get_max_gpc_count(g->gr.config);
gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; gpu.num_tpc_per_gpc = nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
@@ -293,7 +294,7 @@ gk20a_ctrl_ioctl_gpu_characteristics(
if (g->ops.gr.get_gpc_mask) { if (g->ops.gr.get_gpc_mask) {
gpu.gpc_mask = g->ops.gr.get_gpc_mask(g); gpu.gpc_mask = g->ops.gr.get_gpc_mask(g);
} else { } else {
gpu.gpc_mask = BIT32(g->gr.gpc_count) - 1; gpu.gpc_mask = BIT32(gpu.num_gpc) - 1;
} }
gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g);
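
When no get_gpc_mask HAL is present, the characteristics ioctl now derives the fallback from the queried GPC count. With an assumed num_gpc of 2 this reports 0x3, i.e. both GPCs present:

u32 num_gpc  = nvgpu_gr_config_get_gpc_count(g->gr.config); /* assume 2 */
u32 gpc_mask = BIT32(num_gpc) - 1U;                         /* 0x3 */
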
@@ -553,7 +554,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
int err = 0; int err = 0;
const u32 gpc_tpc_mask_size = sizeof(u32) * gr->max_gpc_count; const u32 gpc_tpc_mask_size = sizeof(u32) * gr->config->max_gpc_count;
if (args->mask_buf_size > 0) { if (args->mask_buf_size > 0) {
size_t write_size = gpc_tpc_mask_size; size_t write_size = gpc_tpc_mask_size;
@@ -564,7 +565,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
err = copy_to_user((void __user *)(uintptr_t) err = copy_to_user((void __user *)(uintptr_t)
args->mask_buf_addr, args->mask_buf_addr,
gr->gpc_tpc_mask, write_size); gr->config->gpc_tpc_mask, write_size);
} }
if (err == 0) if (err == 0)
@@ -687,7 +688,8 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
struct nvgpu_warpstate *w_state = NULL; struct nvgpu_warpstate *w_state = NULL;
u32 sm_count, ioctl_size, size, sm_id; u32 sm_count, ioctl_size, size, sm_id;
sm_count = g->gr.gpc_count * g->gr.tpc_count; sm_count = nvgpu_gr_config_get_gpc_count(g->gr.config) *
nvgpu_gr_config_get_tpc_count(g->gr.config);
ioctl_size = sm_count * sizeof(struct warpstate); ioctl_size = sm_count * sizeof(struct warpstate);
ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); ioctl_w_state = nvgpu_kzalloc(g, ioctl_size);

View File

@@ -26,6 +26,7 @@
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/os_sched.h> #include <nvgpu/os_sched.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/channel.h> #include <nvgpu/channel.h>
#include <nvgpu/tsg.h> #include <nvgpu/tsg.h>
@@ -84,7 +85,8 @@ static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
} }
if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) {
if ((arg->num_active_tpcs > gr->max_tpc_count) || if ((arg->num_active_tpcs >
nvgpu_gr_config_get_max_tpc_count(gr->config)) ||
!(arg->num_active_tpcs)) { !(arg->num_active_tpcs)) {
nvgpu_err(g, "Invalid num of active TPCs"); nvgpu_err(g, "Invalid num of active TPCs");
err = -EINVAL; err = -EINVAL;

View File

@@ -23,6 +23,7 @@
#include <nvgpu/ptimer.h> #include <nvgpu/ptimer.h>
#include <nvgpu/string.h> #include <nvgpu/string.h>
#include <nvgpu/gr/global_ctx.h> #include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/config.h>
#include "os_linux.h" #include "os_linux.h"
#include "sysfs.h" #include "sysfs.h"
@@ -930,16 +931,17 @@ static ssize_t tpc_fs_mask_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count) struct device_attribute *attr, const char *buf, size_t count)
{ {
struct gk20a *g = get_gk20a(dev); struct gk20a *g = get_gk20a(dev);
struct nvgpu_gr_config *config = g->gr.config;
unsigned long val = 0; unsigned long val = 0;
if (kstrtoul(buf, 10, &val) < 0) if (kstrtoul(buf, 10, &val) < 0)
return -EINVAL; return -EINVAL;
if (!g->gr.gpc_tpc_mask) if (!config->gpc_tpc_mask)
return -ENODEV; return -ENODEV;
if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { if (val && val != config->gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) {
g->gr.gpc_tpc_mask[0] = val; config->gpc_tpc_mask[0] = val;
g->tpc_fs_mask_user = val; g->tpc_fs_mask_user = val;
g->ops.gr.set_gpc_tpc_mask(g, 0); g->ops.gr.set_gpc_tpc_mask(g, 0);
@@ -951,6 +953,7 @@ static ssize_t tpc_fs_mask_store(struct device *dev,
g->gr.ctx_vars.golden_image_initialized = false; g->gr.ctx_vars.golden_image_initialized = false;
} }
g->gr.ctx_vars.golden_image_size = 0; g->gr.ctx_vars.golden_image_size = 0;
nvgpu_gr_config_deinit(g, g->gr.config);
/* Cause next poweron to reinit just gr */ /* Cause next poweron to reinit just gr */
g->gr.sw_ready = false; g->gr.sw_ready = false;
} }
@@ -971,11 +974,13 @@ static ssize_t tpc_fs_mask_read(struct device *dev,
if (err) if (err)
return err; return err;
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { for (gpc_index = 0;
if (g->ops.gr.get_gpc_tpc_mask) gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_index++) {
if (g->ops.gr.config.get_gpc_tpc_mask)
tpc_fs_mask |= tpc_fs_mask |=
g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << g->ops.gr.config.get_gpc_tpc_mask(g, gr->config, gpc_index) <<
(gr->max_tpc_per_gpc_count * gpc_index); (nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * gpc_index);
} }
gk20a_idle(g); gk20a_idle(g);
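
tpc_fs_mask_read composes one sysfs value from the per-GPC masks, shifting each by max_tpc_per_gpc_count * gpc_index. With assumed values, a 2-GPC part with 4 TPCs per GPC and per-GPC masks 0xF and 0x7 reports 0xF | (0x7 << 4) == 0x7F:

u32 stride      = nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config); /* assume 4 */
u32 tpc_fs_mask = 0xFU | (0x7U << stride);                               /* 0x7F */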

View File

@@ -29,6 +29,7 @@
#include <nvgpu/netlist.h> #include <nvgpu/netlist.h>
#include <nvgpu/gr/global_ctx.h> #include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include "gk20a/gr_gk20a.h" #include "gk20a/gr_gk20a.h"
#include "gk20a/gr_pri_gk20a.h" #include "gk20a/gr_pri_gk20a.h"
@@ -304,7 +305,8 @@ void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
tpc_mask = tpc_mask =
gr_gpcs_gpccs_gpc_exception_en_tpc_f(BIT32(gr->max_tpc_per_gpc_count) - 1U); gr_gpcs_gpccs_gpc_exception_en_tpc_f(
BIT32(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config)) - 1U);
gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
(tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) | (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) |

View File

@@ -41,6 +41,7 @@
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
#include "common/gr/config/gr_config_gm20b.h"
#include "common/therm/therm_gm20b.h" #include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h" #include "common/therm/therm_gp10b.h"
#include "common/therm/therm_gp106.h" #include "common/therm/therm_gp106.h"
@@ -378,7 +379,6 @@ static const struct gpu_ops tu104_ops = {
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.get_gpc_mask = gr_gm20b_get_gpc_mask, .get_gpc_mask = gr_gm20b_get_gpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
@@ -608,6 +608,16 @@ static const struct gpu_ops tu104_ops = {
.set_type_per_veid_header = .set_type_per_veid_header =
gv11b_ctxsw_prog_set_type_per_veid_header, gv11b_ctxsw_prog_set_type_per_veid_header,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
},
.config = {
.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
.get_tpc_count_in_gpc =
gm20b_gr_config_get_tpc_count_in_gpc,
.get_zcull_count_in_gpc =
gm20b_gr_config_get_zcull_count_in_gpc,
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
} }
}, },
.fb = { .fb = {
@@ -1173,6 +1183,7 @@ int tu104_init_hal(struct gk20a *g)
gops->ce2 = tu104_ops.ce2; gops->ce2 = tu104_ops.ce2;
gops->gr = tu104_ops.gr; gops->gr = tu104_ops.gr;
gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog; gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog;
gops->gr.config = tu104_ops.gr.config;
gops->fb = tu104_ops.fb; gops->fb = tu104_ops.fb;
gops->nvdec = tu104_ops.nvdec; gops->nvdec = tu104_ops.nvdec;
gops->clock_gating = tu104_ops.clock_gating; gops->clock_gating = tu104_ops.clock_gating;

View File

@@ -27,6 +27,7 @@
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/channel.h> #include <nvgpu/channel.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include "vgpu/gm20b/vgpu_gr_gm20b.h" #include "vgpu/gm20b/vgpu_gr_gm20b.h"
@@ -128,7 +129,7 @@ int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
g->gr.max_tpc_count; nvgpu_gr_config_get_max_tpc_count(g->gr.config);
struct nvgpu_mem *desc; struct nvgpu_mem *desc;
attrib_cb_size = ALIGN(attrib_cb_size, 128); attrib_cb_size = ALIGN(attrib_cb_size, 128);

View File

@@ -30,6 +30,7 @@
#include "common/netlist/netlist_gp10b.h" #include "common/netlist/netlist_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/config/gr_config_gm20b.h"
#include "common/therm/therm_gm20b.h" #include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h" #include "common/therm/therm_gp10b.h"
#include "common/ltc/ltc_gm20b.h" #include "common/ltc/ltc_gm20b.h"
@@ -128,7 +129,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.falcon_load_ucode = NULL, .falcon_load_ucode = NULL,
.load_ctxsw_ucode = NULL, .load_ctxsw_ucode = NULL,
.set_gpc_tpc_mask = NULL, .set_gpc_tpc_mask = NULL,
.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
.get_zcull_info = vgpu_gr_get_zcull_info, .get_zcull_info = vgpu_gr_get_zcull_info,
@@ -304,6 +304,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.set_full_preemption_ptr = .set_full_preemption_ptr =
gp10b_ctxsw_prog_set_full_preemption_ptr, gp10b_ctxsw_prog_set_full_preemption_ptr,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
},
.config = {
.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
} }
}, },
.fb = { .fb = {
@@ -659,6 +662,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g)
gops->ce2 = vgpu_gp10b_ops.ce2; gops->ce2 = vgpu_gp10b_ops.ce2;
gops->gr = vgpu_gp10b_ops.gr; gops->gr = vgpu_gp10b_ops.gr;
gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog; gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog;
gops->gr.config = vgpu_gp10b_ops.gr.config;
gops->fb = vgpu_gp10b_ops.fb; gops->fb = vgpu_gp10b_ops.fb;
gops->clock_gating = vgpu_gp10b_ops.clock_gating; gops->clock_gating = vgpu_gp10b_ops.clock_gating;
gops->fifo = vgpu_gp10b_ops.fifo; gops->fifo = vgpu_gp10b_ops.fifo;
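
The vgpu variant fills only get_gpc_tpc_mask in the config sub-struct, so callers keep the NULL check before dispatching, as vgpu_gr_init_gr_config does in the next file. Sketch of that guarded pattern (gpc_index assumed valid):

if (g->ops.gr.config.get_gpc_tpc_mask != NULL) {
	config->gpc_tpc_mask[gpc_index] =
		g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc_index);
}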

View File

@@ -36,6 +36,7 @@
#include <nvgpu/string.h> #include <nvgpu/string.h>
#include <nvgpu/gr/global_ctx.h> #include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include "gr_vgpu.h" #include "gr_vgpu.h"
#include "gk20a/fecs_trace_gk20a.h" #include "gk20a/fecs_trace_gk20a.h"
@@ -582,6 +583,7 @@ out:
static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{ {
struct vgpu_priv_data *priv = vgpu_get_priv_data(g); struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
struct nvgpu_gr_config *config;
u32 gpc_index; u32 gpc_index;
u32 sm_per_tpc; u32 sm_per_tpc;
u32 pes_index; u32 pes_index;
@@ -589,79 +591,87 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	nvgpu_log_fn(g, " ");
-	gr->max_gpc_count = priv->constants.max_gpc_count;
-	gr->gpc_count = priv->constants.gpc_count;
-	gr->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count;
-	gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
-	gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
-	if (!gr->gpc_tpc_count) {
+	gr->config = nvgpu_kzalloc(g, sizeof(*gr->config));
+	if (gr->config == NULL) {
+		return -ENOMEM;
+	}
+	config = gr->config;
+	config->max_gpc_count = priv->constants.max_gpc_count;
+	config->gpc_count = priv->constants.gpc_count;
+	config->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count;
+	config->max_tpc_count = config->max_gpc_count * config->max_tpc_per_gpc_count;
+	config->gpc_tpc_count = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32));
+	if (!config->gpc_tpc_count) {
 		goto cleanup;
 	}
-	gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
-	if (!gr->gpc_tpc_mask) {
+	config->gpc_tpc_mask = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32));
+	if (!config->gpc_tpc_mask) {
 		goto cleanup;
 	}
 	sm_per_tpc = priv->constants.sm_per_tpc;
-	gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count *
-		gr->max_tpc_per_gpc_count *
+	gr->sm_to_cluster = nvgpu_kzalloc(g, config->gpc_count *
+		config->max_tpc_per_gpc_count *
 		sm_per_tpc *
 		sizeof(struct sm_info));
 	if (!gr->sm_to_cluster) {
 		goto cleanup;
 	}
-	gr->tpc_count = 0;
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		gr->gpc_tpc_count[gpc_index] =
+	config->tpc_count = 0;
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		config->gpc_tpc_count[gpc_index] =
 			priv->constants.gpc_tpc_count[gpc_index];
-		gr->tpc_count += gr->gpc_tpc_count[gpc_index];
-		if (g->ops.gr.get_gpc_tpc_mask) {
-			gr->gpc_tpc_mask[gpc_index] =
-				g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
+		config->tpc_count += config->gpc_tpc_count[gpc_index];
+		if (g->ops.gr.config.get_gpc_tpc_mask) {
+			gr->config->gpc_tpc_mask[gpc_index] =
+				g->ops.gr.config.get_gpc_tpc_mask(g,
+					g->gr.config, gpc_index);
 		}
 	}
-	gr->pe_count_per_gpc =
+	config->pe_count_per_gpc =
 		nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
-	if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
-		"too many pes per gpc %u\n", gr->pe_count_per_gpc)) {
+	if (WARN(config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
+		"too many pes per gpc %u\n", config->pe_count_per_gpc)) {
 		goto cleanup;
 	}
-	if (gr->pe_count_per_gpc > TEGRA_VGPU_MAX_PES_COUNT_PER_GPC) {
+	if (config->pe_count_per_gpc > TEGRA_VGPU_MAX_PES_COUNT_PER_GPC) {
 		nvgpu_err(g, "pe_count_per_gpc %d is too big!",
-			gr->pe_count_per_gpc);
+			config->pe_count_per_gpc);
 		goto cleanup;
 	}
-	if (gr->gpc_ppc_count == NULL) {
-		gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count *
+	if (config->gpc_ppc_count == NULL) {
+		config->gpc_ppc_count = nvgpu_kzalloc(g, config->gpc_count *
 			sizeof(u32));
 	} else {
-		(void) memset(gr->gpc_ppc_count, 0, gr->gpc_count *
+		(void) memset(config->gpc_ppc_count, 0, config->gpc_count *
 			sizeof(u32));
 	}
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		gr->gpc_ppc_count[gpc_index] =
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		config->gpc_ppc_count[gpc_index] =
 			priv->constants.gpc_ppc_count[gpc_index];
-		for (pes_index = 0u; pes_index < gr->pe_count_per_gpc;
+		for (pes_index = 0u; pes_index < config->pe_count_per_gpc;
 			pes_index++) {
 			u32 pes_tpc_count, pes_tpc_mask;
-			if (gr->pes_tpc_count[pes_index] == NULL) {
-				gr->pes_tpc_count[pes_index] = nvgpu_kzalloc(g,
-					gr->gpc_count * sizeof(u32));
-				gr->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g,
-					gr->gpc_count * sizeof(u32));
-				if (gr->pes_tpc_count[pes_index] == NULL ||
-					gr->pes_tpc_mask[pes_index] == NULL) {
+			if (config->pes_tpc_count[pes_index] == NULL) {
+				config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g,
+					config->gpc_count * sizeof(u32));
+				config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g,
+					config->gpc_count * sizeof(u32));
+				if (config->pes_tpc_count[pes_index] == NULL ||
+					config->pes_tpc_mask[pes_index] == NULL) {
 					goto cleanup;
 				}
 			}
@@ -672,8 +682,8 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 			pes_tpc_mask = priv->constants.
 				pes_tpc_mask[TEGRA_VGPU_MAX_PES_COUNT_PER_GPC *
 				gpc_index + pes_index];
-			gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
-			gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
+			config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
+			config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
 		}
 	}
@@ -688,21 +698,21 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 cleanup:
 	nvgpu_err(g, "out of memory");
-	for (pes_index = 0u; pes_index < gr->pe_count_per_gpc; pes_index++) {
-		nvgpu_kfree(g, gr->pes_tpc_count[pes_index]);
-		gr->pes_tpc_count[pes_index] = NULL;
-		nvgpu_kfree(g, gr->pes_tpc_mask[pes_index]);
-		gr->pes_tpc_mask[pes_index] = NULL;
+	for (pes_index = 0u; pes_index < config->pe_count_per_gpc; pes_index++) {
+		nvgpu_kfree(g, config->pes_tpc_count[pes_index]);
+		config->pes_tpc_count[pes_index] = NULL;
+		nvgpu_kfree(g, config->pes_tpc_mask[pes_index]);
+		config->pes_tpc_mask[pes_index] = NULL;
 	}
-	nvgpu_kfree(g, gr->gpc_ppc_count);
-	gr->gpc_ppc_count = NULL;
-	nvgpu_kfree(g, gr->gpc_tpc_count);
-	gr->gpc_tpc_count = NULL;
-	nvgpu_kfree(g, gr->gpc_tpc_mask);
-	gr->gpc_tpc_mask = NULL;
+	nvgpu_kfree(g, config->gpc_ppc_count);
+	config->gpc_ppc_count = NULL;
+	nvgpu_kfree(g, config->gpc_tpc_count);
+	config->gpc_tpc_count = NULL;
+	nvgpu_kfree(g, config->gpc_tpc_mask);
+	config->gpc_tpc_mask = NULL;
 	return err;
 }
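Once this init path has populated gr->config, other units are meant to read the values through the nvgpu_gr_config_get_*() accessors rather than the old gr_gk20a fields. A minimal usage sketch follows; nvgpu_gr_config_get_gpc_count() is taken from this patch, while nvgpu_gr_config_get_gpc_tpc_count() is an assumed getter name following the same nvgpu_gr_config_get_*() pattern:

/* Sketch only: log the per-GPC TPC distribution via the new
 * gr/config accessors instead of dereferencing gr_gk20a fields.
 * nvgpu_gr_config_get_gpc_tpc_count() is a hypothetical getter. */
static void log_tpc_distribution(struct gk20a *g)
{
	struct nvgpu_gr_config *config = g->gr.config;
	u32 gpc;

	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
		nvgpu_log_info(g, "gpc %u: %u tpcs", gpc,
			nvgpu_gr_config_get_gpc_tpc_count(config, gpc));
	}
}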
@@ -759,7 +769,8 @@ int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
 	return 0;
 }
-u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
+u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config,
+	u32 gpc_index)
 {
 	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
@@ -907,15 +918,11 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr)
 	gk20a_comptag_allocator_destroy(gr->g, &gr->comp_tags);
-	nvgpu_kfree(gr->g, gr->gpc_tpc_mask);
-	gr->gpc_tpc_mask = NULL;
+	nvgpu_gr_config_deinit(gr->g, gr->config);
 	nvgpu_kfree(gr->g, gr->sm_to_cluster);
 	gr->sm_to_cluster = NULL;
-	nvgpu_kfree(gr->g, gr->gpc_tpc_count);
-	gr->gpc_tpc_count = NULL;
 	nvgpu_kfree(gr->g, gr->fbp_rop_l2_en_mask);
 	gr->fbp_rop_l2_en_mask = NULL;
 }
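The per-GPC arrays are now torn down by nvgpu_gr_config_deinit() rather than by open-coded nvgpu_kfree() calls. The real body lives in common/gr/config/gr_config.c and likely also releases the zcull, skip-mask and map_tiles arrays; the sketch below only mirrors the cleanup path shown above and is not the actual implementation:

/* Illustrative sketch, assuming deinit simply frees what init allocated.
 * Whether the config struct itself is freed here or by the caller is an
 * assumption. */
void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
{
	u32 pes_index;

	if (config == NULL) {
		return;
	}
	for (pes_index = 0u; pes_index < config->pe_count_per_gpc; pes_index++) {
		nvgpu_kfree(g, config->pes_tpc_count[pes_index]);
		nvgpu_kfree(g, config->pes_tpc_mask[pes_index]);
	}
	nvgpu_kfree(g, config->gpc_ppc_count);
	nvgpu_kfree(g, config->gpc_tpc_count);
	nvgpu_kfree(g, config->gpc_tpc_mask);
	nvgpu_kfree(g, config);
}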
@@ -1353,6 +1360,7 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
 	struct sm_info *sm_info;
 	int err;
 	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_gr_config *config = gr->config;
 	size_t oob_size;
 	void *handle = NULL;
 	u32 sm_id;
@@ -1374,8 +1382,8 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
 		return -EINVAL;
 	}
-	max_sm = gr->gpc_count *
-		gr->max_tpc_per_gpc_count *
+	max_sm = config->gpc_count *
+		config->max_tpc_per_gpc_count *
 		priv->constants.sm_per_tpc;
 	if (p->num_sm > max_sm) {
 		return -EINVAL;

View File

@@ -43,7 +43,8 @@ int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
 	u32 mode);
 int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
 	struct gr_zcull_info *zcull_params);
-u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
+u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config,
+	u32 gpc_index);
 u32 vgpu_gr_get_max_fbps_count(struct gk20a *g);
 u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g);
 u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g);
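With the prototype change above, callers reach this HAL through the new g->ops.gr.config sub-struct and pass the nvgpu_gr_config pointer explicitly, as the vgpu init path already does. A minimal caller sketch (not part of the patch) assuming a hypothetical helper name:

/* Sketch only: query a GPC's TPC mask through the relocated HAL. */
static u32 query_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{
	if (g->ops.gr.config.get_gpc_tpc_mask == NULL) {
		return 0U;
	}
	return g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc_index);
}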

View File

@@ -143,7 +143,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.falcon_load_ucode = NULL,
 		.load_ctxsw_ucode = NULL,
 		.set_gpc_tpc_mask = NULL,
-		.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
 		.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
 		.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
 		.get_zcull_info = vgpu_gr_get_zcull_info,
@@ -351,6 +350,9 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 			.set_type_per_veid_header =
 				gv11b_ctxsw_prog_set_type_per_veid_header,
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
+		},
+		.config = {
+			.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
 		}
 	},
 	.fb = {
@@ -736,6 +738,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g)
 	gops->ce2 = vgpu_gv11b_ops.ce2;
 	gops->gr = vgpu_gv11b_ops.gr;
 	gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog;
+	gops->gr.config = vgpu_gv11b_ops.gr.config;
 	gops->fb = vgpu_gv11b_ops.fb;
 	gops->clock_gating = vgpu_gv11b_ops.clock_gating;
 	gops->fifo = vgpu_gv11b_ops.fifo;