Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git, synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: add new gr/config unit to initialize GR configuration
Add new unit gr/config to initialize GR configuration like GPC/TPC
count, MAX count and mask.

Create new structure nvgpu_gr_config that stores all the configuration
and that is owned by the new unit.

Move below fields from struct gr_gk20a to nvgpu_gr_config in
gr/config.h. struct gr_gk20a now only holds the pointer to struct
nvgpu_gr_config.

	u32 max_gpc_count;
	u32 max_tpc_per_gpc_count;
	u32 max_zcull_per_gpc_count;
	u32 max_tpc_count;
	u32 gpc_count;
	u32 tpc_count;
	u32 ppc_count;
	u32 zcb_count;
	u32 pe_count_per_gpc;
	u32 *gpc_tpc_count;
	u32 *gpc_ppc_count;
	u32 *gpc_zcb_count;
	u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
	u32 *gpc_tpc_mask;
	u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
	u32 *gpc_skip_mask;
	u8 *map_tiles;
	u32 map_tile_count;
	u32 map_row_offset;

Remove gr->sys_count since it was already no longer used.

The common/gr/config/gr_config.c unit now exposes the APIs to
initialize the configuration and also to query the configuration
values. nvgpu_gr_config_init() is called to initialize GR
configuration from gr_gk20a_init_gr_config(), and
gr_gk20a_init_map_tiles() is simply renamed to
nvgpu_gr_config_init_map_tiles().

Expose new API nvgpu_gr_config_deinit() to deinit the configuration.

Expose nvgpu_gr_config_get_*() APIs to query the above configuration
fields stored in the nvgpu_gr_config structure.

Update vgpu_gr_init_gr_config() to initialize the configuration from
the gr->config structure.

Chip-specific HALs that access GR registers for initialization are
implemented in common/gr/config/gr_config_gm20b.c. Set these HALs for
all GPUs.

Jira NVGPU-1879

Change-Id: Ided658b43124ea61b9f273b82b73fdde4ed3c8f0
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2012167
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by mobile promotions
parent e212e851a3
commit a5eb150635
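For orientation, the flow this change introduces looks like the sketch
below: the GR code asks the new config unit to build an nvgpu_gr_config
once, then reads everything back through the nvgpu_gr_config_get_*()
accessors. This is a minimal, hypothetical caller; the function name
example_probe_gr_config and its placement are illustrative and not part
of the commit:

/*
 * Hypothetical caller, not part of this commit: shows the intended
 * init/query/deinit flow of the new gr/config unit.
 */
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/config.h>

static int example_probe_gr_config(struct gk20a *g)
{
	u32 gpc;
	struct nvgpu_gr_config *config = nvgpu_gr_config_init(g);

	if (config == NULL) {
		return -ENOMEM;
	}
	g->gr.config = config;

	/* Query per-GPC TPC counts through the new accessors. */
	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
		nvgpu_log_info(g, "GPC%u has %u TPCs", gpc,
			nvgpu_gr_config_get_gpc_tpc_count(config, gpc));
	}

	return 0;
}

Teardown is symmetric: nvgpu_gr_config_deinit(g, g->gr.config) frees the
per-GPC arrays owned by the structure.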
@@ -88,6 +88,8 @@ nvgpu-y += common/bus/bus_gk20a.o \
 	common/gr/global_ctx.o \
 	common/gr/ctx.o \
 	common/gr/subctx.o \
+	common/gr/config/gr_config.o \
+	common/gr/config/gr_config_gm20b.o \
 	common/netlist/netlist.o \
 	common/netlist/netlist_sim.o \
 	common/netlist/netlist_gm20b.o \
@@ -130,6 +130,8 @@ srcs += common/sim.c \
 	common/gr/global_ctx.c \
 	common/gr/subctx.c \
 	common/gr/ctx.c \
+	common/gr/config/gr_config.c \
+	common/gr/config/gr_config_gm20b.c \
 	common/netlist/netlist.c \
 	common/netlist/netlist_sim.c \
 	common/netlist/netlist_gm20b.c \
@@ -21,6 +21,7 @@
  */

 #include <nvgpu/gk20a.h>
+#include <nvgpu/gr/config.h>

 static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
 {
@@ -44,16 +45,17 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
 {
 	struct gr_gk20a *gr = &g->gr;
 	struct nvgpu_ecc_stat **stats;
+	u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
 	u32 gpc, tpc;
 	int err = 0;

-	stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
+	stats = nvgpu_kzalloc(g, sizeof(*stats) * gpc_count);
 	if (stats == NULL) {
 		return -ENOMEM;
 	}
-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-		stats[gpc] = nvgpu_kzalloc(g,
-			sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
+	for (gpc = 0; gpc < gpc_count; gpc++) {
+		stats[gpc] = nvgpu_kzalloc(g, sizeof(*stats[gpc]) *
+			nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc));
 		if (stats[gpc] == NULL) {
 			err = -ENOMEM;
 			break;
@@ -69,8 +71,10 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
 		return err;
 	}

-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-		for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
+	for (gpc = 0; gpc < gpc_count; gpc++) {
+		for (tpc = 0;
+		     tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc);
+		     tpc++) {
 			(void) snprintf(stats[gpc][tpc].name,
 				NVGPU_ECC_STAT_NAME_MAX_SIZE,
 				"gpc%d_tpc%d_%s", gpc, tpc, name);
@@ -87,13 +91,14 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
 {
 	struct gr_gk20a *gr = &g->gr;
 	struct nvgpu_ecc_stat *stats;
+	u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
 	u32 gpc;

-	stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
+	stats = nvgpu_kzalloc(g, sizeof(*stats) * gpc_count);
 	if (stats == NULL) {
 		return -ENOMEM;
 	}
-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+	for (gpc = 0; gpc < gpc_count; gpc++) {
 		(void) snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
 			"gpc%d_%s", gpc, name);
 		nvgpu_ecc_stat_add(g, &stats[gpc]);
@@ -189,9 +194,10 @@ void nvgpu_ecc_free(struct gk20a *g)
 {
 	struct nvgpu_ecc *ecc = &g->ecc;
 	struct gr_gk20a *gr = &g->gr;
+	u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config);
 	u32 i;

-	for (i = 0; i < gr->gpc_count; i++) {
+	for (i = 0; i < gpc_count; i++) {
 		if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
 			nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
 		}
drivers/gpu/nvgpu/common/gr/config/gr_config.c (new file, 540 lines)
@@ -0,0 +1,540 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+#include <nvgpu/io.h>
+#include <nvgpu/gr/config.h>
+
+#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
+#include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h>
+
+struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
+{
+	struct nvgpu_gr_config *config;
+	u32 gpc_index, pes_index;
+	u32 pes_tpc_mask;
+	u32 pes_tpc_count;
+	u32 pes_heavy_index;
+	u32 gpc_new_skip_mask;
+	u32 tmp;
+
+	config = nvgpu_kzalloc(g, sizeof(*config));
+	if (config == NULL) {
+		return NULL;
+	}
+
+	tmp = nvgpu_readl(g, top_num_gpcs_r());
+	config->max_gpc_count = top_num_gpcs_value_v(tmp);
+
+	tmp = nvgpu_readl(g, top_tpc_per_gpc_r());
+	config->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
+
+	config->max_tpc_count = config->max_gpc_count *
+				config->max_tpc_per_gpc_count;
+
+	tmp = nvgpu_readl(g, pri_ringmaster_enum_gpc_r());
+	config->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
+	if (config->gpc_count == 0U) {
+		nvgpu_err(g, "gpc_count==0!");
+		goto clean_up;
+	}
+
+	config->pe_count_per_gpc = nvgpu_get_litter_value(g,
+		GPU_LIT_NUM_PES_PER_GPC);
+	if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
+		nvgpu_err(g, "too many pes per gpc");
+		goto clean_up;
+	}
+
+	config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g,
+		GPU_LIT_NUM_ZCULL_BANKS);
+
+	config->gpc_tpc_count = nvgpu_kzalloc(g, config->gpc_count *
+					sizeof(u32));
+	config->gpc_tpc_mask = nvgpu_kzalloc(g, config->max_gpc_count *
+					sizeof(u32));
+	config->gpc_zcb_count = nvgpu_kzalloc(g, config->gpc_count *
+					sizeof(u32));
+	config->gpc_ppc_count = nvgpu_kzalloc(g, config->gpc_count *
+					sizeof(u32));
+	config->gpc_skip_mask = nvgpu_kzalloc(g,
+		(size_t)g->ops.gr.config.get_pd_dist_skip_table_size() *
+		(size_t)4 * sizeof(u32));
+
+	if ((config->gpc_tpc_count == NULL) || (config->gpc_tpc_mask == NULL) ||
+	    (config->gpc_zcb_count == NULL) || (config->gpc_ppc_count == NULL) ||
+	    (config->gpc_skip_mask == NULL)) {
+		goto clean_up;
+	}
+
+	for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) {
+		if (g->ops.gr.config.get_gpc_tpc_mask != NULL) {
+			config->gpc_tpc_mask[gpc_index] =
+				g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_index);
+		}
+	}
+
+	for (pes_index = 0; pes_index < config->pe_count_per_gpc; pes_index++) {
+		config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g,
+			config->gpc_count * sizeof(u32));
+		config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g,
+			config->gpc_count * sizeof(u32));
+		if ((config->pes_tpc_count[pes_index] == NULL) ||
+		    (config->pes_tpc_mask[pes_index] == NULL)) {
+			goto clean_up;
+		}
+	}
+
+	config->ppc_count = 0;
+	config->tpc_count = 0;
+	config->zcb_count = 0;
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		config->gpc_tpc_count[gpc_index] =
+			g->ops.gr.config.get_tpc_count_in_gpc(g, config,
+				gpc_index);
+		config->tpc_count += config->gpc_tpc_count[gpc_index];
+
+		config->gpc_zcb_count[gpc_index] =
+			g->ops.gr.config.get_zcull_count_in_gpc(g, config,
+				gpc_index);
+		config->zcb_count += config->gpc_zcb_count[gpc_index];
+
+		for (pes_index = 0; pes_index < config->pe_count_per_gpc;
+				    pes_index++) {
+			pes_tpc_mask = g->ops.gr.config.get_pes_tpc_mask(g,
+				config, gpc_index, pes_index);
+			pes_tpc_count = hweight32(pes_tpc_mask);
+
+			/* detect PES presence by seeing if there are
+			 * TPCs connected to it.
+			 */
+			if (pes_tpc_count != 0U) {
+				config->gpc_ppc_count[gpc_index]++;
+			}
+
+			config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
+			config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
+		}
+
+		config->ppc_count += config->gpc_ppc_count[gpc_index];
+
+		gpc_new_skip_mask = 0;
+		if (config->pe_count_per_gpc > 1U &&
+		    config->pes_tpc_count[0][gpc_index] +
+		    config->pes_tpc_count[1][gpc_index] == 5U) {
+			pes_heavy_index =
+				config->pes_tpc_count[0][gpc_index] >
+				config->pes_tpc_count[1][gpc_index] ? 0U : 1U;
+
+			gpc_new_skip_mask =
+				config->pes_tpc_mask[pes_heavy_index][gpc_index] ^
+				(config->pes_tpc_mask[pes_heavy_index][gpc_index] &
+				(config->pes_tpc_mask[pes_heavy_index][gpc_index] - 1U));
+
+		} else if (config->pe_count_per_gpc > 1U &&
+			   (config->pes_tpc_count[0][gpc_index] +
+			    config->pes_tpc_count[1][gpc_index] == 4U) &&
+			   (config->pes_tpc_count[0][gpc_index] !=
+			    config->pes_tpc_count[1][gpc_index])) {
+			pes_heavy_index =
+				config->pes_tpc_count[0][gpc_index] >
+				config->pes_tpc_count[1][gpc_index] ? 0U : 1U;
+
+			gpc_new_skip_mask =
+				config->pes_tpc_mask[pes_heavy_index][gpc_index] ^
+				(config->pes_tpc_mask[pes_heavy_index][gpc_index] &
+				(config->pes_tpc_mask[pes_heavy_index][gpc_index] - 1U));
+		}
+		config->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
+	}
+
+	nvgpu_log_info(g, "max_gpc_count: %d", config->max_gpc_count);
+	nvgpu_log_info(g, "max_tpc_per_gpc_count: %d", config->max_tpc_per_gpc_count);
+	nvgpu_log_info(g, "max_zcull_per_gpc_count: %d", config->max_zcull_per_gpc_count);
+	nvgpu_log_info(g, "max_tpc_count: %d", config->max_tpc_count);
+	nvgpu_log_info(g, "gpc_count: %d", config->gpc_count);
+	nvgpu_log_info(g, "pe_count_per_gpc: %d", config->pe_count_per_gpc);
+	nvgpu_log_info(g, "tpc_count: %d", config->tpc_count);
+	nvgpu_log_info(g, "ppc_count: %d", config->ppc_count);
+
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		nvgpu_log_info(g, "gpc_tpc_count[%d] : %d",
+			gpc_index, config->gpc_tpc_count[gpc_index]);
+	}
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		nvgpu_log_info(g, "gpc_zcb_count[%d] : %d",
+			gpc_index, config->gpc_zcb_count[gpc_index]);
+	}
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		nvgpu_log_info(g, "gpc_ppc_count[%d] : %d",
+			gpc_index, config->gpc_ppc_count[gpc_index]);
+	}
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		nvgpu_log_info(g, "gpc_skip_mask[%d] : %d",
+			gpc_index, config->gpc_skip_mask[gpc_index]);
+	}
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		for (pes_index = 0;
+		     pes_index < config->pe_count_per_gpc;
+		     pes_index++) {
+			nvgpu_log_info(g, "pes_tpc_count[%d][%d] : %d",
+				pes_index, gpc_index,
+				config->pes_tpc_count[pes_index][gpc_index]);
+		}
+	}
+
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		for (pes_index = 0;
+		     pes_index < config->pe_count_per_gpc;
+		     pes_index++) {
+			nvgpu_log_info(g, "pes_tpc_mask[%d][%d] : %d",
+				pes_index, gpc_index,
+				config->pes_tpc_mask[pes_index][gpc_index]);
+		}
+	}
+
+	return config;
+
+clean_up:
+	nvgpu_kfree(g, config);
+	return NULL;
+}
+
+static u32 prime_set[18] = {
+	2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
+
+/*
+ * Return map tiles count for given index
+ * Return 0 if index is out-of-bounds
+ */
+u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, u32 index)
+{
+	if (index >= config->map_tile_count) {
+		return 0;
+	}
+
+	return config->map_tiles[index];
+}
+
+u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config)
+{
+	return config->map_row_offset;
+}
+
+int nvgpu_gr_config_init_map_tiles(struct gk20a *g,
+	struct nvgpu_gr_config *config)
+{
+	s32 comm_denom;
+	s32 mul_factor;
+	s32 *init_frac = NULL;
+	s32 *init_err = NULL;
+	s32 *run_err = NULL;
+	u32 *sorted_num_tpcs = NULL;
+	u32 *sorted_to_unsorted_gpc_map = NULL;
+	u32 gpc_index;
+	u32 gpc_mark = 0;
+	u32 num_tpc;
+	u32 max_tpc_count = 0;
+	u32 swap;
+	u32 tile_count;
+	u32 index;
+	bool delete_map = false;
+	bool gpc_sorted;
+	int ret = 0;
+	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+	u32 map_tile_count = num_gpcs * num_tpc_per_gpc;
+
+	init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
+	init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
+	run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
+	sorted_num_tpcs =
+		nvgpu_kzalloc(g, (size_t)num_gpcs *
+				 (size_t)num_tpc_per_gpc *
+				 sizeof(s32));
+	sorted_to_unsorted_gpc_map =
+		nvgpu_kzalloc(g, (size_t)num_gpcs * sizeof(s32));
+
+	if (!((init_frac != NULL) &&
+	      (init_err != NULL) &&
+	      (run_err != NULL) &&
+	      (sorted_num_tpcs != NULL) &&
+	      (sorted_to_unsorted_gpc_map != NULL))) {
+		ret = -ENOMEM;
+		goto clean_up;
+	}
+
+	config->map_row_offset = 0xFFFFFFFFU;
+
+	if (config->tpc_count == 3U) {
+		config->map_row_offset = 2;
+	} else if (config->tpc_count < 3U) {
+		config->map_row_offset = 1;
+	} else {
+		config->map_row_offset = 3;
+
+		for (index = 1U; index < 18U; index++) {
+			u32 prime = prime_set[index];
+			if ((config->tpc_count % prime) != 0U) {
+				config->map_row_offset = prime;
+				break;
+			}
+		}
+	}
+
+	switch (config->tpc_count) {
+	case 15:
+		config->map_row_offset = 6;
+		break;
+	case 14:
+		config->map_row_offset = 5;
+		break;
+	case 13:
+		config->map_row_offset = 2;
+		break;
+	case 11:
+		config->map_row_offset = 7;
+		break;
+	case 10:
+		config->map_row_offset = 6;
+		break;
+	case 7:
+	case 5:
+		config->map_row_offset = 1;
+		break;
+	default:
+		break;
+	}
+
+	if (config->map_tiles != NULL) {
+		if (config->map_tile_count != config->tpc_count) {
+			delete_map = true;
+		}
+
+		for (tile_count = 0; tile_count < config->map_tile_count; tile_count++) {
+			if (nvgpu_gr_config_get_map_tile_count(config, tile_count)
+					>= config->tpc_count) {
+				delete_map = true;
+			}
+		}
+
+		if (delete_map) {
+			nvgpu_kfree(g, config->map_tiles);
+			config->map_tiles = NULL;
+			config->map_tile_count = 0;
+		}
+	}
+
+	if (config->map_tiles == NULL) {
+		config->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8));
+		if (config->map_tiles == NULL) {
+			ret = -ENOMEM;
+			goto clean_up;
+		}
+		config->map_tile_count = map_tile_count;
+
+		for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+			sorted_num_tpcs[gpc_index] = config->gpc_tpc_count[gpc_index];
+			sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
+		}
+
+		gpc_sorted = false;
+		while (!gpc_sorted) {
+			gpc_sorted = true;
+			for (gpc_index = 0U; gpc_index < config->gpc_count - 1U; gpc_index++) {
+				if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) {
+					gpc_sorted = false;
+					swap = sorted_num_tpcs[gpc_index];
+					sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U];
+					sorted_num_tpcs[gpc_index + 1U] = swap;
+					swap = sorted_to_unsorted_gpc_map[gpc_index];
+					sorted_to_unsorted_gpc_map[gpc_index] =
+						sorted_to_unsorted_gpc_map[gpc_index + 1U];
+					sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap;
+				}
+			}
+		}
+
+		for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+			if (config->gpc_tpc_count[gpc_index] > max_tpc_count) {
+				max_tpc_count = config->gpc_tpc_count[gpc_index];
+			}
+		}
+
+		mul_factor = S32(config->gpc_count) * S32(max_tpc_count);
+		if ((U32(mul_factor) & 0x1U) != 0U) {
+			mul_factor = 2;
+		} else {
+			mul_factor = 1;
+		}
+
+		comm_denom = S32(config->gpc_count) * S32(max_tpc_count) * mul_factor;
+
+		for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+			num_tpc = sorted_num_tpcs[gpc_index];
+
+			init_frac[gpc_index] = S32(num_tpc) * S32(config->gpc_count) * mul_factor;
+
+			if (num_tpc != 0U) {
+				init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2;
+			} else {
+				init_err[gpc_index] = 0;
+			}
+
+			run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
+		}
+
+		while (gpc_mark < config->tpc_count) {
+			for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+				if ((run_err[gpc_index] * 2) >= comm_denom) {
+					config->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
+					run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
+				} else {
+					run_err[gpc_index] += init_frac[gpc_index];
+				}
+			}
+		}
+	}
+
+clean_up:
+	nvgpu_kfree(g, init_frac);
+	nvgpu_kfree(g, init_err);
+	nvgpu_kfree(g, run_err);
+	nvgpu_kfree(g, sorted_num_tpcs);
+	nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
+
+	if (ret != 0) {
+		nvgpu_err(g, "fail");
+	} else {
+		nvgpu_log_fn(g, "done");
+	}
+
+	return ret;
+}
+
+void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
+{
+	u32 index;
+
+	nvgpu_kfree(g, config->gpc_tpc_count);
+	nvgpu_kfree(g, config->gpc_zcb_count);
+	nvgpu_kfree(g, config->gpc_ppc_count);
+	nvgpu_kfree(g, config->gpc_skip_mask);
+	nvgpu_kfree(g, config->gpc_tpc_mask);
+	nvgpu_kfree(g, config->map_tiles);
+	for (index = 0U; index < config->pe_count_per_gpc;
+	     index++) {
+		nvgpu_kfree(g, config->pes_tpc_count[index]);
+		nvgpu_kfree(g, config->pes_tpc_mask[index]);
+	}
+
+}
+
+u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config)
+{
+	return config->max_gpc_count;
+}
+
+u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config)
+{
+	return config->max_tpc_per_gpc_count;
+}
+
+u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config)
+{
+	return config->max_zcull_per_gpc_count;
+}
+
+u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config)
+{
+	return config->max_tpc_count;
+}
+
+u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config)
+{
+	return config->gpc_count;
+}
+
+u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config)
+{
+	return config->tpc_count;
+}
+
+u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config)
+{
+	return config->ppc_count;
+}
+
+u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config)
+{
+	return config->zcb_count;
+}
+
+u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config)
+{
+	return config->pe_count_per_gpc;
+}
+
+u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
+	u32 gpc_index)
+{
+	return config->gpc_ppc_count[gpc_index];
+}
+
+u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
+	u32 gpc_index)
+{
+	if (gpc_index >= config->gpc_count) {
+		return 0;
+	}
+	return config->gpc_tpc_count[gpc_index];
+}
+
+u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
+	u32 gpc_index)
+{
+	return config->gpc_zcb_count[gpc_index];
+}
+
+u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config,
+	u32 gpc_index, u32 pes_index)
+{
+	return config->pes_tpc_count[pes_index][gpc_index];
+}
+
+u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config,
+	u32 gpc_index)
+{
+	return config->gpc_tpc_mask[gpc_index];
+}
+
+u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
+	u32 gpc_index)
+{
+	return config->gpc_skip_mask[gpc_index];
+}
+
+u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
+	u32 gpc_index, u32 pes_index)
+{
+	return config->pes_tpc_mask[pes_index][gpc_index];
+}
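The map-tiles construction in nvgpu_gr_config_init_map_tiles() is
essentially an error-diffusion (Bresenham-style) interleave: each GPC
accumulates a fractional share proportional to its TPC count, and a GPC
ID is emitted into the tile map whenever its running error crosses the
common denominator. A standalone sketch of the same arithmetic, with
made-up values and plain C, not driver code:

/*
 * Standalone sketch (not driver code) of the error-diffusion scheme
 * nvgpu_gr_config_init_map_tiles() uses to interleave GPC IDs into the
 * screen-tile map in proportion to each GPC's TPC count. All names and
 * values here are hypothetical.
 */
#include <stdio.h>

#define NUM_GPCS 3

int main(void)
{
	/* TPCs per GPC, pre-sorted in descending order. */
	int tpcs[NUM_GPCS] = { 3, 2, 1 };
	int tpc_total = 6, max_tpc = 3;
	int map[6];

	/* Same scaling as the driver: common denominator and per-GPC step. */
	int mul = ((NUM_GPCS * max_tpc) & 1) ? 2 : 1;
	int denom = NUM_GPCS * max_tpc * mul;
	int frac[NUM_GPCS], err[NUM_GPCS];
	int g, mark = 0;

	for (g = 0; g < NUM_GPCS; g++) {
		frac[g] = tpcs[g] * NUM_GPCS * mul;
		err[g] = (tpcs[g] != 0) ? g * max_tpc * mul - denom / 2 : 0;
		err[g] += frac[g];	/* run_err = init_frac + init_err */
	}

	/* Emit one GPC ID per tile until every TPC has a slot. */
	while (mark < tpc_total) {
		for (g = 0; g < NUM_GPCS; g++) {
			if (err[g] * 2 >= denom) {
				map[mark++] = g;
				err[g] += frac[g] - denom;
			} else {
				err[g] += frac[g];
			}
		}
	}

	for (g = 0; g < tpc_total; g++)
		printf("%d ", map[g]);	/* prints: 0 1 2 0 0 1 */
	printf("\n");
	return 0;
}

With these inputs GPC0 appears three times, GPC1 twice, and GPC2 once,
matching each GPC's TPC share while staying evenly spread across the map.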
drivers/gpu/nvgpu/common/gr/config/gr_config_gm20b.c (new file, 79 lines)
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+#include <nvgpu/io.h>
+#include <nvgpu/gr/config.h>
+
+#include "gr_config_gm20b.h"
+
+#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
+
+u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
+	struct nvgpu_gr_config *config, u32 gpc_index)
+{
+	u32 val;
+
+	/* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */
+	val = g->ops.fuse.fuse_status_opt_tpc_gpc(g, gpc_index);
+
+	return (~val) & (BIT32(config->max_tpc_per_gpc_count) - 1U);
+}
+
+u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,
+	struct nvgpu_gr_config *config, u32 gpc_index)
+{
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tmp;
+
+	tmp = nvgpu_readl(g, gr_gpc0_fs_gpc_r() + gpc_stride * gpc_index);
+
+	return gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
+}
+
+u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
+	struct nvgpu_gr_config *config, u32 gpc_index)
+{
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tmp;
+
+	tmp = nvgpu_readl(g, gr_gpc0_fs_gpc_r() + gpc_stride * gpc_index);
+
+	return gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
+}
+
+u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g,
+	struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index)
+{
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tmp;
+
+	tmp = nvgpu_readl(g, gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
+		gpc_index * gpc_stride);
+
+	return gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
+}
+
+u32 gm20b_gr_config_get_pd_dist_skip_table_size(void)
+{
+	return gr_pd_dist_skip_table__size_1_v();
+}
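gm20b_gr_config_get_gpc_tpc_mask() relies on the fuse register reporting
*disabled* TPCs, so the enabled mask is the complement truncated to
max_tpc_per_gpc_count bits. A self-contained illustration of the
(~val) & (BIT32(n) - 1U) idiom with made-up values:

/*
 * Standalone illustration (hypothetical values, not driver code) of the
 * complement-and-mask idiom used above: the fuse register reports
 * disabled TPCs, so inverting and masking to n bits yields the
 * enabled-TPC mask.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t max_tpc_per_gpc = 4;	/* n = 4 TPC slots per GPC */
	uint32_t fuse_val = 0x2;	/* TPC1 fused off */
	uint32_t enabled = (~fuse_val) & ((1U << max_tpc_per_gpc) - 1U);

	printf("enabled TPC mask = 0x%x\n", enabled);	/* 0xd: TPC0, 2, 3 */
	return 0;
}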
drivers/gpu/nvgpu/common/gr/config/gr_config_gm20b.h (new file, 41 lines)
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_GR_CONFIG_GM20B_H
+#define NVGPU_GR_CONFIG_GM20B_H
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+struct nvgpu_gr_config;
+
+u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
+	struct nvgpu_gr_config *config, u32 gpc_index);
+u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,
+	struct nvgpu_gr_config *config, u32 gpc_index);
+u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
+	struct nvgpu_gr_config *config, u32 gpc_index);
+u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g,
+	struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index);
+u32 gm20b_gr_config_get_pd_dist_skip_table_size(void);
+
+#endif /* NVGPU_GR_CONFIG_GM20B_H */
@@ -26,6 +26,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/io.h>
 #include <nvgpu/utils.h>
+#include <nvgpu/gr/config.h>

 #include "priv_ring_gm20b.h"

@@ -80,7 +81,7 @@ void gm20b_priv_ring_isr(struct gk20a *g)
 			gk20a_readl(g, pri_ringstation_sys_priv_error_code_r()));
 	}

-	for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
 		if ((status1 & BIT32(gpc)) != 0U) {
 			nvgpu_log(g, gpu_dbg_intr, "GPC%u write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x", gpc,
 				gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_adr_r() + gpc * gpc_priv_stride),
@@ -27,6 +27,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/io.h>
 #include <nvgpu/utils.h>
+#include <nvgpu/gr/config.h>
 #include <nvgpu/gk20a.h>

 #include <nvgpu/hw/gp10b/hw_pri_ringmaster_gp10b.h>
@@ -157,7 +158,7 @@ void gp10b_priv_ring_isr(struct gk20a *g)

 	if (status1 != 0U) {
 		gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_PRIV_STRIDE);
-		for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+		for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
 			offset = gpc * gpc_stride;
 			if ((status1 & BIT32(gpc)) != 0U) {
 				error_info = gk20a_readl(g,
(File diff suppressed because it is too large.)
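The suppressed diff covers the large gr_gk20a.c rewrite. Going by the
commit message alone, gr_gk20a_init_gr_config() now delegates to the new
unit; below is a hedged sketch of the expected shape, not the actual
suppressed hunk, with error handling elided:

/*
 * Hypothetical reconstruction based only on the commit message; the
 * real gr_gk20a.c change is not shown on this page.
 */
static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{
	/* All register reads now happen inside the gr/config unit. */
	gr->config = nvgpu_gr_config_init(g);
	if (gr->config == NULL) {
		return -ENOMEM;
	}

	return 0;
}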
@@ -36,14 +36,11 @@
 #define GR_IDLE_CHECK_MAX		200U	/* usec */
 #define GR_FECS_POLL_INTERVAL		5U	/* usec */

-#define INVALID_SCREEN_TILE_ROW_OFFSET	0xFFFFFFFFU
 #define INVALID_MAX_WAYS		0xFFFFFFFFU

 #define GK20A_FECS_UCODE_IMAGE	"fecs.bin"
 #define GK20A_GPCCS_UCODE_IMAGE	"gpccs.bin"

-#define GK20A_GR_MAX_PES_PER_GPC 3U
-
 #define GK20A_TIMEOUT_FPGA	100000U	/* 100 sec */

 /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
@@ -258,6 +255,7 @@ struct gr_gk20a {
 	bool initialized;

 	u32 num_fbps;
+	u32 max_fbps_count;

 	u32 max_comptag_lines;
 	u32 compbit_backing_size;
@@ -266,26 +264,6 @@ struct gr_gk20a {
 	u32 cacheline_size;
 	u32 gobs_per_comptagline_per_slice;

-	u32 max_gpc_count;
-	u32 max_fbps_count;
-	u32 max_tpc_per_gpc_count;
-	u32 max_zcull_per_gpc_count;
-	u32 max_tpc_count;
-
-	u32 sys_count;
-	u32 gpc_count;
-	u32 pe_count_per_gpc;
-	u32 ppc_count;
-	u32 *gpc_ppc_count;
-	u32 tpc_count;
-	u32 *gpc_tpc_count;
-	u32 *gpc_tpc_mask;
-	u32 zcb_count;
-	u32 *gpc_zcb_count;
-	u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
-	u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
-	u32 *gpc_skip_mask;
-
 	u32 bundle_cb_default_size;
 	u32 min_gpm_fifo_depth;
 	u32 bundle_cb_token_limit;
@@ -312,9 +290,7 @@ struct gr_gk20a {

 	struct nvgpu_gr_ctx_desc *gr_ctx_desc;

-	u8 *map_tiles;
-	u32 map_tile_count;
-	u32 map_row_offset;
+	struct nvgpu_gr_config *config;

 	u32 max_comptag_mem; /* max memory size (MB) for comptag */
 	struct compbit_store_desc compbit_store;
@@ -565,7 +541,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
 		u32 global_esr_mask, bool check_errors);
 void gk20a_gr_suspend_all_sms(struct gk20a *g,
 		u32 global_esr_mask, bool check_errors);
-u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
 int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 	struct channel_gk20a *ch, u64 sms, bool enable);
 bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
@@ -32,6 +32,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>

 #include "gk20a/gr_gk20a.h"

@@ -109,11 +110,11 @@ u32 gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)

 	size = gr->attrib_cb_size *
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-		gr->max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(gr->config);

 	size += gr->alpha_cb_size *
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
-		gr->max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(gr->config);

 	return size;
 }
@@ -201,17 +202,23 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

 	alpha_offset_in_chunk = attrib_offset_in_chunk +
-		gr->tpc_count * gr->attrib_cb_size;
+		nvgpu_gr_config_get_tpc_count(gr->config) * gr->attrib_cb_size;

-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
 		u32 temp = gpc_stride * gpc_index;
 		u32 temp2 = num_pes_per_gpc * gpc_index;
-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
+		for (ppc_index = 0;
+		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config,
+					gpc_index);
 		     ppc_index++) {
 			cbm_cfg_size1 = gr->attrib_cb_default_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
+				nvgpu_gr_config_get_pes_tpc_count(gr->config,
+					gpc_index, ppc_index);
 			cbm_cfg_size2 = gr->alpha_cb_default_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
+				nvgpu_gr_config_get_pes_tpc_count(gr->config,
+					gpc_index, ppc_index);

 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
@@ -224,7 +231,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 				attrib_offset_in_chunk, patch);

 			attrib_offset_in_chunk += gr->attrib_cb_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
+				nvgpu_gr_config_get_pes_tpc_count(gr->config,
+					gpc_index, ppc_index);

 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
|
|||||||
alpha_offset_in_chunk, patch);
|
alpha_offset_in_chunk, patch);
|
||||||
|
|
||||||
alpha_offset_in_chunk += gr->alpha_cb_size *
|
alpha_offset_in_chunk += gr->alpha_cb_size *
|
||||||
gr->pes_tpc_count[ppc_index][gpc_index];
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
|
gpc_index, ppc_index);
|
||||||
|
|
||||||
nvgpu_gr_ctx_patch_write(g, gr_ctx,
|
nvgpu_gr_ctx_patch_write(g, gr_ctx,
|
||||||
gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
|
gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
|
||||||
@@ -348,11 +357,14 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 		gr_pd_ab_dist_cfg1_max_batches_init_f());

-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
 		stride = gpc_stride * gpc_index;

-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-		     ppc_index++) {
+		for (ppc_index = 0;
+		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
+		     ppc_index++) {

 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
@@ -360,7 +372,8 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)

 			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
 					gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
-						gr->pes_tpc_count[ppc_index][gpc_index]));
+						nvgpu_gr_config_get_pes_tpc_count(gr->config,
+							gpc_index, ppc_index)));

 			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
@@ -388,11 +401,14 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 		~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) |
 		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));

-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
 		stride = gpc_stride * gpc_index;

-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-		     ppc_index++) {
+		for (ppc_index = 0;
+		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
+		     ppc_index++) {

 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
@@ -401,7 +417,8 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 			val = set_field(val,
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
-					gr->pes_tpc_count[ppc_index][gpc_index]));
+					nvgpu_gr_config_get_pes_tpc_count(gr->config,
+						gpc_index, ppc_index)));

 			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
@@ -413,11 +430,11 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 			val = set_field(val,
 				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
 				gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
-					gr->gpc_ppc_count[gpc_index]));
+					nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index)));
 			val = set_field(val,
 				gr_gpcs_swdx_tc_beta_cb_size_div3_m(),
 				gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size *
-					gr->gpc_ppc_count[gpc_index])/3U));
+					nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index))/3U));

 			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
 				ppc_index + gpc_index), val);
@@ -554,18 +571,7 @@ u32 gr_gm20b_get_gpc_mask(struct gk20a *g)
 	 */
 	val = g->ops.fuse.fuse_status_opt_gpc(g);

-	return (~val) & (BIT32(gr->max_gpc_count) - 1U);
-}
-
-u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
-{
-	u32 val;
-	struct gr_gk20a *gr = &g->gr;
-
-	/* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */
-	val = g->ops.fuse.fuse_status_opt_tpc_gpc(g, gpc_index);
-
-	return (~val) & (BIT32(gr->max_tpc_per_gpc_count) - 1U);
+	return (~val) & (BIT32(nvgpu_gr_config_get_max_gpc_count(gr->config)) - 1U);
 }

 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
@@ -573,10 +579,11 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 	nvgpu_tegra_fuse_write_bypass(g, 0x1);
 	nvgpu_tegra_fuse_write_access_sw(g, 0x0);

-	if (g->gr.gpc_tpc_mask[gpc_index] == 0x1U) {
+	if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0x1U) {
 		nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
 		nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x1);
-	} else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2U) {
+	} else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) ==
+			0x2U) {
 		nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1);
 		nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0);
 	} else {
@@ -590,20 +597,24 @@ void gr_gm20b_load_tpc_mask(struct gk20a *g)
 	u32 pes_tpc_mask = 0, fuse_tpc_mask;
 	u32 gpc, pes;
 	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);

-	for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
-		for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
-			pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] <<
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
+		for (pes = 0;
+		     pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
+		     pes++) {
+			pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
+					g->gr.config, gpc, pes) <<
 				num_tpc_per_gpc * gpc;
 		}
 	}

-	fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
+	fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, 0);
 	if ((g->tpc_fs_mask_user != 0U) &&
 	    (g->tpc_fs_mask_user != fuse_tpc_mask) &&
-	    (fuse_tpc_mask == BIT32(g->gr.max_tpc_count) - U32(1))) {
+	    (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
 		u32 val = g->tpc_fs_mask_user;
-		val &= BIT32(g->gr.max_tpc_count) - U32(1);
+		val &= BIT32(max_tpc_count) - U32(1);
 		/* skip tpc to disable the other tpc cause channel timeout */
 		val = BIT32(hweight32(val)) - U32(1);
 		gk20a_writel(g, gr_fe_tpc_fs_r(), val);
@@ -640,7 +651,9 @@ int gr_gm20b_load_smid_config(struct gk20a *g)
 	}

 	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
-	for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) {
+	for (i = 0U;
+	     i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
+	     i++) {
 		u32 reg = 0;
 		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
 				 gr_cwd_gpc_tpc_id_tpc0_s();
@@ -649,7 +662,7 @@ int gr_gm20b_load_smid_config(struct gk20a *g)
 			u32 sm_id = (i * 4U) + j;
 			u32 bits;

-			if (sm_id >= g->gr.tpc_count) {
+			if (sm_id >= nvgpu_gr_config_get_tpc_count(g->gr.config)) {
 				break;
 			}

@@ -959,7 +972,7 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
-	if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) {
+	if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
 		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 			gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
 	}
@@ -975,7 +988,7 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
-	if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) {
+	if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
 		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 			gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
 	}
@@ -79,7 +79,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 			u32 *num_sm_dsm_perf_ctrl_regs,
 			u32 **sm_dsm_perf_ctrl_regs,
 			u32 *ctrl_register_stride);
-u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 u32 gr_gm20b_get_gpc_mask(struct gk20a *g);
 void gr_gm20b_load_tpc_mask(struct gk20a *g);
@@ -44,6 +44,7 @@
 #include "common/fb/fb_gm20b.h"
 #include "common/netlist/netlist_gm20b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
+#include "common/gr/config/gr_config_gm20b.h"
 #include "common/therm/therm_gm20b.h"
 #include "common/ltc/ltc_gm20b.h"
 #include "common/fuse/fuse_gm20b.h"
@@ -241,7 +242,6 @@ static const struct gpu_ops gm20b_ops = {
 		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
 		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
-		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
 		.get_zcull_info = gr_gk20a_get_zcull_info,
@@ -404,6 +404,16 @@ static const struct gpu_ops gm20b_ops = {
 				gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
 			.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
 			.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
+		},
+		.config = {
+			.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
+			.get_tpc_count_in_gpc =
+				gm20b_gr_config_get_tpc_count_in_gpc,
+			.get_zcull_count_in_gpc =
+				gm20b_gr_config_get_zcull_count_in_gpc,
+			.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
+			.get_pd_dist_skip_table_size =
+				gm20b_gr_config_get_pd_dist_skip_table_size,
 		}
 	},
 	.fb = {
@@ -788,6 +798,7 @@ int gm20b_init_hal(struct gk20a *g)
 	gops->ce2 = gm20b_ops.ce2;
 	gops->gr = gm20b_ops.gr;
 	gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog;
+	gops->gr.config = gm20b_ops.gr.config;
 	gops->fb = gm20b_ops.fb;
 	gops->clock_gating = gm20b_ops.clock_gating;
 	gops->fifo = gm20b_ops.fifo;
@@ -25,6 +25,7 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 
 #include "gk20a/gr_gk20a.h"
 #include "gm20b/gr_gm20b.h"
@@ -182,7 +183,7 @@ int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
 			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
 	u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-		g->gr.max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(g->gr.config);
 	attrib_cb_size = ALIGN(attrib_cb_size, 128);
 
 	nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
@@ -38,6 +38,7 @@
 #include <nvgpu/regops.h>
 #include <nvgpu/gr/subctx.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 
 #include "gk20a/gr_gk20a.h"
 
@@ -437,19 +438,25 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 	}
 
 	attrib_offset_in_chunk = alpha_offset_in_chunk +
-		gr->tpc_count * gr->alpha_cb_size;
+		nvgpu_gr_config_get_tpc_count(gr->config) * gr->alpha_cb_size;
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
 		temp = gpc_stride * gpc_index;
 		temp2 = num_pes_per_gpc * gpc_index;
-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
+		for (ppc_index = 0;
+		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
 		     ppc_index++) {
 			cbm_cfg_size_beta = cb_attrib_cache_size_init *
-				gr->pes_tpc_count[ppc_index][gpc_index];
+				nvgpu_gr_config_get_pes_tpc_count(gr->config,
+					gpc_index, ppc_index);
 			cbm_cfg_size_alpha = gr->alpha_cb_default_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
+				nvgpu_gr_config_get_pes_tpc_count(gr->config,
+					gpc_index, ppc_index);
 			cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
+				nvgpu_gr_config_get_pes_tpc_count(gr->config,
+					gpc_index, ppc_index);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
@@ -468,7 +475,8 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 				patch);
 
 			attrib_offset_in_chunk += attrib_size_in_chunk *
-				gr->pes_tpc_count[ppc_index][gpc_index];
+				nvgpu_gr_config_get_pes_tpc_count(gr->config,
+					gpc_index, ppc_index);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
@@ -481,7 +489,8 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 				alpha_offset_in_chunk, patch);
 
 			alpha_offset_in_chunk += gr->alpha_cb_size *
-				gr->pes_tpc_count[ppc_index][gpc_index];
+				nvgpu_gr_config_get_pes_tpc_count(gr->config,
+					gpc_index, ppc_index);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
 				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
@@ -594,17 +603,19 @@ u32 gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
 	gr->alpha_cb_size = gr->alpha_cb_default_size;
 
 	gr->attrib_cb_size = min(gr->attrib_cb_size,
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / g->gr.tpc_count);
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) /
+			nvgpu_gr_config_get_tpc_count(gr->config));
 	gr->alpha_cb_size = min(gr->alpha_cb_size,
-		gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / g->gr.tpc_count);
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) /
+			nvgpu_gr_config_get_tpc_count(gr->config));
 
 	size = gr->attrib_cb_size *
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-		gr->max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(gr->config);
 
 	size += gr->alpha_cb_size *
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
-		gr->max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(gr->config);
 
 	size = ALIGN(size, 128);
 
@@ -786,11 +797,14 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 			gr_pd_ab_dist_cfg1_max_batches_init_f());
 	}
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
 		stride = gpc_stride * gpc_index;
 
-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-		     ppc_index++) {
+		for (ppc_index = 0;
+		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
+		     ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
@@ -798,7 +812,8 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 
 			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
 				gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
-					gr->pes_tpc_count[ppc_index][gpc_index]));
+					nvgpu_gr_config_get_pes_tpc_count(gr->config,
+						gpc_index, ppc_index)));
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
@@ -835,11 +850,14 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 		~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) |
 		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
 		stride = gpc_stride * gpc_index;
 
-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-		     ppc_index++) {
+		for (ppc_index = 0;
+		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
+		     ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
@@ -848,7 +866,8 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 			val = set_field(val,
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
-					gr->pes_tpc_count[ppc_index][gpc_index]));
+					nvgpu_gr_config_get_pes_tpc_count(gr->config,
+						gpc_index, ppc_index)));
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
@@ -867,7 +886,7 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
 				gr_gpcs_swdx_tc_beta_cb_size_v_f(
 					cb_size_steady *
-					gr->gpc_ppc_count[gpc_index]));
+					nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index)));
 
 			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
 				ppc_index + gpc_index), val);
@@ -965,7 +984,7 @@ int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
 			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
 	u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-		g->gr.max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(g->gr.config);
 	attrib_cb_size = ALIGN(attrib_cb_size, 128);
 
 	nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
@@ -1211,7 +1230,7 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
-	if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) {
+	if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
 		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 			gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
 	}
@@ -1227,7 +1246,7 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
-	if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) {
+	if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
 		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 			gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
 	}
@@ -1435,7 +1454,9 @@ int gr_gp10b_load_smid_config(struct gk20a *g)
 	}
 
 	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
-	for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) {
+	for (i = 0U;
+	     i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
+	     i++) {
 		u32 reg = 0;
 		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
 				 gr_cwd_gpc_tpc_id_tpc0_s();
@@ -1444,7 +1465,7 @@ int gr_gp10b_load_smid_config(struct gk20a *g)
 			u32 sm_id = (i * 4U) + j;
 			u32 bits;
 
-			if (sm_id >= g->gr.tpc_count) {
+			if (sm_id >= nvgpu_gr_config_get_tpc_count(g->gr.config)) {
 				break;
 			}
 
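The CWD packing that this hunk touches is easier to follow in isolation: four (GPC, TPC) pairs are packed into each 32-bit NV_PGRAPH_PRI_CWD_GPC_TPC_ID register, each pair occupying bit_stride bits. The standalone sketch below reproduces only that arithmetic; the 4+4-bit field widths are illustrative assumptions, since the real widths come from the gr_cwd_gpc_tpc_id_gpc0_s()/_tpc0_s() accessors not shown here.

/* Sketch of the per-register TPC-ID packing, under assumed field widths. */
#include <stdint.h>
#include <stdio.h>

static uint32_t pack_cwd_reg(const uint32_t *gpc_of_sm,
			     const uint32_t *tpc_of_sm,
			     uint32_t first_sm, uint32_t num_sm)
{
	const uint32_t gpc_bits = 4, tpc_bits = 4;	/* assumed widths */
	const uint32_t bit_stride = gpc_bits + tpc_bits;
	uint32_t reg = 0, j;

	/* Four entries per register, stopping at the last valid SM id. */
	for (j = 0; j < 4 && (first_sm + j) < num_sm; j++) {
		uint32_t sm_id = first_sm + j;
		uint32_t bits = (gpc_of_sm[sm_id] << tpc_bits) | tpc_of_sm[sm_id];
		reg |= bits << (j * bit_stride);
	}
	return reg;
}

int main(void)
{
	uint32_t gpc[6] = {0, 0, 1, 1, 2, 2}, tpc[6] = {0, 1, 0, 1, 0, 1};
	uint32_t i, tpc_count = 6;

	/* One register per group of four entries, mirroring the loop above. */
	for (i = 0; i <= (tpc_count - 1) / 4; i++)
		printf("cwd_gpc_tpc_id[%u] = 0x%08x\n", (unsigned)i,
		       (unsigned)pack_cwd_reg(gpc, tpc, i * 4, tpc_count));
	return 0;
}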
@@ -1500,9 +1521,10 @@ void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 	nvgpu_tegra_fuse_write_bypass(g, 0x1);
 	nvgpu_tegra_fuse_write_access_sw(g, 0x0);
 
-	if (g->gr.gpc_tpc_mask[gpc_index] == 0x1U) {
+	if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0x1U) {
 		nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x2);
-	} else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2U) {
+	} else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) ==
+			0x2U) {
 		nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1);
 	} else {
 		nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
@@ -49,6 +49,7 @@
 #include "common/netlist/netlist_gp10b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
+#include "common/gr/config/gr_config_gm20b.h"
 #include "common/therm/therm_gm20b.h"
 #include "common/therm/therm_gp10b.h"
 #include "common/ltc/ltc_gm20b.h"
@@ -261,7 +262,6 @@ static const struct gpu_ops gp10b_ops = {
 		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
 		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
-		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
 		.get_zcull_info = gr_gk20a_get_zcull_info,
@@ -451,6 +451,16 @@ static const struct gpu_ops gp10b_ops = {
 			.set_full_preemption_ptr =
 				gp10b_ctxsw_prog_set_full_preemption_ptr,
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
+		},
+		.config = {
+			.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
+			.get_tpc_count_in_gpc =
+				gm20b_gr_config_get_tpc_count_in_gpc,
+			.get_zcull_count_in_gpc =
+				gm20b_gr_config_get_zcull_count_in_gpc,
+			.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
+			.get_pd_dist_skip_table_size =
+				gm20b_gr_config_get_pd_dist_skip_table_size,
 		}
 	},
 	.fb = {
@@ -869,6 +879,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gops->ce2 = gp10b_ops.ce2;
 	gops->gr = gp10b_ops.gr;
 	gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog;
+	gops->gr.config = gp10b_ops.gr.config;
 	gops->fb = gp10b_ops.fb;
 	gops->clock_gating = gp10b_ops.clock_gating;
 	gops->fifo = gp10b_ops.fifo;
@@ -28,6 +28,7 @@
 #include <nvgpu/io.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/gr_pri_gk20a.h"
@@ -81,7 +82,9 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
 	}
 
 	/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
-	for (gpc_id = 0; gpc_id < gr->gpc_count; gpc_id++) {
+	for (gpc_id = 0;
+	     gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_id++) {
 		num_tpc_mask = gpc_tpc_mask[gpc_id];
 
 		if ((gpc_id == disable_gpc_id) &&
@@ -110,16 +113,19 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
 		 * ratio represents relative throughput of the GPC
 		 */
 		scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
-			gr->gpc_tpc_count[gpc_id];
+			nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id);
 
 		if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
 			min_scg_gpc_pix_perf = scg_gpc_pix_perf;
 		}
 
 		/* Calculate # of surviving PES */
-		for (pes_id = 0; pes_id < gr->gpc_ppc_count[gpc_id]; pes_id++) {
+		for (pes_id = 0;
+		     pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id);
+		     pes_id++) {
 			/* Count the number of TPC on the set */
-			num_tpc_mask = gr->pes_tpc_mask[pes_id][gpc_id] &
+			num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask(
+					gr->config, gpc_id, pes_id) &
 				gpc_tpc_mask[gpc_id];
 
 			if ((gpc_id == disable_gpc_id) &&
@@ -149,10 +155,14 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
 	}
 
 	/* Now calculate perf */
-	scg_world_perf = (scale_factor * scg_num_pes) / gr->ppc_count;
+	scg_world_perf = (scale_factor * scg_num_pes) /
+		nvgpu_gr_config_get_ppc_count(gr->config);
 	deviation = 0;
-	average_tpcs = scale_factor * average_tpcs / gr->gpc_count;
-	for (gpc_id =0; gpc_id < gr->gpc_count; gpc_id++) {
+	average_tpcs = scale_factor * average_tpcs /
+		nvgpu_gr_config_get_gpc_count(gr->config);
+	for (gpc_id =0;
+	     gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_id++) {
 		diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
 		if (diff < 0) {
 			diff = -diff;
@@ -160,7 +170,7 @@ static int gr_gv100_scg_estimate_perf(struct gk20a *g,
 		deviation += U32(diff);
 	}
 
-	deviation /= gr->gpc_count;
+	deviation /= nvgpu_gr_config_get_gpc_count(gr->config);
 
 	norm_tpc_deviation = deviation / max_tpc_gpc;
 
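For readers following the arithmetic, the fixed-point balance metric in gr_gv100_scg_estimate_perf() can be reproduced standalone: scale_factor preserves fractional precision in integer math while the code takes a mean TPC count per GPC, its mean absolute deviation, and normalizes by the largest per-GPC TPC count. scale_factor's actual value is not visible in this hunk, so 1000 below is an assumption.

/* Minimal sketch of the scaled-integer TPC-balance metric above. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const uint32_t scale_factor = 1000;		/* assumed */
	uint32_t num_tpc_gpc[] = {4, 3, 4};		/* surviving TPCs per GPC */
	uint32_t gpc_count = 3, max_tpc_gpc = 4;
	uint32_t gpc_id, deviation = 0;
	uint64_t average_tpcs = 0;

	for (gpc_id = 0; gpc_id < gpc_count; gpc_id++)
		average_tpcs += num_tpc_gpc[gpc_id];

	/* Mean TPC count per GPC, scaled to keep the fractional part. */
	average_tpcs = scale_factor * average_tpcs / gpc_count;

	/* Mean absolute deviation from that average, still scaled. */
	for (gpc_id = 0; gpc_id < gpc_count; gpc_id++) {
		int64_t diff = (int64_t)average_tpcs -
			       (int64_t)(scale_factor * num_tpc_gpc[gpc_id]);
		deviation += (uint32_t)llabs(diff);
	}
	deviation /= gpc_count;

	/* Normalize by the largest per-GPC TPC count, as the code above does. */
	printf("norm_tpc_deviation = %u\n", (unsigned)(deviation / max_tpc_gpc));
	return 0;
}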
@@ -216,14 +226,17 @@ int gr_gv100_init_sm_id_table(struct gk20a *g)
 	u32 gpc, sm, pes, gtpc;
 	u32 sm_id = 0;
 	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
-	u32 num_sm = sm_per_tpc * g->gr.tpc_count;
+	struct gr_gk20a *gr = &g->gr;
+	u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config);
 	int perf, maxperf;
 	int err = 0;
 	unsigned long *gpc_tpc_mask;
 	u32 *tpc_table, *gpc_table;
 
-	gpc_table = nvgpu_kzalloc(g, g->gr.tpc_count * sizeof(u32));
-	tpc_table = nvgpu_kzalloc(g, g->gr.tpc_count * sizeof(u32));
+	gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
+			sizeof(u32));
+	tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
+			sizeof(u32));
 	gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
 			nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
 
@@ -235,17 +248,20 @@ int gr_gv100_init_sm_id_table(struct gk20a *g)
 		goto exit_build_table;
 	}
 
-	for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
-		for (pes = 0; pes < g->gr.gpc_ppc_count[gpc]; pes++) {
-			gpc_tpc_mask[gpc] |= g->gr.pes_tpc_mask[pes][gpc];
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
+		for (pes = 0;
+		     pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc);
+		     pes++) {
+			gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask(
+					g->gr.config, gpc, pes);
 		}
 	}
 
-	for (gtpc = 0; gtpc < g->gr.tpc_count; gtpc++) {
+	for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) {
 		maxperf = -1;
-		for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+		for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
 			for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
-					 g->gr.gpc_tpc_count[gpc]) {
+					 nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
 				perf = -1;
 				err = gr_gv100_scg_estimate_perf(g,
 						gpc_tpc_mask, gpc, tpc, &perf);
@@ -308,13 +324,13 @@ u32 gr_gv100_get_patch_slots(struct gk20a *g)
 	 * Update PE table contents
 	 * for PE table, each patch buffer update writes 32 TPCs
 	 */
-	size += DIV_ROUND_UP(gr->tpc_count, 32U);
+	size += DIV_ROUND_UP(nvgpu_gr_config_get_tpc_count(gr->config), 32U);
 
 	/*
 	 * Update the PL table contents
 	 * For PL table, each patch buffer update configures 4 TPCs
 	 */
-	size += DIV_ROUND_UP(gr->tpc_count, 4U);
+	size += DIV_ROUND_UP(nvgpu_gr_config_get_tpc_count(gr->config), 4U);
 
 	/*
 	 * We need this for all subcontexts
@@ -515,5 +531,6 @@ void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
 	g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0),
 		0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon);
 	g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0),
-		0xFFFFFFFFU, g->gr.gpc_count, num_gpc_perfmon);
+		0xFFFFFFFFU, nvgpu_gr_config_get_gpc_count(g->gr.config),
+		num_gpc_perfmon);
 }
@@ -39,6 +39,7 @@
 #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
+#include "common/gr/config/gr_config_gm20b.h"
 #include "common/therm/therm_gm20b.h"
 #include "common/therm/therm_gp106.h"
 #include "common/therm/therm_gp10b.h"
@@ -361,7 +362,6 @@ static const struct gpu_ops gv100_ops = {
 		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
 		.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
-		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
 		.get_gpc_mask = gr_gm20b_get_gpc_mask,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
@@ -583,6 +583,16 @@ static const struct gpu_ops gv100_ops = {
 			.set_type_per_veid_header =
 				gv11b_ctxsw_prog_set_type_per_veid_header,
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
+		},
+		.config = {
+			.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
+			.get_tpc_count_in_gpc =
+				gm20b_gr_config_get_tpc_count_in_gpc,
+			.get_zcull_count_in_gpc =
+				gm20b_gr_config_get_zcull_count_in_gpc,
+			.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
+			.get_pd_dist_skip_table_size =
+				gm20b_gr_config_get_pd_dist_skip_table_size,
 		}
 	},
 	.fb = {
@@ -1139,6 +1149,7 @@ int gv100_init_hal(struct gk20a *g)
 	gops->ce2 = gv100_ops.ce2;
 	gops->gr = gv100_ops.gr;
 	gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog;
+	gops->gr.config = gv100_ops.gr.config;
 	gops->fb = gv100_ops.fb;
 	gops->nvdec = gv100_ops.nvdec;
 	gops->clock_gating = gv100_ops.clock_gating;
@@ -40,6 +40,7 @@
 #include <nvgpu/regops.h>
 #include <nvgpu/gr/subctx.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/nvgpu_err.h>
 
@@ -438,7 +439,8 @@ void gr_gv11b_enable_exceptions(struct gk20a *g)
 
 	/* enable exceptions */
 	gk20a_writel(g, gr_exception2_en_r(), 0x0U); /* BE not enabled */
-	gk20a_writel(g, gr_exception1_en_r(), BIT32(gr->gpc_count) - 1U);
+	gk20a_writel(g, gr_exception1_en_r(),
+		BIT32(nvgpu_gr_config_get_gpc_count(gr->config)) - 1U);
 
 	reg_val = gr_exception_en_fe_enabled_f() |
 		gr_exception_en_memfmt_enabled_f() |
@@ -1122,7 +1124,7 @@ void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
 
 	tpc_mask =
 		gr_gpcs_gpccs_gpc_exception_en_tpc_f(
-			BIT32(gr->max_tpc_per_gpc_count) - 1U);
+			BIT32(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config)) - 1U);
 
 	gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
 		(tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) |
@@ -1291,17 +1293,19 @@ u32 gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g)
 	gr->alpha_cb_size = gr->alpha_cb_default_size;
 
 	gr->attrib_cb_size = min(gr->attrib_cb_size,
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / g->gr.tpc_count);
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) /
+			nvgpu_gr_config_get_tpc_count(gr->config));
 	gr->alpha_cb_size = min(gr->alpha_cb_size,
-		gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / g->gr.tpc_count);
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) /
+			nvgpu_gr_config_get_tpc_count(gr->config));
 
 	size = gr->attrib_cb_size *
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-		gr->max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(gr->config);
 
 	size += gr->alpha_cb_size *
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
-		gr->max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(gr->config);
 
 	size = ALIGN(size, 128);
 
@@ -1531,11 +1535,14 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 		gr_pd_ab_dist_cfg1_max_batches_init_f());
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
 		stride = proj_gpc_stride_v() * gpc_index;
 
-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-		     ppc_index++) {
+		for (ppc_index = 0;
+		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
+		     ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
@@ -1543,7 +1550,7 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 
 			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
 				gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
-					gr->pes_tpc_count[ppc_index][gpc_index]));
+					nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index)));
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
@@ -1578,11 +1585,14 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
 		~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) |
 		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
 		stride = proj_gpc_stride_v() * gpc_index;
 
-		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
-		     ppc_index++) {
+		for (ppc_index = 0;
+		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index);
+		     ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
@@ -1591,7 +1601,8 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
 			val = set_field(val,
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
-					gr->pes_tpc_count[ppc_index][gpc_index]));
+					nvgpu_gr_config_get_pes_tpc_count(gr->config,
+						gpc_index, ppc_index)));
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
@@ -1610,7 +1621,7 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
 				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
 				gr_gpcs_swdx_tc_beta_cb_size_v_f(
 					cb_size_steady *
-					gr->gpc_ppc_count[gpc_index]));
+					nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index)));
 
 			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
 				ppc_index + gpc_index), val);
@@ -1671,7 +1682,7 @@ int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
 			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
 	u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-		g->gr.max_tpc_count;
+		nvgpu_gr_config_get_max_tpc_count(g->gr.config);
 	attrib_cb_size = ALIGN(attrib_cb_size, 128);
 
 	nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
@@ -1911,10 +1922,12 @@ static void gr_gv11b_dump_gr_sm_regs(struct gk20a *g,
 		gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r()));
 
 	sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
-	for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
 		gpc_offset = gk20a_gr_gpc_offset(g, gpc);
 
-		for (tpc = 0; tpc < g->gr.gpc_tpc_count[gpc]; tpc++) {
+		for (tpc = 0;
+		     tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc);
+		     tpc++) {
 			tpc_offset = gk20a_gr_tpc_offset(g, tpc);
 
 			for (sm = 0; sm < sm_per_tpc; sm++) {
@@ -1976,7 +1989,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
-	if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) {
+	if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
 		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 			gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
 	}
@@ -1992,7 +2005,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
-	if ((gr->gpc_tpc_count != NULL) && (gr->gpc_tpc_count[0] == 2U)) {
+	if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
 		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
 			gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
 	}
@@ -2170,18 +2183,18 @@ void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
 	u32 fuse_val;
 
-	if (g->gr.gpc_tpc_mask[gpc_index] == 0U) {
+	if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0U) {
 		return;
 	}
 
 	/*
-	 * For s/w value g->gr.gpc_tpc_mask[gpc_index], bit value 1 indicates
+	 * For s/w value nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index), bit value 1 indicates
 	 * corresponding TPC is enabled. But for h/w fuse register, bit value 1
 	 * indicates corresponding TPC is disabled.
 	 * So we need to flip the bits and ensure we don't write to bits greater
 	 * than TPC count
 	 */
-	fuse_val = g->gr.gpc_tpc_mask[gpc_index];
+	fuse_val = nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index);
 	fuse_val = ~fuse_val;
 	fuse_val = fuse_val & 0xfU; /* tpc0_disable fuse is only 4-bit wide */
 
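The mask inversion that this comment describes is just a two-step bit operation: the software mask uses 1 = TPC enabled, the tpc0_disable fuse uses 1 = TPC disabled, so the value is bit-flipped and clipped to the 4-bit fuse width. A standalone illustration:

/* Software TPC enable mask -> hardware tpc0_disable fuse value. */
#include <stdint.h>
#include <stdio.h>

static uint32_t tpc_enable_mask_to_fuse_val(uint32_t gpc_tpc_mask)
{
	uint32_t fuse_val = ~gpc_tpc_mask;
	fuse_val &= 0xfU;	/* tpc0_disable fuse is only 4 bits wide */
	return fuse_val;
}

int main(void)
{
	/* TPC0 and TPC1 enabled -> TPC2 and TPC3 marked disabled in the fuse. */
	printf("0x%x\n", tpc_enable_mask_to_fuse_val(0x3U));	/* prints 0xc */
	return 0;
}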
@@ -2678,13 +2691,15 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
 
 	nvgpu_log_fn(g, " ");
 
-	if (gr->map_tiles == NULL) {
+	if (gr->config->map_tiles == NULL) {
 		return -1;
 	}
 
 	gk20a_writel(g, gr_crstr_map_table_cfg_r(),
-		gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
-		gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
+		gr_crstr_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr->config)) |
+		gr_crstr_map_table_cfg_num_entries_f(
+			nvgpu_gr_config_get_tpc_count(gr->config)));
 	/*
 	 * 6 tpc can be stored in one map register.
 	 * But number of tpcs are not always multiple of six,
@@ -2702,27 +2717,33 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
 		switch (offset) {
 		case 0:
 			map = map | gr_crstr_gpc_map_tile0_f(
-				gr->map_tiles[base + offset]);
+				nvgpu_gr_config_get_map_tile_count(
+					gr->config, base + offset));
 			break;
 		case 1:
 			map = map | gr_crstr_gpc_map_tile1_f(
-				gr->map_tiles[base + offset]);
+				nvgpu_gr_config_get_map_tile_count(
+					gr->config, base + offset));
 			break;
 		case 2:
 			map = map | gr_crstr_gpc_map_tile2_f(
-				gr->map_tiles[base + offset]);
+				nvgpu_gr_config_get_map_tile_count(
+					gr->config, base + offset));
 			break;
 		case 3:
 			map = map | gr_crstr_gpc_map_tile3_f(
-				gr->map_tiles[base + offset]);
+				nvgpu_gr_config_get_map_tile_count(
+					gr->config, base + offset));
 			break;
 		case 4:
 			map = map | gr_crstr_gpc_map_tile4_f(
-				gr->map_tiles[base + offset]);
+				nvgpu_gr_config_get_map_tile_count(
+					gr->config, base + offset));
 			break;
 		case 5:
 			map = map | gr_crstr_gpc_map_tile5_f(
-				gr->map_tiles[base + offset]);
+				nvgpu_gr_config_get_map_tile_count(
+					gr->config, base + offset));
 			break;
 		default:
 			nvgpu_err(g, "incorrect rop mapping %x", offset);
@@ -2736,25 +2757,33 @@ int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
 	}
 
 	gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
-		gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
-		gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
+		gr_ppcs_wwdx_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr->config)) |
+		gr_ppcs_wwdx_map_table_cfg_num_entries_f(
+			nvgpu_gr_config_get_tpc_count(gr->config)));
 
 	for (i = 0U, j = 1U; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v();
 	     i++, j = j + 4U) {
 		gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
 			gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
-				(BIT32(j) % gr->tpc_count)) |
+				(BIT32(j) %
+				 nvgpu_gr_config_get_tpc_count(gr->config))) |
 			gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
-				(BIT32(j + 1U) % gr->tpc_count)) |
+				(BIT32(j + 1U) %
+				 nvgpu_gr_config_get_tpc_count(gr->config))) |
 			gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
-				(BIT32(j + 2U) % gr->tpc_count)) |
+				(BIT32(j + 2U) %
+				 nvgpu_gr_config_get_tpc_count(gr->config))) |
 			gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
-				(BIT32(j + 3U) % gr->tpc_count)));
+				(BIT32(j + 3U) %
+				 nvgpu_gr_config_get_tpc_count(gr->config))));
 	}
 
 	gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
-		gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
-		gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
+		gr_rstr2d_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr->config)) |
+		gr_rstr2d_map_table_cfg_num_entries_f(
+			nvgpu_gr_config_get_tpc_count(gr->config)));
 
 	return 0;
 }
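The map-table packing in this function, six map_tiles entries per gr_crstr_gpc_map register, reduces to base/offset arithmetic: entry n lands in register n / 6 at field position n % 6. A sketch under an assumed 5-bit field width (the real layout comes from the gr_crstr_gpc_map_tile*_f() helpers, which this sketch does not use):

/* Standalone illustration of six-tiles-per-register packing. */
#include <stdint.h>
#include <stdio.h>

#define TILES_PER_REG 6U
#define TILE_FIELD_BITS 5U	/* assumed width of one tile field */

int main(void)
{
	uint8_t map_tiles[] = {0, 1, 2, 0, 1, 2, 0, 1};
	uint32_t tpc_count = 8, map, base, offset;

	for (base = 0; base < tpc_count; base += TILES_PER_REG) {
		map = 0;
		/* Pack up to six consecutive tile entries into one register. */
		for (offset = 0; offset < TILES_PER_REG &&
		     (base + offset) < tpc_count; offset++)
			map |= (uint32_t)map_tiles[base + offset] <<
			       (offset * TILE_FIELD_BITS);
		printf("gr_crstr_gpc_map[%u] = 0x%08x\n",
		       (unsigned)(base / TILES_PER_REG), (unsigned)map);
	}
	return 0;
}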
@@ -2867,13 +2896,18 @@ u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc)
 	u32 pes;
 	struct gr_gk20a *gr = &g->gr;
 
-	for (pes = 0U; pes < gr->gpc_ppc_count[gpc]; pes++) {
-		if ((gr->pes_tpc_mask[pes][gpc] & BIT32(tpc)) != 0U) {
+	for (pes = 0U;
+	     pes < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc);
+	     pes++) {
+		if ((nvgpu_gr_config_get_pes_tpc_mask(gr->config, gpc, pes) &
+		     BIT32(tpc)) != 0U) {
 			break;
 		}
-		tpc_new += gr->pes_tpc_count[pes][gpc];
+		tpc_new += nvgpu_gr_config_get_pes_tpc_count(gr->config,
+				gpc, pes);
 	}
-	temp = (BIT32(tpc) - 1U) & gr->pes_tpc_mask[pes][gpc];
+	temp = (BIT32(tpc) - 1U) &
+		nvgpu_gr_config_get_pes_tpc_mask(gr->config, gpc, pes);
 	temp = (u32)hweight32(temp);
 	tpc_new += temp;
 
@@ -2916,7 +2950,9 @@ int gr_gv11b_load_smid_config(struct gk20a *g)
 	}
 
 	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
-	for (i = 0U; i <= ((g->gr.tpc_count-1U) / 4U); i++) {
+	for (i = 0U;
+	     i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
+	     i++) {
 		u32 reg = 0;
 		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
 				 gr_cwd_gpc_tpc_id_tpc0_s();
@@ -3039,24 +3075,28 @@ void gr_gv11b_load_tpc_mask(struct gk20a *g)
 {
 	u32 pes_tpc_mask = 0, fuse_tpc_mask;
 	u32 gpc, pes, val;
+	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
 	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
 				GPU_LIT_NUM_TPC_PER_GPC);
 
 	/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
-	for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
-		for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
-			pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] <<
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
+		for (pes = 0;
+		     pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
+		     pes++) {
+			pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
+						g->gr.config, gpc, pes) <<
 					num_tpc_per_gpc * gpc;
 		}
 	}
 
 	nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
-	fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, gpc);
+	fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc);
 	if ((g->tpc_fs_mask_user != 0U) &&
 	    (g->tpc_fs_mask_user != fuse_tpc_mask) &&
-	    (fuse_tpc_mask == BIT32(g->gr.max_tpc_count) - U32(1))) {
+	    (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
 		val = g->tpc_fs_mask_user;
-		val &= BIT32(g->gr.max_tpc_count) - U32(1);
+		val &= BIT32(max_tpc_count) - U32(1);
 		val = BIT32(hweight32(val)) - U32(1);
 		gk20a_writel(g, gr_fe_tpc_fs_r(0), val);
 	} else {
@@ -3506,8 +3546,10 @@ void gv11b_gr_suspend_all_sms(struct gk20a *g,
 	gk20a_writel(g,
 		gr_gpcs_tpcs_sms_dbgr_control0_r(), dbgr_control0);
 
-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-		for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
+		for (tpc = 0;
+		     tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc);
+		     tpc++) {
 			for (sm = 0; sm < sm_per_tpc; sm++) {
 				err = g->ops.gr.wait_for_sm_lock_down(g,
 					gpc, tpc, sm,
@@ -4254,12 +4296,14 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
 	 */
 	if ((broadcast_flags & PRI_BROADCAST_FLAGS_EGPC) != 0U) {
 		nvgpu_log_info(g, "broadcast flags egpc");
-		for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
+		for (gpc_num = 0;
+		     gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config);
+		     gpc_num++) {
 
 			if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) {
 				nvgpu_log_info(g, "broadcast flags etpc");
 				for (tpc_num = 0;
-				     tpc_num < g->gr.gpc_tpc_count[gpc_num];
+				     tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num);
 				     tpc_num++) {
 					if ((broadcast_flags &
 					     PRI_BROADCAST_FLAGS_SMPC) != 0U) {
@@ -4289,7 +4333,7 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
 
 			gpc_addr = pri_gpccs_addr_mask(priv_addr);
 			tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
-			if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) {
+			if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) {
 				continue;
 			}
 
@@ -4303,7 +4347,7 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr,
 		if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) {
 			nvgpu_log_info(g, "broadcast flags etpc but not egpc");
 			for (tpc_num = 0;
-			     tpc_num < g->gr.gpc_tpc_count[gpc_num];
+			     tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num);
 			     tpc_num++) {
 				if ((broadcast_flags &
 				     PRI_BROADCAST_FLAGS_SMPC) != 0U) {
@@ -4425,10 +4469,12 @@ static int gr_gv11b_ecc_scrub_is_done(struct gk20a *g,
 		return err;
 	}
 
-	for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
 		gpc_offset = gk20a_gr_gpc_offset(g, gpc);
 
-		for (tpc = 0; tpc < g->gr.gpc_tpc_count[gpc]; tpc++) {
+		for (tpc = 0;
+		     tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc);
+		     tpc++) {
 			tpc_offset = gk20a_gr_tpc_offset(g, tpc);
 
 			do {
@@ -4946,11 +4992,13 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
 	 * that we can look up the offsets
 	 */
 	if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) != 0U) {
-		for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
+		for (gpc_num = 0;
+		     gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config);
+		     gpc_num++) {
 
 			if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) {
 				for (tpc_num = 0;
-				     tpc_num < g->gr.gpc_tpc_count[gpc_num];
+				     tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num);
 				     tpc_num++) {
 					priv_addr_table[t++] =
 						pri_tpc_addr(g,
@@ -4972,7 +5020,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
 
 			gpc_addr = pri_gpccs_addr_mask(priv_addr);
 			tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
-			if (tpc_num >= g->gr.gpc_tpc_count[gpc_num]) {
+			if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) {
 				continue;
 			}
 
@@ -5010,7 +5058,9 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
 		return -EINVAL;
 	}
 
-	for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
+	for (gpc_num = 0;
+	     gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config);
+	     gpc_num++) {
 		for (domain_idx = pmm_domain_start;
 		     domain_idx < (pmm_domain_start + num_domains);
 		     domain_idx++) {
@@ -5063,7 +5113,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
 	} else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) {
 		if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) {
 			for (tpc_num = 0;
-			     tpc_num < g->gr.gpc_tpc_count[gpc_num];
+			     tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num);
 			     tpc_num++) {
 				priv_addr_table[t++] =
 					pri_tpc_addr(g,
@@ -39,6 +39,7 @@
 #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
+#include "common/gr/config/gr_config_gm20b.h"
 #include "common/therm/therm_gm20b.h"
 #include "common/therm/therm_gp10b.h"
 #include "common/therm/therm_gv11b.h"
@@ -314,7 +315,6 @@ static const struct gpu_ops gv11b_ops = {
 		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
 		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
-		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
 		.get_zcull_info = gr_gk20a_get_zcull_info,
@@ -544,6 +544,16 @@ static const struct gpu_ops gv11b_ops = {
 			.set_type_per_veid_header =
 				gv11b_ctxsw_prog_set_type_per_veid_header,
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
+		},
+		.config = {
+			.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
+			.get_tpc_count_in_gpc =
+				gm20b_gr_config_get_tpc_count_in_gpc,
+			.get_zcull_count_in_gpc =
+				gm20b_gr_config_get_zcull_count_in_gpc,
+			.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
+			.get_pd_dist_skip_table_size =
+				gm20b_gr_config_get_pd_dist_skip_table_size,
 		}
 	},
 	.fb = {
@@ -1006,6 +1016,7 @@ int gv11b_init_hal(struct gk20a *g)
 	gops->ce2 = gv11b_ops.ce2;
 	gops->gr = gv11b_ops.gr;
 	gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog;
+	gops->gr.config = gv11b_ops.gr.config;
 	gops->fb = gv11b_ops.fb;
 	gops->clock_gating = gv11b_ops.clock_gating;
 	gops->fifo = gv11b_ops.fifo;
@@ -291,7 +291,6 @@ struct gpu_ops {
 			u32 reg_offset);
 		int (*load_ctxsw_ucode)(struct gk20a *g);
 		u32 (*get_gpc_mask)(struct gk20a *g);
-		u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
 		void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
 		int (*alloc_obj_ctx)(struct channel_gk20a *c,
 			u32 class_num, u32 flags);
@@ -622,6 +621,20 @@ struct gpu_ops {
 			void (*dump_ctxsw_stats)(struct gk20a *g,
 				struct nvgpu_mem *ctx_mem);
 		} ctxsw_prog;
 
+		struct {
+			u32 (*get_gpc_tpc_mask)(struct gk20a *g,
+				struct nvgpu_gr_config *config, u32 gpc_index);
+			u32 (*get_tpc_count_in_gpc)(struct gk20a *g,
+				struct nvgpu_gr_config *config, u32 gpc_index);
+			u32 (*get_zcull_count_in_gpc)(struct gk20a *g,
+				struct nvgpu_gr_config *config, u32 gpc_index);
+			u32 (*get_pes_tpc_mask)(struct gk20a *g,
+				struct nvgpu_gr_config *config, u32 gpc_index,
+				u32 pes_index);
+			u32 (*get_pd_dist_skip_table_size)(void);
+		} config;
+
 		u32 (*fecs_falcon_base_addr)(void);
 		u32 (*gpccs_falcon_base_addr)(void);
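
With the new config sub-struct in place, common code stops reading fused-unit
masks out of struct gr_gk20a and dispatches through the per-chip HAL instead.
A minimal sketch of such a call site (illustrative only, not part of this
commit; the NULL check mirrors the pattern the driver already uses for
optional HALs):

    u32 mask = 0U;

    if (g->ops.gr.config.get_gpc_tpc_mask != NULL) {
        /* resolved per chip, e.g. to gm20b_gr_config_get_gpc_tpc_mask() */
        mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc_index);
    }
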
drivers/gpu/nvgpu/include/nvgpu/gr/config.h (new file, 96 lines)
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_GR_CONFIG_H
+#define NVGPU_GR_CONFIG_H
+
+#include <nvgpu/types.h>
+
+#define GK20A_GR_MAX_PES_PER_GPC 3U
+
+struct gk20a;
+
+struct nvgpu_gr_config {
+	u32 max_gpc_count;
+	u32 max_tpc_per_gpc_count;
+	u32 max_zcull_per_gpc_count;
+	u32 max_tpc_count;
+
+	u32 gpc_count;
+	u32 tpc_count;
+	u32 ppc_count;
+	u32 zcb_count;
+
+	u32 pe_count_per_gpc;
+
+	u32 *gpc_ppc_count;
+	u32 *gpc_tpc_count;
+	u32 *gpc_zcb_count;
+	u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
+
+	u32 *gpc_tpc_mask;
+	u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
+	u32 *gpc_skip_mask;
+
+	u8 *map_tiles;
+	u32 map_tile_count;
+	u32 map_row_offset;
+};
+
+struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g);
+void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config);
+int nvgpu_gr_config_init_map_tiles(struct gk20a *g,
+	struct nvgpu_gr_config *config);
+
+u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config,
+	u32 index);
+u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config);
+
+u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config);
+u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config);
+u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config);
+u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config);
+
+u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config);
+u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config);
+u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config);
+u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config);
+
+u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config);
+
+u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
+	u32 gpc_index);
+u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
+	u32 gpc_index);
+u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
+	u32 gpc_index);
+u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config,
+	u32 gpc_index, u32 pes_index);
+
+u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config,
+	u32 gpc_index);
+u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
+	u32 gpc_index);
+u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
+	u32 gpc_index, u32 pes_index);
+
+#endif /* NVGPU_GR_CONFIG_H */
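
The header above fixes the unit's lifecycle: nvgpu_gr_config_init() allocates
and fills struct nvgpu_gr_config, the nvgpu_gr_config_get_*() helpers are the
intended accessors, and nvgpu_gr_config_deinit() releases it. A hedged sketch
of a poweron-style caller (illustrative only; the real call sites in this
commit are gr_gk20a_init_gr_config() and vgpu_gr_init_gr_config()):

    struct nvgpu_gr_config *config;
    u32 gpc, tpc_total = 0U;

    config = nvgpu_gr_config_init(g);
    if (config == NULL)
        return -ENOMEM;

    /* per-GPC counts are read through the getters, not the raw fields */
    for (gpc = 0U; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++)
        tpc_total += nvgpu_gr_config_get_gpc_tpc_count(config, gpc);

    /* ... use the configuration ... */

    nvgpu_gr_config_deinit(g, config);
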
@@ -32,6 +32,7 @@
 #include <nvgpu/list.h>
 #include <nvgpu/clk_arb.h>
 #include <nvgpu/gk20a.h>
+#include <nvgpu/gr/config.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/pmu/pmgr.h>
 
@@ -281,10 +282,10 @@ gk20a_ctrl_ioctl_gpu_characteristics(
 	gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
 	gpu.on_board_video_memory_size = 0; /* integrated GPU */
 
-	gpu.num_gpc = g->gr.gpc_count;
-	gpu.max_gpc_count = g->gr.max_gpc_count;
+	gpu.num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config);
+	gpu.max_gpc_count = nvgpu_gr_config_get_max_gpc_count(g->gr.config);
 
-	gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;
+	gpu.num_tpc_per_gpc = nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
 
 	gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
 
@@ -293,7 +294,7 @@ gk20a_ctrl_ioctl_gpu_characteristics(
 	if (g->ops.gr.get_gpc_mask) {
 		gpu.gpc_mask = g->ops.gr.get_gpc_mask(g);
 	} else {
-		gpu.gpc_mask = BIT32(g->gr.gpc_count) - 1;
+		gpu.gpc_mask = BIT32(gpu.num_gpc) - 1;
 	}
 
 	gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g);
@@ -553,7 +554,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 {
 	struct gr_gk20a *gr = &g->gr;
 	int err = 0;
-	const u32 gpc_tpc_mask_size = sizeof(u32) * gr->max_gpc_count;
+	const u32 gpc_tpc_mask_size = sizeof(u32) * gr->config->max_gpc_count;
 
 	if (args->mask_buf_size > 0) {
 		size_t write_size = gpc_tpc_mask_size;
@@ -564,7 +565,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 
 		err = copy_to_user((void __user *)(uintptr_t)
 				   args->mask_buf_addr,
-				   gr->gpc_tpc_mask, write_size);
+				   gr->config->gpc_tpc_mask, write_size);
 	}
 
 	if (err == 0)
@@ -687,7 +688,8 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 	struct nvgpu_warpstate *w_state = NULL;
 	u32 sm_count, ioctl_size, size, sm_id;
 
-	sm_count = g->gr.gpc_count * g->gr.tpc_count;
+	sm_count = nvgpu_gr_config_get_gpc_count(g->gr.config) *
+		nvgpu_gr_config_get_tpc_count(g->gr.config);
 
 	ioctl_size = sm_count * sizeof(struct warpstate);
 	ioctl_w_state = nvgpu_kzalloc(g, ioctl_size);
@@ -26,6 +26,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/os_sched.h>
 #include <nvgpu/gk20a.h>
+#include <nvgpu/gr/config.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/tsg.h>
 
@@ -84,8 +85,9 @@ static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
 	}
 
 	if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) {
-		if ((arg->num_active_tpcs > gr->max_tpc_count) ||
-		    !(arg->num_active_tpcs)) {
+		if ((arg->num_active_tpcs >
+		     nvgpu_gr_config_get_max_tpc_count(gr->config)) ||
+		    !(arg->num_active_tpcs)) {
 			nvgpu_err(g, "Invalid num of active TPCs");
 			err = -EINVAL;
 			goto ch_put;
@@ -23,6 +23,7 @@
 #include <nvgpu/ptimer.h>
 #include <nvgpu/string.h>
 #include <nvgpu/gr/global_ctx.h>
+#include <nvgpu/gr/config.h>
 
 #include "os_linux.h"
 #include "sysfs.h"
@@ -930,16 +931,17 @@ static ssize_t tpc_fs_mask_store(struct device *dev,
 	struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_gr_config *config = g->gr.config;
 	unsigned long val = 0;
 
 	if (kstrtoul(buf, 10, &val) < 0)
 		return -EINVAL;
 
-	if (!g->gr.gpc_tpc_mask)
+	if (!config->gpc_tpc_mask)
 		return -ENODEV;
 
-	if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) {
-		g->gr.gpc_tpc_mask[0] = val;
+	if (val && val != config->gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) {
+		config->gpc_tpc_mask[0] = val;
 		g->tpc_fs_mask_user = val;
 
 		g->ops.gr.set_gpc_tpc_mask(g, 0);
@@ -951,6 +953,7 @@ static ssize_t tpc_fs_mask_store(struct device *dev,
 			g->gr.ctx_vars.golden_image_initialized = false;
 		}
 		g->gr.ctx_vars.golden_image_size = 0;
+		nvgpu_gr_config_deinit(g, g->gr.config);
 		/* Cause next poweron to reinit just gr */
 		g->gr.sw_ready = false;
 	}
@@ -971,11 +974,13 @@ static ssize_t tpc_fs_mask_read(struct device *dev,
 	if (err)
 		return err;
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		if (g->ops.gr.get_gpc_tpc_mask)
+	for (gpc_index = 0;
+	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
+	     gpc_index++) {
+		if (g->ops.gr.config.get_gpc_tpc_mask)
 			tpc_fs_mask |=
-				g->ops.gr.get_gpc_tpc_mask(g, gpc_index) <<
-				(gr->max_tpc_per_gpc_count * gpc_index);
+				g->ops.gr.config.get_gpc_tpc_mask(g, gr->config, gpc_index) <<
+				(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * gpc_index);
 	}
 
 	gk20a_idle(g);
@@ -29,6 +29,7 @@
 #include <nvgpu/netlist.h>
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/gr_pri_gk20a.h"
@@ -304,7 +305,8 @@ void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
 		gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
 
 	tpc_mask =
-		gr_gpcs_gpccs_gpc_exception_en_tpc_f(BIT32(gr->max_tpc_per_gpc_count) - 1U);
+		gr_gpcs_gpccs_gpc_exception_en_tpc_f(
+			BIT32(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config)) - 1U);
 
 	gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
 		(tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) |
@@ -41,6 +41,7 @@
 #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
+#include "common/gr/config/gr_config_gm20b.h"
 #include "common/therm/therm_gm20b.h"
 #include "common/therm/therm_gp10b.h"
 #include "common/therm/therm_gp106.h"
@@ -378,7 +379,6 @@ static const struct gpu_ops tu104_ops = {
 		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
 		.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
-		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
 		.get_gpc_mask = gr_gm20b_get_gpc_mask,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
@@ -608,6 +608,16 @@ static const struct gpu_ops tu104_ops = {
 			.set_type_per_veid_header =
 				gv11b_ctxsw_prog_set_type_per_veid_header,
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
+		},
+		.config = {
+			.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
+			.get_tpc_count_in_gpc =
+				gm20b_gr_config_get_tpc_count_in_gpc,
+			.get_zcull_count_in_gpc =
+				gm20b_gr_config_get_zcull_count_in_gpc,
+			.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
+			.get_pd_dist_skip_table_size =
+				gm20b_gr_config_get_pd_dist_skip_table_size,
 		}
 	},
 	.fb = {
@@ -1173,6 +1183,7 @@ int tu104_init_hal(struct gk20a *g)
 	gops->ce2 = tu104_ops.ce2;
 	gops->gr = tu104_ops.gr;
 	gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog;
+	gops->gr.config = tu104_ops.gr.config;
 	gops->fb = tu104_ops.fb;
 	gops->nvdec = tu104_ops.nvdec;
 	gops->clock_gating = tu104_ops.clock_gating;
@@ -27,6 +27,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 #include <nvgpu/log.h>
 
 #include "vgpu/gm20b/vgpu_gr_gm20b.h"
@@ -128,7 +129,7 @@ int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
 			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
 	u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
 			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-			g->gr.max_tpc_count;
+			nvgpu_gr_config_get_max_tpc_count(g->gr.config);
 	struct nvgpu_mem *desc;
 
 	attrib_cb_size = ALIGN(attrib_cb_size, 128);
@@ -30,6 +30,7 @@
 #include "common/netlist/netlist_gp10b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
 #include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
+#include "common/gr/config/gr_config_gm20b.h"
 #include "common/therm/therm_gm20b.h"
 #include "common/therm/therm_gp10b.h"
 #include "common/ltc/ltc_gm20b.h"
@@ -128,7 +129,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.falcon_load_ucode = NULL,
 		.load_ctxsw_ucode = NULL,
 		.set_gpc_tpc_mask = NULL,
-		.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
 		.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
 		.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
 		.get_zcull_info = vgpu_gr_get_zcull_info,
@@ -304,6 +304,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 			.set_full_preemption_ptr =
 				gp10b_ctxsw_prog_set_full_preemption_ptr,
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
+		},
+		.config = {
+			.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
 		}
 	},
 	.fb = {
@@ -659,6 +662,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g)
 	gops->ce2 = vgpu_gp10b_ops.ce2;
 	gops->gr = vgpu_gp10b_ops.gr;
 	gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog;
+	gops->gr.config = vgpu_gp10b_ops.gr.config;
 	gops->fb = vgpu_gp10b_ops.fb;
 	gops->clock_gating = vgpu_gp10b_ops.clock_gating;
 	gops->fifo = vgpu_gp10b_ops.fifo;
@@ -36,6 +36,7 @@
 #include <nvgpu/string.h>
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
 
 #include "gr_vgpu.h"
 #include "gk20a/fecs_trace_gk20a.h"
@@ -582,6 +583,7 @@ out:
 static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 {
 	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
+	struct nvgpu_gr_config *config;
 	u32 gpc_index;
 	u32 sm_per_tpc;
 	u32 pes_index;
@@ -589,79 +591,87 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 
 	nvgpu_log_fn(g, " ");
 
-	gr->max_gpc_count = priv->constants.max_gpc_count;
-	gr->gpc_count = priv->constants.gpc_count;
-	gr->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count;
-
-	gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
-
-	gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
-	if (!gr->gpc_tpc_count) {
+	gr->config = nvgpu_kzalloc(g, sizeof(*gr->config));
+	if (gr->config == NULL) {
+		return -ENOMEM;
+	}
+
+	config = gr->config;
+
+	config->max_gpc_count = priv->constants.max_gpc_count;
+	config->gpc_count = priv->constants.gpc_count;
+	config->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count;
+
+	config->max_tpc_count = config->max_gpc_count * config->max_tpc_per_gpc_count;
+
+	config->gpc_tpc_count = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32));
+	if (!config->gpc_tpc_count) {
 		goto cleanup;
 	}
 
-	gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
-	if (!gr->gpc_tpc_mask) {
+	config->gpc_tpc_mask = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32));
+	if (!config->gpc_tpc_mask) {
 		goto cleanup;
 	}
 
 	sm_per_tpc = priv->constants.sm_per_tpc;
-	gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count *
-					  gr->max_tpc_per_gpc_count *
+	gr->sm_to_cluster = nvgpu_kzalloc(g, config->gpc_count *
+					  config->max_tpc_per_gpc_count *
 					  sm_per_tpc *
 					  sizeof(struct sm_info));
 	if (!gr->sm_to_cluster) {
 		goto cleanup;
 	}
 
-	gr->tpc_count = 0;
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		gr->gpc_tpc_count[gpc_index] =
+	config->tpc_count = 0;
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		config->gpc_tpc_count[gpc_index] =
 			priv->constants.gpc_tpc_count[gpc_index];
 
-		gr->tpc_count += gr->gpc_tpc_count[gpc_index];
+		config->tpc_count += config->gpc_tpc_count[gpc_index];
 
-		if (g->ops.gr.get_gpc_tpc_mask) {
-			gr->gpc_tpc_mask[gpc_index] =
-				g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
+		if (g->ops.gr.config.get_gpc_tpc_mask) {
+			gr->config->gpc_tpc_mask[gpc_index] =
+				g->ops.gr.config.get_gpc_tpc_mask(g,
+					g->gr.config, gpc_index);
 		}
 	}
 
-	gr->pe_count_per_gpc =
+	config->pe_count_per_gpc =
 		nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
-	if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
-		 "too many pes per gpc %u\n", gr->pe_count_per_gpc)) {
+	if (WARN(config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
+		 "too many pes per gpc %u\n", config->pe_count_per_gpc)) {
 		goto cleanup;
 	}
-	if (gr->pe_count_per_gpc > TEGRA_VGPU_MAX_PES_COUNT_PER_GPC) {
+	if (config->pe_count_per_gpc > TEGRA_VGPU_MAX_PES_COUNT_PER_GPC) {
 		nvgpu_err(g, "pe_count_per_gpc %d is too big!",
-			  gr->pe_count_per_gpc);
+			  config->pe_count_per_gpc);
 		goto cleanup;
 	}
 
-	if (gr->gpc_ppc_count == NULL) {
-		gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count *
+	if (config->gpc_ppc_count == NULL) {
+		config->gpc_ppc_count = nvgpu_kzalloc(g, config->gpc_count *
 					sizeof(u32));
 	} else {
-		(void) memset(gr->gpc_ppc_count, 0, gr->gpc_count *
+		(void) memset(config->gpc_ppc_count, 0, config->gpc_count *
 					sizeof(u32));
 	}
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		gr->gpc_ppc_count[gpc_index] =
+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		config->gpc_ppc_count[gpc_index] =
 			priv->constants.gpc_ppc_count[gpc_index];
 
-		for (pes_index = 0u; pes_index < gr->pe_count_per_gpc;
+		for (pes_index = 0u; pes_index < config->pe_count_per_gpc;
 		     pes_index++) {
 			u32 pes_tpc_count, pes_tpc_mask;
 
-			if (gr->pes_tpc_count[pes_index] == NULL) {
-				gr->pes_tpc_count[pes_index] = nvgpu_kzalloc(g,
-					gr->gpc_count * sizeof(u32));
-				gr->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g,
-					gr->gpc_count * sizeof(u32));
-				if (gr->pes_tpc_count[pes_index] == NULL ||
-				    gr->pes_tpc_mask[pes_index] == NULL) {
+			if (config->pes_tpc_count[pes_index] == NULL) {
+				config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g,
+					config->gpc_count * sizeof(u32));
+				config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g,
+					config->gpc_count * sizeof(u32));
+				if (config->pes_tpc_count[pes_index] == NULL ||
+				    config->pes_tpc_mask[pes_index] == NULL) {
 					goto cleanup;
 				}
 			}
@@ -672,8 +682,8 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 			pes_tpc_mask = priv->constants.
 				pes_tpc_mask[TEGRA_VGPU_MAX_PES_COUNT_PER_GPC *
 				gpc_index + pes_index];
-			gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
-			gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
+			config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
+			config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
 		}
 	}
 
@@ -688,21 +698,21 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 cleanup:
 	nvgpu_err(g, "out of memory");
 
-	for (pes_index = 0u; pes_index < gr->pe_count_per_gpc; pes_index++) {
-		nvgpu_kfree(g, gr->pes_tpc_count[pes_index]);
-		gr->pes_tpc_count[pes_index] = NULL;
-		nvgpu_kfree(g, gr->pes_tpc_mask[pes_index]);
-		gr->pes_tpc_mask[pes_index] = NULL;
+	for (pes_index = 0u; pes_index < config->pe_count_per_gpc; pes_index++) {
+		nvgpu_kfree(g, config->pes_tpc_count[pes_index]);
+		config->pes_tpc_count[pes_index] = NULL;
+		nvgpu_kfree(g, config->pes_tpc_mask[pes_index]);
+		config->pes_tpc_mask[pes_index] = NULL;
 	}
 
-	nvgpu_kfree(g, gr->gpc_ppc_count);
-	gr->gpc_ppc_count = NULL;
+	nvgpu_kfree(g, config->gpc_ppc_count);
+	config->gpc_ppc_count = NULL;
 
-	nvgpu_kfree(g, gr->gpc_tpc_count);
-	gr->gpc_tpc_count = NULL;
+	nvgpu_kfree(g, config->gpc_tpc_count);
+	config->gpc_tpc_count = NULL;
 
-	nvgpu_kfree(g, gr->gpc_tpc_mask);
-	gr->gpc_tpc_mask = NULL;
+	nvgpu_kfree(g, config->gpc_tpc_mask);
+	config->gpc_tpc_mask = NULL;
 
 	return err;
 }
@@ -759,7 +769,8 @@ int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
 	return 0;
 }
 
-u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
+u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config,
+	u32 gpc_index)
 {
 	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
 
@@ -907,15 +918,11 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr)
 
 	gk20a_comptag_allocator_destroy(gr->g, &gr->comp_tags);
 
-	nvgpu_kfree(gr->g, gr->gpc_tpc_mask);
-	gr->gpc_tpc_mask = NULL;
+	nvgpu_gr_config_deinit(gr->g, gr->config);
 
 	nvgpu_kfree(gr->g, gr->sm_to_cluster);
 	gr->sm_to_cluster = NULL;
 
-	nvgpu_kfree(gr->g, gr->gpc_tpc_count);
-	gr->gpc_tpc_count = NULL;
-
 	nvgpu_kfree(gr->g, gr->fbp_rop_l2_en_mask);
 	gr->fbp_rop_l2_en_mask = NULL;
 }
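
The manual frees dropped in the hunk above are subsumed by
nvgpu_gr_config_deinit(). Its body lives in common/gr/config/gr_config.c and
is not shown in this diff; a plausible sketch, mirroring the allocations
vgpu_gr_init_gr_config() makes (an assumption, not the actual
implementation):

    void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
    {
        u32 pes_index;

        /* release the per-PES arrays allocated during config init */
        for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
            nvgpu_kfree(g, config->pes_tpc_count[pes_index]);
            nvgpu_kfree(g, config->pes_tpc_mask[pes_index]);
        }
        nvgpu_kfree(g, config->gpc_ppc_count);
        nvgpu_kfree(g, config->gpc_tpc_count);
        nvgpu_kfree(g, config->gpc_tpc_mask);
    }
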
@@ -1353,6 +1360,7 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
 	struct sm_info *sm_info;
 	int err;
 	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_gr_config *config = gr->config;
 	size_t oob_size;
 	void *handle = NULL;
 	u32 sm_id;
@@ -1374,8 +1382,8 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
 		return -EINVAL;
 	}
 
-	max_sm = gr->gpc_count *
-		gr->max_tpc_per_gpc_count *
+	max_sm = config->gpc_count *
+		config->max_tpc_per_gpc_count *
 		priv->constants.sm_per_tpc;
 	if (p->num_sm > max_sm) {
 		return -EINVAL;
@@ -43,7 +43,8 @@ int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
 	u32 mode);
 int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
 	struct gr_zcull_info *zcull_params);
-u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
+u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config,
+	u32 gpc_index);
 u32 vgpu_gr_get_max_fbps_count(struct gk20a *g);
 u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g);
 u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g);
@@ -143,7 +143,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.falcon_load_ucode = NULL,
 		.load_ctxsw_ucode = NULL,
 		.set_gpc_tpc_mask = NULL,
-		.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
 		.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
 		.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
 		.get_zcull_info = vgpu_gr_get_zcull_info,
@@ -351,6 +350,9 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 			.set_type_per_veid_header =
 				gv11b_ctxsw_prog_set_type_per_veid_header,
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
+		},
+		.config = {
+			.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
 		}
 	},
 	.fb = {
@@ -736,6 +738,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g)
 	gops->ce2 = vgpu_gv11b_ops.ce2;
 	gops->gr = vgpu_gv11b_ops.gr;
 	gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog;
+	gops->gr.config = vgpu_gv11b_ops.gr.config;
 	gops->fb = vgpu_gv11b_ops.fb;
 	gops->clock_gating = vgpu_gv11b_ops.clock_gating;
 	gops->fifo = vgpu_gv11b_ops.fifo;