gpu: nvgpu: read fuse reg using physical gpc-id

Fuse registers should be queried with the physical gpc-id, not the
logical one. On tu104 and earlier chips the physical gpc-ids match the
logical ones in a non-floorswept config, but on newer chips they may
differ. Also, there is no logical-to-physical mapping for a floorswept
gpc, so query the gpc_tpc mask only up to the number of gpcs actually
present.

Jira NVGPU-6080

Change-Id: I84c4a3c1f256fdd1927f4365af26e9892fe91beb
Signed-off-by: shashank singh <shashsingh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2417721
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>

Author:    shashank singh <shashsingh@nvidia.com>
Date:      2020-09-22 11:40:38 +05:30
Committer: Alex Waterman
Commit:    d003fa57df (parent 78fb67bb0b)

4 changed files with 67 additions and 40 deletions
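In outline, the change is: translate each logical gpc-id to its physical id before touching the fuse register. A minimal standalone sketch of why that matters (simplified stand-in tables, not nvgpu code; a 4-GPC chip with physical GPC1 floorswept is assumed):

/* Sketch, not nvgpu source: a 4-GPC chip with physical GPC1
 * floorswept, so logical ids 0..2 map to physical ids 0, 2, 3. */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical fuse values, indexed by *physical* GPC id. */
static const uint32_t fuse_tpc_mask[4] = { 0xFu, 0x0u, 0x7u, 0xFu };

/* The role nvgpu_grmgr_get_gr_gpc_phys_id() plays in the driver:
 * logical index -> physical id, skipping floorswept units. */
static const uint32_t phys_id[3] = { 0u, 2u, 3u };

int main(void)
{
	for (uint32_t logical = 0u; logical < 3u; logical++) {
		/* Reading fuse_tpc_mask[logical] would consult GPC1's
		 * fuse for logical GPC 1, i.e. the floorswept unit. */
		printf("logical %u -> physical %u, tpc mask 0x%x\n",
		       logical, phys_id[logical],
		       fuse_tpc_mask[phys_id[logical]]);
	}
	return 0;
}

With the assumed tables this prints "logical 1 -> physical 2, tpc mask 0x7" rather than GPC1's meaningless floorswept fuse value.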

----- File 1 of 4 -----

@@ -25,6 +25,8 @@
 #include <nvgpu/gr/config.h>
 #include <nvgpu/gr/fs_state.h>
+#include <nvgpu/gr/gr_instances.h>
+#include <nvgpu/grmgr.h>

 static int gr_load_sm_id_config(struct gk20a *g, struct nvgpu_gr_config *config)
 {
@@ -57,6 +59,8 @@ static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config)
 	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config);
 	u32 fuse_tpc_mask;
 	u32 val;
+	u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
+	u32 gpc_phys_id;
 #endif

 	/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
@@ -74,7 +78,15 @@ static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config)
 #ifdef CONFIG_NVGPU_NON_FUSA
 	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
-		fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, 0);
+		/*
+		 * Fuse registers must be queried with physical gpc-id and not
+		 * the logical ones. For tu104 and before chips logical gpc-id
+		 * is same as physical gpc-id for non-floorswept config but for
+		 * chips after tu104 it may not be true.
+		 */
+		gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
+			cur_gr_instance, 0U);
+		fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
 		if ((g->tpc_fs_mask_user != 0U) &&
 			(g->tpc_fs_mask_user != fuse_tpc_mask)) {
 			if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count),
@@ -102,6 +114,8 @@ int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
 #ifdef CONFIG_NVGPU_NON_FUSA
 	u32 fuse_tpc_mask;
 	u32 max_tpc_cnt;
+	u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
+	u32 gpc_phys_id;
 #endif
 	u32 gpc_cnt, tpc_cnt;
 	u32 num_sm;
@@ -145,7 +159,15 @@ int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
 #ifdef CONFIG_NVGPU_NON_FUSA
 	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
-		fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, 0);
+		/*
+		 * Fuse registers must be queried with physical gpc-id and not
+		 * the logical ones. For tu104 and before chips logical gpc-id
+		 * is same as physical gpc-id for non-floorswept config but for
+		 * chips after tu104 it may not be true.
+		 */
+		gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
+			cur_gr_instance, 0U);
+		fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
 		max_tpc_cnt = nvgpu_gr_config_get_max_tpc_count(config);
 		if ((g->tpc_fs_mask_user != 0U) &&

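Both hunks leave the surrounding user-mask check intact; the diff truncates it, but its shape is roughly the following. A hedged sketch with simplified names; the completion of the cut-off comparison against a fully-set mask is an assumption, not taken from this commit:

/* Sketch, not nvgpu source: assumed completion of the truncated
 * tpc_fs_mask_user comparison in gr_load_tpc_mask(). */
static uint32_t effective_tpc_mask(uint32_t fuse_tpc_mask,
				   uint32_t user_mask,
				   uint32_t max_tpc_count)
{
	uint32_t full = (1u << max_tpc_count) - 1u;	/* BIT32(n) - 1 */

	if (user_mask != 0u && user_mask != fuse_tpc_mask &&
	    fuse_tpc_mask == full) {
		/* HW is not floorswept: honor the emulated mask. */
		return user_mask & full;
	}
	return fuse_tpc_mask;
}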
----- File 2 of 4 -----

@@ -209,7 +209,7 @@ static bool gr_config_alloc_struct_mem(struct gk20a *g,
 	u32 pes_index;
 	u32 total_tpc_cnt;
 	size_t sm_info_size;
-	size_t gpc_size, sm_size;
+	size_t gpc_size, sm_size, max_gpc_cnt;
 	size_t pd_tbl_size;

 	total_tpc_cnt = nvgpu_safe_mult_u32(config->gpc_count,
@@ -238,7 +238,9 @@ static bool gr_config_alloc_struct_mem(struct gk20a *g,
 	config->no_of_sm = 0;

 	gpc_size = nvgpu_safe_mult_u64((size_t)config->gpc_count, sizeof(u32));
+	max_gpc_cnt = nvgpu_safe_mult_u64((size_t)config->max_gpc_count, sizeof(u32));
 	config->gpc_tpc_count = nvgpu_kzalloc(g, gpc_size);
+	config->gpc_tpc_mask = nvgpu_kzalloc(g, max_gpc_cnt);
 #ifdef CONFIG_NVGPU_GRAPHICS
 	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
 		config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g,
@@ -289,8 +291,6 @@ static int gr_config_init_mig_gpcs(struct nvgpu_gr_config *config)
 {
 	struct gk20a *g = config->g;
 	u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
-	u32 gpc_phys_id;
-	u32 gpc_id;

 	config->max_gpc_count = nvgpu_grmgr_get_gr_num_gpcs(g, cur_gr_instance);
 	config->gpc_count = nvgpu_grmgr_get_gr_num_gpcs(g, cur_gr_instance);
@@ -301,29 +301,12 @@ static int gr_config_init_mig_gpcs(struct nvgpu_gr_config *config)
 	config->gpc_mask = nvgpu_safe_sub_u32(BIT32(config->gpc_count), 1U);

-	config->gpc_tpc_mask = nvgpu_kzalloc(g, config->max_gpc_count * sizeof(u32));
-	if (config->gpc_tpc_mask == NULL) {
-		return -ENOMEM;
-	}
-
-	/* Required to read gpc_tpc_mask below */
-	config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
-
-	/* Fuse regsiters index GPCs by physical ID */
-	for (gpc_id = 0; gpc_id < config->gpc_count; gpc_id++) {
-		gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
-			cur_gr_instance, gpc_id);
-		config->gpc_tpc_mask[gpc_id] =
-			g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
-	}
-
 	return 0;
 }

 static int gr_config_init_gpcs(struct nvgpu_gr_config *config)
 {
 	struct gk20a *g = config->g;
-	u32 gpc_index;

 	config->max_gpc_count = g->ops.top.get_max_gpc_count(g);
 	config->gpc_count = g->ops.priv_ring.get_gpc_count(g);
@@ -334,26 +317,15 @@ static int gr_config_init_gpcs(struct nvgpu_gr_config *config)
 	gr_config_set_gpc_mask(g, config);

-	config->gpc_tpc_mask = nvgpu_kzalloc(g, config->max_gpc_count * sizeof(u32));
-	if (config->gpc_tpc_mask == NULL) {
-		return -ENOMEM;
-	}
-
-	/* Required to read gpc_tpc_mask below */
-	config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
-
-	for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) {
-		config->gpc_tpc_mask[gpc_index] =
-			g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_index);
-	}
-
 	return 0;
 }

 struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
 {
 	struct nvgpu_gr_config *config;
+	u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
 	u32 gpc_index;
+	u32 gpc_phys_id;
 	int err;

 	config = nvgpu_kzalloc(g, sizeof(*config));
@@ -367,16 +339,21 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
 		err = gr_config_init_mig_gpcs(config);
 		if (err < 0) {
 			nvgpu_err(g, "MIG GPC config init failed");
+			nvgpu_kfree(g, config);
 			return NULL;
 		}
 	} else {
 		err = gr_config_init_gpcs(config);
 		if (err < 0) {
 			nvgpu_err(g, "GPC config init failed");
+			nvgpu_kfree(g, config);
 			return NULL;
 		}
 	}

+	/* Required to read gpc_tpc_mask below */
+	config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
+
 	config->max_tpc_count = nvgpu_safe_mult_u32(config->max_gpc_count,
 				config->max_tpc_per_gpc_count);
@@ -398,6 +375,19 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
 		goto clean_up_init;
 	}

+	for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
+		/*
+		 * Fuse registers must be queried with physical gpc-id and not
+		 * the logical ones. For tu104 and before chips logical gpc-id
+		 * is same as physical gpc-id for non-floorswept config but for
+		 * chips after tu104 it may not be true.
+		 */
+		gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
+			cur_gr_instance, gpc_index);
+		config->gpc_tpc_mask[gpc_index] =
+			g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
+	}
+
 	config->ppc_count = 0;
 	config->tpc_count = 0;
 #ifdef CONFIG_NVGPU_GRAPHICS

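The net effect in this file: gpc_tpc_mask allocation moves into common code sized by max_gpc_count, while population stops at gpc_count, since a floorswept gpc has no logical-to-physical mapping to query. A sketch of that sizing rule (simplified types; phys_of/fuse_mask_of are hypothetical stand-ins for nvgpu_grmgr_get_gr_gpc_phys_id() and g->ops.gr.config.get_gpc_tpc_mask()):

/* Sketch, not nvgpu source: allocate for the worst case,
 * populate only the GPCs that survived floorsweeping. */
#include <stdint.h>
#include <stdlib.h>

struct gr_config_sketch {
	uint32_t max_gpc_count;	/* physical slots on the die */
	uint32_t gpc_count;	/* logical GPCs after floorsweeping */
	uint32_t *gpc_tpc_mask;
};

static int init_gpc_tpc_mask(struct gr_config_sketch *c,
			     uint32_t (*phys_of)(uint32_t logical),
			     uint32_t (*fuse_mask_of)(uint32_t phys))
{
	/* Sized by max_gpc_count: physical ids can exceed gpc_count. */
	c->gpc_tpc_mask = calloc(c->max_gpc_count, sizeof(uint32_t));
	if (c->gpc_tpc_mask == NULL)
		return -1;

	/* Loop bound is gpc_count: a floorswept GPC has no
	 * logical-to-physical mapping, so there is nothing to query. */
	for (uint32_t i = 0u; i < c->gpc_count; i++)
		c->gpc_tpc_mask[i] = fuse_mask_of(phys_of(i));
	return 0;
}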
----- File 3 of 4 -----

@@ -34,6 +34,8 @@ int nvgpu_init_gr_manager(struct gk20a *g)
 	u32 gpc_id;
 	struct nvgpu_gpu_instance *gpu_instance = &g->mig.gpu_instance[0];
 	struct nvgpu_gr_syspipe *gr_syspipe = &gpu_instance->gr_syspipe;
+	u32 local_gpc_mask;
+	u32 ffs_bit = 0U;

 	/* Number of gpu instance is 1 for legacy mode */
 	g->mig.gpc_count = g->ops.priv_ring.get_gpc_count(g);
@@ -69,14 +71,21 @@ int nvgpu_init_gr_manager(struct gk20a *g)
 		}
 	} else {
 		/*
-		 * For Legacy gpu,
-		 * Local GPC Id = physical GPC Id = Logical GPC Id.
+		 * For tu104 and before chips,
+		 * Local GPC Id = physical GPC Id = Logical GPC Id for
+		 * non-floorswept config else physical gpcs are assigned
+		 * serially and floorswept gpcs are skipped.
 		 */
+		local_gpc_mask = gr_syspipe->gpc_mask;
 		for (gpc_id = 0U; gpc_id < gr_syspipe->num_gpc; gpc_id++) {
-			gr_syspipe->gpcs[gpc_id].logical_id =
-				gr_syspipe->gpcs[gpc_id].physical_id = gpc_id;
+			gr_syspipe->gpcs[gpc_id].logical_id = gpc_id;
+			nvgpu_assert(local_gpc_mask != 0U);
+			ffs_bit = nvgpu_ffs(local_gpc_mask) - 1U;
+			local_gpc_mask &= ~(1U << ffs_bit);
+			gr_syspipe->gpcs[gpc_id].physical_id = ffs_bit;
 			gr_syspipe->gpcs[gpc_id].gpcgrp_id = 0U;
 		}
+		nvgpu_assert(local_gpc_mask == 0U);
 	}

 	gr_syspipe->max_veid_count_per_tsg = g->fifo.max_subctx_count;
 	gr_syspipe->veid_start_offset = 0U;

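The new legacy-path mapping can be checked in isolation: walking gpc_mask with find-first-set yields dense logical ids over the surviving physical ids. A standalone sketch (POSIX ffs() standing in for nvgpu_ffs(); the example mask is made up):

/* Sketch of the physical-id assignment in nvgpu_init_gr_manager():
 * example gpc_mask = 0b1101, i.e. physical GPC1 floorswept. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	uint32_t gpc_mask = 0xDu;	/* bits 0, 2, 3 set */
	uint32_t num_gpc = 3u;		/* population count of gpc_mask */
	uint32_t local_mask = gpc_mask;

	for (uint32_t logical = 0u; logical < num_gpc; logical++) {
		assert(local_mask != 0u);
		uint32_t phys = (uint32_t)ffs((int)local_mask) - 1u;
		local_mask &= ~(1u << phys);	/* consume that bit */
		printf("logical %u -> physical %u\n", logical, phys);
	}
	/* Every set bit must have been consumed. */
	assert(local_mask == 0u);
	return 0;
}

This prints 0 -> 0, 1 -> 2, 2 -> 3, matching the commit's intent that floorswept physical gpcs are skipped while logical ids stay dense.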
----- File 4 of 4 -----

@@ -28,6 +28,8 @@
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/gr_utils.h>
+#include <nvgpu/gr/gr_instances.h>
+#include <nvgpu/grmgr.h>
 #include <nvgpu/power_features/cg.h>
 #include <nvgpu/power_features/pg.h>
 #include <nvgpu/pmu/pmu_perfmon.h>
@@ -947,6 +949,8 @@ static ssize_t tpc_fs_mask_read(struct device *dev,
 	u32 gpc_index;
 	u32 tpc_fs_mask = 0;
 	int err = 0;
+	u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
+	u32 gpc_phys_id;

 	err = gk20a_busy(g);
 	if (err)
@@ -955,9 +959,11 @@ static ssize_t tpc_fs_mask_read(struct device *dev,
 	for (gpc_index = 0;
 	     gpc_index < nvgpu_gr_config_get_gpc_count(gr_config);
 	     gpc_index++) {
+		gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
+			cur_gr_instance, gpc_index);
 		if (g->ops.gr.config.get_gpc_tpc_mask)
 			tpc_fs_mask |=
-				g->ops.gr.config.get_gpc_tpc_mask(g, gr_config, gpc_index) <<
+				g->ops.gr.config.get_gpc_tpc_mask(g, gr_config, gpc_phys_id) <<
 				(nvgpu_gr_config_get_max_tpc_per_gpc_count(gr_config) * gpc_index);
 	}
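Note the asymmetry this last hunk preserves: the fuse value is fetched by physical id, but it is packed into the sysfs result at the logical position, so the reported layout is unchanged. A sketch of that composition (simplified; phys_of/fuse_mask_of are hypothetical callbacks standing in for the driver hooks):

/* Sketch of tpc_fs_mask_read()'s composition: fetch by physical id,
 * pack by logical index. */
static uint32_t compose_tpc_fs_mask(uint32_t gpc_count,
				    uint32_t max_tpc_per_gpc,
				    uint32_t (*phys_of)(uint32_t logical),
				    uint32_t (*fuse_mask_of)(uint32_t phys))
{
	uint32_t out = 0u;

	for (uint32_t i = 0u; i < gpc_count; i++)
		out |= fuse_mask_of(phys_of(i)) << (max_tpc_per_gpc * i);
	return out;
}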