mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: Fix for pes_tpc_mask programming
This issue was identified after enabling the CONFIG_UBSAN kernel compilation flag, which reports whether any shift operation causes an overflow. The register "gr_fe_tpc_fs_r(gpc_index)" is read-only after Volta, so the gops in which the index is computed is not needed on those chips. Bug 200727116 Change-Id: Ib2306103389ba9df77fd59d012ec70e775104989 Signed-off-by: dt <dt@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2573296 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
@@ -49,64 +49,6 @@ static int gr_load_sm_id_config(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Build one combined PES TPC mask over every GPC in 'config' and hand it to
 * g->ops.gr.init.tpc_mask() with 0 as the second argument.
 *
 * For each (gpc, pes) pair the value returned by
 * nvgpu_gr_config_get_pes_tpc_mask() is shifted left by
 * num_tpc_per_gpc * gpc and OR-ed into the result.
 *
 * When CONFIG_NVGPU_NON_FUSA is defined and NVGPU_SUPPORT_MIG is not enabled,
 * a non-zero g->tpc_fs_mask_user that differs from the fuse TPC mask replaces
 * the computed mask, but only if the fuse mask equals
 * BIT32(max_tpc_count) - 1.
 *
 * NOTE(review): neither the shift amount (num_tpc_per_gpc * gpc) nor the
 * argument of BIT32(max_tpc_count) is range-checked in this function; a value
 * of 32 or more would be an out-of-range shift on a u32 - confirm the limits
 * for every chip that reaches this code.
 * NOTE(review): g->ops.gr.init.tpc_mask is called without a NULL check.
 */
static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 pes_tpc_mask = 0;
|
||||
u32 gpc, pes;
|
||||
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_NUM_TPC_PER_GPC);
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config);
|
||||
u32 fuse_tpc_mask;
|
||||
u32 val;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 gpc_phys_id;
|
||||
#endif
|
||||
|
||||
/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
|
||||
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
|
||||
for (pes = 0;
|
||||
pes < nvgpu_gr_config_get_pe_count_per_gpc(config);
|
||||
pes++) {
|
||||
pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
|
||||
config, gpc, pes) <<
|
||||
nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc);
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/*
|
||||
* Fuse registers must be queried with physical gpc-id and not
|
||||
* the logical ones. For tu104 and before chips logical gpc-id
|
||||
* is same as physical gpc-id for non-floorswept config but for
|
||||
* chips after tu104 it may not be true.
|
||||
*/
|
||||
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
|
||||
cur_gr_instance, 0U);
|
||||
fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
|
||||
if ((g->tpc_fs_mask_user != 0U) &&
|
||||
(g->tpc_fs_mask_user != fuse_tpc_mask)) {
|
||||
if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count),
|
||||
U32(1))) {
|
||||
val = g->tpc_fs_mask_user;
|
||||
val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1));
|
||||
/*
|
||||
* skip tpc to disable the other tpc cause channel
|
||||
* timeout
|
||||
*/
|
||||
/* Replace val with BIT32(hweight32(val)) - 1; presumably a contiguous low-order mask with the same number of set bits - TODO confirm helper semantics. */
val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1));
|
||||
pes_tpc_mask = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
|
||||
}
|
||||
|
||||
int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 tpc_index, gpc_index;
|
||||
@@ -184,7 +126,9 @@ int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
|
||||
g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt);
|
||||
|
||||
gr_load_tpc_mask(g, config);
|
||||
if (g->ops.gr.init.gr_load_tpc_mask != NULL) {
|
||||
g->ops.gr.init.gr_load_tpc_mask(g, config);
|
||||
}
|
||||
|
||||
err = gr_load_sm_id_config(g, config);
|
||||
if (err != 0) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -36,6 +36,7 @@
|
||||
|
||||
#include <nvgpu/gr/gr.h>
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/gr_instances.h>
|
||||
|
||||
#include "gr_init_gm20b.h"
|
||||
|
||||
@@ -388,6 +389,65 @@ void gm20b_gr_init_rop_mapping(struct gk20a *g,
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Build one combined PES TPC mask over every GPC in 'config' and hand it to
 * g->ops.gr.init.tpc_mask() with 0 as the second argument.
 *
 * For each (gpc, pes) pair the value returned by
 * nvgpu_gr_config_get_pes_tpc_mask() is shifted left by
 * num_tpc_per_gpc * gpc and OR-ed into the result.
 *
 * When CONFIG_NVGPU_NON_FUSA is defined and NVGPU_SUPPORT_MIG is not enabled,
 * a non-zero g->tpc_fs_mask_user that differs from the fuse TPC mask replaces
 * the computed mask, but only if the fuse mask equals
 * BIT32(max_tpc_count) - 1.
 *
 * NOTE(review): the in-body comment below still talks about gv11b, while this
 * function is installed as .gr_load_tpc_mask only in the gm20b and gp10b ops
 * tables in this change (gv11b, tu104, ga10b and ga100 set the hook to NULL).
 * NOTE(review): neither the shift amount (num_tpc_per_gpc * gpc) nor the
 * argument of BIT32(max_tpc_count) is range-checked in this function; a value
 * of 32 or more would be an out-of-range shift on a u32 - confirm the limits
 * for the chips that use this hook.
 * NOTE(review): g->ops.gr.init.tpc_mask is called without a NULL check, so
 * any ops table installing this hook must also provide .tpc_mask.
 */
void gm20b_gr_init_load_tpc_mask(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 pes_tpc_mask = 0;
|
||||
u32 gpc, pes;
|
||||
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_NUM_TPC_PER_GPC);
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config);
|
||||
u32 fuse_tpc_mask;
|
||||
u32 val;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 gpc_phys_id;
|
||||
#endif
|
||||
/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
|
||||
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
|
||||
for (pes = 0;
|
||||
pes < nvgpu_gr_config_get_pe_count_per_gpc(config);
|
||||
pes++) {
|
||||
pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
|
||||
config, gpc, pes) <<
|
||||
nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc);
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/*
|
||||
* Fuse registers must be queried with physical gpc-id and not
|
||||
* the logical ones. For tu104 and before chips logical gpc-id
|
||||
* is same as physical gpc-id for non-floorswept config but for
|
||||
* chips after tu104 it may not be true.
|
||||
*/
|
||||
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
|
||||
cur_gr_instance, 0U);
|
||||
fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
|
||||
if ((g->tpc_fs_mask_user != 0U) &&
|
||||
(g->tpc_fs_mask_user != fuse_tpc_mask)) {
|
||||
if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count),
|
||||
U32(1))) {
|
||||
val = g->tpc_fs_mask_user;
|
||||
val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1));
|
||||
/*
|
||||
* skip tpc to disable the other tpc cause channel
|
||||
* timeout
|
||||
*/
|
||||
/* Replace val with BIT32(hweight32(val)) - 1; presumably a contiguous low-order mask with the same number of set bits - TODO confirm helper semantics. */
val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1));
|
||||
pes_tpc_mask = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
|
||||
|
||||
}
|
||||
|
||||
void gm20b_gr_init_fs_state(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
@@ -49,6 +49,8 @@ void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g,
|
||||
struct nvgpu_gr_config *gr_config);
|
||||
void gm20b_gr_init_cwd_gpcs_tpcs_num(struct gk20a *g,
|
||||
u32 gpc_count, u32 tpc_count);
|
||||
void gm20b_gr_init_load_tpc_mask(struct gk20a *g,
|
||||
struct nvgpu_gr_config *gr_config);
|
||||
int gm20b_gr_init_wait_idle(struct gk20a *g);
|
||||
int gm20b_gr_init_wait_fe_idle(struct gk20a *g);
|
||||
int gm20b_gr_init_fe_pwr_mode_force_on(struct gk20a *g, bool force_on);
|
||||
|
||||
@@ -53,7 +53,6 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
|
||||
struct nvgpu_gr_config *gr_config,
|
||||
struct nvgpu_gr_ctx *gr_ctx,
|
||||
bool patch);
|
||||
void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
|
||||
void gv11b_gr_init_fs_state(struct gk20a *g);
|
||||
|
||||
void gv11b_gr_init_commit_global_timeslice(struct gk20a *g);
|
||||
|
||||
@@ -529,11 +529,6 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Write pes_tpc_mask to the register addressed by gr_fe_tpc_fs_r(gpc_index)
 * using nvgpu_writel().
 *
 * NOTE(review): the commit message on this page states that this register is
 * read-only after Volta, which is the stated reason this helper is removed.
 */
void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask)
|
||||
{
|
||||
nvgpu_writel(g, gr_fe_tpc_fs_r(gpc_index), pes_tpc_mask);
|
||||
}
|
||||
|
||||
void gv11b_gr_init_fs_state(struct gk20a *g)
|
||||
{
|
||||
u32 data;
|
||||
|
||||
@@ -597,11 +597,12 @@ static const struct gops_gr_init ga100_ops_gr_init = {
|
||||
.sm_id_config_early = nvgpu_gr_init_sm_id_early_config,
|
||||
.sm_id_config = gv11b_gr_init_sm_id_config,
|
||||
.sm_id_numbering = ga10b_gr_init_sm_id_numbering,
|
||||
.tpc_mask = gv11b_gr_init_tpc_mask,
|
||||
.tpc_mask = NULL,
|
||||
.fs_state = ga10b_gr_init_fs_state,
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.gr_load_tpc_mask = NULL,
|
||||
.wait_empty = ga10b_gr_init_wait_empty,
|
||||
.wait_idle = ga10b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
|
||||
@@ -575,11 +575,12 @@ static const struct gops_gr_init ga10b_ops_gr_init = {
|
||||
.sm_id_config_early = nvgpu_gr_init_sm_id_early_config,
|
||||
.sm_id_config = gv11b_gr_init_sm_id_config,
|
||||
.sm_id_numbering = ga10b_gr_init_sm_id_numbering,
|
||||
.tpc_mask = gv11b_gr_init_tpc_mask,
|
||||
.tpc_mask = NULL,
|
||||
.fs_state = ga10b_gr_init_fs_state,
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.gr_load_tpc_mask = NULL,
|
||||
.wait_empty = ga10b_gr_init_wait_empty,
|
||||
.wait_idle = ga10b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
|
||||
@@ -327,6 +327,7 @@ static const struct gops_gr_init gm20b_ops_gr_init = {
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.gr_load_tpc_mask = gm20b_gr_init_load_tpc_mask,
|
||||
.wait_empty = gm20b_gr_init_wait_idle,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
|
||||
@@ -394,6 +394,7 @@ static const struct gops_gr_init gp10b_ops_gr_init = {
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.gr_load_tpc_mask = gm20b_gr_init_load_tpc_mask,
|
||||
.wait_empty = gp10b_gr_init_wait_empty,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
|
||||
@@ -478,11 +478,12 @@ static const struct gops_gr_init gv11b_ops_gr_init = {
|
||||
.get_sm_id_size = gp10b_gr_init_get_sm_id_size,
|
||||
.sm_id_config = gv11b_gr_init_sm_id_config,
|
||||
.sm_id_numbering = gv11b_gr_init_sm_id_numbering,
|
||||
.tpc_mask = gv11b_gr_init_tpc_mask,
|
||||
.tpc_mask = NULL,
|
||||
.fs_state = gv11b_gr_init_fs_state,
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.gr_load_tpc_mask = NULL,
|
||||
.wait_empty = gp10b_gr_init_wait_empty,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
|
||||
@@ -523,11 +523,12 @@ static const struct gops_gr_init tu104_ops_gr_init = {
|
||||
.get_sm_id_size = gp10b_gr_init_get_sm_id_size,
|
||||
.sm_id_config = gv11b_gr_init_sm_id_config,
|
||||
.sm_id_numbering = gv11b_gr_init_sm_id_numbering,
|
||||
.tpc_mask = gv11b_gr_init_tpc_mask,
|
||||
.tpc_mask = NULL,
|
||||
.fs_state = gv11b_gr_init_fs_state,
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.gr_load_tpc_mask = NULL,
|
||||
.wait_empty = gp10b_gr_init_wait_empty,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
|
||||
@@ -752,6 +752,8 @@ struct gops_gr_init {
|
||||
struct nvgpu_gr_config *gr_config);
|
||||
void (*cwd_gpcs_tpcs_num)(struct gk20a *g,
|
||||
u32 gpc_count, u32 tpc_count);
|
||||
void (*gr_load_tpc_mask)(struct gk20a *g,
|
||||
struct nvgpu_gr_config *gr_config);
|
||||
int (*wait_empty)(struct gk20a *g);
|
||||
void (*override_context_reset)(struct gk20a *g);
|
||||
void (*fe_go_idle_timeout)(struct gk20a *g, bool enable);
|
||||
|
||||
Reference in New Issue
Block a user