From 152d7c9edd09a1faa94b6cb9aaa1a4eff904b3bc Mon Sep 17 00:00:00 2001 From: dt Date: Mon, 9 Aug 2021 18:20:53 +0000 Subject: [PATCH] gpu: nvgpu: Fix for pes_tpc_mask programming This issue was identified after enabling the CONFIG_UBSAN kernel compilation flag, which reports whether any shift operation causes an overflow. The register "gr_fe_tpc_fs_r(gpc_index)" is read-only after Volta. The gops in which we compute the index is therefore not needed. Bug 200727116 Change-Id: Ib2306103389ba9df77fd59d012ec70e775104989 Signed-off-by: dt Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2573296 Tested-by: mobile promotions Reviewed-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/fs_state.c | 62 +------------------ drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c | 62 ++++++++++++++++++- drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h | 2 + drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h | 1 - .../nvgpu/hal/gr/init/gr_init_gv11b_fusa.c | 5 -- drivers/gpu/nvgpu/hal/init/hal_ga100.c | 3 +- drivers/gpu/nvgpu/hal/init/hal_ga10b.c | 3 +- drivers/gpu/nvgpu/hal/init/hal_gm20b.c | 1 + drivers/gpu/nvgpu/hal/init/hal_gp10b.c | 1 + drivers/gpu/nvgpu/hal/init/hal_gv11b.c | 3 +- drivers/gpu/nvgpu/hal/init/hal_tu104.c | 3 +- drivers/gpu/nvgpu/include/nvgpu/gops/gr.h | 2 + 12 files changed, 78 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/fs_state.c b/drivers/gpu/nvgpu/common/gr/fs_state.c index d9ac20984..847a59957 100644 --- a/drivers/gpu/nvgpu/common/gr/fs_state.c +++ b/drivers/gpu/nvgpu/common/gr/fs_state.c @@ -49,64 +49,6 @@ static int gr_load_sm_id_config(struct gk20a *g, struct nvgpu_gr_config *config) return err; } -static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config) -{ - u32 pes_tpc_mask = 0; - u32 gpc, pes; - u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, - GPU_LIT_NUM_TPC_PER_GPC); -#ifdef CONFIG_NVGPU_NON_FUSA - u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config); - u32 fuse_tpc_mask; - u32 val; - u32 cur_gr_instance = 
nvgpu_gr_get_cur_instance_id(g); - u32 gpc_phys_id; -#endif - - /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */ - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) { - for (pes = 0; - pes < nvgpu_gr_config_get_pe_count_per_gpc(config); - pes++) { - pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask( - config, gpc, pes) << - nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc); - } - } - - nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask); - -#ifdef CONFIG_NVGPU_NON_FUSA - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { - /* - * Fuse registers must be queried with physical gpc-id and not - * the logical ones. For tu104 and before chips logical gpc-id - * is same as physical gpc-id for non-floorswept config but for - * chips after tu104 it may not be true. - */ - gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, - cur_gr_instance, 0U); - fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id); - if ((g->tpc_fs_mask_user != 0U) && - (g->tpc_fs_mask_user != fuse_tpc_mask)) { - if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count), - U32(1))) { - val = g->tpc_fs_mask_user; - val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1)); - /* - * skip tpc to disable the other tpc cause channel - * timeout - */ - val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1)); - pes_tpc_mask = val; - } - } - } -#endif - - g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask); -} - int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config) { u32 tpc_index, gpc_index; @@ -184,7 +126,9 @@ int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config) g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt); - gr_load_tpc_mask(g, config); + if (g->ops.gr.init.gr_load_tpc_mask != NULL) { + g->ops.gr.init.gr_load_tpc_mask(g, config); + } err = gr_load_sm_id_config(g, config); if (err != 0) { diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c index 00cc2038d..28cd2643d 
100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,6 +36,7 @@ #include #include +#include #include "gr_init_gm20b.h" @@ -388,6 +389,65 @@ void gm20b_gr_init_rop_mapping(struct gk20a *g, } #endif +void gm20b_gr_init_load_tpc_mask(struct gk20a *g, + struct nvgpu_gr_config *config) +{ + u32 pes_tpc_mask = 0; + u32 gpc, pes; + u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_TPC_PER_GPC); +#ifdef CONFIG_NVGPU_NON_FUSA + u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config); + u32 fuse_tpc_mask; + u32 val; + u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g); + u32 gpc_phys_id; +#endif + /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */ + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) { + for (pes = 0; + pes < nvgpu_gr_config_get_pe_count_per_gpc(config); + pes++) { + pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask( + config, gpc, pes) << + nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc); + } + } + + nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask); + +#ifdef CONFIG_NVGPU_NON_FUSA + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* + * Fuse registers must be queried with physical gpc-id and not + * the logical ones. For tu104 and before chips logical gpc-id + * is same as physical gpc-id for non-floorswept config but for + * chips after tu104 it may not be true. 
+ */ + gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, + cur_gr_instance, 0U); + fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id); + if ((g->tpc_fs_mask_user != 0U) && + (g->tpc_fs_mask_user != fuse_tpc_mask)) { + if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count), + U32(1))) { + val = g->tpc_fs_mask_user; + val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1)); + /* + * skip tpc to disable the other tpc cause channel + * timeout + */ + val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1)); + pes_tpc_mask = val; + } + } + } +#endif + + g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask); + +} + void gm20b_gr_init_fs_state(struct gk20a *g) { nvgpu_log_fn(g, " "); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h index 171cf7d32..fc69b5a6b 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h @@ -49,6 +49,8 @@ void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g, struct nvgpu_gr_config *gr_config); void gm20b_gr_init_cwd_gpcs_tpcs_num(struct gk20a *g, u32 gpc_count, u32 tpc_count); +void gm20b_gr_init_load_tpc_mask(struct gk20a *g, + struct nvgpu_gr_config *gr_config); int gm20b_gr_init_wait_idle(struct gk20a *g); int gm20b_gr_init_wait_fe_idle(struct gk20a *g); int gm20b_gr_init_fe_pwr_mode_force_on(struct gk20a *g, bool force_on); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h index e04ce1a2b..eebd234cd 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h @@ -53,7 +53,6 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, struct nvgpu_gr_config *gr_config, struct nvgpu_gr_ctx *gr_ctx, bool patch); -void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask); void gv11b_gr_init_fs_state(struct gk20a *g); void gv11b_gr_init_commit_global_timeslice(struct gk20a *g); diff --git 
a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c index abd11ae8d..61cdebfda 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c @@ -529,11 +529,6 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, return 0; } -void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask) -{ - nvgpu_writel(g, gr_fe_tpc_fs_r(gpc_index), pes_tpc_mask); -} - void gv11b_gr_init_fs_state(struct gk20a *g) { u32 data; diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c index 30d999b09..a4df921a3 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c @@ -597,11 +597,12 @@ static const struct gops_gr_init ga100_ops_gr_init = { .sm_id_config_early = nvgpu_gr_init_sm_id_early_config, .sm_id_config = gv11b_gr_init_sm_id_config, .sm_id_numbering = ga10b_gr_init_sm_id_numbering, - .tpc_mask = gv11b_gr_init_tpc_mask, + .tpc_mask = NULL, .fs_state = ga10b_gr_init_fs_state, .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .gr_load_tpc_mask = NULL, .wait_empty = ga10b_gr_init_wait_empty, .wait_idle = ga10b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c index fc4d59c02..72d4993f5 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c @@ -575,11 +575,12 @@ static const struct gops_gr_init ga10b_ops_gr_init = { .sm_id_config_early = nvgpu_gr_init_sm_id_early_config, .sm_id_config = gv11b_gr_init_sm_id_config, .sm_id_numbering = ga10b_gr_init_sm_id_numbering, - .tpc_mask = gv11b_gr_init_tpc_mask, + .tpc_mask = NULL, .fs_state = ga10b_gr_init_fs_state, .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, 
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .gr_load_tpc_mask = NULL, .wait_empty = ga10b_gr_init_wait_empty, .wait_idle = ga10b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 6e23061a6..3bfaa0540 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -327,6 +327,7 @@ static const struct gops_gr_init gm20b_ops_gr_init = { .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .gr_load_tpc_mask = gm20b_gr_init_load_tpc_mask, .wait_empty = gm20b_gr_init_wait_idle, .wait_idle = gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index febb6ea46..80047a516 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -394,6 +394,7 @@ static const struct gops_gr_init gp10b_ops_gr_init = { .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .gr_load_tpc_mask = gm20b_gr_init_load_tpc_mask, .wait_empty = gp10b_gr_init_wait_empty, .wait_idle = gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 80a610e4b..360a2e4cb 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -478,11 +478,12 @@ static const struct gops_gr_init gv11b_ops_gr_init = { .get_sm_id_size = gp10b_gr_init_get_sm_id_size, .sm_id_config = gv11b_gr_init_sm_id_config, .sm_id_numbering = gv11b_gr_init_sm_id_numbering, - .tpc_mask = gv11b_gr_init_tpc_mask, + .tpc_mask = NULL, 
.fs_state = gv11b_gr_init_fs_state, .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .gr_load_tpc_mask = NULL, .wait_empty = gp10b_gr_init_wait_empty, .wait_idle = gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index acfe96a5b..c82db8045 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -523,11 +523,12 @@ static const struct gops_gr_init tu104_ops_gr_init = { .get_sm_id_size = gp10b_gr_init_get_sm_id_size, .sm_id_config = gv11b_gr_init_sm_id_config, .sm_id_numbering = gv11b_gr_init_sm_id_numbering, - .tpc_mask = gv11b_gr_init_tpc_mask, + .tpc_mask = NULL, .fs_state = gv11b_gr_init_fs_state, .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .gr_load_tpc_mask = NULL, .wait_empty = gp10b_gr_init_wait_empty, .wait_idle = gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h index c59d77167..af5ea5244 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h @@ -752,6 +752,8 @@ struct gops_gr_init { struct nvgpu_gr_config *gr_config); void (*cwd_gpcs_tpcs_num)(struct gk20a *g, u32 gpc_count, u32 tpc_count); + void (*gr_load_tpc_mask)(struct gk20a *g, + struct nvgpu_gr_config *gr_config); int (*wait_empty)(struct gk20a *g); void (*override_context_reset)(struct gk20a *g); void (*fe_go_idle_timeout)(struct gk20a *g, bool enable);