From c0c06f0307c42fb17c85271f6c75d8d7bb83a069 Mon Sep 17 00:00:00 2001
From: Vinod G
Date: Mon, 18 Mar 2019 16:05:57 -0700
Subject: [PATCH] gpu: nvgpu: move load_smid_config and program_sm_id_numbering hals

Move the load_smid_config and program_sm_id_numbering HAL functions
to the corresponding HAL files under hal.gr.init.

Add a new get_sm_id_size HAL, and add a new static function,
gr_load_sm_id_config, to the common gr init code.

JIRA NVGPU-2951

Change-Id: I27755306d278db9fcd5ab60169a64057b2989ea8
Signed-off-by: Vinod G
Reviewed-on: https://git-master.nvidia.com/r/2075875
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
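Note: the subtle part of the code being moved here is the CWD_GPC_TPC_ID
packing that all the sm_id_config HALs share: each 32-bit
NV_PGRAPH_PRI_CWD_GPC_TPC_ID register holds four TPC entries, each
gr_cwd_gpc_tpc_id_gpc0_s() + gr_cwd_gpc_tpc_id_tpc0_s() bits wide. A
standalone sketch of that packing follows; the field widths and field
order below are illustrative assumptions, not the values from the
generated hw_gr headers:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for gr_cwd_gpc_tpc_id_tpc0_s()/_gpc0_s(); assumed widths,
 * not the real hardware values.
 */
#define TPC0_FIELD_BITS 4U
#define GPC0_FIELD_BITS 4U

/* Pack up to four (gpc, tpc) pairs into one CWD_GPC_TPC_ID-style
 * register: entry j occupies bits [j * stride, (j + 1) * stride).
 * Assumed field order: tpc in the low field, gpc above it.
 */
static uint32_t pack_gpc_tpc_id(const uint32_t gpc[4],
				const uint32_t tpc[4], uint32_t count)
{
	uint32_t stride = GPC0_FIELD_BITS + TPC0_FIELD_BITS;
	uint32_t reg = 0U;
	uint32_t j;

	for (j = 0U; j < count && j < 4U; j++) {
		uint32_t bits = tpc[j] | (gpc[j] << TPC0_FIELD_BITS);

		reg |= bits << (j * stride);
	}
	return reg;
}

int main(void)
{
	const uint32_t gpc[4] = { 0U, 0U, 1U, 1U };
	const uint32_t tpc[4] = { 0U, 1U, 0U, 1U };

	printf("0x%08x\n", pack_gpc_tpc_id(gpc, tpc, 4U));
	return 0;
}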
 drivers/gpu/nvgpu/common/gr/gr.c              | 23 +++++-
 .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c  |  2 -
 .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c  |  2 -
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c            | 64 ----------------
 drivers/gpu/nvgpu/gm20b/gr_gm20b.h            |  5 +-
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |  5 +-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c            | 50 -------------
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h            |  1 -
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c           |  5 +-
 drivers/gpu/nvgpu/gv100/hal_gv100.c           |  6 +-
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c            | 74 -------------------
 drivers/gpu/nvgpu/gv11b/gr_gv11b.h            |  3 -
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c           |  6 +-
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c | 65 ++++++++++++++++
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h |  5 ++
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c | 51 +++++++++++++
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h |  3 +
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c | 68 +++++++++++++++++
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h |  4 +
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |  8 +-
 drivers/gpu/nvgpu/tu104/hal_tu104.c           |  6 +-
 21 files changed, 241 insertions(+), 215 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c
index abbb70494..8867abbd7 100644
--- a/drivers/gpu/nvgpu/common/gr/gr.c
+++ b/drivers/gpu/nvgpu/common/gr/gr.c
@@ -25,6 +25,25 @@
 #include
 #include
 
+static int gr_load_sm_id_config(struct gk20a *g)
+{
+	int err;
+	u32 *tpc_sm_id;
+	u32 sm_id_size = g->ops.gr.init.get_sm_id_size();
+	struct nvgpu_gr_config *gr_config = g->gr.config;
+
+	tpc_sm_id = nvgpu_kcalloc(g, sm_id_size, sizeof(u32));
+	if (tpc_sm_id == NULL) {
+		return -ENOMEM;
+	}
+
+	err = g->ops.gr.init.sm_id_config(g, tpc_sm_id, gr_config);
+
+	nvgpu_kfree(g, tpc_sm_id);
+
+	return err;
+}
+
 static void gr_load_tpc_mask(struct gk20a *g)
 {
 	u32 pes_tpc_mask = 0, fuse_tpc_mask;
@@ -98,7 +117,7 @@
 		tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
 		gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
 
-		g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id);
+		g->ops.gr.init.sm_id_numbering(g, gpc_index, tpc_index, sm_id);
 	}
 
 	g->ops.gr.init.pd_tpc_per_gpc(g, gr_config);
@@ -123,7 +142,7 @@
 
 	gr_load_tpc_mask(g);
 
-	err = g->ops.gr.load_smid_config(g);
+	err = gr_load_sm_id_config(g);
 	if (err != 0) {
 		nvgpu_err(g, "load_smid_config failed err=%d", err);
 	}
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index 82a283f69..3bce2d6fa 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -166,8 +166,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.resume_contexts = vgpu_gr_resume_contexts,
 		.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
 		.init_sm_id_table = vgpu_gr_init_sm_id_table,
-		.load_smid_config = NULL,
-		.program_sm_id_numbering = NULL,
 		.program_zcull_mapping = NULL,
 		.commit_inst = vgpu_gr_commit_inst,
 		.trigger_suspend = NULL,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
index b7dbdb23f..c275e91c8 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -184,8 +184,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.resume_contexts = vgpu_gr_resume_contexts,
 		.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
 		.init_sm_id_table = vgpu_gr_init_sm_id_table,
-		.load_smid_config = NULL,
-		.program_sm_id_numbering = NULL,
 		.program_zcull_mapping = NULL,
 		.commit_inst = vgpu_gr_commit_inst,
 		.trigger_suspend = NULL,
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index a5696ff49..efa3c6a7b 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -577,70 +577,6 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 	}
 }
 
-void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
-	u32 gpc, u32 tpc, u32 smid)
-{
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 gpc_offset = gpc_stride * gpc;
-	u32 tpc_offset = tpc_in_gpc_stride * tpc;
-
-	gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
-		gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
-	gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
-		gr_gpc0_gpm_pd_sm_id_id_f(smid));
-	gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
-		gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
-}
-
-int gr_gm20b_load_smid_config(struct gk20a *g)
-{
-	u32 *tpc_sm_id;
-	u32 i, j;
-	u32 tpc_index, gpc_index;
-
-	tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32));
-	if (tpc_sm_id == NULL) {
-		return -ENOMEM;
-	}
-
-	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
-	for (i = 0U;
-	     i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
-	     i++) {
-		u32 reg = 0;
-		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
-				 gr_cwd_gpc_tpc_id_tpc0_s();
-
-		for (j = 0U; j < 4U; j++) {
-			u32 sm_id = (i * 4U) + j;
-			u32 bits;
-
-			if (sm_id >= nvgpu_gr_config_get_tpc_count(g->gr.config)) {
-				break;
-			}
-
-			gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
-			tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
-
-			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
-			       gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
-			reg |= bits << (j * bit_stride);
-
-			tpc_sm_id[gpc_index] |= sm_id << tpc_index * bit_stride;
-		}
-		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
-	}
-
-	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) {
-		gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
-	}
-
-	nvgpu_kfree(g, tpc_sm_id);
-
-	return 0;
-}
-
 void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
 	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
 {
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index ead5bbb77..bdd975265 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -1,7 +1,7 @@
 /*
  * GM20B GPC MMU
  *
- * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -79,9 +79,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 				u32 **sm_dsm_perf_ctrl_regs,
 				u32 *ctrl_register_stride);
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
-void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
-	u32 gpc, u32 tpc, u32 smid);
-int gr_gm20b_load_smid_config(struct gk20a *g);
 void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
 	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
 bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 7e57b4f0c..f9ad120f0 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -289,8 +289,6 @@ static const struct gpu_ops gm20b_ops = {
 		.resume_contexts = gr_gk20a_resume_contexts,
 		.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags,
 		.init_sm_id_table = gr_gk20a_init_sm_id_table,
-		.load_smid_config = gr_gm20b_load_smid_config,
-		.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
 		.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
 		.commit_inst = gr_gk20a_commit_inst,
 		.trigger_suspend = gr_gk20a_trigger_suspend,
@@ -418,6 +416,9 @@ static const struct gpu_ops gm20b_ops = {
 			.get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
 		},
 		.init = {
+			.get_sm_id_size = gm20b_gr_init_get_sm_id_size,
+			.sm_id_config = gm20b_gr_init_sm_id_config,
+			.sm_id_numbering = gm20b_gr_init_sm_id_numbering,
 			.tpc_mask = gm20b_gr_init_tpc_mask,
 			.rop_mapping = gm20b_gr_init_rop_mapping,
 			.fs_state = gm20b_gr_init_fs_state,
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 971d205b0..aea59aaca 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1334,56 +1334,6 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
 }
 
-int gr_gp10b_load_smid_config(struct gk20a *g)
-{
-	u32 *tpc_sm_id;
-	u32 i, j;
-	u32 tpc_index, gpc_index;
-	u32 max_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
-
-	tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32));
-	if (tpc_sm_id == NULL) {
-		return -ENOMEM;
-	}
-
-	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
-	for (i = 0U;
-	     i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
-	     i++) {
-		u32 reg = 0;
-		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
-				 gr_cwd_gpc_tpc_id_tpc0_s();
-
-		for (j = 0U; j < 4U; j++) {
-			u32 sm_id = (i * 4U) + j;
-			u32 bits;
-
-			if (sm_id >= nvgpu_gr_config_get_tpc_count(g->gr.config)) {
-				break;
-			}
-
-			gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
-			tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
-
-			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
-			       gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
-			reg |= bits << (j * bit_stride);
-
-			tpc_sm_id[gpc_index + max_gpcs * ((tpc_index & 4U) >> 2U)]
-				|= sm_id << (bit_stride * (tpc_index & 3U));
-		}
-		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
-	}
-
-	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) {
-		gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
-	}
-
-	nvgpu_kfree(g, tpc_sm_id);
-
-	return 0;
-}
-
 void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
 	nvgpu_tegra_fuse_write_bypass(g, 0x1);
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index e5f04e7b7..2a19f737f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -104,7 +104,6 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
 void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 	struct nvgpu_gr_ctx *gr_ctx, u64 addr, u64 size, bool patch);
-int gr_gp10b_load_smid_config(struct gk20a *g);
 void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gp10b_get_access_map(struct gk20a *g,
 	u32 **whitelist, int *num_entries);
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 7d278f561..cbd3d1e19 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -311,8 +311,6 @@ static const struct gpu_ops gp10b_ops = {
 		.resume_contexts = gr_gk20a_resume_contexts,
 		.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
 		.init_sm_id_table = gr_gk20a_init_sm_id_table,
-		.load_smid_config = gr_gp10b_load_smid_config,
-		.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
 		.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
 		.commit_inst = gr_gk20a_commit_inst,
 		.trigger_suspend = gr_gk20a_trigger_suspend,
@@ -488,6 +486,9 @@ static const struct gpu_ops gp10b_ops = {
 			gp10b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
 		},
 		.init = {
+			.get_sm_id_size = gp10b_gr_init_get_sm_id_size,
+			.sm_id_config = gp10b_gr_init_sm_id_config,
+			.sm_id_numbering = gm20b_gr_init_sm_id_numbering,
 			.tpc_mask = gm20b_gr_init_tpc_mask,
 			.rop_mapping = gm20b_gr_init_rop_mapping,
 			.fs_state = gp10b_gr_init_fs_state,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index e408fde71..915dca6e0 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -49,6 +49,7 @@
 #include "hal/gr/zbc/zbc_gp10b.h"
 #include "hal/gr/zbc/zbc_gv11b.h"
 #include "hal/gr/init/gr_init_gm20b.h"
+#include "hal/gr/init/gr_init_gp10b.h"
 #include "hal/gr/init/gr_init_gv11b.h"
 #include "hal/gr/hwpm_map/hwpm_map_gv100.h"
 #include "hal/falcon/falcon_gk20a.h"
@@ -418,8 +419,6 @@ static const struct gpu_ops gv100_ops = {
 		.resume_contexts = gr_gk20a_resume_contexts,
 		.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
 		.init_sm_id_table = gr_gv100_init_sm_id_table,
-		.load_smid_config = gr_gv11b_load_smid_config,
-		.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
 		.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
 		.commit_inst = gr_gv11b_commit_inst,
 		.trigger_suspend = gv11b_gr_sm_trigger_suspend,
@@ -624,6 +623,9 @@ static const struct gpu_ops gv100_ops = {
 			gv100_gr_hwpm_map_get_active_fbpa_mask,
 		},
 		.init = {
+			.get_sm_id_size = gp10b_gr_init_get_sm_id_size,
+			.sm_id_config = gv11b_gr_init_sm_id_config,
+			.sm_id_numbering = gv11b_gr_init_sm_id_numbering,
 			.tpc_mask = gv11b_gr_init_tpc_mask,
 			.rop_mapping = gv11b_gr_init_rop_mapping,
 			.fs_state = gv11b_gr_init_fs_state,
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 03ab5a474..52fafc40b 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -2548,80 +2548,6 @@ u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc)
 	return tpc_new;
 }
 
-void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
-	u32 gpc, u32 tpc, u32 smid)
-{
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
-					GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 gpc_offset = gpc_stride * gpc;
-	u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index;
-	u32 tpc_offset;
-
-	tpc = g->ops.gr.get_nonpes_aware_tpc(g, gpc, tpc);
-	tpc_offset = tpc_in_gpc_stride * tpc;
-
-	gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
-		gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
-	gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
-		gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
-	gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
-		gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index));
-}
-
-int gr_gv11b_load_smid_config(struct gk20a *g)
-{
-	u32 *tpc_sm_id;
-	u32 i, j;
-	u32 tpc_index, gpc_index, tpc_id;
-	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
-	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
-
-	tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32));
-	if (tpc_sm_id == NULL) {
-		return -ENOMEM;
-	}
-
-	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
-	for (i = 0U;
-	     i <= ((nvgpu_gr_config_get_tpc_count(g->gr.config) - 1U) / 4U);
-	     i++) {
-		u32 reg = 0;
-		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
-				 gr_cwd_gpc_tpc_id_tpc0_s();
-
-		for (j = 0U; j < 4U; j++) {
-			u32 sm_id;
-			u32 bits;
-
-			tpc_id = (i << 2) + j;
-			sm_id = tpc_id * sm_per_tpc;
-
-			if (sm_id >= g->gr.no_of_sm) {
-				break;
-			}
-
-			gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
-			tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
-
-			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
-			       gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
-			reg |= bits << (j * bit_stride);
-
-			tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4U)
-				>> 2U))] |= tpc_id << tpc_index * bit_stride;
-		}
-		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
-	}
-
-	for (i = 0U; i < gr_cwd_sm_id__size_1_v(); i++) {
-		gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
-	}
-	nvgpu_kfree(g, tpc_sm_id);
-
-	return 0;
-}
-
 int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 {
 	u32 addr_lo;
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index f1e1bbe48..80113c340 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -123,9 +123,6 @@ int gr_gv11b_handle_fecs_error(struct gk20a *g,
 	struct gr_gk20a_isr_data *isr_data);
 int gr_gv11b_init_sw_veid_bundle(struct gk20a *g);
 void gr_gv11b_detect_sm_arch(struct gk20a *g);
-void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
-	u32 gpc, u32 tpc, u32 smid);
-int gr_gv11b_load_smid_config(struct gk20a *g);
 int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va);
 void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
 	u32 *esr_sm_sel);
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 21c090d9d..047c883d2 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -49,6 +49,7 @@
 #include "hal/gr/zbc/zbc_gp10b.h"
 #include "hal/gr/zbc/zbc_gv11b.h"
 #include "hal/gr/init/gr_init_gm20b.h"
+#include "hal/gr/init/gr_init_gp10b.h"
 #include "hal/gr/init/gr_init_gv11b.h"
 #include "hal/gr/hwpm_map/hwpm_map_gv100.h"
 #include "hal/falcon/falcon_gk20a.h"
@@ -370,8 +371,6 @@ static const struct gpu_ops gv11b_ops = {
 		.resume_contexts = gr_gk20a_resume_contexts,
 		.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
 		.init_sm_id_table = gr_gv100_init_sm_id_table,
-		.load_smid_config = gr_gv11b_load_smid_config,
-		.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
 		.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
 		.commit_inst = gr_gv11b_commit_inst,
 		.trigger_suspend = gv11b_gr_sm_trigger_suspend,
@@ -583,6 +582,9 @@ static const struct gpu_ops gv11b_ops = {
 			gv100_gr_hwpm_map_align_regs_perf_pma,
 		},
 		.init = {
+			.get_sm_id_size = gp10b_gr_init_get_sm_id_size,
+			.sm_id_config = gv11b_gr_init_sm_id_config,
+			.sm_id_numbering = gv11b_gr_init_sm_id_numbering,
 			.tpc_mask = gv11b_gr_init_tpc_mask,
 			.rop_mapping = gv11b_gr_init_rop_mapping,
 			.fs_state = gv11b_gr_init_fs_state,
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
index 9c9e5431d..78d87b0b9 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
@@ -39,6 +39,71 @@
 #define FE_PWR_MODE_TIMEOUT_DEFAULT_US 10U
 #define FECS_CTXSW_RESET_DELAY_US 10U
 
+void gm20b_gr_init_sm_id_numbering(struct gk20a *g,
+	u32 gpc, u32 tpc, u32 smid)
+{
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
+					GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 gpc_offset = gpc_stride * gpc;
+	u32 tpc_offset = tpc_in_gpc_stride * tpc;
+
+	nvgpu_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
+		gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
+	nvgpu_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
+		gr_gpc0_gpm_pd_sm_id_id_f(smid));
+	nvgpu_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
+		gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
+}
+
+u32 gm20b_gr_init_get_sm_id_size(void)
+{
+	return gr_cwd_sm_id__size_1_v();
+}
+
+int gm20b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
+			       struct nvgpu_gr_config *gr_config)
+{
+	u32 i, j;
+	u32 tpc_index, gpc_index;
+
+	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
+	for (i = 0U;
+	     i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U);
+	     i++) {
+		u32 reg = 0;
+		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
+				 gr_cwd_gpc_tpc_id_tpc0_s();
+
+		for (j = 0U; j < 4U; j++) {
+			u32 sm_id = (i * 4U) + j;
+			u32 bits;
+
+			if (sm_id >=
+			    nvgpu_gr_config_get_tpc_count(gr_config)) {
+				break;
+			}
+
+			gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
+			       gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
+			reg |= bits << (j * bit_stride);
+
+			tpc_sm_id[gpc_index] |=
+				(sm_id << tpc_index * bit_stride);
+		}
+		nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
+	}
+
+	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) {
+		nvgpu_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
+	}
+
+	return 0;
+}
+
 void gm20b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask)
 {
 	nvgpu_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
index 5baf9ad65..05eec1214 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
@@ -29,6 +29,11 @@ struct gk20a;
 struct netlist_av_list;
 struct nvgpu_gr_config;
 
+void gm20b_gr_init_sm_id_numbering(struct gk20a *g,
+	u32 gpc, u32 tpc, u32 smid);
+u32 gm20b_gr_init_get_sm_id_size(void);
+int gm20b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
+			       struct nvgpu_gr_config *gr_config);
 void gm20b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
 int gm20b_gr_init_rop_mapping(struct gk20a *g,
 	struct nvgpu_gr_config *gr_config);
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c
index fec76801b..cebb0d1a8 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c
@@ -23,11 +23,62 @@
 #include
 #include
 
+#include
+
#include "gr_init_gm20b.h" #include "gr_init_gp10b.h" #include +u32 gp10b_gr_init_get_sm_id_size(void) +{ + return gr_cwd_sm_id__size_1_v(); +} + +int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, + struct nvgpu_gr_config *gr_config) +{ + u32 i, j; + u32 tpc_index, gpc_index; + u32 max_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + + /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ + for (i = 0U; + i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U); + i++) { + u32 reg = 0; + u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + + gr_cwd_gpc_tpc_id_tpc0_s(); + + for (j = 0U; j < 4U; j++) { + u32 sm_id = (i * 4U) + j; + u32 bits; + + if (sm_id >= + nvgpu_gr_config_get_tpc_count(gr_config)) { + break; + } + + gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; + tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; + + bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) | + gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); + reg |= bits << (j * bit_stride); + + tpc_sm_id[gpc_index + max_gpcs * ((tpc_index & 4U) >> 2U)] + |= sm_id << (bit_stride * (tpc_index & 3U)); + } + nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg); + } + + for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) { + nvgpu_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); + } + + return 0; +} + int gp10b_gr_init_fs_state(struct gk20a *g) { u32 data; diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h index 871aa0c5f..5422f762b 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h @@ -27,6 +27,9 @@ struct gk20a; +u32 gp10b_gr_init_get_sm_id_size(void); +int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, + struct nvgpu_gr_config *gr_config); int gp10b_gr_init_fs_state(struct gk20a *g); int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count, bool gfxp_wfi_timeout_unit_usec); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c index f73787eee..4affcc66c 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c @@ -37,6 +37,74 @@ */ #define GR_TPCS_INFO_FOR_MAPREGISTER 6U +void gv11b_gr_init_sm_id_numbering(struct gk20a *g, + u32 gpc, u32 tpc, u32 smid) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_TPC_IN_GPC_STRIDE); + u32 gpc_offset = gpc_stride * gpc; + u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index; + u32 tpc_offset; + + tpc = g->ops.gr.get_nonpes_aware_tpc(g, gpc, tpc); + tpc_offset = tpc_in_gpc_stride * tpc; + + nvgpu_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index)); + nvgpu_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, + gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index)); + nvgpu_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index)); +} + +int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, + struct nvgpu_gr_config *gr_config) +{ + u32 i, j; + u32 tpc_index, gpc_index, tpc_id; + u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + + /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ + for (i = 0U; + i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U); + i++) { + u32 reg = 0; + u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + + gr_cwd_gpc_tpc_id_tpc0_s(); + + 
+		for (j = 0U; j < 4U; j++) {
+			u32 sm_id;
+			u32 bits;
+
+			tpc_id = (i << 2) + j;
+			sm_id = tpc_id * sm_per_tpc;
+
+			if (sm_id >= g->gr.no_of_sm) {
+				break;
+			}
+
+			gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
+			       gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
+			reg |= bits << (j * bit_stride);
+
+			tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4U)
+				>> 2U))] |= tpc_id << tpc_index * bit_stride;
+		}
+		nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
+	}
+
+	for (i = 0U; i < gr_cwd_sm_id__size_1_v(); i++) {
+		nvgpu_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
+	}
+
+	return 0;
+}
+
 void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask)
 {
 	nvgpu_writel(g, gr_fe_tpc_fs_r(gpc_index), pes_tpc_mask);
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
index 1ef13e927..7daced836 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
@@ -28,6 +28,10 @@
 struct gk20a;
 struct nvgpu_gr_config;
 
+void gv11b_gr_init_sm_id_numbering(struct gk20a *g,
+	u32 gpc, u32 tpc, u32 smid);
+int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
+			       struct nvgpu_gr_config *gr_config);
 void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
 int gv11b_gr_init_rop_mapping(struct gk20a *g,
 	struct nvgpu_gr_config *gr_config);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 5a3f59f25..cc0cbfb45 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -428,9 +428,6 @@ struct gpu_ops {
 				u32 compute_preempt_mode);
 		int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
 		int (*init_sm_id_table)(struct gk20a *g);
-		int (*load_smid_config)(struct gk20a *g);
-		void (*program_sm_id_numbering)(struct gk20a *g,
-						u32 gpc, u32 tpc, u32 smid);
 		int (*init_sw_veid_bundle)(struct gk20a *g);
 		void (*program_zcull_mapping)(struct gk20a *g,
 				u32 zcull_alloc_num, u32 *zcull_map_tiles);
@@ -664,6 +661,11 @@ struct gpu_ops {
 		} hwpm_map;
 
 		struct {
+			u32 (*get_sm_id_size)(void);
+			int (*sm_id_config)(struct gk20a *g, u32 *tpc_sm_id,
+					    struct nvgpu_gr_config *gr_config);
+			void (*sm_id_numbering)(struct gk20a *g,
+						u32 gpc, u32 tpc, u32 smid);
 			void (*tpc_mask)(struct gk20a *g,
 					 u32 gpc_index, u32 pes_tpc_mask);
 			int (*rop_mapping)(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 61faa669b..b75e38cef 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -53,6 +53,7 @@
 #include "hal/gr/zbc/zbc_gp10b.h"
 #include "hal/gr/zbc/zbc_gv11b.h"
 #include "hal/gr/init/gr_init_gm20b.h"
+#include "hal/gr/init/gr_init_gp10b.h"
 #include "hal/gr/init/gr_init_gv11b.h"
 #include "hal/gr/init/gr_init_tu104.h"
 #include "hal/gr/hwpm_map/hwpm_map_gv100.h"
@@ -438,8 +439,6 @@ static const struct gpu_ops tu104_ops = {
 		.resume_contexts = gr_gk20a_resume_contexts,
 		.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
 		.init_sm_id_table = gr_gv100_init_sm_id_table,
-		.load_smid_config = gr_gv11b_load_smid_config,
-		.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
 		.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
 		.commit_inst = gr_gv11b_commit_inst,
 		.trigger_suspend = gv11b_gr_sm_trigger_suspend,
@@ -652,6 +651,9 @@ static const struct gpu_ops tu104_ops = {
 			gv100_gr_hwpm_map_get_active_fbpa_mask,
 		},
 		.init = {
+			.get_sm_id_size = gp10b_gr_init_get_sm_id_size,
+			.sm_id_config = gv11b_gr_init_sm_id_config,
+			.sm_id_numbering = gv11b_gr_init_sm_id_numbering,
 			.tpc_mask = gv11b_gr_init_tpc_mask,
 			.rop_mapping = gv11b_gr_init_rop_mapping,
 			.fs_state = gv11b_gr_init_fs_state,
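
Note on the gv11b variant wired above for gv100/gv11b/tu104:
gv11b_gr_init_sm_id_config derives sm_id = tpc_id * sm_per_tpc and
spreads each GPC's entries across two banks of gr_cwd_sm_id registers,
selected by tpc_sm_id[gpc_index + num_gpcs * ((tpc_index & 4U) >> 2U)].
A standalone sketch of just that slot arithmetic; NUM_GPCS below is an
illustrative assumption, the real value comes from
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS):

#include <stdint.h>
#include <stdio.h>

#define NUM_GPCS 2U	/* assumed; chip-specific litter value */

/* Mirror of the gv11b destination-slot expression: TPCs 0..3 of a GPC
 * land in slot gpc_index, TPCs 4..7 in slot gpc_index + NUM_GPCS.
 */
static uint32_t cwd_sm_id_slot(uint32_t gpc_index, uint32_t tpc_index)
{
	return gpc_index + NUM_GPCS * ((tpc_index & 4U) >> 2U);
}

int main(void)
{
	/* e.g. GPC 1, TPC 5 -> 1 + 2 * 1 = slot 3 */
	printf("slot = %u\n", cwd_sm_id_slot(1U, 5U));
	return 0;
}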