diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c
index f89c91174..abbb70494 100644
--- a/drivers/gpu/nvgpu/common/gr/gr.c
+++ b/drivers/gpu/nvgpu/common/gr/gr.c
@@ -25,6 +25,41 @@
 #include
 #include
 
+static void gr_load_tpc_mask(struct gk20a *g)
+{
+	u32 pes_tpc_mask = 0, fuse_tpc_mask;
+	u32 gpc, pes, val;
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
+				GPU_LIT_NUM_TPC_PER_GPC);
+	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
+
+	/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
+	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config);
+	     gpc++) {
+		for (pes = 0;
+		     pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
+		     pes++) {
+			pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
+					g->gr.config, gpc, pes) <<
+				num_tpc_per_gpc * gpc;
+		}
+	}
+
+	nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
+
+	fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, 0);
+	if ((g->tpc_fs_mask_user != 0U) &&
+	    (g->tpc_fs_mask_user != fuse_tpc_mask) &&
+	    (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
+		val = g->tpc_fs_mask_user;
+		val &= BIT32(max_tpc_count) - U32(1);
+		/* skip TPCs instead of disabling them; disabling causes channel timeouts */
+		val = BIT32(hweight32(val)) - U32(1);
+		pes_tpc_mask = val;
+	}
+	g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
+}
+
 u32 nvgpu_gr_get_idle_timeout(struct gk20a *g)
 {
 	return nvgpu_is_timeouts_enabled(g) ?
@@ -66,12 +101,12 @@ int nvgpu_gr_init_fs_state(struct gk20a *g)
 		g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id);
 	}
 
-	g->ops.gr.init.pd_tpc_per_gpc(g);
+	g->ops.gr.init.pd_tpc_per_gpc(g, gr_config);
 
 	/* gr__setup_pd_mapping */
-	g->ops.gr.setup_rop_mapping(g, &g->gr);
+	g->ops.gr.init.rop_mapping(g, gr_config);
 
-	g->ops.gr.init.pd_skip_table_gpc(g);
+	g->ops.gr.init.pd_skip_table_gpc(g, gr_config);
 
 	fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, gr_config, 0);
 	gpc_cnt = nvgpu_gr_config_get_gpc_count(gr_config);
@@ -86,7 +121,7 @@ int nvgpu_gr_init_fs_state(struct gk20a *g)
 	}
 	g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt);
 
-	g->ops.gr.load_tpc_mask(g);
+	gr_load_tpc_mask(g);
 
 	err = g->ops.gr.load_smid_config(g);
 	if (err != 0) {
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index d688a4e5a..82a283f69 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -168,10 +168,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.init_sm_id_table = vgpu_gr_init_sm_id_table,
 		.load_smid_config = NULL,
 		.program_sm_id_numbering = NULL,
-		.setup_rop_mapping = NULL,
 		.program_zcull_mapping = NULL,
 		.commit_inst = vgpu_gr_commit_inst,
-		.load_tpc_mask = NULL,
 		.trigger_suspend = NULL,
 		.wait_for_pause = gr_gk20a_wait_for_pause,
 		.resume_from_pause = NULL,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
index 67f8208fc..b7dbdb23f 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -186,10 +186,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.init_sm_id_table = vgpu_gr_init_sm_id_table,
 		.load_smid_config = NULL,
 		.program_sm_id_numbering = NULL,
-		.setup_rop_mapping = NULL,
 		.program_zcull_mapping = NULL,
 		.commit_inst = vgpu_gr_commit_inst,
-		.load_tpc_mask = NULL,
 		.trigger_suspend = NULL,
 		.wait_for_pause = gr_gk20a_wait_for_pause,
 		.resume_from_pause = NULL,
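The override branch in gr_load_tpc_mask() above is easy to misread, so here is a standalone sketch of the same arithmetic. It compiles on its own; nvgpu's BIT32(), U32() and hweight32() helpers are replaced by plain-C stand-ins (an assumption about their semantics: BIT32(n) == 1u << n, hweight32 == population count), and the masks in main() are made-up example values.

```c
#include <stdint.h>
#include <stdio.h>

/* Stand-in for nvgpu's BIT32() (assumption: BIT32(n) == 1u << n). */
#define BIT32(n) (1u << (n))

/* Stand-in for nvgpu's hweight32(): population count. */
static uint32_t hweight32(uint32_t v)
{
	uint32_t count = 0;

	while (v != 0) {
		count += v & 1u;
		v >>= 1u;
	}
	return count;
}

/*
 * Mirror of gr_load_tpc_mask()'s override logic: if the user mask
 * differs from the fuse mask and the fuse mask has all TPCs enabled,
 * enable the lowest N TPCs, where N is the number of TPCs the user
 * left enabled.
 */
static uint32_t resolve_tpc_mask(uint32_t pes_tpc_mask,
				 uint32_t fuse_tpc_mask,
				 uint32_t user_mask,
				 uint32_t max_tpc_count)
{
	uint32_t all_tpcs = BIT32(max_tpc_count) - 1u;

	if (user_mask != 0u && user_mask != fuse_tpc_mask &&
	    fuse_tpc_mask == all_tpcs) {
		uint32_t val = user_mask & all_tpcs;

		/* skip TPCs rather than disabling arbitrary ones */
		return BIT32(hweight32(val)) - 1u;
	}
	return pes_tpc_mask;
}

int main(void)
{
	/* 4 TPCs, user asks for 0b1010 (two TPCs): result is 0b0011 */
	printf("0x%x\n", resolve_tpc_mask(0xfu, 0xfu, 0xau, 4u));
	return 0;
}
```

As the in-tree comment notes, the hweight32() trick translates a request for any two TPCs into "the lowest two TPCs", which appears to be the only floorsweeping shape the hardware tolerates without channel timeouts.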
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index c07940253..0befa4743 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -740,151 +740,6 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	return 0;
 }
 
-int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
-{
-	u32 norm_entries, norm_shift;
-	u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
-	u32 map0, map1, map2, map3, map4, map5;
-
-	if (gr->config->map_tiles == NULL) {
-		return -1;
-	}
-
-	nvgpu_log_fn(g, " ");
-
-	gk20a_writel(g, gr_crstr_map_table_cfg_r(),
-		gr_crstr_map_table_cfg_row_offset_f(
-			nvgpu_gr_config_get_map_row_offset(gr->config)) |
-		gr_crstr_map_table_cfg_num_entries_f(
-			nvgpu_gr_config_get_tpc_count(gr->config)));
-
-	map0 = gr_crstr_gpc_map0_tile0_f(nvgpu_gr_config_get_map_tile_count(gr->config, 0)) |
-		gr_crstr_gpc_map0_tile1_f(nvgpu_gr_config_get_map_tile_count(gr->config, 1)) |
-		gr_crstr_gpc_map0_tile2_f(nvgpu_gr_config_get_map_tile_count(gr->config, 2)) |
-		gr_crstr_gpc_map0_tile3_f(nvgpu_gr_config_get_map_tile_count(gr->config, 3)) |
-		gr_crstr_gpc_map0_tile4_f(nvgpu_gr_config_get_map_tile_count(gr->config, 4)) |
-		gr_crstr_gpc_map0_tile5_f(nvgpu_gr_config_get_map_tile_count(gr->config, 5));
-
-	map1 = gr_crstr_gpc_map1_tile6_f(nvgpu_gr_config_get_map_tile_count(gr->config, 6)) |
-		gr_crstr_gpc_map1_tile7_f(nvgpu_gr_config_get_map_tile_count(gr->config, 7)) |
-		gr_crstr_gpc_map1_tile8_f(nvgpu_gr_config_get_map_tile_count(gr->config, 8)) |
-		gr_crstr_gpc_map1_tile9_f(nvgpu_gr_config_get_map_tile_count(gr->config, 9)) |
-		gr_crstr_gpc_map1_tile10_f(nvgpu_gr_config_get_map_tile_count(gr->config, 10)) |
-		gr_crstr_gpc_map1_tile11_f(nvgpu_gr_config_get_map_tile_count(gr->config, 11));
-
-	map2 = gr_crstr_gpc_map2_tile12_f(nvgpu_gr_config_get_map_tile_count(gr->config, 12)) |
-		gr_crstr_gpc_map2_tile13_f(nvgpu_gr_config_get_map_tile_count(gr->config, 13)) |
-		gr_crstr_gpc_map2_tile14_f(nvgpu_gr_config_get_map_tile_count(gr->config, 14)) |
-		gr_crstr_gpc_map2_tile15_f(nvgpu_gr_config_get_map_tile_count(gr->config, 15)) |
-		gr_crstr_gpc_map2_tile16_f(nvgpu_gr_config_get_map_tile_count(gr->config, 16)) |
-		gr_crstr_gpc_map2_tile17_f(nvgpu_gr_config_get_map_tile_count(gr->config, 17));
-
-	map3 = gr_crstr_gpc_map3_tile18_f(nvgpu_gr_config_get_map_tile_count(gr->config, 18)) |
-		gr_crstr_gpc_map3_tile19_f(nvgpu_gr_config_get_map_tile_count(gr->config, 19)) |
-		gr_crstr_gpc_map3_tile20_f(nvgpu_gr_config_get_map_tile_count(gr->config, 20)) |
-		gr_crstr_gpc_map3_tile21_f(nvgpu_gr_config_get_map_tile_count(gr->config, 21)) |
-		gr_crstr_gpc_map3_tile22_f(nvgpu_gr_config_get_map_tile_count(gr->config, 22)) |
-		gr_crstr_gpc_map3_tile23_f(nvgpu_gr_config_get_map_tile_count(gr->config, 23));
-
-	map4 = gr_crstr_gpc_map4_tile24_f(nvgpu_gr_config_get_map_tile_count(gr->config, 24)) |
-		gr_crstr_gpc_map4_tile25_f(nvgpu_gr_config_get_map_tile_count(gr->config, 25)) |
-		gr_crstr_gpc_map4_tile26_f(nvgpu_gr_config_get_map_tile_count(gr->config, 26)) |
-		gr_crstr_gpc_map4_tile27_f(nvgpu_gr_config_get_map_tile_count(gr->config, 27)) |
-		gr_crstr_gpc_map4_tile28_f(nvgpu_gr_config_get_map_tile_count(gr->config, 28)) |
-		gr_crstr_gpc_map4_tile29_f(nvgpu_gr_config_get_map_tile_count(gr->config, 29));
-
-	map5 = gr_crstr_gpc_map5_tile30_f(nvgpu_gr_config_get_map_tile_count(gr->config, 30)) |
-		gr_crstr_gpc_map5_tile31_f(nvgpu_gr_config_get_map_tile_count(gr->config, 31)) |
-		gr_crstr_gpc_map5_tile32_f(0) |
-		gr_crstr_gpc_map5_tile33_f(0) |
-		gr_crstr_gpc_map5_tile34_f(0) |
-		gr_crstr_gpc_map5_tile35_f(0);
-
-	gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
-	gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
-	gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
-	gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
-	gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
-	gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
-
-	switch (nvgpu_gr_config_get_tpc_count(gr->config)) {
-	case 1:
-		norm_shift = 4;
-		break;
-	case 2:
-	case 3:
-		norm_shift = 3;
-		break;
-	case 4:
-	case 5:
-	case 6:
-	case 7:
-		norm_shift = 2;
-		break;
-	case 8:
-	case 9:
-	case 10:
-	case 11:
-	case 12:
-	case 13:
-	case 14:
-	case 15:
-		norm_shift = 1;
-		break;
-	default:
-		norm_shift = 0;
-		break;
-	}
-
-	norm_entries = nvgpu_gr_config_get_tpc_count(gr->config) << norm_shift;
-	coeff5_mod = BIT32(5) % norm_entries;
-	coeff6_mod = BIT32(6) % norm_entries;
-	coeff7_mod = BIT32(7) % norm_entries;
-	coeff8_mod = BIT32(8) % norm_entries;
-	coeff9_mod = BIT32(9) % norm_entries;
-	coeff10_mod = BIT32(10) % norm_entries;
-	coeff11_mod = BIT32(11) % norm_entries;
-
-	gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
-		gr_ppcs_wwdx_map_table_cfg_row_offset_f(
-			nvgpu_gr_config_get_map_row_offset(gr->config)) |
-		gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
-		gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
-		gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
-		gr_ppcs_wwdx_map_table_cfg_num_entries_f(
-			nvgpu_gr_config_get_tpc_count(gr->config)));
-
-	gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
-		gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
-		gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
-		gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
-		gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
-		gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
-		gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
-
-	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
-	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
-	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
-	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
-	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
-	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
-
-	gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
-		gr_rstr2d_map_table_cfg_row_offset_f(
-			nvgpu_gr_config_get_map_row_offset(gr->config)) |
-		gr_rstr2d_map_table_cfg_num_entries_f(
-			nvgpu_gr_config_get_tpc_count(gr->config)));
-
-	gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
-	gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
-	gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
-	gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
-	gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
-	gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
-
-	return 0;
-}
-
 int gr_gk20a_init_sm_id_table(struct gk20a *g)
 {
 	u32 gpc, tpc;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 6698ae27c..eaf6cd954 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -409,7 +409,6 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g,
 				u64 addr, u32 size, bool patch);
 void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
 void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
-int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
 int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
 int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
 void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
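For context on the function removed above: the map0..map5 values pack one small GPC index per tile into fixed fields of six 32-bit registers. Below is a minimal sketch of that packing, assuming the six-entries-per-register, 5-bit-per-entry layout that the gv11b code later in this patch documents; the field width is taken from that comment, not from the gm20b hw headers, so treat it as illustrative.

```c
#include <stdint.h>
#include <stdio.h>

/* Six TPC entries per map register (from gr_init_gv11b.c in this
 * patch); 5-bit field width per the comment there -- illustrative. */
#define GR_TPCS_INFO_FOR_MAPREGISTER 6u
#define MAP_TILE_FIELD_BITS          5u

/* Pack up to six tile entries into one 32-bit map-register value,
 * the moral equivalent of OR-ing gr_crstr_gpc_map_tileN_f() fields. */
static uint32_t pack_map_register(const uint8_t *tiles, uint32_t count)
{
	uint32_t map = 0u;
	uint32_t i;

	for (i = 0u; i < count && i < GR_TPCS_INFO_FOR_MAPREGISTER; i++)
		map |= (uint32_t)tiles[i] << (i * MAP_TILE_FIELD_BITS);

	return map;
}

int main(void)
{
	const uint8_t tiles[6] = { 0, 1, 0, 1, 0, 1 };

	/* tiles 1/3/5 set to GPC 1 -> bits 5, 15 and 25 */
	printf("0x%08x\n", pack_map_register(tiles, 6u));
	return 0;
}
```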
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 52aac2c4a..a5696ff49 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -577,37 +577,6 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 	}
 }
 
-void gr_gm20b_load_tpc_mask(struct gk20a *g)
-{
-	u32 pes_tpc_mask = 0, fuse_tpc_mask;
-	u32 gpc, pes;
-	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
-	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
-
-	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
-		for (pes = 0;
-		     pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
-		     pes++) {
-			pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
-					g->gr.config, gpc, pes) <<
-				num_tpc_per_gpc * gpc;
-		}
-	}
-
-	fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, 0);
-	if ((g->tpc_fs_mask_user != 0U) &&
-	    (g->tpc_fs_mask_user != fuse_tpc_mask) &&
-	    (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
-		u32 val = g->tpc_fs_mask_user;
-		val &= BIT32(max_tpc_count) - U32(1);
-		/* skip tpc to disable the other tpc cause channel timeout */
-		val = BIT32(hweight32(val)) - U32(1);
-		gk20a_writel(g, gr_fe_tpc_fs_r(), val);
-	} else {
-		gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
-	}
-}
-
 void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
 				u32 gpc, u32 tpc, u32 smid)
 {
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index e9297044e..ead5bbb77 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -79,7 +79,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 				u32 **sm_dsm_perf_ctrl_regs,
 				u32 *ctrl_register_stride);
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
-void gr_gm20b_load_tpc_mask(struct gk20a *g);
 void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
 				u32 gpc, u32 tpc, u32 smid);
 int gr_gm20b_load_smid_config(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 20495543b..7e57b4f0c 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -291,10 +291,8 @@ static const struct gpu_ops gm20b_ops = {
 		.init_sm_id_table = gr_gk20a_init_sm_id_table,
 		.load_smid_config = gr_gm20b_load_smid_config,
 		.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
-		.setup_rop_mapping = gr_gk20a_setup_rop_mapping,
 		.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
 		.commit_inst = gr_gk20a_commit_inst,
-		.load_tpc_mask = gr_gm20b_load_tpc_mask,
 		.trigger_suspend = gr_gk20a_trigger_suspend,
 		.wait_for_pause = gr_gk20a_wait_for_pause,
 		.resume_from_pause = gr_gk20a_resume_from_pause,
@@ -420,6 +418,8 @@ static const struct gpu_ops gm20b_ops = {
 			.get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
 		},
 		.init = {
+			.tpc_mask = gm20b_gr_init_tpc_mask,
+			.rop_mapping = gm20b_gr_init_rop_mapping,
 			.fs_state = gm20b_gr_init_fs_state,
 			.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 			.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 2524205c4..7d278f561 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -313,10 +313,8 @@ static const struct gpu_ops gp10b_ops = {
 		.init_sm_id_table = gr_gk20a_init_sm_id_table,
 		.load_smid_config = gr_gp10b_load_smid_config,
 		.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
-		.setup_rop_mapping = gr_gk20a_setup_rop_mapping,
 		.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
 		.commit_inst = gr_gk20a_commit_inst,
-		.load_tpc_mask = gr_gm20b_load_tpc_mask,
 		.trigger_suspend = gr_gk20a_trigger_suspend,
 		.wait_for_pause = gr_gk20a_wait_for_pause,
 		.resume_from_pause = gr_gk20a_resume_from_pause,
@@ -490,6 +488,8 @@ static const struct gpu_ops gp10b_ops = {
 				gp10b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
 		},
 		.init = {
+			.tpc_mask = gm20b_gr_init_tpc_mask,
+			.rop_mapping = gm20b_gr_init_rop_mapping,
 			.fs_state = gp10b_gr_init_fs_state,
 			.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 			.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index d7078b80f..e408fde71 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -420,10 +420,8 @@ static const struct gpu_ops gv100_ops = {
 		.init_sm_id_table = gr_gv100_init_sm_id_table,
 		.load_smid_config = gr_gv11b_load_smid_config,
 		.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
-		.setup_rop_mapping = gr_gv11b_setup_rop_mapping,
 		.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
 		.commit_inst = gr_gv11b_commit_inst,
-		.load_tpc_mask = gr_gv11b_load_tpc_mask,
 		.trigger_suspend = gv11b_gr_sm_trigger_suspend,
 		.wait_for_pause = gr_gk20a_wait_for_pause,
 		.resume_from_pause = gv11b_gr_resume_from_pause,
@@ -626,6 +624,8 @@ static const struct gpu_ops gv100_ops = {
 				gv100_gr_hwpm_map_get_active_fbpa_mask,
 		},
 		.init = {
+			.tpc_mask = gv11b_gr_init_tpc_mask,
+			.rop_mapping = gv11b_gr_init_rop_mapping,
 			.fs_state = gv11b_gr_init_fs_state,
 			.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 			.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
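All five HAL tables change the same way: the two entries leave the flat gr ops struct and reappear under gr.init, and the graphics config becomes an explicit parameter instead of being fished out of g->gr.config. A toy model of that dependency-injection move, with all names hypothetical (not nvgpu API):

```c
#include <stdio.h>

struct config { int tpc_count; };
struct dev;

/* After the refactor: init-stage ops take the config explicitly,
 * so unit tests and vGPU builds can pass their own (or leave the
 * pointer NULL, as the vgpu tables above do). */
struct init_ops {
	void (*rop_mapping)(struct dev *d, struct config *cfg);
};

struct dev_ops {
	struct init_ops init;
};

struct dev {
	struct dev_ops ops;
	struct config cfg;
};

static void chip_a_rop_mapping(struct dev *d, struct config *cfg)
{
	(void)d;
	printf("program %d map entries\n", cfg->tpc_count);
}

int main(void)
{
	struct dev d = {
		.ops = { .init = { .rop_mapping = chip_a_rop_mapping } },
		.cfg = { .tpc_count = 4 },
	};

	/* common code dispatches through the table and never touches
	 * d.cfg behind the op's back */
	if (d.ops.init.rop_mapping != NULL)
		d.ops.init.rop_mapping(&d, &d.cfg);
	return 0;
}
```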
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index f98ac64ca..03ab5a474 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -2421,115 +2421,6 @@ int gr_gv11b_handle_fecs_error(struct gk20a *g,
 	return ret;
 }
 
-int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
-{
-	u32 map;
-	u32 i, j;
-	u32 mapreg_num, base, offset, mapregs;
-	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
-	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
-				GPU_LIT_NUM_TPC_PER_GPC);
-	u32 num_tpcs = num_gpcs * num_tpc_per_gpc;
-
-	nvgpu_log_fn(g, " ");
-
-	if (gr->config->map_tiles == NULL) {
-		return -1;
-	}
-
-	gk20a_writel(g, gr_crstr_map_table_cfg_r(),
-		gr_crstr_map_table_cfg_row_offset_f(
-			nvgpu_gr_config_get_map_row_offset(gr->config)) |
-		gr_crstr_map_table_cfg_num_entries_f(
-			nvgpu_gr_config_get_tpc_count(gr->config)));
-	/*
-	 * 6 tpc can be stored in one map register.
-	 * But number of tpcs are not always multiple of six,
-	 * so adding additional check for valid number of
-	 * tpcs before programming map register.
-	 */
-	mapregs = DIV_ROUND_UP(num_tpcs, GR_TPCS_INFO_FOR_MAPREGISTER);
-
-	for (mapreg_num = 0U, base = 0U; mapreg_num < mapregs; mapreg_num++,
-			base = base + GR_TPCS_INFO_FOR_MAPREGISTER) {
-		map = 0U;
-		for (offset = 0U;
-		     (offset < GR_TPCS_INFO_FOR_MAPREGISTER && num_tpcs > 0U);
-		     offset++, num_tpcs--) {
-			switch (offset) {
-			case 0:
-				map = map | gr_crstr_gpc_map_tile0_f(
-					nvgpu_gr_config_get_map_tile_count(
-						gr->config, base + offset));
-				break;
-			case 1:
-				map = map | gr_crstr_gpc_map_tile1_f(
-					nvgpu_gr_config_get_map_tile_count(
-						gr->config, base + offset));
-				break;
-			case 2:
-				map = map | gr_crstr_gpc_map_tile2_f(
-					nvgpu_gr_config_get_map_tile_count(
-						gr->config, base + offset));
-				break;
-			case 3:
-				map = map | gr_crstr_gpc_map_tile3_f(
-					nvgpu_gr_config_get_map_tile_count(
-						gr->config, base + offset));
-				break;
-			case 4:
-				map = map | gr_crstr_gpc_map_tile4_f(
-					nvgpu_gr_config_get_map_tile_count(
-						gr->config, base + offset));
-				break;
-			case 5:
-				map = map | gr_crstr_gpc_map_tile5_f(
-					nvgpu_gr_config_get_map_tile_count(
-						gr->config, base + offset));
-				break;
-			default:
-				nvgpu_err(g, "incorrect rop mapping %x", offset);
-				break;
-			}
-		}
-
-		gk20a_writel(g, gr_crstr_gpc_map_r(mapreg_num), map);
-		gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map_r(mapreg_num), map);
-		gk20a_writel(g, gr_rstr2d_gpc_map_r(mapreg_num), map);
-	}
-
-	gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
-		gr_ppcs_wwdx_map_table_cfg_row_offset_f(
-			nvgpu_gr_config_get_map_row_offset(gr->config)) |
-		gr_ppcs_wwdx_map_table_cfg_num_entries_f(
-			nvgpu_gr_config_get_tpc_count(gr->config)));
-
-	for (i = 0U, j = 1U; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v();
-	     i++, j = j + 4U) {
-		gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
-			gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
-				(BIT32(j) %
-				nvgpu_gr_config_get_tpc_count(gr->config))) |
-			gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
-				(BIT32(j + 1U) %
-				nvgpu_gr_config_get_tpc_count(gr->config))) |
-			gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
-				(BIT32(j + 2U) %
-				nvgpu_gr_config_get_tpc_count(gr->config))) |
-			gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
-				(BIT32(j + 3U) %
-				nvgpu_gr_config_get_tpc_count(gr->config))));
-	}
-
-	gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
-		gr_rstr2d_map_table_cfg_row_offset_f(
-			nvgpu_gr_config_get_map_row_offset(gr->config)) |
-		gr_rstr2d_map_table_cfg_num_entries_f(
-			nvgpu_gr_config_get_tpc_count(gr->config)));
-
-	return 0;
-}
-
 static int gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
 {
 	struct netlist_av_list *sw_veid_bundle_init =
@@ -2771,40 +2662,6 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 	return 0;
 }
 
-void gr_gv11b_load_tpc_mask(struct gk20a *g)
-{
-	u32 pes_tpc_mask = 0, fuse_tpc_mask;
-	u32 gpc, pes, val;
-	u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
-	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
-				GPU_LIT_NUM_TPC_PER_GPC);
-
-	/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
-	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
-		for (pes = 0;
-		     pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
-		     pes++) {
-			pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
-					g->gr.config, gpc, pes) <<
-				num_tpc_per_gpc * gpc;
-		}
-	}
-
-	nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
-	fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc);
-	if ((g->tpc_fs_mask_user != 0U) &&
-	    (g->tpc_fs_mask_user != fuse_tpc_mask) &&
-	    (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
-		val = g->tpc_fs_mask_user;
-		val &= BIT32(max_tpc_count) - U32(1);
-		val = BIT32(hweight32(val)) - U32(1);
-		gk20a_writel(g, gr_fe_tpc_fs_r(0), val);
-	} else {
-		gk20a_writel(g, gr_fe_tpc_fs_r(0), pes_tpc_mask);
-	}
-
-}
-
 void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
 				u32 *esr_sm_sel)
 {
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index 8c57f42f9..f1e1bbe48 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -121,14 +121,12 @@ void gr_gv11b_fecs_host_int_enable(struct gk20a *g);
 int gr_gv11b_handle_fecs_error(struct gk20a *g,
 				struct channel_gk20a *__ch,
 				struct gr_gk20a_isr_data *isr_data);
-int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
 int gr_gv11b_init_sw_veid_bundle(struct gk20a *g);
 void gr_gv11b_detect_sm_arch(struct gk20a *g);
 void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
 					u32 gpc, u32 tpc, u32 smid);
 int gr_gv11b_load_smid_config(struct gk20a *g);
 int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va);
-void gr_gv11b_load_tpc_mask(struct gk20a *g);
 void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
 				u32 *esr_sm_sel);
 int gv11b_gr_sm_trigger_suspend(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index ddbe45b8b..21c090d9d 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -372,10 +372,8 @@ static const struct gpu_ops gv11b_ops = {
 		.init_sm_id_table = gr_gv100_init_sm_id_table,
 		.load_smid_config = gr_gv11b_load_smid_config,
 		.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
-		.setup_rop_mapping = gr_gv11b_setup_rop_mapping,
 		.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
 		.commit_inst = gr_gv11b_commit_inst,
-		.load_tpc_mask = gr_gv11b_load_tpc_mask,
 		.trigger_suspend = gv11b_gr_sm_trigger_suspend,
 		.wait_for_pause = gr_gk20a_wait_for_pause,
 		.resume_from_pause = gv11b_gr_resume_from_pause,
@@ -585,6 +583,8 @@ static const struct gpu_ops gv11b_ops = {
 				gv100_gr_hwpm_map_align_regs_perf_pma,
 		},
 		.init = {
+			.tpc_mask = gv11b_gr_init_tpc_mask,
+			.rop_mapping = gv11b_gr_init_rop_mapping,
 			.fs_state = gv11b_gr_init_fs_state,
 			.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 			.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
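The coeff loops (both the removed one above and the new gv11b one later in this patch) program BIT32(j) % tpc_count for consecutive j in groups of four. Since BIT32(j) is 2^j, the whole table can also be produced incrementally from the identity 2^(j+1) mod n == (2 * (2^j mod n)) mod n, which keeps every intermediate value in 32 bits no matter how large j gets. A self-contained sketch with made-up parameters:

```c
#include <stdint.h>
#include <stdio.h>

/*
 * The register writes store (2^j mod tpc_count) for consecutive j.
 * Deriving each value from the previous one avoids ever forming 2^j
 * itself:  2^(j+1) mod n == (2 * (2^j mod n)) mod n.
 */
static void print_coeff_mods(uint32_t tpc_count, uint32_t first_j,
			     uint32_t num_coeffs)
{
	uint32_t mod = (1u << first_j) % tpc_count; /* 2^first_j mod n */
	uint32_t i;

	for (i = 0u; i < num_coeffs; i++) {
		printf("coeff%u_mod = %u\n", first_j + i, mod);
		mod = (mod * 2u) % tpc_count;
	}
}

int main(void)
{
	/* e.g. 4 TPCs, j = 1..8: prints 2, then 0 forever,
	 * since 4 divides 2^j for every j >= 2 */
	print_coeff_mods(4u, 1u, 8u);
	return 0;
}
```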
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
index c7df0967a..9c9e5431d 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
@@ -39,6 +39,190 @@
 #define FE_PWR_MODE_TIMEOUT_DEFAULT_US 10U
 #define FECS_CTXSW_RESET_DELAY_US 10U
 
+void gm20b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask)
+{
+	nvgpu_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
+}
+
+int gm20b_gr_init_rop_mapping(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config)
+{
+	u32 norm_entries, norm_shift;
+	u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod;
+	u32 coeff9_mod, coeff10_mod, coeff11_mod;
+	u32 map0, map1, map2, map3, map4, map5;
+	u32 tpc_cnt;
+
+	if (gr_config->map_tiles == NULL) {
+		return -1;
+	}
+
+	nvgpu_log_fn(g, " ");
+
+	tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config);
+
+	nvgpu_writel(g, gr_crstr_map_table_cfg_r(),
+		gr_crstr_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr_config)) |
+		gr_crstr_map_table_cfg_num_entries_f(tpc_cnt));
+
+	map0 = gr_crstr_gpc_map0_tile0_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 0)) |
+		gr_crstr_gpc_map0_tile1_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 1)) |
+		gr_crstr_gpc_map0_tile2_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 2)) |
+		gr_crstr_gpc_map0_tile3_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 3)) |
+		gr_crstr_gpc_map0_tile4_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 4)) |
+		gr_crstr_gpc_map0_tile5_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 5));
+
+	map1 = gr_crstr_gpc_map1_tile6_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 6)) |
+		gr_crstr_gpc_map1_tile7_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 7)) |
+		gr_crstr_gpc_map1_tile8_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 8)) |
+		gr_crstr_gpc_map1_tile9_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 9)) |
+		gr_crstr_gpc_map1_tile10_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 10)) |
+		gr_crstr_gpc_map1_tile11_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 11));
+
+	map2 = gr_crstr_gpc_map2_tile12_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 12)) |
+		gr_crstr_gpc_map2_tile13_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 13)) |
+		gr_crstr_gpc_map2_tile14_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 14)) |
+		gr_crstr_gpc_map2_tile15_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 15)) |
+		gr_crstr_gpc_map2_tile16_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 16)) |
+		gr_crstr_gpc_map2_tile17_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 17));
+
+	map3 = gr_crstr_gpc_map3_tile18_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 18)) |
+		gr_crstr_gpc_map3_tile19_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 19)) |
+		gr_crstr_gpc_map3_tile20_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 20)) |
+		gr_crstr_gpc_map3_tile21_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 21)) |
+		gr_crstr_gpc_map3_tile22_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 22)) |
+		gr_crstr_gpc_map3_tile23_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 23));
+
+	map4 = gr_crstr_gpc_map4_tile24_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 24)) |
+		gr_crstr_gpc_map4_tile25_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 25)) |
+		gr_crstr_gpc_map4_tile26_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 26)) |
+		gr_crstr_gpc_map4_tile27_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 27)) |
+		gr_crstr_gpc_map4_tile28_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 28)) |
+		gr_crstr_gpc_map4_tile29_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 29));
+
+	map5 = gr_crstr_gpc_map5_tile30_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 30)) |
+		gr_crstr_gpc_map5_tile31_f(
+			nvgpu_gr_config_get_map_tile_count(gr_config, 31)) |
+		gr_crstr_gpc_map5_tile32_f(0) |
+		gr_crstr_gpc_map5_tile33_f(0) |
+		gr_crstr_gpc_map5_tile34_f(0) |
+		gr_crstr_gpc_map5_tile35_f(0);
+
+	nvgpu_writel(g, gr_crstr_gpc_map0_r(), map0);
+	nvgpu_writel(g, gr_crstr_gpc_map1_r(), map1);
+	nvgpu_writel(g, gr_crstr_gpc_map2_r(), map2);
+	nvgpu_writel(g, gr_crstr_gpc_map3_r(), map3);
+	nvgpu_writel(g, gr_crstr_gpc_map4_r(), map4);
+	nvgpu_writel(g, gr_crstr_gpc_map5_r(), map5);
+
+	switch (tpc_cnt) {
+	case 1:
+		norm_shift = 4;
+		break;
+	case 2:
+	case 3:
+		norm_shift = 3;
+		break;
+	case 4:
+	case 5:
+	case 6:
+	case 7:
+		norm_shift = 2;
+		break;
+	case 8:
+	case 9:
+	case 10:
+	case 11:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+		norm_shift = 1;
+		break;
+	default:
+		norm_shift = 0;
+		break;
+	}
+
+	norm_entries = tpc_cnt << norm_shift;
+	coeff5_mod = BIT32(5) % norm_entries;
+	coeff6_mod = BIT32(6) % norm_entries;
+	coeff7_mod = BIT32(7) % norm_entries;
+	coeff8_mod = BIT32(8) % norm_entries;
+	coeff9_mod = BIT32(9) % norm_entries;
+	coeff10_mod = BIT32(10) % norm_entries;
+	coeff11_mod = BIT32(11) % norm_entries;
+
+	nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
+		gr_ppcs_wwdx_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr_config)) |
+		gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
+		gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
+		gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
+		gr_ppcs_wwdx_map_table_cfg_num_entries_f(tpc_cnt));
+
+	nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
+		gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
+		gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
+		gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
+		gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
+		gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
+		gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
+
+	nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
+	nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
+	nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
+	nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
+	nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
+	nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
+
+	nvgpu_writel(g, gr_rstr2d_map_table_cfg_r(),
+		gr_rstr2d_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr_config)) |
+		gr_rstr2d_map_table_cfg_num_entries_f(tpc_cnt));
+
+	nvgpu_writel(g, gr_rstr2d_gpc_map0_r(), map0);
+	nvgpu_writel(g, gr_rstr2d_gpc_map1_r(), map1);
+	nvgpu_writel(g, gr_rstr2d_gpc_map2_r(), map2);
+	nvgpu_writel(g, gr_rstr2d_gpc_map3_r(), map3);
+	nvgpu_writel(g, gr_rstr2d_gpc_map4_r(), map4);
+	nvgpu_writel(g, gr_rstr2d_gpc_map5_r(), map5);
+
+	return 0;
+}
+
 int gm20b_gr_init_fs_state(struct gk20a *g)
 {
 	int err = 0;
@@ -57,12 +241,12 @@ int gm20b_gr_init_fs_state(struct gk20a *g)
 	return err;
 }
 
-void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g)
+void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config)
 {
 	u32 reg_index;
 	u32 tpc_per_gpc;
 	u32 gpc_id = 0;
-	struct nvgpu_gr_config *gr_config = g->gr.config;
 
 	for (reg_index = 0U, gpc_id = 0U;
 	     reg_index < gr_pd_num_tpc_per_gpc__size_1_v();
@@ -91,11 +275,11 @@
 	}
 }
 
-void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g)
+void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config)
 {
 	u32 gpc_index;
 	bool skip_mask;
-	struct nvgpu_gr_config *gr_config = g->gr.config;
 
 	for (gpc_index = 0;
 	     gpc_index < gr_pd_dist_skip_table__size_1_v() * 4U;
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
index 5c47f9229..5baf9ad65 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
@@ -27,10 +27,16 @@
 
 struct gk20a;
 struct netlist_av_list;
+struct nvgpu_gr_config;
 
+void gm20b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
+int gm20b_gr_init_rop_mapping(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config);
 int gm20b_gr_init_fs_state(struct gk20a *g);
-void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g);
-void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g);
+void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config);
+void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config);
 void gm20b_gr_init_cwd_gpcs_tpcs_num(struct gk20a *g,
 				u32 gpc_count, u32 tpc_count);
 int gm20b_gr_init_wait_idle(struct gk20a *g);
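The norm_shift switch in gm20b_gr_init_rop_mapping() above encodes a simple rule: pick the smallest shift that brings tpc_cnt << shift up to at least 16, so norm_entries lands in 16..31 for any tpc_cnt up to 31 (1 maps to shift 4, 2-3 to 3, 4-7 to 2, 8-15 to 1, anything larger to 0). A sketch that derives the value with a loop and cross-checks it against a copy of the switch:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Loop form: smallest shift with (tpc_count << shift) >= 16. */
static uint32_t norm_shift_loop(uint32_t tpc_count)
{
	uint32_t shift = 0u;

	while (shift < 4u && (tpc_count << shift) < 16u)
		shift++;
	return shift;
}

/* Copy of the switch from gm20b_gr_init_rop_mapping(). */
static uint32_t norm_shift_switch(uint32_t tpc_count)
{
	switch (tpc_count) {
	case 1:
		return 4;
	case 2: case 3:
		return 3;
	case 4: case 5: case 6: case 7:
		return 2;
	case 8: case 9: case 10: case 11:
	case 12: case 13: case 14: case 15:
		return 1;
	default:
		return 0;
	}
}

int main(void)
{
	uint32_t n;

	/* the two formulations agree for every plausible TPC count */
	for (n = 1u; n <= 32u; n++)
		assert(norm_shift_loop(n) == norm_shift_switch(n));
	printf("norm_entries for 4 TPCs: %u\n", 4u << norm_shift_loop(4u));
	return 0;
}
```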
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
index f59af67b0..f73787eee 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
@@ -25,10 +25,121 @@
 #include
 #include
 
+#include
+
 #include "gr_init_gv11b.h"
 
 #include
 
+/*
+ * Each GPC can have a maximum of 32 TPCs, so each TPC index needs
+ * 5 bits. Each 32-bit map register can hold info for 6 TPCs.
+ */
+#define GR_TPCS_INFO_FOR_MAPREGISTER 6U
+
+void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask)
+{
+	nvgpu_writel(g, gr_fe_tpc_fs_r(gpc_index), pes_tpc_mask);
+}
+
+int gv11b_gr_init_rop_mapping(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config)
+{
+	u32 map;
+	u32 i, j;
+	u32 mapreg_num, base, offset, mapregs, tile_cnt, tpc_cnt;
+	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
+				GPU_LIT_NUM_TPC_PER_GPC);
+	u32 num_tpcs = num_gpcs * num_tpc_per_gpc;
+
+	nvgpu_log_fn(g, " ");
+
+	if (gr_config->map_tiles == NULL) {
+		return -1;
+	}
+
+	nvgpu_writel(g, gr_crstr_map_table_cfg_r(),
+		gr_crstr_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr_config)) |
+		gr_crstr_map_table_cfg_num_entries_f(
+			nvgpu_gr_config_get_tpc_count(gr_config)));
+	/*
+	 * Six TPCs can be stored in one map register, but the number
+	 * of TPCs is not always a multiple of six, so check for a
+	 * valid number of remaining TPCs before programming each
+	 * map register.
+	 */
+	mapregs = DIV_ROUND_UP(num_tpcs, GR_TPCS_INFO_FOR_MAPREGISTER);
+
+	for (mapreg_num = 0U, base = 0U; mapreg_num < mapregs; mapreg_num++,
+			base = base + GR_TPCS_INFO_FOR_MAPREGISTER) {
+		map = 0U;
+		for (offset = 0U;
+		     (offset < GR_TPCS_INFO_FOR_MAPREGISTER && num_tpcs > 0U);
+		     offset++, num_tpcs--) {
+			tile_cnt = nvgpu_gr_config_get_map_tile_count(
+					gr_config, base + offset);
+			switch (offset) {
+			case 0:
+				map = map | gr_crstr_gpc_map_tile0_f(tile_cnt);
+				break;
+			case 1:
+				map = map | gr_crstr_gpc_map_tile1_f(tile_cnt);
+				break;
+			case 2:
+				map = map | gr_crstr_gpc_map_tile2_f(tile_cnt);
+				break;
+			case 3:
+				map = map | gr_crstr_gpc_map_tile3_f(tile_cnt);
+				break;
+			case 4:
+				map = map | gr_crstr_gpc_map_tile4_f(tile_cnt);
+				break;
+			case 5:
+				map = map | gr_crstr_gpc_map_tile5_f(tile_cnt);
+				break;
+			default:
+				nvgpu_err(g, "incorrect rop mapping %x",
+					  offset);
+				break;
+			}
+		}
+
+		nvgpu_writel(g, gr_crstr_gpc_map_r(mapreg_num), map);
+		nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map_r(mapreg_num), map);
+		nvgpu_writel(g, gr_rstr2d_gpc_map_r(mapreg_num), map);
+	}
+
+	nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
+		gr_ppcs_wwdx_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr_config)) |
+		gr_ppcs_wwdx_map_table_cfg_num_entries_f(
+			nvgpu_gr_config_get_tpc_count(gr_config)));
+
+	for (i = 0U, j = 1U; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v();
+	     i++, j = j + 4U) {
+		tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config);
+		nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
+			gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
+				(BIT32(j) % tpc_cnt)) |
+			gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
+				(BIT32(j + 1U) % tpc_cnt)) |
+			gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
+				(BIT32(j + 2U) % tpc_cnt)) |
+			gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
+				(BIT32(j + 3U) % tpc_cnt)));
+	}
+
+	nvgpu_writel(g, gr_rstr2d_map_table_cfg_r(),
+		gr_rstr2d_map_table_cfg_row_offset_f(
+			nvgpu_gr_config_get_map_row_offset(gr_config)) |
+		gr_rstr2d_map_table_cfg_num_entries_f(
+			nvgpu_gr_config_get_tpc_count(gr_config)));
+
+	return 0;
+}
+
 int gv11b_gr_init_fs_state(struct gk20a *g)
 {
 	u32 data;
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
index f1784f7a5..1ef13e927 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h
@@ -26,7 +26,11 @@
 #include
 
 struct gk20a;
+struct nvgpu_gr_config;
 
+void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
+int gv11b_gr_init_rop_mapping(struct gk20a *g,
+		struct nvgpu_gr_config *gr_config);
 int gv11b_gr_init_fs_state(struct gk20a *g);
 int gv11b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count,
 	bool gfxp_wfi_timeout_unit_usec);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index a82c003b7..5a3f59f25 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -71,6 +71,7 @@ struct nvgpu_gr_zbc_query_params;
 struct nvgpu_channel_hw_state;
 struct nvgpu_engine_status_info;
 struct nvgpu_pbdma_status_info;
+struct nvgpu_gr_config;
 enum nvgpu_nvlink_minion_dlcmd;
 struct nvgpu_cbc;
 
@@ -430,12 +431,10 @@ struct gpu_ops {
 		int (*load_smid_config)(struct gk20a *g);
 		void (*program_sm_id_numbering)(struct gk20a *g,
 						u32 gpc, u32 tpc, u32 smid);
-		int (*setup_rop_mapping)(struct gk20a *g, struct gr_gk20a *gr);
 		int (*init_sw_veid_bundle)(struct gk20a *g);
 		void (*program_zcull_mapping)(struct gk20a *g,
 				u32 zcull_alloc_num, u32 *zcull_map_tiles);
 		int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
-		void (*load_tpc_mask)(struct gk20a *g);
 		int (*trigger_suspend)(struct gk20a *g);
 		int (*wait_for_pause)(struct gk20a *g, struct nvgpu_warpstate *w_state);
 		int (*resume_from_pause)(struct gk20a *g);
@@ -665,9 +664,15 @@ struct gpu_ops {
 		} hwpm_map;
 
 		struct {
+			void (*tpc_mask)(struct gk20a *g,
+				u32 gpc_index, u32 pes_tpc_mask);
+			int (*rop_mapping)(struct gk20a *g,
+				struct nvgpu_gr_config *gr_config);
 			int (*fs_state)(struct gk20a *g);
-			void (*pd_tpc_per_gpc)(struct gk20a *g);
-			void (*pd_skip_table_gpc)(struct gk20a *g);
+			void (*pd_tpc_per_gpc)(struct gk20a *g,
+				struct nvgpu_gr_config *gr_config);
+			void (*pd_skip_table_gpc)(struct gk20a *g,
+				struct nvgpu_gr_config *gr_config);
 			void (*cwd_gpcs_tpcs_num)(struct gk20a *g,
 						  u32 gpc_count,
 						  u32 tpc_count);
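Worth noting in the header changes above: every file that gains the new parameter gains only a `struct nvgpu_gr_config;` forward declaration, never an include. Pointers to an incomplete type are enough for prototypes and keep header dependencies flat. A self-contained illustration of that pattern, with hypothetical names (not nvgpu code):

```c
#include <stdio.h>

/* Forward declaration: the layout of struct config is unknown at
 * this point, yet prototypes may freely pass pointers to it. */
struct config;

static void print_tpc_count(const struct config *cfg);

/* Full definition -- in nvgpu this would live in a separate header,
 * included only by translation units that dereference the pointer. */
struct config {
	unsigned int tpc_count;
};

static void print_tpc_count(const struct config *cfg)
{
	printf("tpc_count = %u\n", cfg->tpc_count);
}

int main(void)
{
	struct config cfg = { .tpc_count = 4u };

	print_tpc_count(&cfg);
	return 0;
}
```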
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 675771687..61faa669b 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -440,10 +440,8 @@ static const struct gpu_ops tu104_ops = {
 		.init_sm_id_table = gr_gv100_init_sm_id_table,
 		.load_smid_config = gr_gv11b_load_smid_config,
 		.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
-		.setup_rop_mapping = gr_gv11b_setup_rop_mapping,
 		.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
 		.commit_inst = gr_gv11b_commit_inst,
-		.load_tpc_mask = gr_gv11b_load_tpc_mask,
 		.trigger_suspend = gv11b_gr_sm_trigger_suspend,
 		.wait_for_pause = gr_gk20a_wait_for_pause,
 		.resume_from_pause = gv11b_gr_resume_from_pause,
@@ -654,6 +652,8 @@ static const struct gpu_ops tu104_ops = {
 				gv100_gr_hwpm_map_get_active_fbpa_mask,
 		},
 		.init = {
+			.tpc_mask = gv11b_gr_init_tpc_mask,
+			.rop_mapping = gv11b_gr_init_rop_mapping,
 			.fs_state = gv11b_gr_init_fs_state,
 			.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
 			.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,