Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)
gpu: nvgpu: move load_tpc_mask and setup_rop_mapping to hal.gr.init
Move the load_tpc_mask and setup_rop_mapping HAL functions to hal.gr.init. The existing load_tpc_mask HAL code is split into two parts: common code in gr_load_tpc_mask(), and the register write in the init.tpc_mask HAL function. Also modify the pd_tpc_per_gpc and pd_skip_table_gpc HALs in hal.gr.init to take struct nvgpu_gr_config as a parameter.

JIRA NVGPU-2951

Change-Id: I52e26d0f023afa511a8cf8c3e4c54f45350be4ae
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2074892
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed-by: mobile promotions
Parent: 10be3e8778
Commit: d466ab8007
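Condensing the gpu_ops changes from the diff below into one before/after sketch of the HAL interface (the struct wrappers and the u32 typedef are illustrative stand-ins, not names from the tree; the member signatures are copied from the gk20a.h hunks):

#include <stdint.h>

typedef uint32_t u32;	/* stand-in for nvgpu's u32 */
struct gk20a;
struct gr_gk20a;
struct nvgpu_gr_config;

/* Before: the whole sequence hung off g->ops.gr as chip-level ops. */
struct gr_ops_before_sketch {
    int (*setup_rop_mapping)(struct gk20a *g, struct gr_gk20a *gr);
    void (*load_tpc_mask)(struct gk20a *g);
};

/* After: only register programming stays chip-specific, under
 * g->ops.gr.init, and the config-dependent ops take the
 * nvgpu_gr_config they operate on instead of digging it out of g;
 * the common mask computation moves to a static gr_load_tpc_mask(). */
struct gr_init_ops_after_sketch {
    void (*tpc_mask)(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
    int (*rop_mapping)(struct gk20a *g, struct nvgpu_gr_config *gr_config);
    void (*pd_tpc_per_gpc)(struct gk20a *g, struct nvgpu_gr_config *gr_config);
    void (*pd_skip_table_gpc)(struct gk20a *g, struct nvgpu_gr_config *gr_config);
};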
@@ -25,6 +25,41 @@
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/config.h>
+
+static void gr_load_tpc_mask(struct gk20a *g)
+{
+    u32 pes_tpc_mask = 0, fuse_tpc_mask;
+    u32 gpc, pes, val;
+    u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
+                GPU_LIT_NUM_TPC_PER_GPC);
+    u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
+
+    /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
+    for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config);
+            gpc++) {
+        for (pes = 0;
+                pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
+                pes++) {
+            pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
+                    g->gr.config, gpc, pes) <<
+                num_tpc_per_gpc * gpc;
+        }
+    }
+
+    nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
+
+    fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, 0);
+    if ((g->tpc_fs_mask_user != 0U) &&
+        (g->tpc_fs_mask_user != fuse_tpc_mask) &&
+        (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
+        val = g->tpc_fs_mask_user;
+        val &= BIT32(max_tpc_count) - U32(1);
+        /* skip tpc to disable the other tpc cause channel timeout */
+        val = BIT32(hweight32(val)) - U32(1);
+        pes_tpc_mask = val;
+    }
+    g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
+}
+
 u32 nvgpu_gr_get_idle_timeout(struct gk20a *g)
 {
     return nvgpu_is_timeouts_enabled(g) ?
@@ -66,12 +101,12 @@ int nvgpu_gr_init_fs_state(struct gk20a *g)
         g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id);
     }

-    g->ops.gr.init.pd_tpc_per_gpc(g);
+    g->ops.gr.init.pd_tpc_per_gpc(g, gr_config);

     /* gr__setup_pd_mapping */
-    g->ops.gr.setup_rop_mapping(g, &g->gr);
+    g->ops.gr.init.rop_mapping(g, gr_config);

-    g->ops.gr.init.pd_skip_table_gpc(g);
+    g->ops.gr.init.pd_skip_table_gpc(g, gr_config);

     fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, gr_config, 0);
     gpc_cnt = nvgpu_gr_config_get_gpc_count(gr_config);
@@ -86,7 +121,7 @@ int nvgpu_gr_init_fs_state(struct gk20a *g)
     }
     g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt);

-    g->ops.gr.load_tpc_mask(g);
+    gr_load_tpc_mask(g);

     err = g->ops.gr.load_smid_config(g);
     if (err != 0) {
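For readers tracing gr_load_tpc_mask() above, a standalone sketch of its two mask computations (plain C with illustrative inputs; BIT32() and hweight32() are reimplemented here so the sketch compiles outside nvgpu):

#include <stdint.h>
#include <stdio.h>

#define BIT32(n)    (1U << (n))    /* stand-in for nvgpu's BIT32() */

static uint32_t hweight32(uint32_t v)    /* population count, as in nvgpu */
{
    uint32_t n = 0U;
    while (v != 0U) {
        n += v & 1U;
        v >>= 1U;
    }
    return n;
}

int main(void)
{
    /* Packing: per-PES TPC masks of GPC n land at bit offset
     * num_tpc_per_gpc * n. With 4 TPC/GPC and GPC0's PES masks
     * 0x3 (TPC0-1) and 0xC (TPC2-3), the OR yields 0xF. */
    uint32_t num_tpc_per_gpc = 4U, pes_tpc_mask = 0U;
    uint32_t gpc0_pes_masks[2] = { 0x3U, 0xCU };
    for (int pes = 0; pes < 2; pes++) {
        pes_tpc_mask |= gpc0_pes_masks[pes] << (num_tpc_per_gpc * 0U);
    }
    printf("pes_tpc_mask = 0x%x\n", (unsigned)pes_tpc_mask);

    /* User floorsweeping mask: a sparse request such as 0b1010 is
     * normalized to the same number of contiguous low TPCs (0b0011),
     * since skipping a middle TPC causes channel timeouts per the
     * comment in the function above. */
    uint32_t max_tpc_count = 4U;
    uint32_t val = 0xAU & (BIT32(max_tpc_count) - 1U);
    val = BIT32(hweight32(val)) - 1U;
    printf("normalized user mask = 0x%x\n", (unsigned)val);
    return 0;
}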
@@ -168,10 +168,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
         .init_sm_id_table = vgpu_gr_init_sm_id_table,
         .load_smid_config = NULL,
         .program_sm_id_numbering = NULL,
-        .setup_rop_mapping = NULL,
         .program_zcull_mapping = NULL,
         .commit_inst = vgpu_gr_commit_inst,
-        .load_tpc_mask = NULL,
         .trigger_suspend = NULL,
         .wait_for_pause = gr_gk20a_wait_for_pause,
         .resume_from_pause = NULL,

@@ -186,10 +186,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
         .init_sm_id_table = vgpu_gr_init_sm_id_table,
         .load_smid_config = NULL,
         .program_sm_id_numbering = NULL,
-        .setup_rop_mapping = NULL,
         .program_zcull_mapping = NULL,
         .commit_inst = vgpu_gr_commit_inst,
-        .load_tpc_mask = NULL,
         .trigger_suspend = NULL,
         .wait_for_pause = gr_gk20a_wait_for_pause,
         .resume_from_pause = NULL,
@@ -740,151 +740,6 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
     return 0;
 }

-int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
-{
-    u32 norm_entries, norm_shift;
-    u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
-    u32 map0, map1, map2, map3, map4, map5;
-
-    if (gr->config->map_tiles == NULL) {
-        return -1;
-    }
-
-    nvgpu_log_fn(g, " ");
-
-    gk20a_writel(g, gr_crstr_map_table_cfg_r(),
-        gr_crstr_map_table_cfg_row_offset_f(
-            nvgpu_gr_config_get_map_row_offset(gr->config)) |
-        gr_crstr_map_table_cfg_num_entries_f(
-            nvgpu_gr_config_get_tpc_count(gr->config)));
-
-    map0 = gr_crstr_gpc_map0_tile0_f(nvgpu_gr_config_get_map_tile_count(gr->config, 0)) |
-        gr_crstr_gpc_map0_tile1_f(nvgpu_gr_config_get_map_tile_count(gr->config, 1)) |
-        gr_crstr_gpc_map0_tile2_f(nvgpu_gr_config_get_map_tile_count(gr->config, 2)) |
-        gr_crstr_gpc_map0_tile3_f(nvgpu_gr_config_get_map_tile_count(gr->config, 3)) |
-        gr_crstr_gpc_map0_tile4_f(nvgpu_gr_config_get_map_tile_count(gr->config, 4)) |
-        gr_crstr_gpc_map0_tile5_f(nvgpu_gr_config_get_map_tile_count(gr->config, 5));
-
-    map1 = gr_crstr_gpc_map1_tile6_f(nvgpu_gr_config_get_map_tile_count(gr->config, 6)) |
-        gr_crstr_gpc_map1_tile7_f(nvgpu_gr_config_get_map_tile_count(gr->config, 7)) |
-        gr_crstr_gpc_map1_tile8_f(nvgpu_gr_config_get_map_tile_count(gr->config, 8)) |
-        gr_crstr_gpc_map1_tile9_f(nvgpu_gr_config_get_map_tile_count(gr->config, 9)) |
-        gr_crstr_gpc_map1_tile10_f(nvgpu_gr_config_get_map_tile_count(gr->config, 10)) |
-        gr_crstr_gpc_map1_tile11_f(nvgpu_gr_config_get_map_tile_count(gr->config, 11));
-
-    map2 = gr_crstr_gpc_map2_tile12_f(nvgpu_gr_config_get_map_tile_count(gr->config, 12)) |
-        gr_crstr_gpc_map2_tile13_f(nvgpu_gr_config_get_map_tile_count(gr->config, 13)) |
-        gr_crstr_gpc_map2_tile14_f(nvgpu_gr_config_get_map_tile_count(gr->config, 14)) |
-        gr_crstr_gpc_map2_tile15_f(nvgpu_gr_config_get_map_tile_count(gr->config, 15)) |
-        gr_crstr_gpc_map2_tile16_f(nvgpu_gr_config_get_map_tile_count(gr->config, 16)) |
-        gr_crstr_gpc_map2_tile17_f(nvgpu_gr_config_get_map_tile_count(gr->config, 17));
-
-    map3 = gr_crstr_gpc_map3_tile18_f(nvgpu_gr_config_get_map_tile_count(gr->config, 18)) |
-        gr_crstr_gpc_map3_tile19_f(nvgpu_gr_config_get_map_tile_count(gr->config, 19)) |
-        gr_crstr_gpc_map3_tile20_f(nvgpu_gr_config_get_map_tile_count(gr->config, 20)) |
-        gr_crstr_gpc_map3_tile21_f(nvgpu_gr_config_get_map_tile_count(gr->config, 21)) |
-        gr_crstr_gpc_map3_tile22_f(nvgpu_gr_config_get_map_tile_count(gr->config, 22)) |
-        gr_crstr_gpc_map3_tile23_f(nvgpu_gr_config_get_map_tile_count(gr->config, 23));
-
-    map4 = gr_crstr_gpc_map4_tile24_f(nvgpu_gr_config_get_map_tile_count(gr->config, 24)) |
-        gr_crstr_gpc_map4_tile25_f(nvgpu_gr_config_get_map_tile_count(gr->config, 25)) |
-        gr_crstr_gpc_map4_tile26_f(nvgpu_gr_config_get_map_tile_count(gr->config, 26)) |
-        gr_crstr_gpc_map4_tile27_f(nvgpu_gr_config_get_map_tile_count(gr->config, 27)) |
-        gr_crstr_gpc_map4_tile28_f(nvgpu_gr_config_get_map_tile_count(gr->config, 28)) |
-        gr_crstr_gpc_map4_tile29_f(nvgpu_gr_config_get_map_tile_count(gr->config, 29));
-
-    map5 = gr_crstr_gpc_map5_tile30_f(nvgpu_gr_config_get_map_tile_count(gr->config, 30)) |
-        gr_crstr_gpc_map5_tile31_f(nvgpu_gr_config_get_map_tile_count(gr->config, 31)) |
-        gr_crstr_gpc_map5_tile32_f(0) |
-        gr_crstr_gpc_map5_tile33_f(0) |
-        gr_crstr_gpc_map5_tile34_f(0) |
-        gr_crstr_gpc_map5_tile35_f(0);
-
-    gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
-    gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
-    gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
-    gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
-    gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
-    gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
-
-    switch (nvgpu_gr_config_get_tpc_count(gr->config)) {
-    case 1:
-        norm_shift = 4;
-        break;
-    case 2:
-    case 3:
-        norm_shift = 3;
-        break;
-    case 4:
-    case 5:
-    case 6:
-    case 7:
-        norm_shift = 2;
-        break;
-    case 8:
-    case 9:
-    case 10:
-    case 11:
-    case 12:
-    case 13:
-    case 14:
-    case 15:
-        norm_shift = 1;
-        break;
-    default:
-        norm_shift = 0;
-        break;
-    }
-
-    norm_entries = nvgpu_gr_config_get_tpc_count(gr->config) << norm_shift;
-    coeff5_mod = BIT32(5) % norm_entries;
-    coeff6_mod = BIT32(6) % norm_entries;
-    coeff7_mod = BIT32(7) % norm_entries;
-    coeff8_mod = BIT32(8) % norm_entries;
-    coeff9_mod = BIT32(9) % norm_entries;
-    coeff10_mod = BIT32(10) % norm_entries;
-    coeff11_mod = BIT32(11) % norm_entries;
-
-    gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
-        gr_ppcs_wwdx_map_table_cfg_row_offset_f(
-            nvgpu_gr_config_get_map_row_offset(gr->config)) |
-        gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
-        gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
-        gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
-        gr_ppcs_wwdx_map_table_cfg_num_entries_f(
-            nvgpu_gr_config_get_tpc_count(gr->config)));
-
-    gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
-        gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
-        gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
-        gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
-        gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
-        gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
-        gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
-
-    gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
-    gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
-    gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
-    gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
-    gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
-    gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
-
-    gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
-        gr_rstr2d_map_table_cfg_row_offset_f(
-            nvgpu_gr_config_get_map_row_offset(gr->config)) |
-        gr_rstr2d_map_table_cfg_num_entries_f(
-            nvgpu_gr_config_get_tpc_count(gr->config)));
-
-    gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
-    gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
-    gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
-    gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
-    gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
-    gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
-
-    return 0;
-}
-
 int gr_gk20a_init_sm_id_table(struct gk20a *g)
 {
     u32 gpc, tpc;
@@ -409,7 +409,6 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g,
             u64 addr, u32 size, bool patch);
 void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
 void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
-int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
 int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
 int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
 void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
@@ -577,37 +577,6 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
     }
 }

-void gr_gm20b_load_tpc_mask(struct gk20a *g)
-{
-    u32 pes_tpc_mask = 0, fuse_tpc_mask;
-    u32 gpc, pes;
-    u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
-    u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
-
-    for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
-        for (pes = 0;
-                pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
-                pes++) {
-            pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
-                    g->gr.config, gpc, pes) <<
-                num_tpc_per_gpc * gpc;
-        }
-    }
-
-    fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, 0);
-    if ((g->tpc_fs_mask_user != 0U) &&
-        (g->tpc_fs_mask_user != fuse_tpc_mask) &&
-        (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
-        u32 val = g->tpc_fs_mask_user;
-        val &= BIT32(max_tpc_count) - U32(1);
-        /* skip tpc to disable the other tpc cause channel timeout */
-        val = BIT32(hweight32(val)) - U32(1);
-        gk20a_writel(g, gr_fe_tpc_fs_r(), val);
-    } else {
-        gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
-    }
-}
-
 void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
         u32 gpc, u32 tpc, u32 smid)
 {
@@ -79,7 +79,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
         u32 **sm_dsm_perf_ctrl_regs,
         u32 *ctrl_register_stride);
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
-void gr_gm20b_load_tpc_mask(struct gk20a *g);
 void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
         u32 gpc, u32 tpc, u32 smid);
 int gr_gm20b_load_smid_config(struct gk20a *g);
@@ -291,10 +291,8 @@ static const struct gpu_ops gm20b_ops = {
         .init_sm_id_table = gr_gk20a_init_sm_id_table,
         .load_smid_config = gr_gm20b_load_smid_config,
         .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
-        .setup_rop_mapping = gr_gk20a_setup_rop_mapping,
         .program_zcull_mapping = gr_gk20a_program_zcull_mapping,
         .commit_inst = gr_gk20a_commit_inst,
-        .load_tpc_mask = gr_gm20b_load_tpc_mask,
         .trigger_suspend = gr_gk20a_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
         .resume_from_pause = gr_gk20a_resume_from_pause,

@@ -420,6 +418,8 @@ static const struct gpu_ops gm20b_ops = {
         .get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
     },
     .init = {
+        .tpc_mask = gm20b_gr_init_tpc_mask,
+        .rop_mapping = gm20b_gr_init_rop_mapping,
         .fs_state = gm20b_gr_init_fs_state,
         .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
         .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
@@ -313,10 +313,8 @@ static const struct gpu_ops gp10b_ops = {
         .init_sm_id_table = gr_gk20a_init_sm_id_table,
         .load_smid_config = gr_gp10b_load_smid_config,
         .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
-        .setup_rop_mapping = gr_gk20a_setup_rop_mapping,
         .program_zcull_mapping = gr_gk20a_program_zcull_mapping,
         .commit_inst = gr_gk20a_commit_inst,
-        .load_tpc_mask = gr_gm20b_load_tpc_mask,
         .trigger_suspend = gr_gk20a_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
         .resume_from_pause = gr_gk20a_resume_from_pause,

@@ -490,6 +488,8 @@ static const struct gpu_ops gp10b_ops = {
             gp10b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
     },
     .init = {
+        .tpc_mask = gm20b_gr_init_tpc_mask,
+        .rop_mapping = gm20b_gr_init_rop_mapping,
         .fs_state = gp10b_gr_init_fs_state,
         .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
         .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
@@ -420,10 +420,8 @@ static const struct gpu_ops gv100_ops = {
         .init_sm_id_table = gr_gv100_init_sm_id_table,
         .load_smid_config = gr_gv11b_load_smid_config,
         .program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
-        .setup_rop_mapping = gr_gv11b_setup_rop_mapping,
         .program_zcull_mapping = gr_gv11b_program_zcull_mapping,
         .commit_inst = gr_gv11b_commit_inst,
-        .load_tpc_mask = gr_gv11b_load_tpc_mask,
         .trigger_suspend = gv11b_gr_sm_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
         .resume_from_pause = gv11b_gr_resume_from_pause,

@@ -626,6 +624,8 @@ static const struct gpu_ops gv100_ops = {
             gv100_gr_hwpm_map_get_active_fbpa_mask,
     },
     .init = {
+        .tpc_mask = gv11b_gr_init_tpc_mask,
+        .rop_mapping = gv11b_gr_init_rop_mapping,
         .fs_state = gv11b_gr_init_fs_state,
         .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
         .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
@@ -2421,115 +2421,6 @@ int gr_gv11b_handle_fecs_error(struct gk20a *g,
     return ret;
 }

-int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
-{
-    u32 map;
-    u32 i, j;
-    u32 mapreg_num, base, offset, mapregs;
-    u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
-    u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
-                GPU_LIT_NUM_TPC_PER_GPC);
-    u32 num_tpcs = num_gpcs * num_tpc_per_gpc;
-
-    nvgpu_log_fn(g, " ");
-
-    if (gr->config->map_tiles == NULL) {
-        return -1;
-    }
-
-    gk20a_writel(g, gr_crstr_map_table_cfg_r(),
-        gr_crstr_map_table_cfg_row_offset_f(
-            nvgpu_gr_config_get_map_row_offset(gr->config)) |
-        gr_crstr_map_table_cfg_num_entries_f(
-            nvgpu_gr_config_get_tpc_count(gr->config)));
-    /*
-     * 6 tpc can be stored in one map register.
-     * But number of tpcs are not always multiple of six,
-     * so adding additional check for valid number of
-     * tpcs before programming map register.
-     */
-    mapregs = DIV_ROUND_UP(num_tpcs, GR_TPCS_INFO_FOR_MAPREGISTER);
-
-    for (mapreg_num = 0U, base = 0U; mapreg_num < mapregs; mapreg_num++,
-            base = base + GR_TPCS_INFO_FOR_MAPREGISTER) {
-        map = 0U;
-        for (offset = 0U;
-                (offset < GR_TPCS_INFO_FOR_MAPREGISTER && num_tpcs > 0U);
-                offset++, num_tpcs--) {
-            switch (offset) {
-            case 0:
-                map = map | gr_crstr_gpc_map_tile0_f(
-                    nvgpu_gr_config_get_map_tile_count(
-                        gr->config, base + offset));
-                break;
-            case 1:
-                map = map | gr_crstr_gpc_map_tile1_f(
-                    nvgpu_gr_config_get_map_tile_count(
-                        gr->config, base + offset));
-                break;
-            case 2:
-                map = map | gr_crstr_gpc_map_tile2_f(
-                    nvgpu_gr_config_get_map_tile_count(
-                        gr->config, base + offset));
-                break;
-            case 3:
-                map = map | gr_crstr_gpc_map_tile3_f(
-                    nvgpu_gr_config_get_map_tile_count(
-                        gr->config, base + offset));
-                break;
-            case 4:
-                map = map | gr_crstr_gpc_map_tile4_f(
-                    nvgpu_gr_config_get_map_tile_count(
-                        gr->config, base + offset));
-                break;
-            case 5:
-                map = map | gr_crstr_gpc_map_tile5_f(
-                    nvgpu_gr_config_get_map_tile_count(
-                        gr->config, base + offset));
-                break;
-            default:
-                nvgpu_err(g, "incorrect rop mapping %x", offset);
-                break;
-            }
-        }
-
-        gk20a_writel(g, gr_crstr_gpc_map_r(mapreg_num), map);
-        gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map_r(mapreg_num), map);
-        gk20a_writel(g, gr_rstr2d_gpc_map_r(mapreg_num), map);
-    }
-
-    gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
-        gr_ppcs_wwdx_map_table_cfg_row_offset_f(
-            nvgpu_gr_config_get_map_row_offset(gr->config)) |
-        gr_ppcs_wwdx_map_table_cfg_num_entries_f(
-            nvgpu_gr_config_get_tpc_count(gr->config)));
-
-    for (i = 0U, j = 1U; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v();
-            i++, j = j + 4U) {
-        gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
-            gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
-                (BIT32(j) %
-                nvgpu_gr_config_get_tpc_count(gr->config))) |
-            gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
-                (BIT32(j + 1U) %
-                nvgpu_gr_config_get_tpc_count(gr->config))) |
-            gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
-                (BIT32(j + 2U) %
-                nvgpu_gr_config_get_tpc_count(gr->config))) |
-            gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
-                (BIT32(j + 3U) %
-                nvgpu_gr_config_get_tpc_count(gr->config))));
-    }
-
-    gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
-        gr_rstr2d_map_table_cfg_row_offset_f(
-            nvgpu_gr_config_get_map_row_offset(gr->config)) |
-        gr_rstr2d_map_table_cfg_num_entries_f(
-            nvgpu_gr_config_get_tpc_count(gr->config)));
-
-    return 0;
-}
-
 static int gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
 {
     struct netlist_av_list *sw_veid_bundle_init =
@@ -2771,40 +2662,6 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
     return 0;
 }

-void gr_gv11b_load_tpc_mask(struct gk20a *g)
-{
-    u32 pes_tpc_mask = 0, fuse_tpc_mask;
-    u32 gpc, pes, val;
-    u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(g->gr.config);
-    u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
-                GPU_LIT_NUM_TPC_PER_GPC);
-
-    /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
-    for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
-        for (pes = 0;
-                pes < nvgpu_gr_config_get_pe_count_per_gpc(g->gr.config);
-                pes++) {
-            pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
-                    g->gr.config, gpc, pes) <<
-                num_tpc_per_gpc * gpc;
-        }
-    }
-
-    nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
-    fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, g->gr.config, gpc);
-    if ((g->tpc_fs_mask_user != 0U) &&
-        (g->tpc_fs_mask_user != fuse_tpc_mask) &&
-        (fuse_tpc_mask == BIT32(max_tpc_count) - U32(1))) {
-        val = g->tpc_fs_mask_user;
-        val &= BIT32(max_tpc_count) - U32(1);
-        val = BIT32(hweight32(val)) - U32(1);
-        gk20a_writel(g, gr_fe_tpc_fs_r(0), val);
-    } else {
-        gk20a_writel(g, gr_fe_tpc_fs_r(0), pes_tpc_mask);
-    }
-
-}
-
 void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
         u32 *esr_sm_sel)
 {
@@ -121,14 +121,12 @@ void gr_gv11b_fecs_host_int_enable(struct gk20a *g);
 int gr_gv11b_handle_fecs_error(struct gk20a *g,
         struct channel_gk20a *__ch,
         struct gr_gk20a_isr_data *isr_data);
-int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
 int gr_gv11b_init_sw_veid_bundle(struct gk20a *g);
 void gr_gv11b_detect_sm_arch(struct gk20a *g);
 void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
         u32 gpc, u32 tpc, u32 smid);
 int gr_gv11b_load_smid_config(struct gk20a *g);
 int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va);
-void gr_gv11b_load_tpc_mask(struct gk20a *g);
 void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
         u32 *esr_sm_sel);
 int gv11b_gr_sm_trigger_suspend(struct gk20a *g);
@@ -372,10 +372,8 @@ static const struct gpu_ops gv11b_ops = {
         .init_sm_id_table = gr_gv100_init_sm_id_table,
         .load_smid_config = gr_gv11b_load_smid_config,
         .program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
-        .setup_rop_mapping = gr_gv11b_setup_rop_mapping,
         .program_zcull_mapping = gr_gv11b_program_zcull_mapping,
         .commit_inst = gr_gv11b_commit_inst,
-        .load_tpc_mask = gr_gv11b_load_tpc_mask,
         .trigger_suspend = gv11b_gr_sm_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
         .resume_from_pause = gv11b_gr_resume_from_pause,

@@ -585,6 +583,8 @@ static const struct gpu_ops gv11b_ops = {
             gv100_gr_hwpm_map_align_regs_perf_pma,
     },
     .init = {
+        .tpc_mask = gv11b_gr_init_tpc_mask,
+        .rop_mapping = gv11b_gr_init_rop_mapping,
         .fs_state = gv11b_gr_init_fs_state,
         .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
         .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
@@ -39,6 +39,190 @@
 #define FE_PWR_MODE_TIMEOUT_DEFAULT_US 10U
 #define FECS_CTXSW_RESET_DELAY_US 10U

+void gm20b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask)
+{
+    nvgpu_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
+}
+
+int gm20b_gr_init_rop_mapping(struct gk20a *g,
+        struct nvgpu_gr_config *gr_config)
+{
+    u32 norm_entries, norm_shift;
+    u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod;
+    u32 coeff9_mod, coeff10_mod, coeff11_mod;
+    u32 map0, map1, map2, map3, map4, map5;
+    u32 tpc_cnt;
+
+    if (gr_config->map_tiles == NULL) {
+        return -1;
+    }
+
+    nvgpu_log_fn(g, " ");
+
+    tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config);
+
+    nvgpu_writel(g, gr_crstr_map_table_cfg_r(),
+        gr_crstr_map_table_cfg_row_offset_f(
+            nvgpu_gr_config_get_map_row_offset(gr_config)) |
+        gr_crstr_map_table_cfg_num_entries_f(tpc_cnt));
+
+    map0 = gr_crstr_gpc_map0_tile0_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 0)) |
+        gr_crstr_gpc_map0_tile1_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 1)) |
+        gr_crstr_gpc_map0_tile2_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 2)) |
+        gr_crstr_gpc_map0_tile3_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 3)) |
+        gr_crstr_gpc_map0_tile4_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 4)) |
+        gr_crstr_gpc_map0_tile5_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 5));
+
+    map1 = gr_crstr_gpc_map1_tile6_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 6)) |
+        gr_crstr_gpc_map1_tile7_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 7)) |
+        gr_crstr_gpc_map1_tile8_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 8)) |
+        gr_crstr_gpc_map1_tile9_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 9)) |
+        gr_crstr_gpc_map1_tile10_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 10)) |
+        gr_crstr_gpc_map1_tile11_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 11));
+
+    map2 = gr_crstr_gpc_map2_tile12_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 12)) |
+        gr_crstr_gpc_map2_tile13_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 13)) |
+        gr_crstr_gpc_map2_tile14_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 14)) |
+        gr_crstr_gpc_map2_tile15_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 15)) |
+        gr_crstr_gpc_map2_tile16_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 16)) |
+        gr_crstr_gpc_map2_tile17_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 17));
+
+    map3 = gr_crstr_gpc_map3_tile18_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 18)) |
+        gr_crstr_gpc_map3_tile19_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 19)) |
+        gr_crstr_gpc_map3_tile20_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 20)) |
+        gr_crstr_gpc_map3_tile21_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 21)) |
+        gr_crstr_gpc_map3_tile22_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 22)) |
+        gr_crstr_gpc_map3_tile23_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 23));
+
+    map4 = gr_crstr_gpc_map4_tile24_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 24)) |
+        gr_crstr_gpc_map4_tile25_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 25)) |
+        gr_crstr_gpc_map4_tile26_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 26)) |
+        gr_crstr_gpc_map4_tile27_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 27)) |
+        gr_crstr_gpc_map4_tile28_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 28)) |
+        gr_crstr_gpc_map4_tile29_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 29));
+
+    map5 = gr_crstr_gpc_map5_tile30_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 30)) |
+        gr_crstr_gpc_map5_tile31_f(
+            nvgpu_gr_config_get_map_tile_count(gr_config, 31)) |
+        gr_crstr_gpc_map5_tile32_f(0) |
+        gr_crstr_gpc_map5_tile33_f(0) |
+        gr_crstr_gpc_map5_tile34_f(0) |
+        gr_crstr_gpc_map5_tile35_f(0);
+
+    nvgpu_writel(g, gr_crstr_gpc_map0_r(), map0);
+    nvgpu_writel(g, gr_crstr_gpc_map1_r(), map1);
+    nvgpu_writel(g, gr_crstr_gpc_map2_r(), map2);
+    nvgpu_writel(g, gr_crstr_gpc_map3_r(), map3);
+    nvgpu_writel(g, gr_crstr_gpc_map4_r(), map4);
+    nvgpu_writel(g, gr_crstr_gpc_map5_r(), map5);
+
+    switch (tpc_cnt) {
+    case 1:
+        norm_shift = 4;
+        break;
+    case 2:
+    case 3:
+        norm_shift = 3;
+        break;
+    case 4:
+    case 5:
+    case 6:
+    case 7:
+        norm_shift = 2;
+        break;
+    case 8:
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+        norm_shift = 1;
+        break;
+    default:
+        norm_shift = 0;
+        break;
+    }
+
+    norm_entries = tpc_cnt << norm_shift;
+    coeff5_mod = BIT32(5) % norm_entries;
+    coeff6_mod = BIT32(6) % norm_entries;
+    coeff7_mod = BIT32(7) % norm_entries;
+    coeff8_mod = BIT32(8) % norm_entries;
+    coeff9_mod = BIT32(9) % norm_entries;
+    coeff10_mod = BIT32(10) % norm_entries;
+    coeff11_mod = BIT32(11) % norm_entries;
+
+    nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
+        gr_ppcs_wwdx_map_table_cfg_row_offset_f(
+            nvgpu_gr_config_get_map_row_offset(gr_config)) |
+        gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
+        gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
+        gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
+        gr_ppcs_wwdx_map_table_cfg_num_entries_f(tpc_cnt));
+
+    nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
+        gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
+        gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
+        gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
+        gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
+        gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
+        gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
+
+    nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
+    nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
+    nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
+    nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
+    nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
+    nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
+
+    nvgpu_writel(g, gr_rstr2d_map_table_cfg_r(),
+        gr_rstr2d_map_table_cfg_row_offset_f(
+            nvgpu_gr_config_get_map_row_offset(gr_config)) |
+        gr_rstr2d_map_table_cfg_num_entries_f(tpc_cnt));
+
+    nvgpu_writel(g, gr_rstr2d_gpc_map0_r(), map0);
+    nvgpu_writel(g, gr_rstr2d_gpc_map1_r(), map1);
+    nvgpu_writel(g, gr_rstr2d_gpc_map2_r(), map2);
+    nvgpu_writel(g, gr_rstr2d_gpc_map3_r(), map3);
+    nvgpu_writel(g, gr_rstr2d_gpc_map4_r(), map4);
+    nvgpu_writel(g, gr_rstr2d_gpc_map5_r(), map5);
+
+    return 0;
+}
+
 int gm20b_gr_init_fs_state(struct gk20a *g)
 {
     int err = 0;
@@ -57,12 +241,12 @@ int gm20b_gr_init_fs_state(struct gk20a *g)
     return err;
 }

-void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g)
+void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g,
+        struct nvgpu_gr_config *gr_config)
 {
     u32 reg_index;
     u32 tpc_per_gpc;
     u32 gpc_id = 0;
-    struct nvgpu_gr_config *gr_config = g->gr.config;

     for (reg_index = 0U, gpc_id = 0U;
         reg_index < gr_pd_num_tpc_per_gpc__size_1_v();
@@ -91,11 +275,11 @@ void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g)
     }
 }

-void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g)
+void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g,
+        struct nvgpu_gr_config *gr_config)
 {
     u32 gpc_index;
     bool skip_mask;
-    struct nvgpu_gr_config *gr_config = g->gr.config;

     for (gpc_index = 0;
         gpc_index < gr_pd_dist_skip_table__size_1_v() * 4U;
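A quick numeric check of the normalization arithmetic in gm20b_gr_init_rop_mapping() above (the shift table is transcribed from the function's switch statement; the example TPC count is arbitrary):

#include <stdint.h>
#include <stdio.h>

/* Same mapping as the switch (tpc_cnt) above: scale tpc_cnt so that
 * norm_entries = tpc_cnt << norm_shift falls in roughly 16..30. */
static uint32_t norm_shift_for(uint32_t tpc_cnt)
{
    if (tpc_cnt == 1U)
        return 4U;
    if (tpc_cnt <= 3U)
        return 3U;
    if (tpc_cnt <= 7U)
        return 2U;
    if (tpc_cnt <= 15U)
        return 1U;
    return 0U;
}

int main(void)
{
    uint32_t tpc_cnt = 3U;    /* e.g. a 3-TPC configuration */
    uint32_t norm_entries = tpc_cnt << norm_shift_for(tpc_cnt);    /* 24 */

    /* coeffN_mod = 2^N mod norm_entries; for norm_entries = 24 this
     * alternates 8, 16, 8, 16, ... from coeff5 through coeff11. */
    for (uint32_t n = 5U; n <= 11U; n++) {
        printf("coeff%u_mod = %u\n", (unsigned)n,
               (unsigned)((1U << n) % norm_entries));
    }
    return 0;
}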
@@ -27,10 +27,16 @@

 struct gk20a;
 struct netlist_av_list;
+struct nvgpu_gr_config;

+void gm20b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
+int gm20b_gr_init_rop_mapping(struct gk20a *g,
+        struct nvgpu_gr_config *gr_config);
 int gm20b_gr_init_fs_state(struct gk20a *g);
-void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g);
-void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g);
+void gm20b_gr_init_pd_tpc_per_gpc(struct gk20a *g,
+        struct nvgpu_gr_config *gr_config);
+void gm20b_gr_init_pd_skip_table_gpc(struct gk20a *g,
+        struct nvgpu_gr_config *gr_config);
 void gm20b_gr_init_cwd_gpcs_tpcs_num(struct gk20a *g,
         u32 gpc_count, u32 tpc_count);
 int gm20b_gr_init_wait_idle(struct gk20a *g);
@@ -25,10 +25,121 @@
 #include <nvgpu/soc.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>

 #include "gr_init_gv11b.h"

 #include <nvgpu/hw/gv11b/hw_gr_gv11b.h>

+/*
+ * Each gpc can have maximum 32 tpcs, so each tpc index need
+ * 5 bits. Each map register(32bits) can hold 6 tpcs info.
+ */
+#define GR_TPCS_INFO_FOR_MAPREGISTER 6U
+
+void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask)
+{
+    nvgpu_writel(g, gr_fe_tpc_fs_r(gpc_index), pes_tpc_mask);
+}
+
+int gv11b_gr_init_rop_mapping(struct gk20a *g,
+        struct nvgpu_gr_config *gr_config)
+{
+    u32 map;
+    u32 i, j;
+    u32 mapreg_num, base, offset, mapregs, tile_cnt, tpc_cnt;
+    u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+    u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
+                GPU_LIT_NUM_TPC_PER_GPC);
+    u32 num_tpcs = num_gpcs * num_tpc_per_gpc;
+
+    nvgpu_log_fn(g, " ");
+
+    if (gr_config->map_tiles == NULL) {
+        return -1;
+    }
+
+    nvgpu_writel(g, gr_crstr_map_table_cfg_r(),
+        gr_crstr_map_table_cfg_row_offset_f(
+            nvgpu_gr_config_get_map_row_offset(gr_config)) |
+        gr_crstr_map_table_cfg_num_entries_f(
+            nvgpu_gr_config_get_tpc_count(gr_config)));
+    /*
+     * 6 tpc can be stored in one map register.
+     * But number of tpcs are not always multiple of six,
+     * so adding additional check for valid number of
+     * tpcs before programming map register.
+     */
+    mapregs = DIV_ROUND_UP(num_tpcs, GR_TPCS_INFO_FOR_MAPREGISTER);
+
+    for (mapreg_num = 0U, base = 0U; mapreg_num < mapregs; mapreg_num++,
+            base = base + GR_TPCS_INFO_FOR_MAPREGISTER) {
+        map = 0U;
+        for (offset = 0U;
+                (offset < GR_TPCS_INFO_FOR_MAPREGISTER && num_tpcs > 0U);
+                offset++, num_tpcs--) {
+            tile_cnt = nvgpu_gr_config_get_map_tile_count(
+                    gr_config, base + offset);
+            switch (offset) {
+            case 0:
+                map = map | gr_crstr_gpc_map_tile0_f(tile_cnt);
+                break;
+            case 1:
+                map = map | gr_crstr_gpc_map_tile1_f(tile_cnt);
+                break;
+            case 2:
+                map = map | gr_crstr_gpc_map_tile2_f(tile_cnt);
+                break;
+            case 3:
+                map = map | gr_crstr_gpc_map_tile3_f(tile_cnt);
+                break;
+            case 4:
+                map = map | gr_crstr_gpc_map_tile4_f(tile_cnt);
+                break;
+            case 5:
+                map = map | gr_crstr_gpc_map_tile5_f(tile_cnt);
+                break;
+            default:
+                nvgpu_err(g, "incorrect rop mapping %x",
+                    offset);
+                break;
+            }
+        }
+
+        nvgpu_writel(g, gr_crstr_gpc_map_r(mapreg_num), map);
+        nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map_r(mapreg_num), map);
+        nvgpu_writel(g, gr_rstr2d_gpc_map_r(mapreg_num), map);
+    }
+
+    nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
+        gr_ppcs_wwdx_map_table_cfg_row_offset_f(
+            nvgpu_gr_config_get_map_row_offset(gr_config)) |
+        gr_ppcs_wwdx_map_table_cfg_num_entries_f(
+            nvgpu_gr_config_get_tpc_count(gr_config)));
+
+    for (i = 0U, j = 1U; i < gr_ppcs_wwdx_map_table_cfg_coeff__size_1_v();
+            i++, j = j + 4U) {
+        tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config);
+        nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
+            gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
+                (BIT32(j) % tpc_cnt)) |
+            gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
+                (BIT32(j + 1U) % tpc_cnt)) |
+            gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
+                (BIT32(j + 2U) % tpc_cnt)) |
+            gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
+                (BIT32(j + 3U) % tpc_cnt)));
+    }
+
+    nvgpu_writel(g, gr_rstr2d_map_table_cfg_r(),
+        gr_rstr2d_map_table_cfg_row_offset_f(
+            nvgpu_gr_config_get_map_row_offset(gr_config)) |
+        gr_rstr2d_map_table_cfg_num_entries_f(
+            nvgpu_gr_config_get_tpc_count(gr_config)));
+
+    return 0;
+}
+
 int gv11b_gr_init_fs_state(struct gk20a *g)
 {
     u32 data;
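A sketch of the packing gv11b_gr_init_rop_mapping() above performs. The DIV_ROUND_UP count is taken directly from the function; the 5-bit-per-field shift is an assumption inferred from the "5 bits ... 6 tpcs per 32-bit register" comment, since the real layout comes from the generated gr_crstr_gpc_map_tileN_f() macros:

#include <stdint.h>
#include <stdio.h>

#define GR_TPCS_INFO_FOR_MAPREGISTER    6U
#define DIV_ROUND_UP(n, d)    (((n) + (d) - 1U) / (d))

int main(void)
{
    /* gv11b: 1 GPC x 4 TPC (per the commit's own comment), so all
     * four tile entries fit in a single map register. */
    uint32_t num_tpcs = 1U * 4U;
    uint32_t mapregs = DIV_ROUND_UP(num_tpcs, GR_TPCS_INFO_FOR_MAPREGISTER);
    printf("map registers needed: %u\n", (unsigned)mapregs);    /* 1 */

    /* Assumed field layout: field N at bit 5*N, 5 bits wide. */
    uint32_t map = 0U;
    uint32_t tile[4] = { 0U, 1U, 2U, 3U };    /* illustrative tile values */
    for (uint32_t offset = 0U; offset < num_tpcs; offset++) {
        map |= (tile[offset] & 0x1FU) << (5U * offset);
    }
    printf("map register 0 = 0x%08x\n", (unsigned)map);
    return 0;
}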
@@ -26,7 +26,11 @@
 #include <nvgpu/types.h>

 struct gk20a;
+struct nvgpu_gr_config;

+void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask);
+int gv11b_gr_init_rop_mapping(struct gk20a *g,
+        struct nvgpu_gr_config *gr_config);
 int gv11b_gr_init_fs_state(struct gk20a *g);
 int gv11b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count,
         bool gfxp_wfi_timeout_unit_usec);
@@ -71,6 +71,7 @@ struct nvgpu_gr_zbc_query_params;
 struct nvgpu_channel_hw_state;
 struct nvgpu_engine_status_info;
 struct nvgpu_pbdma_status_info;
+struct nvgpu_gr_config;
 enum nvgpu_nvlink_minion_dlcmd;
 struct nvgpu_cbc;

@@ -430,12 +431,10 @@ struct gpu_ops {
         int (*load_smid_config)(struct gk20a *g);
         void (*program_sm_id_numbering)(struct gk20a *g,
                 u32 gpc, u32 tpc, u32 smid);
-        int (*setup_rop_mapping)(struct gk20a *g, struct gr_gk20a *gr);
         int (*init_sw_veid_bundle)(struct gk20a *g);
         void (*program_zcull_mapping)(struct gk20a *g,
                 u32 zcull_alloc_num, u32 *zcull_map_tiles);
         int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
-        void (*load_tpc_mask)(struct gk20a *g);
         int (*trigger_suspend)(struct gk20a *g);
         int (*wait_for_pause)(struct gk20a *g, struct nvgpu_warpstate *w_state);
         int (*resume_from_pause)(struct gk20a *g);

@@ -665,9 +664,15 @@ struct gpu_ops {
         } hwpm_map;

         struct {
+            void (*tpc_mask)(struct gk20a *g,
+                    u32 gpc_index, u32 pes_tpc_mask);
+            int (*rop_mapping)(struct gk20a *g,
+                    struct nvgpu_gr_config *gr_config);
             int (*fs_state)(struct gk20a *g);
-            void (*pd_tpc_per_gpc)(struct gk20a *g);
-            void (*pd_skip_table_gpc)(struct gk20a *g);
+            void (*pd_tpc_per_gpc)(struct gk20a *g,
+                    struct nvgpu_gr_config *gr_config);
+            void (*pd_skip_table_gpc)(struct gk20a *g,
+                    struct nvgpu_gr_config *gr_config);
             void (*cwd_gpcs_tpcs_num)(struct gk20a *g,
                     u32 gpc_count,
                     u32 tpc_count);
@@ -440,10 +440,8 @@ static const struct gpu_ops tu104_ops = {
         .init_sm_id_table = gr_gv100_init_sm_id_table,
         .load_smid_config = gr_gv11b_load_smid_config,
         .program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
-        .setup_rop_mapping = gr_gv11b_setup_rop_mapping,
         .program_zcull_mapping = gr_gv11b_program_zcull_mapping,
         .commit_inst = gr_gv11b_commit_inst,
-        .load_tpc_mask = gr_gv11b_load_tpc_mask,
         .trigger_suspend = gv11b_gr_sm_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
         .resume_from_pause = gv11b_gr_resume_from_pause,

@@ -654,6 +652,8 @@ static const struct gpu_ops tu104_ops = {
             gv100_gr_hwpm_map_get_active_fbpa_mask,
     },
     .init = {
+        .tpc_mask = gv11b_gr_init_tpc_mask,
+        .rop_mapping = gv11b_gr_init_rop_mapping,
         .fs_state = gv11b_gr_init_fs_state,
         .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
         .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,