diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index aac686afc..74219df24 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -113,7 +113,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { }, .gr = { .get_patch_slots = gr_gk20a_get_patch_slots, - .init_gpc_mmu = NULL, .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, .commit_global_pagepool = gr_gp10b_commit_global_pagepool, @@ -182,7 +181,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .clear_sm_hww = NULL, .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, - .disable_rd_coalesce = NULL, .set_boosted_ctx = NULL, .set_preemption_mode = vgpu_gr_set_preemption_mode, .pre_process_sm_exception = NULL, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index bf0ba0e8f..20f385efc 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -134,7 +134,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .get_num_pce = vgpu_ce_get_num_pce, }, .gr = { - .init_gpc_mmu = NULL, .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, .commit_global_pagepool = gr_gp10b_commit_global_pagepool, @@ -203,7 +202,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .clear_sm_hww = NULL, .init_ovr_sm_dsm_perf = gv11b_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs, - .disable_rd_coalesce = NULL, .set_boosted_ctx = NULL, .set_preemption_mode = vgpu_gr_set_preemption_mode, .pre_process_sm_exception = NULL, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 77b358d26..1f534cdbf 100644 --- 
a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -2020,20 +2020,16 @@ void gr_gk20a_fecs_host_int_enable(struct gk20a *g) static int gk20a_init_gr_setup_hw(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; - u32 data; int err; nvgpu_log_fn(g, " "); - if (g->ops.gr.init_gpc_mmu != NULL) { - g->ops.gr.init_gpc_mmu(g); + if (g->ops.gr.init.gpc_mmu != NULL) { + g->ops.gr.init.gpc_mmu(g); } /* load gr floorsweeping registers */ - data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r()); - data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(), - gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f()); - gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data); + g->ops.gr.init.pes_vsc_stream(g); nvgpu_gr_zcull_init_hw(g, gr->zcull, gr->config); @@ -2075,8 +2071,15 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) goto out; } - if (g->ops.gr.disable_rd_coalesce != NULL) { - g->ops.gr.disable_rd_coalesce(g); + /* + * Disable both surface and LG coalesce. + */ + if (g->ops.gr.init.su_coalesce != NULL) { + g->ops.gr.init.su_coalesce(g, 0); + } + + if (g->ops.gr.init.lg_coalesce != NULL) { + g->ops.gr.init.lg_coalesce(g, 0); } if (g->ops.gr.init.preemption_state != NULL) { diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index fa5994c93..c93145eb0 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -46,38 +46,6 @@ #include #include -void gr_gm20b_init_gpc_mmu(struct gk20a *g) -{ - u32 temp; - - nvgpu_log_info(g, "initialize gpc mmu"); - - temp = g->ops.fb.mmu_ctrl(g); - temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | - gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | - gr_gpcs_pri_mmu_ctrl_use_full_comp_tag_line_m() | - gr_gpcs_pri_mmu_ctrl_vol_fault_m() | - gr_gpcs_pri_mmu_ctrl_comp_fault_m() | - gr_gpcs_pri_mmu_ctrl_miss_gran_m() | - gr_gpcs_pri_mmu_ctrl_cache_mode_m() | - gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | - gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | - 
gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); - gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); - gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); - gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); - - gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), - g->ops.fb.mmu_debug_ctrl(g)); - gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), - g->ops.fb.mmu_debug_wr(g)); - gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), - g->ops.fb.mmu_debug_rd(g)); - - gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), - nvgpu_ltc_get_ltc_count(g)); -} - void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch) @@ -200,20 +168,6 @@ void gr_gm20b_commit_global_pagepool(struct gk20a *g, } -void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data) -{ - u32 val; - - nvgpu_log_fn(g, " "); - - val = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()); - val = set_field(val, gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(), - gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(data)); - gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), val); - - nvgpu_log_fn(g, "done"); -} - int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data) { @@ -225,7 +179,7 @@ int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, gk20a_gr_set_shader_exceptions(g, data); break; case NVB1C0_SET_RD_COALESCE: - gr_gm20b_set_rd_coalesce(g, data); + g->ops.gr.init.lg_coalesce(g, data); break; default: goto fail; @@ -244,7 +198,7 @@ int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, g->ops.gr.set_alpha_circular_buffer_size(g, data); break; case NVB197_SET_RD_COALESCE: - gr_gm20b_set_rd_coalesce(g, data); + g->ops.gr.init.lg_coalesce(g, data); break; default: goto fail; @@ -1155,24 +1109,6 @@ void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, 0); } -/* - * Disable both surface and LG coalesce. 
- */ -void gm20a_gr_disable_rd_coalesce(struct gk20a *g) -{ - u32 dbg2_reg; - - dbg2_reg = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()); - dbg2_reg = set_field(dbg2_reg, - gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(), - gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(0)); - dbg2_reg = set_field(dbg2_reg, - gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m(), - gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_f(0)); - - gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg); -} - void gm20b_gr_set_debug_mode(struct gk20a *g, bool enable) { u32 reg_val, gpc_debug_ctrl; diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 27a5773a8..cdd2f562b 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h @@ -47,9 +47,6 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch); int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); -void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data); -void gm20a_gr_disable_rd_coalesce(struct gk20a *g); -void gr_gm20b_init_gpc_mmu(struct gk20a *g); int gr_gm20b_commit_global_cb_manager(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool patch); void gr_gm20b_commit_global_pagepool(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index c4f2303c6..149f0978a 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -234,7 +234,6 @@ static const struct gpu_ops gm20b_ops = { }, .gr = { .get_patch_slots = gr_gk20a_get_patch_slots, - .init_gpc_mmu = gr_gm20b_init_gpc_mmu, .commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb, .commit_global_cb_manager = gr_gm20b_commit_global_cb_manager, .commit_global_pagepool = gr_gm20b_commit_global_pagepool, @@ -306,7 +305,6 @@ static const struct gpu_ops gm20b_ops = { .clear_sm_hww = gm20b_gr_clear_sm_hww, .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = 
gk20a_gr_get_ovr_perf_regs, - .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, .fecs_host_int_enable = gr_gk20a_fecs_host_int_enable, .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, @@ -416,6 +414,10 @@ static const struct gpu_ops gm20b_ops = { .program_zcull_mapping = gm20b_gr_program_zcull_mapping, }, .init = { + .lg_coalesce = gm20b_gr_init_lg_coalesce, + .su_coalesce = gm20b_gr_init_su_coalesce, + .pes_vsc_stream = gm20b_gr_init_pes_vsc_stream, + .gpc_mmu = gm20b_gr_init_gpc_mmu, .fifo_access = gm20b_gr_init_fifo_access, .get_access_map = gm20b_gr_init_get_access_map, .get_sm_id_size = gm20b_gr_init_get_sm_id_size, diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index 7d7be7710..130a80fc5 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c @@ -87,7 +87,7 @@ int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, gk20a_gr_set_shader_exceptions(g, data); break; case NVC0C0_SET_RD_COALESCE: - gr_gm20b_set_rd_coalesce(g, data); + g->ops.gr.init.lg_coalesce(g, data); break; default: goto fail; @@ -109,7 +109,7 @@ int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, gr_gp106_set_go_idle_timeout(g, data); break; case NVC097_SET_RD_COALESCE: - gr_gm20b_set_rd_coalesce(g, data); + g->ops.gr.init.lg_coalesce(g, data); break; case NVC097_SET_BES_CROP_DEBUG3: g->ops.gr.set_bes_crop_debug3(g, data); diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index b67d3e3e9..9a67a8452 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -607,7 +607,7 @@ int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, gk20a_gr_set_shader_exceptions(g, data); break; case NVC0C0_SET_RD_COALESCE: - gr_gm20b_set_rd_coalesce(g, data); + g->ops.gr.init.lg_coalesce(g, data); break; default: goto fail; @@ -632,7 +632,7 @@ int gr_gp10b_handle_sw_method(struct gk20a *g, u32 
addr, gr_gp10b_set_coalesce_buffer_size(g, data); break; case NVC097_SET_RD_COALESCE: - gr_gm20b_set_rd_coalesce(g, data); + g->ops.gr.init.lg_coalesce(g, data); break; case NVC097_SET_BES_CROP_DEBUG3: g->ops.gr.set_bes_crop_debug3(g, data); diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 0a191ac7c..62933290d 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -257,7 +257,6 @@ static const struct gpu_ops gp10b_ops = { }, .gr = { .get_patch_slots = gr_gk20a_get_patch_slots, - .init_gpc_mmu = gr_gm20b_init_gpc_mmu, .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, .commit_global_pagepool = gr_gp10b_commit_global_pagepool, @@ -329,7 +328,6 @@ static const struct gpu_ops gp10b_ops = { .clear_sm_hww = gm20b_gr_clear_sm_hww, .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, - .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, .set_boosted_ctx = gr_gp10b_set_boosted_ctx, .set_preemption_mode = gr_gp10b_set_preemption_mode, .pre_process_sm_exception = gr_gp10b_pre_process_sm_exception, @@ -487,6 +485,10 @@ static const struct gpu_ops gp10b_ops = { .program_zcull_mapping = gm20b_gr_program_zcull_mapping, }, .init = { + .lg_coalesce = gm20b_gr_init_lg_coalesce, + .su_coalesce = gm20b_gr_init_su_coalesce, + .pes_vsc_stream = gm20b_gr_init_pes_vsc_stream, + .gpc_mmu = gm20b_gr_init_gpc_mmu, .fifo_access = gm20b_gr_init_fifo_access, .get_access_map = gp10b_gr_init_get_access_map, .get_sm_id_size = gp10b_gr_init_get_sm_id_size, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 5fbf8d0f7..c33d29b52 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -369,7 +369,6 @@ static const struct gpu_ops gv100_ops = { }, .gr = { .get_patch_slots = gr_gv100_get_patch_slots, - .init_gpc_mmu = 
gr_gv11b_init_gpc_mmu, .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, .commit_global_pagepool = gr_gp10b_commit_global_pagepool, @@ -444,7 +443,6 @@ static const struct gpu_ops gv100_ops = { .clear_sm_hww = gv11b_gr_clear_sm_hww, .init_ovr_sm_dsm_perf = gv11b_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs, - .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, .set_boosted_ctx = gr_gp10b_set_boosted_ctx, .set_preemption_mode = gr_gp10b_set_preemption_mode, .pre_process_sm_exception = gr_gv11b_pre_process_sm_exception, @@ -631,6 +629,10 @@ static const struct gpu_ops gv100_ops = { gv100_gr_hwpm_map_get_active_fbpa_mask, }, .init = { + .lg_coalesce = gm20b_gr_init_lg_coalesce, + .su_coalesce = gm20b_gr_init_su_coalesce, + .pes_vsc_stream = gm20b_gr_init_pes_vsc_stream, + .gpc_mmu = gv11b_gr_init_gpc_mmu, .fifo_access = gm20b_gr_init_fifo_access, .get_access_map = gv11b_gr_init_get_access_map, .get_sm_id_size = gp10b_gr_init_get_sm_id_size, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 2bbc7ae7d..3494b8c3f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -3560,36 +3560,6 @@ u32 gv11b_gr_get_egpc_base(struct gk20a *g) return EGPC_PRI_BASE; } -void gr_gv11b_init_gpc_mmu(struct gk20a *g) -{ - u32 temp; - - nvgpu_log_info(g, "initialize gpc mmu"); - - temp = g->ops.fb.mmu_ctrl(g); - temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | - gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | - gr_gpcs_pri_mmu_ctrl_vol_fault_m() | - gr_gpcs_pri_mmu_ctrl_comp_fault_m() | - gr_gpcs_pri_mmu_ctrl_miss_gran_m() | - gr_gpcs_pri_mmu_ctrl_cache_mode_m() | - gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | - gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | - gr_gpcs_pri_mmu_ctrl_mmu_disable_m()| - gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m()| - gr_gpcs_pri_mmu_ctrl_atomic_capability_sys_ncoh_mode_m(); - gk20a_writel(g, 
gr_gpcs_pri_mmu_ctrl_r(), temp); - gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); - gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); - - gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), - g->ops.fb.mmu_debug_ctrl(g)); - gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), - g->ops.fb.mmu_debug_wr(g)); - gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), - g->ops.fb.mmu_debug_rd(g)); -} - void gr_gv11b_init_gfxp_wfi_timeout_count(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 031cb7f71..97e039af8 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -175,7 +175,6 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, u32 gpc_num, u32 tpc_num, u32 broadcast_flags, u32 *priv_addr_table, u32 *t); u32 gv11b_gr_get_egpc_base(struct gk20a *g); -void gr_gv11b_init_gpc_mmu(struct gk20a *g); int gr_gv11b_init_preemption_state(struct gk20a *g); void gr_gv11b_init_gfxp_wfi_timeout_count(struct gk20a *g); unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 7df76508b..d5bf7de2d 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -319,7 +319,6 @@ static const struct gpu_ops gv11b_ops = { }, .gr = { .get_patch_slots = gr_gv100_get_patch_slots, - .init_gpc_mmu = gr_gv11b_init_gpc_mmu, .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, .commit_global_pagepool = gr_gp10b_commit_global_pagepool, @@ -395,7 +394,6 @@ static const struct gpu_ops gv11b_ops = { .clear_sm_hww = gv11b_gr_clear_sm_hww, .init_ovr_sm_dsm_perf = gv11b_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs, - .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, .set_boosted_ctx = gr_gp10b_set_boosted_ctx, .set_preemption_mode = 
gr_gp10b_set_preemption_mode, .pre_process_sm_exception = gr_gv11b_pre_process_sm_exception, @@ -589,6 +587,10 @@ static const struct gpu_ops gv11b_ops = { gv100_gr_hwpm_map_align_regs_perf_pma, }, .init = { + .lg_coalesce = gm20b_gr_init_lg_coalesce, + .su_coalesce = gm20b_gr_init_su_coalesce, + .pes_vsc_stream = gm20b_gr_init_pes_vsc_stream, + .gpc_mmu = gv11b_gr_init_gpc_mmu, .fifo_access = gm20b_gr_init_fifo_access, .get_access_map = gv11b_gr_init_get_access_map, .get_sm_id_size = gp10b_gr_init_get_sm_id_size, diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c index 7c3808d8d..6864154be 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c @@ -41,6 +41,71 @@ #define FE_PWR_MODE_TIMEOUT_DEFAULT_US 10U #define FECS_CTXSW_RESET_DELAY_US 10U +void gm20b_gr_init_lg_coalesce(struct gk20a *g, u32 data) /* Updates the lg_rd_coalesce_en field of gr_gpcs_tpcs_tex_m_dbg2 with a value built from data: read, set_field, write back. */ +{ + u32 val; + + nvgpu_log_fn(g, " "); + + val = nvgpu_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()); /* start from the current register value */ + val = set_field(val, + gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(), + gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(data)); /* NOTE(review): set_field presumably leaves bits outside the mask untouched - confirm */ + nvgpu_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), val); +} + +void gm20b_gr_init_su_coalesce(struct gk20a *g, u32 data) /* Updates the su_rd_coalesce_en field of gr_gpcs_tpcs_tex_m_dbg2 with a value built from data: read, set_field, write back. */ +{ + u32 reg; + + reg = nvgpu_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()); /* same register as gm20b_gr_init_lg_coalesce, different field */ + reg = set_field(reg, + gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m(), + gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_f(data)); + + nvgpu_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), reg); +} + +void gm20b_gr_init_pes_vsc_stream(struct gk20a *g) /* Sets the master_pe field of gr_gpc0_ppc0_pes_vsc_strem to its "true" encoding: read, set_field, write back. */ +{ + u32 data = nvgpu_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r()); + data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(), + gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f()); + nvgpu_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data); +} + +void gm20b_gr_init_gpc_mmu(struct gk20a *g) /* Programs the gr_gpcs_pri_mmu_* control, PM mask and debug registers plus gr_gpcs_mmu_num_active_ltcs, using values returned by the g->ops.fb callbacks. */ +{ + u32 temp; + + nvgpu_log_info(g, "initialize gpc mmu"); + + temp = g->ops.fb.mmu_ctrl(g); /* value returned by the fb op; only the fields in the mask below are kept */ + temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | 
+ gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | + gr_gpcs_pri_mmu_ctrl_use_full_comp_tag_line_m() | + gr_gpcs_pri_mmu_ctrl_vol_fault_m() | + gr_gpcs_pri_mmu_ctrl_comp_fault_m() | + gr_gpcs_pri_mmu_ctrl_miss_gran_m() | + gr_gpcs_pri_mmu_ctrl_cache_mode_m() | + gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | + gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | + gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); + nvgpu_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); + nvgpu_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); /* both PM mask registers are written as 0 */ + nvgpu_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); + + nvgpu_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), + g->ops.fb.mmu_debug_ctrl(g)); /* debug ctrl/wr/rd registers each receive the value returned by the matching fb op */ + nvgpu_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), + g->ops.fb.mmu_debug_wr(g)); + nvgpu_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), + g->ops.fb.mmu_debug_rd(g)); + + nvgpu_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), + nvgpu_ltc_get_ltc_count(g)); /* count as returned by nvgpu_ltc_get_ltc_count(); the gv11b variant has no such write */ +} + void gm20b_gr_init_fifo_access(struct gk20a *g, bool enable) { u32 fifo_val; diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h index 3e499f45d..b545d9179 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h @@ -30,6 +30,10 @@ struct nvgpu_gr_ctx; struct netlist_av_list; struct nvgpu_gr_config; +void gm20b_gr_init_lg_coalesce(struct gk20a *g, u32 data); +void gm20b_gr_init_su_coalesce(struct gk20a *g, u32 data); +void gm20b_gr_init_pes_vsc_stream(struct gk20a *g); +void gm20b_gr_init_gpc_mmu(struct gk20a *g); void gm20b_gr_init_fifo_access(struct gk20a *g, bool enable); void gm20b_gr_init_get_access_map(struct gk20a *g, u32 **whitelist, int *num_entries); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c index 4542c69dd..71238530f 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c @@ -38,6 +38,36 @@ */ #define GR_TPCS_INFO_FOR_MAPREGISTER 6U +void gv11b_gr_init_gpc_mmu(struct gk20a 
*g) /* gv11b variant: programs the gr_gpcs_pri_mmu_* control, PM mask and debug registers from g->ops.fb values. Compared with gm20b_gr_init_gpc_mmu, the mask drops use_full_comp_tag_line, adds the two atomic_capability fields, and gr_gpcs_mmu_num_active_ltcs is not written. */ +{ + u32 temp; + + nvgpu_log_info(g, "initialize gpc mmu"); + + temp = g->ops.fb.mmu_ctrl(g); /* value returned by the fb op; only the fields in the mask below are kept */ + temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | + gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | + gr_gpcs_pri_mmu_ctrl_vol_fault_m() | + gr_gpcs_pri_mmu_ctrl_comp_fault_m() | + gr_gpcs_pri_mmu_ctrl_miss_gran_m() | + gr_gpcs_pri_mmu_ctrl_cache_mode_m() | + gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | + gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | + gr_gpcs_pri_mmu_ctrl_mmu_disable_m()| + gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m()| + gr_gpcs_pri_mmu_ctrl_atomic_capability_sys_ncoh_mode_m(); + nvgpu_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); + nvgpu_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); /* both PM mask registers are written as 0 */ + nvgpu_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); + + nvgpu_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), + g->ops.fb.mmu_debug_ctrl(g)); /* debug ctrl/wr/rd registers each receive the value returned by the matching fb op */ + nvgpu_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), + g->ops.fb.mmu_debug_wr(g)); + nvgpu_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), + g->ops.fb.mmu_debug_rd(g)); +} + void gv11b_gr_init_get_access_map(struct gk20a *g, u32 **whitelist, int *num_entries) { diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h index 8b5c609b8..593423df4 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h @@ -28,6 +28,7 @@ struct gk20a; struct nvgpu_gr_config; +void gv11b_gr_init_gpc_mmu(struct gk20a *g); void gv11b_gr_init_get_access_map(struct gk20a *g, u32 **whitelist, int *num_entries); void gv11b_gr_init_sm_id_numbering(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 7fc08fb76..cdc71836d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -268,7 +268,6 @@ struct gpu_ops { void (*commit_global_pagepool)(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u64 addr, u32 size, bool patch); - void (*init_gpc_mmu)(struct gk20a *g); int 
(*handle_sw_method)(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data); void (*set_alpha_circular_buffer_size)(struct gk20a *g, @@ -431,7 +430,6 @@ struct gpu_ops { void (*resume_single_sm)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm); void (*resume_all_sms)(struct gk20a *g); - void (*disable_rd_coalesce)(struct gk20a *g); void (*init_gfxp_wfi_timeout_count)(struct gk20a *g); unsigned long (*get_max_gfxp_wfi_timeout_count) (struct gk20a *g); @@ -664,6 +662,10 @@ struct gpu_ops { } hwpm_map; struct { + void (*lg_coalesce)(struct gk20a *g, u32 data); + void (*su_coalesce)(struct gk20a *g, u32 data); + void (*pes_vsc_stream)(struct gk20a *g); + void (*gpc_mmu)(struct gk20a *g); void (*fifo_access)(struct gk20a *g, bool enable); void (*get_access_map)(struct gk20a *g, u32 **whitelist, int *num_entries); diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 30f096145..aef320b04 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -389,7 +389,6 @@ static const struct gpu_ops tu104_ops = { }, .gr = { .get_patch_slots = gr_gv100_get_patch_slots, - .init_gpc_mmu = gr_gv11b_init_gpc_mmu, .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, .commit_global_pagepool = gr_gp10b_commit_global_pagepool, @@ -464,7 +463,6 @@ static const struct gpu_ops tu104_ops = { .clear_sm_hww = gv11b_gr_clear_sm_hww, .init_ovr_sm_dsm_perf = gv11b_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = gv11b_gr_get_ovr_perf_regs, - .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, .set_boosted_ctx = gr_gp10b_set_boosted_ctx, .set_preemption_mode = gr_gp10b_set_preemption_mode, .pre_process_sm_exception = gr_gv11b_pre_process_sm_exception, @@ -659,6 +657,10 @@ static const struct gpu_ops tu104_ops = { gv100_gr_hwpm_map_get_active_fbpa_mask, }, .init = { + .lg_coalesce = gm20b_gr_init_lg_coalesce, + .su_coalesce = gm20b_gr_init_su_coalesce, + 
.pes_vsc_stream = gm20b_gr_init_pes_vsc_stream, + .gpc_mmu = gv11b_gr_init_gpc_mmu, .fifo_access = gm20b_gr_init_fifo_access, .get_access_map = gv11b_gr_init_get_access_map, .get_sm_id_size = gp10b_gr_init_get_sm_id_size,