From 0ff5a49f45e8148713d158403adeeba504e683db Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Mon, 7 Jan 2019 15:51:35 +0530 Subject: [PATCH] gpu: nvgpu: move patch context update calls to gr/ctx unit We use below APIs to update patch context gr_gk20a_ctx_patch_write_begin() gr_gk20a_ctx_patch_write_end() gr_gk20a_ctx_patch_write() Since patch context is owned by gr/ctx unit, move these APIs to this unit and rename them to nvgpu_gr_ctx_patch_write_begin() nvgpu_gr_ctx_patch_write_end() nvgpu_gr_ctx_patch_write() Jira NVGPU-1527 Change-Id: Iee19c7a71d074763d3dcb9b1997cb2a3159d5299 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1989214 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-misra GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/ctx.c | 63 ++++++++++++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 100 +++++------------------ drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 17 ---- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 32 ++++---- drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 60 +++++++------- drivers/gpu/nvgpu/gv100/gr_gv100.c | 3 +- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 30 +++---- drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h | 19 +++++ drivers/gpu/nvgpu/tu104/gr_tu104.c | 12 +-- 9 files changed, 171 insertions(+), 165 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index de33ff69d..b19517311 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -24,6 +24,7 @@ #include #include #include +#include #include static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g, @@ -528,3 +529,65 @@ int nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, return 0; } + +/* + * Context state can be written directly, or "patched" at times. So that code + * can be used in either situation it is written using a series of + * _ctx_patch_write(..., patch) statements. However any necessary map overhead + * should be minimized; thus, bundle the sequence of these writes together, and + * set them up and close with _ctx_patch_write_begin/_ctx_patch_write_end. 
+ */ +int nvgpu_gr_ctx_patch_write_begin(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count) +{ + if (update_patch_count) { + /* reset patch count if ucode has already processed it */ + gr_ctx->patch_ctx.data_count = + g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); + nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", + gr_ctx->patch_ctx.data_count); + } + return 0; +} + +void nvgpu_gr_ctx_patch_write_end(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count) +{ + /* Write context count to context image if it is mapped */ + if (update_patch_count) { + g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, + gr_ctx->patch_ctx.data_count); + nvgpu_log(g, gpu_dbg_info, "write patch count %d", + gr_ctx->patch_ctx.data_count); + } +} + +void nvgpu_gr_ctx_patch_write(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + u32 addr, u32 data, bool patch) +{ + if (patch) { + u32 patch_slot = gr_ctx->patch_ctx.data_count * + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; + + if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( + gr_ctx->patch_ctx.mem.size) - + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { + nvgpu_err(g, "failed to access patch_slot %d", + patch_slot); + return; + } + + nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr); + nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1U, data); + gr_ctx->patch_ctx.data_count++; + + nvgpu_log(g, gpu_dbg_info, + "patch addr = 0x%x data = 0x%x data_count %d", + addr, data, gr_ctx->patch_ctx.data_count); + } else { + nvgpu_writel(g, addr, data); + } +} diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index f709626a1..689dc2d33 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -599,66 +599,6 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) return 0; } -/* - * Context state can be written directly, or "patched" at times. So that code - * can be used in either situation it is written using a series of - * _ctx_patch_write(..., patch) statements. However any necessary map overhead - * should be minimized; thus, bundle the sequence of these writes together, and - * set them up and close with _ctx_patch_write_begin/_ctx_patch_write_end. 
- */ - -int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - bool update_patch_count) -{ - if (update_patch_count) { - /* reset patch count if ucode has already processed it */ - gr_ctx->patch_ctx.data_count = - g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); - nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", - gr_ctx->patch_ctx.data_count); - } - return 0; -} - -void gr_gk20a_ctx_patch_write_end(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - bool update_patch_count) -{ - /* Write context count to context image if it is mapped */ - if (update_patch_count) { - g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, - gr_ctx->patch_ctx.data_count); - nvgpu_log(g, gpu_dbg_info, "write patch count %d", - gr_ctx->patch_ctx.data_count); - } -} - -void gr_gk20a_ctx_patch_write(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - u32 addr, u32 data, bool patch) -{ - if (patch) { - u32 patch_slot = gr_ctx->patch_ctx.data_count * - PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; - if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( - gr_ctx->patch_ctx.mem.size) - - PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { - nvgpu_err(g, "failed to access patch_slot %d", - patch_slot); - return; - } - nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr); - nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1U, data); - gr_ctx->patch_ctx.data_count++; - nvgpu_log(g, gpu_dbg_info, - "patch addr = 0x%x data = 0x%x data_count %d", - addr, data, gr_ctx->patch_ctx.data_count); - } else { - gk20a_writel(g, addr, data); - } -} - static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) { u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> @@ -774,7 +714,7 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, if (patch) { int err; - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false); if (err != 0) { return err; } @@ -820,7 +760,7 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, g->ops.gr.commit_global_cb_manager(g, gr_ctx, patch); if (patch) { - gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false); } return 0; @@ -855,22 +795,22 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); } else { gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg; pd_ab_dist_cfg0 = 
gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0; ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug; mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug; - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); } return 0; @@ -6166,7 +6106,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, gr_ctx->patch_ctx.data_count = 0; } - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, addr, data, true); g->ops.gr.ctxsw_prog.set_patch_count(g, mem, @@ -7450,7 +7390,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, } offset_addrs = offsets + max_offsets; - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false); if (err != 0) { goto cleanup; } @@ -7582,7 +7522,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, } if (gr_ctx->patch_ctx.mem.cpu_va != NULL) { - gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); } return err; @@ -7632,20 +7572,20 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g, u64 addr, u32 size, bool patch) { BUG_ON(u64_hi32(addr) != 0U); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), gr_scc_pagepool_base_addr_39_8_f((u32)addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), gr_scc_pagepool_total_pages_f(size) | gr_scc_pagepool_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), gr_gpcs_gcc_pagepool_base_addr_39_8_f((u32)addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), gr_gpcs_gcc_pagepool_total_pages_f(size), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(), gr_pd_pagepool_total_pages_f(size) | gr_pd_pagepool_valid_true_f(), patch); } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index f4e3aa526..d1316d516 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -50,15 +50,6 @@ #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP BIT32(1) #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP BIT32(2) -/* - * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries - * of address and data pairs - */ -#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2U -#define PATCH_CTX_SLOTS_PER_PAGE \ - (PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * (u32)sizeof(u32))) -#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32)) - #define NVGPU_PREEMPTION_MODE_GRAPHICS_WFI BIT32(0) #define NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP BIT32(1) @@ -547,14 +538,6 @@ int 
gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, u64 gpu_va, u32 mode); -void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - u32 addr, u32 data, bool patch); -int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - bool update_patch_count); -void gr_gk20a_ctx_patch_write_end(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - bool update_patch_count); void gr_gk20a_commit_global_pagepool(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, bool patch); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index e78600676..500529960 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -122,15 +122,15 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch) { - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) | gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); } @@ -141,17 +141,17 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, { u32 data; - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), gr_scc_bundle_cb_size_div_256b_f(size) | gr_scc_bundle_cb_size_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); @@ -165,7 +165,7 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d", g->gr.bundle_cb_token_limit, data); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); @@ -187,7 +187,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, nvgpu_log_fn(g, " "); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_r(), gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), patch); @@ -196,7 +196,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / 
gr_pd_ab_dist_cfg1_max_output_granularity_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); @@ -213,12 +213,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, cbm_cfg_size2 = gr->alpha_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size1, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, attrib_offset_in_chunk, patch); @@ -226,12 +226,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, attrib_offset_in_chunk += gr->attrib_cb_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size2, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, alpha_offset_in_chunk, patch); @@ -239,7 +239,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, alpha_offset_in_chunk += gr->alpha_cb_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) | gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3U), @@ -256,7 +256,7 @@ void gr_gm20b_commit_global_pagepool(struct gk20a *g, { gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(), gr_gpcs_swdx_rm_pagepool_total_pages_f(size) | gr_gpcs_swdx_rm_pagepool_valid_true_f(), patch); diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index d7ec60e15..f0fcc81c8 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -416,9 +416,9 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, cb_attrib_cache_size_init = gr->attrib_cb_default_size; } - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(), gr->attrib_cb_default_size, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(), gr->alpha_cb_default_size, patch); pd_ab_max_output = (gr->alpha_cb_default_size * @@ -426,11 +426,11 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, gr_pd_ab_dist_cfg1_max_output_granularity_v(); if (g->gr.pd_max_batches != 0U) { - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch); } else { - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); } @@ -450,17 +450,17 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, 
cbm_cfg_size_steadystate = gr->attrib_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_beta, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, attrib_offset_in_chunk, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_steadystate, @@ -469,12 +469,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, attrib_offset_in_chunk += attrib_size_in_chunk * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_alpha, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, alpha_offset_in_chunk, patch); @@ -482,7 +482,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, alpha_offset_in_chunk += gr->alpha_cb_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), patch); @@ -496,17 +496,17 @@ void gr_gp10b_commit_global_pagepool(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, bool patch) { - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), gr_scc_pagepool_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), gr_scc_pagepool_total_pages_f(size) | gr_scc_pagepool_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), gr_gpcs_gcc_pagepool_total_pages_f(size), patch); } @@ -1100,7 +1100,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, } } - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); if (err != 0) { nvgpu_err(g, "can't map patch context"); goto out; @@ -1133,28 +1133,28 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, size = gr_ctx->spill_ctxsw_buffer.size / gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_addr_r(), gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_size_r(), gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), true); cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_beta_cb_ctrl_r(), gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), 
gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); } out: @@ -1373,14 +1373,14 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } @@ -1391,17 +1391,17 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, { u32 data; - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(), gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(), gr_scc_bundle_cb_size_div_256b_f(size) | gr_scc_bundle_cb_size_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(), gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(), gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); @@ -1415,7 +1415,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d", g->gr.bundle_cb_token_limit, data); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(), gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); } @@ -2172,13 +2172,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, g->ops.gr.update_ctxsw_preemption_mode(ch->g, gr_ctx, &ch->ctx_header); - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); if (err != 0) { nvgpu_err(g, "can't map patch context"); goto enable_ch; } g->ops.gr.commit_global_cb_manager(g, gr_ctx, true); - gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); } enable_ch: diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index cfa3f2761..8bbc1694d 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -1,7 +1,7 @@ /* * GV100 GPU GR * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,6 +27,7 @@ #include #include #include +#include #include "gk20a/gr_gk20a.h" #include "gk20a/gr_pri_gk20a.h" diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 6ac511063..e41f261f0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1787,7 +1787,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, } } - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); if (err != 0) { nvgpu_err(g, "can't map patch context"); goto out; @@ -1822,28 +1822,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, size = (u32)gr_ctx->spill_ctxsw_buffer.size / gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_addr_r(), gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_size_r(), gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), true); cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_beta_cb_ctrl_r(), gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_fe_gfxp_wfi_timeout_r(), g->gr.gfxp_wfi_timeout_count, true); @@ -1852,7 +1852,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, g->ops.gr.commit_gfxp_rtv_cb(g, gr_ctx, true); } - gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); } out: @@ -2164,14 +2164,14 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch); BUG_ON(u64_hi32(addr) != 0U); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f((u32)addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f((u32)addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } @@ -3023,14 +3023,14 @@ int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), + nvgpu_gr_ctx_patch_write(g, 
ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); return 0; diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h index 436427953..27d83c268 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h @@ -26,6 +26,15 @@ #include #include +/* + * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries + * of address and data pairs + */ +#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2U +#define PATCH_CTX_SLOTS_PER_PAGE \ + (PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * (u32)sizeof(u32))) +#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32)) + struct gk20a; struct vm_gk20a; @@ -158,4 +167,14 @@ int nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image, bool cde); +int nvgpu_gr_ctx_patch_write_begin(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count); +void nvgpu_gr_ctx_patch_write_end(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count); +void nvgpu_gr_ctx_patch_write(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + u32 addr, u32 data, bool patch); + #endif /* NVGPU_INCLUDE_GR_CTX_H */ diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c index 7d0825f85..966faec79 100644 --- a/drivers/gpu/nvgpu/tu104/gr_tu104.c +++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c @@ -153,13 +153,13 @@ static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, u32 gfxpAddSize, bool patch) { - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(), gr_scc_rm_rtv_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(), gr_scc_rm_rtv_cb_size_div_256b_f(size), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(), gr_gpcs_gcc_rm_rtv_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(), gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(gfxpAddSize), patch); } @@ -181,7 +181,7 @@ int gr_tu104_commit_global_ctx_buffers(struct gk20a *g, if (patch) { int err; - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false); if (err != 0) { return err; } @@ -199,7 +199,7 @@ int gr_tu104_commit_global_ctx_buffers(struct gk20a *g, gfxpaddsize, patch); if (patch) { - gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false); } return 0;
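
Note (not part of the patch): a minimal caller sketch showing how the renamed patch-write API is intended to be used, following the begin/write/end pattern visible above in gr_gk20a_commit_global_ctx_buffers() and gr_tu104_commit_global_ctx_buffers(). The function name, register argument, and value here are placeholders for illustration; only the nvgpu_gr_ctx_patch_write*() signatures are taken from the patch.

/*
 * Illustrative caller only (not from the patch). Assumes the
 * nvgpu_gr_ctx_patch_write*() declarations added to nvgpu/gr/ctx.h
 * by this change are visible in this translation unit.
 */
static int commit_example(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
			  u32 reg, u32 val, bool patch)
{
	int err;

	if (patch) {
		/*
		 * Open a run of patch writes. Passing false leaves
		 * patch_ctx.data_count untouched; callers that follow
		 * ucode-processed patches (e.g. the preemption mode
		 * updates above) pass true so the count is re-read from
		 * the context image first.
		 */
		err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false);
		if (err != 0) {
			return err;
		}
	}

	/*
	 * With patch == true the addr/data pair is appended to the patch
	 * context; with patch == false the register is written directly.
	 */
	nvgpu_gr_ctx_patch_write(g, gr_ctx, reg, val, patch);

	if (patch) {
		/* Close the run; false again skips the patch count update. */
		nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false);
	}

	return 0;
}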