From 0ff5a49f45e8148713d158403adeeba504e683db Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Mon, 7 Jan 2019 15:51:35 +0530 Subject: [PATCH] gpu: nvgpu: move patch context update calls to gr/ctx unit We use below APIs to update patch context gr_gk20a_ctx_patch_write_begin() gr_gk20a_ctx_patch_write_end() gr_gk20a_ctx_patch_write() Since patch context is owned by gr/ctx unit, move these APIs to this unit and rename them to nvgpu_gr_ctx_patch_write_begin() nvgpu_gr_ctx_patch_write_end() nvgpu_gr_ctx_patch_write() Jira NVGPU-1527 Change-Id: Iee19c7a71d074763d3dcb9b1997cb2a3159d5299 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1989214 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-misra GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/ctx.c | 63 ++++++++++++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 100 +++++------------------ drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 17 ---- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 32 ++++---- drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 60 +++++++------- drivers/gpu/nvgpu/gv100/gr_gv100.c | 3 +- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 30 +++---- drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h | 19 +++++ drivers/gpu/nvgpu/tu104/gr_tu104.c | 12 +-- 9 files changed, 171 insertions(+), 165 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index de33ff69d..b19517311 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -24,6 +24,7 @@ #include #include #include +#include #include static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g, @@ -528,3 +529,65 @@ int nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, return 0; } + +/* + * Context state can be written directly, or "patched" at times. So that code + * can be used in either situation it is written using a series of + * _ctx_patch_write(..., patch) statements. However any necessary map overhead + * should be minimized; thus, bundle the sequence of these writes together, and + * set them up and close with _ctx_patch_write_begin/_ctx_patch_write_end. 
+ */ +int nvgpu_gr_ctx_patch_write_begin(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count) +{ + if (update_patch_count) { + /* reset patch count if ucode has already processed it */ + gr_ctx->patch_ctx.data_count = + g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); + nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", + gr_ctx->patch_ctx.data_count); + } + return 0; +} + +void nvgpu_gr_ctx_patch_write_end(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count) +{ + /* Write context count to context image if it is mapped */ + if (update_patch_count) { + g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, + gr_ctx->patch_ctx.data_count); + nvgpu_log(g, gpu_dbg_info, "write patch count %d", + gr_ctx->patch_ctx.data_count); + } +} + +void nvgpu_gr_ctx_patch_write(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + u32 addr, u32 data, bool patch) +{ + if (patch) { + u32 patch_slot = gr_ctx->patch_ctx.data_count * + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; + + if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( + gr_ctx->patch_ctx.mem.size) - + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { + nvgpu_err(g, "failed to access patch_slot %d", + patch_slot); + return; + } + + nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr); + nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1U, data); + gr_ctx->patch_ctx.data_count++; + + nvgpu_log(g, gpu_dbg_info, + "patch addr = 0x%x data = 0x%x data_count %d", + addr, data, gr_ctx->patch_ctx.data_count); + } else { + nvgpu_writel(g, addr, data); + } +} diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index f709626a1..689dc2d33 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -599,66 +599,6 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) return 0; } -/* - * Context state can be written directly, or "patched" at times. So that code - * can be used in either situation it is written using a series of - * _ctx_patch_write(..., patch) statements. However any necessary map overhead - * should be minimized; thus, bundle the sequence of these writes together, and - * set them up and close with _ctx_patch_write_begin/_ctx_patch_write_end. 
- */ - -int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - bool update_patch_count) -{ - if (update_patch_count) { - /* reset patch count if ucode has already processed it */ - gr_ctx->patch_ctx.data_count = - g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); - nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", - gr_ctx->patch_ctx.data_count); - } - return 0; -} - -void gr_gk20a_ctx_patch_write_end(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - bool update_patch_count) -{ - /* Write context count to context image if it is mapped */ - if (update_patch_count) { - g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, - gr_ctx->patch_ctx.data_count); - nvgpu_log(g, gpu_dbg_info, "write patch count %d", - gr_ctx->patch_ctx.data_count); - } -} - -void gr_gk20a_ctx_patch_write(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - u32 addr, u32 data, bool patch) -{ - if (patch) { - u32 patch_slot = gr_ctx->patch_ctx.data_count * - PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; - if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( - gr_ctx->patch_ctx.mem.size) - - PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { - nvgpu_err(g, "failed to access patch_slot %d", - patch_slot); - return; - } - nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr); - nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1U, data); - gr_ctx->patch_ctx.data_count++; - nvgpu_log(g, gpu_dbg_info, - "patch addr = 0x%x data = 0x%x data_count %d", - addr, data, gr_ctx->patch_ctx.data_count); - } else { - gk20a_writel(g, addr, data); - } -} - static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) { u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> @@ -774,7 +714,7 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, if (patch) { int err; - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false); if (err != 0) { return err; } @@ -820,7 +760,7 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, g->ops.gr.commit_global_cb_manager(g, gr_ctx, patch); if (patch) { - gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false); } return 0; @@ -855,22 +795,22 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); } else { gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg; pd_ab_dist_cfg0 = 
gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0; ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug; mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug; - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); } return 0; @@ -6166,7 +6106,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, gr_ctx->patch_ctx.data_count = 0; } - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, addr, data, true); g->ops.gr.ctxsw_prog.set_patch_count(g, mem, @@ -7450,7 +7390,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, } offset_addrs = offsets + max_offsets; - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false); if (err != 0) { goto cleanup; } @@ -7582,7 +7522,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, } if (gr_ctx->patch_ctx.mem.cpu_va != NULL) { - gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); } return err; @@ -7632,20 +7572,20 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g, u64 addr, u32 size, bool patch) { BUG_ON(u64_hi32(addr) != 0U); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), gr_scc_pagepool_base_addr_39_8_f((u32)addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), gr_scc_pagepool_total_pages_f(size) | gr_scc_pagepool_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), gr_gpcs_gcc_pagepool_base_addr_39_8_f((u32)addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), gr_gpcs_gcc_pagepool_total_pages_f(size), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(), gr_pd_pagepool_total_pages_f(size) | gr_pd_pagepool_valid_true_f(), patch); } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index f4e3aa526..d1316d516 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -50,15 +50,6 @@ #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP BIT32(1) #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP BIT32(2) -/* - * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries - * of address and data pairs - */ -#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2U -#define PATCH_CTX_SLOTS_PER_PAGE \ - (PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * (u32)sizeof(u32))) -#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32)) - #define NVGPU_PREEMPTION_MODE_GRAPHICS_WFI BIT32(0) #define NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP BIT32(1) @@ -547,14 +538,6 @@ int 
gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, u64 gpu_va, u32 mode); -void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - u32 addr, u32 data, bool patch); -int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - bool update_patch_count); -void gr_gk20a_ctx_patch_write_end(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - bool update_patch_count); void gr_gk20a_commit_global_pagepool(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, bool patch); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index e78600676..500529960 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -122,15 +122,15 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch) { - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) | gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); } @@ -141,17 +141,17 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, { u32 data; - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), gr_scc_bundle_cb_size_div_256b_f(size) | gr_scc_bundle_cb_size_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); @@ -165,7 +165,7 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d", g->gr.bundle_cb_token_limit, data); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); @@ -187,7 +187,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, nvgpu_log_fn(g, " "); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_r(), gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), patch); @@ -196,7 +196,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / 
gr_pd_ab_dist_cfg1_max_output_granularity_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); @@ -213,12 +213,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, cbm_cfg_size2 = gr->alpha_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size1, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, attrib_offset_in_chunk, patch); @@ -226,12 +226,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, attrib_offset_in_chunk += gr->attrib_cb_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size2, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, alpha_offset_in_chunk, patch); @@ -239,7 +239,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, alpha_offset_in_chunk += gr->alpha_cb_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) | gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3U), @@ -256,7 +256,7 @@ void gr_gm20b_commit_global_pagepool(struct gk20a *g, { gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(), gr_gpcs_swdx_rm_pagepool_total_pages_f(size) | gr_gpcs_swdx_rm_pagepool_valid_true_f(), patch); diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index d7ec60e15..f0fcc81c8 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -416,9 +416,9 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, cb_attrib_cache_size_init = gr->attrib_cb_default_size; } - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(), gr->attrib_cb_default_size, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(), gr->alpha_cb_default_size, patch); pd_ab_max_output = (gr->alpha_cb_default_size * @@ -426,11 +426,11 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, gr_pd_ab_dist_cfg1_max_output_granularity_v(); if (g->gr.pd_max_batches != 0U) { - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch); } else { - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); } @@ -450,17 +450,17 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, 
cbm_cfg_size_steadystate = gr->attrib_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_beta, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, attrib_offset_in_chunk, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_steadystate, @@ -469,12 +469,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, attrib_offset_in_chunk += attrib_size_in_chunk * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_alpha, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, alpha_offset_in_chunk, patch); @@ -482,7 +482,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, alpha_offset_in_chunk += gr->alpha_cb_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), patch); @@ -496,17 +496,17 @@ void gr_gp10b_commit_global_pagepool(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, bool patch) { - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), gr_scc_pagepool_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), gr_scc_pagepool_total_pages_f(size) | gr_scc_pagepool_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), gr_gpcs_gcc_pagepool_total_pages_f(size), patch); } @@ -1100,7 +1100,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, } } - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); if (err != 0) { nvgpu_err(g, "can't map patch context"); goto out; @@ -1133,28 +1133,28 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, size = gr_ctx->spill_ctxsw_buffer.size / gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_addr_r(), gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_size_r(), gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), true); cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_beta_cb_ctrl_r(), gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), 
gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); } out: @@ -1373,14 +1373,14 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } @@ -1391,17 +1391,17 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, { u32 data; - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(), gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(), gr_scc_bundle_cb_size_div_256b_f(size) | gr_scc_bundle_cb_size_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(), gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(), gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); @@ -1415,7 +1415,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d", g->gr.bundle_cb_token_limit, data); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(), gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); } @@ -2172,13 +2172,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, g->ops.gr.update_ctxsw_preemption_mode(ch->g, gr_ctx, &ch->ctx_header); - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); if (err != 0) { nvgpu_err(g, "can't map patch context"); goto enable_ch; } g->ops.gr.commit_global_cb_manager(g, gr_ctx, true); - gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); } enable_ch: diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index cfa3f2761..8bbc1694d 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -1,7 +1,7 @@ /* * GV100 GPU GR * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,6 +27,7 @@ #include #include #include +#include #include "gk20a/gr_gk20a.h" #include "gk20a/gr_pri_gk20a.h" diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 6ac511063..e41f261f0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1787,7 +1787,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, } } - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); if (err != 0) { nvgpu_err(g, "can't map patch context"); goto out; @@ -1822,28 +1822,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, size = (u32)gr_ctx->spill_ctxsw_buffer.size / gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_addr_r(), gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_size_r(), gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), true); cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_beta_cb_ctrl_r(), gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write(g, gr_ctx, + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_fe_gfxp_wfi_timeout_r(), g->gr.gfxp_wfi_timeout_count, true); @@ -1852,7 +1852,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, g->ops.gr.commit_gfxp_rtv_cb(g, gr_ctx, true); } - gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); } out: @@ -2164,14 +2164,14 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch); BUG_ON(u64_hi32(addr) != 0U); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f((u32)addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f((u32)addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } @@ -3023,14 +3023,14 @@ int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), + nvgpu_gr_ctx_patch_write(g, 
ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); + nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); return 0; diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h index 436427953..27d83c268 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h @@ -26,6 +26,15 @@ #include #include +/* + * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries + * of address and data pairs + */ +#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2U +#define PATCH_CTX_SLOTS_PER_PAGE \ + (PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * (u32)sizeof(u32))) +#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32)) + struct gk20a; struct vm_gk20a; @@ -158,4 +167,14 @@ int nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image, bool cde); +int nvgpu_gr_ctx_patch_write_begin(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count); +void nvgpu_gr_ctx_patch_write_end(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count); +void nvgpu_gr_ctx_patch_write(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + u32 addr, u32 data, bool patch); + #endif /* NVGPU_INCLUDE_GR_CTX_H */ diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c index 7d0825f85..966faec79 100644 --- a/drivers/gpu/nvgpu/tu104/gr_tu104.c +++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c @@ -153,13 +153,13 @@ static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, u32 gfxpAddSize, bool patch) { - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(), gr_scc_rm_rtv_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(), gr_scc_rm_rtv_cb_size_div_256b_f(size), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(), gr_gpcs_gcc_rm_rtv_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(), + nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(), gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(gfxpAddSize), patch); } @@ -181,7 +181,7 @@ int gr_tu104_commit_global_ctx_buffers(struct gk20a *g, if (patch) { int err; - err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); + err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false); if (err != 0) { return err; } @@ -199,7 +199,7 @@ int gr_tu104_commit_global_ctx_buffers(struct gk20a *g, gfxpaddsize, patch); if (patch) { - gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false); } return 0;
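
Note (not part of the patch): a minimal caller sketch showing how the renamed patch-write API is intended to be used, following the begin/write/end pattern visible above in gr_gk20a_commit_global_ctx_buffers() and gr_tu104_commit_global_ctx_buffers(). The function name, register argument, and value here are placeholders for illustration; only the nvgpu_gr_ctx_patch_write*() signatures are taken from the patch.

/*
 * Illustrative caller only (not from the patch). Assumes the
 * nvgpu_gr_ctx_patch_write*() declarations added to nvgpu/gr/ctx.h
 * by this change are visible in this translation unit.
 */
static int commit_example(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
			  u32 reg, u32 val, bool patch)
{
	int err;

	if (patch) {
		/*
		 * Open a run of patch writes. Passing false leaves
		 * patch_ctx.data_count untouched; callers that follow
		 * ucode-processed patches (e.g. the preemption mode
		 * updates above) pass true so the count is re-read from
		 * the context image first.
		 */
		err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false);
		if (err != 0) {
			return err;
		}
	}

	/*
	 * With patch == true the addr/data pair is appended to the patch
	 * context; with patch == false the register is written directly.
	 */
	nvgpu_gr_ctx_patch_write(g, gr_ctx, reg, val, patch);

	if (patch) {
		/* Close the run; false again skips the patch count update. */
		nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false);
	}

	return 0;
}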