From f5c093d47e07b4b61f2312982bb1e6405fb3b0a7 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 4 Apr 2019 16:19:26 +0530
Subject: [PATCH] gpu: nvgpu: add common.gr.setup apis to allocate/free context

Add the following APIs to common.gr.setup to allocate/free a context:
nvgpu_gr_setup_alloc_obj_ctx()
nvgpu_gr_setup_free_gr_ctx()

Define two new HALs:
g->ops.gr.setup.alloc_obj_ctx()
g->ops.gr.setup.free_gr_ctx()

Move the corresponding code from gr_gk20a.c to the common.gr.setup unit.

Jira NVGPU-1886

Change-Id: Icf170a6ed8979afebcedaa98e3df1483437b427b
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2092169
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/fifo/tsg.c           |   5 +-
 drivers/gpu/nvgpu/common/gr/gr_setup.c        | 110 ++++++++++++++++
 .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c  |   4 +-
 .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c  |   4 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c            | 117 ------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h            |   5 -
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |   4 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c           |   4 +-
 drivers/gpu/nvgpu/gv100/hal_gv100.c           |   4 +-
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c           |   4 +-
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |   9 +-
 drivers/gpu/nvgpu/include/nvgpu/gr/setup.h    |   7 ++
 drivers/gpu/nvgpu/os/linux/cde.c              |   2 +-
 drivers/gpu/nvgpu/os/linux/ioctl_channel.c    |   2 +-
 drivers/gpu/nvgpu/tu104/hal_tu104.c           |   4 +-
 15 files changed, 141 insertions(+), 144 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 7821a485d..563ce4426 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -652,8 +652,9 @@ void gk20a_tsg_release(struct nvgpu_ref *ref)
 	struct gk20a *g = tsg->g;
 	struct gk20a_event_id_data *event_id_data, *event_id_data_temp;
 
-	if (tsg->gr_ctx != NULL && nvgpu_mem_is_valid(&tsg->gr_ctx->mem)) {
-		gr_gk20a_free_tsg_gr_ctx(tsg);
+	if (tsg->gr_ctx != NULL && nvgpu_mem_is_valid(&tsg->gr_ctx->mem) &&
+			tsg->vm != NULL) {
+		g->ops.gr.setup.free_gr_ctx(g, tsg->vm, tsg->gr_ctx);
 	}
 
 	/* unhook all events created on this TSG */
diff --git a/drivers/gpu/nvgpu/common/gr/gr_setup.c b/drivers/gpu/nvgpu/common/gr/gr_setup.c
index cc593ab95..c098a3d5b 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_setup.c
+++ b/drivers/gpu/nvgpu/common/gr/gr_setup.c
@@ -23,6 +23,8 @@
 #include <nvgpu/log.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/subctx.h>
+#include <nvgpu/gr/obj_ctx.h>
 #include <nvgpu/gr/zcull.h>
 #include <nvgpu/gr/setup.h>
 #include <nvgpu/channel.h>
@@ -78,3 +80,111 @@ int nvgpu_gr_setup_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c,
 
 	return nvgpu_gr_setup_zcull(g, c, gr_ctx);
 }
+
+/* Bind an object context of class_num to channel c; the TSG's gr ctx is
+ * allocated on first use and only committed to the channel afterwards.
+ * Returns 0 on success, -EINVAL/-ENOMEM or the allocation error otherwise. */
+int nvgpu_gr_setup_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num,
+		u32 flags)
+{
+	struct gk20a *g = c->g;
+	struct nvgpu_gr_ctx *gr_ctx;
+	struct tsg_gk20a *tsg = NULL;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	/* an address space needs to have been bound at this point. */
+	if (!gk20a_channel_as_bound(c) && (c->vm == NULL)) {
+		nvgpu_err(g, "not bound to address space at time"
+			   " of grctx allocation");
+		return -EINVAL;
+	}
+
+	if (!g->ops.gr.is_valid_class(g, class_num)) {
+		nvgpu_err(g, "invalid obj class 0x%x", class_num);
+		err = -EINVAL;
+		goto out;
+	}
+	c->obj_class = class_num;
+
+	tsg = tsg_gk20a_from_ch(c);
+	if (tsg == NULL) {
+		return -EINVAL;
+	}
+
+	gr_ctx = tsg->gr_ctx;
+
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
+		if (c->subctx == NULL) {
+			c->subctx = nvgpu_gr_subctx_alloc(g, c->vm);
+			if (c->subctx == NULL) {
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+	}
+
+	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
+		tsg->vm = c->vm;
+		nvgpu_vm_get(tsg->vm);
+
+		err = nvgpu_gr_obj_ctx_alloc(g, g->gr.golden_image,
+				g->gr.global_ctx_buffer, gr_ctx, c->subctx,
+				tsg->vm, &c->inst_block, class_num, flags,
+				c->cde, c->vpr);
+		if (err != 0) {
+			nvgpu_err(g, "failed to allocate gr ctx buffer");
+			nvgpu_vm_put(tsg->vm);
+			tsg->vm = NULL;
+			goto out;
+		}
+
+		gr_ctx->tsgid = tsg->tsgid;
+	} else {
+		/* commit gr ctx buffer */
+		nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx,
+			c->subctx, gr_ctx->mem.gpu_va);
+	}
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
+		err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
+			c->subctx, gr_ctx, tsg->tgid, 0);
+		if (err != 0) {
+			nvgpu_warn(g, "fail to bind channel for ctxsw trace");
+		}
+	}
+#endif
+
+	nvgpu_log_fn(g, "done");
+	return 0;
+out:
+	if (c->subctx != NULL) {
+		nvgpu_gr_subctx_free(g, c->subctx, c->vm);
+		c->subctx = NULL; /* else a later call reuses freed memory */
+	}
+
+	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
+	 *    can be reused so no need to release them.
+	 * 2. golden image init and load is a one time thing so if
+	 *    they pass, no need to undo. */
+	nvgpu_err(g, "fail");
+	return err;
+}
+
+/* Dump ctxsw stats if enabled, then free gr_ctx; NULL gr_ctx frees nothing. */
+void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g,
+		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (gr_ctx == NULL) {
+		return;
+	}
+	if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) &&
+	     g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) {
+		g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, &gr_ctx->mem);
+	}
+	nvgpu_gr_ctx_free(g, gr_ctx, g->gr.global_ctx_buffer, vm);
+}
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index 8ce6911db..090b44d04 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -137,11 +137,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
 		.set_hww_esr_report_mask = NULL,
 		.set_gpc_tpc_mask = NULL,
-		.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
 		.detect_sm_arch = vgpu_gr_detect_sm_arch,
-		.free_gr_ctx = vgpu_gr_free_gr_ctx,
 		.dump_gr_regs = NULL,
 		.update_pc_sampling = vgpu_gr_update_pc_sampling,
 		.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
@@ -274,6 +272,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		},
 		.setup = {
 			.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
+			.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
+			.free_gr_ctx = vgpu_gr_free_gr_ctx,
 		},
 		.zbc = {
 			.add_color = NULL,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
index ad0247d75..2cf45ab17 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -160,11 +160,9 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
 		.set_hww_esr_report_mask = NULL,
 		.set_gpc_tpc_mask = NULL,
-		.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
 		.detect_sm_arch = vgpu_gr_detect_sm_arch,
-		.free_gr_ctx = vgpu_gr_free_gr_ctx,
 		.dump_gr_regs = NULL,
 		.update_pc_sampling = vgpu_gr_update_pc_sampling,
 		.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
@@ -317,6 +315,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		},
 		.setup = {
 			.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
+			.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
+			.free_gr_ctx = vgpu_gr_free_gr_ctx,
 		},
 		.zbc = {
 			.add_color = NULL,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 69ad1c92d..7dc43aa87 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -355,128 +355,11 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
 	return 0;
 }
 
-void gr_gk20a_free_gr_ctx(struct gk20a *g,
-			  struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
-{
-	nvgpu_log_fn(g, " ");
-
-	if (gr_ctx != NULL) {
-		if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) &&
-		     g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) {
-			g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, &gr_ctx->mem);
-		}
-
-		nvgpu_gr_ctx_free(g, gr_ctx, g->gr.global_ctx_buffer, vm);
-	}
-}
-
-void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
-{
-	struct gk20a *g = tsg->g;
-
-	if (tsg->vm == NULL) {
-		nvgpu_err(g, "No address space bound");
-		return;
-	}
-	tsg->g->ops.gr.free_gr_ctx(g, tsg->vm, tsg->gr_ctx);
-}
-
 u32 gr_gk20a_get_patch_slots(struct gk20a *g)
 {
 	return PATCH_CTX_SLOTS_PER_PAGE;
 }
 
-int gk20a_alloc_obj_ctx(struct channel_gk20a  *c, u32 class_num, u32 flags)
-{
-	struct gk20a *g = c->g;
-	struct nvgpu_gr_ctx *gr_ctx;
-	struct tsg_gk20a *tsg = NULL;
-	int err = 0;
-
-	nvgpu_log_fn(g, " ");
-
-	/* an address space needs to have been bound at this point.*/
-	if (!gk20a_channel_as_bound(c) && (c->vm == NULL)) {
-		nvgpu_err(g,
-			   "not bound to address space at time"
-			   " of grctx allocation");
-		return -EINVAL;
-	}
-
-	if (!g->ops.gr.is_valid_class(g, class_num)) {
-		nvgpu_err(g,
-			   "invalid obj class 0x%x", class_num);
-		err = -EINVAL;
-		goto out;
-	}
-	c->obj_class = class_num;
-
-	tsg = tsg_gk20a_from_ch(c);
-	if (tsg == NULL) {
-		return -EINVAL;
-	}
-
-	gr_ctx = tsg->gr_ctx;
-
-	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
-		if (c->subctx == NULL) {
-			c->subctx = nvgpu_gr_subctx_alloc(g, c->vm);
-			if (c->subctx == NULL) {
-				err = -ENOMEM;
-				goto out;
-			}
-		}
-	}
-
-	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
-		tsg->vm = c->vm;
-		nvgpu_vm_get(tsg->vm);
-
-		err = nvgpu_gr_obj_ctx_alloc(g, g->gr.golden_image,
-				g->gr.global_ctx_buffer, gr_ctx, c->subctx,
-				tsg->vm, &c->inst_block, class_num, flags,
-				c->cde, c->vpr);
-		if (err != 0) {
-			nvgpu_err(g,
-				"failed to allocate gr ctx buffer");
-			nvgpu_vm_put(tsg->vm);
-			tsg->vm = NULL;
-			goto out;
-		}
-
-		gr_ctx->tsgid = tsg->tsgid;
-	} else {
-		/* commit gr ctx buffer */
-		nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx,
-			c->subctx, gr_ctx->mem.gpu_va);
-	}
-
-#ifdef CONFIG_GK20A_CTXSW_TRACE
-	if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
-		err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
-			c->subctx, gr_ctx, tsg->tgid, 0);
-		if (err != 0) {
-			nvgpu_warn(g,
-				"fail to bind channel for ctxsw trace");
-		}
-	}
-#endif
-
-	nvgpu_log_fn(g, "done");
-	return 0;
-out:
-	if (c->subctx != NULL) {
-		nvgpu_gr_subctx_free(g, c->subctx, c->vm);
-	}
-
-	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
-	   can be reused so no need to release them.
-	   2. golden image init and load is a one time thing so if
-	   they pass, no need to undo. */
-	nvgpu_err(g, "fail");
-	return err;
-}
-
 #define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE	0x02dcU
 #define NVA297_SET_CIRCULAR_BUFFER_SIZE		0x1280U
 #define NVA297_SET_SHADER_EXCEPTIONS		0x1528U
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index efabfc14d..7d9256ecd 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -253,8 +253,6 @@ int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 void gk20a_init_gr(struct gk20a *g);
 int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);
 
-int gk20a_alloc_obj_ctx(struct channel_gk20a  *c, u32 class_num, u32 flags);
-
 int gk20a_gr_isr(struct gk20a *g);
 
 void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config);
@@ -292,7 +290,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 				  u32 mode);
 
 void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
-void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg);
 void gk20a_gr_resume_single_sm(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 sm);
 void gk20a_gr_resume_all_sms(struct gk20a *g);
@@ -308,8 +305,6 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		bool *post_event, struct channel_gk20a *fault_ch,
 		u32 *hww_global_esr);
 int gr_gk20a_init_ctx_state(struct gk20a *g);
-void gr_gk20a_free_gr_ctx(struct gk20a *g,
-		       struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 int gr_gk20a_css_attach(struct channel_gk20a *ch,   /* in - main hw structure */
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 71adfb14d..7d2b1472f 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -264,11 +264,9 @@ static const struct gpu_ops gm20b_ops = {
 		.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
 		.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
 		.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
-		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
 		.detect_sm_arch = gr_gm20b_detect_sm_arch,
-		.free_gr_ctx = gr_gk20a_free_gr_ctx,
 		.dump_gr_regs = gr_gm20b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -392,6 +390,8 @@ static const struct gpu_ops gm20b_ops = {
 		},
 		.setup = {
 			.bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+			.alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
+			.free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
 		},
 		.zbc = {
 			.add_color = gm20b_gr_zbc_add_color,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 658b412c0..d1e7ad3d9 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -293,11 +293,9 @@ static const struct gpu_ops gp10b_ops = {
 		.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
 		.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
 		.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
-		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
 		.detect_sm_arch = gr_gm20b_detect_sm_arch,
-		.free_gr_ctx = gr_gk20a_free_gr_ctx,
 		.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -462,6 +460,8 @@ static const struct gpu_ops gp10b_ops = {
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
 		.setup = {
 			.bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+			.alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
+			.free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
 		},
 		.zbc = {
 			.add_color = gp10b_gr_zbc_add_color,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 86da49808..62234ac46 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -405,11 +405,9 @@ static const struct gpu_ops gv100_ops = {
 		.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
 		.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
 		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
-		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
 		.detect_sm_arch = gr_gv11b_detect_sm_arch,
-		.free_gr_ctx = gr_gk20a_free_gr_ctx,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -593,6 +591,8 @@ static const struct gpu_ops gv100_ops = {
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
 		.setup = {
 			.bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+			.alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
+			.free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
 		},
 		.zbc = {
 			.add_color = gp10b_gr_zbc_add_color,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 15dd03d99..036b10a98 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -357,11 +357,9 @@ static const struct gpu_ops gv11b_ops = {
 		.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
 		.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
 		.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
-		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
 		.detect_sm_arch = gr_gv11b_detect_sm_arch,
-		.free_gr_ctx = gr_gk20a_free_gr_ctx,
 		.powergate_tpc = gr_gv11b_powergate_tpc,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
@@ -554,6 +552,8 @@ static const struct gpu_ops gv11b_ops = {
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
 		.setup = {
 			.bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+			.alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
+			.free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
 		},
 		.zbc = {
 			.add_color = gp10b_gr_zbc_add_color,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index e7b295def..758b8eb36 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -292,8 +292,6 @@ struct gpu_ops {
 						  u32 **ovr_perf_regsr);
 		void (*set_hww_esr_report_mask)(struct gk20a *g);
 		void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
-		int (*alloc_obj_ctx)(struct channel_gk20a  *c,
-				     u32 class_num, u32 flags);
 		int (*decode_egpc_addr)(struct gk20a *g,
 			u32 addr, enum ctxsw_addr_type *addr_type,
 			u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags);
@@ -309,8 +307,6 @@ struct gpu_ops {
 		u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
 		u32 (*get_egpc_base)(struct gk20a *g);
 		void (*detect_sm_arch)(struct gk20a *g);
-		void (*free_gr_ctx)(struct gk20a *g,
-				    struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
 		void (*powergate_tpc)(struct gk20a *g);
 		int (*init_ctxsw_preemption_mode)(struct gk20a *g,
 			  struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
@@ -635,6 +631,11 @@ struct gpu_ops {
 						struct channel_gk20a *c,
 						u64 zcull_va,
 						u32 mode);
+			int (*alloc_obj_ctx)(struct channel_gk20a  *c,
+				     u32 class_num, u32 flags);
+			void (*free_gr_ctx)(struct gk20a *g,
+				struct vm_gk20a *vm,
+				struct nvgpu_gr_ctx *gr_ctx);
 		} setup;
 
 		struct {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
index 79c7c0674..7f40e3506 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
@@ -26,8 +26,15 @@
 
 struct gk20a;
 struct channel_gk20a;
+struct vm_gk20a;
+struct nvgpu_gr_ctx;
 
 int nvgpu_gr_setup_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c,
 			u64 zcull_va, u32 mode);
 
+int nvgpu_gr_setup_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num,
+		u32 flags);
+void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g,
+		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
+
 #endif /* NVGPU_GR_SETUP_H */
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index 81aa82d60..fa85e4d4a 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -552,7 +552,7 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
 	/* CDE enabled */
 	cde_ctx->ch->cde = true;
 
-	err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
+	err = g->ops.gr.setup.alloc_obj_ctx(cde_ctx->ch, required_class, 0);
 	if (err) {
 		nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
 			   err);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index 6849e95b5..f5cffa3b6 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -873,7 +873,7 @@ static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags)
 static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch,
 	u32 class_num, u32 user_flags)
 {
-	return ch->g->ops.gr.alloc_obj_ctx(ch, class_num,
+	return ch->g->ops.gr.setup.alloc_obj_ctx(ch, class_num,
 			nvgpu_obj_ctx_user_flags_to_common_flags(user_flags));
 }
 
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 987047f07..0fa5f6799 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -424,11 +424,9 @@ static const struct gpu_ops tu104_ops = {
 		.get_sm_dsm_perf_ctrl_regs = gr_tu104_get_sm_dsm_perf_ctrl_regs,
 		.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
 		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
-		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.is_tpc_addr = gr_gm20b_is_tpc_addr,
 		.get_tpc_num = gr_gm20b_get_tpc_num,
 		.detect_sm_arch = gr_gv11b_detect_sm_arch,
-		.free_gr_ctx = gr_gk20a_free_gr_ctx,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -619,6 +617,8 @@ static const struct gpu_ops tu104_ops = {
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
 		.setup = {
 			.bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+			.alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
+			.free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
 		},
 		.zbc = {
 			.add_color = gp10b_gr_zbc_add_color,