From 35e9663bd09063ca2c2ac7e471cd557d719f4241 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Mon, 1 Apr 2019 12:56:19 -0700
Subject: [PATCH] gpu: nvgpu: move eng_method_buffers from fifo to tsg

Moved init/deinit eng method buffers from fifo to tsg
- tsg.init_eng_method_buffers
- tsg.deinit_eng_method_buffers

Moved gv11b_fifo_init_ramfc_eng_method_buffer to the
following tsg HAL:
- tsg.bind_channel_eng_method_buffers

This HAL is now called during bind_channel.

Added the following ramin HAL:
- ramin.set_eng_method_buffer

Jira NVGPU-2979

Change-Id: I96f6ff15d2176d4e3714fa8fe65a9126b3fff82c
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2087185
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/fifo/tsg.c           |  12 +-
 .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c  |   4 +
 .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c  |   7 +-
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |   1 +
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c           |   1 +
 drivers/gpu/nvgpu/gv100/hal_gv100.c           |   9 +-
 drivers/gpu/nvgpu/gv11b/fifo_gv11b.c          | 107 ------------------
 drivers/gpu/nvgpu/gv11b/fifo_gv11b.h          |  11 --
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c           |   9 +-
 drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b.c      |   2 -
 drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c      |   2 -
 drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.c      |  13 +++
 drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h      |   2 +
 drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c        | 101 ++++++++++++++++-
 drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h        |   6 +
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |  12 +-
 drivers/gpu/nvgpu/tu104/hal_tu104.c           |   9 +-
 17 files changed, 165 insertions(+), 143 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index d43079db2..0a5a38c36 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -104,6 +104,10 @@ int nvgpu_tsg_bind_channel(struct tsg_gk20a *tsg, struct channel_gk20a *ch)
 	ch->tsgid = tsg->tsgid;
 	nvgpu_rwsem_up_write(&tsg->ch_list_lock);
 
+	if (g->ops.tsg.bind_channel_eng_method_buffers != NULL) {
+		g->ops.tsg.bind_channel_eng_method_buffers(tsg, ch);
+	}
+
 	nvgpu_ref_get(&tsg->refcount);
 
 	return err;
@@ -688,8 +692,8 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct tsg_gk20a *tsg, pid_t pid)
 		goto clean_up;
 	}
 
-	if (g->ops.fifo.init_eng_method_buffers != NULL) {
-		g->ops.fifo.init_eng_method_buffers(g, tsg);
+	if (g->ops.tsg.init_eng_method_buffers != NULL) {
+		g->ops.tsg.init_eng_method_buffers(g, tsg);
 	}
 
 	if (g->ops.tsg.open != NULL) {
@@ -741,8 +745,8 @@ void nvgpu_tsg_release_common(struct gk20a *g, struct tsg_gk20a *tsg)
 	nvgpu_kfree(g, tsg->gr_ctx);
 	tsg->gr_ctx = NULL;
 
-	if (g->ops.fifo.deinit_eng_method_buffers != NULL) {
-		g->ops.fifo.deinit_eng_method_buffers(g, tsg);
+	if (g->ops.tsg.deinit_eng_method_buffers != NULL) {
+		g->ops.tsg.deinit_eng_method_buffers(g, tsg);
 	}
 
 	if (tsg->vm != NULL) {
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index c0220a2d5..15c1aa04d 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -497,6 +497,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.set_adr_limit = gk20a_ramin_set_adr_limit,
 		.base_shift = gk20a_ramin_base_shift,
 		.alloc_size = gk20a_ramin_alloc_size,
+		.set_eng_method_buffer = NULL,
 	},
 	.runlist = {
 		.reschedule = NULL,
@@ -540,9 +541,12 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 	.tsg = {
 		.open = vgpu_tsg_open,
 		.release = vgpu_tsg_release,
+		.init_eng_method_buffers = NULL,
+		.deinit_eng_method_buffers = NULL,
 		.enable = vgpu_tsg_enable,
 		.disable = nvgpu_tsg_disable,
 		.bind_channel = vgpu_tsg_bind_channel,
+		.bind_channel_eng_method_buffers = NULL,
 		.unbind_channel = vgpu_tsg_unbind_channel,
 		.unbind_channel_check_hw_state = NULL,
 		.unbind_channel_check_ctx_reload = NULL,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
index 48e2cbc90..8b4ea099f 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -504,9 +504,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.is_preempt_pending = gv11b_fifo_is_preempt_pending,
 		.reset_enable_hw = NULL,
 		.teardown_ch_tsg = NULL,
-		.init_eng_method_buffers = gv11b_fifo_init_eng_method_buffers,
-		.deinit_eng_method_buffers =
-			gv11b_fifo_deinit_eng_method_buffers,
 		.post_event_id = gk20a_tsg_event_id_post_event,
 		.setup_sw = vgpu_fifo_setup_sw,
 		.cleanup_sw = vgpu_fifo_cleanup_sw,
@@ -584,6 +581,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.set_adr_limit = NULL,
 		.base_shift = gk20a_ramin_base_shift,
 		.alloc_size = gk20a_ramin_alloc_size,
+		.set_eng_method_buffer = NULL,
 	},
 	.runlist = {
 		.reschedule = NULL,
@@ -628,9 +626,12 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 	.tsg = {
 		.open = vgpu_tsg_open,
 		.release = vgpu_tsg_release,
+		.init_eng_method_buffers = NULL,
+		.deinit_eng_method_buffers = NULL,
 		.enable = gv11b_tsg_enable,
 		.disable = nvgpu_tsg_disable,
 		.bind_channel = vgpu_gv11b_tsg_bind_channel,
+		.bind_channel_eng_method_buffers = NULL,
 		.unbind_channel = vgpu_tsg_unbind_channel,
 		.unbind_channel_check_hw_state = NULL,
 		.unbind_channel_check_ctx_reload = NULL,
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 95095ca42..c13870a21 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -719,6 +719,7 @@ static const struct gpu_ops gm20b_ops = {
 		.set_adr_limit = gk20a_ramin_set_adr_limit,
 		.base_shift = gk20a_ramin_base_shift,
 		.alloc_size = gk20a_ramin_alloc_size,
+		.set_eng_method_buffer = NULL,
 	},
 	.runlist = {
 		.update_for_channel = gk20a_runlist_update_for_channel,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 5b61d0863..f3e0020ff 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -808,6 +808,7 @@ static const struct gpu_ops gp10b_ops = {
 		.set_adr_limit = gk20a_ramin_set_adr_limit,
 		.base_shift = gk20a_ramin_base_shift,
 		.alloc_size = gk20a_ramin_alloc_size,
+		.set_eng_method_buffer = NULL,
 	},
 	.runlist = {
 		.reschedule = gk20a_runlist_reschedule,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 849825566..138ac4dc0 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -906,9 +906,6 @@ static const struct gpu_ops gv100_ops = {
 		.teardown_ch_tsg = gv11b_fifo_teardown_ch_tsg,
 		.teardown_mask_intr = gv100_fifo_teardown_mask_intr,
 		.teardown_unmask_intr = gv100_fifo_teardown_unmask_intr,
-		.init_eng_method_buffers = gv11b_fifo_init_eng_method_buffers,
-		.deinit_eng_method_buffers =
-			gv11b_fifo_deinit_eng_method_buffers,
 		.post_event_id = gk20a_tsg_event_id_post_event,
 		.setup_sw = nvgpu_fifo_setup_sw,
 		.cleanup_sw = nvgpu_fifo_cleanup_sw,
@@ -994,6 +991,7 @@ static const struct gpu_ops gv100_ops = {
 		.set_adr_limit = NULL,
 		.base_shift = gk20a_ramin_base_shift,
 		.alloc_size = gk20a_ramin_alloc_size,
+		.set_eng_method_buffer = gv11b_ramin_set_eng_method_buffer,
 	},
 	.runlist = {
 		.update_for_channel = gk20a_runlist_update_for_channel,
@@ -1041,7 +1039,12 @@ static const struct gpu_ops gv100_ops = {
 	.tsg = {
 		.enable = gv11b_tsg_enable,
 		.disable = nvgpu_tsg_disable,
+		.init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers,
+		.deinit_eng_method_buffers =
+			gv11b_tsg_deinit_eng_method_buffers,
 		.bind_channel = NULL,
+		.bind_channel_eng_method_buffers =
+			gv11b_tsg_bind_channel_eng_method_buffers,
 		.unbind_channel = NULL,
 		.unbind_channel_check_hw_state =
 				nvgpu_tsg_unbind_channel_check_hw_state,
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index ba3f70c45..484cfbd1d 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -995,113 +995,6 @@ int gv11b_init_fifo_reset_enable_hw(struct gk20a *g)
 	return 0;
 }
 
-void gv11b_fifo_init_ramfc_eng_method_buffer(struct gk20a *g,
-			struct channel_gk20a *ch, struct nvgpu_mem *mem)
-{
-	struct tsg_gk20a *tsg;
-	struct nvgpu_mem *method_buffer_per_runque;
-
-	tsg = tsg_gk20a_from_ch(ch);
-	if (tsg == NULL) {
-		nvgpu_err(g, "channel is not part of tsg");
-		return;
-	}
-	if (tsg->eng_method_buffers == NULL) {
-		nvgpu_log_info(g, "eng method buffer NULL");
-		return;
-	}
-	if (tsg->runlist_id == nvgpu_engine_get_fast_ce_runlist_id(g)) {
-		method_buffer_per_runque =
-			&tsg->eng_method_buffers[ASYNC_CE_RUNQUE];
-	} else {
-		method_buffer_per_runque =
-			&tsg->eng_method_buffers[GR_RUNQUE];
-	}
-
-	nvgpu_mem_wr32(g, mem, ram_in_eng_method_buffer_addr_lo_w(),
-			u64_lo32(method_buffer_per_runque->gpu_va));
-	nvgpu_mem_wr32(g, mem, ram_in_eng_method_buffer_addr_hi_w(),
-			u64_hi32(method_buffer_per_runque->gpu_va));
-
-	nvgpu_log_info(g, "init ramfc with method buffer");
-}
-
-static unsigned int gv11b_fifo_get_eng_method_buffer_size(struct gk20a *g)
-{
-	unsigned int buffer_size;
-
-	buffer_size =  ((9U + 1U + 3U) * g->ops.ce2.get_num_pce(g)) + 2U;
-	buffer_size = (27U * 5U * buffer_size);
-	buffer_size = roundup(buffer_size, PAGE_SIZE);
-	nvgpu_log_info(g, "method buffer size in bytes %d", buffer_size);
-
-	return buffer_size;
-}
-
-void gv11b_fifo_init_eng_method_buffers(struct gk20a *g,
-					struct tsg_gk20a *tsg)
-{
-	struct vm_gk20a *vm = g->mm.bar2.vm;
-	int err = 0;
-	int i;
-	unsigned int runque, method_buffer_size;
-	unsigned int num_pbdma = g->fifo.num_pbdma;
-
-	if (tsg->eng_method_buffers != NULL) {
-		return;
-	}
-
-	method_buffer_size = gv11b_fifo_get_eng_method_buffer_size(g);
-	if (method_buffer_size == 0U) {
-		nvgpu_info(g, "ce will hit MTHD_BUFFER_FAULT");
-		return;
-	}
-
-	tsg->eng_method_buffers = nvgpu_kzalloc(g,
-					num_pbdma * sizeof(struct nvgpu_mem));
-
-	for (runque = 0; runque < num_pbdma; runque++) {
-		err = nvgpu_dma_alloc_map_sys(vm, method_buffer_size,
-					&tsg->eng_method_buffers[runque]);
-		if (err != 0) {
-			break;
-		}
-	}
-	if (err != 0) {
-		for (i = ((int)runque - 1); i >= 0; i--) {
-			nvgpu_dma_unmap_free(vm,
-				 &tsg->eng_method_buffers[i]);
-		}
-
-		nvgpu_kfree(g, tsg->eng_method_buffers);
-		tsg->eng_method_buffers = NULL;
-		nvgpu_err(g, "could not alloc eng method buffers");
-		return;
-	}
-	nvgpu_log_info(g, "eng method buffers allocated");
-
-}
-
-void gv11b_fifo_deinit_eng_method_buffers(struct gk20a *g,
-					struct tsg_gk20a *tsg)
-{
-	struct vm_gk20a *vm = g->mm.bar2.vm;
-	unsigned int runque;
-
-	if (tsg->eng_method_buffers == NULL) {
-		return;
-	}
-
-	for (runque = 0; runque < g->fifo.num_pbdma; runque++) {
-		nvgpu_dma_unmap_free(vm, &tsg->eng_method_buffers[runque]);
-	}
-
-	nvgpu_kfree(g, tsg->eng_method_buffers);
-	tsg->eng_method_buffers = NULL;
-
-	nvgpu_log_info(g, "eng method buffers de-allocated");
-}
-
 int gv11b_init_fifo_setup_hw(struct gk20a *g)
 {
 	struct fifo_gk20a *f = &g->fifo;
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
index 44edbd48e..5687871e7 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
@@ -30,11 +30,6 @@
 #define FIFO_INVAL_PBDMA_ID	(~U32(0U))
 #define FIFO_INVAL_VEID		(~U32(0U))
 
-/* can be removed after runque support is added */
-
-#define GR_RUNQUE			0U	/* pbdma 0 */
-#define ASYNC_CE_RUNQUE			2U	/* pbdma 2 */
-
 #define CHANNEL_INFO_VEID0		0U
 
 #define MAX_PRE_SI_RETRIES		200000U	/* 1G/500KHz * 100 */
@@ -61,16 +56,10 @@ void gv11b_fifo_teardown_mask_intr(struct gk20a *g);
 void gv11b_fifo_teardown_unmask_intr(struct gk20a *g);
 void gv11b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f);
 int gv11b_init_fifo_reset_enable_hw(struct gk20a *g);
-void gv11b_fifo_init_eng_method_buffers(struct gk20a *g,
-					struct tsg_gk20a *tsg);
-void gv11b_fifo_deinit_eng_method_buffers(struct gk20a *g,
-					struct tsg_gk20a *tsg);
 int gv11b_init_fifo_setup_hw(struct gk20a *g);
 
 u32 gv11b_fifo_get_preempt_timeout(struct gk20a *g);
 
-void gv11b_fifo_init_ramfc_eng_method_buffer(struct gk20a *g,
-			struct channel_gk20a *ch, struct nvgpu_mem *mem);
 void gv11b_ring_channel_doorbell(struct channel_gk20a *c);
 u64 gv11b_fifo_usermode_base(struct gk20a *g);
 u32 gv11b_fifo_doorbell_token(struct channel_gk20a *c);
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index e76abc86f..7e274dfc9 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -862,9 +862,6 @@ static const struct gpu_ops gv11b_ops = {
 		.teardown_ch_tsg = gv11b_fifo_teardown_ch_tsg,
 		.teardown_mask_intr = gv11b_fifo_teardown_mask_intr,
 		.teardown_unmask_intr = gv11b_fifo_teardown_unmask_intr,
-		.init_eng_method_buffers = gv11b_fifo_init_eng_method_buffers,
-		.deinit_eng_method_buffers =
-			gv11b_fifo_deinit_eng_method_buffers,
 		.post_event_id = gk20a_tsg_event_id_post_event,
 		.setup_sw = nvgpu_fifo_setup_sw,
 		.cleanup_sw = nvgpu_fifo_cleanup_sw,
@@ -950,6 +947,7 @@ static const struct gpu_ops gv11b_ops = {
 		.set_adr_limit = NULL,
 		.base_shift = gk20a_ramin_base_shift,
 		.alloc_size = gk20a_ramin_alloc_size,
+		.set_eng_method_buffer = gv11b_ramin_set_eng_method_buffer,
 	},
 	.runlist = {
 		.reschedule = gv11b_runlist_reschedule,
@@ -999,7 +997,12 @@ static const struct gpu_ops gv11b_ops = {
 	.tsg = {
 		.enable = gv11b_tsg_enable,
 		.disable = nvgpu_tsg_disable,
+		.init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers,
+		.deinit_eng_method_buffers =
+			gv11b_tsg_deinit_eng_method_buffers,
 		.bind_channel = NULL,
+		.bind_channel_eng_method_buffers =
+			gv11b_tsg_bind_channel_eng_method_buffers,
 		.unbind_channel = NULL,
 		.unbind_channel_check_hw_state =
 				nvgpu_tsg_unbind_channel_check_hw_state,
diff --git a/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b.c
index 80d0f163c..b81e8c07f 100644
--- a/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b.c
@@ -97,8 +97,6 @@ int gv11b_ramfc_setup(struct channel_gk20a *ch, u64 gpfifo_base,
 	nvgpu_mem_wr32(g, mem, ram_in_engine_wfi_veid_w(),
 		ram_in_engine_wfi_veid_f(ch->subctx_id));
 
-	gv11b_fifo_init_ramfc_eng_method_buffer(g, ch, mem);
-
 	if (ch->is_privileged_channel) {
 		/* Set privilege level for channel */
 		nvgpu_mem_wr32(g, mem, ram_fc_config_w(),
diff --git a/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c b/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c
index a00038bbc..57a462ef0 100644
--- a/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c
+++ b/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c
@@ -83,8 +83,6 @@ int tu104_ramfc_setup(struct channel_gk20a *ch, u64 gpfifo_base,
 	nvgpu_mem_wr32(g, mem, ram_in_engine_wfi_veid_w(),
 		ram_in_engine_wfi_veid_f(ch->subctx_id));
 
-	gv11b_fifo_init_ramfc_eng_method_buffer(g, ch, mem);
-
 	if (ch->is_privileged_channel) {
 		/* Set privilege level for channel */
 		nvgpu_mem_wr32(g, mem, ram_fc_config_w(),
diff --git a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.c
index fab9d53da..383a42df1 100644
--- a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.c
@@ -106,3 +106,16 @@ void gv11b_ramin_init_subctx_pdb(struct gk20a *g,
 	gv11b_subctx_commit_valid_mask(g, inst_block);
 
 }
+
+void gv11b_ramin_set_eng_method_buffer(struct gk20a *g,
+		struct nvgpu_mem *inst_block, u64 gpu_va)
+{
+	u32 addr_lo = u64_lo32(gpu_va);
+	u32 addr_hi = u64_hi32(gpu_va);
+
+	nvgpu_mem_wr32(g, inst_block, ram_in_eng_method_buffer_addr_lo_w(),
+			addr_lo);
+	nvgpu_mem_wr32(g, inst_block, ram_in_eng_method_buffer_addr_hi_w(),
+			addr_hi);
+}
+
diff --git a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h
index c6ffb473e..1b5cef63b 100644
--- a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h
@@ -33,5 +33,7 @@ void gv11b_ramin_set_gr_ptr(struct gk20a *g,
 void gv11b_ramin_init_subctx_pdb(struct gk20a *g,
 		struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem,
 		bool replayable);
+void gv11b_ramin_set_eng_method_buffer(struct gk20a *g,
+		struct nvgpu_mem *inst_block, u64 gpu_va);
 
 #endif /* NVGPU_RAMIN_GV11B_H */
diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c
index a0b24bb36..ae9ddd58c 100644
--- a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c
@@ -21,13 +21,17 @@
  */
 
 #include <nvgpu/channel.h>
+#include <nvgpu/engines.h>
 #include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/tsg.h>
 #include <nvgpu/gk20a.h>
 
 #include "hal/fifo/tsg_gv11b.h"
 
-#include "gv11b/fifo_gv11b.h"
+
+/* can be removed after runque support is added */
+#define GR_RUNQUE			0U	/* pbdma 0 */
+#define ASYNC_CE_RUNQUE			2U	/* pbdma 2 */
 
 /* TSG enable sequence applicable for Volta and onwards */
 void gv11b_tsg_enable(struct tsg_gk20a *tsg)
@@ -75,3 +79,98 @@ void gv11b_tsg_unbind_channel_check_eng_faulted(struct tsg_gk20a *tsg,
 		nvgpu_mem_wr32(g, mem, 0, 0);
 	}
 }
+
+void gv11b_tsg_bind_channel_eng_method_buffers(struct tsg_gk20a *tsg,
+		struct channel_gk20a *ch)
+{
+	struct gk20a *g = tsg->g;
+	u64 gpu_va;
+
+	if (tsg->eng_method_buffers == NULL) {
+		nvgpu_log_info(g, "eng method buffer NULL");
+		return;
+	}
+
+	if (tsg->runlist_id == nvgpu_engine_get_fast_ce_runlist_id(g)) {
+		gpu_va = tsg->eng_method_buffers[ASYNC_CE_RUNQUE].gpu_va;
+	} else {
+		gpu_va = tsg->eng_method_buffers[GR_RUNQUE].gpu_va;
+	}
+
+	g->ops.ramin.set_eng_method_buffer(g, &ch->inst_block, gpu_va);
+}
+
+static unsigned int gv11b_tsg_get_eng_method_buffer_size(struct gk20a *g)
+{
+	unsigned int buffer_size;
+
+	buffer_size =  ((9U + 1U + 3U) * g->ops.ce2.get_num_pce(g)) + 2U;
+	buffer_size = (27U * 5U * buffer_size);
+	buffer_size = roundup(buffer_size, PAGE_SIZE);
+	nvgpu_log_info(g, "method buffer size in bytes %d", buffer_size);
+
+	return buffer_size;
+}
+
+void gv11b_tsg_init_eng_method_buffers(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	struct vm_gk20a *vm = g->mm.bar2.vm;
+	int err = 0;
+	int i;
+	unsigned int runque, method_buffer_size;
+	unsigned int num_pbdma = g->fifo.num_pbdma;
+
+	if (tsg->eng_method_buffers != NULL) {
+		return;
+	}
+
+	method_buffer_size = gv11b_tsg_get_eng_method_buffer_size(g);
+	if (method_buffer_size == 0U) {
+		nvgpu_info(g, "ce will hit MTHD_BUFFER_FAULT");
+		return;
+	}
+
+	tsg->eng_method_buffers = nvgpu_kzalloc(g,
+					num_pbdma * sizeof(struct nvgpu_mem));
+
+	for (runque = 0; runque < num_pbdma; runque++) {
+		err = nvgpu_dma_alloc_map_sys(vm, method_buffer_size,
+					&tsg->eng_method_buffers[runque]);
+		if (err != 0) {
+			break;
+		}
+	}
+	if (err != 0) {
+		for (i = ((int)runque - 1); i >= 0; i--) {
+			nvgpu_dma_unmap_free(vm,
+				 &tsg->eng_method_buffers[i]);
+		}
+
+		nvgpu_kfree(g, tsg->eng_method_buffers);
+		tsg->eng_method_buffers = NULL;
+		nvgpu_err(g, "could not alloc eng method buffers");
+		return;
+	}
+	nvgpu_log_info(g, "eng method buffers allocated");
+
+}
+
+void gv11b_tsg_deinit_eng_method_buffers(struct gk20a *g,
+		struct tsg_gk20a *tsg)
+{
+	struct vm_gk20a *vm = g->mm.bar2.vm;
+	unsigned int runque;
+
+	if (tsg->eng_method_buffers == NULL) {
+		return;
+	}
+
+	for (runque = 0; runque < g->fifo.num_pbdma; runque++) {
+		nvgpu_dma_unmap_free(vm, &tsg->eng_method_buffers[runque]);
+	}
+
+	nvgpu_kfree(g, tsg->eng_method_buffers);
+	tsg->eng_method_buffers = NULL;
+
+	nvgpu_log_info(g, "eng method buffers de-allocated");
+}
diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h
index 6453064ce..6fd680fc1 100644
--- a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h
@@ -31,5 +31,11 @@ void gv11b_tsg_enable(struct tsg_gk20a *tsg);
 void gv11b_tsg_unbind_channel_check_eng_faulted(struct tsg_gk20a *tsg,
 		struct channel_gk20a *ch,
 		struct nvgpu_channel_hw_state *hw_state);
+void gv11b_tsg_init_eng_method_buffers(struct gk20a *g,
+		struct tsg_gk20a *tsg);
+void gv11b_tsg_deinit_eng_method_buffers(struct gk20a *g,
+		struct tsg_gk20a *tsg);
+void gv11b_tsg_bind_channel_eng_method_buffers(struct tsg_gk20a *tsg,
+		struct channel_gk20a *ch);
 
 #endif /* NVGPU_TSG_GV11B_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index b3e38d251..5107ad783 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -970,10 +970,6 @@ struct gpu_ops {
 			 struct mmu_fault_info *mmfault);
 		void (*teardown_mask_intr)(struct gk20a *g);
 		void (*teardown_unmask_intr)(struct gk20a *g);
-		void (*init_eng_method_buffers)(struct gk20a *g,
-						struct tsg_gk20a *tsg);
-		void (*deinit_eng_method_buffers)(struct gk20a *g,
-						struct tsg_gk20a *tsg);
 		u32 (*get_preempt_timeout)(struct gk20a *g);
 		void (*post_event_id)(struct tsg_gk20a *tsg, int event_id);
 		void (*ring_channel_doorbell)(struct channel_gk20a *c);
@@ -1036,6 +1032,8 @@ struct gpu_ops {
 				struct nvgpu_mem *inst_block, u64 va_limit);
 		u32 (*base_shift)(void);
 		u32 (*alloc_size)(void);
+		void (*set_eng_method_buffer)(struct gk20a *g,
+				struct nvgpu_mem *inst_block, u64 gpu_va);
 	} ramin;
 	struct {
 		int (*reschedule)(struct channel_gk20a *ch, bool preempt_next);
@@ -1148,10 +1146,16 @@ struct gpu_ops {
 	struct {
 		int (*open)(struct tsg_gk20a *tsg);
 		void (*release)(struct tsg_gk20a *tsg);
+		void (*init_eng_method_buffers)(struct gk20a *g,
+				struct tsg_gk20a *tsg);
+		void (*deinit_eng_method_buffers)(struct gk20a *g,
+				struct tsg_gk20a *tsg);
 		void (*enable)(struct tsg_gk20a *tsg);
 		void (*disable)(struct tsg_gk20a *tsg);
 		int (*bind_channel)(struct tsg_gk20a *tsg,
 				struct channel_gk20a *ch);
+		void (*bind_channel_eng_method_buffers)(struct tsg_gk20a *tsg,
+				struct channel_gk20a *ch);
 		int (*unbind_channel)(struct tsg_gk20a *tsg,
 				struct channel_gk20a *ch);
 		int (*unbind_channel_check_hw_state)(struct tsg_gk20a *tsg,
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 5e432294e..cde9e96cb 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -939,9 +939,6 @@ static const struct gpu_ops tu104_ops = {
 		.teardown_ch_tsg = gv11b_fifo_teardown_ch_tsg,
 		.teardown_mask_intr = gv11b_fifo_teardown_mask_intr,
 		.teardown_unmask_intr = gv11b_fifo_teardown_unmask_intr,
-		.init_eng_method_buffers = gv11b_fifo_init_eng_method_buffers,
-		.deinit_eng_method_buffers =
-			gv11b_fifo_deinit_eng_method_buffers,
 		.post_event_id = gk20a_tsg_event_id_post_event,
 		.setup_sw = nvgpu_fifo_setup_sw,
 		.cleanup_sw = nvgpu_fifo_cleanup_sw,
@@ -1029,6 +1026,7 @@ static const struct gpu_ops tu104_ops = {
 		.set_adr_limit = NULL,
 		.base_shift = gk20a_ramin_base_shift,
 		.alloc_size = gk20a_ramin_alloc_size,
+		.set_eng_method_buffer = gv11b_ramin_set_eng_method_buffer,
 	},
 	.runlist = {
 		.update_for_channel = gk20a_runlist_update_for_channel,
@@ -1076,7 +1074,12 @@ static const struct gpu_ops tu104_ops = {
 	.tsg = {
 		.enable = gv11b_tsg_enable,
 		.disable = nvgpu_tsg_disable,
+		.init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers,
+		.deinit_eng_method_buffers =
+			gv11b_tsg_deinit_eng_method_buffers,
 		.bind_channel = NULL,
+		.bind_channel_eng_method_buffers =
+			gv11b_tsg_bind_channel_eng_method_buffers,
 		.unbind_channel = NULL,
 		.unbind_channel_check_hw_state =
 				nvgpu_tsg_unbind_channel_check_hw_state,