diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c
index fb8cc15e5..270eda140 100644
--- a/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c
+++ b/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c
@@ -24,18 +24,17 @@
 #include <nvgpu/list.h>
 #include <nvgpu/log.h>
 #include <nvgpu/log2.h>
+#include <nvgpu/circ_buf.h>
+#include <nvgpu/timers.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/gr/fecs_trace.h>
-
-/*
- * TODO: This include is only needed for transition phase to new unit
- * Remove as soon as transition is complete
- */
-#include "gk20a/fecs_trace_gk20a.h"
+#include <nvgpu/ctxsw_trace.h>
 
 #ifdef CONFIG_GK20A_CTXSW_TRACE
 
+static int nvgpu_gr_fecs_trace_periodic_polling(void *arg);
+
 int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
 	pid_t pid, u32 vmid, struct nvgpu_list_node *list)
 {
@@ -266,7 +265,7 @@ int nvgpu_gr_fecs_trace_enable(struct gk20a *g)
 		g->ops.fecs_trace.set_read_index(g, write);
 
 		err = nvgpu_thread_create(&trace->poll_task, g,
-				gk20a_fecs_trace_periodic_polling, __func__);
+				nvgpu_gr_fecs_trace_periodic_polling, __func__);
 		if (err != 0) {
 			nvgpu_warn(g, "failed to create FECS polling task");
 			goto done;
@@ -311,4 +310,233 @@ void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g)
 		g->ops.fecs_trace.get_write_index(g));
 }
 
+/*
+ * Converts the HW record at @index to userspace-facing trace entries and
+ * queues them. Returns the count queued, or -EINVAL for a bad record.
+ */
+int nvgpu_gr_fecs_trace_ring_read(struct gk20a *g, int index,
+	u32 *vm_update_mask)
+{
+	int i;
+	struct nvgpu_gpu_ctxsw_trace_entry entry = { };
+	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
+	pid_t cur_pid = 0, new_pid = 0;
+	u32 cur_vmid = 0U, new_vmid = 0U;
+	u32 vmid = 0U;
+	int count = 0;
+
+	struct nvgpu_fecs_trace_record *r =
+		nvgpu_gr_fecs_trace_get_record(g, index);
+	if (r == NULL) {
+		return -EINVAL;
+	}
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
+		"consuming record trace=%p read=%d record=%p", trace, index, r);
+
+	if (!nvgpu_gr_fecs_trace_is_valid_record(g, r)) {
+		nvgpu_warn(g,
+			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
+			trace, index, r, r->magic_lo, r->magic_hi);
+		return -EINVAL;
+	}
+
+	/* Clear magic_hi to detect cases where CPU could read write index
+	 * before FECS record is actually written to DRAM. This should not
+	 * happen as we force FECS writes to SYSMEM by reading through PRAMIN.
+	 */
+	r->magic_hi = 0;
+
+	if ((r->context_ptr != 0U) && (r->context_id != 0U)) {
+		nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr,
+			&trace->context_list, &cur_pid, &cur_vmid);
+	} else {
+		cur_vmid = 0xffffffffU;
+		cur_pid = 0;
+	}
+
+	if (r->new_context_ptr != 0U) {
+		nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr,
+			&trace->context_list, &new_pid, &new_vmid);
+	} else {
+		new_vmid = 0xffffffffU;
+		new_pid = 0;
+	}
+
+	nvgpu_log(g, gpu_dbg_ctxsw,
+		"context_ptr=%x (vmid=%u pid=%d)",
+		r->context_ptr, cur_vmid, cur_pid);
+	nvgpu_log(g, gpu_dbg_ctxsw,
+		"new_context_ptr=%x (vmid=%u pid=%d)",
+		r->new_context_ptr, new_vmid, new_pid);
+
+	entry.context_id = r->context_id;
+
+	/* Break the FECS record out into one trace event per timestamp */
+	for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) {
+
+		entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
+		entry.timestamp =
+			g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
+		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
+
+		nvgpu_log(g, gpu_dbg_ctxsw,
+			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
+			entry.tag, entry.timestamp, r->context_id,
+			r->new_context_id);
+
+		switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
+		case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
+		case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
+			entry.context_id = r->new_context_id;
+			entry.pid = new_pid;
+			entry.vmid = new_vmid;
+			break;
+
+		case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
+		case NVGPU_GPU_CTXSW_TAG_FE_ACK:
+		case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
+		case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
+		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
+		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
+		case NVGPU_GPU_CTXSW_TAG_SAVE_END:
+			entry.context_id = r->context_id;
+			entry.pid = cur_pid;
+			entry.vmid = cur_vmid;
+			break;
+
+		default:
+			/* tags are not guaranteed to start at the beginning */
+			if ((entry.tag != 0) && (entry.tag !=
+				    NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP)) {
+				nvgpu_warn(g, "TAG not found");
+			}
+			continue;
+		}
+
+		nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
+			entry.tag, entry.context_id, entry.pid);
+
+		if (!entry.context_id)
+			continue;
+
+		if (g->ops.fecs_trace.vm_dev_write != NULL) {
+			g->ops.fecs_trace.vm_dev_write(g, entry.vmid,
+				vm_update_mask, &entry);
+		} else {
+			gk20a_ctxsw_trace_write(g, &entry);
+		}
+		count++;
+	}
+	/* vmid is never modified above, so this is always called with 0 */
+	gk20a_ctxsw_trace_wake_up(g, vmid);
+	return count;
+}
+
+/*
+ * Consume the FECS records between the read and write indices, then publish
+ * the new read index. Returns 0 (also when enable_count is 0), the error
+ * from gk20a_busy() or get_write_index(), or -EINVAL for a bad write index.
+ */
+int nvgpu_gr_fecs_trace_poll(struct gk20a *g)
+{
+	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
+	u32 vm_update_mask = 0U;
+	int read = 0;
+	int write = 0;
+	int cnt;
+	int err = 0;
+
+	nvgpu_mutex_acquire(&trace->poll_lock);
+	if (trace->enable_count == 0) {
+		goto done_unlock;
+	}
+
+	err = gk20a_busy(g);
+	if (err != 0) {
+		goto done_unlock;
+	}
+
+	write = g->ops.fecs_trace.get_write_index(g);
+	if ((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS)) {
+		nvgpu_err(g, "failed to acquire write index, write=%d", write);
+		err = (write < 0) ? write : -EINVAL;
+		goto done;
+	}
+
+	read = g->ops.fecs_trace.get_read_index(g);
+	cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
+	if (cnt == 0) {
+		goto done;
+	}
+
+	nvgpu_log(g, gpu_dbg_ctxsw,
+		"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
+		read, g->ops.fecs_trace.get_read_index(g), write, cnt);
+
+	/* Ensure all FECS writes have made it to SYSMEM */
+	g->ops.mm.fb_flush(g);
+	while (read != write) {
+		cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask);
+		if (cnt <= 0) {
+			break;
+		}
+		/* Get to next record. */
+		read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
+	}
+
+	/* ensure FECS records are updated before advancing the read index */
+	nvgpu_wmb();
+	g->ops.fecs_trace.set_read_index(g, read);
+
+	/*
+	 * FECS ucode does a priv holdoff around context reset, so the mailbox1
+	 * write above may fail: read it back and rewrite until it sticks.
+	 */
+	while (g->ops.fecs_trace.get_read_index(g) != read) {
+		nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
+		g->ops.fecs_trace.set_read_index(g, read);
+	}
+
+	if (g->ops.fecs_trace.vm_dev_update != NULL) {
+		g->ops.fecs_trace.vm_dev_update(g, vm_update_mask);
+	}
+
+done:
+	gk20a_idle(g);
+done_unlock:
+	nvgpu_mutex_release(&trace->poll_lock);
+	return err;
+}
+
+/* Poll thread body: sleep, poll, repeat until stop or enable_count is 0. */
+static int nvgpu_gr_fecs_trace_periodic_polling(void *arg)
+{
+	struct gk20a *g = (struct gk20a *)arg;
+	struct nvgpu_gr_fecs_trace *fecs = g->fecs_trace;
+
+	nvgpu_log(g, gpu_dbg_ctxsw, "thread running");
+	for (;;) {
+		if (nvgpu_thread_should_stop(&fecs->poll_task) ||
+				!(fecs->enable_count > 0U)) {
+			break;
+		}
+		nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
+				   GK20A_FECS_TRACE_FRAME_PERIOD_US * 2U);
+		nvgpu_gr_fecs_trace_poll(g);
+	}
+	return 0;
+}
+
+/* No-op when tracing is disabled; else drain records and zero read index. */
+int nvgpu_gr_fecs_trace_reset(struct gk20a *g)
+{
+	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
+	if (!g->ops.fecs_trace.is_enabled(g)) {
+		return 0;
+	}
+	(void)nvgpu_gr_fecs_trace_poll(g); /* result ignored: reset anyway */
+	return g->ops.fecs_trace.set_read_index(g, 0);
+}
+
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 36087a8c0..d20b1a126 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -24,7 +24,6 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/bug.h>
-#include <nvgpu/circ_buf.h>
 #include <nvgpu/thread.h>
 #include <nvgpu/barrier.h>
 #include <nvgpu/mm.h>
@@ -51,193 +50,6 @@ static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk2
 	return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
 }
 
-/*
- * Converts HW entry format to userspace-facing format and pushes it to the
- * queue.
- */
-static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
-{
-	int i;
-	struct nvgpu_gpu_ctxsw_trace_entry entry = { };
-	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
-	pid_t cur_pid;
-	pid_t new_pid;
-	u32 cur_vmid, new_vmid;
-	int count = 0;
-
-	/* for now, only one VM */
-	const int vmid = 0;
-
-	struct nvgpu_fecs_trace_record *r =
-		nvgpu_gr_fecs_trace_get_record(g, index);
-	if (r == NULL) {
-		return -EINVAL;
-	}
-
-	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
-		"consuming record trace=%p read=%d record=%p", trace, index, r);
-
-	if (unlikely(!nvgpu_gr_fecs_trace_is_valid_record(g, r))) {
-		nvgpu_warn(g,
-			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
-			trace, index, r, r->magic_lo, r->magic_hi);
-		return -EINVAL;
-	}
-
-	/* Clear magic_hi to detect cases where CPU could read write index
-	 * before FECS record is actually written to DRAM. This should not
-	 * as we force FECS writes to SYSMEM by reading through PRAMIN.
-	 */
-	r->magic_hi = 0;
-
-	nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr, &trace->context_list,
-		&cur_pid, &cur_vmid);
-	nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr, &trace->context_list,
-		&new_pid, &new_vmid);
-
-	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
-		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
-		r->context_ptr, cur_pid, r->new_context_ptr, new_pid);
-
-	entry.context_id = r->context_id;
-	entry.vmid = vmid;
-
-	/* break out FECS record into trace events */
-	for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) {
-
-		entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
-		entry.timestamp =
-			g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
-		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
-
-		nvgpu_log(g, gpu_dbg_ctxsw,
-			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
-			entry.tag, entry.timestamp, r->context_id,
-			r->new_context_id);
-
-		switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
-		case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
-		case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
-			entry.context_id = r->new_context_id;
-			entry.pid = new_pid;
-			break;
-
-		case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
-		case NVGPU_GPU_CTXSW_TAG_FE_ACK:
-		case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
-		case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
-		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
-		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
-		case NVGPU_GPU_CTXSW_TAG_SAVE_END:
-			entry.context_id = r->context_id;
-			entry.pid = cur_pid;
-			break;
-
-		default:
-			/* tags are not guaranteed to start at the beginning */
-			WARN_ON(entry.tag && (entry.tag != NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP));
-			continue;
-		}
-
-		nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
-			entry.tag, entry.context_id, entry.pid);
-
-		if (!entry.context_id)
-			continue;
-
-		gk20a_ctxsw_trace_write(g, &entry);
-		count++;
-	}
-
-	gk20a_ctxsw_trace_wake_up(g, vmid);
-	return count;
-}
-
-int gk20a_fecs_trace_poll(struct gk20a *g)
-{
-	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
-
-	int read = 0;
-	int write = 0;
-	int cnt;
-	int err;
-
-	err = gk20a_busy(g);
-	if (unlikely(err))
-		return err;
-
-	nvgpu_mutex_acquire(&trace->poll_lock);
-	write = g->ops.fecs_trace.get_write_index(g);
-	if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
-		nvgpu_err(g,
-			"failed to acquire write index, write=%d", write);
-		err = write;
-		goto done;
-	}
-
-	read = g->ops.fecs_trace.get_read_index(g);
-
-	cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
-	if (!cnt)
-		goto done;
-
-	nvgpu_log(g, gpu_dbg_ctxsw,
-		"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
-		read, g->ops.fecs_trace.get_read_index(g), write, cnt);
-
-	/* Ensure all FECS writes have made it to SYSMEM */
-	g->ops.mm.fb_flush(g);
-
-	while (read != write) {
-		cnt = gk20a_fecs_trace_ring_read(g, read);
-		if (cnt > 0) {
-			nvgpu_log(g, gpu_dbg_ctxsw,
-				"number of trace entries added: %d", cnt);
-		}
-
-		/* Get to next record. */
-		read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
-	}
-
-	/* ensure FECS records has been updated before incrementing read index */
-	nvgpu_wmb();
-	g->ops.fecs_trace.set_read_index(g, read);
-
-	/*
-	 * FECS ucode does a priv holdoff around the assertion of context
-	 * reset. So, pri transactions (e.g. mailbox1 register write) might
-	 * fail due to this. Hence, do write with ack i.e. write and read
-	 * it back to make sure write happened for mailbox1.
-	 */
-	while (g->ops.fecs_trace.get_read_index(g) != read) {
-		nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
-		g->ops.fecs_trace.set_read_index(g, read);
-	}
-
-done:
-	nvgpu_mutex_release(&trace->poll_lock);
-	gk20a_idle(g);
-	return err;
-}
-
-int gk20a_fecs_trace_periodic_polling(void *arg)
-{
-	struct gk20a *g = (struct gk20a *)arg;
-	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
-
-	pr_info("%s: running\n", __func__);
-
-	while (!nvgpu_thread_should_stop(&trace->poll_task)) {
-
-		nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
-				   GK20A_FECS_TRACE_FRAME_PERIOD_US * 2);
-
-		gk20a_fecs_trace_poll(g);
-	}
-
-	return 0;
-}
-
 int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 		struct channel_gk20a *ch, u32 vmid, struct nvgpu_gr_ctx *gr_ctx)
 {
@@ -322,7 +134,7 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
 		if (g->ops.fecs_trace.is_enabled(g)) {
 			if (g->ops.fecs_trace.flush)
 				g->ops.fecs_trace.flush(g);
-			gk20a_fecs_trace_poll(g);
+			nvgpu_gr_fecs_trace_poll(g);
 		}
 
 		nvgpu_gr_fecs_trace_remove_context(g, context_ptr,
@@ -331,17 +143,6 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
 	return 0;
 }
 
-int gk20a_fecs_trace_reset(struct gk20a *g)
-{
-	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
-
-	if (!g->ops.fecs_trace.is_enabled(g))
-		return 0;
-
-	gk20a_fecs_trace_poll(g);
-	return g->ops.fecs_trace.set_read_index(g, 0);
-}
-
 u32 gk20a_fecs_trace_get_buffer_full_mailbox_val(void)
 {
 	return 0x26;
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
index d400a243c..9978700f0 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
@@ -28,13 +28,10 @@ struct channel_gk20a;
 struct nvgpu_gpu_ctxsw_trace_filter;
 struct nvgpu_gr_ctx;
 
-int gk20a_fecs_trace_poll(struct gk20a *g);
-int gk20a_fecs_trace_periodic_polling(void *arg);
 int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 		struct channel_gk20a *ch, u32 vmid,
 		struct nvgpu_gr_ctx *gr_ctx);
 int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch);
-int gk20a_fecs_trace_reset(struct gk20a *g);
 u32 gk20a_fecs_trace_get_buffer_full_mailbox_val(void);
 
 #endif /* NVGPU_GK20A_FECS_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 75f24fd0d..afdb7377a 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -672,9 +672,9 @@ static const struct gpu_ops gp10b_ops = {
 		.enable = nvgpu_gr_fecs_trace_enable,
 		.disable = nvgpu_gr_fecs_trace_disable,
 		.is_enabled = nvgpu_gr_fecs_trace_is_enabled,
-		.reset = gk20a_fecs_trace_reset,
+		.reset = nvgpu_gr_fecs_trace_reset,
 		.flush = gp10b_fecs_trace_flush,
-		.poll = gk20a_fecs_trace_poll,
+		.poll = nvgpu_gr_fecs_trace_poll,
 		.bind_channel = gk20a_fecs_trace_bind_channel,
 		.unbind_channel = gk20a_fecs_trace_unbind_channel,
 		.max_entries = nvgpu_gr_fecs_trace_max_entries,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 89b23a776..81f3fd57b 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -839,9 +839,9 @@ static const struct gpu_ops gv100_ops = {
 		.enable = nvgpu_gr_fecs_trace_enable,
 		.disable = nvgpu_gr_fecs_trace_disable,
 		.is_enabled = nvgpu_gr_fecs_trace_is_enabled,
-		.reset = gk20a_fecs_trace_reset,
+		.reset = nvgpu_gr_fecs_trace_reset,
 		.flush = NULL,
-		.poll = gk20a_fecs_trace_poll,
+		.poll = nvgpu_gr_fecs_trace_poll,
 		.bind_channel = gk20a_fecs_trace_bind_channel,
 		.unbind_channel = gk20a_fecs_trace_unbind_channel,
 		.max_entries = nvgpu_gr_fecs_trace_max_entries,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 346f0f656..3f95f6db8 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -797,9 +797,9 @@ static const struct gpu_ops gv11b_ops = {
 		.enable = nvgpu_gr_fecs_trace_enable,
 		.disable = nvgpu_gr_fecs_trace_disable,
 		.is_enabled = nvgpu_gr_fecs_trace_is_enabled,
-		.reset = gk20a_fecs_trace_reset,
+		.reset = nvgpu_gr_fecs_trace_reset,
 		.flush = NULL,
-		.poll = gk20a_fecs_trace_poll,
+		.poll = nvgpu_gr_fecs_trace_poll,
 		.bind_channel = gk20a_fecs_trace_bind_channel,
 		.unbind_channel = gk20a_fecs_trace_unbind_channel,
 		.max_entries = nvgpu_gr_fecs_trace_max_entries,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 702078bee..98a909173 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -42,6 +42,7 @@ struct nvgpu_nvhost_dev;
 struct nvgpu_netlist_vars;
 struct nvgpu_gr_global_ctx_buffer_desc;
 struct nvgpu_gr_fecs_trace;
+struct nvgpu_gpu_ctxsw_trace_entry;
 struct nvgpu_cpu_time_correlation_sample;
 struct nvgpu_mem_sgt;
 struct nvgpu_warpstate;
@@ -1116,6 +1117,10 @@ struct gpu_ops {
 		int (*get_read_index)(struct gk20a *g);
 		int (*get_write_index)(struct gk20a *g);
 		int (*set_read_index)(struct gk20a *g, int index);
+		void (*vm_dev_write)(struct gk20a *g, u8 vmid,
+			u32 *vm_update_mask,
+			struct nvgpu_gpu_ctxsw_trace_entry *entry);
+		void (*vm_dev_update)(struct gk20a *g, u32 vm_update_mask);
 	} fecs_trace;
 #endif
 	struct {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h
index 1e2099976..b6a103197 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h
@@ -127,4 +127,9 @@ int nvgpu_gr_fecs_trace_disable(struct gk20a *g);
 bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g);
 void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g);
 
+int nvgpu_gr_fecs_trace_ring_read(struct gk20a *g, int index,
+	u32 *vm_update_mask);
+int nvgpu_gr_fecs_trace_poll(struct gk20a *g);
+int nvgpu_gr_fecs_trace_reset(struct gk20a *g);
+
 #endif /* NVGPU_GR_FECS_TRACE_H */
diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c
index 2da58bb0a..6a3ef51ed 100644
--- a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c
+++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c
@@ -392,10 +392,11 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
 
 	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
 
-	g->ops.fecs_trace.disable(g);
-
 	nvgpu_mutex_acquire(&dev->write_lock);
-	dev->write_enabled = false;
+	if (dev->write_enabled) {
+		dev->write_enabled = false;
+		g->ops.fecs_trace.disable(g);
+	}
 	nvgpu_mutex_release(&dev->write_lock);
 
 	if (dev->hdr) {
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index af59bc1d7..1064c4970 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -868,9 +868,9 @@ static const struct gpu_ops tu104_ops = {
 		.enable = nvgpu_gr_fecs_trace_enable,
 		.disable = nvgpu_gr_fecs_trace_disable,
 		.is_enabled = nvgpu_gr_fecs_trace_is_enabled,
-		.reset = gk20a_fecs_trace_reset,
+		.reset = nvgpu_gr_fecs_trace_reset,
 		.flush = NULL,
-		.poll = gk20a_fecs_trace_poll,
+		.poll = nvgpu_gr_fecs_trace_poll,
 		.bind_channel = gk20a_fecs_trace_bind_channel,
 		.unbind_channel = gk20a_fecs_trace_unbind_channel,
 		.max_entries = nvgpu_gr_fecs_trace_max_entries,