diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml index 3b412594e..240d2ede8 100644 --- a/arch/nvgpu-common.yaml +++ b/arch/nvgpu-common.yaml @@ -554,6 +554,11 @@ gr: sources: [ common/gr/obj_ctx.c, common/gr/obj_ctx_priv.h, include/nvgpu/gr/obj_ctx.h ] + ctx_mappings: + safe: yes + sources: [ common/gr/ctx_mappings.c, + common/gr/ctx_mappings_priv.h, + include/nvgpu/gr/ctx_mappings.h ] subctx: safe: yes sources: [ common/gr/subctx.c, diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index a05c5076e..718f910f2 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -254,6 +254,7 @@ nvgpu-y += \ common/gr/gr_intr.o \ common/gr/global_ctx.o \ common/gr/ctx.o \ + common/gr/ctx_mappings.o \ common/gr/gr_falcon.o \ common/gr/subctx.o \ common/gr/zcull.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 7e783b36f..59f7c4939 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -129,6 +129,7 @@ srcs += common/device.c \ common/gr/global_ctx.c \ common/gr/subctx.c \ common/gr/ctx.c \ + common/gr/ctx_mappings.c \ common/gr/gr_falcon.c \ common/gr/gr_config.c \ common/gr/gr_setup.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index df68830f9..40d217613 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -978,7 +978,7 @@ void nvgpu_tsg_release(struct nvgpu_ref *ref) } if ((tsg->gr_ctx != NULL) && (tsg->vm != NULL)) { - g->ops.gr.setup.free_gr_ctx(g, tsg->vm, tsg->gr_ctx); + g->ops.gr.setup.free_gr_ctx(g, tsg->gr_ctx); } #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index 9466afa66..a25793f42 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -33,11 +34,6 @@ #include #include "common/gr/ctx_priv.h" -static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm); - struct nvgpu_gr_ctx_desc * nvgpu_gr_ctx_desc_alloc(struct gk20a *g) { @@ -58,6 +54,13 @@ void nvgpu_gr_ctx_set_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, gr_ctx_desc->size[index] = size; } +u32 nvgpu_gr_ctx_get_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, + u32 index) +{ + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + return gr_ctx_desc->size[index]; +} + struct nvgpu_gr_ctx *nvgpu_alloc_gr_ctx_struct(struct gk20a *g) { return nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx)); @@ -68,390 +71,218 @@ void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) nvgpu_kfree(g, gr_ctx); } -int nvgpu_gr_ctx_alloc(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) +void nvgpu_gr_ctx_free_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx) +{ + u32 i; + + nvgpu_log(g, gpu_dbg_gr, " "); + + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + if (nvgpu_mem_is_valid(&ctx->mem[i])) { + nvgpu_dma_free(g, &ctx->mem[i]); + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc, + struct nvgpu_gr_ctx *ctx) { int err = 0; + u32 i; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_gr, " "); - if (gr_ctx_desc->size[NVGPU_GR_CTX_CTX] == 0U) { + if (desc->size[NVGPU_GR_CTX_CTX] == 0U) { + 
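/*
 * Note on the new contract: nvgpu_gr_ctx_alloc_ctx_buffers() now only
 * allocates the sysmem backing stores for the mem[] array; GPU mapping
 * is deferred to the nvgpu_gr_ctx_mappings object introduced by this
 * patch. A hypothetical caller (sketch only — golden_image_size and
 * the error handling are assumptions; the function names are from this
 * patch) would pair the steps roughly as:
 *
 *   nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, golden_image_size);
 *   err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
 *   if (err == 0) {
 *           err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
 *                           global_ctx_buffer, mappings, vpr);
 *   }
 */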
nvgpu_err(g, "context buffer size not set"); return -EINVAL; } - err = nvgpu_dma_alloc(g, gr_ctx_desc->size[NVGPU_GR_CTX_CTX], - &gr_ctx->mem); - if (err != 0) { - return err; + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + +#ifdef CONFIG_NVGPU_GFXP + /** + * Skip allocating the gfxp preemption buffers if GFXP mode is + * not set in the gr ctx. + */ + if ((i >= NVGPU_GR_CTX_PREEMPT_CTXSW) && + (i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW) && + (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) != + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP)) { + continue; + } +#endif + + if (desc->size[i] != 0U) { + nvgpu_assert(!nvgpu_mem_is_valid(&ctx->mem[i])); + + err = nvgpu_dma_alloc_sys(g, desc->size[i], + &ctx->mem[i]); + if (err != 0) { + nvgpu_err(g, "ctx buffer %u alloc failed", i); + nvgpu_gr_ctx_free_ctx_buffers(g, ctx); + return err; + } + } } - gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, - &gr_ctx->mem, - 0, /* not GPU-cacheable */ - gk20a_mem_flag_none, true, - gr_ctx->mem.aperture); - if (gr_ctx->mem.gpu_va == 0ULL) { - err = -ENOMEM; - goto err_free_mem; - } + ctx->ctx_id_valid = false; - gr_ctx->ctx_id_valid = false; - - return 0; - -err_free_mem: - nvgpu_dma_free(g, &gr_ctx->mem); + nvgpu_log(g, gpu_dbg_gr, "done"); return err; } -void nvgpu_gr_ctx_free(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm) +void nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, + struct nvgpu_gr_ctx *ctx) { - nvgpu_log_fn(g, " "); + u32 i; - if (gr_ctx != NULL) { - nvgpu_gr_ctx_unmap_global_ctx_buffers(g, gr_ctx, - global_ctx_buffer, vm); + nvgpu_log(g, gpu_dbg_gr, " "); + + /** + * Map all ctx buffers as cacheable except GR CTX and + * PATCH CTX buffers. + */ + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + ctx->mapping_flags[i] = NVGPU_VM_MAP_CACHEABLE; + } + + ctx->mapping_flags[NVGPU_GR_CTX_CTX] = 0U; + ctx->mapping_flags[NVGPU_GR_CTX_PATCH_CTX] = 0U; + + nvgpu_log(g, gpu_dbg_gr, "done"); +} -#ifdef CONFIG_NVGPU_DEBUGGER - nvgpu_gr_ctx_free_pm_ctx(g, vm, gr_ctx); -#endif - nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx); #ifdef CONFIG_NVGPU_GFXP - if (nvgpu_mem_is_valid(&gr_ctx->gfxp_rtvcb_ctxsw_buffer)) { - nvgpu_dma_unmap_free(vm, - &gr_ctx->gfxp_rtvcb_ctxsw_buffer); - } - nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); -#endif - - nvgpu_dma_unmap_free(vm, &gr_ctx->mem); - (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); - } -} - -int nvgpu_gr_ctx_alloc_patch_ctx(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) +static void nvgpu_gr_ctx_free_ctx_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx) { - struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; - int err = 0; - - nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "patch_ctx size = %u", - gr_ctx_desc->size[NVGPU_GR_CTX_PATCH_CTX]); - - err = nvgpu_dma_alloc_map_sys(vm, gr_ctx_desc->size[NVGPU_GR_CTX_PATCH_CTX], - &patch_ctx->mem); - if (err != 0) { - return err; - } - - return 0; -} - -void nvgpu_gr_ctx_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct nvgpu_gr_ctx *gr_ctx) -{ - struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; - - (void)g; - - if (nvgpu_mem_is_valid(&patch_ctx->mem)) { - nvgpu_dma_unmap_free(vm, &patch_ctx->mem); - patch_ctx->data_count = 0; - } -} - -static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g, 
- struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm) -{ - u64 *g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; - u32 *g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; u32 i; nvgpu_log_fn(g, " "); - for (i = 0U; i < NVGPU_GR_GLOBAL_CTX_VA_COUNT; i++) { - if (g_bfr_va[i] != 0ULL) { - nvgpu_gr_global_ctx_buffer_unmap(global_ctx_buffer, - g_bfr_index[i], vm, g_bfr_va[i]); + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; + i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { + if (nvgpu_mem_is_valid(&ctx->mem[i])) { + nvgpu_dma_free(g, &ctx->mem[i]); } } - (void) memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va)); - (void) memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index)); + nvgpu_log_fn(g, "done"); } -static int nvgpu_gr_ctx_map_ctx_circular_buffer(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr) +int nvgpu_gr_ctx_alloc_ctx_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc, + struct nvgpu_gr_ctx *ctx) { - u64 *g_bfr_va; - u32 *g_bfr_index; - u64 gpu_va = 0ULL; + int err = 0; + u32 i; - (void)g; - (void)vpr; + nvgpu_log(g, gpu_dbg_gr, " "); - g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; - g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; - -#ifdef CONFIG_NVGPU_VPR - if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR)) { - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR, - vm, NVGPU_VM_MAP_CACHEABLE, true); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA] = - NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR; - } else { -#endif - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_CIRCULAR, - vm, NVGPU_VM_MAP_CACHEABLE, true); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA] = - NVGPU_GR_GLOBAL_CTX_CIRCULAR; -#ifdef CONFIG_NVGPU_VPR - } -#endif - if (gpu_va == 0ULL) { - goto clean_up; - } - g_bfr_va[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA] = gpu_va; - - return 0; - -clean_up: - return -ENOMEM; -} - -static int nvgpu_gr_ctx_map_ctx_attribute_buffer(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr) -{ - u64 *g_bfr_va; - u32 *g_bfr_index; - u64 gpu_va = 0ULL; - - (void)g; - (void)vpr; - - g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; - g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; - -#ifdef CONFIG_NVGPU_VPR - if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR)) { - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR, - vm, NVGPU_VM_MAP_CACHEABLE, false); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA] = - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR; - } else { -#endif - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, - vm, NVGPU_VM_MAP_CACHEABLE, false); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA] = - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE; -#ifdef CONFIG_NVGPU_VPR - } -#endif - if (gpu_va == 0ULL) { - goto clean_up; - } - g_bfr_va[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA] = gpu_va; - - return 0; - -clean_up: - return -ENOMEM; -} - - -static int nvgpu_gr_ctx_map_ctx_pagepool_buffer(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr) -{ - u64 *g_bfr_va; - u32 *g_bfr_index; - u64 gpu_va = 0ULL; - - (void)g; - (void)vpr; - - g_bfr_va = 
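/*
 * The three near-identical VPR-aware mappers being deleted here
 * (circular, attribute, pagepool) are replaced by one parameterized
 * helper in ctx_mappings.c. Illustrative call, mirroring the new code
 * later in this patch (the VPR variant is used only when
 * CONFIG_NVGPU_VPR is set and that buffer is ready):
 *
 *   err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
 *                   global_ctx_buffer,
 *                   NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA,
 *                   NVGPU_GR_GLOBAL_CTX_CIRCULAR,
 *                   NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR,
 *                   vpr, mappings);
 */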
&gr_ctx->global_ctx_buffer_va[0]; - g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; - -#ifdef CONFIG_NVGPU_VPR - if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR)) { - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, - vm, NVGPU_VM_MAP_CACHEABLE, true); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA] = - NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR; - } else { -#endif - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_PAGEPOOL, - vm, NVGPU_VM_MAP_CACHEABLE, true); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA] = - NVGPU_GR_GLOBAL_CTX_PAGEPOOL; -#ifdef CONFIG_NVGPU_VPR - } -#endif - if (gpu_va == 0ULL) { - goto clean_up; - } - g_bfr_va[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA] = gpu_va; - - return 0; - -clean_up: - return -ENOMEM; -} - -static int nvgpu_gr_ctx_map_ctx_buffer(struct gk20a *g, - u32 buffer_type, u32 va_type, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm) -{ - u64 *g_bfr_va; - u32 *g_bfr_index; - u64 gpu_va = 0ULL; - - (void)g; - - g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; - g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; - - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - buffer_type, vm, 0, true); - if (gpu_va == 0ULL) { - goto clean_up; - } - - g_bfr_index[va_type] = buffer_type; - g_bfr_va[va_type] = gpu_va; - - return 0; - -clean_up: - return -ENOMEM; -} - -int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr) -{ - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); - - /* - * MIG supports only compute class. - * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB - * if 2D/3D/I2M classes(graphics) are supported. + /** + * Skip allocating the gfxp preemption buffers if GFXP mode is + * not set in the gr ctx. */ - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { - /* Circular Buffer */ - err = nvgpu_gr_ctx_map_ctx_circular_buffer(g, gr_ctx, - global_ctx_buffer, vm, vpr); - if (err != 0) { - nvgpu_err(g, "cannot map ctx circular buffer"); - goto fail; - } + if (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) != + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { + nvgpu_log(g, gpu_dbg_gr, "GFXP mode not set. 
Skip preemption " + "buffers allocation"); + return 0; + } - /* Attribute Buffer */ - err = nvgpu_gr_ctx_map_ctx_attribute_buffer(g, gr_ctx, - global_ctx_buffer, vm, vpr); - if (err != 0) { - nvgpu_err(g, "cannot map ctx attribute buffer"); - goto fail; - } + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; + i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { - /* Page Pool */ - err = nvgpu_gr_ctx_map_ctx_pagepool_buffer(g, gr_ctx, - global_ctx_buffer, vm, vpr); - if (err != 0) { - nvgpu_err(g, "cannot map ctx pagepool buffer"); - goto fail; - } -#ifdef CONFIG_NVGPU_GRAPHICS - /* RTV circular buffer */ - if (nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER)) { - err = nvgpu_gr_ctx_map_ctx_buffer(g, - NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER, - NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA, - gr_ctx, global_ctx_buffer, vm); + if (desc->size[i] != 0U && !nvgpu_mem_is_valid(&ctx->mem[i])) { + err = nvgpu_dma_alloc_sys(g, desc->size[i], + &ctx->mem[i]); if (err != 0) { - nvgpu_err(g, - "cannot map ctx rtv circular buffer"); - goto fail; + nvgpu_err(g, "ctx preemption buffer %u alloc failed", i); + nvgpu_gr_ctx_free_ctx_preemption_buffers(g, ctx); + return err; } } -#endif } - /* Priv register Access Map */ - err = nvgpu_gr_ctx_map_ctx_buffer(g, - NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, - NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA, - gr_ctx, global_ctx_buffer, vm); - if (err != 0) { - nvgpu_err(g, "cannot map ctx priv access buffer"); - goto fail; - } + nvgpu_log(g, gpu_dbg_gr, "done"); -#ifdef CONFIG_NVGPU_FECS_TRACE - /* FECS trace buffer */ - if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { - err = nvgpu_gr_ctx_map_ctx_buffer(g, - NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER, - NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA, - gr_ctx, global_ctx_buffer, vm); - if (err != 0) { - nvgpu_err(g, "cannot map ctx fecs trace buffer"); - goto fail; - } - } -#endif - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); - return 0; - -fail: - nvgpu_gr_ctx_unmap_global_ctx_buffers(g, gr_ctx, global_ctx_buffer, vm); return err; } +#endif -u64 nvgpu_gr_ctx_get_global_ctx_va(struct nvgpu_gr_ctx *gr_ctx, - u32 index) +void nvgpu_gr_ctx_free(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer) { - return gr_ctx->global_ctx_buffer_va[index]; + nvgpu_log(g, gpu_dbg_gr, " "); + + if ((gr_ctx != NULL) && (gr_ctx->mappings != NULL)) { + nvgpu_gr_ctx_unmap_buffers(g, + gr_ctx, global_ctx_buffer, gr_ctx->mappings); + + nvgpu_gr_ctx_free_mappings(g, gr_ctx); + + nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + + nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx); + + (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); + } + + nvgpu_log(g, gpu_dbg_gr, "done"); } -struct nvgpu_mem *nvgpu_gr_ctx_get_patch_ctx_mem(struct nvgpu_gr_ctx *gr_ctx) +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g, + struct nvgpu_tsg *tsg, struct vm_gk20a *vm) { - return &gr_ctx->patch_ctx.mem; + struct nvgpu_gr_ctx_mappings *mappings = NULL; + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + + nvgpu_log(g, gpu_dbg_gr, " "); + + mappings = gr_ctx->mappings; + if (mappings != NULL) { + return mappings; + } + + mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm); + if (mappings == NULL) { + nvgpu_err(g, "failed to allocate gr_ctx mappings"); + return mappings; + } + + gr_ctx->mappings = mappings; + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return mappings; +} + +void nvgpu_gr_ctx_free_mappings(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx) +{ + nvgpu_log(g, gpu_dbg_gr, " "); + + if 
(gr_ctx->mappings == NULL) { + return; + } + + nvgpu_gr_ctx_mappings_free(g, gr_ctx->mappings); + gr_ctx->mappings = NULL; + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg) +{ + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + + return gr_ctx->mappings; } void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx, @@ -460,9 +291,17 @@ void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx, gr_ctx->patch_ctx.data_count = data_count; } -struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx) +struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx, + u32 index) { - return &gr_ctx->mem; + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + return &gr_ctx->mem[index]; +} + +u32 nvgpu_gr_ctx_get_ctx_mapping_flags(struct nvgpu_gr_ctx *gr_ctx, u32 index) +{ + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + return gr_ctx->mapping_flags[index]; } #ifdef CONFIG_NVGPU_SM_DIVERSITY @@ -481,6 +320,7 @@ u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx) /* load saved fresh copy of gloden image into channel gr_ctx */ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image, bool cde) { @@ -493,7 +333,7 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); - mem = &gr_ctx->mem; + mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; nvgpu_gr_global_ctx_load_local_golden_image(g, local_golden_image, mem); @@ -513,7 +353,7 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem, g->allow_all); g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem, - nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA)); #endif @@ -535,7 +375,8 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, g->ops.gr.ctxsw_prog.set_patch_count(g, mem, gr_ctx->patch_ctx.data_count); g->ops.gr.ctxsw_prog.set_patch_addr(g, mem, - gr_ctx->patch_ctx.mem.gpu_va); + nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PATCH_CTX)); #ifdef CONFIG_NVGPU_DEBUGGER /* PM ctxt switch is off by default */ @@ -561,10 +402,12 @@ void nvgpu_gr_ctx_patch_write_begin(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool update_patch_count) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + if (update_patch_count) { /* reset patch count if ucode has already processed it */ gr_ctx->patch_ctx.data_count = - g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); + g->ops.gr.ctxsw_prog.get_patch_count(g, mem); nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", gr_ctx->patch_ctx.data_count); } @@ -574,9 +417,11 @@ void nvgpu_gr_ctx_patch_write_end(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool update_patch_count) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + /* Write context count to context image if it is mapped */ if (update_patch_count) { - g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, + g->ops.gr.ctxsw_prog.set_patch_count(g, mem, gr_ctx->patch_ctx.data_count); nvgpu_log(g, gpu_dbg_info, "write patch count %d", gr_ctx->patch_ctx.data_count); @@ -590,6 +435,7 @@ void nvgpu_gr_ctx_patch_write(struct gk20a *g, if (patch) { u32 patch_slot; u64 patch_slot_max; + struct nvgpu_mem *patch_ctx_mem; if (gr_ctx == NULL) { nvgpu_err(g, @@ -597,13 +443,15 @@ void 
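/*
 * Usage of the patch-write API is unchanged by this patch; only the
 * backing-memory lookup moved to the mem[] array. Typical bracketed
 * write (addr and data are placeholders):
 *
 *   nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
 *   nvgpu_gr_ctx_patch_write(g, gr_ctx, addr, data, true);
 *   nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
 */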
nvgpu_gr_ctx_patch_write(struct gk20a *g, return; } + patch_ctx_mem = &gr_ctx->mem[NVGPU_GR_CTX_PATCH_CTX]; + patch_slot = nvgpu_safe_mult_u32(gr_ctx->patch_ctx.data_count, PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY); patch_slot_max = nvgpu_safe_sub_u64( PATCH_CTX_ENTRIES_FROM_SIZE( - gr_ctx->patch_ctx.mem.size), + patch_ctx_mem->size), PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY); if (patch_slot > patch_slot_max) { @@ -612,10 +460,8 @@ void nvgpu_gr_ctx_patch_write(struct gk20a *g, return; } - nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, - (u64)patch_slot, addr); - nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, - (u64)patch_slot + 1ULL, data); + nvgpu_mem_wr32(g, patch_ctx_mem, (u64)patch_slot, addr); + nvgpu_mem_wr32(g, patch_ctx_mem, (u64)patch_slot + 1ULL, data); gr_ctx->patch_ctx.data_count = nvgpu_safe_add_u32( gr_ctx->patch_ctx.data_count, 1U); nvgpu_log(g, gpu_dbg_info, @@ -688,25 +534,23 @@ bool nvgpu_gr_ctx_check_valid_preemption_mode(struct gk20a *g, void nvgpu_gr_ctx_set_preemption_modes(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + #ifdef CONFIG_NVGPU_GFXP if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { - g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, - &gr_ctx->mem); + g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem); } #endif #ifdef CONFIG_NVGPU_CILP if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) { - g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, - &gr_ctx->mem); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem); } #endif if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { - g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, - &gr_ctx->mem); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem); } - } void nvgpu_gr_ctx_set_tsgid(struct nvgpu_gr_ctx *gr_ctx, u32 tsgid) @@ -749,10 +593,12 @@ u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx) int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + nvgpu_log(g, gpu_dbg_gr, " "); - g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, &gr_ctx->mem); - g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, 0); + g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, mem); + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem, 0); return 0; } @@ -760,6 +606,8 @@ int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool set_zcull_ptr) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + nvgpu_log_fn(g, " "); if (gr_ctx->zcull_ctx.gpu_va == 0ULL && @@ -768,11 +616,10 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, return -EINVAL; } - g->ops.gr.ctxsw_prog.set_zcull(g, &gr_ctx->mem, - gr_ctx->zcull_ctx.ctx_sw_mode); + g->ops.gr.ctxsw_prog.set_zcull(g, mem, gr_ctx->zcull_ctx.ctx_sw_mode); if (set_zcull_ptr) { - g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va); } @@ -782,14 +629,19 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, #ifdef CONFIG_NVGPU_GFXP void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings) { - g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &gr_ctx->mem, - gr_ctx->preempt_ctxsw_buffer.gpu_va); + struct nvgpu_mem *mem = 
&gr_ctx->mem[NVGPU_GR_CTX_CTX]; + u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PREEMPT_CTXSW); + + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem, + preempt_ctxsw_gpu_va); if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) { g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, - &gr_ctx->mem, gr_ctx->preempt_ctxsw_buffer.gpu_va); + mem, preempt_ctxsw_gpu_va); } } @@ -797,154 +649,6 @@ bool nvgpu_gr_ctx_desc_force_preemption_gfxp(struct nvgpu_gr_ctx_desc *gr_ctx_de { return gr_ctx_desc->force_preemption_gfxp; } - -static int nvgpu_gr_ctx_alloc_ctxsw_buffer(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - int err; - - err = nvgpu_dma_alloc_sys(vm->mm->g, size, mem); - if (err != 0) { - return err; - } - - mem->gpu_va = nvgpu_gmmu_map_partial(vm, - mem, - mem->aligned_size, - NVGPU_VM_MAP_CACHEABLE, - gk20a_mem_flag_none, - false, - mem->aperture); - if (mem->gpu_va == 0ULL) { - nvgpu_dma_free(vm->mm->g, mem); - return -ENOMEM; - } - - return 0; -} - -static int nvgpu_gr_ctx_alloc_preemption_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) -{ - int err = 0; - - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_PREEMPT_CTXSW], - &gr_ctx->preempt_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate preempt buffer"); - goto fail; - } - - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_SPILL_CTXSW], - &gr_ctx->spill_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate spill buffer"); - goto fail_free_preempt; - } - - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_BETACB_CTXSW], - &gr_ctx->betacb_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate beta buffer"); - goto fail_free_spill; - } - - if (gr_ctx_desc->size[NVGPU_GR_CTX_GFXP_RTVCB_CTXSW] != 0U) { - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_GFXP_RTVCB_CTXSW], - &gr_ctx->gfxp_rtvcb_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate gfxp rtvcb"); - goto fail_free_betacb; - } - } - return 0; - -fail_free_betacb: - nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); -fail_free_spill: - nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); -fail_free_preempt: - nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); -fail: - return err; -} - -int nvgpu_gr_ctx_alloc_ctxsw_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) -{ - int err = 0; - - /* nothing to do if already initialized */ - if (nvgpu_mem_is_valid(&gr_ctx->preempt_ctxsw_buffer)) { - return 0; - } - - if (gr_ctx_desc->size[NVGPU_GR_CTX_PREEMPT_CTXSW] == 0U || - gr_ctx_desc->size[NVGPU_GR_CTX_SPILL_CTXSW] == 0U || - gr_ctx_desc->size[NVGPU_GR_CTX_BETACB_CTXSW] == 0U || - gr_ctx_desc->size[NVGPU_GR_CTX_PAGEPOOL_CTXSW] == 0U) { - return -EINVAL; - } - - err = nvgpu_gr_ctx_alloc_preemption_buffers(g, gr_ctx, - gr_ctx_desc, vm); - - if (err != 0) { - nvgpu_err(g, "cannot allocate preemption buffers"); - goto fail; - } - - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_PAGEPOOL_CTXSW], - &gr_ctx->pagepool_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate page pool"); - goto fail; - } - - return 0; - -fail: - return err; -} - -struct nvgpu_mem *nvgpu_gr_ctx_get_preempt_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->preempt_ctxsw_buffer; -} - -struct 
nvgpu_mem *nvgpu_gr_ctx_get_spill_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->spill_ctxsw_buffer; -} - -struct nvgpu_mem *nvgpu_gr_ctx_get_betacb_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->betacb_ctxsw_buffer; -} - -struct nvgpu_mem *nvgpu_gr_ctx_get_pagepool_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->pagepool_ctxsw_buffer; -} - -struct nvgpu_mem *nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->gfxp_rtvcb_ctxsw_buffer; -} #endif /* CONFIG_NVGPU_GFXP */ #ifdef CONFIG_NVGPU_CILP @@ -969,9 +673,10 @@ void nvgpu_gr_ctx_set_cilp_preempt_pending(struct nvgpu_gr_ctx *gr_ctx, void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; u32 tmp; - tmp = g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); + tmp = g->ops.gr.ctxsw_prog.get_patch_count(g, mem); if (tmp == 0U) { gr_ctx->patch_ctx.data_count = 0; } @@ -979,63 +684,82 @@ void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g, void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { - g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, + struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings; + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + + g->ops.gr.ctxsw_prog.set_patch_count(g, mem, gr_ctx->patch_ctx.data_count); - g->ops.gr.ctxsw_prog.set_patch_addr(g, &gr_ctx->mem, - gr_ctx->patch_ctx.mem.gpu_va); + g->ops.gr.ctxsw_prog.set_patch_addr(g, mem, + nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PATCH_CTX)); } -int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g, +static int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) + struct nvgpu_gr_ctx_desc *gr_ctx_desc) { - struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; int err; - if (pm_ctx->mem.gpu_va != 0ULL) { - return 0; - } - err = nvgpu_dma_alloc_sys(g, gr_ctx_desc->size[NVGPU_GR_CTX_PM_CTX], - &pm_ctx->mem); + &gr_ctx->mem[NVGPU_GR_CTX_PM_CTX]); if (err != 0) { nvgpu_err(g, "failed to allocate pm ctx buffer"); return err; } - pm_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, - &pm_ctx->mem, - NVGPU_VM_MAP_CACHEABLE, - gk20a_mem_flag_none, true, - pm_ctx->mem.aperture); - if (pm_ctx->mem.gpu_va == 0ULL) { - nvgpu_err(g, - "failed to map pm ctxt buffer"); - nvgpu_dma_free(g, &pm_ctx->mem); - return -ENOMEM; - } - return 0; } -void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct nvgpu_gr_ctx *gr_ctx) +static void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { - struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; - - if (pm_ctx->mem.gpu_va != 0ULL) { - nvgpu_dma_unmap_free(vm, &pm_ctx->mem); + if (nvgpu_mem_is_valid(&gr_ctx->mem[NVGPU_GR_CTX_PM_CTX])) { + nvgpu_dma_free(g, &gr_ctx->mem[NVGPU_GR_CTX_PM_CTX]); } (void)g; } -struct nvgpu_mem *nvgpu_gr_ctx_get_pm_ctx_mem(struct nvgpu_gr_ctx *gr_ctx) +int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g, + struct nvgpu_tsg *tsg, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct nvgpu_gr_hwpm_map *hwpm_map) { - return &gr_ctx->pm_ctx.mem; + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + struct nvgpu_gr_ctx_mappings *mappings; + int ret; + + if (gr_ctx->pm_ctx.mapped) { + return 0; + } + + mappings = nvgpu_gr_ctx_get_mappings(tsg); + if (mappings == NULL) { + nvgpu_err(g, "gr_ctx mappings struct not allocated"); + return -ENOMEM; + } + + nvgpu_gr_ctx_set_size(gr_ctx_desc, + NVGPU_GR_CTX_PM_CTX, + 
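/*
 * The PM context is now sized from the hwpm map and allocated/mapped
 * lazily. Sketch of a debugger-path caller — this assumes the nvgpu_gr
 * instance exposes gr_ctx_desc and hwpm_map fields; only the function
 * itself is guaranteed by this patch:
 *
 *   err = nvgpu_gr_ctx_alloc_map_pm_ctx(g, tsg,
 *                   gr->gr_ctx_desc, gr->hwpm_map);
 */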
nvgpu_gr_hwpm_map_get_size(hwpm_map)); + + ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, gr_ctx_desc); + if (ret != 0) { + nvgpu_err(g, + "failed to allocate pm ctxt buffer"); + return ret; + } + + ret = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx, + NVGPU_GR_CTX_PM_CTX, mappings); + if (ret != 0) { + nvgpu_err(g, "gr_ctx pm_ctx buffer map failed %d", ret); + nvgpu_gr_ctx_free_pm_ctx(g, gr_ctx); + return ret; + } + + return 0; } void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode) @@ -1050,9 +774,11 @@ u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx) u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + if (!gr_ctx->ctx_id_valid) { gr_ctx->ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g, - &gr_ctx->mem); + mem); gr_ctx->ctx_id_valid = true; } @@ -1089,25 +815,30 @@ bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close( int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool enable) { - if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + + if (!nvgpu_mem_is_valid(mem)) { nvgpu_err(g, "no graphics context allocated"); return -EFAULT; } - g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, &gr_ctx->mem, enable); + g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, mem, enable); return 0; } -int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - u32 mode, bool *skip_update) +int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + u32 mode, u64 *pm_ctx_gpu_va, bool *skip_update) { + struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings; + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; int ret = 0; *skip_update = false; - if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + if (!nvgpu_mem_is_valid(mem)) { nvgpu_err(g, "no graphics context allocated"); return -EFAULT; } @@ -1127,7 +858,8 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, return 0; } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw(); - pm_ctx->gpu_va = pm_ctx->mem.gpu_va; + *pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PM_CTX); break; case NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW: if (pm_ctx->pm_mode == @@ -1137,7 +869,7 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); - pm_ctx->gpu_va = 0; + *pm_ctx_gpu_va = 0; break; case NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: if (pm_ctx->pm_mode == @@ -1147,7 +879,8 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw(); - pm_ctx->gpu_va = pm_ctx->mem.gpu_va; + *pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PM_CTX); break; default: nvgpu_err(g, "invalid hwpm context switch mode"); @@ -1160,13 +893,21 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { - g->ops.gr.ctxsw_prog.set_pm_mode(g, &gr_ctx->mem, - gr_ctx->pm_ctx.pm_mode); + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + + g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode); } -void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) +void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, 
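/*
 * pm_ctx.gpu_va is gone; the VA is handed back by
 * nvgpu_gr_ctx_prepare_hwpm_mode() and threaded through to the setter.
 * Illustrative sequence (error handling elided):
 *
 *   u64 pm_va;
 *   bool skip;
 *   err = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, &pm_va,
 *                   &skip);
 *   if ((err == 0) && !skip)
 *           nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, pm_va);
 */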
struct nvgpu_gr_ctx *gr_ctx, + u64 pm_ctx_gpu_va) { - g->ops.gr.ctxsw_prog.set_pm_ptr(g, &gr_ctx->mem, - gr_ctx->pm_ctx.gpu_va); + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + + g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, pm_ctx_gpu_va); +} + +void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped) +{ + ctx->pm_ctx.mapped = mapped; } #endif /* CONFIG_NVGPU_DEBUGGER */ diff --git a/drivers/gpu/nvgpu/common/gr/ctx_mappings.c b/drivers/gpu/nvgpu/common/gr/ctx_mappings.c new file mode 100644 index 000000000..541066a11 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctx_mappings.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "common/gr/ctx_mappings_priv.h" + +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create(struct gk20a *g, + struct nvgpu_tsg *tsg, struct vm_gk20a *vm) +{ + struct nvgpu_gr_ctx_mappings *mappings = NULL; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (tsg == NULL || vm == NULL) { + return NULL; + } + + mappings = (struct nvgpu_gr_ctx_mappings *) + nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx_mappings)); + if (mappings == NULL) { + nvgpu_err(g, "failed to alloc mappings"); + return NULL; + } + + nvgpu_vm_get(vm); + mappings->tsg = tsg; + mappings->vm = vm; + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return mappings; +} + +void nvgpu_gr_ctx_mappings_free(struct gk20a *g, + struct nvgpu_gr_ctx_mappings *mappings) +{ + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_vm_put(mappings->vm); + nvgpu_kfree(g, mappings); + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +int nvgpu_gr_ctx_mappings_map_ctx_buffer(struct gk20a *g, + struct nvgpu_gr_ctx *ctx, u32 index, + struct nvgpu_gr_ctx_mappings *mappings) +{ + struct vm_gk20a *vm = mappings->vm; + struct nvgpu_mem *mem; + u32 mapping_flags; + u64 gpu_va; + + nvgpu_log(g, gpu_dbg_gr, " "); + + mem = nvgpu_gr_ctx_get_ctx_mem(ctx, index); + mapping_flags = nvgpu_gr_ctx_get_ctx_mapping_flags(ctx, index); + + nvgpu_assert(mappings->ctx_buffer_va[index] == 0ULL); + + if (nvgpu_mem_is_valid(mem)) { + gpu_va = nvgpu_gmmu_map(vm, + mem, + mapping_flags, + gk20a_mem_flag_none, true, + mem->aperture); + if (gpu_va == 0ULL) { + nvgpu_err(g, "failed to map ctx buffer %u", index); + return -ENOMEM; + } + + mappings->ctx_buffer_va[index] = gpu_va; + + nvgpu_log(g, gpu_dbg_gr, "buffer[%u] mapped at address 0x%llx", index, gpu_va); + +#ifdef CONFIG_NVGPU_DEBUGGER + if (index == NVGPU_GR_CTX_PM_CTX) { + nvgpu_gr_ctx_set_pm_ctx_mapped(ctx, true); + } +#endif + } else { + nvgpu_log(g, gpu_dbg_gr, "buffer not allocated"); + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return 0; +} + +static void nvgpu_gr_ctx_mappings_unmap_ctx_buffer(struct nvgpu_gr_ctx *ctx, + u32 index, struct nvgpu_gr_ctx_mappings *mappings) +{ + struct vm_gk20a *vm = mappings->vm; + struct nvgpu_mem *mem; + + mem = nvgpu_gr_ctx_get_ctx_mem(ctx, index); + + if (nvgpu_mem_is_valid(mem) && + (mappings->ctx_buffer_va[index] != 0ULL)) { + nvgpu_gmmu_unmap_addr(vm, mem, mappings->ctx_buffer_va[index]); + mappings->ctx_buffer_va[index] = 0ULL; + +#ifdef CONFIG_NVGPU_DEBUGGER + if (index == NVGPU_GR_CTX_PM_CTX) { + nvgpu_gr_ctx_set_pm_ctx_mapped(ctx, false); + } +#endif + } +} + +static void nvgpu_gr_ctx_mappings_unmap_ctx_buffers(struct nvgpu_gr_ctx *ctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + u32 i; + + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + nvgpu_gr_ctx_mappings_unmap_ctx_buffer(ctx, i, mappings); + } +} + +static int nvgpu_gr_ctx_mappings_map_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + int err = 0; + u32 i; + + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings); + if (err != 0) { + nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err); + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(ctx, mappings); + return err; + } + } + + return err; +} + +#ifdef CONFIG_NVGPU_GFXP +static void nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers( + struct nvgpu_gr_ctx *ctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + u32 i; + + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; + i <= 
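/*
 * Note: this loop (like the alloc/free loops in ctx.c) relies on
 * NVGPU_GR_CTX_PREEMPT_CTXSW through NVGPU_GR_CTX_GFXP_RTVCB_CTXSW
 * forming a contiguous index range in the NVGPU_GR_CTX_* enum that
 * covers the preempt, spill, betacb, pagepool and GFXP RTV circular
 * buffers.
 */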
NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { + nvgpu_gr_ctx_mappings_unmap_ctx_buffer(ctx, i, mappings); + } +} + +int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + int err = 0; + u32 i; + + nvgpu_log(g, gpu_dbg_gr, " "); + + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; + i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { + if (mappings->ctx_buffer_va[i] == 0ULL) { + err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings); + if (err != 0) { + nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err); + nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers(ctx, mappings); + return err; + } + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return err; +} +#endif + +static int nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + u32 va_type, u32 buffer_type, u32 buffer_vpr_type, + bool vpr, struct nvgpu_gr_ctx_mappings *mappings) +{ + struct vm_gk20a *vm = mappings->vm; + u64 *g_bfr_va; + u32 *g_bfr_index; + u64 gpu_va = 0ULL; + + (void)vpr; + (void)buffer_vpr_type; + + g_bfr_va = &mappings->global_ctx_buffer_va[0]; + g_bfr_index = &mappings->global_ctx_buffer_index[0]; + +#ifdef CONFIG_NVGPU_VPR + if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, + buffer_vpr_type)) { + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + buffer_vpr_type, + vm, true); + g_bfr_index[va_type] = buffer_vpr_type; + } else { +#endif + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + buffer_type, + vm, true); + g_bfr_index[va_type] = buffer_type; +#ifdef CONFIG_NVGPU_VPR + } +#endif + if (gpu_va == 0ULL) { + goto clean_up; + } + + g_bfr_va[va_type] = gpu_va; + + return 0; + +clean_up: + return -ENOMEM; +} + +static void nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers( + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_mappings *mappings) +{ + u64 *g_bfr_va = &mappings->global_ctx_buffer_va[0]; + u32 *g_bfr_index = &mappings->global_ctx_buffer_index[0]; + struct vm_gk20a *vm = mappings->vm; + u32 i; + + for (i = 0U; i < NVGPU_GR_GLOBAL_CTX_VA_COUNT; i++) { + if (g_bfr_va[i] != 0ULL) { + nvgpu_gr_global_ctx_buffer_unmap(global_ctx_buffer, + g_bfr_index[i], vm, g_bfr_va[i]); + } + } + + (void) memset(g_bfr_va, 0, sizeof(mappings->global_ctx_buffer_va)); + (void) memset(g_bfr_index, 0, sizeof(mappings->global_ctx_buffer_index)); +} + +static int nvgpu_gr_ctx_mappings_map_global_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_mappings *mappings, bool vpr) +{ + int err; + + /* + * MIG supports only compute class. + * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB + * if 2D/3D/I2M classes(graphics) are supported. 
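 * Under MIG, the block below is skipped entirely, so only the
 * PRIV_ACCESS_MAP mapping (and, when NVGPU_FECS_TRACE_VA is enabled,
 * the FECS trace buffer mapping) further down is created for the
 * context.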
+ */ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* Circular Buffer */ + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA, + NVGPU_GR_GLOBAL_CTX_CIRCULAR, +#ifdef CONFIG_NVGPU_VPR + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR, +#else + NVGPU_GR_GLOBAL_CTX_CIRCULAR, +#endif + vpr, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx circular buffer"); + goto fail; + } + + /* Attribute Buffer */ + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, +#ifdef CONFIG_NVGPU_VPR + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR, +#else + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, +#endif + vpr, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx attribute buffer"); + goto fail; + } + + /* Page Pool */ + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL, +#ifdef CONFIG_NVGPU_VPR + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, +#else + NVGPU_GR_GLOBAL_CTX_PAGEPOOL, +#endif + vpr, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx pagepool buffer"); + goto fail; + } +#ifdef CONFIG_NVGPU_GRAPHICS + /* + * RTV circular buffer. Note that this is non-VPR buffer always. + */ + if (nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER)) { + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER, + false, mappings); + if (err != 0) { + nvgpu_err(g, + "cannot map ctx rtv circular buffer"); + goto fail; + } + } +#endif + } + + /* Priv register Access Map. Note that this is non-VPR buffer always. */ + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, + false, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx priv access buffer"); + goto fail; + } + +#ifdef CONFIG_NVGPU_FECS_TRACE + /* FECS trace buffer. Note that this is non-VPR buffer always. 
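 * The VA recorded here is later written into the context image by
 * nvgpu_gr_fecs_trace_bind_channel() via
 * nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
 * NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA), as updated elsewhere in
 * this patch.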
*/ + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER, + false, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx fecs trace buffer"); + goto fail; + } + } +#endif + + return 0; + +fail: + nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers( + global_ctx_buffer, mappings); + return err; +} + +int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_mappings *mappings, + bool vpr) +{ + int err; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (gr_ctx == NULL || global_ctx_buffer == NULL || + mappings == NULL) { + nvgpu_err(g, "mappings/gr_ctx/global_ctx_buffer struct null"); + return -EINVAL; + } + + err = nvgpu_gr_ctx_mappings_map_ctx_buffers(g, gr_ctx, mappings); + if (err != 0) { + nvgpu_err(g, "fail to map ctx buffers"); + return err; + } + + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffers(g, + global_ctx_buffer, mappings, vpr); + if (err != 0) { + nvgpu_err(g, "fail to map global ctx buffer"); + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, mappings); + return err; + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return err; +} + +void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_mappings *mappings) +{ + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers(global_ctx_buffer, + mappings); + + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, mappings); + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +u64 nvgpu_gr_ctx_mappings_get_global_ctx_va(struct nvgpu_gr_ctx_mappings *mappings, + u32 index) +{ + nvgpu_assert(index < NVGPU_GR_GLOBAL_CTX_VA_COUNT); + return mappings->global_ctx_buffer_va[index]; +} + +u64 nvgpu_gr_ctx_mappings_get_ctx_va(struct nvgpu_gr_ctx_mappings *mappings, + u32 index) +{ + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + return mappings->ctx_buffer_va[index]; +} diff --git a/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h b/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h new file mode 100644 index 000000000..34b3e6722 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CTX_MAPPINGS_PRIV_H +#define NVGPU_GR_CTX_MAPPINGS_PRIV_H + +#include + +struct nvgpu_tsg; +struct vm_gk20a; + +struct nvgpu_gr_ctx_mappings { + + /** TSG whose gr ctx mappings are tracked in this object */ + struct nvgpu_tsg *tsg; + + /** GPU virtual address space to which gr ctx buffers are mapped */ + struct vm_gk20a *vm; + + /** + * Array to store GPU virtual addresses of all TSG context + * buffers. + */ + u64 ctx_buffer_va[NVGPU_GR_CTX_COUNT]; + + /** + * Array to store GPU virtual addresses of all global context + * buffers. + */ + u64 global_ctx_buffer_va[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; + + /** + * Array to store indexes of global context buffers + * corresponding to GPU virtual addresses above. + */ + u32 global_ctx_buffer_index[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; +}; +#endif /* NVGPU_GR_CTX_MAPPINGS_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/ctx_priv.h b/drivers/gpu/nvgpu/common/gr/ctx_priv.h index 3ecdecc29..a47228bfd 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx_priv.h +++ b/drivers/gpu/nvgpu/common/gr/ctx_priv.h @@ -31,11 +31,6 @@ struct nvgpu_mem; * Pointer to this structure is maintained in #nvgpu_gr_ctx structure. */ struct patch_desc { - /** - * Memory to hold patch context buffer. - */ - struct nvgpu_mem mem; - /** * Count of entries written into patch context buffer. */ @@ -51,9 +46,8 @@ struct zcull_ctx_desc { #ifdef CONFIG_NVGPU_DEBUGGER struct pm_ctx_desc { - struct nvgpu_mem mem; - u64 gpu_va; u32 pm_mode; + bool mapped; }; #endif @@ -100,17 +94,21 @@ struct nvgpu_gr_ctx { bool ctx_id_valid; /** - * Memory to hold graphics context buffer. + * Array to store all GR context buffers. */ - struct nvgpu_mem mem; + struct nvgpu_mem mem[NVGPU_GR_CTX_COUNT]; -#ifdef CONFIG_NVGPU_GFXP - struct nvgpu_mem preempt_ctxsw_buffer; - struct nvgpu_mem spill_ctxsw_buffer; - struct nvgpu_mem betacb_ctxsw_buffer; - struct nvgpu_mem pagepool_ctxsw_buffer; - struct nvgpu_mem gfxp_rtvcb_ctxsw_buffer; -#endif + /** + * Cacheability flags for mapping the context buffers. + */ + u32 mapping_flags[NVGPU_GR_CTX_COUNT]; + + /** + * Pointer to structure that holds GPU mapping of context buffers. + * These mappings will exist for the lifetime of TSG when the + * subcontexts are not enabled. + */ + struct nvgpu_gr_ctx_mappings *mappings; /** * Patch context buffer descriptor struct. @@ -146,18 +144,6 @@ struct nvgpu_gr_ctx { bool boosted_ctx; #endif - /** - * Array to store GPU virtual addresses of all global context - * buffers. - */ - u64 global_ctx_buffer_va[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; - - /** - * Array to store indexes of global context buffers - * corresponding to GPU virtual addresses above. - */ - u32 global_ctx_buffer_index[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; - /** * TSG identifier corresponding to the graphics context. 
*/ diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace.c index 2ce3614e8..0d65e2953 100644 --- a/drivers/gpu/nvgpu/common/gr/fecs_trace.c +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -607,7 +608,8 @@ int nvgpu_gr_fecs_trace_reset(struct gk20a *g) */ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx, - struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + pid_t pid, u32 vmid) { u64 addr = 0ULL; struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; @@ -636,7 +638,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, } if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA); nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr); aperture_mask = 0; @@ -650,7 +652,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, return -ENOMEM; } - mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr, GK20A_FECS_TRACE_NUM_RECORDS); diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx.c b/drivers/gpu/nvgpu/common/gr/global_ctx.c index cae66ed92..abb34946c 100644 --- a/drivers/gpu/nvgpu/common/gr/global_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/global_ctx.c @@ -265,7 +265,6 @@ fail: return err; } - int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *desc) { @@ -315,9 +314,32 @@ clean_up: return err; } +void nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc) +{ + u32 i; + + nvgpu_log(g, gpu_dbg_gr, " "); + + /** + * Map all ctx buffers as cacheable except PRIV_ACCESS_MAP, + * RTV_CIRCULAR_BUFFER and FECS_TRACE buffers. + */ + for (i = 0; i < NVGPU_GR_GLOBAL_CTX_COUNT; i++) { + desc[i].mapping_flags = NVGPU_VM_MAP_CACHEABLE; + } + + desc[NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP].mapping_flags = 0U; + desc[NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER].mapping_flags = 0U; +#ifdef CONFIG_NVGPU_FECS_TRACE + desc[NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER].mapping_flags = 0U; +#endif + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc, - u32 index, - struct vm_gk20a *vm, u32 flags, bool priv) + u32 index, struct vm_gk20a *vm, bool priv) { u64 gpu_va; @@ -326,7 +348,7 @@ u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc, } gpu_va = nvgpu_gmmu_map(vm, &desc[index].mem, - flags, gk20a_mem_flag_none, priv, + desc[index].mapping_flags, gk20a_mem_flag_none, priv, desc[index].mem.aperture); return gpu_va; } diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h b/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h index 06647c6f4..be9b275d3 100644 --- a/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h +++ b/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -40,6 +40,11 @@ struct nvgpu_gr_global_ctx_buffer_desc { */ size_t size; + /** + * Cacheability flags for mapping this context buffer. + */ + u32 mapping_flags; + /** * Function pointer to free global context buffer. */ diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index a76655019..756dd7091 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -131,6 +131,9 @@ static int gr_alloc_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr *gr) return err; } + nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(g, + gr->global_ctx_buffer); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); return 0; } diff --git a/drivers/gpu/nvgpu/common/gr/gr_setup.c b/drivers/gpu/nvgpu/common/gr/gr_setup.c index df790b4ff..8dd089626 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_setup.c +++ b/drivers/gpu/nvgpu/common/gr/gr_setup.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_NVGPU_GRAPHICS @@ -163,6 +164,7 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, struct nvgpu_tsg *tsg = NULL; int err = 0; struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + struct nvgpu_gr_ctx_mappings *mappings = NULL; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "GR%u: allocate object context for channel %u", @@ -203,18 +205,27 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, gr_ctx = tsg->gr_ctx; - if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) { + mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c->vm); + if (mappings == NULL) { + nvgpu_err(g, "fail to allocate/get ctx mappings struct"); + nvgpu_mutex_release(&tsg->ctx_init_lock); + goto out; + } + + if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_CTX))) { tsg->vm = c->vm; nvgpu_vm_get(tsg->vm); err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image, gr->global_ctx_buffer, gr->gr_ctx_desc, gr->config, gr_ctx, c->subctx, - tsg->vm, &c->inst_block, class_num, flags, + mappings, &c->inst_block, class_num, flags, c->cde, c->vpr); if (err != 0) { nvgpu_err(g, "failed to allocate gr ctx buffer"); + nvgpu_gr_ctx_free_mappings(g, gr_ctx); nvgpu_mutex_release(&tsg->ctx_init_lock); nvgpu_vm_put(tsg->vm); tsg->vm = NULL; @@ -225,13 +236,13 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, } else { /* commit gr ctx buffer */ nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx, - c->subctx, nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); + c->subctx, mappings); } #ifdef CONFIG_NVGPU_FECS_TRACE if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) { err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block, - c->subctx, gr_ctx, tsg->tgid, 0); + c->subctx, gr_ctx, mappings, tsg->tgid, 0); if (err != 0) { nvgpu_warn(g, "fail to bind channel for ctxsw trace"); @@ -275,22 +286,27 @@ out: } void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem; + nvgpu_log_fn(g, " "); - if ((gr_ctx != NULL) && - nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) { + if (gr_ctx != NULL) { + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); + if (!nvgpu_mem_is_valid(mem)) { + return; + } + #ifdef CONFIG_DEBUG_FS if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) && nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close( g->gr->gr_ctx_desc)) { - 
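/*
 * With the vm parameter dropped from free_gr_ctx, teardown reduces to
 * the call already shown in the tsg.c hunk of this patch:
 *
 *   if ((tsg->gr_ctx != NULL) && (tsg->vm != NULL)) {
 *           g->ops.gr.setup.free_gr_ctx(g, tsg->gr_ctx);
 *   }
 *
 * The VM reference taken by the mappings object is dropped inside
 * nvgpu_gr_ctx_free_mappings().
 */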
g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, - nvgpu_gr_ctx_get_ctx_mem(gr_ctx)); + g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, mem); } #endif - nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer, vm); + nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer); } } @@ -334,16 +350,14 @@ static bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode, return true; } - - int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, u32 graphics_preempt_mode, u32 compute_preempt_mode, u32 gr_instance_id) { + struct nvgpu_gr_ctx_mappings *mappings; struct nvgpu_gr_ctx *gr_ctx; struct gk20a *g = ch->g; struct nvgpu_tsg *tsg; - struct vm_gk20a *vm; struct nvgpu_gr *gr; u32 class_num; int err = 0; @@ -365,7 +379,6 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, return -EINVAL; } - vm = tsg->vm; gr_ctx = tsg->gr_ctx; if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode, @@ -379,13 +392,35 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, graphics_preempt_mode, compute_preempt_mode); err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr->config, - gr->gr_ctx_desc, gr_ctx, vm, class_num, + gr->gr_ctx_desc, gr_ctx, class_num, graphics_preempt_mode, compute_preempt_mode); if (err != 0) { nvgpu_err(g, "set_ctxsw_preemption_mode failed"); return err; } + mappings = nvgpu_gr_ctx_get_mappings(tsg); + if (mappings == NULL) { + nvgpu_err(g, "failed to get gr_ctx mappings"); + return -EINVAL; + } + +#ifdef CONFIG_NVGPU_GFXP + err = nvgpu_gr_ctx_alloc_ctx_preemption_buffers(g, + gr->gr_ctx_desc, gr_ctx); + if (err != 0) { + nvgpu_err(g, "fail to allocate ctx preemption buffers"); + return err; + } + + err = nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(g, + gr_ctx, mappings); + if (err != 0) { + nvgpu_err(g, "fail to map ctx preemption buffers"); + return err; + } + #endif + g->ops.tsg.disable(tsg); err = nvgpu_preempt_channel(g, ch); @@ -395,7 +430,7 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, } nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx, - ch->subctx); + ch->subctx, mappings); if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c index b474c7799..273674952 100644 --- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c @@ -29,6 +29,7 @@ #include #endif #include +#include #include #include #include @@ -48,19 +49,22 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g, } void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, - struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, u64 gpu_va) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings) { struct nvgpu_mem *ctxheader; + u64 gpu_va; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { - nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va); + nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, mappings); ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx); nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, ctxheader->gpu_va); } else { + gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX); nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va); } @@ -70,7 +74,7 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, #if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) static int 
nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 flags) { int err; @@ -122,7 +126,7 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, } err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, - gr_ctx_desc, gr_ctx, vm, class_num, graphics_preempt_mode, + gr_ctx_desc, gr_ctx, class_num, graphics_preempt_mode, compute_preempt_mode); if (err != 0) { nvgpu_err(g, "set_ctxsw_preemption_mode failed"); @@ -138,14 +142,13 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, #ifdef CONFIG_NVGPU_GRAPHICS static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, u32 graphics_preempt_mode) { int err = 0; (void)config; (void)gr_ctx_desc; - (void)vm; /* set preemption modes */ switch (graphics_preempt_mode) { @@ -166,6 +169,13 @@ static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g, nvgpu_log_info(g, "gfxp context attrib_cb_size=%d", attrib_cb_size); + if ((nvgpu_gr_ctx_get_size(gr_ctx_desc, + NVGPU_GR_CTX_PREEMPT_CTXSW) == 0U) || + (spill_size == 0U) || (attrib_cb_size == 0U) || + (pagepool_size == 0U)) { + return -EINVAL; + } + nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_SPILL_CTXSW, spill_size); nvgpu_gr_ctx_set_size(gr_ctx_desc, @@ -179,13 +189,6 @@ static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size); } - err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx, - gr_ctx_desc, vm); - if (err != 0) { - nvgpu_err(g, "cannot allocate ctxsw buffers"); - return err; - } - nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx, graphics_preempt_mode); break; @@ -226,7 +229,7 @@ static int nvgpu_gr_obj_ctx_set_compute_preemption_mode(struct gk20a *g, int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num, + struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 graphics_preempt_mode, u32 compute_preempt_mode) { int err = 0; @@ -243,7 +246,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, #ifdef CONFIG_NVGPU_GRAPHICS err = nvgpu_gr_obj_ctx_set_graphics_preemption_mode(g, config, - gr_ctx_desc, gr_ctx, vm, graphics_preempt_mode); + gr_ctx_desc, gr_ctx, graphics_preempt_mode); if (err != 0) { goto fail; @@ -259,7 +262,8 @@ fail: void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, - struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings) { #ifdef CONFIG_NVGPU_GFXP u64 addr; @@ -269,6 +273,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, (void)config; (void)subctx; + (void)mappings; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); @@ -280,35 +285,35 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, } if (!nvgpu_mem_is_valid( - nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx))) { + nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_PREEMPT_CTXSW))) { goto done; } if (subctx != NULL) { - nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, - gr_ctx); + nvgpu_gr_subctx_set_preemption_buffer_va(g, 
subctx, mappings); } else { - nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx); + nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx, mappings); } nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); - addr = nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->gpu_va; - g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_BETACB_CTXSW); + g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, mappings, nvgpu_gr_config_get_tpc_count(config), nvgpu_gr_config_get_max_tpc_count(config), addr, true); - mem = nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(gr_ctx); - addr = mem->gpu_va; + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_PAGEPOOL_CTXSW); + addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PAGEPOOL_CTXSW); nvgpu_assert(mem->size <= U32_MAX); size = (u32)mem->size; g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size, true, false); - mem = nvgpu_gr_ctx_get_spill_ctxsw_buffer(gr_ctx); - addr = mem->gpu_va; + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_SPILL_CTXSW); + addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_SPILL_CTXSW); nvgpu_assert(mem->size <= U32_MAX); size = (u32)mem->size; @@ -321,7 +326,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, } if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) { - g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true); + g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, mappings, true); } nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); @@ -333,7 +338,10 @@ done: void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch) + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, + bool patch) { u64 addr; u32 size; @@ -351,7 +359,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, */ if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { /* global pagepool buffer */ - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA); size = nvgpu_safe_cast_u64_to_u32(nvgpu_gr_global_ctx_get_size( global_ctx_buffer, @@ -361,7 +369,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, patch, true); /* global bundle cb */ - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA); size = nvgpu_safe_cast_u64_to_u32( g->ops.gr.init.get_bundle_cb_default_size(g)); @@ -370,10 +378,10 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, patch); /* global attrib cb */ - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA); - g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, + g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, mappings, nvgpu_gr_config_get_tpc_count(config), nvgpu_gr_config_get_max_tpc_count(config), addr, patch); @@ -383,7 +391,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, #ifdef CONFIG_NVGPU_GRAPHICS if (g->ops.gr.init.commit_rtv_cb != NULL) { /* RTV circular buffer */ - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA); g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch); @@ -546,7 +554,8 @@ clean_up: static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, 
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings) { int err = 0; struct netlist_av_list *sw_method_init = @@ -562,7 +571,7 @@ static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, g->ops.gr.init.fe_go_idle_timeout(g, false); nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, - config, gr_ctx, false); + config, gr_ctx, mappings, false); if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { /* override a few ctx state registers */ @@ -635,7 +644,7 @@ static int nvgpu_gr_obj_ctx_save_golden_ctx(struct gk20a *g, nvgpu_log(g, gpu_dbg_gr, " "); - gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); #ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION @@ -707,6 +716,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_mem *inst_block) { int err = 0; @@ -731,7 +741,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, } err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer, - config, gr_ctx); + config, gr_ctx, mappings); if (err != 0) { goto clean_up; } @@ -778,27 +788,71 @@ clean_up: return err; } -static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g, +static void nvgpu_gr_obj_ctx_gr_ctx_set_size(struct gk20a *g, struct nvgpu_gr_obj_ctx_golden_image *golden_image, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_ctx *gr_ctx, - struct vm_gk20a *vm) + struct nvgpu_gr_ctx_desc *gr_ctx_desc) { u64 size; - int err = 0; - - nvgpu_log_fn(g, " "); size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX, nvgpu_safe_cast_u64_to_u32(size)); nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size); - err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr_ctx_desc, vm); +} + +static void nvgpu_gr_obj_ctx_patch_ctx_set_size(struct gk20a *g, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx_desc *gr_ctx_desc) +{ + u32 size; + + size = nvgpu_safe_mult_u32( + g->ops.gr.init.get_patch_slots(g, config), + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY); + nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_PATCH_CTX, size); + + nvgpu_log(g, gpu_dbg_gr, "patch_ctx size = %u", size); +} + +static int nvgpu_gr_obj_ctx_alloc_buffers(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image *golden_image, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, + u32 class_num, u32 flags) +{ + int err; + + (void)class_num; + (void)flags; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_gr_obj_ctx_gr_ctx_set_size(g, golden_image, gr_ctx_desc); + + nvgpu_gr_obj_ctx_patch_ctx_set_size(g, config, gr_ctx_desc); + + nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + +#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) + err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config, + gr_ctx_desc, gr_ctx, class_num, flags); if (err != 0) { + nvgpu_err(g, "fail to init preemption mode"); + return err; + } +#endif + + err = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc, gr_ctx); + if (err != 0) { + nvgpu_err(g, "fail to allocate ctx buffers"); return err; } - return 0; + nvgpu_log(g, gpu_dbg_gr, "done"); + + return err; } int 
nvgpu_gr_obj_ctx_alloc(struct gk20a *g, @@ -808,69 +862,40 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, - struct vm_gk20a *vm, + struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_mem *inst_block, u32 class_num, u32 flags, bool cde, bool vpr) { int err = 0; - (void)class_num; - (void)flags; - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); - err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, golden_image, gr_ctx_desc, - gr_ctx, vm); + err = nvgpu_gr_obj_ctx_alloc_buffers(g, golden_image, gr_ctx_desc, + config, gr_ctx, class_num, flags); if (err != 0) { - nvgpu_err(g, "fail to allocate TSG gr ctx buffer"); + nvgpu_err(g, "failed to alloc ctx buffers"); goto out; } - /* allocate patch buffer */ - if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx))) { - nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(g, gr_ctx); - nvgpu_gr_ctx_set_size(gr_ctx_desc, - NVGPU_GR_CTX_PATCH_CTX, - nvgpu_safe_mult_u32( - g->ops.gr.init.get_patch_slots(g, config), - PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)); - - err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, gr_ctx_desc, vm); - if (err != 0) { - nvgpu_err(g, "fail to allocate patch buffer"); - goto out; - } - } - -#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) - err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config, - gr_ctx_desc, gr_ctx, vm, class_num, flags); + err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, + global_ctx_buffer, mappings, vpr); if (err != 0) { - nvgpu_err(g, "fail to init preemption mode"); - goto out; - } -#endif - - /* map global buffer to channel gpu_va and commit */ - err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, - global_ctx_buffer, vm, vpr); - if (err != 0) { - nvgpu_err(g, "fail to map global ctx buffer"); + nvgpu_err(g, "failed to map ctx buffers"); goto out; } nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, - config, gr_ctx, true); + config, gr_ctx, mappings, true); /* commit gr ctx buffer */ - nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, - nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); + nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, mappings); /* init golden image */ err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image, - global_ctx_buffer, config, gr_ctx, inst_block); + global_ctx_buffer, config, gr_ctx, mappings, inst_block); if (err != 0) { nvgpu_err(g, "fail to init golden ctx image"); goto out; @@ -890,11 +915,11 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, #endif /* load golden image */ - nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, + nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, mappings, golden_image->local_golden_image, cde); nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx, - subctx); + subctx, mappings); #ifndef CONFIG_NVGPU_NON_FUSA if (g->ops.gpu_class.is_valid_compute(class_num) && diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c index f08bb6036..6cc6d5773 100644 --- a/drivers/gpu/nvgpu/common/gr/subctx.c +++ b/drivers/gpu/nvgpu/common/gr/subctx.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -81,23 +82,27 @@ void nvgpu_gr_subctx_free(struct gk20a *g, void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, struct nvgpu_gr_subctx *subctx, - struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va) + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings) { struct nvgpu_mem *ctxheader = &subctx->ctx_header; + u64 
gpu_va; + + gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX); #ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP /* set priv access map */ g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader, - nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA)); #endif g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader, - nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va); + nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PATCH_CTX)); #ifdef CONFIG_NVGPU_DEBUGGER g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader, - nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va); + nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PM_CTX)); #endif #ifdef CONFIG_NVGPU_GRAPHICS @@ -129,24 +134,26 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx #ifdef CONFIG_NVGPU_GFXP void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings) { + u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PREEMPT_CTXSW); + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &subctx->ctx_header, - nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va); + preempt_ctxsw_gpu_va); if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) { g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, - &subctx->ctx_header, - nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va); + &subctx->ctx_header, preempt_ctxsw_gpu_va); } } #endif /* CONFIG_NVGPU_GFXP */ #ifdef CONFIG_NVGPU_DEBUGGER void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_subctx *subctx, u64 pm_ctx_gpu_va) { g->ops.gr.ctxsw_prog.set_pm_ptr(g, &subctx->ctx_header, - nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va); + pm_ctx_gpu_va); } #endif diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c index 8b277bae1..2396676eb 100644 --- a/drivers/gpu/nvgpu/common/profiler/profiler.c +++ b/drivers/gpu/nvgpu/common/profiler/profiler.c @@ -506,8 +506,7 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g, return -EINVAL; } - pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(tsg->gr_ctx); - + pm_ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_PM_CTX); if (pm_ctx_mem == NULL) { nvgpu_err(g, "No PM context"); return -EINVAL; diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c index 9a5e2b555..a20bc5547 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c @@ -1,7 +1,7 @@ /* * Virtualized GPU Graphics * - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
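The subctx and profiler hunks above complete the pattern: the ctx header is populated from the per-TSG mappings rather than from nvgpu_mem handles embedded in gr_ctx, and nvgpu_gr_subctx_set_hwpm_ptr() now takes a plain GPU VA resolved by the caller. A minimal sketch, assuming valid subctx and mappings pointers; the example_* name is illustrative.

static void example_refresh_subctx_ptrs(struct gk20a *g,
		struct nvgpu_gr_subctx *subctx,
		struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	/* Main/patch/PM ctx GPU VAs are all read from the mappings. */
	nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, mappings);

#ifdef CONFIG_NVGPU_DEBUGGER
	/* The PM ctx VA is resolved here, not inside the helper. */
	nvgpu_gr_subctx_set_hwpm_ptr(g, subctx,
			nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
				NVGPU_GR_CTX_PM_CTX));
#endif
}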
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,7 +41,7 @@ #include "common/vgpu/ivc/comm_vgpu.h" void vgpu_gr_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_ctx *gr_ctx) { struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; @@ -56,4 +56,4 @@ void vgpu_gr_free_gr_ctx(struct gk20a *g, WARN_ON(err || msg.ret); (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); -} \ No newline at end of file +} diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h index 9f102d998..a30efe1c7 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +28,6 @@ struct nvgpu_gr_ctx; struct vm_gk20a; void vgpu_gr_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); + struct nvgpu_gr_ctx *gr_ctx); #endif diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c index 81c3c6ff8..c760c5420 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c @@ -85,7 +85,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, struct nvgpu_channel *ch; struct nvgpu_gr_ctx *gr_ctx; bool skip_update = false; - int err; + u64 pm_ctx_gpu_va = 0ULL; int ret; struct nvgpu_gr *gr = nvgpu_gr_get_instance_ptr(g, gr_instance_id); @@ -94,15 +94,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, gr_ctx = tsg->gr_ctx; if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { - nvgpu_gr_ctx_set_size(gr->gr_ctx_desc, - NVGPU_GR_CTX_PM_CTX, - nvgpu_gr_hwpm_map_get_size(gr->hwpm_map)); - - ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, - gr->gr_ctx_desc, tsg->vm); + ret = nvgpu_gr_ctx_alloc_map_pm_ctx(g, tsg, + gr->gr_ctx_desc, gr->hwpm_map); if (ret != 0) { nvgpu_err(g, - "failed to allocate pm ctxt buffer"); + "failed to allocate and map pm ctxt buffer"); return ret; } @@ -112,7 +108,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, } } - ret = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, &skip_update); + ret = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, + &pm_ctx_gpu_va, &skip_update); if (ret != 0) { return ret; } @@ -134,11 +131,12 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, nvgpu_rwsem_down_read(&tsg->ch_list_lock); nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { - nvgpu_gr_subctx_set_hwpm_ptr(g, ch->subctx, gr_ctx); + nvgpu_gr_subctx_set_hwpm_ptr(g, ch->subctx, + pm_ctx_gpu_va); } nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { - nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx); + nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, pm_ctx_gpu_va); } out: @@ -1523,7 +1521,8 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, if (!gr_ctx_ready) { gr_ctx_ready = true; } - current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_CTX); } else { err = gr_gk20a_get_pm_ctx_buffer_offsets(g, ctx_ops[i].offset, @@ -1539,7 +1538,9 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, } if (!pm_ctx_ready) { /* Make sure ctx buffer was initialized */ - if 
(!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx))) { + if (!nvgpu_mem_is_valid( + nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_PM_CTX))) { nvgpu_err(g, "Invalid ctx buffer"); err = -EINVAL; @@ -1547,14 +1548,16 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, } pm_ctx_ready = true; } - current_mem = nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx); + current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_PM_CTX); } for (j = 0; j < num_offsets; j++) { /* sanity check gr ctxt offsets, * don't write outside, worst case */ - if ((current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx)) && + if ((current_mem == nvgpu_gr_ctx_get_ctx_mem( + gr_ctx, NVGPU_GR_CTX_CTX)) && (offsets[j] >= nvgpu_gr_obj_ctx_get_golden_image_size( gr->golden_image))) { @@ -1581,7 +1584,9 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, offsets[j] + 4U, v); } - if (current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx) && + if (current_mem == + nvgpu_gr_ctx_get_ctx_mem( + gr_ctx, NVGPU_GR_CTX_CTX) && g->ops.gr.ctx_patch_smpc != NULL) { /* check to see if we need to add a special fix for some of the SMPC perf regs */ @@ -1617,7 +1622,7 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, nvgpu_kfree(g, offsets); } - if (nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->cpu_va != NULL) { + if (nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_PATCH_CTX)->cpu_va != NULL) { nvgpu_gr_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); } diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c index fcbc9f61e..3c71ad8c9 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c @@ -394,7 +394,7 @@ int gr_gm20b_update_pc_sampling(struct nvgpu_channel *c, } gr_ctx = tsg->gr_ctx; - mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); if (!nvgpu_mem_is_valid(mem) || c->vpr) { return -EINVAL; } diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c index d0064ae64..2c1f40a5f 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c @@ -767,7 +767,7 @@ int gr_gp10b_set_boosted_ctx(struct nvgpu_channel *ch, gr_ctx = tsg->gr_ctx; nvgpu_gr_ctx_set_boosted_ctx(gr_ctx, boost); - mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); err = nvgpu_channel_disable_tsg(g, ch); if (err != 0) { diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h index fc69b5a6b..5fd2d4586 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
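With the dedicated getters removed, validity checks in the HAL code above all funnel through the indexed accessor. A short sketch of the pattern; the helper itself is hypothetical.

static bool example_ctx_buffers_ready(struct nvgpu_gr_ctx *gr_ctx)
{
	/* NVGPU_GR_CTX_CTX is the main context image; NVGPU_GR_CTX_PM_CTX
	 * is the perfmon buffer that previously had its own getter. */
	return nvgpu_mem_is_valid(
			nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX)) &&
		nvgpu_mem_is_valid(
			nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_PM_CTX));
}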
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -34,6 +34,7 @@ struct gk20a; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_config; struct netlist_av_list; struct nvgpu_gr_config; @@ -71,8 +72,8 @@ u32 gm20b_gr_init_get_global_ctx_cb_buffer_size(struct gk20a *g); u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g); void gm20b_gr_init_commit_global_attrib_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, - bool patch); + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + u32 tpc_count, u32 max_tpc, u64 addr, bool patch); u32 gm20b_gr_init_get_patch_slots(struct gk20a *g, struct nvgpu_gr_config *config); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b_fusa.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b_fusa.c index 677a01a55..3f8b3a74e 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b_fusa.c @@ -402,13 +402,14 @@ u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g) } void gm20b_gr_init_commit_global_attrib_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, - bool patch) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + u32 tpc_count, u32 max_tpc, u64 addr, bool patch) { u32 cb_addr; (void)tpc_count; (void)max_tpc; + (void)mappings; addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h index 6c4357078..fa0941654 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h @@ -34,6 +34,7 @@ struct gk20a; struct nvgpu_gr_config; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct netlist_av_list; struct nvgpu_gr_obj_ctx_gfx_regs; @@ -69,8 +70,8 @@ u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, u32 max_tpc); void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, - bool patch); + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + u32 tpc_count, u32 max_tpc, u64 addr, bool patch); void gv11b_gr_init_fe_go_idle_timeout(struct gk20a *g, bool enable); #ifdef CONFIG_NVGPU_SM_DIVERSITY diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c index 40a091bfc..766524545 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -711,21 +712,21 @@ u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, } void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, - bool patch) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + u32 tpc_count, u32 max_tpc, u64 addr, bool patch) { u32 attrBufferSize; u32 cb_addr; - gm20b_gr_init_commit_global_attrib_cb(g, gr_ctx, tpc_count, max_tpc, - addr, patch); + gm20b_gr_init_commit_global_attrib_cb(g, gr_ctx, mappings, tpc_count, + max_tpc, addr, patch); addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(); #ifdef CONFIG_NVGPU_GFXP - if (nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va != 0ULL) { 
+ if (nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PREEMPT_CTXSW) != 0ULL) { attrBufferSize = nvgpu_safe_cast_u64_to_u32( - nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size); + nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_BETACB_CTXSW)->size); } else { #endif attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g, diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c index fbd4c4b66..053cd947c 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gr_init_gm20b.h" #include "gr_init_tu104.h" @@ -194,12 +195,13 @@ void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr, #ifdef CONFIG_NVGPU_GFXP void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, bool patch) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + bool patch) { u64 addr; + u64 gpu_va; u32 rtv_cb_size; u32 gfxp_addr_size; - struct nvgpu_mem *buf_mem; nvgpu_log_fn(g, " "); @@ -211,9 +213,8 @@ void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f(); /* GFXP RTV circular buffer */ - buf_mem = nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer(gr_ctx); - addr = buf_mem->gpu_va >> - gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f(); + gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW); + addr = gpu_va >> gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f(); nvgpu_assert(u64_hi32(addr) == 0U); tu104_gr_init_patch_rtv_cb(g, gr_ctx, (u32)addr, diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h index 9a8ce5802..c6ec0eb1e 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +28,7 @@ struct gk20a; struct nvgpu_gr_ctx; struct netlist_av64_list; +struct nvgpu_gr_ctx_mappings; u32 tu104_gr_init_get_bundle_cb_default_size(struct gk20a *g); u32 tu104_gr_init_get_min_gpm_fifo_depth(struct gk20a *g); @@ -44,7 +45,8 @@ void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr, struct nvgpu_gr_ctx *gr_ctx, bool patch); void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, bool patch); + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + bool patch); u32 tu104_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g); u32 tu104_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h index 687ab9346..55d540a98 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h @@ -34,6 +34,7 @@ struct vm_gk20a; struct nvgpu_mem; struct nvgpu_channel; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_config; struct nvgpu_gr_isr_data; struct nvgpu_gr_intr_info; @@ -188,7 +189,6 @@ struct gops_gr_setup { * @brief Free GR engine context image. * * @param g [in] Pointer to GPU driver struct. - * @param vm [in] Pointer to virtual memory. * @param gr_ctx [in] Pointer to GR engine context image. 
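The gm20b/gv11b/tu104 hunks above show the rule for chip HALs: buffer GPU VAs are resolved from the mappings struct on demand instead of being read out of nvgpu_mem::gpu_va. A condensed sketch of a GFXP caller; the wrapper is illustrative and assumes CONFIG_NVGPU_GFXP is enabled.

static void example_commit_gfxp_rtv(struct gk20a *g,
		struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	/* Optional hook; the tu104 implementation shifts and aligns the
	 * VA it resolves from NVGPU_GR_CTX_GFXP_RTVCB_CTXSW before
	 * patching it in. */
	if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) {
		g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, mappings,
				true);
	}
}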
* * This function will free memory allocated for patch @@ -199,7 +199,6 @@ struct gops_gr_setup { * @see nvgpu_gr_setup_free_gr_ctx */ void (*free_gr_ctx)(struct gk20a *g, - struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); /** @@ -849,6 +848,7 @@ struct gops_gr_init { bool patch, bool global_ctx); void (*commit_global_attrib_cb)(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, + struct nvgpu_gr_ctx_mappings *mappings, u32 tpc_count, u32 max_tpc, u64 addr, bool patch); void (*commit_global_cb_manager)(struct gk20a *g, @@ -899,6 +899,7 @@ struct gops_gr_init { bool patch); void (*commit_gfxp_rtv_cb)(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, bool patch); u32 (*get_attrib_cb_gfxp_default_size)(struct gk20a *g); u32 (*get_attrib_cb_gfxp_size)(struct gk20a *g); @@ -1109,6 +1110,7 @@ struct gops_gr_fecs_trace { struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, pid_t pid, u32 vmid); int (*unbind_channel)(struct gk20a *g, struct nvgpu_mem *inst_block); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h index 1b9d5e8d7..7a703db57 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h @@ -26,6 +26,7 @@ #include #include #include +#include #define NVGPU_INVALID_SM_CONFIG_ID (U32_MAX) @@ -39,7 +40,9 @@ */ struct gk20a; struct vm_gk20a; +struct nvgpu_tsg; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_global_ctx_buffer_desc; struct nvgpu_gr_global_ctx_local_golden_image; struct patch_desc; @@ -156,25 +159,19 @@ void nvgpu_gr_ctx_set_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, u32 index, u32 size); /** - * @brief Allocate graphics context buffer. + * @brief Get size of GR context buffer with given index. * - * @param g [in] Pointer to GPU driver struct. - * @param gr_ctx [in] Pointer to graphics context struct. - * @param gr_ctx_desc [in] Pointer to context descriptor struct. - * @param vm [in] Pointer to virtual memory. + * @param desc [in] Pointer to context descriptor struct. + * @param index [in] Index of GR context buffer. * - * This function allocates memory for graphics context buffer and also - * maps it to given virtual memory. + * @return size of the buffer. * - * @return 0 in case of success, < 0 in case of failure. - * @retval -ENOMEM if context memory allocation fails. - * @retval -EINVAL if context buffer size is not set in - * #nvgpu_gr_ctx_desc struct. + * This function returns the size of GR context buffer with given buffer + * index. \a index must be less than NVGPU_GR_CTX_COUNT otherwise + * an assert is raised. */ -int nvgpu_gr_ctx_alloc(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm); +u32 nvgpu_gr_ctx_get_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, + u32 index); /** * @brief Free graphics context buffer. @@ -182,93 +179,14 @@ int nvgpu_gr_ctx_alloc(struct gk20a *g, * @param g [in] Pointer to GPU driver struct. * @param gr_ctx [in] Pointer to graphics context struct. * @param global_ctx_buffer [in]Pointer to global context descriptor struct. - * @param vm [in] Pointer to virtual memory. * * This function will free memory allocated for graphics context buffer, * patch context buffer, and all the ctxsw buffers. 
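The new getter pairs with nvgpu_gr_ctx_set_size() so callers such as nvgpu_gr_obj_ctx_set_graphics_preemption_mode() can validate sizes before any buffer exists. A minimal sketch of that pairing; the wrapper and the zero-check policy are illustrative.

static int example_size_preempt_buffer(struct nvgpu_gr_ctx_desc *desc,
		u32 preempt_size)
{
	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PREEMPT_CTXSW,
			preempt_size);

	/* Asserts internally if the index is out of range. */
	if (nvgpu_gr_ctx_get_size(desc, NVGPU_GR_CTX_PREEMPT_CTXSW) == 0U) {
		return -EINVAL;
	}

	return 0;
}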
*/ void nvgpu_gr_ctx_free(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm); + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer); -/** - * @brief Allocate patch context buffer. - * - * @param g [in] Pointer to GPU driver struct. - * @param gr_ctx [in] Pointer to graphics context struct. - * @param gr_ctx_desc [in] Pointer to context descriptor struct. - * @param vm [in] Pointer to virtual memory. - * - * This function allocates memory for patch context buffer and also - * maps it to given virtual memory. - * - * @return 0 in case of success, < 0 in case of failure. - * @retval -ENOMEM if context memory allocation fails. - */ -int nvgpu_gr_ctx_alloc_patch_ctx(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm); - -/** - * @brief Free patch context buffer. - * - * @param g [in] Pointer to GPU driver struct. - * @param vm [in] Pointer to virtual memory. - * @param gr_ctx [in] Pointer to graphics context struct. - * - * This function will free memory allocated for patch context buffer. - */ -void nvgpu_gr_ctx_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct nvgpu_gr_ctx *gr_ctx); - -/** - * @brief Map global context buffers. - * - * @param g [in] Pointer to GPU driver struct. - * @param gr_ctx [in] Pointer to graphics context struct. - * @param global_ctx_buffer [in]Pointer to global context descriptor struct. - * @param vm [in] Pointer to virtual memory. - * @param vpr [in] Boolean flag to use buffers in VPR. - * - * This function maps all global context buffers into given - * virtual memory and stores each virtual address into given - * #nvgpu_gr_ctx struct. - * - * @return 0 in case of success, < 0 in case of failure. - * @retval -ENOMEM if memory mapping fails for any context buffer. - */ -int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr); - -/** - * @brief Get global context buffer virtual address. - * - * @param gr_ctx [in] Pointer to graphics context struct. - * @param index [in] Index of global context buffer. - * - * This function returns virtual address of global context buffer - * with given index stored in #nvgpu_gr_ctx struct. - * - * @return virtual address of global context buffer. - */ -u64 nvgpu_gr_ctx_get_global_ctx_va(struct nvgpu_gr_ctx *gr_ctx, - u32 index); - -/** - * @brief Get pointer of patch context buffer memory struct. - * - * @param gr_ctx [in] Pointer to graphics context struct. - * - * This function returns #nvgpu_mem pointer of patch context buffer stored - * in #nvgpu_gr_ctx struct. - * - * @return pointer to patch context buffer memory struct. - */ -struct nvgpu_mem *nvgpu_gr_ctx_get_patch_ctx_mem(struct nvgpu_gr_ctx *gr_ctx); /** * @brief Set data count in patch context buffer. @@ -283,15 +201,28 @@ void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx, u32 data_count); /** - * @brief Get sm diversity config of the given graphics context. + * @brief Get context buffer mem struct of the given graphics context. * * @param gr_ctx [in] Pointer to graphics context struct. + * @param index [in] Value from (NVGPU_GR_CTX_CTX, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW) * - * This function returns #sm_diversity_config of graphics context struct. + * This function returns #mem of graphics context struct. * - * @return sm diversity config of the given graphics context. 
+ * @return context buffer mem of the given graphics context. */ -struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx); +struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx, u32 index); + +/** + * @brief Get mapping flags of a context buffer of the given graphics context. + * + * @param gr_ctx [in] Pointer to graphics context struct. + * @param index [in] Value from (NVGPU_GR_CTX_CTX, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW) + * + * This function returns #mapping_flags of graphics context struct. + * + * @return context buffer mapping flags of the given graphics context. + */ +u32 nvgpu_gr_ctx_get_ctx_mapping_flags(struct nvgpu_gr_ctx *gr_ctx, u32 index); #ifdef CONFIG_NVGPU_SM_DIVERSITY /** @@ -320,6 +251,7 @@ u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx); * * @param g [in] Pointer to GPU driver struct. * @param gr_ctx [in] Pointer to graphics context struct. + * @param mappings [in] Pointer to mappings of GR context buffers. * @param local_golden_image [in] Pointer to local golden image struct. * @param cde [in] Boolean flag to enable/disable CDE. * @@ -332,6 +264,7 @@ u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx); */ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image, bool cde); @@ -481,6 +414,99 @@ struct nvgpu_gr_ctx *nvgpu_alloc_gr_ctx_struct(struct gk20a *g); */ void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); +/** + * @brief Free TSG specific GR context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param ctx [in] Pointer to graphics context struct. + * + * This function frees all TSG specific GR context buffers. + */ +void nvgpu_gr_ctx_free_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx); + +/** + * @brief Allocate TSG specific GR context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param desc [in] Pointer to context descriptor struct. + * @param ctx [in] Pointer to graphics context struct. + * + * This function allocates all TSG specific GR context buffers. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc, + struct nvgpu_gr_ctx *ctx); + +#ifdef CONFIG_NVGPU_GFXP +/** + * @brief Allocate TSG specific GR preemption context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param desc [in] Pointer to context descriptor struct. + * @param ctx [in] Pointer to graphics context struct. + * + * This function allocates all TSG specific GR preemption context buffers. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int nvgpu_gr_ctx_alloc_ctx_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc, + struct nvgpu_gr_ctx *ctx); +#endif + +/** + * @brief Initialize mapping flags for GR context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param ctx [in] Pointer to graphics context struct. + * + * This function initializes cacheability attribute for TSG specific + * GR context buffers. + */ +void nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, + struct nvgpu_gr_ctx *ctx); + +/** + * @brief Allocate or get GR ctx buffers mappings for a TSG. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * @param vm [in] Pointer to vm struct. 
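Taken together, the helpers declared here replace the old allocate-and-map-in-one-call model: buffers are allocated unmapped, mapping flags are initialized, and mappings are created per TSG/vm pair and filled on demand. A condensed sketch of the sequence, with locking and most error handling elided; the wrapper name is illustrative.

static int example_prepare_and_map_patch_ctx(struct gk20a *g,
		struct nvgpu_tsg *tsg, struct vm_gk20a *vm,
		struct nvgpu_gr_ctx_desc *desc, u64 *patch_va)
{
	struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx;
	struct nvgpu_gr_ctx_mappings *mappings;
	int err;

	/* One mappings struct per TSG/vm pair; repeated calls return
	 * the struct created on the first call. */
	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm);
	if (mappings == NULL) {
		return -ENOMEM;
	}

	/* Plain sysmem allocations; nothing is mapped yet. */
	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
	if (err != 0) {
		nvgpu_gr_ctx_free_mappings(g, gr_ctx);
		return err;
	}

	/* Cacheable for all buffers except GR ctx and patch ctx. */
	nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(g, gr_ctx);

	/* Map one buffer and read its VA back from the mappings. */
	err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx,
			NVGPU_GR_CTX_PATCH_CTX, mappings);
	if (err == 0) {
		*patch_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
				NVGPU_GR_CTX_PATCH_CTX);
	}

	return err;
}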
+ *
+ * This function allocates the mappings struct for the TSG corresponding to
+ * the given vm if one does not already exist, else it returns the existing
+ * struct.
+ *
+ * @return mappings struct in case of success, null in case of failure.
+ */
+struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g,
+	struct nvgpu_tsg *tsg, struct vm_gk20a *vm);
+
+/**
+ * @brief Get GR ctx buffers mappings for a TSG.
+ *
+ * @param tsg [in]	Pointer to TSG struct.
+ *
+ * This function returns the mappings struct for the TSG.
+ *
+ * @return mappings struct.
+ */
+struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg);
+
+/**
+ * @brief Free the gr ctx mappings struct.
+ *
+ * @param g [in]	Pointer to GPU driver struct.
+ * @param gr_ctx [in]	Pointer to graphics context struct.
+ *
+ * This function deletes the gr ctx mappings struct. This is to be
+ * called when freeing the gr context or in error cases.
+ */
+void nvgpu_gr_ctx_free_mappings(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx);
+
 /**
  * @brief Set TSG id in graphics context structure.
  *
@@ -515,28 +541,9 @@ bool nvgpu_gr_ctx_desc_force_preemption_cilp(
 #endif /* CONFIG_NVGPU_CILP */
 
 #ifdef CONFIG_NVGPU_GFXP
-int nvgpu_gr_ctx_alloc_ctxsw_buffers(struct gk20a *g,
-	struct nvgpu_gr_ctx *gr_ctx,
-	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
-	struct vm_gk20a *vm);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_spill_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_betacb_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_preempt_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
 void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g,
-	struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings);
 
 bool nvgpu_gr_ctx_desc_force_preemption_gfxp(
 	struct nvgpu_gr_ctx_desc *gr_ctx_desc);
@@ -559,12 +566,10 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 #endif /* CONFIG_NVGPU_GRAPHICS */
 
 #ifdef CONFIG_NVGPU_DEBUGGER
-int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g,
-	struct nvgpu_gr_ctx *gr_ctx,
+int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g,
+	struct nvgpu_tsg *tsg,
 	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
-	struct vm_gk20a *vm);
-void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct vm_gk20a *vm,
-	struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_hwpm_map *hwpm_map);
 
 void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g,
 	struct nvgpu_gr_ctx *gr_ctx);
@@ -573,18 +578,19 @@ void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
 u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
 u32 nvgpu_gr_ctx_read_ctx_id(struct nvgpu_gr_ctx *gr_ctx);
 
-struct nvgpu_mem *nvgpu_gr_ctx_get_pm_ctx_mem(struct nvgpu_gr_ctx *gr_ctx);
-
 void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode);
 u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx);
 
 int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 	bool enable);
 
-int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
-	u32 mode, bool *skip_update);
+int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	u32 mode, u64 *pm_ctx_gpu_va, bool *skip_update);
 void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
-void
nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); +void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + u64 pm_ctx_gpu_va); +void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped); #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING void nvgpu_gr_ctx_set_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx, bool boost); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h new file mode 100644 index 000000000..0ce3e54a4 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CTX_MAPPINGS_H +#define NVGPU_GR_CTX_MAPPINGS_H + +struct gk20a; +struct nvgpu_tsg; +struct vm_gk20a; +struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; +struct nvgpu_gr_global_ctx_buffer_desc; + +/** + * @brief Create GR ctx buffers mappings for a TSG. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * @param vm [in] Pointer to vm struct. + * + * This function allocates the mappings struct for TSG corresponding to + * given vm. + * + * @return mappings struct in case of success, null in case of failure. + */ +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create(struct gk20a *g, + struct nvgpu_tsg *tsg, struct vm_gk20a *vm); + +/** + * @brief Free the GR ctx buffers mappings. + * + * @param g [in] Pointer to GPU driver struct. + * @param mappings [in] Pointer to GR ctx buffers mappings struct. + * + * This function frees the mappings struct. + */ +void nvgpu_gr_ctx_mappings_free(struct gk20a *g, + struct nvgpu_gr_ctx_mappings *mappings); + +/** + * @brief Map GR context buffer and store in mappings struct. + * + * @param g [in] Pointer to GPU driver struct. + * @param ctx [in] Pointer to GR context struct. + * @param index [in] index of the buffer. + * @param mappings [in] Pointer to GR context buffer mappings struct. + * + * This function will map the GR context buffer at #index in #mappings->vm + * and stores the mapped address. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int nvgpu_gr_ctx_mappings_map_ctx_buffer(struct gk20a *g, + struct nvgpu_gr_ctx *ctx, u32 index, + struct nvgpu_gr_ctx_mappings *mappings); + +/** + * @brief Map GR context preemption buffers and store in mappings struct. 
+ *
+ * @param g [in]			Pointer to GPU driver struct.
+ * @param ctx [in]			Pointer to GR context struct.
+ * @param mappings [in]		Pointer to GR context buffer mappings struct.
+ *
+ * This function will map the GR context preemption buffers in #mappings->vm
+ * and store the mapped addresses.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ */
+int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g,
+	struct nvgpu_gr_ctx *ctx,
+	struct nvgpu_gr_ctx_mappings *mappings);
+
+/**
+ * @brief Map GR and global context buffers and store in mappings struct.
+ *
+ * @param g [in]			Pointer to GPU driver struct.
+ * @param gr_ctx [in]			Pointer to GR context struct.
+ * @param global_ctx_buffer [in]	Pointer to global context buffer desc.
+ * @param mappings [in]		Pointer to GR context buffer
+ *					mappings struct.
+ * @param vpr [in]			Indicates if VPR buffer copy is to be
+ *					mapped.
+ *
+ * This function will map the GR and global context buffers in #mappings->vm
+ * and store the mapped addresses.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ */
+int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
+	struct nvgpu_gr_ctx_mappings *mappings,
+	bool vpr);
+
+/**
+ * @brief Unmap GR and global context buffers tracked in the mappings struct.
+ *
+ * @param g [in]			Pointer to GPU driver struct.
+ * @param gr_ctx [in]			Pointer to GR context struct.
+ * @param global_ctx_buffer [in]	Pointer to global context buffer desc.
+ * @param mappings [in]		Pointer to GR context buffer
+ *					mappings struct.
+ *
+ * This function will unmap the GR and global context buffers from
+ * #mappings->vm.
+ */
+void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
+	struct nvgpu_gr_ctx_mappings *mappings);
+
+/**
+ * @brief Get global context buffer gpu virtual address.
+ *
+ * @param mappings [in]		Pointer to GR context buffer
+ *					mappings struct.
+ * @param index [in]			Index of the buffer.
+ *
+ * This function will get the gpu virtual address of the global context buffer
+ * in #mappings.
+ *
+ * @return gpu virtual address of global context buffer.
+ */
+u64 nvgpu_gr_ctx_mappings_get_global_ctx_va(struct nvgpu_gr_ctx_mappings *mappings,
+	u32 index);
+
+/**
+ * @brief Get GR context buffer gpu virtual address.
+ *
+ * @param mappings [in]		Pointer to GR context buffer
+ *					mappings struct.
+ * @param index [in]			Index of the buffer.
+ *
+ * This function will get the gpu virtual address of the GR context buffer
+ * in #mappings.
+ *
+ * @return gpu virtual address of GR context buffer.
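The bulk map/unmap pair above is symmetric; on teardown the per-VM mappings go first, then the backing allocations. A sketch with all pointers assumed valid; the wrapper is illustrative.

static void example_teardown_ctx(struct gk20a *g,
		struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	/* Undo nvgpu_gr_ctx_mappings_map_gr_ctx_buffers()... */
	nvgpu_gr_ctx_unmap_buffers(g, gr_ctx, global_ctx_buffer, mappings);

	/* ...then release the now-unmapped backing memory. */
	nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx);
}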
+ */ +u64 nvgpu_gr_ctx_mappings_get_ctx_va(struct nvgpu_gr_ctx_mappings *mappings, + u32 index); + +#endif /* NVGPU_GR_CTX_MAPPINGS_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h index 2efdab06f..d4a305682 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h @@ -63,6 +63,7 @@ struct gk20a; struct nvgpu_mem; struct nvgpu_gr_subctx; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_tsg; struct vm_area_struct; @@ -169,7 +170,8 @@ int nvgpu_gr_fecs_trace_reset(struct gk20a *g); int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx, - struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid); + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + pid_t pid, u32 vmid); int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g, struct nvgpu_mem *inst_block); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/global_ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/global_ctx.h index b2cb34a43..1ac757de8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/global_ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/global_ctx.h @@ -181,6 +181,18 @@ size_t nvgpu_gr_global_ctx_get_size(struct nvgpu_gr_global_ctx_buffer_desc *desc int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *desc); +/** + * @brief Initialize mapping flags for GR global context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param desc [in] Pointer to global ctx buffer desc. + * + * This function initializes cacheability attribute for GR global + * context buffers. + */ +void nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc); + /** * @brief Free all global context buffers. * @@ -199,7 +211,6 @@ void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g, * @param desc [in] Pointer to global context descriptor struct. * @param index [in] Index of global context buffer. * @param vm [in] Pointer to virtual memory. - * @param flags [in] Flags used to specify mapping attributes. * @param priv [in] Boolean flag to allocate privileged PTE. * * This function maps given global contex buffer with index #index into @@ -209,8 +220,7 @@ void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g, * 0 in case of failure. */ u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc, - u32 index, - struct vm_gk20a *vm, u32 flags, bool priv); + u32 index, struct vm_gk20a *vm, bool priv); /** * @brief Unmap given global context buffer. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h index 4649f9960..d0a013bfe 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -33,6 +33,7 @@ */ struct gk20a; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_subctx; struct nvgpu_gr_config; struct nvgpu_gr_ctx_desc; @@ -70,7 +71,7 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g, * @param inst_block [in] Pointer to channel instance block. * @param gr_ctx [in] Pointer to graphics context buffer. 
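The fecs_trace change earlier in this series follows the same thread-the-mappings rule: when NVGPU_FECS_TRACE_VA is enabled, the trace buffer VA is read from the mappings struct inside bind_channel, so the setup path passes the struct along. A sketch of the call-site shape, mirroring nvgpu_gr_setup_alloc_obj_ctx(); the wrapper and the vmid value 0 are illustrative.

static int example_bind_fecs_trace(struct gk20a *g, struct nvgpu_channel *c,
		struct nvgpu_tsg *tsg, struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	if (g->ops.gr.fecs_trace.bind_channel == NULL) {
		return 0;
	}

	return g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
			c->subctx, gr_ctx, mappings, tsg->tgid, 0U);
}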
 * @param subctx [in] Pointer to graphics subcontext buffer.
- * @param gpu_va [in] GPU virtual address of graphics context buffer.
+ * @param mappings [in] Pointer to mappings of the GR context buffers.
 *
 * If graphics subcontexts are supported, subcontext buffer GPU virtual
 * address should be committed to channel instance block. Otherwise graphics
@@ -82,7 +83,7 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g,
 */
 void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
 	struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
-	u64 gpu_va);
+	struct nvgpu_gr_ctx_mappings *mappings);

 /**
 * @brief Initialize preemption mode in context struct.
@@ -91,7 +92,6 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
 * @param config [in] Pointer to GR configuration struct.
 * @param gr_ctx_desc [in] Pointer to GR context descriptor struct.
 * @param gr_ctx [in] Pointer to graphics context.
- * @param vm [in] Pointer to virtual memory.
 * @param class_num [in] GR engine class.
 * @param graphics_preempt_mode Graphics preemption mode to set.
 * @param compute_preempt_mode Compute preemption mode to set.
@@ -111,7 +111,7 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
 */
 int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
 	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
-	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num,
+	struct nvgpu_gr_ctx *gr_ctx, u32 class_num,
 	u32 graphics_preempt_mode, u32 compute_preempt_mode);

 /**
@@ -121,6 +121,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
 * @param config [in] Pointer to GR configuration struct.
 * @param gr_ctx [in] Pointer to graphics context.
 * @param subctx [in] Pointer to graphics subcontext buffer.
+ * @param mappings [in] Pointer to mappings of GR context buffers.
 *
 * This function will read preemption modes stored in #nvgpu_gr_ctx
 * struct and write them into graphics context image.
@@ -133,7 +134,8 @@
 */
 void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
 	struct nvgpu_gr_config *config,
-	struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx);
+	struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
+	struct nvgpu_gr_ctx_mappings *mappings);

 /**
 * @brief Update global context buffer addresses in graphics context.
@@ -142,6 +144,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
 * @param global_ctx_buffer [in] Pointer to global context descriptor struct.
 * @param config [in] Pointer to GR configuration struct.
 * @param gr_ctx [in] Pointer to graphics context.
+ * @param mappings [in] Pointer to mappings of GR context buffers.
 * @param patch [in] Boolean flag to use patch context buffer.
 *
 * This function will update GPU virtual addresses of global context
@@ -152,7 +155,8 @@
 */
 void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
 	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
-	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch);
+	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings, bool patch);
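/*
 * Illustrative sketch, not part of the patch: with the reworked obj_ctx API
 * a caller hands the nvgpu_gr_ctx_mappings object to commit_inst instead of
 * a raw GPU VA, and looks per-buffer VAs up on demand. The wrapper name is
 * hypothetical, and the assumption that nvgpu_gr_ctx_mappings_get_ctx_va()
 * is indexed by the NVGPU_GR_CTX_* IDs follows the other call sites in this
 * patch.
 */
static void sketch_commit_ctx(struct gk20a *g, struct nvgpu_mem *inst_block,
		struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	u64 ctx_va;

	/* Before this patch the caller passed gpu_va as the last argument,
	 * so the VA had to be threaded through every layer above.
	 */
	nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, mappings);

	/* The GR ctx VA can still be queried per buffer index when needed. */
	ctx_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX);
	(void)ctx_va;
}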

 /**
 * @brief Allocate golden context image.
@@ -193,6 +197,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
 	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
 	struct nvgpu_gr_config *config,
 	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings,
 	struct nvgpu_mem *inst_block);

 /**
@@ -205,7 +210,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
 * @param config [in] Pointer to GR configuration struct.
 * @param gr_ctx [in] Pointer to graphics context.
 * @param subctx [in] Pointer to graphics subcontext buffer.
- * @param vm [in] Pointer to virtual memory.
+ * @param mappings [in] Pointer to mappings of the GR context buffers.
 * @param inst_block [in] Pointer to channel instance block.
 * @param class_num [in] GR engine class.
 * @param flags [in] Object context attribute flags.
@@ -216,8 +221,8 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
 * This function allocates object context for the GPU channel.
 * Allocating object context includes:
 *
- * - Allocating graphics context buffer. See #nvgpu_gr_obj_ctx_gr_ctx_alloc().
- * - Allocating patch context buffer. See #nvgpu_gr_ctx_alloc_patch_ctx().
+ * - Allocating graphics context buffers.
+ * - Allocating patch context buffer.
 * - Allocating golden context image. See #nvgpu_gr_obj_ctx_alloc_golden_ctx_image().
 * - Committing global context buffers in graphics context image.
 *   See #nvgpu_gr_obj_ctx_commit_global_ctx_buffers().
@@ -245,7 +250,7 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
 	struct nvgpu_gr_config *config,
 	struct nvgpu_gr_ctx *gr_ctx,
 	struct nvgpu_gr_subctx *subctx,
-	struct vm_gk20a *vm,
+	struct nvgpu_gr_ctx_mappings *mappings,
 	struct nvgpu_mem *inst_block,
 	u32 class_num, u32 flags,
 	bool cde, bool vpr);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
index 34e9a9553..4b6ed33d0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -88,14 +88,13 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 * @brief Free GR engine context image.
 *
 * @param g [in] Pointer to GPU driver struct.
- * @param vm [in] Pointer to virtual memory.
 * @param gr_ctx [in] Pointer to GR engine context image.
 *
 * This function will free memory allocated for patch context image and
 * GR engine context image in #nvgpu_gr_setup_alloc_obj_ctx().
 */
 void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g,
-	struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_ctx *gr_ctx);

 /**
 * @brief Free GR engine subcontext.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h
index 75049cf5a..8739a165b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h
@@ -34,6 +34,7 @@ struct gk20a;
 struct vm_gk20a;
 struct nvgpu_gr_subctx;
 struct nvgpu_mem;
+struct nvgpu_gr_ctx_mappings;

 /**
 * @brief Allocate graphics subcontext buffer.
@@ -73,7 +74,8 @@ void nvgpu_gr_subctx_free(struct gk20a *g,
 * @param g [in] Pointer to GPU driver struct.
 * @param subctx [in] Pointer to graphics subcontext struct.
 * @param gr_ctx [in] Pointer to graphics context struct.
- * @param gpu_va [in] GPU virtual address of graphics context buffer.
+ * @param mappings [in] GPU virtual address mappings of graphics
+ *                 context buffers.
 *
 * This function will initialize graphics subcontext buffer header
 * by reading appropriate values from #nvgpu_gr_ctx structure and
@@ -84,7 +86,8 @@ void nvgpu_gr_subctx_free(struct gk20a *g,
 */
 void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
 	struct nvgpu_gr_subctx *subctx,
-	struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va);
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings);

 /**
 * @brief Get pointer of subcontext header memory struct.
@@ -103,11 +106,12 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx
 	struct nvgpu_gr_ctx *gr_ctx);

 void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g,
-	struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_subctx *subctx,
+	struct nvgpu_gr_ctx_mappings *mappings);
 #endif

 #ifdef CONFIG_NVGPU_DEBUGGER
 void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g,
-	struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_subctx *subctx, u64 pm_ctx_gpu_va);
 #endif

 #endif /* NVGPU_GR_SUBCTX_H */
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index e81e05a94..06679747c 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1872,7 +1872,7 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context_size(struct dbg_session_gk20a *dbg
 		return -EINVAL;
 	}

-	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx);
+	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
 	if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
 		nvgpu_err(g, "invalid context mem");
 		return -EINVAL;
@@ -1918,7 +1918,7 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 	}

-	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx);
+	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
 	if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
 		nvgpu_err(g, "invalid context mem");
 		return -EINVAL;
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
index 62fdb9f63..09b66ef82 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
@@ -29,7 +29,6 @@
 struct gk20a;
 struct nvgpu_channel;
-struct gr_ctx_buffer_desc;
 struct gk20a_scale_profile;

 struct secure_page_buffer {
diff --git a/libs/dgpu/libnvgpu-drv-dgpu_safe.export b/libs/dgpu/libnvgpu-drv-dgpu_safe.export
index 2a7e0b6d5..fd477b3e7 100644
--- a/libs/dgpu/libnvgpu-drv-dgpu_safe.export
+++ b/libs/dgpu/libnvgpu-drv-dgpu_safe.export
@@ -446,13 +446,10 @@ nvgpu_gr_config_set_sm_info_gpc_index
 nvgpu_gr_config_set_sm_info_sm_index
 nvgpu_gr_config_set_sm_info_tpc_index
 nvgpu_gr_ctx_alloc
-nvgpu_gr_ctx_alloc_patch_ctx
 nvgpu_gr_ctx_desc_alloc
 nvgpu_gr_ctx_desc_free
 nvgpu_gr_ctx_free
-nvgpu_gr_ctx_free_patch_ctx
 nvgpu_gr_ctx_get_tsgid
-nvgpu_gr_ctx_map_global_ctx_buffers
 nvgpu_gr_ctx_patch_write
 nvgpu_gr_ctx_patch_write_begin
 nvgpu_gr_ctx_patch_write_end
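/*
 * Illustrative sketch, not part of the patch: the subctx header is now
 * populated from the mappings object rather than from a single gpu_va.
 * The wrapper is hypothetical; the CONFIG_NVGPU_GFXP guard around the
 * preemption-buffer call is an assumption inferred from the #endif visible
 * in the subctx.h hunk above.
 */
static void sketch_setup_subctx(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
		struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings)
{
	/* Before: nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va); */
	nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, mappings);
#ifdef CONFIG_NVGPU_GFXP
	/* Preemption buffer VAs are also taken from the mappings object now. */
	nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, mappings);
#endif
}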
diff --git a/libs/igpu/libnvgpu-drv-igpu_safe.export b/libs/igpu/libnvgpu-drv-igpu_safe.export
index be292eb22..53699cc81 100644
--- a/libs/igpu/libnvgpu-drv-igpu_safe.export
+++ b/libs/igpu/libnvgpu-drv-igpu_safe.export
@@ -462,18 +462,20 @@ nvgpu_gr_config_set_sm_info_global_tpc_index
 nvgpu_gr_config_set_sm_info_gpc_index
 nvgpu_gr_config_set_sm_info_sm_index
 nvgpu_gr_config_set_sm_info_tpc_index
-nvgpu_gr_ctx_alloc
-nvgpu_gr_ctx_alloc_patch_ctx
 nvgpu_gr_ctx_desc_alloc
 nvgpu_gr_ctx_desc_free
 nvgpu_gr_ctx_free
-nvgpu_gr_ctx_free_patch_ctx
 nvgpu_gr_ctx_get_tsgid
-nvgpu_gr_ctx_map_global_ctx_buffers
 nvgpu_gr_ctx_patch_write
 nvgpu_gr_ctx_patch_write_begin
 nvgpu_gr_ctx_patch_write_end
 nvgpu_gr_ctx_set_size
+nvgpu_gr_ctx_alloc_ctx_buffers
+nvgpu_gr_ctx_free_ctx_buffers
+nvgpu_gr_ctx_mappings_create
+nvgpu_gr_ctx_alloc_or_get_mappings
+nvgpu_gr_ctx_mappings_map_gr_ctx_buffers
+nvgpu_gr_ctx_get_ctx_mem
 nvgpu_gr_enable_hw
 nvgpu_gr_engine_interrupt_mask
 nvgpu_gr_falcon_get_fecs_ucode_segments
diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.c b/userspace/units/fifo/tsg/nvgpu-tsg.c
index a7d8e4adf..601b289a6 100644
--- a/userspace/units/fifo/tsg/nvgpu-tsg.c
+++ b/userspace/units/fifo/tsg/nvgpu-tsg.c
@@ -622,10 +622,9 @@ done:
 #define F_TSG_RELEASE_NO_RELEASE_HAL	BIT(0)
 #define F_TSG_RELEASE_GR_CTX		BIT(1)
 #define F_TSG_RELEASE_MEM		BIT(2)
-#define F_TSG_RELEASE_VM		BIT(3)
-#define F_TSG_RELEASE_ENG_BUFS		BIT(4)
-#define F_TSG_RELEASE_SM_ERR_STATES	BIT(5)
-#define F_TSG_RELEASE_LAST		BIT(6)
+#define F_TSG_RELEASE_ENG_BUFS		BIT(3)
+#define F_TSG_RELEASE_SM_ERR_STATES	BIT(4)
+#define F_TSG_RELEASE_LAST		BIT(5)

 static void stub_tsg_release(struct nvgpu_tsg *tsg)
@@ -640,7 +639,7 @@ static void stub_tsg_deinit_eng_method_buffers(struct gk20a *g,
 }

 static void stub_gr_setup_free_gr_ctx(struct gk20a *g,
-		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
+		struct nvgpu_gr_ctx *gr_ctx)
 {
	stub[1].name = __func__;
	stub[1].count++;
@@ -650,24 +649,32 @@ static void stub_gr_setup_free_gr_ctx(struct gk20a *g,
 int test_tsg_release(struct unit_module *m,
		struct gk20a *g, void *args)
 {
+	struct nvgpu_gr_ctx_desc *gr_ctx_desc;
+	struct nvgpu_mem *gr_ctx_mem;
	struct nvgpu_fifo *f = &g->fifo;
	struct gpu_ops gops = g->ops;
	struct nvgpu_tsg *tsg = NULL;
	struct vm_gk20a vm;
	u32 branches = 0U;
	int ret = UNIT_FAIL;
-	struct nvgpu_mem mem;
	u32 free_gr_ctx_mask =
-		F_TSG_RELEASE_GR_CTX|F_TSG_RELEASE_MEM|F_TSG_RELEASE_VM;
+		F_TSG_RELEASE_GR_CTX|F_TSG_RELEASE_MEM;
	const char *labels[] = {
		"no_release_hal",
		"gr_ctx",
		"mem",
-		"vm",
		"eng_bufs",
		"sm_err_states"
	};

+	gr_ctx_desc = nvgpu_gr_ctx_desc_alloc(g);
+	if (!gr_ctx_desc) {
+		unit_return_fail(m, "failed to allocate memory");
+	}
+
+	nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX,
+		NVGPU_CPU_PAGE_SIZE);
+
	for (branches = 0U; branches < F_TSG_RELEASE_LAST; branches++) {

		if (!(branches & F_TSG_RELEASE_GR_CTX) &&
@@ -683,8 +690,9 @@ int test_tsg_release(struct unit_module *m,
		tsg = nvgpu_tsg_open(g, getpid());
		unit_assert(tsg != NULL, goto done);
		unit_assert(tsg->gr_ctx != NULL, goto done);
-		unit_assert(tsg->gr_ctx->mem.aperture ==
-				APERTURE_INVALID, goto done);
+
+		gr_ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
+		unit_assert(gr_ctx_mem->aperture == APERTURE_INVALID, goto done);

		g->ops.tsg.release = branches & F_TSG_RELEASE_NO_RELEASE_HAL ?
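/*
 * Note on the test idiom above, illustration only: test_tsg_release() walks
 * every combination of the F_TSG_RELEASE_* bits, so removing
 * F_TSG_RELEASE_VM and renumbering the remaining bits keeps the loop bound
 * F_TSG_RELEASE_LAST dense. Reduced sketch of the pattern:
 */
static void sketch_branch_walk(void)
{
	u32 branches;

	for (branches = 0U; branches < F_TSG_RELEASE_LAST; branches++) {
		if ((branches & F_TSG_RELEASE_GR_CTX) != 0U) {
			/* exercise the gr_ctx teardown path */
		}
		if ((branches & F_TSG_RELEASE_MEM) != 0U) {
			/* allocate real ctx buffers so release frees them */
		}
	}
}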
@@ -696,11 +704,8 @@ int test_tsg_release(struct unit_module *m,
		}

		if (branches & F_TSG_RELEASE_MEM) {
-			nvgpu_dma_alloc(g, NVGPU_CPU_PAGE_SIZE, &mem);
-			tsg->gr_ctx->mem = mem;
-		}
-
-		if (branches & F_TSG_RELEASE_VM) {
+			ret = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc, tsg->gr_ctx);
+			unit_assert(ret == UNIT_SUCCESS, goto done);
			tsg->vm = &vm;
			/* prevent nvgpu_vm_remove */
			nvgpu_ref_init(&vm.ref);
@@ -734,7 +739,7 @@ int test_tsg_release(struct unit_module *m,
			gops.gr.setup.free_gr_ctx;

		if (branches & F_TSG_RELEASE_MEM) {
-			nvgpu_dma_free(g, &mem);
+			nvgpu_gr_ctx_free_ctx_buffers(g, tsg->gr_ctx);
		}

		if (tsg->gr_ctx != NULL) {
diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.h b/userspace/units/fifo/tsg/nvgpu-tsg.h
index 4cbebfc09..64f50b9fc 100644
--- a/userspace/units/fifo/tsg/nvgpu-tsg.h
+++ b/userspace/units/fifo/tsg/nvgpu-tsg.h
@@ -177,17 +177,19 @@ int test_tsg_unbind_channel(struct unit_module *m,
 * - Check that in_use is false.
 * - Check de-allocation of other resources:
 *   - Case where g->ops.gr.setup.free_gr_ctx is called.
- *     It requires dummy vm, gr_ctx and gr_ctx->mem to be allocated.
+ *     It requires dummy vm, gr_ctx and gr_ctx->mem[NVGPU_GR_CTX_CTX] to be
+ *     allocated.
 *     A stub is used to check that the HAL was actually invoked.
- *   - Other combinations of vm, gr_ctx and gr_ctx->mem allocations, to
- *     check that g->ops.gr.setup.free_gr_ctx is not called.
+ *   - Other combinations of vm, gr_ctx and gr_ctx->mem[NVGPU_GR_CTX_CTX]
+ *     allocations, to check that g->ops.gr.setup.free_gr_ctx is not called.
 *   - Unhook of event_ids (by adding 2 dummy events in event_id list, and
 *     checking that list is empty after TSG release).
 *   - Case where event_id is empty before TSG release is tested as well
 *   - Check that VM refcount is decremented (and VM deallocated in our
 *     case), when present.
 *   - Check that sm_error_states is deallocated.
- *   - Check any combination of VM, gr_ctx, gr_ctx->mem, and sm_error_state.
+ *   - Check any combination of VM, gr_ctx, gr_ctx->mem[NVGPU_GR_CTX_CTX], and
+ *     sm_error_state.
 *
 * Output: Returns PASS if all branches gave expected results. FAIL otherwise.
 */
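/*
 * Illustrative sketch, not part of the patch: gr_ctx->mem is now an array
 * indexed by the NVGPU_GR_CTX_* IDs, and callers use the accessor instead
 * of touching the struct directly, as the updated test and ioctl paths
 * above do. Hypothetical helper:
 */
static bool sketch_ctx_image_valid(struct nvgpu_gr_ctx *gr_ctx)
{
	struct nvgpu_mem *ctx_mem =
		nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX);

	return (ctx_mem != NULL) && nvgpu_mem_is_valid(ctx_mem);
}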
diff --git a/userspace/units/gr/ctx/nvgpu-gr-ctx.c b/userspace/units/gr/ctx/nvgpu-gr-ctx.c
index 56bd01902..05b20b8e5 100644
--- a/userspace/units/gr/ctx/nvgpu-gr-ctx.c
+++ b/userspace/units/gr/ctx/nvgpu-gr-ctx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -43,6 +44,37 @@

 #define DUMMY_SIZE 0xF0U

+static u64 nvgpu_gmmu_map_locked_stub(struct vm_gk20a *vm,
+	u64 vaddr,
+	struct nvgpu_sgt *sgt,
+	u64 buffer_offset,
+	u64 size,
+	u32 pgsz_idx,
+	u8 kind_v,
+	u32 ctag_offset,
+	u32 flags,
+	enum gk20a_mem_rw_flag rw_flag,
+	bool clear_ctags,
+	bool sparse,
+	bool priv,
+	struct vm_gk20a_mapping_batch *batch,
+	enum nvgpu_aperture aperture)
+{
+	return 1;
+}
+
+static void nvgpu_gmmu_unmap_locked_stub(struct vm_gk20a *vm,
+	u64 vaddr,
+	u64 size,
+	u32 pgsz_idx,
+	bool va_allocated,
+	enum gk20a_mem_rw_flag rw_flag,
+	bool sparse,
+	struct vm_gk20a_mapping_batch *batch)
+{
+	return;
+}
+
 int test_gr_ctx_error_injection(struct unit_module *m,
		struct gk20a *g, void *args)
 {
@@ -51,12 +83,22 @@ int test_gr_ctx_error_injection(struct unit_module *m,
	struct vm_gk20a *vm;
	struct nvgpu_gr_ctx_desc *desc;
	struct nvgpu_gr_global_ctx_buffer_desc *global_desc;
+	struct nvgpu_gr_ctx_mappings *mappings = NULL;
	struct nvgpu_gr_ctx *gr_ctx = NULL;
	struct nvgpu_posix_fault_inj *dma_fi =
		nvgpu_dma_alloc_get_fault_injection();
	struct nvgpu_posix_fault_inj *kmem_fi =
		nvgpu_kmem_get_fault_injection();
	u64 low_hole = SZ_4K * 16UL;
+	struct nvgpu_channel *channel = (struct nvgpu_channel *)
+		malloc(sizeof(struct nvgpu_channel));
+	struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
+		malloc(sizeof(struct nvgpu_tsg));
+	u32 i;
+
+	if (channel == NULL || tsg == NULL) {
+		unit_return_fail(m, "failed to allocate channel/tsg");
+	}

	desc = nvgpu_gr_ctx_desc_alloc(g);
	if (!desc) {
@@ -84,68 +126,70 @@ int test_gr_ctx_error_injection(struct unit_module *m,
		unit_return_fail(m, "nvgpu_vm_init failed\n");
	}

-	/* Try to free gr_ctx before it is allocated. */
-	nvgpu_gr_ctx_free(g, gr_ctx, NULL, NULL);
+	channel->g = g;
+	channel->vm = vm;

-	gr_ctx = nvgpu_alloc_gr_ctx_struct(g);
-	if (!gr_ctx) {
-		unit_return_fail(m, "failed to allocate memory");
-	}
-
-	/* Context size is not set, so should fail. */
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Set the size now, but inject dma allocation failures. */
-	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
-	nvgpu_posix_enable_fault_injection(dma_fi, true, 0);
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Inject kmem alloc failures to trigger mapping failures */
-	nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 1);
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Successful allocation */
-	nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
-	if (err != 0) {
-		unit_return_fail(m, "failed to allocate context");
-	}
-
-	/* Try to free patch context before it is allocated. */
-	nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx);
-
-	/* Inject allocation error and allocate patch context */
-	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
-	nvgpu_posix_enable_fault_injection(dma_fi, true, 0);
-	err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Successful allocation */
-	nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
-	err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
-	if (err != 0) {
-		unit_return_fail(m, "failed to allocate patch context");
-	}
+	g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked_stub;
+	g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked_stub;

	global_desc = nvgpu_gr_global_ctx_desc_alloc(g);
	if (!global_desc) {
		unit_return_fail(m, "failed to allocate desc");
	}

-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
+	/* Try to free gr_ctx before it is allocated. */
+	nvgpu_gr_ctx_free(g, gr_ctx, NULL);
+
+	gr_ctx = nvgpu_alloc_gr_ctx_struct(g);
+	if (!gr_ctx) {
+		unit_return_fail(m, "failed to allocate memory");
+	}
+
+	tsg->gr_ctx = gr_ctx;
+
+	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm);
+	if (mappings == NULL) {
+		unit_return_fail(m, "failed to allocate gr_ctx mappings");
+	}
+
+	/* Context size is not set, so should fail. */
+	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
+	if (err == 0) {
+		unit_return_fail(m, "unexpected success");
+	}
+
+	/* Set the size now, but inject dma allocation failures. */
+	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
+	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
+
+	for (i = 0; i < 2; i++) {
+		nvgpu_posix_enable_fault_injection(dma_fi, true, i);
+		err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
+		if (err == 0) {
+			unit_return_fail(m, "unexpected success");
+		}
+		nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
+	}
+
+	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
+	if (err != 0) {
+		unit_return_fail(m, "failed to allocate ctx buffers");
+	}
+
+	/* Inject kmem alloc failures to trigger mapping failures */
+	for (i = 0; i < 2; i++) {
+		nvgpu_posix_enable_fault_injection(kmem_fi, true, 2 * i);
+		err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
+			global_desc, mappings, false);
+		if (err == 0) {
+			unit_return_fail(m, "unexpected success");
+		}
+		nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
+	}
+
+	/* global ctx_desc size is not set. */
+	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
+		mappings, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
	}
@@ -164,42 +208,21 @@ int test_gr_ctx_error_injection(struct unit_module *m,
		unit_return_fail(m, "failed to allocate global buffers");
	}

-	/* Fail global circular buffer mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
+	/* Fail global ctx buffer mappings */
+	for (i = 0; i < 4; i++) {
+		nvgpu_posix_enable_fault_injection(kmem_fi, true, 4 + (2 * i));
+		err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
+			mappings, false);
+		if (err == 0) {
+			unit_return_fail(m, "unexpected success");
+		}
+		nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
	}

-	/* Fail global attribute buffer mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 4);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Fail global pagepool buffer mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 8);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Fail global access map buffer mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 12);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}

	/* Successful mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
+	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
+		mappings, false);
	if (err != 0) {
		unit_return_fail(m, "failed to map global buffers");
	}
@@ -225,11 +248,9 @@ int test_gr_ctx_error_injection(struct unit_module *m,
	nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);

	/* cleanup */
-	nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx);
-	nvgpu_gr_ctx_free(g, gr_ctx, global_desc, vm);
+	nvgpu_gr_ctx_free(g, gr_ctx, global_desc);
	nvgpu_free_gr_ctx_struct(g, gr_ctx);
	nvgpu_gr_ctx_desc_free(g, desc);
-	nvgpu_vm_put(vm);
	nvgpu_vm_put(g->mm.bar1.vm);

	return UNIT_SUCCESS;
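/*
 * Note on the fault-injection idiom above, illustration only: the posix
 * fault-injection helpers start failing at the Nth allocation after being
 * armed, so looping over increasing counts visits each allocation made
 * inside the call under test, one failure path per iteration. Hypothetical
 * reduced form:
 */
static int sketch_fault_walk(struct gk20a *g, struct nvgpu_gr_ctx_desc *desc,
		struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_posix_fault_inj *fi)
{
	u32 i;
	int err;

	for (i = 0U; i < 2U; i++) {
		nvgpu_posix_enable_fault_injection(fi, true, i);
		err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
		if (err == 0) {
			return -1; /* injected failure was not hit */
		}
		nvgpu_posix_enable_fault_injection(fi, false, 0);
	}

	return 0;
}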
diff --git a/userspace/units/gr/ctx/nvgpu-gr-ctx.h b/userspace/units/gr/ctx/nvgpu-gr-ctx.h
index d4bd6efe5..a25f626fc 100644
--- a/userspace/units/gr/ctx/nvgpu-gr-ctx.h
+++ b/userspace/units/gr/ctx/nvgpu-gr-ctx.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -40,16 +40,15 @@ struct unit_module;
 *
 * Test Type: Feature, Error guessing
 *
- * Targets: #nvgpu_gr_ctx_alloc,
+ * Targets: #nvgpu_gr_ctx_alloc_ctx_buffers,
+ * #nvgpu_gr_ctx_free_ctx_buffers,
 * #nvgpu_gr_ctx_free,
 * #nvgpu_gr_ctx_desc_alloc,
 * #nvgpu_gr_ctx_desc_free,
 * #nvgpu_alloc_gr_ctx_struct,
 * #nvgpu_free_gr_ctx_struct,
 * #nvgpu_gr_ctx_set_size,
- * #nvgpu_gr_ctx_alloc_patch_ctx,
- * #nvgpu_gr_ctx_free_patch_ctx,
- * #nvgpu_gr_ctx_map_global_ctx_buffers,
+ * #nvgpu_gr_ctx_mappings_map_global_ctx_buffers,
 * #nvgpu_gr_ctx_patch_write_begin,
 * #nvgpu_gr_ctx_patch_write,
 * #nvgpu_gr_ctx_patch_write_end.
@@ -63,7 +62,6 @@ struct unit_module;
 * - Inject dma allocation failure and try to allocate gr_ctx, should fail.
 * - Inject kmem allocation failure and try to allocate gr_ctx, should fail.
 * - Disable error injection and allocate gr_ctx, should pass.
- * - Try to free patch_ctx before it is allocated, should fail.
 * - Inject dma allocation failure and try to allocate patch_ctx, should fail.
 * - Disable error injection and allocate patch_ctx, should pass.
 * - Setup all the global context buffers.
diff --git a/userspace/units/gr/global_ctx/nvgpu-gr-global-ctx.c b/userspace/units/gr/global_ctx/nvgpu-gr-global-ctx.c
index eea5e03d3..87fb9cc21 100644
--- a/userspace/units/gr/global_ctx/nvgpu-gr-global-ctx.c
+++ b/userspace/units/gr/global_ctx/nvgpu-gr-global-ctx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -100,7 +100,7 @@ int test_gr_global_ctx_alloc_error_injection(struct unit_module *m,

	/* Ensure mapping fails before buffers are allocated */
	gpu_va = nvgpu_gr_global_ctx_buffer_map(desc,
-			NVGPU_GR_GLOBAL_CTX_CIRCULAR, NULL, 0, false);
+			NVGPU_GR_GLOBAL_CTX_CIRCULAR, NULL, false);
	if (gpu_va != 0) {
		unit_return_fail(m, "unexpected success");
	}
diff --git a/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c b/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c
index 273f15083..966a39ea8 100644
--- a/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c
+++ b/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -757,17 +757,12 @@ int test_gr_init_hal_error_injection(struct unit_module *m,
	}

	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
+	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
+	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
	if (err != 0) {
		unit_return_fail(m, "failed to allocate context");
	}

-	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
-	err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
-	if (err != 0) {
-		unit_return_fail(m, "failed to allocate patch context");
-	}
-
	/* global_ctx = false and arbitrary size */
	g->ops.gr.init.commit_global_pagepool(g, gr_ctx, 0x12345678,
			DUMMY_SIZE, false, false);
@@ -803,7 +798,7 @@ int test_gr_init_hal_error_injection(struct unit_module *m,
	g->ops = gops;

	/* cleanup */
-	nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx);
+	nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx);
	nvgpu_free_gr_ctx_struct(g, gr_ctx);
	nvgpu_gr_ctx_desc_free(g, desc);
	nvgpu_vm_put(vm);
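/*
 * Illustrative sketch, not part of the patch: the separate
 * nvgpu_gr_ctx_alloc() and nvgpu_gr_ctx_alloc_patch_ctx() calls collapse
 * into one pass over the descriptor's size table, so a test sets the sizes
 * it needs and allocates once; unset (zero) sizes are skipped. Hypothetical
 * reduced form:
 */
static int sketch_alloc_all(struct gk20a *g, struct nvgpu_gr_ctx_desc *desc,
		struct nvgpu_gr_ctx *gr_ctx)
{
	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);

	return nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
}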
diff --git a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
index 90f286dbc..e8a5e5a62 100644
--- a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
+++ b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -117,6 +118,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	struct nvgpu_gr_ctx_desc *desc;
	struct nvgpu_gr_global_ctx_buffer_desc *global_desc;
	struct nvgpu_gr_ctx *gr_ctx = NULL;
+	struct nvgpu_gr_ctx_mappings *mappings = NULL;
	struct nvgpu_gr_subctx *subctx = NULL;
	struct nvgpu_mem inst_block;
	struct nvgpu_gr_config *config = nvgpu_gr_get_config_ptr(g);
@@ -128,6 +130,8 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
		nvgpu_local_golden_image_get_fault_injection();
	int (*init_sm_id_table_tmp)(struct gk20a *g,
		struct nvgpu_gr_config *config);
+	struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
+		malloc(sizeof(struct nvgpu_tsg));

	/* Inject allocation failures and initialize obj_ctx, should fail */
	nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
@@ -171,6 +175,8 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
		unit_return_fail(m, "failed to allocate memory");
	}

+	tsg->gr_ctx = gr_ctx;
+
	global_desc = nvgpu_gr_global_ctx_desc_alloc(g);
	if (!global_desc) {
		unit_return_fail(m, "failed to allocate desc");
@@ -195,10 +201,15 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
		unit_return_fail(m, "failed to allocate subcontext");
	}

+	mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm);
+	if (mappings == NULL) {
+		unit_return_fail(m, "failed to allocate gr_ctx mappings");
+	}
+
	/* Fail gr_ctx allocation */
	nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
@@ -207,7 +218,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	/* Fail patch_ctx allocation */
	nvgpu_posix_enable_fault_injection(kmem_fi, true, 3);
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
@@ -216,7 +227,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	/* Fail circular buffer mapping */
	nvgpu_posix_enable_fault_injection(kmem_fi, true, 8);
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
@@ -228,7 +239,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	g->ops.gr.init.fe_pwr_mode_force_on = test_fe_pwr_mode_force_on;
	fe_pwr_mode_count = 0;
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
@@ -237,7 +248,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	/* Fail second call to gops.gr.init.fe_pwr_mode_force_on */
	fe_pwr_mode_count = 1;
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m,
"unexpected success"); @@ -252,7 +263,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.falcon.ctrl_ctxsw = test_falcon_ctrl_ctxsw; ctrl_ctxsw_count = -1; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -265,7 +276,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.init.wait_idle = test_gr_wait_idle; gr_wait_idle_count = 2; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -278,7 +289,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.init.load_sw_bundle_init = test_load_sw_bundle; load_sw_bundle_count = 0; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -288,7 +299,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.init.load_sw_veid_bundle = test_load_sw_bundle; load_sw_bundle_count = 1; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -308,7 +319,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.init.wait_idle = test_gr_wait_idle; gr_wait_idle_count = 4; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -323,7 +334,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, */ ctrl_ctxsw_count = 1; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -335,7 +346,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, */ ctrl_ctxsw_count = 2; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -347,7 +358,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, /* Fail golden context verification */ nvgpu_posix_enable_fault_injection(golden_ctx_verif_fi, true, 0); err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -358,7 +369,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, /* Finally, successful obj_ctx allocation */ err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err != 0) { unit_return_fail(m, 
"failed to allocate obj_ctx"); @@ -371,14 +382,14 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, /* Reallocation with golden image already created */ err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err != 0) { unit_return_fail(m, "failed to re-allocate obj_ctx"); } /* Set preemption mode with invalid compute class */ - err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, desc, gr_ctx, vm, + err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, desc, gr_ctx, VOLTA_DMA_COPY_A, 0, NVGPU_PREEMPTION_MODE_COMPUTE_CTA); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -386,8 +397,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, /* Cleanup */ nvgpu_gr_subctx_free(g, subctx, vm); - nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx); - nvgpu_gr_ctx_free(g, gr_ctx, global_desc, vm); + nvgpu_gr_ctx_free(g, gr_ctx, global_desc); nvgpu_free_gr_ctx_struct(g, gr_ctx); nvgpu_gr_ctx_desc_free(g, desc); nvgpu_gr_obj_ctx_deinit(g, golden_image); diff --git a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.h b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.h index 7371c7814..10fc4e272 100644 --- a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.h +++ b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -53,7 +53,6 @@ struct unit_module; * nvgpu_gr_subctx_free, * nvgpu_gr_obj_ctx_commit_inst, * nvgpu_gr_obj_ctx_commit_inst_gpu_va, - * nvgpu_gr_ctx_get_patch_ctx_mem, * nvgpu_gr_subctx_get_ctx_header, * nvgpu_gr_subctx_load_ctx_header, * nvgpu_gr_global_ctx_get_size, diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.c b/userspace/units/gr/setup/nvgpu-gr-setup.c index 992dde595..cee8cfdde 100644 --- a/userspace/units/gr/setup/nvgpu-gr-setup.c +++ b/userspace/units/gr/setup/nvgpu-gr-setup.c @@ -584,7 +584,7 @@ static void gr_setup_fake_free_obj_ctx(struct unit_module *m, struct gk20a *g) g->ops.gr.setup.free_subctx(gr_setup_ch); nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS, true); - g->ops.gr.setup.free_gr_ctx(g, 0, 0); + g->ops.gr.setup.free_gr_ctx(g, NULL); gr_setup_ch->subctx = gr_subctx; } diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.h b/userspace/units/gr/setup/nvgpu-gr-setup.h index a85deb98a..51569f787 100644 --- a/userspace/units/gr/setup/nvgpu-gr-setup.h +++ b/userspace/units/gr/setup/nvgpu-gr-setup.h @@ -55,7 +55,7 @@ struct unit_module; * nvgpu_gr_ctx_get_ctx_mem, * nvgpu_gr_ctx_set_tsgid, * nvgpu_gr_ctx_get_tsgid, - * nvgpu_gr_ctx_get_global_ctx_va, + * nvgpu_gr_ctx_mappings_get_global_ctx_va, * gops_gr_setup.alloc_obj_ctx, * nvgpu_gr_ctx_load_golden_ctx_image, * gm20b_ctxsw_prog_set_patch_addr,