gpu: nvgpu: maintain ctx buffer mappings separately from ctx mems

In order to maintain separate mappings of the GR TSG and global context
buffers for different subcontexts, we need to separate the memory
struct and the mapping struct for these buffers. This patch moves the
mappings of all GR ctx buffers to the new structure
nvgpu_gr_ctx_mappings.

This structure will be instantiated per subcontext in upcoming patches.

Summary of changes:
  1. Various context buffers were previously allocated and mapped
     separately. All TSG context buffers are now stored in the
     gr_ctx->mem[] array, and their allocation and mapping are
     handled uniformly.
  2. Mapping/unmapping and querying the GPU VA of the context
     buffers are now handled in the ctx_mappings unit. The structure
     nvgpu_gr_ctx_mappings in nvgpu_gr_ctx holds the mappings; it is
     instantiated on ALLOC_OBJ_CTX and deleted on free_gr_ctx.
  3. Introduce mapping flags for TSG and global context buffers so
     that different buffers can be mapped with different caching
     attributes. Map all buffers as cacheable except the
     PRIV_ACCESS_MAP, RTV_CIRCULAR_BUFFER, FECS_TRACE, GR CTX
     and PATCH ctx buffers. Map all buffers as privileged.
  4. Wherever a VM or GPU VA was passed into the obj_ctx allocation
     functions, it is now replaced by nvgpu_gr_ctx_mappings.
  5. The free_gr_ctx API no longer needs to accept the VM, since the
     mappings struct holds the VM. The mappings struct is kept in
     gr_ctx.
  6. Move the preemption buffer allocation logic out of
     nvgpu_gr_obj_ctx_set_graphics_preemption_mode.
  7. Update set_preemption_mode and gr_gk20a_update_hwpm_ctxsw_mode
     to ensure that the buffers are allocated and mapped.
  8. Keep the unit tests and documentation updated.

With these changes there is a clear segregation of the allocation and
mapping of GR context buffers. This will simplify the upcoming change
to add multiple address space support: with multiple address spaces
in a TSG, subcontexts created after the first one only need to map
the buffers.
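
For illustration, the resulting ALLOC_OBJ_CTX flow reduces to roughly
the sketch below. The function names are the ones added or reworked in
this patch; variable declarations, locking and error handling are
elided.

	/* Illustrative sketch only, condensed from nvgpu_gr_setup_alloc_obj_ctx(). */
	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c->vm);

	err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image, gr->global_ctx_buffer,
			gr->gr_ctx_desc, gr->config, gr_ctx, c->subctx,
			mappings, &c->inst_block, class_num, flags,
			c->cde, c->vpr);

	/* GPU VAs are now queried from the mappings object, not from nvgpu_mem. */
	gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX);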

Bug 3677982

Change-Id: I3cd5f1311dd85aad1cf547da8fa45293fb7a7cb3
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2712222
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Sagar Kamble
Date: 2022-04-05 18:14:11 +05:30
Committed by: mobile promotions
Parent: 931e5f8220
Commit: f95cb5f4f8
49 changed files with 1645 additions and 1076 deletions


@@ -554,6 +554,11 @@ gr:
     sources: [ common/gr/obj_ctx.c,
                common/gr/obj_ctx_priv.h,
                include/nvgpu/gr/obj_ctx.h ]
+  ctx_mappings:
+    safe: yes
+    sources: [ common/gr/ctx_mappings.c,
+               common/gr/ctx_mappings_priv.h,
+               include/nvgpu/gr/ctx_mappings.h ]
   subctx:
     safe: yes
     sources: [ common/gr/subctx.c,


@@ -254,6 +254,7 @@ nvgpu-y += \
 	common/gr/gr_intr.o \
 	common/gr/global_ctx.o \
 	common/gr/ctx.o \
+	common/gr/ctx_mappings.o \
 	common/gr/gr_falcon.o \
 	common/gr/subctx.o \
 	common/gr/zcull.o \


@@ -129,6 +129,7 @@ srcs += common/device.c \
 	common/gr/global_ctx.c \
 	common/gr/subctx.c \
 	common/gr/ctx.c \
+	common/gr/ctx_mappings.c \
 	common/gr/gr_falcon.c \
 	common/gr/gr_config.c \
 	common/gr/gr_setup.c \


@@ -978,7 +978,7 @@ void nvgpu_tsg_release(struct nvgpu_ref *ref)
 	}
 
 	if ((tsg->gr_ctx != NULL) && (tsg->vm != NULL)) {
-		g->ops.gr.setup.free_gr_ctx(g, tsg->vm, tsg->gr_ctx);
+		g->ops.gr.setup.free_gr_ctx(g, tsg->gr_ctx);
 	}
 
 #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL


File diff suppressed because it is too large


@@ -0,0 +1,452 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/ctx_mappings.h>
#include <nvgpu/vm.h>
#include <nvgpu/io.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/dma.h>
#include <nvgpu/string.h>
#include <nvgpu/power_features/pg.h>
#include "common/gr/ctx_mappings_priv.h"
struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create(struct gk20a *g,
struct nvgpu_tsg *tsg, struct vm_gk20a *vm)
{
struct nvgpu_gr_ctx_mappings *mappings = NULL;
nvgpu_log(g, gpu_dbg_gr, " ");
if (tsg == NULL || vm == NULL) {
return NULL;
}
mappings = (struct nvgpu_gr_ctx_mappings *)
nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx_mappings));
if (mappings == NULL) {
nvgpu_err(g, "failed to alloc mappings");
return NULL;
}
nvgpu_vm_get(vm);
mappings->tsg = tsg;
mappings->vm = vm;
nvgpu_log(g, gpu_dbg_gr, "done");
return mappings;
}
void nvgpu_gr_ctx_mappings_free(struct gk20a *g,
struct nvgpu_gr_ctx_mappings *mappings)
{
nvgpu_log(g, gpu_dbg_gr, " ");
nvgpu_vm_put(mappings->vm);
nvgpu_kfree(g, mappings);
nvgpu_log(g, gpu_dbg_gr, "done");
}
int nvgpu_gr_ctx_mappings_map_ctx_buffer(struct gk20a *g,
struct nvgpu_gr_ctx *ctx, u32 index,
struct nvgpu_gr_ctx_mappings *mappings)
{
struct vm_gk20a *vm = mappings->vm;
struct nvgpu_mem *mem;
u32 mapping_flags;
u64 gpu_va;
nvgpu_log(g, gpu_dbg_gr, " ");
mem = nvgpu_gr_ctx_get_ctx_mem(ctx, index);
mapping_flags = nvgpu_gr_ctx_get_ctx_mapping_flags(ctx, index);
nvgpu_assert(mappings->ctx_buffer_va[index] == 0ULL);
if (nvgpu_mem_is_valid(mem)) {
gpu_va = nvgpu_gmmu_map(vm,
mem,
mapping_flags,
gk20a_mem_flag_none, true,
mem->aperture);
if (gpu_va == 0ULL) {
nvgpu_err(g, "failed to map ctx buffer %u", index);
return -ENOMEM;
}
mappings->ctx_buffer_va[index] = gpu_va;
nvgpu_log(g, gpu_dbg_gr, "buffer[%u] mapped at address 0x%llx", index, gpu_va);
#ifdef CONFIG_NVGPU_DEBUGGER
if (index == NVGPU_GR_CTX_PM_CTX) {
nvgpu_gr_ctx_set_pm_ctx_mapped(ctx, true);
}
#endif
} else {
nvgpu_log(g, gpu_dbg_gr, "buffer not allocated");
}
nvgpu_log(g, gpu_dbg_gr, "done");
return 0;
}
static void nvgpu_gr_ctx_mappings_unmap_ctx_buffer(struct nvgpu_gr_ctx *ctx,
u32 index, struct nvgpu_gr_ctx_mappings *mappings)
{
struct vm_gk20a *vm = mappings->vm;
struct nvgpu_mem *mem;
mem = nvgpu_gr_ctx_get_ctx_mem(ctx, index);
if (nvgpu_mem_is_valid(mem) &&
(mappings->ctx_buffer_va[index] != 0ULL)) {
nvgpu_gmmu_unmap_addr(vm, mem, mappings->ctx_buffer_va[index]);
mappings->ctx_buffer_va[index] = 0ULL;
#ifdef CONFIG_NVGPU_DEBUGGER
if (index == NVGPU_GR_CTX_PM_CTX) {
nvgpu_gr_ctx_set_pm_ctx_mapped(ctx, false);
}
#endif
}
}
static void nvgpu_gr_ctx_mappings_unmap_ctx_buffers(struct nvgpu_gr_ctx *ctx,
struct nvgpu_gr_ctx_mappings *mappings)
{
u32 i;
for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) {
nvgpu_gr_ctx_mappings_unmap_ctx_buffer(ctx, i, mappings);
}
}
static int nvgpu_gr_ctx_mappings_map_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *ctx,
struct nvgpu_gr_ctx_mappings *mappings)
{
int err = 0;
u32 i;
for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) {
err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings);
if (err != 0) {
nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err);
nvgpu_gr_ctx_mappings_unmap_ctx_buffers(ctx, mappings);
return err;
}
}
return err;
}
#ifdef CONFIG_NVGPU_GFXP
static void nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers(
struct nvgpu_gr_ctx *ctx,
struct nvgpu_gr_ctx_mappings *mappings)
{
u32 i;
for (i = NVGPU_GR_CTX_PREEMPT_CTXSW;
i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) {
nvgpu_gr_ctx_mappings_unmap_ctx_buffer(ctx, i, mappings);
}
}
int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *ctx,
struct nvgpu_gr_ctx_mappings *mappings)
{
int err = 0;
u32 i;
nvgpu_log(g, gpu_dbg_gr, " ");
for (i = NVGPU_GR_CTX_PREEMPT_CTXSW;
i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) {
if (mappings->ctx_buffer_va[i] == 0ULL) {
err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings);
if (err != 0) {
nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err);
nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers(ctx, mappings);
return err;
}
}
}
nvgpu_log(g, gpu_dbg_gr, "done");
return err;
}
#endif
static int nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
u32 va_type, u32 buffer_type, u32 buffer_vpr_type,
bool vpr, struct nvgpu_gr_ctx_mappings *mappings)
{
struct vm_gk20a *vm = mappings->vm;
u64 *g_bfr_va;
u32 *g_bfr_index;
u64 gpu_va = 0ULL;
(void)vpr;
(void)buffer_vpr_type;
g_bfr_va = &mappings->global_ctx_buffer_va[0];
g_bfr_index = &mappings->global_ctx_buffer_index[0];
#ifdef CONFIG_NVGPU_VPR
if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer,
buffer_vpr_type)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
buffer_vpr_type,
vm, true);
g_bfr_index[va_type] = buffer_vpr_type;
} else {
#endif
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
buffer_type,
vm, true);
g_bfr_index[va_type] = buffer_type;
#ifdef CONFIG_NVGPU_VPR
}
#endif
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[va_type] = gpu_va;
return 0;
clean_up:
return -ENOMEM;
}
static void nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers(
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_ctx_mappings *mappings)
{
u64 *g_bfr_va = &mappings->global_ctx_buffer_va[0];
u32 *g_bfr_index = &mappings->global_ctx_buffer_index[0];
struct vm_gk20a *vm = mappings->vm;
u32 i;
for (i = 0U; i < NVGPU_GR_GLOBAL_CTX_VA_COUNT; i++) {
if (g_bfr_va[i] != 0ULL) {
nvgpu_gr_global_ctx_buffer_unmap(global_ctx_buffer,
g_bfr_index[i], vm, g_bfr_va[i]);
}
}
(void) memset(g_bfr_va, 0, sizeof(mappings->global_ctx_buffer_va));
(void) memset(g_bfr_index, 0, sizeof(mappings->global_ctx_buffer_index));
}
static int nvgpu_gr_ctx_mappings_map_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_ctx_mappings *mappings, bool vpr)
{
int err;
/*
* MIG supports only compute class.
* Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
* if 2D/3D/I2M classes(graphics) are supported.
*/
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
/* Circular Buffer */
err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA,
NVGPU_GR_GLOBAL_CTX_CIRCULAR,
#ifdef CONFIG_NVGPU_VPR
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR,
#else
NVGPU_GR_GLOBAL_CTX_CIRCULAR,
#endif
vpr, mappings);
if (err != 0) {
nvgpu_err(g, "cannot map ctx circular buffer");
goto fail;
}
/* Attribute Buffer */
err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE,
#ifdef CONFIG_NVGPU_VPR
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR,
#else
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE,
#endif
vpr, mappings);
if (err != 0) {
nvgpu_err(g, "cannot map ctx attribute buffer");
goto fail;
}
/* Page Pool */
err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL,
#ifdef CONFIG_NVGPU_VPR
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR,
#else
NVGPU_GR_GLOBAL_CTX_PAGEPOOL,
#endif
vpr, mappings);
if (err != 0) {
nvgpu_err(g, "cannot map ctx pagepool buffer");
goto fail;
}
#ifdef CONFIG_NVGPU_GRAPHICS
/*
* RTV circular buffer. Note that this is non-VPR buffer always.
*/
if (nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER)) {
err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER,
false, mappings);
if (err != 0) {
nvgpu_err(g,
"cannot map ctx rtv circular buffer");
goto fail;
}
}
#endif
}
/* Priv register Access Map. Note that this is non-VPR buffer always. */
err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA,
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP,
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP,
false, mappings);
if (err != 0) {
nvgpu_err(g, "cannot map ctx priv access buffer");
goto fail;
}
#ifdef CONFIG_NVGPU_FECS_TRACE
/* FECS trace buffer. Note that this is non-VPR buffer always. */
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA,
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER,
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER,
false, mappings);
if (err != 0) {
nvgpu_err(g, "cannot map ctx fecs trace buffer");
goto fail;
}
}
#endif
return 0;
fail:
nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers(
global_ctx_buffer, mappings);
return err;
}
int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_ctx_mappings *mappings,
bool vpr)
{
int err;
nvgpu_log(g, gpu_dbg_gr, " ");
if (gr_ctx == NULL || global_ctx_buffer == NULL ||
mappings == NULL) {
nvgpu_err(g, "mappings/gr_ctx/global_ctx_buffer struct null");
return -EINVAL;
}
err = nvgpu_gr_ctx_mappings_map_ctx_buffers(g, gr_ctx, mappings);
if (err != 0) {
nvgpu_err(g, "fail to map ctx buffers");
return err;
}
err = nvgpu_gr_ctx_mappings_map_global_ctx_buffers(g,
global_ctx_buffer, mappings, vpr);
if (err != 0) {
nvgpu_err(g, "fail to map global ctx buffer");
nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, mappings);
return err;
}
nvgpu_log(g, gpu_dbg_gr, "done");
return err;
}
void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_ctx_mappings *mappings)
{
nvgpu_log(g, gpu_dbg_gr, " ");
nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers(global_ctx_buffer,
mappings);
nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, mappings);
nvgpu_log(g, gpu_dbg_gr, "done");
}
u64 nvgpu_gr_ctx_mappings_get_global_ctx_va(struct nvgpu_gr_ctx_mappings *mappings,
u32 index)
{
nvgpu_assert(index < NVGPU_GR_GLOBAL_CTX_VA_COUNT);
return mappings->global_ctx_buffer_va[index];
}
u64 nvgpu_gr_ctx_mappings_get_ctx_va(struct nvgpu_gr_ctx_mappings *mappings,
u32 index)
{
nvgpu_assert(index < NVGPU_GR_CTX_COUNT);
return mappings->ctx_buffer_va[index];
}
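
A rough usage sketch of this unit follows (error handling and locking
elided; the actual call sites in this patch are nvgpu_gr_obj_ctx_alloc()
and the gr ctx free path in common/gr/ctx.c, whose diff is suppressed
above).

	/* Create the mapping object for a TSG + VM pair. */
	struct nvgpu_gr_ctx_mappings *m = nvgpu_gr_ctx_mappings_create(g, tsg, vm);

	/* Map all TSG ctx buffers plus the global ctx buffers into that VM. */
	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
			global_ctx_buffer, m, false /* vpr */);

	/* From now on GPU VAs come from the mapping object. */
	u64 patch_va = nvgpu_gr_ctx_mappings_get_ctx_va(m, NVGPU_GR_CTX_PATCH_CTX);

	/* Teardown: unmap everything, then drop the VM reference. */
	nvgpu_gr_ctx_unmap_buffers(g, gr_ctx, global_ctx_buffer, m);
	nvgpu_gr_ctx_mappings_free(g, m);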


@@ -0,0 +1,57 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_CTX_MAPPINGS_PRIV_H
#define NVGPU_GR_CTX_MAPPINGS_PRIV_H
#include <nvgpu/types.h>
struct nvgpu_tsg;
struct vm_gk20a;
struct nvgpu_gr_ctx_mappings {
/** TSG whose gr ctx mappings are tracked in this object */
struct nvgpu_tsg *tsg;
/** GPU virtual address space to which gr ctx buffers are mapped */
struct vm_gk20a *vm;
/**
* Array to store GPU virtual addresses of all TSG context
* buffers.
*/
u64 ctx_buffer_va[NVGPU_GR_CTX_COUNT];
/**
* Array to store GPU virtual addresses of all global context
* buffers.
*/
u64 global_ctx_buffer_va[NVGPU_GR_GLOBAL_CTX_VA_COUNT];
/**
* Array to store indexes of global context buffers
* corresponding to GPU virtual addresses above.
*/
u32 global_ctx_buffer_index[NVGPU_GR_GLOBAL_CTX_VA_COUNT];
};
#endif /* NVGPU_GR_CTX_MAPPINGS_PRIV_H */
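
To illustrate how the two global-buffer arrays pair up (values below
are hypothetical): the VA array records where a buffer landed in the
VM, while the index array records which descriptor (for example the
VPR or the non-VPR variant) was actually mapped, so that the unmap
path can hand the right index back to nvgpu_gr_global_ctx_buffer_unmap().

	/* Hypothetical post-map state, for illustration only. */
	mappings->global_ctx_buffer_va[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA] = 0x205600000ULL;
	mappings->global_ctx_buffer_index[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA] =
		NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR;	/* VPR variant was mapped */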


@@ -31,11 +31,6 @@ struct nvgpu_mem;
  * Pointer to this structure is maintained in #nvgpu_gr_ctx structure.
  */
 struct patch_desc {
-	/**
-	 * Memory to hold patch context buffer.
-	 */
-	struct nvgpu_mem mem;
-
 	/**
 	 * Count of entries written into patch context buffer.
 	 */
@@ -51,9 +46,8 @@ struct zcull_ctx_desc {
 
 #ifdef CONFIG_NVGPU_DEBUGGER
 struct pm_ctx_desc {
-	struct nvgpu_mem mem;
-	u64 gpu_va;
 	u32 pm_mode;
+	bool mapped;
 };
 #endif
 
@@ -100,17 +94,21 @@ struct nvgpu_gr_ctx {
 	bool ctx_id_valid;
 
 	/**
-	 * Memory to hold graphics context buffer.
+	 * Array to store all GR context buffers.
 	 */
-	struct nvgpu_mem mem;
+	struct nvgpu_mem mem[NVGPU_GR_CTX_COUNT];
 
-#ifdef CONFIG_NVGPU_GFXP
-	struct nvgpu_mem preempt_ctxsw_buffer;
-	struct nvgpu_mem spill_ctxsw_buffer;
-	struct nvgpu_mem betacb_ctxsw_buffer;
-	struct nvgpu_mem pagepool_ctxsw_buffer;
-	struct nvgpu_mem gfxp_rtvcb_ctxsw_buffer;
-#endif
+	/**
+	 * Cacheability flags for mapping the context buffers.
+	 */
+	u32 mapping_flags[NVGPU_GR_CTX_COUNT];
+
+	/**
+	 * Pointer to structure that holds GPU mapping of context buffers.
+	 * These mappings will exist for the lifetime of TSG when the
+	 * subcontexts are not enabled.
+	 */
+	struct nvgpu_gr_ctx_mappings *mappings;
 
 	/**
 	 * Patch context buffer descriptor struct.
@@ -146,18 +144,6 @@ struct nvgpu_gr_ctx {
 	bool boosted_ctx;
 #endif
 
-	/**
-	 * Array to store GPU virtual addresses of all global context
-	 * buffers.
-	 */
-	u64 global_ctx_buffer_va[NVGPU_GR_GLOBAL_CTX_VA_COUNT];
-
-	/**
-	 * Array to store indexes of global context buffers
-	 * corresponding to GPU virtual addresses above.
-	 */
-	u32 global_ctx_buffer_index[NVGPU_GR_GLOBAL_CTX_VA_COUNT];
-
 	/**
 	 * TSG identifier corresponding to the graphics context.
 	 */


@@ -31,6 +31,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/ctx_mappings.h>
 #include <nvgpu/gr/subctx.h>
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/gr_utils.h>
@@ -607,7 +608,8 @@ int nvgpu_gr_fecs_trace_reset(struct gk20a *g)
  */
 int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g,
 		struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx,
-		struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid)
+		struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings,
+		pid_t pid, u32 vmid)
 {
 	u64 addr = 0ULL;
 	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
@@ -636,7 +638,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g,
 	}
 
 	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
-		addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
+		addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
 			NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA);
 		nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
 		aperture_mask = 0;
@@ -650,7 +652,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g,
 		return -ENOMEM;
 	}
 
-	mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx);
+	mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX);
 
 	nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr,
 		GK20A_FECS_TRACE_NUM_RECORDS);


@@ -265,7 +265,6 @@ fail:
 	return err;
 }
 
-
 int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g,
 		struct nvgpu_gr_global_ctx_buffer_desc *desc)
 {
@@ -315,9 +314,32 @@ clean_up:
 	return err;
 }
 
+void nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g,
+	struct nvgpu_gr_global_ctx_buffer_desc *desc)
+{
+	u32 i;
+
+	nvgpu_log(g, gpu_dbg_gr, " ");
+
+	/**
+	 * Map all ctx buffers as cacheable except PRIV_ACCESS_MAP,
+	 * RTV_CIRCULAR_BUFFER and FECS_TRACE buffers.
+	 */
+	for (i = 0; i < NVGPU_GR_GLOBAL_CTX_COUNT; i++) {
+		desc[i].mapping_flags = NVGPU_VM_MAP_CACHEABLE;
+	}
+
+	desc[NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP].mapping_flags = 0U;
+	desc[NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER].mapping_flags = 0U;
+#ifdef CONFIG_NVGPU_FECS_TRACE
+	desc[NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER].mapping_flags = 0U;
+#endif
+
+	nvgpu_log(g, gpu_dbg_gr, "done");
+}
+
 u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc,
-	u32 index,
-	struct vm_gk20a *vm, u32 flags, bool priv)
+	u32 index, struct vm_gk20a *vm, bool priv)
 {
 	u64 gpu_va;
 
@@ -326,7 +348,7 @@ u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc,
 	}
 
 	gpu_va = nvgpu_gmmu_map(vm, &desc[index].mem,
-			flags, gk20a_mem_flag_none, priv,
+			desc[index].mapping_flags, gk20a_mem_flag_none, priv,
 			desc[index].mem.aperture);
 	return gpu_va;
 }
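
Put together with the gr init hunk further below, the new flags
plumbing amounts to the following sketch (condensed; not a complete
function from this patch):

	/* Once, right after the global ctx buffers are allocated. */
	nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(g, gr->global_ctx_buffer);

	/* Later, each map call picks up the per-buffer flags internally ... */
	gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
			NVGPU_GR_GLOBAL_CTX_CIRCULAR, vm, true /* priv */);
	/* ... i.e. nvgpu_gmmu_map(vm, &desc[index].mem,
	 *          desc[index].mapping_flags, ...) as shown above,
	 * instead of taking a flags argument from the caller. */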


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -40,6 +40,11 @@ struct nvgpu_gr_global_ctx_buffer_desc {
 	 */
 	size_t size;
 
+	/**
+	 * Cacheability flags for mapping this context buffer.
+	 */
+	u32 mapping_flags;
+
 	/**
 	 * Function pointer to free global context buffer.
 	 */


@@ -131,6 +131,9 @@ static int gr_alloc_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr *gr)
 		return err;
 	}
 
+	nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(g,
+		gr->global_ctx_buffer);
+
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
 	return 0;
 }


@@ -23,6 +23,7 @@
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/ctx_mappings.h>
#include <nvgpu/gr/subctx.h> #include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/obj_ctx.h> #include <nvgpu/gr/obj_ctx.h>
#ifdef CONFIG_NVGPU_GRAPHICS #ifdef CONFIG_NVGPU_GRAPHICS
@@ -163,6 +164,7 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
struct nvgpu_tsg *tsg = NULL; struct nvgpu_tsg *tsg = NULL;
int err = 0; int err = 0;
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_gr_ctx_mappings *mappings = NULL;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr,
"GR%u: allocate object context for channel %u", "GR%u: allocate object context for channel %u",
@@ -203,18 +205,27 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
gr_ctx = tsg->gr_ctx; gr_ctx = tsg->gr_ctx;
if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) { mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c->vm);
if (mappings == NULL) {
nvgpu_err(g, "fail to allocate/get ctx mappings struct");
nvgpu_mutex_release(&tsg->ctx_init_lock);
goto out;
}
if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx,
NVGPU_GR_CTX_CTX))) {
tsg->vm = c->vm; tsg->vm = c->vm;
nvgpu_vm_get(tsg->vm); nvgpu_vm_get(tsg->vm);
err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image, err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image,
gr->global_ctx_buffer, gr->gr_ctx_desc, gr->global_ctx_buffer, gr->gr_ctx_desc,
gr->config, gr_ctx, c->subctx, gr->config, gr_ctx, c->subctx,
tsg->vm, &c->inst_block, class_num, flags, mappings, &c->inst_block, class_num, flags,
c->cde, c->vpr); c->cde, c->vpr);
if (err != 0) { if (err != 0) {
nvgpu_err(g, nvgpu_err(g,
"failed to allocate gr ctx buffer"); "failed to allocate gr ctx buffer");
nvgpu_gr_ctx_free_mappings(g, gr_ctx);
nvgpu_mutex_release(&tsg->ctx_init_lock); nvgpu_mutex_release(&tsg->ctx_init_lock);
nvgpu_vm_put(tsg->vm); nvgpu_vm_put(tsg->vm);
tsg->vm = NULL; tsg->vm = NULL;
@@ -225,13 +236,13 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
} else { } else {
/* commit gr ctx buffer */ /* commit gr ctx buffer */
nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx, nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx,
c->subctx, nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); c->subctx, mappings);
} }
#ifdef CONFIG_NVGPU_FECS_TRACE #ifdef CONFIG_NVGPU_FECS_TRACE
if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) { if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block, err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
c->subctx, gr_ctx, tsg->tgid, 0); c->subctx, gr_ctx, mappings, tsg->tgid, 0);
if (err != 0) { if (err != 0) {
nvgpu_warn(g, nvgpu_warn(g,
"fail to bind channel for ctxsw trace"); "fail to bind channel for ctxsw trace");
@@ -275,22 +286,27 @@ out:
} }
void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g, void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) struct nvgpu_gr_ctx *gr_ctx)
{ {
struct nvgpu_mem *mem;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
if ((gr_ctx != NULL) && if (gr_ctx != NULL) {
nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) { mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX);
if (!nvgpu_mem_is_valid(mem)) {
return;
}
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) && if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) &&
nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close( nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close(
g->gr->gr_ctx_desc)) { g->gr->gr_ctx_desc)) {
g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, mem);
nvgpu_gr_ctx_get_ctx_mem(gr_ctx));
} }
#endif #endif
nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer, vm); nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer);
} }
} }
@@ -334,16 +350,14 @@ static bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode,
return true; return true;
} }
int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
u32 graphics_preempt_mode, u32 compute_preempt_mode, u32 graphics_preempt_mode, u32 compute_preempt_mode,
u32 gr_instance_id) u32 gr_instance_id)
{ {
struct nvgpu_gr_ctx_mappings *mappings;
struct nvgpu_gr_ctx *gr_ctx; struct nvgpu_gr_ctx *gr_ctx;
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_tsg *tsg; struct nvgpu_tsg *tsg;
struct vm_gk20a *vm;
struct nvgpu_gr *gr; struct nvgpu_gr *gr;
u32 class_num; u32 class_num;
int err = 0; int err = 0;
@@ -365,7 +379,6 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
return -EINVAL; return -EINVAL;
} }
vm = tsg->vm;
gr_ctx = tsg->gr_ctx; gr_ctx = tsg->gr_ctx;
if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode, if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode,
@@ -379,13 +392,35 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
graphics_preempt_mode, compute_preempt_mode); graphics_preempt_mode, compute_preempt_mode);
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr->config, err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr->config,
gr->gr_ctx_desc, gr_ctx, vm, class_num, gr->gr_ctx_desc, gr_ctx, class_num,
graphics_preempt_mode, compute_preempt_mode); graphics_preempt_mode, compute_preempt_mode);
if (err != 0) { if (err != 0) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed"); nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err; return err;
} }
mappings = nvgpu_gr_ctx_get_mappings(tsg);
if (mappings == NULL) {
nvgpu_err(g, "failed to get gr_ctx mappings");
return -EINVAL;
}
#ifdef CONFIG_NVGPU_GFXP
err = nvgpu_gr_ctx_alloc_ctx_preemption_buffers(g,
gr->gr_ctx_desc, gr_ctx);
if (err != 0) {
nvgpu_err(g, "fail to allocate ctx preemption buffers");
return err;
}
err = nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(g,
gr_ctx, mappings);
if (err != 0) {
nvgpu_err(g, "fail to map ctx preemption buffers");
return err;
}
#endif
g->ops.tsg.disable(tsg); g->ops.tsg.disable(tsg);
err = nvgpu_preempt_channel(g, ch); err = nvgpu_preempt_channel(g, ch);
@@ -395,7 +430,7 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
} }
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx, nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx,
ch->subctx); ch->subctx, mappings);
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
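
In condensed form, the reworked nvgpu_gr_setup_set_preemption_mode()
path from the hunk above now looks roughly like this (GFXP case;
return-value checks omitted):

	err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr->config,
			gr->gr_ctx_desc, gr_ctx, class_num,
			graphics_preempt_mode, compute_preempt_mode);

	mappings = nvgpu_gr_ctx_get_mappings(tsg);

	/* Allocation and mapping of the preemption buffers are now two
	 * explicit steps instead of a side effect of setting the mode. */
	err = nvgpu_gr_ctx_alloc_ctx_preemption_buffers(g, gr->gr_ctx_desc, gr_ctx);
	err = nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(g, gr_ctx, mappings);

	nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx,
			ch->subctx, mappings);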


@@ -29,6 +29,7 @@
#include <nvgpu/power_features/pg.h> #include <nvgpu/power_features/pg.h>
#endif #endif
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/ctx_mappings.h>
#include <nvgpu/gr/subctx.h> #include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/global_ctx.h> #include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/obj_ctx.h> #include <nvgpu/gr/obj_ctx.h>
@@ -48,19 +49,22 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g,
} }
void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, u64 gpu_va) struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx_mappings *mappings)
{ {
struct nvgpu_mem *ctxheader; struct nvgpu_mem *ctxheader;
u64 gpu_va;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va); nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, mappings);
ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx); ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx);
nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block,
ctxheader->gpu_va); ctxheader->gpu_va);
} else { } else {
gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX);
nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va); nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va);
} }
@@ -70,7 +74,7 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) #if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP)
static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx,
u32 class_num, u32 flags) u32 class_num, u32 flags)
{ {
int err; int err;
@@ -122,7 +126,7 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g,
} }
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config,
gr_ctx_desc, gr_ctx, vm, class_num, graphics_preempt_mode, gr_ctx_desc, gr_ctx, class_num, graphics_preempt_mode,
compute_preempt_mode); compute_preempt_mode);
if (err != 0) { if (err != 0) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed"); nvgpu_err(g, "set_ctxsw_preemption_mode failed");
@@ -138,14 +142,13 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g,
#ifdef CONFIG_NVGPU_GRAPHICS #ifdef CONFIG_NVGPU_GRAPHICS
static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g, static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx,
u32 graphics_preempt_mode) u32 graphics_preempt_mode)
{ {
int err = 0; int err = 0;
(void)config; (void)config;
(void)gr_ctx_desc; (void)gr_ctx_desc;
(void)vm;
/* set preemption modes */ /* set preemption modes */
switch (graphics_preempt_mode) { switch (graphics_preempt_mode) {
@@ -166,6 +169,13 @@ static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g,
nvgpu_log_info(g, "gfxp context attrib_cb_size=%d", nvgpu_log_info(g, "gfxp context attrib_cb_size=%d",
attrib_cb_size); attrib_cb_size);
if ((nvgpu_gr_ctx_get_size(gr_ctx_desc,
NVGPU_GR_CTX_PREEMPT_CTXSW) == 0U) ||
(spill_size == 0U) || (attrib_cb_size == 0U) ||
(pagepool_size == 0U)) {
return -EINVAL;
}
nvgpu_gr_ctx_set_size(gr_ctx_desc, nvgpu_gr_ctx_set_size(gr_ctx_desc,
NVGPU_GR_CTX_SPILL_CTXSW, spill_size); NVGPU_GR_CTX_SPILL_CTXSW, spill_size);
nvgpu_gr_ctx_set_size(gr_ctx_desc, nvgpu_gr_ctx_set_size(gr_ctx_desc,
@@ -179,13 +189,6 @@ static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g,
NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size); NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size);
} }
err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx,
gr_ctx_desc, vm);
if (err != 0) {
nvgpu_err(g, "cannot allocate ctxsw buffers");
return err;
}
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx, nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
graphics_preempt_mode); graphics_preempt_mode);
break; break;
@@ -226,7 +229,7 @@ static int nvgpu_gr_obj_ctx_set_compute_preemption_mode(struct gk20a *g,
int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num, struct nvgpu_gr_ctx *gr_ctx, u32 class_num,
u32 graphics_preempt_mode, u32 compute_preempt_mode) u32 graphics_preempt_mode, u32 compute_preempt_mode)
{ {
int err = 0; int err = 0;
@@ -243,7 +246,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
#ifdef CONFIG_NVGPU_GRAPHICS #ifdef CONFIG_NVGPU_GRAPHICS
err = nvgpu_gr_obj_ctx_set_graphics_preemption_mode(g, config, err = nvgpu_gr_obj_ctx_set_graphics_preemption_mode(g, config,
gr_ctx_desc, gr_ctx, vm, graphics_preempt_mode); gr_ctx_desc, gr_ctx, graphics_preempt_mode);
if (err != 0) { if (err != 0) {
goto fail; goto fail;
@@ -259,7 +262,8 @@ fail:
void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx) struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx_mappings *mappings)
{ {
#ifdef CONFIG_NVGPU_GFXP #ifdef CONFIG_NVGPU_GFXP
u64 addr; u64 addr;
@@ -269,6 +273,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
(void)config; (void)config;
(void)subctx; (void)subctx;
(void)mappings;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
@@ -280,35 +285,35 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
} }
if (!nvgpu_mem_is_valid( if (!nvgpu_mem_is_valid(
nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx))) { nvgpu_gr_ctx_get_ctx_mem(gr_ctx,
NVGPU_GR_CTX_PREEMPT_CTXSW))) {
goto done; goto done;
} }
if (subctx != NULL) { if (subctx != NULL) {
nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, mappings);
gr_ctx);
} else { } else {
nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx); nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx, mappings);
} }
nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
addr = nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->gpu_va; addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_BETACB_CTXSW);
g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, mappings,
nvgpu_gr_config_get_tpc_count(config), nvgpu_gr_config_get_tpc_count(config),
nvgpu_gr_config_get_max_tpc_count(config), addr, nvgpu_gr_config_get_max_tpc_count(config), addr,
true); true);
mem = nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(gr_ctx); mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_PAGEPOOL_CTXSW);
addr = mem->gpu_va; addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PAGEPOOL_CTXSW);
nvgpu_assert(mem->size <= U32_MAX); nvgpu_assert(mem->size <= U32_MAX);
size = (u32)mem->size; size = (u32)mem->size;
g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size, g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
true, false); true, false);
mem = nvgpu_gr_ctx_get_spill_ctxsw_buffer(gr_ctx); mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_SPILL_CTXSW);
addr = mem->gpu_va; addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_SPILL_CTXSW);
nvgpu_assert(mem->size <= U32_MAX); nvgpu_assert(mem->size <= U32_MAX);
size = (u32)mem->size; size = (u32)mem->size;
@@ -321,7 +326,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
} }
if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) { if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) {
g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true); g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, mappings, true);
} }
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
@@ -333,7 +338,10 @@ done:
void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch) struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings,
bool patch)
{ {
u64 addr; u64 addr;
u32 size; u32 size;
@@ -351,7 +359,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
*/ */
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
/* global pagepool buffer */ /* global pagepool buffer */
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA); NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA);
size = nvgpu_safe_cast_u64_to_u32(nvgpu_gr_global_ctx_get_size( size = nvgpu_safe_cast_u64_to_u32(nvgpu_gr_global_ctx_get_size(
global_ctx_buffer, global_ctx_buffer,
@@ -361,7 +369,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
patch, true); patch, true);
/* global bundle cb */ /* global bundle cb */
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA); NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA);
size = nvgpu_safe_cast_u64_to_u32( size = nvgpu_safe_cast_u64_to_u32(
g->ops.gr.init.get_bundle_cb_default_size(g)); g->ops.gr.init.get_bundle_cb_default_size(g));
@@ -370,10 +378,10 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
patch); patch);
/* global attrib cb */ /* global attrib cb */
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA); NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA);
g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, mappings,
nvgpu_gr_config_get_tpc_count(config), nvgpu_gr_config_get_tpc_count(config),
nvgpu_gr_config_get_max_tpc_count(config), addr, patch); nvgpu_gr_config_get_max_tpc_count(config), addr, patch);
@@ -383,7 +391,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
#ifdef CONFIG_NVGPU_GRAPHICS #ifdef CONFIG_NVGPU_GRAPHICS
if (g->ops.gr.init.commit_rtv_cb != NULL) { if (g->ops.gr.init.commit_rtv_cb != NULL) {
/* RTV circular buffer */ /* RTV circular buffer */
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA); NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA);
g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch); g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch);
@@ -546,7 +554,8 @@ clean_up:
static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx) struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings)
{ {
int err = 0; int err = 0;
struct netlist_av_list *sw_method_init = struct netlist_av_list *sw_method_init =
@@ -562,7 +571,7 @@ static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g,
g->ops.gr.init.fe_go_idle_timeout(g, false); g->ops.gr.init.fe_go_idle_timeout(g, false);
nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
config, gr_ctx, false); config, gr_ctx, mappings, false);
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
/* override a few ctx state registers */ /* override a few ctx state registers */
@@ -635,7 +644,7 @@ static int nvgpu_gr_obj_ctx_save_golden_ctx(struct gk20a *g,
nvgpu_log(g, gpu_dbg_gr, " "); nvgpu_log(g, gpu_dbg_gr, " ");
gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX);
size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION #ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
@@ -707,6 +716,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_config *config, struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings,
struct nvgpu_mem *inst_block) struct nvgpu_mem *inst_block)
{ {
int err = 0; int err = 0;
@@ -731,7 +741,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
} }
err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer, err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer,
config, gr_ctx); config, gr_ctx, mappings);
if (err != 0) { if (err != 0) {
goto clean_up; goto clean_up;
} }
@@ -778,27 +788,71 @@ clean_up:
return err; return err;
} }
static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g, static void nvgpu_gr_obj_ctx_gr_ctx_set_size(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image, struct nvgpu_gr_obj_ctx_golden_image *golden_image,
struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_desc *gr_ctx_desc)
struct vm_gk20a *vm)
{ {
u64 size; u64 size;
int err = 0;
nvgpu_log_fn(g, " ");
size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX, nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX,
nvgpu_safe_cast_u64_to_u32(size)); nvgpu_safe_cast_u64_to_u32(size));
nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size); nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size);
err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr_ctx_desc, vm); }
static void nvgpu_gr_obj_ctx_patch_ctx_set_size(struct gk20a *g,
struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx_desc *gr_ctx_desc)
{
u32 size;
size = nvgpu_safe_mult_u32(
g->ops.gr.init.get_patch_slots(g, config),
PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);
nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_PATCH_CTX, size);
nvgpu_log(g, gpu_dbg_gr, "patch_ctx size = %u", size);
}
static int nvgpu_gr_obj_ctx_alloc_buffers(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx,
u32 class_num, u32 flags)
{
int err;
(void)class_num;
(void)flags;
nvgpu_log(g, gpu_dbg_gr, " ");
nvgpu_gr_obj_ctx_gr_ctx_set_size(g, golden_image, gr_ctx_desc);
nvgpu_gr_obj_ctx_patch_ctx_set_size(g, config, gr_ctx_desc);
nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0);
#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP)
err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config,
gr_ctx_desc, gr_ctx, class_num, flags);
if (err != 0) { if (err != 0) {
nvgpu_err(g, "fail to init preemption mode");
return err;
}
#endif
err = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc, gr_ctx);
if (err != 0) {
nvgpu_err(g, "fail to allocate ctx buffers");
return err; return err;
} }
return 0; nvgpu_log(g, gpu_dbg_gr, "done");
return err;
} }
int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
@@ -808,69 +862,40 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_subctx *subctx,
struct vm_gk20a *vm, struct nvgpu_gr_ctx_mappings *mappings,
struct nvgpu_mem *inst_block, struct nvgpu_mem *inst_block,
u32 class_num, u32 flags, u32 class_num, u32 flags,
bool cde, bool vpr) bool cde, bool vpr)
{ {
int err = 0; int err = 0;
(void)class_num;
(void)flags;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, golden_image, gr_ctx_desc, err = nvgpu_gr_obj_ctx_alloc_buffers(g, golden_image, gr_ctx_desc,
gr_ctx, vm); config, gr_ctx, class_num, flags);
if (err != 0) { if (err != 0) {
nvgpu_err(g, "fail to allocate TSG gr ctx buffer"); nvgpu_err(g, "failed to alloc ctx buffers");
goto out; goto out;
} }
/* allocate patch buffer */ nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(g, gr_ctx);
if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx))) {
nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0);
nvgpu_gr_ctx_set_size(gr_ctx_desc, err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
NVGPU_GR_CTX_PATCH_CTX, global_ctx_buffer, mappings, vpr);
nvgpu_safe_mult_u32(
g->ops.gr.init.get_patch_slots(g, config),
PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY));
err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, gr_ctx_desc, vm);
if (err != 0) { if (err != 0) {
nvgpu_err(g, "fail to allocate patch buffer"); nvgpu_err(g, "failed to map ctx buffers");
goto out;
}
}
#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP)
err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config,
gr_ctx_desc, gr_ctx, vm, class_num, flags);
if (err != 0) {
nvgpu_err(g, "fail to init preemption mode");
goto out;
}
#endif
/* map global buffer to channel gpu_va and commit */
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
global_ctx_buffer, vm, vpr);
if (err != 0) {
nvgpu_err(g, "fail to map global ctx buffer");
goto out; goto out;
} }
nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
config, gr_ctx, true); config, gr_ctx, mappings, true);
/* commit gr ctx buffer */ /* commit gr ctx buffer */
nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, mappings);
nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va);
/* init golden image */ /* init golden image */
err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image, err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image,
global_ctx_buffer, config, gr_ctx, inst_block); global_ctx_buffer, config, gr_ctx, mappings, inst_block);
if (err != 0) { if (err != 0) {
nvgpu_err(g, "fail to init golden ctx image"); nvgpu_err(g, "fail to init golden ctx image");
goto out; goto out;
@@ -890,11 +915,11 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
#endif #endif
/* load golden image */ /* load golden image */
nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, mappings,
golden_image->local_golden_image, cde); golden_image->local_golden_image, cde);
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx, nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx,
subctx); subctx, mappings);
#ifndef CONFIG_NVGPU_NON_FUSA #ifndef CONFIG_NVGPU_NON_FUSA
if (g->ops.gpu_class.is_valid_compute(class_num) && if (g->ops.gpu_class.is_valid_compute(class_num) &&
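
The reworked nvgpu_gr_obj_ctx_alloc() above now strictly orders sizing
and allocation before any mapping; a condensed sketch of the sequence
(error paths omitted):

	/* 1. Size and allocate every TSG ctx buffer into gr_ctx->mem[]. */
	err = nvgpu_gr_obj_ctx_alloc_buffers(g, golden_image, gr_ctx_desc,
			config, gr_ctx, class_num, flags);

	/* 2. Choose cacheability per buffer, then map TSG + global buffers. */
	nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(g, gr_ctx);
	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
			global_ctx_buffer, mappings, vpr);

	/* 3. Commit GPU VAs taken from the mappings object. */
	nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
			config, gr_ctx, mappings, true);
	nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, mappings);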


@@ -23,6 +23,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/subctx.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/ctx_mappings.h>
 #include <nvgpu/gmmu.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/power_features/pg.h>
@@ -81,23 +82,27 @@ void nvgpu_gr_subctx_free(struct gk20a *g,
 void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
 	struct nvgpu_gr_subctx *subctx,
-	struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va)
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings)
 {
 	struct nvgpu_mem *ctxheader = &subctx->ctx_header;
+	u64 gpu_va;
+
+	gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX);
 
 #ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP
 	/* set priv access map */
 	g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader,
-		nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
+		nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
 			NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA));
 #endif
 
 	g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader,
-		nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va);
+		nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PATCH_CTX));
 
 #ifdef CONFIG_NVGPU_DEBUGGER
 	g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader,
-		nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va);
+		nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PM_CTX));
 #endif
 
 #ifdef CONFIG_NVGPU_GRAPHICS
@@ -129,24 +134,26 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx
 #ifdef CONFIG_NVGPU_GFXP
 void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g,
-	struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)
+	struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings)
 {
+	u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
+					NVGPU_GR_CTX_PREEMPT_CTXSW);
+
 	g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &subctx->ctx_header,
-		nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va);
+		preempt_ctxsw_gpu_va);
 
 	if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) {
 		g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g,
-			&subctx->ctx_header,
-			nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va);
+			&subctx->ctx_header, preempt_ctxsw_gpu_va);
 	}
 }
 #endif /* CONFIG_NVGPU_GFXP */
 
 #ifdef CONFIG_NVGPU_DEBUGGER
 void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g,
-	struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)
+	struct nvgpu_gr_subctx *subctx, u64 pm_ctx_gpu_va)
 {
 	g->ops.gr.ctxsw_prog.set_pm_ptr(g, &subctx->ctx_header,
-		nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va);
+		pm_ctx_gpu_va);
 }
 #endif


@@ -506,8 +506,7 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g,
 		return -EINVAL;
 	}
 
-	pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(tsg->gr_ctx);
-
+	pm_ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_PM_CTX);
 	if (pm_ctx_mem == NULL) {
 		nvgpu_err(g, "No PM context");
 		return -EINVAL;


@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Graphics
  *
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -41,7 +41,7 @@
 #include "common/vgpu/ivc/comm_vgpu.h"
 
 void vgpu_gr_free_gr_ctx(struct gk20a *g,
-		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
+		struct nvgpu_gr_ctx *gr_ctx)
 {
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -28,6 +28,6 @@ struct nvgpu_gr_ctx;
 struct vm_gk20a;
 
 void vgpu_gr_free_gr_ctx(struct gk20a *g,
-	struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_ctx *gr_ctx);
 
 #endif


@@ -85,7 +85,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
struct nvgpu_channel *ch; struct nvgpu_channel *ch;
struct nvgpu_gr_ctx *gr_ctx; struct nvgpu_gr_ctx *gr_ctx;
bool skip_update = false; bool skip_update = false;
int err; u64 pm_ctx_gpu_va = 0ULL;
int ret; int ret;
struct nvgpu_gr *gr = nvgpu_gr_get_instance_ptr(g, gr_instance_id); struct nvgpu_gr *gr = nvgpu_gr_get_instance_ptr(g, gr_instance_id);
@@ -94,15 +94,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
gr_ctx = tsg->gr_ctx; gr_ctx = tsg->gr_ctx;
if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) {
nvgpu_gr_ctx_set_size(gr->gr_ctx_desc, ret = nvgpu_gr_ctx_alloc_map_pm_ctx(g, tsg,
NVGPU_GR_CTX_PM_CTX, gr->gr_ctx_desc, gr->hwpm_map);
nvgpu_gr_hwpm_map_get_size(gr->hwpm_map));
ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx,
gr->gr_ctx_desc, tsg->vm);
if (ret != 0) { if (ret != 0) {
nvgpu_err(g, nvgpu_err(g,
"failed to allocate pm ctxt buffer"); "failed to allocate and map pm ctxt buffer");
return ret; return ret;
} }
@@ -112,7 +108,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
} }
} }
ret = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, &skip_update); ret = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode,
&pm_ctx_gpu_va, &skip_update);
if (ret != 0) { if (ret != 0) {
return ret; return ret;
} }
@@ -134,11 +131,12 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
nvgpu_rwsem_down_read(&tsg->ch_list_lock); nvgpu_rwsem_down_read(&tsg->ch_list_lock);
nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_list_for_each_entry(ch, &tsg->ch_list,
nvgpu_channel, ch_entry) { nvgpu_channel, ch_entry) {
nvgpu_gr_subctx_set_hwpm_ptr(g, ch->subctx, gr_ctx); nvgpu_gr_subctx_set_hwpm_ptr(g, ch->subctx,
pm_ctx_gpu_va);
} }
nvgpu_rwsem_up_read(&tsg->ch_list_lock); nvgpu_rwsem_up_read(&tsg->ch_list_lock);
} else { } else {
nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx); nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, pm_ctx_gpu_va);
} }
out: out:
@@ -1523,7 +1521,8 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
if (!gr_ctx_ready) { if (!gr_ctx_ready) {
gr_ctx_ready = true; gr_ctx_ready = true;
} }
current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx,
NVGPU_GR_CTX_CTX);
} else { } else {
err = gr_gk20a_get_pm_ctx_buffer_offsets(g, err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
ctx_ops[i].offset, ctx_ops[i].offset,
@@ -1539,7 +1538,9 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
} }
if (!pm_ctx_ready) { if (!pm_ctx_ready) {
/* Make sure ctx buffer was initialized */ /* Make sure ctx buffer was initialized */
if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx))) { if (!nvgpu_mem_is_valid(
nvgpu_gr_ctx_get_ctx_mem(gr_ctx,
NVGPU_GR_CTX_PM_CTX))) {
nvgpu_err(g, nvgpu_err(g,
"Invalid ctx buffer"); "Invalid ctx buffer");
err = -EINVAL; err = -EINVAL;
@@ -1547,14 +1548,16 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
} }
pm_ctx_ready = true; pm_ctx_ready = true;
} }
current_mem = nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx); current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx,
NVGPU_GR_CTX_PM_CTX);
} }
for (j = 0; j < num_offsets; j++) { for (j = 0; j < num_offsets; j++) {
/* sanity check gr ctxt offsets, /* sanity check gr ctxt offsets,
* don't write outside, worst case * don't write outside, worst case
*/ */
if ((current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx)) && if ((current_mem == nvgpu_gr_ctx_get_ctx_mem(
gr_ctx, NVGPU_GR_CTX_CTX)) &&
(offsets[j] >= (offsets[j] >=
nvgpu_gr_obj_ctx_get_golden_image_size( nvgpu_gr_obj_ctx_get_golden_image_size(
gr->golden_image))) { gr->golden_image))) {
@@ -1581,7 +1584,9 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
offsets[j] + 4U, v); offsets[j] + 4U, v);
} }
if (current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx) && if (current_mem ==
nvgpu_gr_ctx_get_ctx_mem(
gr_ctx, NVGPU_GR_CTX_CTX) &&
g->ops.gr.ctx_patch_smpc != NULL) { g->ops.gr.ctx_patch_smpc != NULL) {
/* check to see if we need to add a special fix /* check to see if we need to add a special fix
for some of the SMPC perf regs */ for some of the SMPC perf regs */
@@ -1617,7 +1622,7 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
nvgpu_kfree(g, offsets); nvgpu_kfree(g, offsets);
} }
if (nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->cpu_va != NULL) { if (nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_PATCH_CTX)->cpu_va != NULL) {
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); nvgpu_gr_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
} }

View File

@@ -394,7 +394,7 @@ int gr_gm20b_update_pc_sampling(struct nvgpu_channel *c,
} }
gr_ctx = tsg->gr_ctx; gr_ctx = tsg->gr_ctx;
mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX);
if (!nvgpu_mem_is_valid(mem) || c->vpr) { if (!nvgpu_mem_is_valid(mem) || c->vpr) {
return -EINVAL; return -EINVAL;
} }

View File

@@ -767,7 +767,7 @@ int gr_gp10b_set_boosted_ctx(struct nvgpu_channel *ch,
gr_ctx = tsg->gr_ctx; gr_ctx = tsg->gr_ctx;
nvgpu_gr_ctx_set_boosted_ctx(gr_ctx, boost); nvgpu_gr_ctx_set_boosted_ctx(gr_ctx, boost);
mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX);
err = nvgpu_channel_disable_tsg(g, ch); err = nvgpu_channel_disable_tsg(g, ch);
if (err != 0) { if (err != 0) {

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -34,6 +34,7 @@
struct gk20a; struct gk20a;
struct nvgpu_gr_ctx; struct nvgpu_gr_ctx;
struct nvgpu_gr_ctx_mappings;
struct nvgpu_gr_config; struct nvgpu_gr_config;
struct netlist_av_list; struct netlist_av_list;
struct nvgpu_gr_config; struct nvgpu_gr_config;
@@ -71,8 +72,8 @@ u32 gm20b_gr_init_get_global_ctx_cb_buffer_size(struct gk20a *g);
u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g); u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g);
void gm20b_gr_init_commit_global_attrib_cb(struct gk20a *g, void gm20b_gr_init_commit_global_attrib_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings,
bool patch); u32 tpc_count, u32 max_tpc, u64 addr, bool patch);
u32 gm20b_gr_init_get_patch_slots(struct gk20a *g, u32 gm20b_gr_init_get_patch_slots(struct gk20a *g,
struct nvgpu_gr_config *config); struct nvgpu_gr_config *config);

View File

@@ -402,13 +402,14 @@ u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g)
} }
void gm20b_gr_init_commit_global_attrib_cb(struct gk20a *g, void gm20b_gr_init_commit_global_attrib_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings,
bool patch) u32 tpc_count, u32 max_tpc, u64 addr, bool patch)
{ {
u32 cb_addr; u32 cb_addr;
(void)tpc_count; (void)tpc_count;
(void)max_tpc; (void)max_tpc;
(void)mappings;
addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(); addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v();

View File

@@ -34,6 +34,7 @@
struct gk20a; struct gk20a;
struct nvgpu_gr_config; struct nvgpu_gr_config;
struct nvgpu_gr_ctx; struct nvgpu_gr_ctx;
struct nvgpu_gr_ctx_mappings;
struct netlist_av_list; struct netlist_av_list;
struct nvgpu_gr_obj_ctx_gfx_regs; struct nvgpu_gr_obj_ctx_gfx_regs;
@@ -69,8 +70,8 @@ u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
u32 max_tpc); u32 max_tpc);
void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g, void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings,
bool patch); u32 tpc_count, u32 max_tpc, u64 addr, bool patch);
void gv11b_gr_init_fe_go_idle_timeout(struct gk20a *g, bool enable); void gv11b_gr_init_fe_go_idle_timeout(struct gk20a *g, bool enable);
#ifdef CONFIG_NVGPU_SM_DIVERSITY #ifdef CONFIG_NVGPU_SM_DIVERSITY

View File

@@ -27,6 +27,7 @@
#include <nvgpu/bug.h> #include <nvgpu/bug.h>
#include <nvgpu/static_analysis.h> #include <nvgpu/static_analysis.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/ctx_mappings.h>
#include <nvgpu/ltc.h> #include <nvgpu/ltc.h>
#include <nvgpu/netlist.h> #include <nvgpu/netlist.h>
@@ -711,21 +712,21 @@ u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
} }
void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g, void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings,
bool patch) u32 tpc_count, u32 max_tpc, u64 addr, bool patch)
{ {
u32 attrBufferSize; u32 attrBufferSize;
u32 cb_addr; u32 cb_addr;
gm20b_gr_init_commit_global_attrib_cb(g, gr_ctx, tpc_count, max_tpc, gm20b_gr_init_commit_global_attrib_cb(g, gr_ctx, mappings, tpc_count,
addr, patch); max_tpc, addr, patch);
addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(); addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v();
#ifdef CONFIG_NVGPU_GFXP #ifdef CONFIG_NVGPU_GFXP
if (nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va != 0ULL) { if (nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PREEMPT_CTXSW) != 0ULL) {
attrBufferSize = nvgpu_safe_cast_u64_to_u32( attrBufferSize = nvgpu_safe_cast_u64_to_u32(
nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size); nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_BETACB_CTXSW)->size);
} else { } else {
#endif #endif
attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g, attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g,

View File

@@ -26,6 +26,7 @@
#include <nvgpu/static_analysis.h> #include <nvgpu/static_analysis.h>
#include <nvgpu/netlist.h> #include <nvgpu/netlist.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/ctx_mappings.h>
#include "gr_init_gm20b.h" #include "gr_init_gm20b.h"
#include "gr_init_tu104.h" #include "gr_init_tu104.h"
@@ -194,12 +195,13 @@ void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr,
#ifdef CONFIG_NVGPU_GFXP #ifdef CONFIG_NVGPU_GFXP
void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch) struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings,
bool patch)
{ {
u64 addr; u64 addr;
u64 gpu_va;
u32 rtv_cb_size; u32 rtv_cb_size;
u32 gfxp_addr_size; u32 gfxp_addr_size;
struct nvgpu_mem *buf_mem;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
@@ -211,9 +213,8 @@ void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g,
gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f(); gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f();
/* GFXP RTV circular buffer */ /* GFXP RTV circular buffer */
buf_mem = nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer(gr_ctx); gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW);
addr = buf_mem->gpu_va >> addr = gpu_va >> gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f();
gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f();
nvgpu_assert(u64_hi32(addr) == 0U); nvgpu_assert(u64_hi32(addr) == 0U);
tu104_gr_init_patch_rtv_cb(g, gr_ctx, (u32)addr, tu104_gr_init_patch_rtv_cb(g, gr_ctx, (u32)addr,

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -28,6 +28,7 @@
struct gk20a; struct gk20a;
struct nvgpu_gr_ctx; struct nvgpu_gr_ctx;
struct netlist_av64_list; struct netlist_av64_list;
struct nvgpu_gr_ctx_mappings;
u32 tu104_gr_init_get_bundle_cb_default_size(struct gk20a *g); u32 tu104_gr_init_get_bundle_cb_default_size(struct gk20a *g);
u32 tu104_gr_init_get_min_gpm_fifo_depth(struct gk20a *g); u32 tu104_gr_init_get_min_gpm_fifo_depth(struct gk20a *g);
@@ -44,7 +45,8 @@ void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr,
struct nvgpu_gr_ctx *gr_ctx, bool patch); struct nvgpu_gr_ctx *gr_ctx, bool patch);
void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch); struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings,
bool patch);
u32 tu104_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g); u32 tu104_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g);
u32 tu104_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g); u32 tu104_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g);

View File

@@ -34,6 +34,7 @@ struct vm_gk20a;
struct nvgpu_mem; struct nvgpu_mem;
struct nvgpu_channel; struct nvgpu_channel;
struct nvgpu_gr_ctx; struct nvgpu_gr_ctx;
struct nvgpu_gr_ctx_mappings;
struct nvgpu_gr_config; struct nvgpu_gr_config;
struct nvgpu_gr_isr_data; struct nvgpu_gr_isr_data;
struct nvgpu_gr_intr_info; struct nvgpu_gr_intr_info;
@@ -188,7 +189,6 @@ struct gops_gr_setup {
* @brief Free GR engine context image. * @brief Free GR engine context image.
* *
* @param g [in] Pointer to GPU driver struct. * @param g [in] Pointer to GPU driver struct.
* @param vm [in] Pointer to virtual memory.
* @param gr_ctx [in] Pointer to GR engine context image. * @param gr_ctx [in] Pointer to GR engine context image.
* *
* This function will free memory allocated for patch * This function will free memory allocated for patch
@@ -199,7 +199,6 @@ struct gops_gr_setup {
* @see nvgpu_gr_setup_free_gr_ctx * @see nvgpu_gr_setup_free_gr_ctx
*/ */
void (*free_gr_ctx)(struct gk20a *g, void (*free_gr_ctx)(struct gk20a *g,
struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
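
	/*
	 * Editor's note: illustrative sketch only, not part of this change.
	 * With the VM parameter dropped, a hypothetical caller of this HAL
	 * now simply does
	 *
	 *     g->ops.gr.setup.free_gr_ctx(g, tsg->gr_ctx);
	 *
	 * since the gr_ctx carries its own ctx_mappings (and the VM
	 * references they hold).
	 */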
/** /**
@@ -849,6 +848,7 @@ struct gops_gr_init {
bool patch, bool global_ctx); bool patch, bool global_ctx);
void (*commit_global_attrib_cb)(struct gk20a *g, void (*commit_global_attrib_cb)(struct gk20a *g,
struct nvgpu_gr_ctx *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
struct nvgpu_gr_ctx_mappings *mappings,
u32 tpc_count, u32 max_tpc, u32 tpc_count, u32 max_tpc,
u64 addr, bool patch); u64 addr, bool patch);
void (*commit_global_cb_manager)(struct gk20a *g, void (*commit_global_cb_manager)(struct gk20a *g,
@@ -899,6 +899,7 @@ struct gops_gr_init {
bool patch); bool patch);
void (*commit_gfxp_rtv_cb)(struct gk20a *g, void (*commit_gfxp_rtv_cb)(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings,
bool patch); bool patch);
u32 (*get_attrib_cb_gfxp_default_size)(struct gk20a *g); u32 (*get_attrib_cb_gfxp_default_size)(struct gk20a *g);
u32 (*get_attrib_cb_gfxp_size)(struct gk20a *g); u32 (*get_attrib_cb_gfxp_size)(struct gk20a *g);
@@ -1109,6 +1110,7 @@ struct gops_gr_fecs_trace {
struct nvgpu_mem *inst_block, struct nvgpu_mem *inst_block,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings,
pid_t pid, u32 vmid); pid_t pid, u32 vmid);
int (*unbind_channel)(struct gk20a *g, int (*unbind_channel)(struct gk20a *g,
struct nvgpu_mem *inst_block); struct nvgpu_mem *inst_block);

View File

@@ -26,6 +26,7 @@
#include <nvgpu/types.h> #include <nvgpu/types.h>
#include <nvgpu/nvgpu_mem.h> #include <nvgpu/nvgpu_mem.h>
#include <nvgpu/gr/global_ctx.h> #include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/hwpm_map.h>
#define NVGPU_INVALID_SM_CONFIG_ID (U32_MAX) #define NVGPU_INVALID_SM_CONFIG_ID (U32_MAX)
@@ -39,7 +40,9 @@
*/ */
struct gk20a; struct gk20a;
struct vm_gk20a; struct vm_gk20a;
struct nvgpu_tsg;
struct nvgpu_gr_ctx; struct nvgpu_gr_ctx;
struct nvgpu_gr_ctx_mappings;
struct nvgpu_gr_global_ctx_buffer_desc; struct nvgpu_gr_global_ctx_buffer_desc;
struct nvgpu_gr_global_ctx_local_golden_image; struct nvgpu_gr_global_ctx_local_golden_image;
struct patch_desc; struct patch_desc;
@@ -156,25 +159,19 @@ void nvgpu_gr_ctx_set_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc,
u32 index, u32 size); u32 index, u32 size);
/** /**
* @brief Allocate graphics context buffer. * @brief Get size of GR context buffer with given index.
* *
* @param g [in] Pointer to GPU driver struct. * @param desc [in] Pointer to context descriptor struct.
* @param gr_ctx [in] Pointer to graphics context struct. * @param index [in] Index of GR context buffer.
* @param gr_ctx_desc [in] Pointer to context descriptor struct.
* @param vm [in] Pointer to virtual memory.
* *
* This function allocates memory for graphics context buffer and also * @return size of the buffer.
* maps it to given virtual memory.
* *
* @return 0 in case of success, < 0 in case of failure. * This function returns the size of GR context buffer with given buffer
* @retval -ENOMEM if context memory allocation fails. * index. \a index must be less than NVGPU_GR_CTX_COUNT otherwise
* @retval -EINVAL if context buffer size is not set in * an assert is raised.
* #nvgpu_gr_ctx_desc struct.
*/ */
int nvgpu_gr_ctx_alloc(struct gk20a *g, u32 nvgpu_gr_ctx_get_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_ctx *gr_ctx, u32 index);
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct vm_gk20a *vm);
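
/*
 * Editor's note: minimal usage sketch (hypothetical helper, not part of this
 * change), pairing nvgpu_gr_ctx_set_size() with the new accessor. Assumes
 * NVGPU_GR_CTX_PATCH_CTX is a valid buffer index at this point.
 */
static inline u32 example_query_patch_ctx_size(struct nvgpu_gr_ctx_desc *desc)
{
	/* Returns the size previously programmed for the patch context. */
	return nvgpu_gr_ctx_get_size(desc, NVGPU_GR_CTX_PATCH_CTX);
}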
/** /**
* @brief Free graphics context buffer. * @brief Free graphics context buffer.
@@ -182,93 +179,14 @@ int nvgpu_gr_ctx_alloc(struct gk20a *g,
* @param g [in] Pointer to GPU driver struct. * @param g [in] Pointer to GPU driver struct.
* @param gr_ctx [in] Pointer to graphics context struct. * @param gr_ctx [in] Pointer to graphics context struct.
* @param global_ctx_buffer [in]Pointer to global context descriptor struct. * @param global_ctx_buffer [in]Pointer to global context descriptor struct.
* @param vm [in] Pointer to virtual memory.
* *
* This function will free memory allocated for graphics context buffer, * This function will free memory allocated for graphics context buffer,
* patch context buffer, and all the ctxsw buffers. * patch context buffer, and all the ctxsw buffers.
*/ */
void nvgpu_gr_ctx_free(struct gk20a *g, void nvgpu_gr_ctx_free(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer);
struct vm_gk20a *vm);
/**
* @brief Allocate patch context buffer.
*
* @param g [in] Pointer to GPU driver struct.
* @param gr_ctx [in] Pointer to graphics context struct.
* @param gr_ctx_desc [in] Pointer to context descriptor struct.
* @param vm [in] Pointer to virtual memory.
*
* This function allocates memory for patch context buffer and also
* maps it to given virtual memory.
*
* @return 0 in case of success, < 0 in case of failure.
* @retval -ENOMEM if context memory allocation fails.
*/
int nvgpu_gr_ctx_alloc_patch_ctx(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct vm_gk20a *vm);
/**
* @brief Free patch context buffer.
*
* @param g [in] Pointer to GPU driver struct.
* @param vm [in] Pointer to virtual memory.
* @param gr_ctx [in] Pointer to graphics context struct.
*
* This function will free memory allocated for patch context buffer.
*/
void nvgpu_gr_ctx_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx);
/**
* @brief Map global context buffers.
*
* @param g [in] Pointer to GPU driver struct.
* @param gr_ctx [in] Pointer to graphics context struct.
* @param global_ctx_buffer [in]Pointer to global context descriptor struct.
* @param vm [in] Pointer to virtual memory.
* @param vpr [in] Boolean flag to use buffers in VPR.
*
* This function maps all global context buffers into given
* virtual memory and stores each virtual address into given
* #nvgpu_gr_ctx struct.
*
* @return 0 in case of success, < 0 in case of failure.
* @retval -ENOMEM if memory mapping fails for any context buffer.
*/
int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct vm_gk20a *vm, bool vpr);
/**
* @brief Get global context buffer virtual address.
*
* @param gr_ctx [in] Pointer to graphics context struct.
* @param index [in] Index of global context buffer.
*
* This function returns virtual address of global context buffer
* with given index stored in #nvgpu_gr_ctx struct.
*
* @return virtual address of global context buffer.
*/
u64 nvgpu_gr_ctx_get_global_ctx_va(struct nvgpu_gr_ctx *gr_ctx,
u32 index);
/**
* @brief Get pointer of patch context buffer memory struct.
*
* @param gr_ctx [in] Pointer to graphics context struct.
*
* This function returns #nvgpu_mem pointer of patch context buffer stored
* in #nvgpu_gr_ctx struct.
*
* @return pointer to patch context buffer memory struct.
*/
struct nvgpu_mem *nvgpu_gr_ctx_get_patch_ctx_mem(struct nvgpu_gr_ctx *gr_ctx);
/** /**
* @brief Set data count in patch context buffer. * @brief Set data count in patch context buffer.
@@ -283,15 +201,28 @@ void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx,
u32 data_count); u32 data_count);
/** /**
* @brief Get sm diversity config of the given graphics context. * @brief Get context buffer mem struct of the given graphics context.
* *
* @param gr_ctx [in] Pointer to graphics context struct. * @param gr_ctx [in] Pointer to graphics context struct.
* @param index [in] Value from (NVGPU_GR_CTX_CTX, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW)
* *
* This function returns #sm_diversity_config of graphics context struct. * This function returns #mem of graphics context struct.
* *
* @return sm diversity config of the given graphics context. * @return context buffer mem of the given graphics context.
*/ */
struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx); struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx, u32 index);
/**
* @brief Get mapping flags of a context buffer of the given graphics context.
*
* @param gr_ctx [in] Pointer to graphics context struct.
* @param index [in] Value from (NVGPU_GR_CTX_CTX, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW)
*
 * This function returns the #mapping_flags stored for the context buffer at
 * the given index in the graphics context struct.
*
* @return context buffer mapping flags of the given graphics context.
*/
u32 nvgpu_gr_ctx_get_ctx_mapping_flags(struct nvgpu_gr_ctx *gr_ctx, u32 index);
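
/*
 * Editor's note: illustrative sketch (hypothetical helper, not part of this
 * change) combining the two accessors above: check a buffer's backing memory
 * and read back the mapping flags it will be mapped with.
 */
static inline bool example_gr_ctx_buffer_ready(struct nvgpu_gr_ctx *gr_ctx,
		u32 index, u32 *flags)
{
	struct nvgpu_mem *mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, index);

	if ((mem == NULL) || !nvgpu_mem_is_valid(mem)) {
		return false;
	}

	/* Cacheability/privilege attributes chosen for this buffer. */
	*flags = nvgpu_gr_ctx_get_ctx_mapping_flags(gr_ctx, index);
	return true;
}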
#ifdef CONFIG_NVGPU_SM_DIVERSITY #ifdef CONFIG_NVGPU_SM_DIVERSITY
/** /**
@@ -320,6 +251,7 @@ u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx);
* *
* @param g [in] Pointer to GPU driver struct. * @param g [in] Pointer to GPU driver struct.
* @param gr_ctx [in] Pointer to graphics context struct. * @param gr_ctx [in] Pointer to graphics context struct.
* @param mappings [in] Pointer to mappings of GR context buffers.
* @param local_golden_image [in] Pointer to local golden image struct. * @param local_golden_image [in] Pointer to local golden image struct.
* @param cde [in] Boolean flag to enable/disable CDE. * @param cde [in] Boolean flag to enable/disable CDE.
* *
@@ -332,6 +264,7 @@ u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx);
*/ */
void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings,
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image, struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
bool cde); bool cde);
@@ -481,6 +414,99 @@ struct nvgpu_gr_ctx *nvgpu_alloc_gr_ctx_struct(struct gk20a *g);
*/ */
void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
/**
* @brief Free TSG specific GR context buffers.
*
* @param g [in] Pointer to GPU driver struct.
* @param ctx [in] Pointer to graphics context struct.
*
* This function frees all TSG specific GR context buffers.
*/
void nvgpu_gr_ctx_free_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *ctx);
/**
* @brief Allocate TSG specific GR context buffers.
*
* @param g [in] Pointer to GPU driver struct.
* @param desc [in] Pointer to context descriptor struct.
* @param ctx [in] Pointer to graphics context struct.
*
* This function allocates all TSG specific GR context buffers.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx_desc *desc,
struct nvgpu_gr_ctx *ctx);
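
/*
 * Editor's note: hedged usage sketch, not part of this change. The buffer
 * sizes are assumed to be programmed into the descriptor first (see
 * nvgpu_gr_ctx_set_size()); NVGPU_CPU_PAGE_SIZE is used purely as an
 * illustrative size, mirroring the unit tests. Teardown is the symmetric
 * nvgpu_gr_ctx_free_ctx_buffers() call.
 */
static inline int example_alloc_tsg_ctx_buffers(struct gk20a *g,
		struct nvgpu_gr_ctx_desc *desc, struct nvgpu_gr_ctx *ctx)
{
	/* Size must be set before the allocation call. */
	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, NVGPU_CPU_PAGE_SIZE);

	/* Allocation only; mapping happens later through ctx_mappings. */
	return nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, ctx);
}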
#ifdef CONFIG_NVGPU_GFXP
/**
* @brief Allocate TSG specific GR preemption context buffers.
*
* @param g [in] Pointer to GPU driver struct.
* @param desc [in] Pointer to context descriptor struct.
* @param ctx [in] Pointer to graphics context struct.
*
* This function allocates all TSG specific GR preemption context buffers.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int nvgpu_gr_ctx_alloc_ctx_preemption_buffers(struct gk20a *g,
struct nvgpu_gr_ctx_desc *desc,
struct nvgpu_gr_ctx *ctx);
#endif
/**
* @brief Initialize mapping flags for GR context buffers.
*
* @param g [in] Pointer to GPU driver struct.
* @param ctx [in] Pointer to graphics context struct.
*
 * This function initializes the cacheability attribute for the TSG specific
 * GR context buffers.
*/
void nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g,
struct nvgpu_gr_ctx *ctx);
/**
* @brief Allocate or get GR ctx buffers mappings for a TSG.
*
* @param g [in] Pointer to GPU driver struct.
* @param tsg [in] Pointer to TSG struct.
* @param vm [in] Pointer to vm struct.
*
 * This function allocates the mappings struct for the TSG corresponding to
 * the given vm if it does not exist yet; otherwise it returns the existing one.
*
* @return mappings struct in case of success, null in case of failure.
*/
struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g,
struct nvgpu_tsg *tsg, struct vm_gk20a *vm);
/**
* @brief Get GR ctx buffers mappings for a TSG.
*
* @param tsg [in] Pointer to TSG struct.
*
* This function returns the mappings struct for TSG.
*
* @return mappings struct.
*/
struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg);
/**
* @brief Free the gr ctx mapping struct.
*
* @param g [in] Pointer to GPU driver struct.
* @param ctx [in] Pointer to graphics context struct.
*
* This function deletes the gr ctx mapping struct. This is to be
* called when freeing the gr context or in error cases.
*/
void nvgpu_gr_ctx_free_mappings(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx);
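
/*
 * Editor's note: illustrative lifecycle sketch (hypothetical caller, not part
 * of this change). The first caller on a given VM creates the mappings;
 * later callers get the same object back; nvgpu_gr_ctx_free_mappings() drops
 * it together with the GR context.
 */
static inline int example_setup_tsg_mappings(struct gk20a *g,
		struct nvgpu_tsg *tsg, struct vm_gk20a *vm)
{
	struct nvgpu_gr_ctx_mappings *mappings;

	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm);
	if (mappings == NULL) {
		return -ENOMEM;
	}

	/* Subsequent lookups on the same TSG return the same object. */
	if (nvgpu_gr_ctx_get_mappings(tsg) != mappings) {
		return -EINVAL;
	}

	return 0;
}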
/** /**
* @brief Set TSG id in graphics context structure. * @brief Set TSG id in graphics context structure.
* *
@@ -515,28 +541,9 @@ bool nvgpu_gr_ctx_desc_force_preemption_cilp(
#endif /* CONFIG_NVGPU_CILP */ #endif /* CONFIG_NVGPU_CILP */
#ifdef CONFIG_NVGPU_GFXP #ifdef CONFIG_NVGPU_GFXP
int nvgpu_gr_ctx_alloc_ctxsw_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct vm_gk20a *vm);
struct nvgpu_mem *nvgpu_gr_ctx_get_spill_ctxsw_buffer(
struct nvgpu_gr_ctx *gr_ctx);
struct nvgpu_mem *nvgpu_gr_ctx_get_betacb_ctxsw_buffer(
struct nvgpu_gr_ctx *gr_ctx);
struct nvgpu_mem *nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(
struct nvgpu_gr_ctx *gr_ctx);
struct nvgpu_mem *nvgpu_gr_ctx_get_preempt_ctxsw_buffer(
struct nvgpu_gr_ctx *gr_ctx);
struct nvgpu_mem *nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer(
struct nvgpu_gr_ctx *gr_ctx);
void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g, void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx); struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings);
bool nvgpu_gr_ctx_desc_force_preemption_gfxp( bool nvgpu_gr_ctx_desc_force_preemption_gfxp(
struct nvgpu_gr_ctx_desc *gr_ctx_desc); struct nvgpu_gr_ctx_desc *gr_ctx_desc);
@@ -559,12 +566,10 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
#endif /* CONFIG_NVGPU_GRAPHICS */ #endif /* CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_DEBUGGER #ifdef CONFIG_NVGPU_DEBUGGER
int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g, int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_tsg *tsg,
struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct vm_gk20a *vm); struct nvgpu_gr_hwpm_map *hwpm_map);
void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx);
void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g, void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
@@ -573,18 +578,19 @@ void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
u32 nvgpu_gr_ctx_read_ctx_id(struct nvgpu_gr_ctx *gr_ctx); u32 nvgpu_gr_ctx_read_ctx_id(struct nvgpu_gr_ctx *gr_ctx);
struct nvgpu_mem *nvgpu_gr_ctx_get_pm_ctx_mem(struct nvgpu_gr_ctx *gr_ctx);
void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode); void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode);
u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx); u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx);
int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
bool enable); bool enable);
int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g,
u32 mode, bool *skip_update); struct nvgpu_gr_ctx *gr_ctx,
u32 mode, u64 *pm_ctx_gpu_va, bool *skip_update);
void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
u64 pm_ctx_gpu_va);
void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped);
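
/*
 * Editor's note: condensed sketch of the updated HWPM enable flow, mirroring
 * the gr_gk20a_update_hwpm_ctxsw_mode() hunk shown earlier; the caller shape
 * and error handling are assumptions, not part of this change.
 */
static inline int example_enable_hwpm(struct gk20a *g, struct nvgpu_tsg *tsg,
		struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_desc *desc,
		struct nvgpu_gr_hwpm_map *hwpm_map, u32 mode)
{
	u64 pm_ctx_gpu_va = 0ULL;
	bool skip_update = false;
	int err;

	/*
	 * Allocates the PM ctx buffer and maps it via the TSG's mappings
	 * (only needed when mode actually enables HWPM ctxsw).
	 */
	err = nvgpu_gr_ctx_alloc_map_pm_ctx(g, tsg, desc, hwpm_map);
	if (err != 0) {
		return err;
	}

	err = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode,
			&pm_ctx_gpu_va, &skip_update);
	if ((err != 0) || skip_update) {
		return err;
	}

	/* Without subcontexts the PM pointer lands in the main ctx image. */
	nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, pm_ctx_gpu_va);
	return 0;
}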
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
void nvgpu_gr_ctx_set_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx, bool boost); void nvgpu_gr_ctx_set_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx, bool boost);

View File

@@ -0,0 +1,160 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_CTX_MAPPINGS_H
#define NVGPU_GR_CTX_MAPPINGS_H
struct gk20a;
struct nvgpu_tsg;
struct vm_gk20a;
struct nvgpu_gr_ctx;
struct nvgpu_gr_ctx_mappings;
struct nvgpu_gr_global_ctx_buffer_desc;
/**
* @brief Create GR ctx buffers mappings for a TSG.
*
* @param g [in] Pointer to GPU driver struct.
* @param tsg [in] Pointer to TSG struct.
* @param vm [in] Pointer to vm struct.
*
 * This function allocates the mappings struct for the TSG corresponding to
 * the given vm.
*
* @return mappings struct in case of success, null in case of failure.
*/
struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create(struct gk20a *g,
struct nvgpu_tsg *tsg, struct vm_gk20a *vm);
/**
* @brief Free the GR ctx buffers mappings.
*
* @param g [in] Pointer to GPU driver struct.
* @param mappings [in] Pointer to GR ctx buffers mappings struct.
*
* This function frees the mappings struct.
*/
void nvgpu_gr_ctx_mappings_free(struct gk20a *g,
struct nvgpu_gr_ctx_mappings *mappings);
/**
* @brief Map GR context buffer and store in mappings struct.
*
* @param g [in] Pointer to GPU driver struct.
* @param ctx [in] Pointer to GR context struct.
* @param index [in] index of the buffer.
* @param mappings [in] Pointer to GR context buffer mappings struct.
*
 * This function will map the GR context buffer at #index into #mappings->vm
 * and store the mapped GPU virtual address in #mappings.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int nvgpu_gr_ctx_mappings_map_ctx_buffer(struct gk20a *g,
struct nvgpu_gr_ctx *ctx, u32 index,
struct nvgpu_gr_ctx_mappings *mappings);
/**
* @brief Map GR context preemption buffers and store in mappings struct.
*
* @param g [in] Pointer to GPU driver struct.
* @param ctx [in] Pointer to GR context struct.
* @param mappings [in] Pointer to GR context buffer mappings struct.
*
 * This function will map the GR context preemption buffers into #mappings->vm
 * and store the mapped GPU virtual addresses in #mappings.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *ctx,
struct nvgpu_gr_ctx_mappings *mappings);
/**
* @brief Map GR and global context buffers and store in mappings struct.
*
* @param g [in] Pointer to GPU driver struct.
* @param gr_ctx [in] Pointer to GR context struct.
 * @param global_ctx_buffer [in]	Pointer to global context buffer desc.
* @param mappings [in] Pointer to GR context buffer
* mappings struct.
* @param vpr [in] Indicates if VPR buffer copy is to be
* mapped.
*
 * This function will map the GR and global context buffers into #mappings->vm
 * and store the mapped GPU virtual addresses in #mappings.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_ctx_mappings *mappings,
bool vpr);
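
/*
 * Editor's note: illustrative sketch of the central mapping path (hypothetical
 * caller, error unwinding trimmed, not part of this change). The TSG context
 * buffers are assumed to be allocated already; the ctx.h declarations are
 * assumed visible to the caller.
 */
static inline int example_map_all_ctx_buffers(struct gk20a *g,
		struct nvgpu_tsg *tsg, struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
		struct vm_gk20a *vm, bool vpr)
{
	struct nvgpu_gr_ctx_mappings *mappings;

	mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm);
	if (mappings == NULL) {
		return -ENOMEM;
	}

	/* Maps the TSG buffers and the global buffers into vm in one call. */
	return nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
			global_ctx_buffer, mappings, vpr);
}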
/**
 * @brief Unmap the GR and global context buffers tracked in the mappings struct.
*
* @param g [in] Pointer to GPU driver struct.
* @param gr_ctx [in] Pointer to GR context struct.
 * @param global_ctx_buffer [in]	Pointer to global context buffer desc.
* @param mappings [in] Pointer to GR context buffer
* mappings struct.
*
 * This function will unmap the GR and global context buffers from #mappings->vm.
*/
void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_ctx_mappings *mappings);
/**
* @brief Get global context buffer gpu virtual address.
*
* @param mappings [in] Pointer to GR context buffer
* mappings struct.
* @param index [in] index of the buffer.
*
* This function will get the gpu virtual address of the global context buffer
* in #mappings.
*
* @return gpu virtual address of global context buffer.
*/
u64 nvgpu_gr_ctx_mappings_get_global_ctx_va(struct nvgpu_gr_ctx_mappings *mappings,
u32 index);
/**
* @brief Get GR context buffer gpu virtual address.
*
* @param mappings [in] Pointer to GR context buffer
* mappings struct.
* @param index [in] index of the buffer.
*
* This function will get the gpu virtual address of the GR context buffer
* in #mappings.
*
* @return gpu virtual address of GR context buffer.
*/
u64 nvgpu_gr_ctx_mappings_get_ctx_va(struct nvgpu_gr_ctx_mappings *mappings,
u32 index);
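
/*
 * Editor's note: small query sketch (not part of this change) showing how a
 * GPU VA is now read from the mappings struct rather than from the buffer's
 * nvgpu_mem, mirroring the gv11b commit_global_attrib_cb hunk above.
 */
static inline bool example_gfxp_preempt_mapped(
		struct nvgpu_gr_ctx_mappings *mappings)
{
	/* A non-zero VA means the preemption buffer is mapped on this VM. */
	return nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
			NVGPU_GR_CTX_PREEMPT_CTXSW) != 0ULL;
}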
#endif /* NVGPU_GR_CTX_MAPPINGS_H */

View File

@@ -63,6 +63,7 @@ struct gk20a;
struct nvgpu_mem; struct nvgpu_mem;
struct nvgpu_gr_subctx; struct nvgpu_gr_subctx;
struct nvgpu_gr_ctx; struct nvgpu_gr_ctx;
struct nvgpu_gr_ctx_mappings;
struct nvgpu_tsg; struct nvgpu_tsg;
struct vm_area_struct; struct vm_area_struct;
@@ -169,7 +170,8 @@ int nvgpu_gr_fecs_trace_reset(struct gk20a *g);
int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx, struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid); struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings,
pid_t pid, u32 vmid);
int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g, int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g,
struct nvgpu_mem *inst_block); struct nvgpu_mem *inst_block);

View File

@@ -181,6 +181,18 @@ size_t nvgpu_gr_global_ctx_get_size(struct nvgpu_gr_global_ctx_buffer_desc *desc
int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g, int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc); struct nvgpu_gr_global_ctx_buffer_desc *desc);
/**
* @brief Initialize mapping flags for GR global context buffers.
*
* @param g [in] Pointer to GPU driver struct.
* @param desc [in] Pointer to global ctx buffer desc.
*
 * This function initializes the cacheability attribute for the GR global
 * context buffers.
*/
void nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc);
/** /**
* @brief Free all global context buffers. * @brief Free all global context buffers.
* *
@@ -199,7 +211,6 @@ void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g,
* @param desc [in] Pointer to global context descriptor struct. * @param desc [in] Pointer to global context descriptor struct.
* @param index [in] Index of global context buffer. * @param index [in] Index of global context buffer.
* @param vm [in] Pointer to virtual memory. * @param vm [in] Pointer to virtual memory.
* @param flags [in] Flags used to specify mapping attributes.
* @param priv [in] Boolean flag to allocate privileged PTE. * @param priv [in] Boolean flag to allocate privileged PTE.
* *
 * This function maps given global context buffer with index #index into * This function maps given global context buffer with index #index into
@@ -209,8 +220,7 @@ void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g,
* 0 in case of failure. * 0 in case of failure.
*/ */
u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc, u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index, u32 index, struct vm_gk20a *vm, bool priv);
struct vm_gk20a *vm, u32 flags, bool priv);
/** /**
* @brief Unmap given global context buffer. * @brief Unmap given global context buffer.

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -33,6 +33,7 @@
*/ */
struct gk20a; struct gk20a;
struct nvgpu_gr_ctx; struct nvgpu_gr_ctx;
struct nvgpu_gr_ctx_mappings;
struct nvgpu_gr_subctx; struct nvgpu_gr_subctx;
struct nvgpu_gr_config; struct nvgpu_gr_config;
struct nvgpu_gr_ctx_desc; struct nvgpu_gr_ctx_desc;
@@ -70,7 +71,7 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g,
* @param inst_block [in] Pointer to channel instance block. * @param inst_block [in] Pointer to channel instance block.
* @param gr_ctx [in] Pointer to graphics context buffer. * @param gr_ctx [in] Pointer to graphics context buffer.
* @param subctx [in] Pointer to graphics subcontext buffer. * @param subctx [in] Pointer to graphics subcontext buffer.
* @param gpu_va [in] GPU virtual address of graphics context buffer. * @param mappings [in] Pointer to mappings of the GR context buffers.
* *
* If graphics subcontexts are supported, subcontext buffer GPU virtual * If graphics subcontexts are supported, subcontext buffer GPU virtual
* address should be committed to channel instance block. Otherwise graphics * address should be committed to channel instance block. Otherwise graphics
@@ -82,7 +83,7 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g,
*/ */
void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
u64 gpu_va); struct nvgpu_gr_ctx_mappings *mappings);
/** /**
* brief Initialize preemption mode in context struct. * brief Initialize preemption mode in context struct.
@@ -91,7 +92,6 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
* @param config [in] Pointer to GR configuration struct. * @param config [in] Pointer to GR configuration struct.
* @param gr_ctx_desc [in] Pointer to GR context descriptor struct. * @param gr_ctx_desc [in] Pointer to GR context descriptor struct.
* @param gr_ctx [in] Pointer to graphics context. * @param gr_ctx [in] Pointer to graphics context.
* @param vm [in] Pointer to virtual memory.
* @param class_num [in] GR engine class. * @param class_num [in] GR engine class.
* @param graphics_preempt_mode Graphics preemption mode to set. * @param graphics_preempt_mode Graphics preemption mode to set.
* @param compute_preempt_mode Compute preemption mode to set. * @param compute_preempt_mode Compute preemption mode to set.
@@ -111,7 +111,7 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
*/ */
int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num, struct nvgpu_gr_ctx *gr_ctx, u32 class_num,
u32 graphics_preempt_mode, u32 compute_preempt_mode); u32 graphics_preempt_mode, u32 compute_preempt_mode);
/** /**
@@ -121,6 +121,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
* @param config [in] Pointer to GR configuration struct. * @param config [in] Pointer to GR configuration struct.
* @param gr_ctx [in] Pointer to graphics context. * @param gr_ctx [in] Pointer to graphics context.
* @param subctx [in] Pointer to graphics subcontext buffer. * @param subctx [in] Pointer to graphics subcontext buffer.
* @param mappings [in] Pointer to mappings of GR context buffers.
* *
* This function will read preemption modes stored in #nvgpu_gr_ctx * This function will read preemption modes stored in #nvgpu_gr_ctx
* struct and write them into graphics context image. * struct and write them into graphics context image.
@@ -133,7 +134,8 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
*/ */
void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx); struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx_mappings *mappings);
/** /**
* brief Update global context buffer addresses in graphics context. * brief Update global context buffer addresses in graphics context.
@@ -142,6 +144,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
* @param global_ctx_buffer [in] Pointer to global context descriptor struct. * @param global_ctx_buffer [in] Pointer to global context descriptor struct.
* @param config [in] Pointer to GR configuration struct. * @param config [in] Pointer to GR configuration struct.
* @param gr_ctx [in] Pointer to graphics context. * @param gr_ctx [in] Pointer to graphics context.
* @param mappings [in] Pointer to mappings of GR context buffers.
* @param patch [in] Boolean flag to use patch context buffer. * @param patch [in] Boolean flag to use patch context buffer.
* *
* This function will update GPU virtual addresses of global context * This function will update GPU virtual addresses of global context
@@ -152,7 +155,8 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
*/ */
void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch); struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings, bool patch);
/** /**
* @brief Allocate golden context image. * @brief Allocate golden context image.
@@ -193,6 +197,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_config *config, struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings,
struct nvgpu_mem *inst_block); struct nvgpu_mem *inst_block);
/** /**
@@ -205,7 +210,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
* @param config [in] Pointer to GR configuration struct. * @param config [in] Pointer to GR configuration struct.
* @param gr_ctx [in] Pointer to graphics context. * @param gr_ctx [in] Pointer to graphics context.
* @param subctx [in] Pointer to graphics subcontext buffer. * @param subctx [in] Pointer to graphics subcontext buffer.
* @param vm [in] Pointer to virtual memory. * @param mappings [in] Pointer to mappings of the GR context buffers.
* @param inst_block [in] Pointer to channel instance block. * @param inst_block [in] Pointer to channel instance block.
* @param class_num [in] GR engine class. * @param class_num [in] GR engine class.
* @param flags [in] Object context attribute flags. * @param flags [in] Object context attribute flags.
@@ -216,8 +221,8 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
* This function allocates object context for the GPU channel. * This function allocates object context for the GPU channel.
* Allocating object context includes: * Allocating object context includes:
* *
* - Allocating graphics context buffer. See #nvgpu_gr_obj_ctx_gr_ctx_alloc(). * - Allocating graphics context buffers.
* - Allocating patch context buffer. See #nvgpu_gr_ctx_alloc_patch_ctx(). * - Allocating patch context buffer.
* - Allocating golden context image. See #nvgpu_gr_obj_ctx_alloc_golden_ctx_image(). * - Allocating golden context image. See #nvgpu_gr_obj_ctx_alloc_golden_ctx_image().
* - Committing global context buffers in graphics context image. * - Committing global context buffers in graphics context image.
* See #nvgpu_gr_obj_ctx_commit_global_ctx_buffers(). * See #nvgpu_gr_obj_ctx_commit_global_ctx_buffers().
@@ -245,7 +250,7 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_subctx *subctx,
struct vm_gk20a *vm, struct nvgpu_gr_ctx_mappings *mappings,
struct nvgpu_mem *inst_block, struct nvgpu_mem *inst_block,
u32 class_num, u32 flags, u32 class_num, u32 flags,
bool cde, bool vpr); bool cde, bool vpr);
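
/*
 * Editor's note: hedged sketch, not part of this change. Rather than handing
 * a VM to nvgpu_gr_obj_ctx_alloc(), the caller first obtains (or creates) the
 * TSG's ctx_mappings on that VM and passes it as the new mappings argument;
 * the remaining arguments are unchanged and therefore not repeated here.
 * Assumes nvgpu/gr/ctx.h is visible to the caller.
 */
static inline struct nvgpu_gr_ctx_mappings *example_obj_ctx_mappings(
		struct gk20a *g, struct nvgpu_tsg *tsg, struct vm_gk20a *vm)
{
	/* Resolved per VM and later passed into nvgpu_gr_obj_ctx_alloc(). */
	return nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm);
}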

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -88,14 +88,13 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
* @brief Free GR engine context image. * @brief Free GR engine context image.
* *
* @param g [in] Pointer to GPU driver struct. * @param g [in] Pointer to GPU driver struct.
* @param vm [in] Pointer to virtual memory.
* @param gr_ctx [in] Pointer to GR engine context image. * @param gr_ctx [in] Pointer to GR engine context image.
* *
* This function will free memory allocated for patch context image and * This function will free memory allocated for patch context image and
* GR engine context image in #nvgpu_gr_setup_alloc_obj_ctx(). * GR engine context image in #nvgpu_gr_setup_alloc_obj_ctx().
*/ */
void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g, void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
/** /**
* @brief Free GR engine subcontext. * @brief Free GR engine subcontext.

View File

@@ -34,6 +34,7 @@ struct gk20a;
struct vm_gk20a; struct vm_gk20a;
struct nvgpu_gr_subctx; struct nvgpu_gr_subctx;
struct nvgpu_mem; struct nvgpu_mem;
struct nvgpu_gr_ctx_mappings;
/** /**
* @brief Allocate graphics subcontext buffer. * @brief Allocate graphics subcontext buffer.
@@ -73,7 +74,8 @@ void nvgpu_gr_subctx_free(struct gk20a *g,
* @param g [in] Pointer to GPU driver struct. * @param g [in] Pointer to GPU driver struct.
* @param subctx [in] Pointer to graphics subcontext struct. * @param subctx [in] Pointer to graphics subcontext struct.
* @param gr_ctx [in] Pointer to graphics context struct. * @param gr_ctx [in] Pointer to graphics context struct.
* @param gpu_va [in] GPU virtual address of graphics context buffer. * @param mappings [in] GPU virtual address mappings of graphics
* context buffers.
* *
* This function will initialize graphics subcontext buffer header * This function will initialize graphics subcontext buffer header
* by reading appropriate values from #nvgpu_gr_ctx structure and * by reading appropriate values from #nvgpu_gr_ctx structure and
@@ -84,7 +86,8 @@ void nvgpu_gr_subctx_free(struct gk20a *g,
*/ */
void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va); struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings);
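
/*
 * Editor's note: illustrative caller sketch (assumed shape, not part of this
 * change): the subcontext header load now takes the TSG's ctx mappings and
 * looks up the GR ctx GPU VA itself. nvgpu_gr_ctx_get_mappings() comes from
 * nvgpu/gr/ctx.h.
 */
static inline void example_load_subctx_header(struct gk20a *g,
		struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_tsg *tsg)
{
	nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx,
			nvgpu_gr_ctx_get_mappings(tsg));
}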
/** /**
* @brief Get pointer of subcontext header memory struct. * @brief Get pointer of subcontext header memory struct.
@@ -103,11 +106,12 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx
struct nvgpu_gr_ctx *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g, void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx); struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx_mappings *mappings);
#endif #endif
#ifdef CONFIG_NVGPU_DEBUGGER #ifdef CONFIG_NVGPU_DEBUGGER
void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g, void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx); struct nvgpu_gr_subctx *subctx, u64 pm_ctx_gpu_va);
#endif #endif
#endif /* NVGPU_GR_SUBCTX_H */ #endif /* NVGPU_GR_SUBCTX_H */

View File

@@ -1872,7 +1872,7 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context_size(struct dbg_session_gk20a *dbg
return -EINVAL; return -EINVAL;
} }
ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx); ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) { if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
nvgpu_err(g, "invalid context mem"); nvgpu_err(g, "invalid context mem");
return -EINVAL; return -EINVAL;
@@ -1918,7 +1918,7 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
return -EINVAL; return -EINVAL;
} }
ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx); ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) { if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
nvgpu_err(g, "invalid context mem"); nvgpu_err(g, "invalid context mem");
return -EINVAL; return -EINVAL;

View File

@@ -29,7 +29,6 @@
struct gk20a; struct gk20a;
struct nvgpu_channel; struct nvgpu_channel;
struct gr_ctx_buffer_desc;
struct gk20a_scale_profile; struct gk20a_scale_profile;
struct secure_page_buffer { struct secure_page_buffer {

View File

@@ -446,13 +446,10 @@ nvgpu_gr_config_set_sm_info_gpc_index
nvgpu_gr_config_set_sm_info_sm_index nvgpu_gr_config_set_sm_info_sm_index
nvgpu_gr_config_set_sm_info_tpc_index nvgpu_gr_config_set_sm_info_tpc_index
nvgpu_gr_ctx_alloc nvgpu_gr_ctx_alloc
nvgpu_gr_ctx_alloc_patch_ctx
nvgpu_gr_ctx_desc_alloc nvgpu_gr_ctx_desc_alloc
nvgpu_gr_ctx_desc_free nvgpu_gr_ctx_desc_free
nvgpu_gr_ctx_free nvgpu_gr_ctx_free
nvgpu_gr_ctx_free_patch_ctx
nvgpu_gr_ctx_get_tsgid nvgpu_gr_ctx_get_tsgid
nvgpu_gr_ctx_map_global_ctx_buffers
nvgpu_gr_ctx_patch_write nvgpu_gr_ctx_patch_write
nvgpu_gr_ctx_patch_write_begin nvgpu_gr_ctx_patch_write_begin
nvgpu_gr_ctx_patch_write_end nvgpu_gr_ctx_patch_write_end

View File

@@ -462,18 +462,20 @@ nvgpu_gr_config_set_sm_info_global_tpc_index
nvgpu_gr_config_set_sm_info_gpc_index nvgpu_gr_config_set_sm_info_gpc_index
nvgpu_gr_config_set_sm_info_sm_index nvgpu_gr_config_set_sm_info_sm_index
nvgpu_gr_config_set_sm_info_tpc_index nvgpu_gr_config_set_sm_info_tpc_index
nvgpu_gr_ctx_alloc
nvgpu_gr_ctx_alloc_patch_ctx
nvgpu_gr_ctx_desc_alloc nvgpu_gr_ctx_desc_alloc
nvgpu_gr_ctx_desc_free nvgpu_gr_ctx_desc_free
nvgpu_gr_ctx_free nvgpu_gr_ctx_free
nvgpu_gr_ctx_free_patch_ctx
nvgpu_gr_ctx_get_tsgid nvgpu_gr_ctx_get_tsgid
nvgpu_gr_ctx_map_global_ctx_buffers
nvgpu_gr_ctx_patch_write nvgpu_gr_ctx_patch_write
nvgpu_gr_ctx_patch_write_begin nvgpu_gr_ctx_patch_write_begin
nvgpu_gr_ctx_patch_write_end nvgpu_gr_ctx_patch_write_end
nvgpu_gr_ctx_set_size nvgpu_gr_ctx_set_size
nvgpu_gr_ctx_alloc_ctx_buffers
nvgpu_gr_ctx_free_ctx_buffers
nvgpu_gr_ctx_mappings_create
nvgpu_gr_ctx_alloc_or_get_mappings
nvgpu_gr_ctx_mappings_map_gr_ctx_buffers
nvgpu_gr_ctx_get_ctx_mem
nvgpu_gr_enable_hw nvgpu_gr_enable_hw
nvgpu_gr_engine_interrupt_mask nvgpu_gr_engine_interrupt_mask
nvgpu_gr_falcon_get_fecs_ucode_segments nvgpu_gr_falcon_get_fecs_ucode_segments

View File

@@ -622,10 +622,9 @@ done:
#define F_TSG_RELEASE_NO_RELEASE_HAL BIT(0) #define F_TSG_RELEASE_NO_RELEASE_HAL BIT(0)
#define F_TSG_RELEASE_GR_CTX BIT(1) #define F_TSG_RELEASE_GR_CTX BIT(1)
#define F_TSG_RELEASE_MEM BIT(2) #define F_TSG_RELEASE_MEM BIT(2)
#define F_TSG_RELEASE_VM BIT(3) #define F_TSG_RELEASE_ENG_BUFS BIT(3)
#define F_TSG_RELEASE_ENG_BUFS BIT(4) #define F_TSG_RELEASE_SM_ERR_STATES BIT(4)
#define F_TSG_RELEASE_SM_ERR_STATES BIT(5) #define F_TSG_RELEASE_LAST BIT(5)
#define F_TSG_RELEASE_LAST BIT(6)
static void stub_tsg_release(struct nvgpu_tsg *tsg) static void stub_tsg_release(struct nvgpu_tsg *tsg)
@@ -640,7 +639,7 @@ static void stub_tsg_deinit_eng_method_buffers(struct gk20a *g,
} }
static void stub_gr_setup_free_gr_ctx(struct gk20a *g, static void stub_gr_setup_free_gr_ctx(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) struct nvgpu_gr_ctx *gr_ctx)
{ {
stub[1].name = __func__; stub[1].name = __func__;
stub[1].count++; stub[1].count++;
@@ -650,24 +649,32 @@ static void stub_gr_setup_free_gr_ctx(struct gk20a *g,
int test_tsg_release(struct unit_module *m, int test_tsg_release(struct unit_module *m,
struct gk20a *g, void *args) struct gk20a *g, void *args)
{ {
struct nvgpu_gr_ctx_desc *gr_ctx_desc;
struct nvgpu_mem *gr_ctx_mem;
struct nvgpu_fifo *f = &g->fifo; struct nvgpu_fifo *f = &g->fifo;
struct gpu_ops gops = g->ops; struct gpu_ops gops = g->ops;
struct nvgpu_tsg *tsg = NULL; struct nvgpu_tsg *tsg = NULL;
struct vm_gk20a vm; struct vm_gk20a vm;
u32 branches = 0U; u32 branches = 0U;
int ret = UNIT_FAIL; int ret = UNIT_FAIL;
struct nvgpu_mem mem;
u32 free_gr_ctx_mask = u32 free_gr_ctx_mask =
F_TSG_RELEASE_GR_CTX|F_TSG_RELEASE_MEM|F_TSG_RELEASE_VM; F_TSG_RELEASE_GR_CTX|F_TSG_RELEASE_MEM;
const char *labels[] = { const char *labels[] = {
"no_release_hal", "no_release_hal",
"gr_ctx", "gr_ctx",
"mem", "mem",
"vm",
"eng_bufs", "eng_bufs",
"sm_err_states" "sm_err_states"
}; };
gr_ctx_desc = nvgpu_gr_ctx_desc_alloc(g);
if (!gr_ctx_desc) {
unit_return_fail(m, "failed to allocate memory");
}
nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX,
NVGPU_CPU_PAGE_SIZE);
for (branches = 0U; branches < F_TSG_RELEASE_LAST; branches++) { for (branches = 0U; branches < F_TSG_RELEASE_LAST; branches++) {
if (!(branches & F_TSG_RELEASE_GR_CTX) && if (!(branches & F_TSG_RELEASE_GR_CTX) &&
@@ -683,8 +690,9 @@ int test_tsg_release(struct unit_module *m,
tsg = nvgpu_tsg_open(g, getpid()); tsg = nvgpu_tsg_open(g, getpid());
unit_assert(tsg != NULL, goto done); unit_assert(tsg != NULL, goto done);
unit_assert(tsg->gr_ctx != NULL, goto done); unit_assert(tsg->gr_ctx != NULL, goto done);
unit_assert(tsg->gr_ctx->mem.aperture ==
APERTURE_INVALID, goto done); gr_ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
unit_assert(gr_ctx_mem->aperture == APERTURE_INVALID, goto done);
g->ops.tsg.release = g->ops.tsg.release =
branches & F_TSG_RELEASE_NO_RELEASE_HAL ? branches & F_TSG_RELEASE_NO_RELEASE_HAL ?
@@ -696,11 +704,8 @@ int test_tsg_release(struct unit_module *m,
} }
if (branches & F_TSG_RELEASE_MEM) { if (branches & F_TSG_RELEASE_MEM) {
nvgpu_dma_alloc(g, NVGPU_CPU_PAGE_SIZE, &mem); ret = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc, tsg->gr_ctx);
tsg->gr_ctx->mem = mem; unit_assert(ret == UNIT_SUCCESS, goto done);
}
if (branches & F_TSG_RELEASE_VM) {
tsg->vm = &vm; tsg->vm = &vm;
/* prevent nvgpu_vm_remove */ /* prevent nvgpu_vm_remove */
nvgpu_ref_init(&vm.ref); nvgpu_ref_init(&vm.ref);
@@ -734,7 +739,7 @@ int test_tsg_release(struct unit_module *m,
gops.gr.setup.free_gr_ctx; gops.gr.setup.free_gr_ctx;
if (branches & F_TSG_RELEASE_MEM) { if (branches & F_TSG_RELEASE_MEM) {
nvgpu_dma_free(g, &mem); nvgpu_gr_ctx_free_ctx_buffers(g, tsg->gr_ctx);
} }
if (tsg->gr_ctx != NULL) { if (tsg->gr_ctx != NULL) {

View File

@@ -177,17 +177,19 @@ int test_tsg_unbind_channel(struct unit_module *m,
* - Check that in_use is false. * - Check that in_use is false.
* - Check de-allocation of other resources: * - Check de-allocation of other resources:
* - Case where g->ops.gr.setup.free_gr_ctx is called. * - Case where g->ops.gr.setup.free_gr_ctx is called.
* It requires dummy vm, gr_ctx and gr_ctx->mem to be allocated. * It requires dummy vm, gr_ctx and gr_ctx->mem[NVGPU_GR_CTX_CTX] to be
* allocated.
* A stub is used to check that the HAL was actually invoked. * A stub is used to check that the HAL was actually invoked.
* - Other combinations of vm, gr_ctx and gr_ctx->mem allocations, to * - Other combinations of vm, gr_ctx and gr_ctx->mem[NVGPU_GR_CTX_CTX]
* check that g->ops.gr.setup.free_gr_ctx is not called. * allocations, to check that g->ops.gr.setup.free_gr_ctx is not called.
* - Unhook of event_ids (by adding 2 dummy events in event_id list, and * - Unhook of event_ids (by adding 2 dummy events in event_id list, and
* checking that list is empty after TSG release). * checking that list is empty after TSG release).
* - Case where event_id is empty before TSG release is tested as well * - Case where event_id is empty before TSG release is tested as well
* - Check that VM refcount is decremented (and VM deallocated in our * - Check that VM refcount is decremented (and VM deallocated in our
* case), when present. * case), when present.
* - Check that sm_error_states is deallocated. * - Check that sm_error_states is deallocated.
* - Check any combination of VM, gr_ctx, gr_ctx->mem, and sm_error_state. * - Check any combination of VM, gr_ctx, gr_ctx->mem[NVGPU_GR_CTX_CTX], and
* sm_error_state.
* *
* Output: Returns PASS if all branches gave expected results. FAIL otherwise. * Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/ */
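A condensed sketch of the branch-mask pattern this test relies on (illustrative only; names follow the test_tsg_release code shown earlier in this diff). Each bit of branches decides whether one resource exists, so every combination of free paths in the TSG release sequence is exercised:

	for (branches = 0U; branches < F_TSG_RELEASE_LAST; branches++) {
		tsg = nvgpu_tsg_open(g, getpid());
		unit_assert(tsg != NULL, goto done);

		if (branches & F_TSG_RELEASE_MEM) {
			/* Give the release path real ctx buffers to free. */
			ret = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc,
					tsg->gr_ctx);
			unit_assert(ret == UNIT_SUCCESS, goto done);
		}
		/* Other bits install stubs in the same way: the release HAL,
		 * eng method buffer deinit, sm_error_states, ... */

		/* Release the TSG, then inspect the stub[] name/count entries
		 * to confirm exactly the expected free paths ran. */
	}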

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -31,6 +31,7 @@
#include <nvgpu/dma.h> #include <nvgpu/dma.h>
#include <nvgpu/gr/gr.h> #include <nvgpu/gr/gr.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/ctx_mappings.h>
#include <nvgpu/posix/posix-fault-injection.h> #include <nvgpu/posix/posix-fault-injection.h>
#include <nvgpu/posix/dma.h> #include <nvgpu/posix/dma.h>
@@ -43,6 +44,37 @@
#define DUMMY_SIZE 0xF0U #define DUMMY_SIZE 0xF0U
static u64 nvgpu_gmmu_map_locked_stub(struct vm_gk20a *vm,
u64 vaddr,
struct nvgpu_sgt *sgt,
u64 buffer_offset,
u64 size,
u32 pgsz_idx,
u8 kind_v,
u32 ctag_offset,
u32 flags,
enum gk20a_mem_rw_flag rw_flag,
bool clear_ctags,
bool sparse,
bool priv,
struct vm_gk20a_mapping_batch *batch,
enum nvgpu_aperture aperture)
{
return 1;
}
static void nvgpu_gmmu_unmap_locked_stub(struct vm_gk20a *vm,
u64 vaddr,
u64 size,
u32 pgsz_idx,
bool va_allocated,
enum gk20a_mem_rw_flag rw_flag,
bool sparse,
struct vm_gk20a_mapping_batch *batch)
{
return;
}
int test_gr_ctx_error_injection(struct unit_module *m, int test_gr_ctx_error_injection(struct unit_module *m,
struct gk20a *g, void *args) struct gk20a *g, void *args)
{ {
@@ -51,12 +83,22 @@ int test_gr_ctx_error_injection(struct unit_module *m,
struct vm_gk20a *vm; struct vm_gk20a *vm;
struct nvgpu_gr_ctx_desc *desc; struct nvgpu_gr_ctx_desc *desc;
struct nvgpu_gr_global_ctx_buffer_desc *global_desc; struct nvgpu_gr_global_ctx_buffer_desc *global_desc;
struct nvgpu_gr_ctx_mappings *mappings = NULL;
struct nvgpu_gr_ctx *gr_ctx = NULL; struct nvgpu_gr_ctx *gr_ctx = NULL;
struct nvgpu_posix_fault_inj *dma_fi = struct nvgpu_posix_fault_inj *dma_fi =
nvgpu_dma_alloc_get_fault_injection(); nvgpu_dma_alloc_get_fault_injection();
struct nvgpu_posix_fault_inj *kmem_fi = struct nvgpu_posix_fault_inj *kmem_fi =
nvgpu_kmem_get_fault_injection(); nvgpu_kmem_get_fault_injection();
u64 low_hole = SZ_4K * 16UL; u64 low_hole = SZ_4K * 16UL;
struct nvgpu_channel *channel = (struct nvgpu_channel *)
malloc(sizeof(struct nvgpu_channel));
struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
malloc(sizeof(struct nvgpu_tsg));
u32 i;
if (channel == NULL || tsg == NULL) {
unit_return_fail(m, "failed to allocate channel/tsg");
}
desc = nvgpu_gr_ctx_desc_alloc(g); desc = nvgpu_gr_ctx_desc_alloc(g);
if (!desc) { if (!desc) {
@@ -84,68 +126,70 @@ int test_gr_ctx_error_injection(struct unit_module *m,
unit_return_fail(m, "nvgpu_vm_init failed\n"); unit_return_fail(m, "nvgpu_vm_init failed\n");
} }
/* Try to free gr_ctx before it is allocated. */ channel->g = g;
nvgpu_gr_ctx_free(g, gr_ctx, NULL, NULL); channel->vm = vm;
gr_ctx = nvgpu_alloc_gr_ctx_struct(g); g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked_stub;
if (!gr_ctx) { g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked_stub;
unit_return_fail(m, "failed to allocate memory");
}
/* Context size is not set, so should fail. */
err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
/* Set the size now, but inject dma allocation failures. */
nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
nvgpu_posix_enable_fault_injection(dma_fi, true, 0);
err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
/* Inject kmem alloc failures to trigger mapping failures */
nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
nvgpu_posix_enable_fault_injection(kmem_fi, true, 1);
err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
/* Successful allocation */
nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
if (err != 0) {
unit_return_fail(m, "failed to allocate context");
}
/* Try to free patch context before it is allocated. */
nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx);
/* Inject allocation error and allocate patch context */
nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
nvgpu_posix_enable_fault_injection(dma_fi, true, 0);
err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
/* Successful allocation */
nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
if (err != 0) {
unit_return_fail(m, "failed to allocate patch context");
}
global_desc = nvgpu_gr_global_ctx_desc_alloc(g); global_desc = nvgpu_gr_global_ctx_desc_alloc(g);
if (!global_desc) { if (!global_desc) {
unit_return_fail(m, "failed to allocate desc"); unit_return_fail(m, "failed to allocate desc");
} }
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc, /* Try to free gr_ctx before it is allocated. */
vm, false); nvgpu_gr_ctx_free(g, gr_ctx, NULL);
gr_ctx = nvgpu_alloc_gr_ctx_struct(g);
if (!gr_ctx) {
unit_return_fail(m, "failed to allocate memory");
}
tsg->gr_ctx = gr_ctx;
mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm);
if (mappings == NULL) {
unit_return_fail(m, "failed to allocate gr_ctx mappings");
}
/* Context size is not set, so should fail. */
err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
/* Set the size now, but inject dma allocation failures. */
nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
for (i = 0; i < 2; i++) {
nvgpu_posix_enable_fault_injection(dma_fi, true, i);
err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
}
err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
if (err != 0) {
unit_return_fail(m, "failed to allocate ctx buffers");
}
/* Inject kmem alloc failures to trigger mapping failures */
for (i = 0; i < 2; i++) {
nvgpu_posix_enable_fault_injection(kmem_fi, true, 2 * i);
err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
global_desc, mappings, false);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
}
/* Global ctx buffer sizes are not set yet, so mapping must fail. */
err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
mappings, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
} }
@@ -164,42 +208,21 @@ int test_gr_ctx_error_injection(struct unit_module *m,
unit_return_fail(m, "failed to allocate global buffers"); unit_return_fail(m, "failed to allocate global buffers");
} }
/* Fail global circular buffer mapping */ /* Fail global ctx buffer mappings */
nvgpu_posix_enable_fault_injection(kmem_fi, true, 0); for (i = 0; i < 4; i++) {
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc, nvgpu_posix_enable_fault_injection(kmem_fi, true, 4 + (2 * i));
vm, false); err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
mappings, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
} }
nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
}
/* Fail global attribute buffer mapping */
nvgpu_posix_enable_fault_injection(kmem_fi, true, 4);
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
vm, false);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
/* Fail global pagepool buffer mapping */
nvgpu_posix_enable_fault_injection(kmem_fi, true, 8);
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
vm, false);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
/* Fail global access map buffer mapping */
nvgpu_posix_enable_fault_injection(kmem_fi, true, 12);
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
vm, false);
if (err == 0) {
unit_return_fail(m, "unexpected success");
}
/* Successful mapping */ /* Successful mapping */
nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc, mappings, false);
vm, false);
if (err != 0) { if (err != 0) {
unit_return_fail(m, "failed to map global buffers"); unit_return_fail(m, "failed to map global buffers");
} }
@@ -225,11 +248,9 @@ int test_gr_ctx_error_injection(struct unit_module *m,
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
/* cleanup */ /* cleanup */
nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx); nvgpu_gr_ctx_free(g, gr_ctx, global_desc);
nvgpu_gr_ctx_free(g, gr_ctx, global_desc, vm);
nvgpu_free_gr_ctx_struct(g, gr_ctx); nvgpu_free_gr_ctx_struct(g, gr_ctx);
nvgpu_gr_ctx_desc_free(g, desc); nvgpu_gr_ctx_desc_free(g, desc);
nvgpu_vm_put(vm);
nvgpu_vm_put(g->mm.bar1.vm); nvgpu_vm_put(g->mm.bar1.vm);
return UNIT_SUCCESS; return UNIT_SUCCESS;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -40,16 +40,15 @@ struct unit_module;
* *
* Test Type: Feature, Error guessing * Test Type: Feature, Error guessing
* *
* Targets: #nvgpu_gr_ctx_alloc, * Targets: #nvgpu_gr_ctx_alloc_ctx_buffers,
* #nvgpu_gr_ctx_free_ctx_buffers,
* #nvgpu_gr_ctx_free, * #nvgpu_gr_ctx_free,
* #nvgpu_gr_ctx_desc_alloc, * #nvgpu_gr_ctx_desc_alloc,
* #nvgpu_gr_ctx_desc_free, * #nvgpu_gr_ctx_desc_free,
* #nvgpu_alloc_gr_ctx_struct, * #nvgpu_alloc_gr_ctx_struct,
* #nvgpu_free_gr_ctx_struct, * #nvgpu_free_gr_ctx_struct,
* #nvgpu_gr_ctx_set_size, * #nvgpu_gr_ctx_set_size,
* #nvgpu_gr_ctx_alloc_patch_ctx, * #nvgpu_gr_ctx_mappings_map_global_ctx_buffers,
* #nvgpu_gr_ctx_free_patch_ctx,
* #nvgpu_gr_ctx_map_global_ctx_buffers,
* #nvgpu_gr_ctx_patch_write_begin, * #nvgpu_gr_ctx_patch_write_begin,
* #nvgpu_gr_ctx_patch_write, * #nvgpu_gr_ctx_patch_write,
* #nvgpu_gr_ctx_patch_write_end. * #nvgpu_gr_ctx_patch_write_end.
@@ -63,7 +62,6 @@ struct unit_module;
* - Inject dma allocation failure and try to allocate gr_ctx, should fail. * - Inject dma allocation failure and try to allocate gr_ctx, should fail.
* - Inject kmem allocation failure and try to allocate gr_ctx, should fail. * - Inject kmem allocation failure and try to allocate gr_ctx, should fail.
* - Disable error injection and allocate gr_ctx, should pass. * - Disable error injection and allocate gr_ctx, should pass.
* - Try to free patch_ctx before it is allocated, should fail.
* - Inject dma allocation failure and try to allocate patch_ctx, should fail. * - Inject dma allocation failure and try to allocate patch_ctx, should fail.
* - Disable error injection and allocate patch_ctx, should pass. * - Disable error injection and allocate patch_ctx, should pass.
* - Setup all the global context buffers. * - Setup all the global context buffers.
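A minimal sketch of the fault-injection pattern these steps describe, assuming the posix helpers used in the test body (nvgpu_posix_enable_fault_injection plus the dma fault-injection handle). Each negative case schedules a failure at a given call count and expects the API under test to fail; injection is then disabled before the positive case:

	struct nvgpu_posix_fault_inj *dma_fi =
		nvgpu_dma_alloc_get_fault_injection();
	int err;

	/* Fail the first dma allocation inside the call under test. */
	nvgpu_posix_enable_fault_injection(dma_fi, true, 0);
	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
	}

	/* Disable injection and repeat: the same call must now succeed. */
	nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
	if (err != 0) {
		unit_return_fail(m, "failed to allocate ctx buffers");
	}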

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -100,7 +100,7 @@ int test_gr_global_ctx_alloc_error_injection(struct unit_module *m,
/* Ensure mapping fails before buffers are allocated */ /* Ensure mapping fails before buffers are allocated */
gpu_va = nvgpu_gr_global_ctx_buffer_map(desc, gpu_va = nvgpu_gr_global_ctx_buffer_map(desc,
NVGPU_GR_GLOBAL_CTX_CIRCULAR, NULL, 0, false); NVGPU_GR_GLOBAL_CTX_CIRCULAR, NULL, false);
if (gpu_va != 0) { if (gpu_va != 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
} }

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -757,17 +757,12 @@ int test_gr_init_hal_error_injection(struct unit_module *m,
} }
nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE); nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm); nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
if (err != 0) { if (err != 0) {
unit_return_fail(m, "failed to allocate context"); unit_return_fail(m, "failed to allocate context");
} }
nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
if (err != 0) {
unit_return_fail(m, "failed to allocate patch context");
}
/* global_ctx = false and arbitrary size */ /* global_ctx = false and arbitrary size */
g->ops.gr.init.commit_global_pagepool(g, gr_ctx, 0x12345678, g->ops.gr.init.commit_global_pagepool(g, gr_ctx, 0x12345678,
DUMMY_SIZE, false, false); DUMMY_SIZE, false, false);
@@ -803,7 +798,7 @@ int test_gr_init_hal_error_injection(struct unit_module *m,
g->ops = gops; g->ops = gops;
/* cleanup */ /* cleanup */
nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx); nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx);
nvgpu_free_gr_ctx_struct(g, gr_ctx); nvgpu_free_gr_ctx_struct(g, gr_ctx);
nvgpu_gr_ctx_desc_free(g, desc); nvgpu_gr_ctx_desc_free(g, desc);
nvgpu_vm_put(vm); nvgpu_vm_put(vm);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -35,6 +35,7 @@
#include <nvgpu/gr/gr_utils.h> #include <nvgpu/gr/gr_utils.h>
#include <nvgpu/gr/subctx.h> #include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/ctx_mappings.h>
#include <nvgpu/gr/obj_ctx.h> #include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/posix/posix-fault-injection.h> #include <nvgpu/posix/posix-fault-injection.h>
@@ -117,6 +118,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
struct nvgpu_gr_ctx_desc *desc; struct nvgpu_gr_ctx_desc *desc;
struct nvgpu_gr_global_ctx_buffer_desc *global_desc; struct nvgpu_gr_global_ctx_buffer_desc *global_desc;
struct nvgpu_gr_ctx *gr_ctx = NULL; struct nvgpu_gr_ctx *gr_ctx = NULL;
struct nvgpu_gr_ctx_mappings *mappings = NULL;
struct nvgpu_gr_subctx *subctx = NULL; struct nvgpu_gr_subctx *subctx = NULL;
struct nvgpu_mem inst_block; struct nvgpu_mem inst_block;
struct nvgpu_gr_config *config = nvgpu_gr_get_config_ptr(g); struct nvgpu_gr_config *config = nvgpu_gr_get_config_ptr(g);
@@ -128,6 +130,8 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
nvgpu_local_golden_image_get_fault_injection(); nvgpu_local_golden_image_get_fault_injection();
int (*init_sm_id_table_tmp)(struct gk20a *g, int (*init_sm_id_table_tmp)(struct gk20a *g,
struct nvgpu_gr_config *config); struct nvgpu_gr_config *config);
struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
malloc(sizeof(struct nvgpu_tsg));
/* Inject allocation failures and initialize obj_ctx, should fail */ /* Inject allocation failures and initialize obj_ctx, should fail */
nvgpu_posix_enable_fault_injection(kmem_fi, true, 0); nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
@@ -171,6 +175,8 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
unit_return_fail(m, "failed to allocate memory"); unit_return_fail(m, "failed to allocate memory");
} }
tsg->gr_ctx = gr_ctx;
global_desc = nvgpu_gr_global_ctx_desc_alloc(g); global_desc = nvgpu_gr_global_ctx_desc_alloc(g);
if (!global_desc) { if (!global_desc) {
unit_return_fail(m, "failed to allocate desc"); unit_return_fail(m, "failed to allocate desc");
@@ -195,10 +201,15 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
unit_return_fail(m, "failed to allocate subcontext"); unit_return_fail(m, "failed to allocate subcontext");
} }
mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm);
if (mappings == NULL) {
unit_return_fail(m, "failed to allocate gr_ctx mappings");
}
/* Fail gr_ctx allocation */ /* Fail gr_ctx allocation */
nvgpu_posix_enable_fault_injection(kmem_fi, true, 0); nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -207,7 +218,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
/* Fail patch_ctx allocation */ /* Fail patch_ctx allocation */
nvgpu_posix_enable_fault_injection(kmem_fi, true, 3); nvgpu_posix_enable_fault_injection(kmem_fi, true, 3);
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -216,7 +227,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
/* Fail circular buffer mapping */ /* Fail circular buffer mapping */
nvgpu_posix_enable_fault_injection(kmem_fi, true, 8); nvgpu_posix_enable_fault_injection(kmem_fi, true, 8);
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -228,7 +239,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
g->ops.gr.init.fe_pwr_mode_force_on = test_fe_pwr_mode_force_on; g->ops.gr.init.fe_pwr_mode_force_on = test_fe_pwr_mode_force_on;
fe_pwr_mode_count = 0; fe_pwr_mode_count = 0;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -237,7 +248,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
/* Fail second call to gops.gr.init.fe_pwr_mode_force_on */ /* Fail second call to gops.gr.init.fe_pwr_mode_force_on */
fe_pwr_mode_count = 1; fe_pwr_mode_count = 1;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -252,7 +263,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
g->ops.gr.falcon.ctrl_ctxsw = test_falcon_ctrl_ctxsw; g->ops.gr.falcon.ctrl_ctxsw = test_falcon_ctrl_ctxsw;
ctrl_ctxsw_count = -1; ctrl_ctxsw_count = -1;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -265,7 +276,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
g->ops.gr.init.wait_idle = test_gr_wait_idle; g->ops.gr.init.wait_idle = test_gr_wait_idle;
gr_wait_idle_count = 2; gr_wait_idle_count = 2;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -278,7 +289,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
g->ops.gr.init.load_sw_bundle_init = test_load_sw_bundle; g->ops.gr.init.load_sw_bundle_init = test_load_sw_bundle;
load_sw_bundle_count = 0; load_sw_bundle_count = 0;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -288,7 +299,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
g->ops.gr.init.load_sw_veid_bundle = test_load_sw_bundle; g->ops.gr.init.load_sw_veid_bundle = test_load_sw_bundle;
load_sw_bundle_count = 1; load_sw_bundle_count = 1;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -308,7 +319,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
g->ops.gr.init.wait_idle = test_gr_wait_idle; g->ops.gr.init.wait_idle = test_gr_wait_idle;
gr_wait_idle_count = 4; gr_wait_idle_count = 4;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -323,7 +334,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
*/ */
ctrl_ctxsw_count = 1; ctrl_ctxsw_count = 1;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -335,7 +346,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
*/ */
ctrl_ctxsw_count = 2; ctrl_ctxsw_count = 2;
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -347,7 +358,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
/* Fail golden context verification */ /* Fail golden context verification */
nvgpu_posix_enable_fault_injection(golden_ctx_verif_fi, true, 0); nvgpu_posix_enable_fault_injection(golden_ctx_verif_fi, true, 0);
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -358,7 +369,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
/* Finally, successful obj_ctx allocation */ /* Finally, successful obj_ctx allocation */
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err != 0) { if (err != 0) {
unit_return_fail(m, "failed to allocate obj_ctx"); unit_return_fail(m, "failed to allocate obj_ctx");
@@ -371,14 +382,14 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
/* Reallocation with golden image already created */ /* Reallocation with golden image already created */
err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
config, gr_ctx, subctx, vm, &inst_block, config, gr_ctx, subctx, mappings, &inst_block,
VOLTA_COMPUTE_A, 0, false, false); VOLTA_COMPUTE_A, 0, false, false);
if (err != 0) { if (err != 0) {
unit_return_fail(m, "failed to re-allocate obj_ctx"); unit_return_fail(m, "failed to re-allocate obj_ctx");
} }
/* Set preemption mode with invalid compute class */ /* Set preemption mode with invalid compute class */
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, desc, gr_ctx, vm, err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, desc, gr_ctx,
VOLTA_DMA_COPY_A, 0, NVGPU_PREEMPTION_MODE_COMPUTE_CTA); VOLTA_DMA_COPY_A, 0, NVGPU_PREEMPTION_MODE_COMPUTE_CTA);
if (err == 0) { if (err == 0) {
unit_return_fail(m, "unexpected success"); unit_return_fail(m, "unexpected success");
@@ -386,8 +397,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
/* Cleanup */ /* Cleanup */
nvgpu_gr_subctx_free(g, subctx, vm); nvgpu_gr_subctx_free(g, subctx, vm);
nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx); nvgpu_gr_ctx_free(g, gr_ctx, global_desc);
nvgpu_gr_ctx_free(g, gr_ctx, global_desc, vm);
nvgpu_free_gr_ctx_struct(g, gr_ctx); nvgpu_free_gr_ctx_struct(g, gr_ctx);
nvgpu_gr_ctx_desc_free(g, desc); nvgpu_gr_ctx_desc_free(g, desc);
nvgpu_gr_obj_ctx_deinit(g, golden_image); nvgpu_gr_obj_ctx_deinit(g, golden_image);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -53,7 +53,6 @@ struct unit_module;
* nvgpu_gr_subctx_free, * nvgpu_gr_subctx_free,
* nvgpu_gr_obj_ctx_commit_inst, * nvgpu_gr_obj_ctx_commit_inst,
* nvgpu_gr_obj_ctx_commit_inst_gpu_va, * nvgpu_gr_obj_ctx_commit_inst_gpu_va,
* nvgpu_gr_ctx_get_patch_ctx_mem,
* nvgpu_gr_subctx_get_ctx_header, * nvgpu_gr_subctx_get_ctx_header,
* nvgpu_gr_subctx_load_ctx_header, * nvgpu_gr_subctx_load_ctx_header,
* nvgpu_gr_global_ctx_get_size, * nvgpu_gr_global_ctx_get_size,

View File

@@ -584,7 +584,7 @@ static void gr_setup_fake_free_obj_ctx(struct unit_module *m, struct gk20a *g)
g->ops.gr.setup.free_subctx(gr_setup_ch); g->ops.gr.setup.free_subctx(gr_setup_ch);
nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS, true);
g->ops.gr.setup.free_gr_ctx(g, 0, 0); g->ops.gr.setup.free_gr_ctx(g, NULL);
gr_setup_ch->subctx = gr_subctx; gr_setup_ch->subctx = gr_subctx;
} }

View File

@@ -55,7 +55,7 @@ struct unit_module;
* nvgpu_gr_ctx_get_ctx_mem, * nvgpu_gr_ctx_get_ctx_mem,
* nvgpu_gr_ctx_set_tsgid, * nvgpu_gr_ctx_set_tsgid,
* nvgpu_gr_ctx_get_tsgid, * nvgpu_gr_ctx_get_tsgid,
* nvgpu_gr_ctx_get_global_ctx_va, * nvgpu_gr_ctx_mappings_get_global_ctx_va,
* gops_gr_setup.alloc_obj_ctx, * gops_gr_setup.alloc_obj_ctx,
* nvgpu_gr_ctx_load_golden_ctx_image, * nvgpu_gr_ctx_load_golden_ctx_image,
* gm20b_ctxsw_prog_set_patch_addr, * gm20b_ctxsw_prog_set_patch_addr,