diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 343d27cf9..113e924ee 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -1283,6 +1283,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g,
 	ch->obj_class = 0;
 	ch->subctx_id = 0;
 	ch->runqueue_sel = 0;
+	ch->golden_ctx_init_ch = false;
 	ch->mmu_nack_handled = false;
diff --git a/drivers/gpu/nvgpu/common/gr/gr_utils.c b/drivers/gpu/nvgpu/common/gr/gr_utils.c
index 79c3eba1e..6239b3f29 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_utils.c
+++ b/drivers/gpu/nvgpu/common/gr/gr_utils.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -111,7 +111,6 @@ void nvgpu_gr_clear_cilp_preempt_pending_chid(struct gk20a *g)
 }
 #endif
 
-#ifdef CONFIG_NVGPU_DEBUGGER
 struct nvgpu_gr_obj_ctx_golden_image *nvgpu_gr_get_golden_image_ptr(
 	struct gk20a *g)
 {
@@ -120,6 +119,7 @@ struct nvgpu_gr_obj_ctx_golden_image *nvgpu_gr_get_golden_image_ptr(
 	return gr->golden_image;
 }
 
+#ifdef CONFIG_NVGPU_DEBUGGER
 struct nvgpu_gr_hwpm_map *nvgpu_gr_get_hwpm_map_ptr(struct gk20a *g)
 {
 	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c
index 273674952..3f4206307 100644
--- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c
+++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c
@@ -20,6 +20,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -37,8 +38,13 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include 
+#include 
+#include 
 
 #include "obj_ctx_priv.h"
@@ -728,8 +734,6 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
 	 * channel initializes golden image, driver needs to prevent multiple
 	 * channels from initializing golden ctx at the same time
 	 */
-	nvgpu_mutex_acquire(&golden_image->ctx_mutex);
-
 	if (golden_image->ready) {
 		nvgpu_log(g, gpu_dbg_gr, "golden image already saved");
 		goto clean_up;
@@ -784,7 +788,6 @@ clean_up:
 		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
 	}
 
-	nvgpu_mutex_release(&golden_image->ctx_mutex);
 	return err;
 }
@@ -855,6 +858,130 @@ static int nvgpu_gr_obj_ctx_alloc_buffers(struct gk20a *g,
 	return err;
 }
 
+int nvgpu_gr_obj_ctx_init_golden_context_image(struct gk20a *g)
+{
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image =
+			nvgpu_gr_get_golden_image_ptr(g);
+	struct nvgpu_setup_bind_args setup_bind_args;
+	struct nvgpu_channel *veid0_ch;
+	u64 user_size, kernel_size;
+	struct nvgpu_tsg *tsg;
+	struct vm_gk20a *vm;
+	u32 big_page_size;
+	u32 obj_class;
+	int err = 0;
+
+	err = gk20a_busy(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to power on, %d", err);
+		return err;
+	}
+
+	nvgpu_mutex_acquire(&golden_image->ctx_mutex);
+
+	if (golden_image->ready) {
+		goto out;
+	}
+
+	big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
+
+	/* allocate a tsg */
+	tsg = nvgpu_tsg_open(g, 0);
+	if (tsg == NULL) {
+		nvgpu_err(g, "tsg not available");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* allocate a VM */
+	g->ops.mm.get_default_va_sizes(NULL, &user_size, &kernel_size);
+	vm = nvgpu_vm_init(g, big_page_size,
+			U64(big_page_size) << U64(10),
+			nvgpu_safe_sub_u64(user_size,
+				U64(big_page_size) << U64(10)),
+			kernel_size,
+			0ULL,
+			false, false, false,
+			"golden_context");
+	if (vm == NULL) {
+		nvgpu_err(g, "vm init failed");
+		err = -ENOMEM;
+		goto out_release_tsg;
+	}
+
+	/* allocate veid0 channel by specifying GR runlist id */
+	veid0_ch = nvgpu_channel_open_new(g, nvgpu_engine_get_gr_runlist_id(g),
+			true, 0, 0);
+	if (veid0_ch == NULL) {
+		nvgpu_err(g, "channel not available");
+		err = -ENOMEM;
+		goto out_release_vm;
+	}
+
+	veid0_ch->golden_ctx_init_ch = true;
+
+	/* bind the channel to the vm */
+	err = g->ops.mm.vm_bind_channel(vm, veid0_ch);
+	if (err != 0) {
+		nvgpu_err(g, "could not bind vm");
+		goto out_release_ch;
+	}
+
+	/* bind the channel to the tsg */
+	err = nvgpu_tsg_bind_channel(tsg, veid0_ch);
+	if (err != 0) {
+		nvgpu_err(g, "unable to bind to tsg");
+		goto out_release_ch;
+	}
+
+	setup_bind_args.num_gpfifo_entries = 1024;
+	setup_bind_args.num_inflight_jobs = 0;
+#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
+	setup_bind_args.flags = 0;
+#else
+	/*
+	 * Usermode gpfifo and userd buffers are just allocated here but they
+	 * are not used for submitting any work. Since these buffers are
+	 * nvgpu allocated ones, we don't specify userd_dmabuf_fd and
+	 * gpfifo_dmabuf_fd here.
+	 */
+	setup_bind_args.flags = NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT;
+#endif
+	err = nvgpu_channel_setup_bind(veid0_ch, &setup_bind_args);
+	if (err != 0) {
+		nvgpu_err(g, "unable to setup and bind channel");
+		goto out_release_ch;
+	}
+
+#ifdef CONFIG_NVGPU_HAL_NON_FUSA
+	obj_class = MAXWELL_B;
+#else
+	obj_class = VOLTA_A;
+#endif
+
+	/* allocate obj_ctx to initialize golden image */
+	err = g->ops.gr.setup.alloc_obj_ctx(veid0_ch, obj_class, 0U);
+	if (err != 0) {
+		nvgpu_err(g, "unable to alloc obj_ctx");
+		goto out_release_ch;
+	}
+
+	/* This state update is needed for vGPU case */
+	golden_image->ready = true;
+
+	nvgpu_log(g, gpu_dbg_gr, "Golden context image initialized!");
+
+out_release_ch:
+	nvgpu_channel_close(veid0_ch);
+out_release_vm:
+	nvgpu_vm_put(vm);
+out_release_tsg:
+	nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
+out:
+	nvgpu_mutex_release(&golden_image->ctx_mutex);
+	gk20a_idle(g);
+	return err;
+}
+
 int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
 	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
 	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
index c760c5420..4258f2ff7 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
@@ -447,11 +447,6 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 		return -EINVAL;
 	}
 
-	if (!nvgpu_gr_obj_ctx_is_golden_image_ready(gr->golden_image)) {
-		nvgpu_log_fn(g, "no context switch header info to work with");
-		return -ENODEV;
-	}
-
 	priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
 	if (priv_registers == NULL) {
 		nvgpu_log_fn(g, "failed alloc for potential_offsets=%d", potential_offsets);
@@ -527,11 +522,6 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
 		return -EINVAL;
 	}
 
-	if (!nvgpu_gr_obj_ctx_is_golden_image_ready(gr->golden_image)) {
-		nvgpu_log_fn(g, "no context switch header info to work with");
-		return -ENODEV;
-	}
-
 	priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
 	if (priv_registers == NULL) {
 		nvgpu_log_fn(g, "failed alloc for potential_offsets=%d", potential_offsets);
@@ -1512,6 +1502,16 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
 				continue;
 			}
 
+			if (!nvgpu_gr_obj_ctx_is_golden_image_ready(
+					gr->golden_image)) {
+				nvgpu_err(g, "no context switch header info to "
+					"work with");
+				ctx_ops[i].status =
+					REGOP(STATUS_INVALID_OFFSET);
+				err = -ENODEV;
+				continue;
+			}
+
 			err = g->ops.gr.get_ctx_buffer_offsets(g,
 				ctx_ops[i].offset,
 				max_offsets,
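The ready-check deleted from the two offset-lookup helpers above does not disappear either: it reappears inside the gr_exec_ctx_ops() loop, where a missing golden image now fails only the offending operation — marking its regop status — instead of aborting before the batch starts. A rough stand-alone sketch of that per-op error-marking pattern; struct reg_op and the status codes are illustrative stand-ins, not the real regops types:

```c
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define STATUS_SUCCESS        0U
#define STATUS_INVALID_OFFSET 1U  /* stand-in for REGOP(STATUS_INVALID_OFFSET) */

/* Illustrative stand-in for a regop entry; not the real nvgpu type. */
struct reg_op {
	bool needs_ctx;       /* op reads/writes context image state */
	unsigned int status;  /* per-op result, reported back to the caller */
};

/* Mark ops that cannot be serviced and keep going; remember the error. */
static int exec_ctx_ops(struct reg_op *ops, int n, bool golden_ready)
{
	int err = 0;

	for (int i = 0; i < n; i++) {
		if (ops[i].needs_ctx && !golden_ready) {
			ops[i].status = STATUS_INVALID_OFFSET;
			err = -ENODEV;
			continue;
		}
		ops[i].status = STATUS_SUCCESS;  /* the real code performs the op */
	}
	return err;
}

int main(void)
{
	struct reg_op batch[3] = {
		{ .needs_ctx = false },
		{ .needs_ctx = true },
		{ .needs_ctx = false },
	};
	int err = exec_ctx_ops(batch, 3, false);

	/* err=-19, statuses: 0 1 0 -- only the ctx-dependent op failed */
	printf("err=%d, statuses: %u %u %u\n", err,
	       batch[0].status, batch[1].status, batch[2].status);
	return 0;
}
```

This matches how regops are reported back to a debugger client: each op carries its own status, so one unserviceable op need not invalidate the rest of the batch.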
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 4cb7927e8..0243b44d9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -543,6 +543,11 @@ struct nvgpu_channel {
 	 * Host methods on its pushbuffer.
 	 */
 	bool is_privileged_channel;
+
+	/**
+	 * nvgpu created channel to initialize the golden context image.
+	 */
+	bool golden_ctx_init_ch;
 #ifdef CONFIG_NVGPU_DEBUGGER
 	/**
 	 * MMU Debugger Mode is enabled for this channel if refcnt > 0
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_utils.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_utils.h
index d5778e8ec..6096fedd2 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_utils.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -123,9 +123,9 @@ struct nvgpu_gr_zbc *nvgpu_gr_get_zbc_ptr(struct gk20a *g);
 u32 nvgpu_gr_get_cilp_preempt_pending_chid(struct gk20a *g);
 void nvgpu_gr_clear_cilp_preempt_pending_chid(struct gk20a *g);
 #endif
-#ifdef CONFIG_NVGPU_DEBUGGER
 struct nvgpu_gr_obj_ctx_golden_image *nvgpu_gr_get_golden_image_ptr(
 	struct gk20a *g);
+#ifdef CONFIG_NVGPU_DEBUGGER
 struct nvgpu_gr_hwpm_map *nvgpu_gr_get_hwpm_map_ptr(struct gk20a *g);
 void nvgpu_gr_reset_falcon_ptr(struct gk20a *g);
 void nvgpu_gr_reset_golden_image_ptr(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h
index d0a013bfe..ee5d147b5 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h
@@ -158,6 +158,31 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
 	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx,
 	struct nvgpu_gr_ctx_mappings *mappings, bool patch);
 
+/**
+ * @brief Allocate and setup object context s/w image for VEID0 GPU channel.
+ *        This will initialize the golden context image.
+ *
+ * @param g [in] Pointer to GPU driver struct.
+ *
+ * This function allocates and sets up object context for VEID0 GPU channel
+ * in order to initialize golden image.
+ *
+ * The steps include:
+ *
+ * - Allocate a TSG.
+ * - Allocate a VM.
+ * - Allocate GR/VEID0 channel.
+ * - Bind the channel to VM.
+ * - Bind the channel to TSG.
+ * - Setup and bind the channel.
+ * - Allocate object context for the channel. This will initialize the
+ *   golden image.
+ * - Close the channel.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ */
+int nvgpu_gr_obj_ctx_init_golden_context_image(struct gk20a *g);
+
 /**
  * @brief Allocate golden context image.
  *
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index 0783e613f..06d7a6767 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -983,6 +983,19 @@ static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags)
 static int nvgpu_ioctl_channel_alloc_obj_ctx(struct nvgpu_channel *ch,
 	u32 class_num, u32 user_flags)
 {
+	struct gk20a *g = ch->g;
+	int err;
+
+	/*
+	 * Allocate VEID0 channel and initialize the golden context image.
+	 */
+	err = nvgpu_gr_obj_ctx_init_golden_context_image(g);
+	if (err != 0) {
+		nvgpu_err(g, "golden context image init failed (%d).",
+			err);
+		return -ENOSYS;
+	}
+
 	return ch->g->ops.gr.setup.alloc_obj_ctx(ch, class_num,
 		nvgpu_obj_ctx_user_flags_to_common_flags(user_flags));
 }
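Seen from userspace, the only behavioral change is ordering: the first ALLOC_OBJ_CTX request on the device now pays for golden-context initialization before any per-channel allocation happens, and an init failure surfaces as -ENOSYS. A compact sketch of that gating, with stub functions standing in for the two driver calls:

```c
#include <errno.h>
#include <stdio.h>

/* Stubs standing in for the two driver calls; not the nvgpu entry points. */
static int init_golden_context_image(int simulate_failure)
{
	return simulate_failure ? -ENOMEM : 0;  /* idempotent in the driver */
}

static int alloc_obj_ctx(void)
{
	return 0;
}

/* Mirrors nvgpu_ioctl_channel_alloc_obj_ctx(): init gates the allocation. */
static int ioctl_alloc_obj_ctx(int golden_init_fails)
{
	int err = init_golden_context_image(golden_init_fails);

	if (err != 0) {
		fprintf(stderr, "golden context image init failed (%d).\n", err);
		return -ENOSYS;
	}

	return alloc_obj_ctx();
}

int main(void)
{
	printf("normal path : %d\n", ioctl_alloc_obj_ctx(0));
	printf("failure path: %d (-ENOSYS = %d)\n",
	       ioctl_alloc_obj_ctx(1), -ENOSYS);
	return 0;
}
```

Returning a fixed -ENOSYS rather than propagating err is the patch's choice here; the underlying cause is still logged via nvgpu_err(), and repeat ioctls retry the init because the ready flag is only set on success.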