gpu: nvgpu: add common.gr.obj_ctx unit

Add a new unit common.gr.obj_ctx which allocates and initializes GR context. This unit also takes care of creating the global golden image used to initialize every context. Add private header obj_ctx_priv.h that defines struct nvgpu_gr_obj_ctx_golden_image. Add public header obj_ctx.h that exposes the functions supported by the new unit. This unit now exposes the below API to allocate and initialize a context: nvgpu_gr_obj_ctx_alloc(). Remove the below functions from gk20a/gr_gk20a.c and move them to the new unit with the below renames: gr_gk20a_fecs_ctx_bind_channel() -> nvgpu_gr_obj_ctx_bind_channel(), gr_gk20a_fecs_ctx_image_save() -> nvgpu_gr_obj_ctx_image_save(), gk20a_init_sw_bundle() -> nvgpu_gr_obj_ctx_alloc_sw_bundle(), gr_gk20a_alloc_gr_ctx() -> nvgpu_gr_obj_ctx_gr_ctx_alloc(), gr_gk20a_init_golden_ctx_image() -> nvgpu_gr_obj_ctx_alloc_golden_ctx_image(). Use the new APIs in gk20a_alloc_obj_ctx() to allocate the context. For now this unit includes <nvgpu/gr/gr.h> and some h/w headers, but they will be removed in follow-up patches. Jira NVGPU-1887 Change-Id: Ib95ec1c19c5b74810f85c2feed8fdd63889d3d22 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2087662 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman <alexw@nvidia.com> Reviewed-by: Vinod Gopalakrishnakurup <vinodg@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-25 02:52:51 +03:00 · 2019-04-01 18:22:07 +05:30
parent 1819c36562
commit c33827e122
8 changed files with 608 additions and 366 deletions
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -77,6 +77,7 @@ nvgpu-y += \
 	common/gr/zbc.o \
 	common/gr/gr_setup.o \
 	common/gr/hwpm_map.o \
+	common/gr/obj_ctx.o \
 	common/netlist/netlist.o \
 	common/netlist/netlist_sim.o \
 	common/netlist/netlist_gm20b.o \
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -117,6 +117,7 @@ srcs += common/sim.c \
 	common/gr/zbc.c \
 	common/gr/gr_setup.c \
 	common/gr/hwpm_map.c \
+	common/gr/obj_ctx.c \
 	common/netlist/netlist.c \
 	common/netlist/netlist_sim.c \
 	common/netlist/netlist_gm20b.c \
--- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c
+++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c
@@ -0,0 +1,460 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+#include <nvgpu/log.h>
+#include <nvgpu/io.h>
+#include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/global_ctx.h>
+#include <nvgpu/gr/obj_ctx.h>
+#include <nvgpu/power_features/cg.h>
+
+#include "obj_ctx_priv.h"
+
+/*
+ * TODO: needed for nvgpu_gr_init_fs_state() and introduces cyclic dependency
+ * with common.gr.gr unit. Remove this in follow up
+ */
+#include <nvgpu/gr/gr.h>
+
+/*
+ * TODO: remove these when nvgpu_gr_obj_ctx_bind_channel() and
+ * nvgpu_gr_obj_ctx_image_save() are moved to appropriate units
+ */
+#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
+#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
+
+/*
+ * Load the netlist SW bundle lists with pipe mode override enabled.
+ *
+ * The lists are taken from g->netlist_vars: sw_bundle_init is always loaded,
+ * sw_veid_bundle_init and sw_bundle64_init only when the matching HAL op
+ * (load_sw_veid_bundle / load_sw_bundle64) is set.
+ *
+ * The override is always disabled again before returning; GR idle is awaited
+ * only when every load succeeded.
+ *
+ * Returns 0 on success, else the error from the failing HAL or wait_idle.
+ */
+static int nvgpu_gr_obj_ctx_alloc_sw_bundle(struct gk20a *g)
+{
+	int status;
+
+	/* enable pipe mode override */
+	g->ops.gr.init.pipe_mode_override(g, true);
+
+	/* load bundle init */
+	status = g->ops.gr.init.load_sw_bundle_init(g,
+			&g->netlist_vars->sw_bundle_init);
+
+	/* optional veid bundle, skipped once an earlier load has failed */
+	if ((status == 0) &&
+	    (g->ops.gr.init.load_sw_veid_bundle != NULL)) {
+		status = g->ops.gr.init.load_sw_veid_bundle(g,
+				&g->netlist_vars->sw_veid_bundle_init);
+	}
+
+	/* optional bundle64, skipped once an earlier load has failed */
+	if ((status == 0) &&
+	    (g->ops.gr.init.load_sw_bundle64 != NULL)) {
+		status = g->ops.gr.init.load_sw_bundle64(g,
+				&g->netlist_vars->sw_bundle64_init);
+	}
+
+	/* disable pipe mode override, on success and on failure alike */
+	g->ops.gr.init.pipe_mode_override(g, false);
+
+	/* in case of error skip waiting for GR idle - just restore state */
+	if (status != 0) {
+		return status;
+	}
+
+	return g->ops.gr.init.wait_idle(g);
+}
+
+/*
+ * Submit the FECS bind-pointer method for inst_block; failures are logged.
+ */
+static int nvgpu_gr_obj_ctx_bind_channel(struct gk20a *g,
+		struct nvgpu_mem *inst_block)
+{
+	/* shifted instance block address, only used for the log below */
+	u32 ptr_lo = u64_lo32(nvgpu_inst_block_addr(g, inst_block) >>
+			      ram_in_base_shift_v());
+	struct fecs_method_op_gk20a bind_op = {
+		.method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
+		.method.data = fecs_current_ctx_data(g, inst_block),
+		.mailbox = { .id = 0, .data = 0, .clr = 0x30, .ret = NULL,
+			     .ok = 0x10, .fail = 0x20, },
+		.cond.ok = GR_IS_UCODE_OP_AND,
+		.cond.fail = GR_IS_UCODE_OP_AND,
+	};
+	int status;
+
+	nvgpu_log_info(g, "bind inst ptr 0x%08x", ptr_lo);
+
+	status = g->ops.gr.falcon.submit_fecs_method_op(g, bind_op, true);
+	if (status != 0) {
+		nvgpu_err(g, "bind channel instance failed");
+	}
+
+	return status;
+}
+
+/* Submit the FECS wfi-golden-save method for inst_block; log on failure. */
+static int nvgpu_gr_obj_ctx_image_save(struct gk20a *g,
+		struct nvgpu_mem *inst_block)
+{
+	struct fecs_method_op_gk20a save_op;
+	int status;
+
+	nvgpu_log_fn(g, " ");
+
+	save_op = (struct fecs_method_op_gk20a) {
+		.method.addr = gr_fecs_method_push_adr_wfi_golden_save_v(),
+		.method.data = fecs_current_ctx_data(g, inst_block),
+		.mailbox = { .id = 0, .data = 0, .clr = 3, .ret = NULL,
+			     .ok = 1, .fail = 2, },
+		.cond.ok = GR_IS_UCODE_OP_AND,
+		.cond.fail = GR_IS_UCODE_OP_AND,
+	};
+	status = g->ops.gr.falcon.submit_fecs_method_op(g, save_op, true);
+	if (status != 0) {
+		nvgpu_err(g, "save context image failed");
+	}
+
+	return status;
+}
+
+/*
+ * Build the global golden context image from a fresh gr_ctx and keep a copy
+ * in golden_image->local_golden_image. ctx_mutex serializes channels; once
+ * golden_image->ready is set, callers return 0 right away. fe_go_idle is
+ * restored on every path that disabled it, and wait_idle / image save
+ * failures are propagated. Returns -ENOMEM if the copy cannot be created.
+ */
+int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_mem *inst_block)
+{
+	u32 i;
+	int err = 0;
+	struct netlist_aiv_list *sw_ctx_load = &g->netlist_vars->sw_ctx_load;
+	struct netlist_av_list *sw_method_init = &g->netlist_vars->sw_method_init;
+
+	nvgpu_log_fn(g, " ");
+	nvgpu_mutex_acquire(&golden_image->ctx_mutex);
+	if (golden_image->ready) {
+		goto clean_up;
+	}
+
+	err = g->ops.gr.init.fe_pwr_mode_force_on(g, true);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	g->ops.gr.init.override_context_reset(g);
+
+	err = g->ops.gr.init.fe_pwr_mode_force_on(g, false);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	err = nvgpu_gr_obj_ctx_bind_channel(g, inst_block);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	/* do not load ctx init while GR has not gone idle */
+	err = g->ops.gr.init.wait_idle(g);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	/* load ctx init */
+	for (i = 0U; i < sw_ctx_load->count; i++) {
+		nvgpu_writel(g, sw_ctx_load->l[i].addr,
+			     sw_ctx_load->l[i].value);
+	}
+
+	if (g->ops.gr.init.preemption_state != NULL) {
+		err = g->ops.gr.init.preemption_state(g,
+			g->gr.gfxp_wfi_timeout_count,
+			g->gr.gfxp_wfi_timeout_unit_usec);
+		if (err != 0) {
+			goto clean_up;
+		}
+	}
+
+	nvgpu_cg_blcg_gr_load_enable(g);
+
+	err = g->ops.gr.init.wait_idle(g);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	/* disable fe_go_idle; every failure below has to restore it */
+	g->ops.gr.init.fe_go_idle_timeout(g, false);
+
+	err = g->ops.gr.commit_global_ctx_buffers(g, gr_ctx, false);
+	if (err != 0) {
+		goto restore_fe_go_idle;
+	}
+
+	/* override a few ctx state registers */
+	g->ops.gr.init.commit_global_timeslice(g);
+
+	/* floorsweep anything left */
+	err = nvgpu_gr_init_fs_state(g);
+	if (err != 0) {
+		goto restore_fe_go_idle;
+	}
+
+	err = g->ops.gr.init.wait_idle(g);
+	if (err != 0) {
+		goto restore_fe_go_idle;
+	}
+
+	err = nvgpu_gr_obj_ctx_alloc_sw_bundle(g);
+
+restore_fe_go_idle:
+	/* restore fe_go_idle */
+	g->ops.gr.init.fe_go_idle_timeout(g, true);
+
+	/* keep an earlier error, otherwise report a failed idle wait */
+	if (err == 0) {
+		err = g->ops.gr.init.wait_idle(g);
+	}
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	/* load method init */
+	g->ops.gr.init.load_method_init(g, sw_method_init);
+
+	err = g->ops.gr.init.wait_idle(g);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	err = nvgpu_gr_ctx_init_zcull(g, gr_ctx);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	/* only copy the image when the save request did not fail */
+	err = nvgpu_gr_obj_ctx_image_save(g, inst_block);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	golden_image->local_golden_image =
+		nvgpu_gr_global_ctx_init_local_golden_image(g, &gr_ctx->mem,
+			g->gr.ctx_vars.golden_image_size);
+	if (golden_image->local_golden_image == NULL) {
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	golden_image->ready = true;
+	g->gr.ctx_vars.golden_image_initialized = true;
+	g->ops.gr.falcon.set_current_ctx_invalid(g);
+
+clean_up:
+	if (err != 0) {
+		nvgpu_err(g, "fail");
+	} else {
+		nvgpu_log_fn(g, "done");
+	}
+	nvgpu_mutex_release(&golden_image->ctx_mutex);
+	return err;
+}
+
+/*
+ * Size the NVGPU_GR_CTX_CTX entry of the gr ctx descriptor from the golden
+ * image size, then allocate gr_ctx in vm.
+ *
+ * Returns the nvgpu_gr_ctx_alloc() result.
+ */
+static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
+{
+	struct nvgpu_gr_ctx_desc *desc = g->gr.gr_ctx_desc;
+	u32 img_size;
+
+	nvgpu_log_fn(g, " ");
+
+	img_size = nvgpu_gr_obj_ctx_get_golden_image_size(g->gr.golden_image);
+	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, img_size);
+
+	return nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
+}
+
+/*
+ * Allocate and initialize the GR context for channel c: gr ctx buffer, patch
+ * buffer (when not valid yet), preemption mode, global ctx buffer mappings,
+ * gr ctx commit, and golden image init and load. Returns 0 on success, else
+ * the error of the failing step.
+ *
+ * Nothing is released on failure: gr_ctx, patch_ctx and the global ctx buffer
+ * mappings can be reused, and golden image init and load is a one time thing.
+ */
+int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
+	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_subctx *subctx,
+	struct channel_gk20a *c,
+	struct vm_gk20a *vm,
+	struct nvgpu_mem *inst_block,
+	u32 class_num, u32 flags,
+	bool cde, bool vpr)
+{
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, gr_ctx, vm);
+	if (err != 0) {
+		nvgpu_err(g, "fail to allocate TSG gr ctx buffer");
+		goto out;
+	}
+
+	/* allocate patch buffer */
+	if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) {
+		gr_ctx->patch_ctx.data_count = 0;
+
+		nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
+			NVGPU_GR_CTX_PATCH_CTX,
+			g->ops.gr.get_patch_slots(g) *
+				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);
+
+		err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx,
+			g->gr.gr_ctx_desc, vm);
+		if (err != 0) {
+			nvgpu_err(g, "fail to allocate patch buffer");
+			goto out;
+		}
+	}
+
+	g->ops.gr.init_ctxsw_preemption_mode(g, gr_ctx, vm, class_num, flags);
+
+	/* map global buffer to channel gpu_va and commit */
+	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
+			global_ctx_buffer, vm, vpr);
+	if (err != 0) {
+		nvgpu_err(g, "fail to map global ctx buffer");
+		goto out;
+	}
+
+	err = g->ops.gr.commit_global_ctx_buffers(g, gr_ctx, true);
+	if (err != 0) {
+		nvgpu_err(g, "fail to commit global ctx buffer");
+		goto out;
+	}
+
+	/* commit gr ctx buffer */
+	err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
+	if (err != 0) {
+		nvgpu_err(g, "fail to commit gr ctx buffer");
+		goto out;
+	}
+
+	/* init golden image, ELPG enabled after this is done */
+	err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image, gr_ctx,
+		inst_block);
+	if (err != 0) {
+		nvgpu_err(g, "fail to init golden ctx image");
+		goto out;
+	}
+
+	/*
+	 * load golden image
+	 * NOTE(review): a result of this call, if any, is not checked -
+	 * confirm whether nvgpu_gr_ctx_load_golden_ctx_image() can fail.
+	 */
+	nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx,
+		golden_image->local_golden_image, cde);
+
+	if (g->ops.gr.update_ctxsw_preemption_mode != NULL) {
+		g->ops.gr.update_ctxsw_preemption_mode(g, gr_ctx, subctx);
+	}
+
+	nvgpu_log_fn(g, "done");
+	return 0;
+out:
+	nvgpu_err(g, "fail");
+	return err;
+}
+
+/* Record size as the golden context image size in golden_image. */
+void nvgpu_gr_obj_ctx_set_golden_image_size(
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image, size_t size)
+{
+	golden_image->size = size;
+}
+
+size_t nvgpu_gr_obj_ctx_get_golden_image_size(
+		struct nvgpu_gr_obj_ctx_golden_image *golden_image)
+{
+	return golden_image->size; /* as stored by the set_ counterpart */
+}
+
+u32 *nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
+{
+	return nvgpu_gr_global_ctx_get_local_golden_image_ptr(
+			golden_image->local_golden_image); /* NULL until init */
+}
+
+/* Create the golden image object and record size; returns 0 or -ENOMEM. */
+int nvgpu_gr_obj_ctx_init(struct gk20a *g,
+	struct nvgpu_gr_obj_ctx_golden_image **gr_golden_image, u32 size)
+{
+	struct nvgpu_gr_obj_ctx_golden_image *image =
+			nvgpu_kzalloc(g, sizeof(*image));
+
+	if (image == NULL) {
+		return -ENOMEM;
+	}
+
+	nvgpu_gr_obj_ctx_set_golden_image_size(image, size);
+	nvgpu_mutex_init(&image->ctx_mutex);
+	*gr_golden_image = image;
+
+	return 0;
+}
+
+/* Free golden_image and its local image copy; NULL golden_image is a no-op. */
+void nvgpu_gr_obj_ctx_deinit(struct gk20a *g,
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
+{
+	if (golden_image == NULL) {
+		return;
+	}
+	if (golden_image->local_golden_image != NULL) {
+		nvgpu_gr_global_ctx_deinit_local_golden_image(g,
+			golden_image->local_golden_image);
+	}
+	nvgpu_kfree(g, golden_image);
+}
--- a/drivers/gpu/nvgpu/common/gr/obj_ctx_priv.h
+++ b/drivers/gpu/nvgpu/common/gr/obj_ctx_priv.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_GR_OBJ_CTX_PRIV_H
+#define NVGPU_GR_OBJ_CTX_PRIV_H
+
+#include <nvgpu/types.h>
+#include <nvgpu/lock.h>
+
+struct nvgpu_gr_global_ctx_local_golden_image;
+
+struct nvgpu_gr_obj_ctx_golden_image {
+	bool ready;	/* set once the local golden image has been created */
+	struct nvgpu_mutex ctx_mutex;	/* held while the image is built */
+	/* size recorded through nvgpu_gr_obj_ctx_set_golden_image_size() */
+	size_t size;
+	/* copy made by nvgpu_gr_global_ctx_init_local_golden_image() */
+	struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
+};
+
+#endif /* NVGPU_GR_OBJ_CTX_PRIV_H */
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -52,6 +52,7 @@
 #include <nvgpu/gr/zbc.h>
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/gr_falcon.h>
+#include <nvgpu/gr/obj_ctx.h>
 #include <nvgpu/gr/zcull.h>
 #include <nvgpu/gr/config.h>
 #include <nvgpu/gr/fecs_trace.h>
@@ -302,7 +303,7 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 	return 0;
 }

-static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
+u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
 {
 	u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
 		ram_in_base_shift_v();
@@ -315,36 +316,6 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
 		gr_fecs_current_ctx_valid_f(1);
 }

-int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
-					struct channel_gk20a *c)
-{
-	u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block)
-				     >> ram_in_base_shift_v());
-	u32 data = fecs_current_ctx_data(g, &c->inst_block);
-	int ret;
-
-	nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x",
-		   c->chid, inst_base_ptr);
-
-	ret = g->ops.gr.falcon.submit_fecs_method_op(g,
-		     (struct fecs_method_op_gk20a) {
-		     .method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
-		     .method.data = data,
-		     .mailbox = { .id = 0, .data = 0,
-				  .clr = 0x30,
-				  .ret = NULL,
-				  .ok = 0x10,
-				  .fail = 0x20, },
-		     .cond.ok = GR_IS_UCODE_OP_AND,
-		     .cond.fail = GR_IS_UCODE_OP_AND}, true);
-	if (ret != 0) {
-		nvgpu_err(g,
-			"bind channel instance failed");
-	}
-
-	return ret;
-}
-
 int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 			struct nvgpu_gr_ctx *gr_ctx, bool patch)
 {
@@ -401,219 +372,6 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	return 0;
 }

-int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
-{
-	struct gk20a *g = c->g;
-	int ret;
-
-	nvgpu_log_fn(g, " ");
-
-	ret = g->ops.gr.falcon.submit_fecs_method_op(g,
-		(struct fecs_method_op_gk20a) {
-		.method.addr = save_type,
-		.method.data = fecs_current_ctx_data(g, &c->inst_block),
-		.mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
-			.ok = 1, .fail = 2,
-		},
-		.cond.ok = GR_IS_UCODE_OP_AND,
-		.cond.fail = GR_IS_UCODE_OP_AND,
-		 }, true);
-
-	if (ret != 0) {
-		nvgpu_err(g, "save context image failed");
-	}
-
-	return ret;
-}
-
-int gk20a_init_sw_bundle(struct gk20a *g)
-{
-	struct netlist_av_list *sw_bundle_init =
-			&g->netlist_vars->sw_bundle_init;
-	struct netlist_av_list *sw_veid_bundle_init =
-			&g->netlist_vars->sw_veid_bundle_init;
-	struct netlist_av64_list *sw_bundle64_init =
-			&g->netlist_vars->sw_bundle64_init;
-	int err = 0;
-
-	/* enable pipe mode override */
-	g->ops.gr.init.pipe_mode_override(g, true);
-
-	/* load bundle init */
-	err = g->ops.gr.init.load_sw_bundle_init(g, sw_bundle_init);
-	if (err != 0) {
-		goto error;
-	}
-
-	if (g->ops.gr.init.load_sw_veid_bundle != NULL) {
-		err = g->ops.gr.init.load_sw_veid_bundle(g,
-				sw_veid_bundle_init);
-		if (err != 0) {
-			goto error;
-		}
-	}
-
-	if (g->ops.gr.init.load_sw_bundle64 != NULL) {
-		err = g->ops.gr.init.load_sw_bundle64(g, sw_bundle64_init);
-		if (err != 0) {
-			goto error;
-		}
-	}
-
-	/* disable pipe mode override */
-	g->ops.gr.init.pipe_mode_override(g, false);
-
-	err = g->ops.gr.init.wait_idle(g);
-
-	return err;
-
-error:
-	/* in case of error skip waiting for GR idle - just restore state */
-	g->ops.gr.init.pipe_mode_override(g, false);
-
-	return err;
-}
-
-/* init global golden image from a fresh gr_ctx in channel ctx.
-   save a copy in local_golden_image in ctx_vars */
-int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
-					  struct channel_gk20a *c,
-					  struct nvgpu_gr_ctx *gr_ctx)
-{
-	struct gr_gk20a *gr = &g->gr;
-	u32 i;
-	struct nvgpu_mem *gr_mem;
-	int err = 0;
-	struct netlist_aiv_list *sw_ctx_load = &g->netlist_vars->sw_ctx_load;
-	struct netlist_av_list *sw_method_init = &g->netlist_vars->sw_method_init;
-
-	nvgpu_log_fn(g, " ");
-
-	gr_mem = &gr_ctx->mem;
-
-	/* golden ctx is global to all channels. Although only the first
-	   channel initializes golden image, driver needs to prevent multiple
-	   channels from initializing golden ctx at the same time */
-	nvgpu_mutex_acquire(&gr->ctx_mutex);
-
-	if (gr->ctx_vars.golden_image_initialized) {
-		goto clean_up;
-	}
-
-	err = g->ops.gr.init.fe_pwr_mode_force_on(g, true);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-	g->ops.gr.init.override_context_reset(g);
-
-	err = g->ops.gr.init.fe_pwr_mode_force_on(g, false);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-	err = gr_gk20a_fecs_ctx_bind_channel(g, c);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-	err = g->ops.gr.init.wait_idle(g);
-
-	/* load ctx init */
-	for (i = 0; i < sw_ctx_load->count; i++) {
-		gk20a_writel(g, sw_ctx_load->l[i].addr,
-			     sw_ctx_load->l[i].value);
-	}
-
-	if (g->ops.gr.init.preemption_state != NULL) {
-		err = g->ops.gr.init.preemption_state(g,
-			gr->gfxp_wfi_timeout_count,
-			gr->gfxp_wfi_timeout_unit_usec);
-		if (err != 0) {
-			goto clean_up;
-		}
-	}
-
-	nvgpu_cg_blcg_gr_load_enable(g);
-
-	err = g->ops.gr.init.wait_idle(g);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-	/* disable fe_go_idle */
-	g->ops.gr.init.fe_go_idle_timeout(g, false);
-
-	err = g->ops.gr.commit_global_ctx_buffers(g, gr_ctx, false);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-	/* override a few ctx state registers */
-	g->ops.gr.init.commit_global_timeslice(g);
-
-	/* floorsweep anything left */
-	err = nvgpu_gr_init_fs_state(g);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-	err = g->ops.gr.init.wait_idle(g);
-	if (err != 0) {
-		goto restore_fe_go_idle;
-	}
-
-	err = gk20a_init_sw_bundle(g);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-restore_fe_go_idle:
-	/* restore fe_go_idle */
-	g->ops.gr.init.fe_go_idle_timeout(g, true);
-
-	if ((err != 0) || (g->ops.gr.init.wait_idle(g) != 0)) {
-		goto clean_up;
-	}
-
-	/* load method init */
-	g->ops.gr.init.load_method_init(g, sw_method_init);
-
-	err = g->ops.gr.init.wait_idle(g);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-	err = nvgpu_gr_ctx_init_zcull(g, gr_ctx);
-	if (err != 0) {
-		goto clean_up;
-	}
-
-	gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
-
-	gr->local_golden_image =
-		nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem,
-			gr->ctx_vars.golden_image_size);
-	if (gr->local_golden_image == NULL) {
-		err = -ENOMEM;
-		goto clean_up;
-	}
-
-	gr->ctx_vars.golden_image_initialized = true;
-
-	g->ops.gr.falcon.set_current_ctx_invalid(g);
-
-clean_up:
-	if (err != 0) {
-		nvgpu_err(g, "fail");
-	} else {
-		nvgpu_log_fn(g, "done");
-	}
-
-	nvgpu_mutex_release(&gr->ctx_mutex);
-	return err;
-}
-
 int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 				    struct channel_gk20a *c,
 				    bool enable_smpc_ctxsw)
@@ -851,25 +609,6 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
 	return 0;
 }

-static int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
-	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
-{
-	struct gr_gk20a *gr = &g->gr;
-	int err = 0;
-
-	nvgpu_log_fn(g, " ");
-
-	nvgpu_gr_ctx_set_size(gr->gr_ctx_desc, NVGPU_GR_CTX_CTX,
-		gr->ctx_vars.golden_image_size);
-
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr->gr_ctx_desc, vm);
-	if (err != 0) {
-		return err;
-	}
-
-	return 0;
-}
-
 void gr_gk20a_free_gr_ctx(struct gk20a *g,
 			  struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
 {
@@ -936,88 +675,20 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a  *c, u32 class_num, u32 flags)
 	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
 		tsg->vm = c->vm;
 		nvgpu_vm_get(tsg->vm);
-		err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm);
+
+		err = nvgpu_gr_obj_ctx_alloc(g, g->gr.golden_image,
+				g->gr.global_ctx_buffer, gr_ctx, c->subctx, c,
+				tsg->vm, &c->inst_block, class_num, flags,
+				c->cde, c->vpr);
 		if (err != 0) {
 			nvgpu_err(g,
-				"fail to allocate TSG gr ctx buffer");
+				"failed to allocate gr ctx buffer");
 			nvgpu_vm_put(tsg->vm);
 			tsg->vm = NULL;
 			goto out;
 		}

 		gr_ctx->tsgid = tsg->tsgid;
-
-		/* allocate patch buffer */
-		if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) {
-			gr_ctx->patch_ctx.data_count = 0;
-
-			nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
-				NVGPU_GR_CTX_PATCH_CTX,
-				g->ops.gr.get_patch_slots(g) *
-					PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);
-
-			err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx,
-				g->gr.gr_ctx_desc, c->vm);
-			if (err != 0) {
-				nvgpu_err(g,
-					"fail to allocate patch buffer");
-				goto out;
-			}
-		}
-
-		g->ops.gr.init_ctxsw_preemption_mode(g, gr_ctx, tsg->vm,
-			class_num, flags);
-
-		/* map global buffer to channel gpu_va and commit */
-		err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
-				g->gr.global_ctx_buffer, tsg->vm, c->vpr);
-		if (err != 0) {
-			nvgpu_err(g,
-				"fail to map global ctx buffer");
-			goto out;
-		}
-		g->ops.gr.commit_global_ctx_buffers(g, gr_ctx, true);
-
-		/* commit gr ctx buffer */
-		err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
-		if (err != 0) {
-			nvgpu_err(g,
-				"fail to commit gr ctx buffer");
-			goto out;
-		}
-
-		/* init golden image, ELPG enabled after this is done */
-		err = gr_gk20a_init_golden_ctx_image(g, c, gr_ctx);
-		if (err != 0) {
-			nvgpu_err(g,
-				"fail to init golden ctx image");
-			goto out;
-		}
-
-		/* load golden image */
-		nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx,
-			g->gr.local_golden_image, c->cde);
-		if (err != 0) {
-			nvgpu_err(g,
-				"fail to load golden ctx image");
-			goto out;
-		}
-
-		if (g->ops.gr.update_ctxsw_preemption_mode != NULL) {
-			g->ops.gr.update_ctxsw_preemption_mode(g, gr_ctx,
-				c->subctx);
-		}
-
-#ifdef CONFIG_GK20A_CTXSW_TRACE
-		if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
-			err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
-				c->subctx, gr_ctx, tsg->tgid, 0);
-			if (err != 0) {
-				nvgpu_warn(g,
-					"fail to bind channel for ctxsw trace");
-			}
-		}
-#endif
 	} else {
 		/* commit gr ctx buffer */
 		err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
@@ -1026,18 +697,19 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a  *c, u32 class_num, u32 flags)
 				"fail to commit gr ctx buffer");
 			goto out;
 		}
-#ifdef CONFIG_GK20A_CTXSW_TRACE
-		if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
-			err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
-				c->subctx, gr_ctx, tsg->tgid, 0);
-			if (err != 0) {
-				nvgpu_warn(g,
-					"fail to bind channel for ctxsw trace");
-			}
-		}
-#endif
 	}

+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
+		err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
+			c->subctx, gr_ctx, tsg->tgid, 0);
+		if (err != 0) {
+			nvgpu_warn(g,
+				"fail to bind channel for ctxsw trace");
+		}
+	}
+#endif
+
 	nvgpu_log_fn(g, "done");
 	return 0;
 out:
@@ -1069,18 +741,13 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)

 	nvgpu_netlist_deinit_ctx_vars(g);

-	if (gr->local_golden_image != NULL) {
-		nvgpu_gr_global_ctx_deinit_local_golden_image(g,
-			gr->local_golden_image);
-		gr->local_golden_image = NULL;
-		gr->ctx_vars.golden_image_initialized = false;
-	}
-
 	nvgpu_gr_hwpm_map_deinit(g, gr->hwpm_map);

 	nvgpu_ecc_remove_support(g);
 	nvgpu_gr_zbc_deinit(g, gr->zbc);
 	nvgpu_gr_zcull_deinit(g, gr->zcull);
+	nvgpu_gr_obj_ctx_deinit(g, gr->golden_image);
+	gr->ctx_vars.golden_image_initialized = false;
 }

 static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
@@ -1363,6 +1030,12 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
 	}
 #endif

+	err = nvgpu_gr_obj_ctx_init(g, &gr->golden_image,
+			g->gr.ctx_vars.golden_image_size);
+	if (err != 0) {
+		goto clean_up;
+	}
+
 	err = gr_gk20a_init_gr_config(g, gr);
 	if (err != 0) {
 		goto clean_up;
@@ -3162,8 +2835,8 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 		err = gr_gk20a_find_priv_offset_in_buffer(g,
 			  priv_registers[i],
 			  is_quad, quad,
-			  nvgpu_gr_global_ctx_get_local_golden_image_ptr(
-				g->gr.local_golden_image),
+			  nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
+				g->gr.golden_image),
 			  g->gr.ctx_vars.golden_image_size,
 			  &priv_offset);
 		if (err != 0) {
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -166,7 +166,8 @@ struct gr_gk20a {
 	bool gfxp_wfi_timeout_unit_usec;

 	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer;
-	struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
+
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image;

 	struct nvgpu_gr_ctx_desc *gr_ctx_desc;

@@ -428,10 +429,9 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
 int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 			struct nvgpu_gr_ctx *gr_ctx, bool patch);

-int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
-					struct channel_gk20a *c);
+u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block);
+
 int gk20a_init_sw_bundle(struct gk20a *g);
-int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
 int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
 				struct gr_gk20a_isr_data *isr_data);
 int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_GR_OBJ_CTX_H
+#define NVGPU_GR_OBJ_CTX_H
+
+#include <nvgpu/types.h>
+#include <nvgpu/lock.h>
+
+struct gk20a;
+struct nvgpu_gr_ctx;
+struct nvgpu_gr_subctx;
+struct vm_gk20a;
+struct nvgpu_gr_global_ctx_buffer_desc;
+struct nvgpu_mem;
+struct channel_gk20a;
+struct nvgpu_gr_obj_ctx_golden_image;
+
+int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_mem *inst_block);
+
+int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
+	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_subctx *subctx,
+	struct channel_gk20a *c,
+	struct vm_gk20a *vm,
+	struct nvgpu_mem *inst_block,
+	u32 class_num, u32 flags,
+	bool cde, bool vpr);
+
+void nvgpu_gr_obj_ctx_set_golden_image_size(
+		struct nvgpu_gr_obj_ctx_golden_image *golden_image,
+		size_t size);
+size_t nvgpu_gr_obj_ctx_get_golden_image_size(
+		struct nvgpu_gr_obj_ctx_golden_image *golden_image);
+
+u32 *nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image);
+
+int nvgpu_gr_obj_ctx_init(struct gk20a *g,
+	struct nvgpu_gr_obj_ctx_golden_image **gr_golden_image, u32 size);
+void nvgpu_gr_obj_ctx_deinit(struct gk20a *g,
+	struct nvgpu_gr_obj_ctx_golden_image *golden_image);
+
+#endif /* NVGPU_GR_OBJ_CTX_H */
--- a/drivers/gpu/nvgpu/os/linux/sysfs.c
+++ b/drivers/gpu/nvgpu/os/linux/sysfs.c
@@ -24,6 +24,7 @@
 #include <nvgpu/string.h>
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/gr/config.h>
+#include <nvgpu/gr/obj_ctx.h>
 #include <nvgpu/power_features/cg.h>
 #include <nvgpu/power_features/pg.h>

@@ -884,13 +885,11 @@ static ssize_t tpc_fs_mask_store(struct device *dev,

 		g->ops.gr.set_gpc_tpc_mask(g, 0);

-		if (g->gr.local_golden_image != NULL) {
-			nvgpu_gr_global_ctx_deinit_local_golden_image(g,
-				g->gr.local_golden_image);
-			g->gr.local_golden_image = NULL;
-			g->gr.ctx_vars.golden_image_initialized = false;
-		}
+		nvgpu_gr_obj_ctx_deinit(g, g->gr.golden_image);
+
+		g->gr.ctx_vars.golden_image_initialized = false;
 		g->gr.ctx_vars.golden_image_size = 0;
+
 		nvgpu_gr_config_deinit(g, g->gr.config);
 		/* Cause next poweron to reinit just gr */
 		g->gr.sw_ready = false;