diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c
index 69859d933..745286450 100644
--- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c
+++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c
@@ -67,6 +67,8 @@ int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
 	struct lsf_ucode_desc *lsf_desc;
 	struct nvgpu_firmware *fecs_sig;
 	struct flcn_ucode_img *p_img = (struct flcn_ucode_img *)lsf_ucode_img;
+	struct nvgpu_ctxsw_ucode_segments *fecs =
+		nvgpu_gr_falcon_get_fecs_ucode_segments(g->gr.falcon);
 	int err;
 
 	fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, 0);
@@ -90,34 +92,25 @@ int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
 		goto free_lsf_desc;
 	}
 
-	p_img->desc->bootloader_start_offset =
-		g->ctxsw_ucode_info.fecs.boot.offset;
-	p_img->desc->bootloader_size =
-		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256);
-	p_img->desc->bootloader_imem_offset =
-		g->ctxsw_ucode_info.fecs.boot_imem_offset;
-	p_img->desc->bootloader_entry_point =
-		g->ctxsw_ucode_info.fecs.boot_entry;
+	p_img->desc->bootloader_start_offset = fecs->boot.offset;
+	p_img->desc->bootloader_size = ALIGN(fecs->boot.size, 256);
+	p_img->desc->bootloader_imem_offset = fecs->boot_imem_offset;
+	p_img->desc->bootloader_entry_point = fecs->boot_entry;
 
-	p_img->desc->image_size =
-		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
-	p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
-	p_img->desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset;
+	p_img->desc->image_size = ALIGN(fecs->boot.size, 256) +
+		ALIGN(fecs->code.size, 256) + ALIGN(fecs->data.size, 256);
+	p_img->desc->app_size = ALIGN(fecs->code.size, 256) +
+		ALIGN(fecs->data.size, 256);
+	p_img->desc->app_start_offset = fecs->code.offset;
 	p_img->desc->app_imem_offset = 0;
 	p_img->desc->app_imem_entry = 0;
 	p_img->desc->app_dmem_offset = 0;
 	p_img->desc->app_resident_code_offset = 0;
-	p_img->desc->app_resident_code_size =
-		g->ctxsw_ucode_info.fecs.code.size;
+	p_img->desc->app_resident_code_size = fecs->code.size;
 	p_img->desc->app_resident_data_offset =
-		g->ctxsw_ucode_info.fecs.data.offset -
-		g->ctxsw_ucode_info.fecs.code.offset;
-	p_img->desc->app_resident_data_size =
-		g->ctxsw_ucode_info.fecs.data.size;
-	p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va;
+		fecs->data.offset - fecs->code.offset;
+	p_img->desc->app_resident_data_size = fecs->data.size;
+	p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon);
 	p_img->data_size = p_img->desc->image_size;
 
 	p_img->fw_ver = NULL;
@@ -138,6 +131,8 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
 	struct lsf_ucode_desc *lsf_desc;
 	struct nvgpu_firmware *gpccs_sig;
 	struct flcn_ucode_img *p_img = (struct flcn_ucode_img *)lsf_ucode_img;
+	struct nvgpu_ctxsw_ucode_segments *gpccs =
+		nvgpu_gr_falcon_get_gpccs_ucode_segments(g->gr.falcon);
 	int err;
 
 	if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
@@ -166,33 +161,26 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
 
 	p_img->desc->bootloader_start_offset = 0;
-	p_img->desc->bootloader_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256);
-	p_img->desc->bootloader_imem_offset =
-		g->ctxsw_ucode_info.gpccs.boot_imem_offset;
-	p_img->desc->bootloader_entry_point =
-		g->ctxsw_ucode_info.gpccs.boot_entry;
+	p_img->desc->bootloader_size = ALIGN(gpccs->boot.size, 256);
+	p_img->desc->bootloader_imem_offset = gpccs->boot_imem_offset;
+	p_img->desc->bootloader_entry_point = gpccs->boot_entry;
 
-	p_img->desc->image_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
-	p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256)
-		+ ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
+	p_img->desc->image_size = ALIGN(gpccs->boot.size, 256) +
+		ALIGN(gpccs->code.size, 256) + ALIGN(gpccs->data.size, 256);
+	p_img->desc->app_size =
+		ALIGN(gpccs->code.size, 256) + ALIGN(gpccs->data.size, 256);
 	p_img->desc->app_start_offset = p_img->desc->bootloader_size;
 	p_img->desc->app_imem_offset = 0;
 	p_img->desc->app_imem_entry = 0;
 	p_img->desc->app_dmem_offset = 0;
 	p_img->desc->app_resident_code_offset = 0;
-	p_img->desc->app_resident_code_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256);
-	p_img->desc->app_resident_data_offset =
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.offset, 256) -
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.offset, 256);
-	p_img->desc->app_resident_data_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
-	p_img->data = (u32 *)((u8 *)g->ctxsw_ucode_info.surface_desc.cpu_va +
-		g->ctxsw_ucode_info.gpccs.boot.offset);
+	p_img->desc->app_resident_code_size = ALIGN(gpccs->code.size, 256);
+	p_img->desc->app_resident_data_offset = ALIGN(gpccs->data.offset, 256) -
+		ALIGN(gpccs->code.offset, 256);
+	p_img->desc->app_resident_data_size = ALIGN(gpccs->data.size, 256);
+	p_img->data = (u32 *)
+		((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon) +
+		gpccs->boot.offset);
 	p_img->data_size = ALIGN(p_img->desc->image_size, 256);
 	p_img->fw_ver = NULL;
 	p_img->header = NULL;
@@ -820,7 +808,7 @@ int nvgpu_acr_prepare_ucode_blob_v0(struct gk20a *g)
 		return err;
 	}
 
-	err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
+	err = nvgpu_gr_falcon_init_ctxsw_ucode(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err);
 		return err;
diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c
index 4ab5fd7f6..264519abc 100644
--- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c
+++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c
@@ -75,6 +75,8 @@ int nvgpu_acr_lsf_fecs_ucode_details_v1(struct gk20a *g, void *lsf_ucode_img)
 	struct nvgpu_firmware *fecs_sig = NULL;
 	struct flcn_ucode_img_v1 *p_img =
 		(struct flcn_ucode_img_v1 *)lsf_ucode_img;
+	struct nvgpu_ctxsw_ucode_segments *fecs =
+		nvgpu_gr_falcon_get_fecs_ucode_segments(g->gr.falcon);
 	int err;
 
 	switch (ver) {
@@ -113,34 +115,25 @@
 		goto free_lsf_desc;
 	}
 
-	p_img->desc->bootloader_start_offset =
-		g->ctxsw_ucode_info.fecs.boot.offset;
-	p_img->desc->bootloader_size =
-		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256);
-	p_img->desc->bootloader_imem_offset =
-		g->ctxsw_ucode_info.fecs.boot_imem_offset;
-	p_img->desc->bootloader_entry_point =
-		g->ctxsw_ucode_info.fecs.boot_entry;
+	p_img->desc->bootloader_start_offset = fecs->boot.offset;
+	p_img->desc->bootloader_size = ALIGN(fecs->boot.size, 256);
+	p_img->desc->bootloader_imem_offset = fecs->boot_imem_offset;
+	p_img->desc->bootloader_entry_point = fecs->boot_entry;
 
-	p_img->desc->image_size =
-		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
-	p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
-	p_img->desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset;
+	p_img->desc->image_size = ALIGN(fecs->boot.size, 256) +
+		ALIGN(fecs->code.size, 256) + ALIGN(fecs->data.size, 256);
+	p_img->desc->app_size = ALIGN(fecs->code.size, 256) +
+		ALIGN(fecs->data.size, 256);
+	p_img->desc->app_start_offset = fecs->code.offset;
 	p_img->desc->app_imem_offset = 0;
 	p_img->desc->app_imem_entry = 0;
 	p_img->desc->app_dmem_offset = 0;
 	p_img->desc->app_resident_code_offset = 0;
-	p_img->desc->app_resident_code_size =
-		g->ctxsw_ucode_info.fecs.code.size;
-	p_img->desc->app_resident_data_offset =
-		g->ctxsw_ucode_info.fecs.data.offset -
-		g->ctxsw_ucode_info.fecs.code.offset;
-	p_img->desc->app_resident_data_size =
-		g->ctxsw_ucode_info.fecs.data.size;
-	p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va;
+	p_img->desc->app_resident_code_size = fecs->code.size;
+	p_img->desc->app_resident_data_offset = fecs->data.offset -
+		fecs->code.offset;
+	p_img->desc->app_resident_data_size = fecs->data.size;
+	p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon);
 	p_img->data_size = p_img->desc->image_size;
 
 	p_img->fw_ver = NULL;
@@ -166,6 +159,8 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v1(struct gk20a *g, void *lsf_ucode_img)
 	struct nvgpu_firmware *gpccs_sig = NULL;
 	struct flcn_ucode_img_v1 *p_img =
 		(struct flcn_ucode_img_v1 *)lsf_ucode_img;
+	struct nvgpu_ctxsw_ucode_segments *gpccs =
+		nvgpu_gr_falcon_get_gpccs_ucode_segments(g->gr.falcon);
 	int err;
 
 	if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
@@ -208,33 +203,26 @@
 	}
 
 	p_img->desc->bootloader_start_offset = 0;
-	p_img->desc->bootloader_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256);
-	p_img->desc->bootloader_imem_offset =
-		g->ctxsw_ucode_info.gpccs.boot_imem_offset;
-	p_img->desc->bootloader_entry_point =
-		g->ctxsw_ucode_info.gpccs.boot_entry;
+	p_img->desc->bootloader_size = ALIGN(gpccs->boot.size, 256);
+	p_img->desc->bootloader_imem_offset = gpccs->boot_imem_offset;
+	p_img->desc->bootloader_entry_point = gpccs->boot_entry;
 
-	p_img->desc->image_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
-	p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256)
-		+ ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
+	p_img->desc->image_size = ALIGN(gpccs->boot.size, 256) +
+		ALIGN(gpccs->code.size, 256) + ALIGN(gpccs->data.size, 256);
+	p_img->desc->app_size = ALIGN(gpccs->code.size, 256) +
+		ALIGN(gpccs->data.size, 256);
 	p_img->desc->app_start_offset = p_img->desc->bootloader_size;
 	p_img->desc->app_imem_offset = 0;
 	p_img->desc->app_imem_entry = 0;
 	p_img->desc->app_dmem_offset = 0;
 	p_img->desc->app_resident_code_offset = 0;
-	p_img->desc->app_resident_code_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256);
-	p_img->desc->app_resident_data_offset =
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.offset, 256) -
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.offset, 256);
-	p_img->desc->app_resident_data_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
-	p_img->data = (u32 *)((u8 *)g->ctxsw_ucode_info.surface_desc.cpu_va +
-		g->ctxsw_ucode_info.gpccs.boot.offset);
+	p_img->desc->app_resident_code_size = ALIGN(gpccs->code.size, 256);
+	p_img->desc->app_resident_data_offset = ALIGN(gpccs->data.offset, 256) -
+		ALIGN(gpccs->code.offset, 256);
+	p_img->desc->app_resident_data_size = ALIGN(gpccs->data.size, 256);
+	p_img->data = (u32 *)
+		((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon) +
+		gpccs->boot.offset);
 	p_img->data_size = ALIGN(p_img->desc->image_size, 256);
 	p_img->fw_ver = NULL;
 	p_img->header = NULL;
@@ -943,7 +931,7 @@ int nvgpu_acr_prepare_ucode_blob_v1(struct gk20a *g)
 	plsfm = &lsfm_l;
 	(void) memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr_v1));
 
-	err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
+	err = nvgpu_gr_falcon_init_ctxsw_ucode(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err);
 		return err;
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 87ab67185..2fe1a7d18 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -1483,7 +1483,7 @@ void nvgpu_channel_recover(struct gk20a *g, struct channel_gk20a *ch,
 	/* stop context switching to prevent engine assignments from
 	   changing until channel is recovered */
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to disable ctxsw");
 		goto fail;
@@ -1502,7 +1502,7 @@
 		}
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable ctxsw");
 	}
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 66ce30497..adedc2fbf 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -289,7 +289,7 @@ void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
	 * changing until engine status is checked to make sure tsg
	 * being recovered is not loaded on the engines
	 */
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 
 	if (err != 0) {
 		/* if failed to disable ctxsw, just abort tsg */
@@ -306,7 +306,7 @@ void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
	 * By that time if tsg is not on the engine, engine need not
	 * be reset.
	 */
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable ctxsw");
 	}
diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c
index d60c05c21..126fd3f14 100644
--- a/drivers/gpu/nvgpu/common/gr/gr.c
+++ b/drivers/gpu/nvgpu/common/gr/gr.c
@@ -271,6 +271,9 @@ static void gr_remove_support(struct gr_gk20a *gr)
 
 	nvgpu_gr_hwpm_map_deinit(g, gr->hwpm_map);
 
+	nvgpu_gr_falcon_remove_support(g, gr->falcon);
+	gr->falcon = NULL;
+
 	nvgpu_ecc_remove_support(g);
 	nvgpu_gr_zbc_deinit(g, gr->zbc);
 	nvgpu_gr_zcull_deinit(g, gr->zcull);
@@ -498,7 +501,31 @@ out:
 	return 0;
 }
 
-static void gr_init_prepare(struct gk20a *g)
+int nvgpu_gr_prepare_sw(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	err = nvgpu_netlist_init_ctx_vars(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to parse netlist");
+		return err;
+	}
+
+	if (gr->falcon == NULL) {
+		gr->falcon = nvgpu_gr_falcon_init_support(g);
+		if (gr->falcon == NULL) {
+			nvgpu_err(g, "failed to init gr falcon");
+			err = -ENOMEM;
+			return err;
+		}
+	}
+	return err;
+}
+
+static void gr_init_prepare_hw(struct gk20a *g)
 {
 	/* reset gr engine */
 	g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_GRAPH) |
@@ -520,13 +547,7 @@ int nvgpu_gr_enable_hw(struct gk20a *g)
 
 	nvgpu_log_fn(g, " ");
 
-	gr_init_prepare(g);
-
-	err = nvgpu_netlist_init_ctx_vars(g);
-	if (err != 0) {
-		nvgpu_err(g, "failed to parse netlist");
-		return err;
-	}
+	gr_init_prepare_hw(g);
 
 	err = gr_init_reset_enable_hw(g);
 	if (err != 0) {
@@ -541,30 +562,32 @@ int nvgpu_gr_enable_hw(struct gk20a *g)
 int nvgpu_gr_reset(struct gk20a *g)
 {
 	int err;
+	struct nvgpu_mutex *fecs_mutex =
+		nvgpu_gr_falcon_get_fecs_mutex(g->gr.falcon);
 
 	g->gr.initialized = false;
 
-	nvgpu_mutex_acquire(&g->gr.fecs_mutex);
+	nvgpu_mutex_acquire(fecs_mutex);
 
 	err = nvgpu_gr_enable_hw(g);
 	if (err != 0) {
-		nvgpu_mutex_release(&g->gr.fecs_mutex);
+		nvgpu_mutex_release(fecs_mutex);
 		return err;
 	}
 
 	err = gr_init_setup_hw(g);
 	if (err != 0) {
-		nvgpu_mutex_release(&g->gr.fecs_mutex);
+		nvgpu_mutex_release(fecs_mutex);
 		return err;
 	}
 
-	err = nvgpu_gr_falcon_init_ctxsw(g);
+	err = nvgpu_gr_falcon_init_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
-		nvgpu_mutex_release(&g->gr.fecs_mutex);
+		nvgpu_mutex_release(fecs_mutex);
 		return err;
 	}
 
-	nvgpu_mutex_release(&g->gr.fecs_mutex);
+	nvgpu_mutex_release(fecs_mutex);
 
 	/* this appears query for sw states but fecs actually init
 	   ramchain, etc so this is hw init */
@@ -598,14 +621,7 @@ int nvgpu_gr_init_support(struct gk20a *g)
 
 	g->gr.initialized = false;
 
-	/* this is required before gr_gk20a_init_ctx_state */
-	err = nvgpu_mutex_init(&g->gr.fecs_mutex);
-	if (err != 0) {
-		nvgpu_err(g, "Error in gr.fecs_mutex initialization");
-		return err;
-	}
-
-	err = nvgpu_gr_falcon_init_ctxsw(g);
+	err = nvgpu_gr_falcon_init_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		return err;
 	}
diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon.c b/drivers/gpu/nvgpu/common/gr/gr_falcon.c
index 650b19764..3918763a4 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_falcon.c
+++ b/drivers/gpu/nvgpu/common/gr/gr_falcon.c
@@ -37,6 +37,53 @@
 
 #include "gr_falcon_priv.h"
 
+#define NVGPU_FECS_UCODE_IMAGE "fecs.bin"
+#define NVGPU_GPCCS_UCODE_IMAGE "gpccs.bin"
+
+struct nvgpu_gr_falcon *nvgpu_gr_falcon_init_support(struct gk20a *g)
+{
+	struct nvgpu_gr_falcon *falcon;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	falcon = nvgpu_kzalloc(g, sizeof(*falcon));
+	if (falcon == NULL) {
+		return falcon;
+	}
+
+	err = nvgpu_mutex_init(&falcon->ctxsw_disable_mutex);
+	if (err != 0) {
+		nvgpu_err(g, "Error in ctxsw_disable_mutex init");
+		goto done;
+	}
+	falcon->ctxsw_disable_count = 0;
+
+	err = nvgpu_mutex_init(&falcon->fecs_mutex);
+	if (err != 0) {
+		nvgpu_err(g, "Error in fecs_mutex init");
+		goto done;
+	}
+
+done:
+	if (err != 0) {
+		nvgpu_kfree(g, falcon);
+		falcon = NULL;
+	}
+	return falcon;
+}
+
+void nvgpu_gr_falcon_remove_support(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (falcon == NULL) {
+		return;
+	}
+	nvgpu_kfree(g, falcon);
+}
+
 int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g)
 {
 	struct nvgpu_pmu *pmu = &g->pmu;
@@ -88,13 +135,13 @@ int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g)
 	return err;
 }
 
-int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g)
+int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g, struct nvgpu_gr_falcon *falcon)
 {
 	int err = 0;
 
 	nvgpu_log_fn(g, " ");
 
-	err = g->ops.gr.falcon.load_ctxsw_ucode(g);
+	err = g->ops.gr.falcon.load_ctxsw_ucode(g, falcon);
 	if (err != 0) {
 		goto out;
 	}
@@ -145,11 +192,12 @@ out:
 	return err;
 }
 
-static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g)
+static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = mm->pmu.vm;
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info;
 	int err;
 
 	err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
@@ -176,7 +224,7 @@
 }
 
 static void nvgpu_gr_falcon_init_ctxsw_ucode_segment(
-	struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
+	struct nvgpu_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
 {
 	p_seg->offset = *offset;
 	p_seg->size = size;
@@ -184,7 +232,7 @@
 }
 
 static void nvgpu_gr_falcon_init_ctxsw_ucode_segments(
-	struct gk20a_ctxsw_ucode_segments *segments, u32 *offset,
+	struct nvgpu_ctxsw_ucode_segments *segments, u32 *offset,
 	struct nvgpu_ctxsw_bootloader_desc *bootdesc,
 	u32 code_size, u32 data_size)
 {
@@ -203,7 +251,7 @@
 
 static int nvgpu_gr_falcon_copy_ctxsw_ucode_segments(
 	struct gk20a *g, struct nvgpu_mem *dst,
-	struct gk20a_ctxsw_ucode_segments *segments,
+	struct nvgpu_ctxsw_ucode_segments *segments,
 	u32 *bootimage, u32 *code, u32 *data)
 {
@@ -225,7 +273,8 @@
 	return 0;
 }
 
-int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g)
+int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = mm->pmu.vm;
@@ -235,11 +284,11 @@
 	struct nvgpu_firmware *gpccs_fw;
 	u32 *fecs_boot_image;
 	u32 *gpccs_boot_image;
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info;
 	u32 ucode_size;
 	int err = 0;
 
-	fecs_fw = nvgpu_request_firmware(g, GK20A_FECS_UCODE_IMAGE, 0);
+	fecs_fw = nvgpu_request_firmware(g, NVGPU_FECS_UCODE_IMAGE, 0);
 	if (fecs_fw == NULL) {
 		nvgpu_err(g, "failed to load fecs ucode!!");
 		return -ENOENT;
@@ -249,7 +298,7 @@
 	fecs_boot_image = (void *)(fecs_fw->data +
 				sizeof(struct nvgpu_ctxsw_bootloader_desc));
 
-	gpccs_fw = nvgpu_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE, 0);
+	gpccs_fw = nvgpu_request_firmware(g, NVGPU_GPCCS_UCODE_IMAGE, 0);
 	if (gpccs_fw == NULL) {
 		nvgpu_release_firmware(g, fecs_fw);
 		nvgpu_err(g, "failed to load gpccs ucode!!");
@@ -293,7 +342,7 @@
 	nvgpu_release_firmware(g, gpccs_fw);
 	gpccs_fw = NULL;
 
-	err = nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(g);
+	err = nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(g, falcon);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -352,9 +401,11 @@ static void nvgpu_gr_falcon_load_imem(struct gk20a *g)
 	nvgpu_log_fn(g, "done");
 }
 
-static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g)
+static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info =
+		&falcon->ctxsw_ucode_info;
 	u64 inst_ptr;
 
 	inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
@@ -365,7 +416,7 @@
 }
 
 static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
-	u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
+	u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments,
 	u32 reg_offset)
 {
 	u32 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
@@ -377,7 +428,7 @@
 }
 
 static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
-	u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
+	u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments,
 	u32 reg_offset)
 {
 	u32 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
@@ -391,7 +442,7 @@
 
 static void nvgpu_gr_falcon_load_ctxsw_ucode_segments(
 	struct gk20a *g, u64 addr_base,
-	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
+	struct nvgpu_ctxsw_ucode_segments *segments, u32 reg_offset)
 {
 
 	/* Copy falcon bootloader into dmem */
@@ -402,24 +453,28 @@
 }
 
-static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
+static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info =
+		&falcon->ctxsw_ucode_info;
 	u64 addr_base = ucode_info->surface_desc.gpu_va;
 
-	nvgpu_gr_falcon_bind_instblk(g);
+	nvgpu_gr_falcon_bind_instblk(g, falcon);
 
 	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
-		&g->ctxsw_ucode_info.fecs, 0);
+		&falcon->ctxsw_ucode_info.fecs, 0);
 
 	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
-		&g->ctxsw_ucode_info.gpccs,
+		&falcon->ctxsw_ucode_info.gpccs,
 		g->ops.gr.falcon.get_gpccs_start_reg_offset());
 }
 
-int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g)
+int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	int err;
+	struct gr_gk20a *gr = &g->gr;
 
 	nvgpu_log_fn(g, " ");
 
@@ -436,32 +491,35 @@
 		nvgpu_gr_falcon_load_imem(g);
 		g->ops.gr.falcon.start_ucode(g);
 	} else {
-		if (!g->gr.skip_ucode_init) {
-			err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
+		if (!gr->falcon->skip_ucode_init) {
+			err = nvgpu_gr_falcon_init_ctxsw_ucode(g, falcon);
 			if (err != 0) {
 				return err;
 			}
 		}
-		nvgpu_gr_falcon_load_with_bootloader(g);
-		g->gr.skip_ucode_init = true;
+		nvgpu_gr_falcon_load_with_bootloader(g, falcon);
+		gr->falcon->skip_ucode_init = true;
 	}
 	nvgpu_log_fn(g, "done");
 	return 0;
 }
 
-static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g)
+static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info =
+		&falcon->ctxsw_ucode_info;
 	u64 addr_base = ucode_info->surface_desc.gpu_va;
 
-	nvgpu_gr_falcon_bind_instblk(g);
+	nvgpu_gr_falcon_bind_instblk(g, falcon);
 
 	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
-		&g->ctxsw_ucode_info.gpccs,
+		&falcon->ctxsw_ucode_info.gpccs,
 		g->ops.gr.falcon.get_gpccs_start_reg_offset());
 }
 
-int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g)
+int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	int err = 0;
 	u8 falcon_id_mask = 0;
@@ -475,12 +533,12 @@
 	if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
 		/* this must be recovery so bootstrap fecs and gpccs */
 		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
-			nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
+			nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon);
 			err = nvgpu_pmu_lsfm_bootstrap_ls_falcon(g, &g->pmu,
 				g->pmu.lsfm, BIT32(FALCON_ID_FECS));
 		} else {
 			/* bind WPR VA inst block */
-			nvgpu_gr_falcon_bind_instblk(g);
+			nvgpu_gr_falcon_bind_instblk(g, falcon);
 			if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
 				err = nvgpu_sec2_bootstrap_ls_falcons(g,
 					&g->sec2, FALCON_ID_FECS);
@@ -508,10 +566,10 @@
 		/* cold boot or rg exit */
 		nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
 		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
-			nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
+			nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon);
 		} else {
 			/* bind WPR VA inst block */
-			nvgpu_gr_falcon_bind_instblk(g);
+			nvgpu_gr_falcon_bind_instblk(g, falcon);
 			if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
 					FALCON_ID_FECS)) {
 				falcon_id_mask |= BIT8(FALCON_ID_FECS);
@@ -558,53 +616,55 @@
 * to pmu elpg sequence. It could come as pmu halt or abort or
 * maybe ext error too.
 */
-int nvgpu_gr_falcon_disable_ctxsw(struct gk20a *g)
+int nvgpu_gr_falcon_disable_ctxsw(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	int err = 0;
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
-	nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
-	g->ctxsw_disable_count++;
-	if (g->ctxsw_disable_count == 1) {
+	nvgpu_mutex_acquire(&falcon->ctxsw_disable_mutex);
+	falcon->ctxsw_disable_count++;
+	if (falcon->ctxsw_disable_count == 1) {
 		err = nvgpu_pg_elpg_disable(g);
 		if (err != 0) {
 			nvgpu_err(g, "failed to disable elpg for stop_ctxsw");
 			/* stop ctxsw command is not sent */
-			g->ctxsw_disable_count--;
+			falcon->ctxsw_disable_count--;
 		} else {
 			err = g->ops.gr.falcon.ctrl_ctxsw(g,
 				NVGPU_GR_FALCON_METHOD_CTXSW_STOP, 0U, NULL);
 			if (err != 0) {
 				nvgpu_err(g, "failed to stop fecs ctxsw");
 				/* stop ctxsw failed */
-				g->ctxsw_disable_count--;
+				falcon->ctxsw_disable_count--;
 			}
 		}
 	} else {
 		nvgpu_log_info(g, "ctxsw disabled, ctxsw_disable_count: %d",
-			g->ctxsw_disable_count);
+			falcon->ctxsw_disable_count);
 	}
-	nvgpu_mutex_release(&g->ctxsw_disable_lock);
+	nvgpu_mutex_release(&falcon->ctxsw_disable_mutex);
 
 	return err;
 }
 
 /* Start processing (continue) context switches at FECS */
-int nvgpu_gr_falcon_enable_ctxsw(struct gk20a *g)
+int nvgpu_gr_falcon_enable_ctxsw(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	int err = 0;
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
-	nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
-	if (g->ctxsw_disable_count == 0) {
+	nvgpu_mutex_acquire(&falcon->ctxsw_disable_mutex);
+	if (falcon->ctxsw_disable_count == 0) {
 		goto ctxsw_already_enabled;
 	}
-	g->ctxsw_disable_count--;
-	WARN_ON(g->ctxsw_disable_count < 0);
-	if (g->ctxsw_disable_count == 0) {
+	falcon->ctxsw_disable_count--;
+	WARN_ON(falcon->ctxsw_disable_count < 0);
+	if (falcon->ctxsw_disable_count == 0) {
 		err = g->ops.gr.falcon.ctrl_ctxsw(g,
 			NVGPU_GR_FALCON_METHOD_CTXSW_START, 0U, NULL);
 		if (err != 0) {
@@ -617,10 +677,10 @@
 		}
 	} else {
 		nvgpu_log_info(g, "ctxsw_disable_count: %d is not 0 yet",
-			g->ctxsw_disable_count);
+			falcon->ctxsw_disable_count);
 	}
 ctxsw_already_enabled:
-	nvgpu_mutex_release(&g->ctxsw_disable_lock);
+	nvgpu_mutex_release(&falcon->ctxsw_disable_mutex);
 
 	return err;
 }
@@ -630,3 +690,23 @@
 	return g->ops.gr.falcon.ctrl_ctxsw(g,
 			NVGPU_GR_FALCON_METHOD_HALT_PIPELINE, 0U, NULL);
 }
+
+struct nvgpu_mutex *nvgpu_gr_falcon_get_fecs_mutex(
+		struct nvgpu_gr_falcon *falcon)
+{
+	return &falcon->fecs_mutex;
+}
+struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_fecs_ucode_segments(
+		struct nvgpu_gr_falcon *falcon)
+{
+	return &falcon->ctxsw_ucode_info.fecs;
+}
+struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_gpccs_ucode_segments(
+		struct nvgpu_gr_falcon *falcon)
+{
+	return &falcon->ctxsw_ucode_info.gpccs;
+}
+void *nvgpu_gr_falcon_get_surface_desc_cpu_va(struct nvgpu_gr_falcon *falcon)
+{
+	return falcon->ctxsw_ucode_info.surface_desc.cpu_va;
+}
diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h b/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h
index e9a940f67..6f5a85880 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h
+++ b/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h
@@ -24,6 +24,9 @@
 #define GR_FALOCN_PRIV_H
 
 #include
+#include
+
+struct nvgpu_ctxsw_ucode_segments;
 
 struct nvgpu_fecs_method_op {
 	struct {
@@ -54,5 +57,66 @@ struct nvgpu_ctxsw_bootloader_desc {
 	u32 entry_point;
 };
 
+struct nvgpu_ctxsw_ucode_info {
+	u64 *p_va;
+	struct nvgpu_mem inst_blk_desc;
+	struct nvgpu_mem surface_desc;
+	struct nvgpu_ctxsw_ucode_segments fecs;
+	struct nvgpu_ctxsw_ucode_segments gpccs;
+};
+
+struct nvgpu_gr_falcon {
+	struct nvgpu_ctxsw_ucode_info ctxsw_ucode_info;
+	struct nvgpu_mutex ctxsw_disable_mutex;
+	int ctxsw_disable_count;
+	struct nvgpu_mutex fecs_mutex; /* protect fecs method */
+	bool skip_ucode_init;
+};
+
+enum wait_ucode_status {
+	WAIT_UCODE_LOOP,
+	WAIT_UCODE_TIMEOUT,
+	WAIT_UCODE_ERROR,
+	WAIT_UCODE_OK
+};
+
+enum {
+	GR_IS_UCODE_OP_EQUAL,
+	GR_IS_UCODE_OP_NOT_EQUAL,
+	GR_IS_UCODE_OP_AND,
+	GR_IS_UCODE_OP_LESSER,
+	GR_IS_UCODE_OP_LESSER_EQUAL,
+	GR_IS_UCODE_OP_SKIP
+};
+
+enum {
+	eUcodeHandshakeInitComplete = 1,
+	eUcodeHandshakeMethodFinished
+};
+
+/* sums over the ucode files as sequences of u32, computed to the
+ * boot_signature field in the structure above */
+
+/* T18X FECS remains same as T21X,
+ * so FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED used
+ * for T18X*/
+#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED	0x68edab34U
+#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE	0x9121ab5cU
+#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED	0x9125ab5cU
+#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED	0x8a621f78U
+#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED	0x67e5344bU
+#define FALCON_UCODE_SIG_T12X_FECS_OLDER	0x56da09fU
+
+#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED	0x3d3d65e2U
+#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED	0x303465d5U
+#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED	0x3fdd33d3U
+#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER	0x53d7877U
+
+#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED	0x93671b7dU
+#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2	0x4d6cbc10U
+
+#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED	0x393161daU
+
 #endif /* GR_FALOCN_PRIV_H */
diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index ad5fb6ef8..483636535 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -331,6 +331,14 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		}
 	}
 
+	/* prepare portion of sw required for enable hw */
+	err = nvgpu_gr_prepare_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to prepare sw");
+		nvgpu_mutex_release(&g->tpc_pg_lock);
+		goto done;
+	}
+
 	err = nvgpu_gr_enable_hw(g);
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable gr");
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 4abeac26d..d9597aa93 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -176,7 +176,7 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
 		return 0;
 	}
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to disable ctxsw");
 		goto fail;
@@ -211,7 +211,7 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
 	nvgpu_mutex_release(&f->deferred_reset_mutex);
 
 clean_up:
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable ctxsw");
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ece997f52..e42daa562 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2326,7 +2326,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
	 * at that point the hardware state can be inspected to
	 * determine if the context we're interested in is current.
	 */
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		/* this should probably be ctx-fatal... */
@@ -2343,7 +2343,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
 				      num_ctx_rd_ops, ch_is_curr_ctx);
 
-	tmp_err = g->ops.gr.falcon.enable_ctxsw(g);
+	tmp_err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (tmp_err != 0) {
 		nvgpu_err(g, "unable to restart ctxsw!");
 		err = tmp_err;
@@ -2689,7 +2689,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		goto clean_up;
@@ -2709,7 +2709,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
 
 	nvgpu_mutex_release(&dbg_s->ch_list_lock);
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to restart ctxsw!");
 	}
@@ -2734,7 +2734,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		goto clean_up;
@@ -2750,7 +2750,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
 		}
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to restart ctxsw!");
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 752cbf96f..81b068f7a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -31,13 +31,8 @@
 #include
 #include
 
-#define GR_FECS_POLL_INTERVAL	5U /* usec */
-
 #define INVALID_MAX_WAYS	0xFFFFFFFFU
 
-#define GK20A_FECS_UCODE_IMAGE	"fecs.bin"
-#define GK20A_GPCCS_UCODE_IMAGE	"gpccs.bin"
-
 #define GK20A_TIMEOUT_FPGA	100000U /* 100 sec */
 
 /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
@@ -49,6 +44,7 @@ struct nvgpu_gr_ctx;
 struct channel_gk20a;
 struct nvgpu_warpstate;
 struct nvgpu_gr_ctx_desc;
+struct nvgpu_gr_falcon;
 struct nvgpu_gr_global_ctx_buffer_desc;
 struct nvgpu_gr_global_ctx_local_golden_image;
 struct nvgpu_gr_zbc;
@@ -58,27 +54,6 @@ struct nvgpu_gr_ctx_desc;
 
 enum ctxsw_addr_type;
 
-enum wait_ucode_status {
-	WAIT_UCODE_LOOP,
-	WAIT_UCODE_TIMEOUT,
-	WAIT_UCODE_ERROR,
-	WAIT_UCODE_OK
-};
-
-enum {
-	GR_IS_UCODE_OP_EQUAL,
-	GR_IS_UCODE_OP_NOT_EQUAL,
-	GR_IS_UCODE_OP_AND,
-	GR_IS_UCODE_OP_LESSER,
-	GR_IS_UCODE_OP_LESSER_EQUAL,
-	GR_IS_UCODE_OP_SKIP
-};
-
-enum {
-	eUcodeHandshakeInitComplete = 1,
-	eUcodeHandshakeMethodFinished
-};
-
 enum {
 	ELCG_MODE = (1 << 0),
 	BLCG_MODE = (1 << 1),
@@ -135,7 +110,6 @@ struct gr_gk20a {
 	} ctx_vars;
 
 	struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
-	struct nvgpu_mutex fecs_mutex; /* protect fecs method */
 
 	struct nvgpu_cond init_wq;
 	bool initialized;
@@ -160,6 +134,8 @@ struct gr_gk20a {
 
 	struct nvgpu_gr_zbc *zbc;
 
+	struct nvgpu_gr_falcon *falcon;
+
 #define GR_CHANNEL_MAP_TLB_SIZE		2U /* must of power of 2 */
 	struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
 	u32 channel_tlb_flush_index;
@@ -167,7 +143,6 @@ struct gr_gk20a {
 
 	void (*remove_support)(struct gr_gk20a *gr);
 	bool sw_ready;
-	bool skip_ucode_init;
 
 	u32 fecs_feature_override_ecc_val;
@@ -184,50 +159,7 @@ struct gr_gk20a {
 	u32 max_ctxsw_ring_buffer_size;
 };
 
-struct gk20a_ctxsw_ucode_segment {
-	u32 offset;
-	u32 size;
-};
-
-struct gk20a_ctxsw_ucode_segments {
-	u32 boot_entry;
-	u32 boot_imem_offset;
-	u32 boot_signature;
-	struct gk20a_ctxsw_ucode_segment boot;
-	struct gk20a_ctxsw_ucode_segment code;
-	struct gk20a_ctxsw_ucode_segment data;
-};
-
-/* sums over the ucode files as sequences of u32, computed to the
- * boot_signature field in the structure above */
-
-/* T18X FECS remains same as T21X,
- * so FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED used
- * for T18X*/
-#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED	0x68edab34U
-#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE	0x9121ab5cU
-#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED	0x9125ab5cU
-#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED	0x8a621f78U
-#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED	0x67e5344bU
-#define FALCON_UCODE_SIG_T12X_FECS_OLDER	0x56da09fU
-
-#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED	0x3d3d65e2U
-#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED	0x303465d5U
-#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED	0x3fdd33d3U
-#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER	0x53d7877U
-
-#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED	0x93671b7dU
-#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2	0x4d6cbc10U
-
-#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED	0x393161daU
-
-struct gk20a_ctxsw_ucode_info {
-	u64 *p_va;
-	struct nvgpu_mem inst_blk_desc;
-	struct nvgpu_mem surface_desc;
-	struct gk20a_ctxsw_ucode_segments fecs;
-	struct gk20a_ctxsw_ucode_segments gpccs;
-};
-
 
 struct nvgpu_warpstate {
 	u64 valid_warps[2];
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index bd327e499..3601352ce 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -695,7 +695,7 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g,
 	(void) memset(&tsg->sm_error_states[sm_id], 0,
 		sizeof(*tsg->sm_error_states));
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		goto fail;
@@ -716,7 +716,7 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g,
 			0);
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 
 fail:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index b22498f78..ab229c7e7 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -57,8 +57,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 				u32 **sm_dsm_perf_ctrl_regs,
 				u32 *ctrl_register_stride);
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
-void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
-	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
 bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
 u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
 int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 7f984684d..239b9472d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1053,7 +1053,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -1078,7 +1078,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
 
 	nvgpu_mutex_release(&dbg_s->ch_list_lock);
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
 		goto clean_up;
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 5b57243c4..c3aaaf390 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -3185,7 +3185,7 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g,
 	(void)memset(&tsg->sm_error_states[sm_id], 0,
 		sizeof(*tsg->sm_error_states));
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		goto fail;
@@ -3217,7 +3217,7 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g,
 			0);
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 
 fail:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c
index a46f8185a..7af921faf 100644
--- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c
@@ -33,6 +33,8 @@
 #include
 #include
 
+#define GR_FECS_POLL_INTERVAL	5U /* usec */
+
 #define FECS_ARB_CMD_TIMEOUT_MAX_US 40U
 #define FECS_ARB_CMD_TIMEOUT_DEFAULT_US 2U
 #define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX_US 1000U
@@ -733,10 +735,10 @@ int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
 				     struct nvgpu_fecs_method_op op,
 				     bool sleepduringwait)
 {
-	struct gr_gk20a *gr = &g->gr;
 	int ret;
+	struct gr_gk20a *gr = &g->gr;
 
-	nvgpu_mutex_acquire(&gr->fecs_mutex);
+	nvgpu_mutex_acquire(&gr->falcon->fecs_mutex);
 
 	if (op.mailbox.id != 0U) {
 		nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
@@ -766,7 +768,7 @@ int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
 			 op.method.data, op.method.addr);
 	}
 
-	nvgpu_mutex_release(&gr->fecs_mutex);
+	nvgpu_mutex_release(&gr->falcon->fecs_mutex);
 
 	return ret;
 }
@@ -775,10 +777,10 @@ int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
 int gm20b_gr_falcon_submit_fecs_sideband_method_op(struct gk20a *g,
 		struct nvgpu_fecs_method_op op)
 {
-	struct gr_gk20a *gr = &g->gr;
 	int ret;
+	struct gr_gk20a *gr = &g->gr;
 
-	nvgpu_mutex_acquire(&gr->fecs_mutex);
+	nvgpu_mutex_acquire(&gr->falcon->fecs_mutex);
 
 	nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(op.mailbox.id),
 		gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
@@ -796,7 +798,7 @@ int gm20b_gr_falcon_submit_fecs_sideband_method_op(struct gk20a *g,
 			op.method.data, op.method.addr);
 	}
 
-	nvgpu_mutex_release(&gr->fecs_mutex);
+	nvgpu_mutex_release(&gr->falcon->fecs_mutex);
 
 	return ret;
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 569aba03e..6f94f274e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -56,6 +56,7 @@ struct perf_pmupstate;
 struct boardobjgrp;
 struct boardobjgrp_pmu_cmd;
 struct boardobjgrpmask;
+struct nvgpu_gr_falcon;
 struct nvgpu_sgt;
 struct nvgpu_sgl;
 struct nvgpu_device_info;
@@ -564,7 +565,8 @@ struct gpu_ops {
 			void (*load_ctxsw_ucode_boot)(struct gk20a *g,
 				u32 reg_offset, u32 boot_entry,
 				u32 addr_load32, u32 blocks, u32 dst);
-			int (*load_ctxsw_ucode)(struct gk20a *g);
+			int (*load_ctxsw_ucode)(struct gk20a *g,
+					struct nvgpu_gr_falcon *falcon);
 			int (*wait_mem_scrubbing)(struct gk20a *g);
 			int (*wait_ctxsw_ready)(struct gk20a *g);
 			int (*submit_fecs_method_op)(struct gk20a *g,
@@ -575,8 +577,10 @@ struct gpu_ops {
 			int (*ctrl_ctxsw)(struct gk20a *g, u32 fecs_method,
 				u32 fecs_data, u32 *ret_val);
 			int (*halt_pipe)(struct gk20a *g);
-			int (*disable_ctxsw)(struct gk20a *g);
-			int (*enable_ctxsw)(struct gk20a *g);
+			int (*disable_ctxsw)(struct gk20a *g,
+					struct nvgpu_gr_falcon *falcon);
+			int (*enable_ctxsw)(struct gk20a *g,
+					struct nvgpu_gr_falcon *falcon);
 			u32 (*get_current_ctx)(struct gk20a *g);
 			u32 (*get_ctx_ptr)(u32 ctx);
 			u32 (*get_fecs_current_ctx_data)(struct gk20a *g,
@@ -1988,9 +1992,6 @@ struct gk20a {
 
 	nvgpu_atomic_t usage_count;
 
-	struct nvgpu_mutex ctxsw_disable_lock;
-	int ctxsw_disable_count;
-
 	struct nvgpu_ref refcount;
 
 	const char *name;
@@ -2087,8 +2088,6 @@ struct gk20a {
 
 	u32 emc3d_ratio;
 
-	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
-
 	/*
	 * A group of semaphore pools. One for each channel.
	 */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h
index 6c741ac65..2e62f30e1 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h
@@ -25,6 +25,7 @@
 
 #include
 
+int nvgpu_gr_prepare_sw(struct gk20a *g);
 int nvgpu_gr_enable_hw(struct gk20a *g);
 int nvgpu_gr_reset(struct gk20a *g);
 int nvgpu_gr_init_support(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
index d5668a922..1a14f0567 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
@@ -26,6 +26,21 @@
 #include
 
 struct gk20a;
+struct nvgpu_gr_falcon;
+
+struct nvgpu_ctxsw_ucode_segment {
+	u32 offset;
+	u32 size;
+};
+
+struct nvgpu_ctxsw_ucode_segments {
+	u32 boot_entry;
+	u32 boot_imem_offset;
+	u32 boot_signature;
+	struct nvgpu_ctxsw_ucode_segment boot;
+	struct nvgpu_ctxsw_ucode_segment code;
+	struct nvgpu_ctxsw_ucode_segment data;
+};
 
 #define NVGPU_GR_FALCON_METHOD_CTXSW_STOP 0
 #define NVGPU_GR_FALCON_METHOD_CTXSW_START 1
@@ -57,14 +72,31 @@ struct nvgpu_fecs_host_intr_status {
 	bool watchdog_active;
 };
 
+struct nvgpu_gr_falcon *nvgpu_gr_falcon_init_support(struct gk20a *g);
+void nvgpu_gr_falcon_remove_support(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
 int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g);
-int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g);
+int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g, struct nvgpu_gr_falcon *falcon);
 int nvgpu_gr_falcon_init_ctx_state(struct gk20a *g);
-int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g);
-int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g);
-int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g);
-int nvgpu_gr_falcon_disable_ctxsw(struct gk20a *g);
-int nvgpu_gr_falcon_enable_ctxsw(struct gk20a *g);
+int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
+int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
+int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
+int nvgpu_gr_falcon_disable_ctxsw(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
+int nvgpu_gr_falcon_enable_ctxsw(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
 int nvgpu_gr_falcon_halt_pipe(struct gk20a *g);
+struct nvgpu_mutex *nvgpu_gr_falcon_get_fecs_mutex(
+		struct nvgpu_gr_falcon *falcon);
+struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_fecs_ucode_segments(
+		struct nvgpu_gr_falcon *falcon);
+struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_gpccs_ucode_segments(
+		struct nvgpu_gr_falcon *falcon);
+void *nvgpu_gr_falcon_get_surface_desc_cpu_va(
+		struct nvgpu_gr_falcon *falcon);
+
 #endif /* NVGPU_GR_FALCON_H */
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index d87220115..59b707022 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -63,7 +63,6 @@ static void nvgpu_init_vars(struct gk20a *g)
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->client_lock);
 	nvgpu_mutex_init(&g->power_lock);
-	nvgpu_mutex_init(&g->ctxsw_disable_lock);
 	nvgpu_mutex_init(&g->tpc_pg_lock);
 	nvgpu_mutex_init(&g->clk_arb_enable_lock);
 	nvgpu_mutex_init(&g->cg_pg_lock);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 33aacf4ad..d3ac492b7 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1101,7 +1101,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
 	/* Suspend GPU context switching */
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		/* this should probably be ctx-fatal... */
@@ -1119,7 +1119,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		break;
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err)
 		nvgpu_err(g, "unable to restart ctxsw!");
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c
index ba4875077..b7a98232f 100644
--- a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c
+++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c
@@ -81,7 +81,6 @@ static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
 	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
 
 	nvgpu_mutex_init(&g->power_lock);
-	nvgpu_mutex_init(&g->ctxsw_disable_lock);
 	nvgpu_mutex_init(&g->clk_arb_enable_lock);
 	nvgpu_mutex_init(&g->cg_pg_lock);
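
Usage sketch (annotation, not part of the patch): the change moves the ctxsw
ucode state and the fecs/ctxsw-disable locks out of struct gk20a and struct
gr_gk20a into an opaque struct nvgpu_gr_falcon, allocated once via
nvgpu_gr_prepare_sw() and passed explicitly to every ctxsw entry point. A
hypothetical caller following the new poweron flow would look roughly like
the code below; example_gr_bringup() itself is illustrative only, while the
nvgpu functions it calls are the ones added or re-signed in this change:

	/* Illustrative only: mirrors the order used in gk20a_finalize_poweron(). */
	static int example_gr_bringup(struct gk20a *g)
	{
		int err;

		/* g->gr.falcon (mutexes included) is allocated on the first
		 * call only, so repeating this across railgate is safe. */
		err = nvgpu_gr_prepare_sw(g);
		if (err != 0) {
			return err;
		}

		err = nvgpu_gr_enable_hw(g);
		if (err != 0) {
			return err;
		}

		/* ctxsw init now takes the falcon object explicitly. */
		return nvgpu_gr_falcon_init_ctxsw(g, g->gr.falcon);
	}

Note that disable_ctxsw()/enable_ctxsw() keep their nesting semantics; only
the counter and lock now live in falcon->ctxsw_disable_count and
falcon->ctxsw_disable_mutex instead of struct gk20a.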