diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 3051135fd..8e46d70de 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -1005,7 +1005,6 @@ static void channel_free(struct nvgpu_channel *ch, bool force) nvgpu_cic_rm_wait_for_deferred_interrupts(g); unbind: - g->ops.channel.unbind(ch); g->ops.channel.free_inst(g, ch); nvgpu_channel_wdt_destroy(ch->wdt); @@ -1520,6 +1519,14 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c, } #endif + c->replayable = false; + +#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { + c->replayable = true; + } +#endif + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) { err = nvgpu_channel_setup_usermode(c, args); } else { diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 357292631..fc0be9f21 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef CONFIG_NVGPU_PROFILER #include #endif @@ -292,12 +293,7 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, } #endif - /** - * Remove channel from TSG and re-enable rest of the channels. - * Since channel removal can lead to subctx removal and/or - * VM mappings removal, acquire ctx_init_lock. - */ - nvgpu_mutex_acquire(&tsg->ctx_init_lock); + g->ops.channel.unbind(ch); nvgpu_rwsem_down_write(&tsg->ch_list_lock); nvgpu_tsg_subctx_unbind_channel(tsg, ch); @@ -311,8 +307,6 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, g->ops.channel.disable(ch); nvgpu_rwsem_up_write(&tsg->ch_list_lock); - nvgpu_mutex_release(&tsg->ctx_init_lock); - /* * Don't re-enable all channels if TSG has timed out already * @@ -345,8 +339,16 @@ int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, nvgpu_log_fn(g, "unbind tsg:%u ch:%u\n", tsg->tsgid, ch->chid); + /** + * Remove channel from TSG and re-enable rest of the channels. + * Since channel removal can lead to subctx removal and/or + * VM mappings removal, acquire ctx_init_lock. 
+ */ + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + err = nvgpu_tsg_unbind_channel_common(tsg, ch); if (!force && err == -EAGAIN) { + nvgpu_mutex_release(&tsg->ctx_init_lock); return err; } @@ -368,6 +370,8 @@ int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, } } + nvgpu_mutex_release(&tsg->ctx_init_lock); + nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); return err; @@ -413,7 +417,7 @@ fail_common: } #endif - nvgpu_mutex_acquire(&tsg->ctx_init_lock); + g->ops.channel.unbind(ch); nvgpu_rwsem_down_write(&tsg->ch_list_lock); nvgpu_tsg_subctx_unbind_channel(tsg, ch); @@ -907,6 +911,15 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid) goto clean_up; } + if (g->ops.tsg.init_subctx_state != NULL) { + err = g->ops.tsg.init_subctx_state(g, tsg); + if (err != 0) { + nvgpu_err(g, "tsg %d subctx state init failed %d", + tsg->tsgid, err); + goto clean_up; + } + } + #ifdef CONFIG_NVGPU_SM_DIVERSITY nvgpu_gr_ctx_set_sm_diversity_config(tsg->gr_ctx, NVGPU_INVALID_SM_CONFIG_ID); @@ -970,6 +983,10 @@ void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg) nvgpu_free_gr_ctx_struct(g, tsg->gr_ctx); tsg->gr_ctx = NULL; + if (g->ops.tsg.deinit_subctx_state != NULL) { + g->ops.tsg.deinit_subctx_state(g, tsg); + } + if (g->ops.tsg.deinit_eng_method_buffers != NULL) { g->ops.tsg.deinit_eng_method_buffers(g, tsg); } diff --git a/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c b/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c index 4864ccf72..ece7d6517 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c @@ -120,6 +120,10 @@ void nvgpu_tsg_subctx_unbind_channel(struct nvgpu_tsg *tsg, nvgpu_list_del(&ch->subctx_entry); if (nvgpu_list_empty(&subctx->ch_list)) { + if (g->ops.tsg.remove_subctx_channel_hw != NULL) { + g->ops.tsg.remove_subctx_channel_hw(ch); + } + if (g->ops.gr.setup.free_subctx != NULL) { g->ops.gr.setup.free_subctx(ch); subctx->gr_subctx = NULL; @@ -203,6 +207,22 @@ u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *subctx) return subctx->subctx_id; } +void nvgpu_tsg_subctx_set_replayable(struct nvgpu_tsg_subctx *subctx, + bool replayable) +{ + subctx->replayable = replayable; +} + +bool nvgpu_tsg_subctx_get_replayable(struct nvgpu_tsg_subctx *subctx) +{ + return subctx->replayable; +} + +struct vm_gk20a *nvgpu_tsg_subctx_get_vm(struct nvgpu_tsg_subctx *subctx) +{ + return subctx->vm; +} + struct nvgpu_gr_ctx_mappings *nvgpu_tsg_subctx_alloc_or_get_mappings( struct gk20a *g, struct nvgpu_tsg *tsg, diff --git a/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h b/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h index ed6376148..0939b7760 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h +++ b/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h @@ -44,6 +44,9 @@ struct nvgpu_tsg_subctx { /** Subcontext's GR ctx header and GR ctx buffers mappings. */ struct nvgpu_gr_subctx *gr_subctx; + /** Replayable faults state for a subcontext. */ + bool replayable; + /** * Subcontext's entry in TSG's (#nvgpu_tsg) subcontexts list * #subctx_list. 
diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon.c b/drivers/gpu/nvgpu/common/gr/gr_falcon.c index 919f1b18f..f47cd1567 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_falcon.c +++ b/drivers/gpu/nvgpu/common/gr/gr_falcon.c @@ -227,7 +227,11 @@ static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g, return err; } - g->ops.mm.init_inst_block_core(&ucode_info->inst_blk_desc, vm, 0); + err = g->ops.mm.init_inst_block_core(&ucode_info->inst_blk_desc, vm, 0); + if (err != 0) { + nvgpu_free_inst_block(g, &ucode_info->inst_blk_desc); + return err; + } /* Map ucode surface to GMMU */ ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm, diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index c4f8c99cf..035400585 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -228,7 +228,12 @@ static int nvgpu_init_system_vm(struct mm_gk20a *mm) if (err != 0) { goto clean_up_vm; } - g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, big_page_size); + + err = g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, big_page_size); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + goto clean_up_vm; + } return 0; @@ -247,7 +252,12 @@ static int nvgpu_init_hwpm(struct mm_gk20a *mm) if (err != 0) { return err; } - g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, 0); + + err = g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, 0); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + return err; + } return 0; } @@ -366,7 +376,12 @@ static int nvgpu_init_bar1_vm(struct mm_gk20a *mm) if (err != 0) { goto clean_up_vm; } - g->ops.mm.init_inst_block_core(inst_block, mm->bar1.vm, big_page_size); + + err = g->ops.mm.init_inst_block_core(inst_block, mm->bar1.vm, big_page_size); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + goto clean_up_vm; + } return 0; @@ -401,7 +416,11 @@ static int nvgpu_init_engine_ucode_vm(struct gk20a *g, goto clean_up_va; } - g->ops.mm.init_inst_block_core(inst_block, ucode->vm, big_page_size); + err = g->ops.mm.init_inst_block_core(inst_block, ucode->vm, big_page_size); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + goto clean_up_va; + } return 0; diff --git a/drivers/gpu/nvgpu/common/perf/perfbuf.c b/drivers/gpu/nvgpu/common/perf/perfbuf.c index c541a4e26..a5a1e30d5 100644 --- a/drivers/gpu/nvgpu/common/perf/perfbuf.c +++ b/drivers/gpu/nvgpu/common/perf/perfbuf.c @@ -72,7 +72,12 @@ int nvgpu_perfbuf_init_inst_block(struct gk20a *g) return err; } - g->ops.mm.init_inst_block_core(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); + err = g->ops.mm.init_inst_block_core(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); + if (err != 0) { + nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); + return err; + } + g->ops.perf.init_inst_block(g, &mm->perfbuf.inst_block); return 0; diff --git a/drivers/gpu/nvgpu/hal/fifo/channel_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/channel_ga10b_fusa.c index 26e2014d8..8747f98f0 100644 --- a/drivers/gpu/nvgpu/hal/fifo/channel_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/channel_ga10b_fusa.c @@ -82,9 +82,19 @@ void ga10b_channel_bind(struct nvgpu_channel *ch) { struct gk20a *g = ch->g; struct nvgpu_runlist *runlist = NULL; + int err; runlist = ch->runlist; + /* Enable subcontext */ + if (g->ops.tsg.add_subctx_channel_hw != NULL) { + err = g->ops.tsg.add_subctx_channel_hw(ch, ch->replayable); + if (err != 0) { + nvgpu_err(g, "Subcontext addition failed %d", err); + return; + } + } + /* Enable channel */ nvgpu_chram_bar0_writel(g, runlist, 
runlist_chram_channel_r(ch->chid), runlist_chram_channel_update_f( diff --git a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b.h index a563b2b27..eb0e64998 100644 --- a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,6 +30,7 @@ struct nvgpu_channel; struct nvgpu_channel_hw_state; struct nvgpu_debug_context; +void gv11b_channel_bind(struct nvgpu_channel *ch); void gv11b_channel_unbind(struct nvgpu_channel *ch); u32 gv11b_channel_count(struct gk20a *g); void gv11b_channel_read_state(struct gk20a *g, struct nvgpu_channel *ch, diff --git a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c index 59edfbba0..7e52b834f 100644 --- a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c @@ -32,6 +32,42 @@ #include +void gv11b_channel_bind(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + int err; + + u32 inst_ptr = nvgpu_inst_block_ptr(g, &ch->inst_block); + + nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x", + ch->chid, inst_ptr); + + /* Enable subcontext */ + if (g->ops.tsg.add_subctx_channel_hw != NULL) { + err = g->ops.tsg.add_subctx_channel_hw(ch, ch->replayable); + if (err != 0) { + nvgpu_err(g, "Subcontext addition failed %d", err); + return; + } + } + + /* Enable channel */ + nvgpu_writel(g, ccsr_channel_inst_r(ch->chid), + ccsr_channel_inst_ptr_f(inst_ptr) | + nvgpu_aperture_mask(g, &ch->inst_block, + ccsr_channel_inst_target_sys_mem_ncoh_f(), + ccsr_channel_inst_target_sys_mem_coh_f(), + ccsr_channel_inst_target_vid_mem_f()) | + ccsr_channel_inst_bind_true_f()); + + nvgpu_writel(g, ccsr_channel_r(ch->chid), + (nvgpu_readl(g, ccsr_channel_r(ch->chid)) & + ~ccsr_channel_enable_set_f(~U32(0U))) | + ccsr_channel_enable_set_true_f()); + + nvgpu_atomic_set(&ch->bound, 1); +} + void gv11b_channel_unbind(struct nvgpu_channel *ch) { struct gk20a *g = ch->g; diff --git a/drivers/gpu/nvgpu/hal/fifo/ramfc_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/ramfc_ga10b_fusa.c index 50840ddc6..f644bd885 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramfc_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramfc_ga10b_fusa.c @@ -44,7 +44,6 @@ int ga10b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, u32 eng_intr_mask = 0U; u32 eng_intr_vector = 0U; u32 eng_bitmask = 0U; - bool replayable = false; (void)flags; @@ -65,18 +64,9 @@ int ga10b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, nvgpu_memset(g, mem, 0U, 0U, ram_fc_size_val_v()); -#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT - if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { - replayable = true; - } -#endif - nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); - g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem, - replayable, nvgpu_channel_get_max_subctx_count(ch)); - nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), g->ops.pbdma.get_gp_base(gpfifo_base)); diff --git a/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b_fusa.c index b2a2245ca..f482b3f98 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b_fusa.c @@ 
-38,7 +38,6 @@ int gv11b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, struct gk20a *g = ch->g; struct nvgpu_mem *mem = &ch->inst_block; u32 data; - bool replayable = false; (void)flags; @@ -46,18 +45,9 @@ int gv11b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v()); -#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT - if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { - replayable = true; - } -#endif - nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); - g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem, - replayable, nvgpu_channel_get_max_subctx_count(ch)); - nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), g->ops.pbdma.get_gp_base(gpfifo_base)); diff --git a/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c b/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c index cba9b0373..4a8c577bc 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c @@ -39,24 +39,14 @@ int tu104_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, struct gk20a *g = ch->g; struct nvgpu_mem *mem = &ch->inst_block; u32 data; - bool replayable = false; nvgpu_log_fn(g, " "); nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v()); -#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT - if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { - replayable = true; - } -#endif - nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); - g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem, - replayable, nvgpu_channel_get_max_subctx_count(ch)); - nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), g->ops.pbdma.get_gp_base(gpfifo_base)); diff --git a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h index c7321f1d0..e0c3e3702 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,9 +30,15 @@ struct nvgpu_mem; void gv11b_ramin_set_gr_ptr(struct gk20a *g, struct nvgpu_mem *inst_block, u64 gpu_va); +void gv11b_ramin_set_subctx_pdb_info(struct gk20a *g, + u32 subctx_id, struct nvgpu_mem *pdb_mem, + bool replayable, bool add, u32 *subctx_pdb_map); +void gv11b_ramin_init_subctx_pdb_map(struct gk20a *g, + u32 *subctx_pdb_map); +void gv11b_ramin_init_subctx_valid_mask(struct gk20a *g, + struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask); void gv11b_ramin_init_subctx_pdb(struct gk20a *g, - struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count); + struct nvgpu_mem *inst_block, u32 *subctx_pdb_map); void gv11b_ramin_set_eng_method_buffer(struct gk20a *g, struct nvgpu_mem *inst_block, u64 gpu_va); void gv11b_ramin_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, diff --git a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b_fusa.c index abb5cb83f..55551e01b 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b_fusa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -45,85 +45,89 @@ void gv11b_ramin_set_gr_ptr(struct gk20a *g, ram_in_engine_wfi_ptr_hi_f(addr_hi)); } -static void gv11b_subctx_commit_valid_mask(struct gk20a *g, - struct nvgpu_mem *inst_block, u32 max_subctx_count) +void gv11b_ramin_set_subctx_pdb_info(struct gk20a *g, + u32 subctx_id, struct nvgpu_mem *pdb_mem, + bool replayable, bool add, u32 *subctx_pdb_map) { + u32 format_word = 0; + u32 pdb_addr_lo = 0; + u32 pdb_addr_hi = 0; + u64 pdb_addr; + u32 aperture; + + if (add) { + aperture = nvgpu_aperture_mask(g, pdb_mem, + ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), + ram_in_sc_page_dir_base_target_sys_mem_coh_v(), + ram_in_sc_page_dir_base_target_vid_mem_v()); + + pdb_addr = nvgpu_mem_get_addr(g, pdb_mem); + pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); + pdb_addr_hi = u64_hi32(pdb_addr); + format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U) | + ram_in_sc_page_dir_base_vol_f( + ram_in_sc_page_dir_base_vol_true_v(), 0U) | + ram_in_sc_use_ver2_pt_format_f(1U, 0U) | + ram_in_sc_big_page_size_f(1U, 0U) | + ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); + + if (replayable) { + format_word |= + ram_in_sc_page_dir_base_fault_replay_tex_f(1U, 0U) | + ram_in_sc_page_dir_base_fault_replay_gcc_f(1U, 0U); + } + } else { + aperture = ram_in_sc_page_dir_base_target_invalid_v(); + format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U); + } + + nvgpu_log(g, gpu_dbg_info, "%s subctx[%u] pdb info lo %x hi %x", + add ? "add" : "remove", subctx_id, + format_word, pdb_addr_hi); + + subctx_pdb_map[subctx_id * 4U] = format_word; + subctx_pdb_map[(subctx_id * 4U) + 1U] = pdb_addr_hi; +} + +void gv11b_ramin_init_subctx_pdb_map(struct gk20a *g, + u32 *subctx_pdb_map) +{ + u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count(); + u32 i; + + /* Initially, all subcontexts are invalid in the TSG. */ + for (i = 0; i < max_subctx_count; i++) { + gv11b_ramin_set_subctx_pdb_info(g, i, NULL, false, false, + subctx_pdb_map); + } +} + +void gv11b_ramin_init_subctx_valid_mask(struct gk20a *g, + struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask) +{ + u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count(); u32 id; - u32 subctx_count = max_subctx_count; for (id = 0U; id < max_subctx_count; id += 32U) { - u32 subctx_mask_max_bit = ((subctx_count < 32U) ? 
- (subctx_count % 32U) : 0U); - u32 subctx_mask = U32_MAX; - - if (subctx_mask_max_bit != 0U) { - subctx_mask = nvgpu_safe_sub_u32( - BIT32(subctx_mask_max_bit), 1U); - } + u32 subctx_mask = ((u32 *)valid_subctx_mask)[id / 32U]; nvgpu_mem_wr32(g, inst_block, ram_in_sc_pdb_valid_long_w(id), subctx_mask); nvgpu_log(g, gpu_dbg_info | gpu_dbg_mig, - "id[%d] max_subctx_count[%u] subctx_mask_max_bit[%u] " - "subctx_count[%u] subctx_mask[%x] ", - id, max_subctx_count, subctx_mask_max_bit, - subctx_count, subctx_mask); - - if (subctx_count > 32U) { - subctx_count = nvgpu_safe_sub_u32(subctx_count, 32U); - } - } -} - -static void gv11b_subctx_commit_pdb(struct gk20a *g, - struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count) -{ - u32 lo, hi; - u32 subctx_id = 0; - u32 format_word; - u32 pdb_addr_lo, pdb_addr_hi; - u64 pdb_addr; - u32 aperture = nvgpu_aperture_mask(g, pdb_mem, - ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), - ram_in_sc_page_dir_base_target_sys_mem_coh_v(), - ram_in_sc_page_dir_base_target_vid_mem_v()); - - pdb_addr = nvgpu_mem_get_addr(g, pdb_mem); - pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); - pdb_addr_hi = u64_hi32(pdb_addr); - format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U) | - ram_in_sc_page_dir_base_vol_f( - ram_in_sc_page_dir_base_vol_true_v(), 0U) | - ram_in_sc_use_ver2_pt_format_f(1U, 0U) | - ram_in_sc_big_page_size_f(1U, 0U) | - ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); - - if (replayable) { - format_word |= - ram_in_sc_page_dir_base_fault_replay_tex_f(1U, 0U) | - ram_in_sc_page_dir_base_fault_replay_gcc_f(1U, 0U); - } - - nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", - format_word, pdb_addr_hi); - for (subctx_id = 0U; subctx_id < max_subctx_count; subctx_id++) { - lo = ram_in_sc_page_dir_base_vol_w(subctx_id); - hi = ram_in_sc_page_dir_base_hi_w(subctx_id); - nvgpu_mem_wr32(g, inst_block, lo, format_word); - nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); + "id[%d] max_subctx_count[%u] subctx_mask[%x] ", + id, max_subctx_count, subctx_mask); } } void gv11b_ramin_init_subctx_pdb(struct gk20a *g, - struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count) + struct nvgpu_mem *inst_block, u32 *subctx_pdb_map) { - gv11b_subctx_commit_pdb(g, inst_block, pdb_mem, replayable, - max_subctx_count); - gv11b_subctx_commit_valid_mask(g, inst_block, max_subctx_count); + u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count(); + u32 size = max_subctx_count * 4U * 4U; + nvgpu_mem_wr_n(g, inst_block, ram_in_sc_page_dir_base_vol_w(0) * 4U, + subctx_pdb_map, size); } void gv11b_ramin_set_eng_method_buffer(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h index f148cf23b..cb55192e0 100644 --- a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -38,4 +38,61 @@ void gv11b_tsg_deinit_eng_method_buffers(struct gk20a *g, void gv11b_tsg_bind_channel_eng_method_buffers(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch); +/** + * @brief Initialize subcontext PDB map and valid mask for a TSG. + * + * @param g [in] Pointer to GPU driver struct. 
+ * @param tsg [in] Pointer to TSG struct. + * + * - If subcontexts are enabled: + * - Allocate array of PDB configuration values for maximum supported + * subcontexts. + * - Initialize the array by calling g->ops.ramin.init_subctx_pdb_map. + * - Allocate valid subcontexts bitmask. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int gv11b_tsg_init_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg); + +/** + * @brief Deinitialize subcontext PDB map and valid mask for a TSG. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * + * - If subcontexts are enabled: + * - Free array of PDB configuration values. + * - Free valid subcontexts bitmask. + */ +void gv11b_tsg_deinit_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg); + +/** + * @brief Add a subctx channel to TSG. + * + * @param ch [in] Pointer to Channel struct. + * @param replayable [in] Replayable state of the channel. + * + * - If subcontexts are enabled: + * - Update subcontext info in TSG members if this is the first channel + * of a subcontext and update instance blocks of all channels + * in the TSG with this information. + * - If this is a channel in an existing subcontext: + * - Update the channel instance block with subcontext info. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int gv11b_tsg_add_subctx_channel_hw(struct nvgpu_channel *ch, bool replayable); + +/** + * @brief Remove a subctx channel from TSG. + * + * @param ch [in] Pointer to Channel struct. + * + * - If subcontexts are enabled: + * - Update subcontext info in TSG members as this is the last channel + * of a subcontext and update instance blocks of all channels + * in the TSG with this information. + */ +void gv11b_tsg_remove_subctx_channel_hw(struct nvgpu_channel *ch); + #endif /* NVGPU_TSG_GV11B_H */ diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b_fusa.c index 760cef6bb..cbd77728d 100644 --- a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b_fusa.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -174,3 +175,192 @@ void gv11b_tsg_deinit_eng_method_buffers(struct gk20a *g, nvgpu_log_info(g, "eng method buffers de-allocated"); } + +int gv11b_tsg_init_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + u32 max_subctx_count; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return 0; + } + + max_subctx_count = g->ops.gr.init.get_max_subctx_count(); + + /* + * Allocate an array of subctx PDB configuration values for all supported + * subcontexts. For each subctx, there will be two registers to be + * configured, ram_in_sc_page_dir_base_lo_w(i) and + * ram_in_sc_page_dir_base_hi_w(i) in the instance block for the channels + * belonging to this TSG. Two more unused registers follow these for each + * subcontext. The same PDB table/array is programmed in the instance block + * of all the channels. + * + * As subcontexts are bound to the TSG, their configuration register + * values are added to the array and the corresponding bit is set in the + * valid_subctxs bitmask. And as subcontexts are unbound from + * the TSG, their configuration register values are invalidated in the + * array and the corresponding bit is cleared in the valid_subctxs bitmask.
+ */ + tsg->subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U); + if (tsg->subctx_pdb_map == NULL) { + nvgpu_err(g, "subctx_pdb_map alloc failed"); + return -ENOMEM; + } + + g->ops.ramin.init_subctx_pdb_map(g, tsg->subctx_pdb_map); + + tsg->valid_subctxs = nvgpu_kzalloc(g, + BITS_TO_LONGS(max_subctx_count) * + sizeof(unsigned long)); + if (tsg->valid_subctxs == NULL) { + nvgpu_err(g, "valid_subctxs bitmap alloc failed"); + nvgpu_kfree(g, tsg->subctx_pdb_map); + tsg->subctx_pdb_map = NULL; + return -ENOMEM; + } + + return 0; +} + +void gv11b_tsg_deinit_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return; + } + + nvgpu_kfree(g, tsg->subctx_pdb_map); + tsg->subctx_pdb_map = NULL; + + nvgpu_kfree(g, tsg->valid_subctxs); + tsg->valid_subctxs = NULL; +} + +static void gv11b_tsg_update_inst_blocks_subctxs(struct nvgpu_tsg *tsg) +{ + struct gk20a *g = tsg->g; + struct nvgpu_channel *ch; + + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + g->ops.ramin.init_subctx_pdb(g, &ch->inst_block, + tsg->subctx_pdb_map); + g->ops.ramin.init_subctx_mask(g, &ch->inst_block, + tsg->valid_subctxs); + } +} + +static void gv11b_tsg_update_subctxs(struct nvgpu_tsg *tsg, u32 subctx_id, + struct vm_gk20a *vm, bool replayable, bool add) +{ + struct gk20a *g = tsg->g; + + if (add) { + g->ops.ramin.set_subctx_pdb_info(g, subctx_id, vm->pdb.mem, + replayable, true, tsg->subctx_pdb_map); + nvgpu_set_bit(subctx_id, tsg->valid_subctxs); + } else { + g->ops.ramin.set_subctx_pdb_info(g, subctx_id, NULL, + false, false, tsg->subctx_pdb_map); + nvgpu_clear_bit(subctx_id, tsg->valid_subctxs); + } + + gv11b_tsg_update_inst_blocks_subctxs(tsg); +} + +static void gv11b_tsg_add_new_subctx_channel_hw(struct nvgpu_channel *ch, + bool replayable) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + struct nvgpu_tsg_subctx *subctx = ch->subctx; + struct vm_gk20a *vm = nvgpu_tsg_subctx_get_vm(subctx); + u32 subctx_id = nvgpu_tsg_subctx_get_id(subctx); + + nvgpu_tsg_subctx_set_replayable(subctx, replayable); + + gv11b_tsg_update_subctxs(tsg, subctx_id, vm, replayable, true); +} + +static void gv11b_tsg_add_existing_subctx_channel_hw(struct nvgpu_channel *ch, + bool replayable) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + struct nvgpu_tsg_subctx *subctx = ch->subctx; + struct gk20a *g = ch->g; + + if (nvgpu_tsg_subctx_get_replayable(subctx) != replayable) { + nvgpu_err(g, "subctx replayable mismatch. ignoring."); + } + + g->ops.ramin.init_subctx_pdb(g, &ch->inst_block, tsg->subctx_pdb_map); + g->ops.ramin.init_subctx_mask(g, &ch->inst_block, tsg->valid_subctxs); +} + +int gv11b_tsg_add_subctx_channel_hw(struct nvgpu_channel *ch, bool replayable) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + struct gk20a *g = tsg->g; + int err; + + nvgpu_log(g, gpu_dbg_fn, " "); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return 0; + } + + /* + * Add a new subcontext to the TSG. The sequence is as follows: + * 1. Disable TSG. + * 2. Preempt TSG. + * 3. Program subctx PDBs in instance blocks of all channels in + * the TSG. + * 4. Enable TSG. + * This sequence is executed while holding the TSG-level lock + * ctx_init_lock to synchronize with channels from other subcontexts. + * ctx_init_lock is reused here; it originally exists to synchronize + * the GR context initialization by the various channels in the TSG.
+ */ + + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + + g->ops.tsg.disable(tsg); + err = g->ops.fifo.preempt_tsg(g, tsg); + if (err != 0) { + g->ops.tsg.enable(tsg); + nvgpu_mutex_release(&tsg->ctx_init_lock); + nvgpu_err(g, "preempt failed %d", err); + return err; + } + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + + if (!nvgpu_test_bit(ch->subctx_id, tsg->valid_subctxs)) { + gv11b_tsg_add_new_subctx_channel_hw(ch, replayable); + } else { + gv11b_tsg_add_existing_subctx_channel_hw(ch, replayable); + } + + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + g->ops.tsg.enable(tsg); + nvgpu_mutex_release(&tsg->ctx_init_lock); + + nvgpu_log(g, gpu_dbg_fn, "done"); + + return 0; +} + +void gv11b_tsg_remove_subctx_channel_hw(struct nvgpu_channel *ch) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + struct gk20a *g = tsg->g; + u32 subctx_id; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return; + } + + subctx_id = nvgpu_tsg_subctx_get_id(ch->subctx); + + gv11b_tsg_update_subctxs(tsg, subctx_id, NULL, false, false); +} diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c index 34bdb2071..01036cf87 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c @@ -1131,7 +1131,10 @@ static const struct gops_ramin ga100_ops_ramin = { .set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = ga10b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -1199,6 +1202,10 @@ static const struct gops_channel ga100_ops_channel = { static const struct gops_tsg ga100_ops_tsg = { .enable = gv11b_tsg_enable, .disable = nvgpu_tsg_disable, + .init_subctx_state = gv11b_tsg_init_subctx_state, + .deinit_subctx_state = gv11b_tsg_deinit_subctx_state, + .add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw, + .remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .bind_channel = NULL, diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c index 2342f5edb..2fbd2c163 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c @@ -1145,7 +1145,10 @@ static const struct gops_ramin ga10b_ops_ramin = { .set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = ga10b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -1214,6 +1217,10 @@ static const struct gops_channel ga10b_ops_channel = { static const struct gops_tsg ga10b_ops_tsg = { .enable = gv11b_tsg_enable, .disable = nvgpu_tsg_disable, + .init_subctx_state = gv11b_tsg_init_subctx_state, + .deinit_subctx_state = gv11b_tsg_deinit_subctx_state, + .add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw, + .remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw, .init_eng_method_buffers = 
gv11b_tsg_init_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .bind_channel = NULL, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 35604bc97..f34b77cb9 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -803,7 +803,7 @@ static const struct gops_mm gm20b_ops_mm = { .setup_hw = nvgpu_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, .init_inst_block = gk20a_mm_init_inst_block, - .init_inst_block_core = gk20a_mm_init_inst_block, + .init_inst_block_core = gk20a_mm_init_inst_block_core, .get_default_va_sizes = gm20b_mm_get_default_va_sizes, #ifdef CONFIG_NVGPU_USERD .bar1_map_userd = gk20a_mm_bar1_map_userd, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index e691fad13..d575f30ca 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -969,7 +969,10 @@ static const struct gops_ramin gv11b_ops_ramin = { .set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = gv11b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -1013,7 +1016,7 @@ static const struct gops_userd gv11b_ops_userd = { static const struct gops_channel gv11b_ops_channel = { .alloc_inst = nvgpu_channel_alloc_inst, .free_inst = nvgpu_channel_free_inst, - .bind = gm20b_channel_bind, + .bind = gv11b_channel_bind, .unbind = gv11b_channel_unbind, .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, @@ -1030,6 +1033,10 @@ static const struct gops_channel gv11b_ops_channel = { static const struct gops_tsg gv11b_ops_tsg = { .enable = gv11b_tsg_enable, .disable = nvgpu_tsg_disable, + .init_subctx_state = gv11b_tsg_init_subctx_state, + .deinit_subctx_state = gv11b_tsg_deinit_subctx_state, + .add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw, + .remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .bind_channel = NULL, diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 74790477d..a22151849 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -1026,7 +1026,10 @@ static const struct gops_ramin tu104_ops_ramin = { .set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = gv11b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -1068,7 +1071,7 @@ static const struct gops_userd tu104_ops_userd = { static const struct gops_channel tu104_ops_channel = { .alloc_inst = nvgpu_channel_alloc_inst, .free_inst = nvgpu_channel_free_inst, - .bind = gm20b_channel_bind, + .bind = gv11b_channel_bind, .unbind = gv11b_channel_unbind, .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, @@ -1085,6 +1088,10 @@ 
static const struct gops_channel tu104_ops_channel = { static const struct gops_tsg tu104_ops_tsg = { .enable = gv11b_tsg_enable, .disable = nvgpu_tsg_disable, + .init_subctx_state = gv11b_tsg_init_subctx_state, + .deinit_subctx_state = gv11b_tsg_deinit_subctx_state, + .add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw, + .remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .bind_channel = NULL, diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gk20a.c b/drivers/gpu/nvgpu/hal/mm/mm_gk20a.c index f404b2fee..fc7917cf5 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gk20a.c +++ b/drivers/gpu/nvgpu/hal/mm/mm_gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -44,6 +44,14 @@ void gk20a_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, } } +int gk20a_mm_init_inst_block_core(struct nvgpu_mem *inst_block, + struct vm_gk20a *vm, u32 big_page_size) +{ + gk20a_mm_init_inst_block(inst_block, vm, big_page_size); + + return 0; +} + #ifdef CONFIG_NVGPU_USERD u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) { diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gk20a.h b/drivers/gpu/nvgpu/hal/mm/mm_gk20a.h index b74049969..8bed6444a 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gk20a.h +++ b/drivers/gpu/nvgpu/hal/mm/mm_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,6 +30,8 @@ struct vm_gk20a; void gk20a_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); +int gk20a_mm_init_inst_block_core(struct nvgpu_mem *inst_block, + struct vm_gk20a *vm, u32 big_page_size); u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset); #endif diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gp10b_fusa.c b/drivers/gpu/nvgpu/hal/mm/mm_gp10b_fusa.c index 53a396596..94a617abe 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gp10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/mm/mm_gp10b_fusa.c @@ -51,7 +51,11 @@ int gp10b_mm_init_bar2_vm(struct gk20a *g) goto clean_up_va; } - g->ops.mm.init_inst_block_core(inst_block, mm->bar2.vm, big_page_size); + err = g->ops.mm.init_inst_block_core(inst_block, mm->bar2.vm, big_page_size); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + goto clean_up_va; + } return 0; diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gv11b.h b/drivers/gpu/nvgpu/hal/mm/mm_gv11b.h index 754335887..f0e0f7157 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gv11b.h +++ b/drivers/gpu/nvgpu/hal/mm/mm_gv11b.h @@ -30,7 +30,7 @@ struct vm_gk20a; void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); -void gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, +int gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); bool gv11b_mm_is_bar1_supported(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/mm/mm_gv11b_fusa.c index edf501959..2e8244c38 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/mm/mm_gv11b_fusa.c @@ -21,6 +21,7 @@ */ #include +#include #include #include @@ -42,14 +43,48 @@ void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block, } } -void gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, +int gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size) { struct gk20a *g = gk20a_from_vm(vm); + u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count(); + unsigned long *valid_subctxs; + u32 *subctx_pdb_map; + + subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U); + if (subctx_pdb_map == NULL) { + nvgpu_err(g, "subctx_pdb_map alloc failed"); + return -ENOMEM; + } + + valid_subctxs = nvgpu_kzalloc(g, + BITS_TO_LONGS(max_subctx_count) * + sizeof(unsigned long)); + if (valid_subctxs == NULL) { + nvgpu_err(g, "valid_subctxs bitmask alloc failed"); + nvgpu_kfree(g, subctx_pdb_map); + return -ENOMEM; + } gv11b_mm_init_inst_block(inst_block, vm, big_page_size); - g->ops.ramin.init_subctx_pdb(g, inst_block, vm->pdb.mem, false, 1U); + /* Program subctx pdb info in the instance block */ + g->ops.ramin.init_subctx_pdb_map(g, subctx_pdb_map); + g->ops.ramin.set_subctx_pdb_info(g, CHANNEL_INFO_VEID0, vm->pdb.mem, + false, true, subctx_pdb_map); + g->ops.ramin.init_subctx_pdb(g, inst_block, subctx_pdb_map); + + /* + * Program subctx pdb valid mask in the instance block. + * Only subctx 0 is valid here. 
+ */ + nvgpu_set_bit(CHANNEL_INFO_VEID0, valid_subctxs); + g->ops.ramin.init_subctx_mask(g, inst_block, valid_subctxs); + + nvgpu_kfree(g, valid_subctxs); + nvgpu_kfree(g, subctx_pdb_map); + + return 0; } bool gv11b_mm_is_bar1_supported(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c index dc3244ca5..38c608e5d 100644 --- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c @@ -693,7 +693,10 @@ static const struct gops_ramin vgpu_ga10b_ops_ramin = { .set_gr_ptr = NULL, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = gv11b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -744,6 +747,10 @@ static const struct gops_channel vgpu_ga10b_ops_channel = { static const struct gops_tsg vgpu_ga10b_ops_tsg = { .open = vgpu_tsg_open, .release = vgpu_tsg_release, + .init_subctx_state = NULL, + .deinit_subctx_state = NULL, + .add_subctx_channel_hw = NULL, + .remove_subctx_channel_hw = NULL, .init_eng_method_buffers = NULL, .deinit_eng_method_buffers = NULL, .enable = gv11b_tsg_enable, diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c index 770f5e7e6..c68c788b9 100644 --- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c @@ -660,7 +660,10 @@ static const struct gops_ramin vgpu_gv11b_ops_ramin = { .set_gr_ptr = NULL, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = gv11b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -712,6 +715,10 @@ static const struct gops_channel vgpu_gv11b_ops_channel = { static const struct gops_tsg vgpu_gv11b_ops_tsg = { .open = vgpu_tsg_open, .release = vgpu_tsg_release, + .init_subctx_state = NULL, + .deinit_subctx_state = NULL, + .add_subctx_channel_hw = NULL, + .remove_subctx_channel_hw = NULL, .init_eng_method_buffers = NULL, .deinit_eng_method_buffers = NULL, .enable = gv11b_tsg_enable, diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 573a2a5a8..1ce5484fb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -501,6 +501,11 @@ struct nvgpu_channel { /** Runlist the channel will run on. */ struct nvgpu_runlist *runlist; + /** + * Replayable fault state for the channel. + */ + bool replayable; + /** * Recovery path can be entered twice for the same error in * case of mmu_nack. This flag indicates if we already recovered @@ -960,7 +965,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g, pid_t pid, pid_t tid); /** - * @brief Setup and bind the channel + * @brief Setup and bind the channel and add subcontext PDB. * * @param ch [in] Channel pointer. * @param args [in] Setup bind arguments. @@ -975,6 +980,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g, * provided in args. 
A submit token is passed back to be written in the * doorbell register in the usermode region to notify the GPU for new * work on this channel. + * Update the instance blocks of all channels to add the subctx pdb. * * @note An address space needs to have been bound to the channel before * calling this function. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h b/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h index 60fb44d96..a13c67bdb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h @@ -566,7 +566,7 @@ struct gops_mm { * - Configures the pdb base, big page size and * 0th sub context's pdb base in context's instance block memory. */ - void (*init_inst_block_core)(struct nvgpu_mem *inst_block, + int (*init_inst_block_core)(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); /** diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/ramin.h b/drivers/gpu/nvgpu/include/nvgpu/gops/ramin.h index 2fd5d017a..1e3c54678 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/ramin.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/ramin.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -90,34 +90,78 @@ struct gops_ramin { void (*init_pdb)(struct gk20a *g, struct nvgpu_mem *inst_block, u64 pdb_addr, struct nvgpu_mem *pdb_mem); + /** + * @brief Init subcontext pdb map for a TSG. + * + * @param g [in] Pointer to GPU driver struct. + * @param subctx_pdb_map [in] Pointer to the subcontext pdb map of a TSG. + * + * This HAL initializes the PDB map entries for all subcontexts of + * a TSG. It marks all subcontext PDBs as invalid. + */ + void (*init_subctx_pdb_map)(struct gk20a *g, + u32 *subctx_pdb_map); + + /** + * @brief Update subcontext pdb map for subcontext addition/removal. + * + * @param g [in] Pointer to GPU driver struct. + * @param subctx_id [in] Subcontext ID. + * @param pdb_mem [in] Memory descriptor of PDB. + * @param replayable [in] Indicates if errors are replayable + * for this subcontext. + * @param add [in] Indicates whether the subcontext PDB is + * to be added or removed. + * @param subctx_pdb_map [in] Pointer to the subcontext pdb map of a TSG. + * + * This HAL updates the PDB map entry for a sub-context: + * If adding a subcontext PDB: + * - Get aperture mask from \a pdb_mem. + * - Get physical address of \a pdb_mem. + * - Build PDB entry with defaults for PT version, big page size, + * volatile attribute, and above aperture. + * - If \a replayable is true, set replayable attribute for TEX + * and GCC faults. + * - Set lo and hi 32-bits to point to \a pdb_mem. + * - Program related entry in \a subctx_pdb_map. + * If removing a subcontext PDB: + * - Set aperture as ram_in_sc_page_dir_base_target_invalid_v(). + * - Program related entry in \a subctx_pdb_map. + */ + void (*set_subctx_pdb_info)(struct gk20a *g, + u32 subctx_id, struct nvgpu_mem *pdb_mem, + bool replayable, bool add, u32 *subctx_pdb_map); + /** + * @brief Init PDB for sub-contexts. + * + * @param g [in] Pointer to GPU driver struct. + * @param inst_block [in] Memory descriptor of Instance Block. - * @param pdb_mem [in] Memory descriptor of PDB. - * @param replayable [in] Indicates if errors are replayable - * for this Instance Block. - * @param max_subctx_count [in] Max number of sub context. 
+ * @param subctx_pdb_map [in] Pointer to the subcontext pdb map of a TSG. * - * This HAL configures PDB for all sub-contexts of Instance Block: - * - Get max number of sub-contexts from HW. - * - Get aperture mask from \a pdb_mem. - * - Get physical address of \a pdb_mem. - * - For each sub-context: - * - Build PDB entry with defaults for PT version, big page size, - * volatile attribute, and above aperture. - * - If \a replayable is true, set replayable attribute for TEX - * and GCC faults. - * - Set lo and hi 32-bits to point to \a pdb_mem. - * - Program related entry in Instance Block. - * - * @see NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE + * This HAL configures PDBs for all sub-contexts of the Instance Block. + * It copies \a subctx_pdb_map to the offset + * ram_in_sc_page_dir_base_vol_w(0) * 4U in + * the instance block. */ void (*init_subctx_pdb)(struct gk20a *g, - struct nvgpu_mem *inst_block, - struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count); + struct nvgpu_mem *inst_block, u32 *subctx_pdb_map); + + /** + * @brief Set the valid subcontexts mask. + * + * @param g [in] Pointer to GPU driver struct. + * @param inst_block [in] Memory descriptor of Instance + * Block. + * @param valid_subctx_mask [in] Bitmask of valid subcontexts. + * + * This HAL configures the valid mask for all sub-contexts of the + * Instance Block: + * - Get max number of sub-contexts from HW. + * - For each set of 32 subcontexts, set the mask from + * \a valid_subctx_mask in ram_in_sc_pdb_valid_long_w(). + */ + void (*init_subctx_mask)(struct gk20a *g, + struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask); /** * @brief Instance Block shift. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h index aff7a4eb2..b99babb35 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -55,6 +55,11 @@ struct gops_tsg { /** @cond DOXYGEN_SHOULD_SKIP_THIS */ int (*open)(struct nvgpu_tsg *tsg); void (*release)(struct nvgpu_tsg *tsg); + int (*init_subctx_state)(struct gk20a *g, struct nvgpu_tsg *tsg); + void (*deinit_subctx_state)(struct gk20a *g, struct nvgpu_tsg *tsg); + int (*add_subctx_channel_hw)(struct nvgpu_channel *ch, + bool replayable); + void (*remove_subctx_channel_hw)(struct nvgpu_channel *ch); int (*init_eng_method_buffers)(struct gk20a *g, struct nvgpu_tsg *tsg); void (*deinit_eng_method_buffers)(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 9b6280df1..cd92c8f27 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -117,6 +117,10 @@ struct nvgpu_tsg { */ struct nvgpu_ref refcount; + /** Subcontext PDB map programmed in the instance blocks of all channels in this TSG. */ + u32 *subctx_pdb_map; + + /** Bitmask of valid subcontexts bound to this TSG. */ + unsigned long *valid_subctxs; + /** * List of subcontexts (#nvgpu_tsg_subctx) bound to this TSG. * Accessed by holding #ch_list_lock from TSG. 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h b/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h index 73cfd444e..4ce1f7826 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h @@ -60,6 +60,8 @@ int nvgpu_tsg_subctx_bind_channel(struct nvgpu_tsg *tsg, * - Validate that #subctx is allocated for the channel #ch. * - Remove the channel from the subctx #ch_list. * - If the subctx #ch_list is empty + * - Update the instance blocks of all channels to remove the + * subctx pdb. * - Invoke g->ops.gr.setup.free_subctx to free the GR subcontext * struct (and GR subcontext mappings struct). * - Remove the subctx from the TSG #subctx_list. @@ -120,6 +122,35 @@ struct nvgpu_gr_subctx *nvgpu_tsg_subctx_get_gr_subctx( */ u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *tsg_subctx); +/** + * @brief Set replayable state for a TSG subcontext. + * + * @param subctx [in] Pointer to TSG subcontext. + * @param replayable [in] Replayable state for the subcontext. + * + * - Set #replayable in #nvgpu_tsg_subctx. + */ +void nvgpu_tsg_subctx_set_replayable(struct nvgpu_tsg_subctx *subctx, + bool replayable); + +/** + * @brief Get replayable state for a TSG subcontext. + * + * @param subctx [in] Pointer to TSG subcontext. + * + * - Return #replayable from #nvgpu_tsg_subctx. + */ +bool nvgpu_tsg_subctx_get_replayable(struct nvgpu_tsg_subctx *subctx); + +/** + * @brief Get VM for a TSG subcontext. + * + * @param subctx [in] Pointer to TSG subcontext. + * + * - Return #vm from #nvgpu_tsg_subctx. + */ +struct vm_gk20a *nvgpu_tsg_subctx_get_vm(struct nvgpu_tsg_subctx *subctx); + /** * @brief Allocate or get the mappings struct for the TSG subcontext. * diff --git a/userspace/units/acr/nvgpu-acr.c b/userspace/units/acr/nvgpu-acr.c index 27ea3f0fa..8532f9748 100644 --- a/userspace/units/acr/nvgpu-acr.c +++ b/userspace/units/acr/nvgpu-acr.c @@ -826,24 +826,24 @@ int test_acr_prepare_ucode_blob(struct unit_module *m, nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); - nvgpu_posix_enable_fault_injection(kmem_fi, true, 17); + nvgpu_posix_enable_fault_injection(kmem_fi, true, 19); - unit_info(m, " kmem counter 17\n"); + unit_info(m, " kmem counter 19\n"); err = g->acr->prepare_ucode_blob(g); if (err != -ENOENT) { - unit_return_fail(m, "kmem count 17 test did not fail as expected\n"); + unit_return_fail(m, "kmem count 19 test did not fail as expected\n"); } /* - * the kmem counter is decreased after 17th count + * the kmem counter is decreased after the 19th count * because in the first attempt new memory is allocated and mapped for * page directories but after that since memory is already allocated it * is just mapped. Thus, number of kmallocs decrease. */ nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); - for (i = 9; i < 17; i++) { + for (i = 9; i < 19; i++) { unit_info(m, "kmem counter %d\n", i); nvgpu_posix_enable_fault_injection(kmem_fi, true, i); err = g->acr->prepare_ucode_blob(g); diff --git a/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c b/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c index 8b4631f43..096fb5dee 100644 --- a/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c +++ b/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -69,6 +69,12 @@ struct unit_ctx { size_t size; }; +static int stub_add_subctx_channel_hw(struct nvgpu_channel *ch, + bool replayable) +{ + return 0; +} + int test_gv11b_channel_unbind(struct unit_module *m, struct gk20a *g, void *args) { @@ -77,6 +83,8 @@ int test_gv11b_channel_unbind(struct unit_module *m, struct nvgpu_channel *ch; int ret = UNIT_FAIL; + g->ops.tsg.add_subctx_channel_hw = stub_add_subctx_channel_hw; + ch = nvgpu_channel_open_new(g, runlist_id, privileged, getpid(), getpid()); unit_assert(ch, goto done); diff --git a/userspace/units/fifo/channel/nvgpu-channel.c b/userspace/units/fifo/channel/nvgpu-channel.c index 3d390f88a..85729ff42 100644 --- a/userspace/units/fifo/channel/nvgpu-channel.c +++ b/userspace/units/fifo/channel/nvgpu-channel.c @@ -1562,6 +1562,12 @@ static void stub_channel_work_completion_cancel_sync(struct nvgpu_channel *ch) } #endif +static int stub_add_subctx_channel_hw(struct nvgpu_channel *ch, + bool replayable) +{ + return 0; +} + int test_channel_suspend_resume_serviceable_chs(struct unit_module *m, struct gk20a *g, void *vargs) { @@ -1594,6 +1600,7 @@ int test_channel_suspend_resume_serviceable_chs(struct unit_module *m, g->ops.fifo.preempt_tsg = stub_fifo_preempt_tsg; g->ops.fifo.preempt_channel = stub_fifo_preempt_channel; g->ops.runlist.reload = stub_runlist_reload; + g->ops.tsg.add_subctx_channel_hw = stub_add_subctx_channel_hw; orig_ch_tsgid = ch->tsgid; for (branches = 0U; branches < F_CHANNEL_SUSPEND_RESUME_CHS_LAST; diff --git a/userspace/units/fifo/ramfc/gv11b/nvgpu-ramfc-gv11b.c b/userspace/units/fifo/ramfc/gv11b/nvgpu-ramfc-gv11b.c index bae3ab33d..3168f6d1c 100644 --- a/userspace/units/fifo/ramfc/gv11b/nvgpu-ramfc-gv11b.c +++ b/userspace/units/fifo/ramfc/gv11b/nvgpu-ramfc-gv11b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -131,13 +131,6 @@ static int stub_ramfc_commit_userd(struct nvgpu_channel *ch) return 0; } -static void stub_ramin_init_subctx_pdb(struct gk20a *g, - struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count) -{ - global_count++; -} - #define F_RAMFC_SETUP_PRIVILEDGED_CH BIT(0) #define F_RAMFC_SETUP_LAST BIT(1) @@ -156,7 +149,6 @@ int test_gv11b_ramfc_setup(struct unit_module *m, struct gk20a *g, void *args) g->ops.ramin.alloc_size = gk20a_ramin_alloc_size; g->ops.pbdma.acquire_val = stub_pbdma_acquire_val; - g->ops.ramin.init_subctx_pdb = stub_ramin_init_subctx_pdb; g->ops.pbdma.get_gp_base = stub_pbdma_get_gp_base; g->ops.pbdma.get_gp_base_hi = stub_pbdma_get_gp_base_hi; g->ops.pbdma.get_signature = stub_pbdma_get_signature; @@ -199,9 +191,9 @@ int test_gv11b_ramfc_setup(struct unit_module *m, struct gk20a *g, void *args) ram_fc_config_w()) == 5U, goto done); if (branches & F_RAMFC_SETUP_PRIVILEDGED_CH) { - unit_assert(global_count == 15U, goto done); + unit_assert(global_count == 14U, goto done); } else { - unit_assert(global_count == 13U, goto done); + unit_assert(global_count == 12U, goto done); } } diff --git a/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.c b/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.c index a22a0b183..d464d1fd1 100644 --- a/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.c +++ b/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -109,6 +109,24 @@ int test_gv11b_ramin_init_subctx_pdb(struct unit_module *m, struct gk20a *g, u64 pdb_addr; u32 max_subctx_count = ram_in_sc_page_dir_base_target__size_1_v(); u32 aperture = ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(); + unsigned long *valid_subctxs; + u32 *subctx_pdb_map; + + subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U); + if (subctx_pdb_map == NULL) { + nvgpu_err(g, "subctx_pdb_map alloc failed"); + return UNIT_FAIL; + } + + valid_subctxs = nvgpu_kzalloc(g, + BITS_TO_LONGS(max_subctx_count) * + sizeof(unsigned long)); + if (valid_subctxs == NULL) { + nvgpu_err(g, "valid_subctxs bitmap alloc failed"); + nvgpu_kfree(g, subctx_pdb_map); + subctx_pdb_map = NULL; + return UNIT_FAIL; + } g->ops.ramin.alloc_size = gk20a_ramin_alloc_size; @@ -146,8 +164,21 @@ int test_gv11b_ramin_init_subctx_pdb(struct unit_module *m, struct gk20a *g, 1U, 0U); } - gv11b_ramin_init_subctx_pdb(g, &inst_block, &pdb_mem, - replayable, 64); + g->ops.ramin.init_subctx_pdb_map(g, subctx_pdb_map); + for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) { + g->ops.ramin.set_subctx_pdb_info(g, subctx_id, + &pdb_mem, replayable, true, subctx_pdb_map); + nvgpu_set_bit(subctx_id, valid_subctxs); + } + + /* Program subctx pdb info in the instance block */ + g->ops.ramin.init_subctx_pdb(g, &inst_block, subctx_pdb_map); + + /* + * Program subctx pdb valid mask in the instance block. + * All subcontexts are set valid here. 
+ */ + g->ops.ramin.init_subctx_mask(g, &inst_block, valid_subctxs); for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) { addr_lo = ram_in_sc_page_dir_base_vol_w(subctx_id); diff --git a/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.h b/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.h index 65e96e0c6..def991470 100644 --- a/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.h +++ b/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -63,7 +63,7 @@ int test_gv11b_ramin_set_gr_ptr(struct unit_module *m, struct gk20a *g, * Test Type: Feature * * Targets: gops_ramin.init_subctx_pdb, gv11b_ramin_init_subctx_pdb, - * gv11b_subctx_commit_pdb, gv11b_subctx_commit_valid_mask + * gops_ramin.init_subctx_mask, gv11b_ramin_init_subctx_valid_mask * * Input: None *
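
A minimal standalone sketch of the per-TSG subctx_pdb_map that this patch introduces, for illustration only: MAX_SUBCTX, WORDS_PER_SUBCTX, and the field-encoding constants below are placeholders (the real values come from g->ops.gr.init.get_max_subctx_count() and the ram_in_sc_* hardware headers); only the array layout (four u32 words per subcontext: format word, PDB address hi, two unused) and the add/remove flow mirror gv11b_ramin_set_subctx_pdb_info().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_SUBCTX       64U  /* placeholder for get_max_subctx_count() */
#define WORDS_PER_SUBCTX 4U   /* format word, PDB addr hi, two unused words */

/* Placeholder encodings standing in for the ram_in_sc_* field macros. */
#define TARGET_INVALID   0x0U
#define TARGET_VID_MEM   0x2U
#define FMT_COMMON_BITS  0x30U  /* ver2 PT format + big page size */
#define FMT_REPLAY_BITS  0xC0U  /* TEX + GCC fault replay */

/* Mirrors gv11b_ramin_set_subctx_pdb_info(): write one 4-word map entry. */
static void set_subctx_pdb_info(uint32_t subctx_id, uint64_t pdb_addr,
				bool replayable, bool add, uint32_t *map)
{
	uint32_t format_word = TARGET_INVALID;
	uint32_t pdb_addr_hi = 0U;

	if (add) {
		/* Low PDB address bits share the format word in real HW;
		 * this mask is a placeholder for the real shift/field macros. */
		format_word = TARGET_VID_MEM | FMT_COMMON_BITS |
			      (uint32_t)(pdb_addr & 0xFFFFF000ULL);
		if (replayable)
			format_word |= FMT_REPLAY_BITS;
		pdb_addr_hi = (uint32_t)(pdb_addr >> 32);
	}

	map[(subctx_id * WORDS_PER_SUBCTX) + 0U] = format_word;
	map[(subctx_id * WORDS_PER_SUBCTX) + 1U] = pdb_addr_hi;
}

int main(void)
{
	uint32_t *map = calloc(MAX_SUBCTX * WORDS_PER_SUBCTX, sizeof(*map));
	uint32_t i;

	if (map == NULL)
		return 1;

	/* init_subctx_pdb_map(): every subcontext starts out invalid. */
	for (i = 0U; i < MAX_SUBCTX; i++)
		set_subctx_pdb_info(i, 0U, false, false, map);

	/* Bind VEID 3 with a replayable PDB, then unbind it again. */
	set_subctx_pdb_info(3U, 0x123456789000ULL, true, true, map);
	printf("veid 3 format 0x%08x hi 0x%08x\n",
	       (unsigned)map[3U * WORDS_PER_SUBCTX],
	       (unsigned)map[(3U * WORDS_PER_SUBCTX) + 1U]);
	set_subctx_pdb_info(3U, 0U, false, false, map);
	printf("veid 3 format 0x%08x hi 0x%08x\n",
	       (unsigned)map[3U * WORDS_PER_SUBCTX],
	       (unsigned)map[(3U * WORDS_PER_SUBCTX) + 1U]);

	free(map);
	return 0;
}

After a bind or unbind updates the map, gv11b_tsg_update_inst_blocks_subctxs() copies it into the instance block of every channel in the TSG via init_subctx_pdb() and rewrites the valid mask via init_subctx_mask(), with the TSG disabled and preempted under ctx_init_lock.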