gpu: nvgpu: subcontext add/remove support

Subcontext PDBs and the valid mask in the instance blocks of the channels
in the various subcontexts have to be updated when a new subcontext is
created or an existing subcontext is removed.

The replayable fault state is now cached in the channel structure. The
replayable fault state for a subcontext is set from the first channel's
bind parameter; it was earlier programmed in channel_setup_ramfc.

init_inst_block_core is updated to set up the TSG-level PDB map and valid mask.

Added a new HAL, gv11b_channel_bind, to enable the subcontext on channel
bind.
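
A minimal sketch of the per-TSG subcontext PDB map layout that the new
HALs maintain (illustration only; the SUBCTX_PDB_WORDS macro and
subctx_pdb_entry helper below are made up for this sketch and are not
part of nvgpu):

    /*
     * Sketch, not nvgpu code: gv11b_tsg_init_subctx_state() allocates
     * four 32-bit words per subcontext (word 0: PDB target/format plus
     * address lo, word 1: PDB address hi, words 2-3: unused), matching
     * the ram_in_sc_* word spacing in the instance block.
     */
    #include <stdint.h>

    #define SUBCTX_PDB_WORDS 4U

    static inline uint32_t *subctx_pdb_entry(uint32_t *subctx_pdb_map,
                                             uint32_t subctx_id)
    {
            return &subctx_pdb_map[subctx_id * SUBCTX_PDB_WORDS];
    }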

Bug 3677982

Change-Id: I58156c5b3ab6309b6a4b8e72b0e798d6a39c1bee
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2719994
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Sagar Kamble
2022-04-24 22:37:03 +05:30
committed by mobile promotions
parent 269e853fc5
commit 693305c0fd
41 changed files with 739 additions and 171 deletions

View File

@@ -1005,7 +1005,6 @@ static void channel_free(struct nvgpu_channel *ch, bool force)
nvgpu_cic_rm_wait_for_deferred_interrupts(g); nvgpu_cic_rm_wait_for_deferred_interrupts(g);
unbind: unbind:
g->ops.channel.unbind(ch);
g->ops.channel.free_inst(g, ch); g->ops.channel.free_inst(g, ch);
nvgpu_channel_wdt_destroy(ch->wdt); nvgpu_channel_wdt_destroy(ch->wdt);
@@ -1520,6 +1519,14 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c,
} }
#endif #endif
c->replayable = false;
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) {
c->replayable = true;
}
#endif
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) { if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) {
err = nvgpu_channel_setup_usermode(c, args); err = nvgpu_channel_setup_usermode(c, args);
} else { } else {

View File

@@ -38,6 +38,7 @@
#include <nvgpu/nvs.h> #include <nvgpu/nvs.h>
#include <nvgpu/static_analysis.h> #include <nvgpu/static_analysis.h>
#include <nvgpu/nvgpu_init.h> #include <nvgpu/nvgpu_init.h>
#include <nvgpu/kmem.h>
#ifdef CONFIG_NVGPU_PROFILER #ifdef CONFIG_NVGPU_PROFILER
#include <nvgpu/profiler.h> #include <nvgpu/profiler.h>
#endif #endif
@@ -292,12 +293,7 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
} }
#endif #endif
/** g->ops.channel.unbind(ch);
* Remove channel from TSG and re-enable rest of the channels.
* Since channel removal can lead to subctx removal and/or
* VM mappings removal, acquire ctx_init_lock.
*/
nvgpu_mutex_acquire(&tsg->ctx_init_lock);
nvgpu_rwsem_down_write(&tsg->ch_list_lock); nvgpu_rwsem_down_write(&tsg->ch_list_lock);
nvgpu_tsg_subctx_unbind_channel(tsg, ch); nvgpu_tsg_subctx_unbind_channel(tsg, ch);
@@ -311,8 +307,6 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
g->ops.channel.disable(ch); g->ops.channel.disable(ch);
nvgpu_rwsem_up_write(&tsg->ch_list_lock); nvgpu_rwsem_up_write(&tsg->ch_list_lock);
nvgpu_mutex_release(&tsg->ctx_init_lock);
/* /*
* Don't re-enable all channels if TSG has timed out already * Don't re-enable all channels if TSG has timed out already
* *
@@ -345,8 +339,16 @@ int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch,
nvgpu_log_fn(g, "unbind tsg:%u ch:%u\n", tsg->tsgid, ch->chid); nvgpu_log_fn(g, "unbind tsg:%u ch:%u\n", tsg->tsgid, ch->chid);
/**
* Remove channel from TSG and re-enable rest of the channels.
* Since channel removal can lead to subctx removal and/or
* VM mappings removal, acquire ctx_init_lock.
*/
nvgpu_mutex_acquire(&tsg->ctx_init_lock);
err = nvgpu_tsg_unbind_channel_common(tsg, ch); err = nvgpu_tsg_unbind_channel_common(tsg, ch);
if (!force && err == -EAGAIN) { if (!force && err == -EAGAIN) {
nvgpu_mutex_release(&tsg->ctx_init_lock);
return err; return err;
} }
@@ -368,6 +370,8 @@ int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch,
} }
} }
nvgpu_mutex_release(&tsg->ctx_init_lock);
nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
return err; return err;
@@ -413,7 +417,7 @@ fail_common:
} }
#endif #endif
nvgpu_mutex_acquire(&tsg->ctx_init_lock); g->ops.channel.unbind(ch);
nvgpu_rwsem_down_write(&tsg->ch_list_lock); nvgpu_rwsem_down_write(&tsg->ch_list_lock);
nvgpu_tsg_subctx_unbind_channel(tsg, ch); nvgpu_tsg_subctx_unbind_channel(tsg, ch);
@@ -907,6 +911,15 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid)
goto clean_up; goto clean_up;
} }
if (g->ops.tsg.init_subctx_state != NULL) {
err = g->ops.tsg.init_subctx_state(g, tsg);
if (err != 0) {
nvgpu_err(g, "tsg %d subctx state init failed %d",
tsg->tsgid, err);
goto clean_up;
}
}
#ifdef CONFIG_NVGPU_SM_DIVERSITY #ifdef CONFIG_NVGPU_SM_DIVERSITY
nvgpu_gr_ctx_set_sm_diversity_config(tsg->gr_ctx, nvgpu_gr_ctx_set_sm_diversity_config(tsg->gr_ctx,
NVGPU_INVALID_SM_CONFIG_ID); NVGPU_INVALID_SM_CONFIG_ID);
@@ -970,6 +983,10 @@ void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg)
nvgpu_free_gr_ctx_struct(g, tsg->gr_ctx); nvgpu_free_gr_ctx_struct(g, tsg->gr_ctx);
tsg->gr_ctx = NULL; tsg->gr_ctx = NULL;
if (g->ops.tsg.deinit_subctx_state != NULL) {
g->ops.tsg.deinit_subctx_state(g, tsg);
}
if (g->ops.tsg.deinit_eng_method_buffers != NULL) { if (g->ops.tsg.deinit_eng_method_buffers != NULL) {
g->ops.tsg.deinit_eng_method_buffers(g, tsg); g->ops.tsg.deinit_eng_method_buffers(g, tsg);
} }

View File

@@ -120,6 +120,10 @@ void nvgpu_tsg_subctx_unbind_channel(struct nvgpu_tsg *tsg,
nvgpu_list_del(&ch->subctx_entry); nvgpu_list_del(&ch->subctx_entry);
if (nvgpu_list_empty(&subctx->ch_list)) { if (nvgpu_list_empty(&subctx->ch_list)) {
if (g->ops.tsg.remove_subctx_channel_hw != NULL) {
g->ops.tsg.remove_subctx_channel_hw(ch);
}
if (g->ops.gr.setup.free_subctx != NULL) { if (g->ops.gr.setup.free_subctx != NULL) {
g->ops.gr.setup.free_subctx(ch); g->ops.gr.setup.free_subctx(ch);
subctx->gr_subctx = NULL; subctx->gr_subctx = NULL;
@@ -203,6 +207,22 @@ u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *subctx)
return subctx->subctx_id; return subctx->subctx_id;
} }
void nvgpu_tsg_subctx_set_replayable(struct nvgpu_tsg_subctx *subctx,
bool replayable)
{
subctx->replayable = replayable;
}
bool nvgpu_tsg_subctx_get_replayable(struct nvgpu_tsg_subctx *subctx)
{
return subctx->replayable;
}
struct vm_gk20a *nvgpu_tsg_subctx_get_vm(struct nvgpu_tsg_subctx *subctx)
{
return subctx->vm;
}
struct nvgpu_gr_ctx_mappings *nvgpu_tsg_subctx_alloc_or_get_mappings( struct nvgpu_gr_ctx_mappings *nvgpu_tsg_subctx_alloc_or_get_mappings(
struct gk20a *g, struct gk20a *g,
struct nvgpu_tsg *tsg, struct nvgpu_tsg *tsg,

View File

@@ -44,6 +44,9 @@ struct nvgpu_tsg_subctx {
/** Subcontext's GR ctx header and GR ctx buffers mappings. */ /** Subcontext's GR ctx header and GR ctx buffers mappings. */
struct nvgpu_gr_subctx *gr_subctx; struct nvgpu_gr_subctx *gr_subctx;
/** Replayable faults state for a subcontext. */
bool replayable;
/** /**
* Subcontext's entry in TSG's (#nvgpu_tsg) subcontexts list * Subcontext's entry in TSG's (#nvgpu_tsg) subcontexts list
* #subctx_list. * #subctx_list.

View File

@@ -227,7 +227,11 @@ static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g,
return err; return err;
} }
g->ops.mm.init_inst_block_core(&ucode_info->inst_blk_desc, vm, 0); err = g->ops.mm.init_inst_block_core(&ucode_info->inst_blk_desc, vm, 0);
if (err != 0) {
nvgpu_free_inst_block(g, &ucode_info->inst_blk_desc);
return err;
}
/* Map ucode surface to GMMU */ /* Map ucode surface to GMMU */
ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm, ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm,

View File

@@ -228,7 +228,12 @@ static int nvgpu_init_system_vm(struct mm_gk20a *mm)
if (err != 0) { if (err != 0) {
goto clean_up_vm; goto clean_up_vm;
} }
g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, big_page_size);
err = g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, big_page_size);
if (err != 0) {
nvgpu_free_inst_block(g, inst_block);
goto clean_up_vm;
}
return 0; return 0;
@@ -247,7 +252,12 @@ static int nvgpu_init_hwpm(struct mm_gk20a *mm)
if (err != 0) { if (err != 0) {
return err; return err;
} }
g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, 0);
err = g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, 0);
if (err != 0) {
nvgpu_free_inst_block(g, inst_block);
return err;
}
return 0; return 0;
} }
@@ -366,7 +376,12 @@ static int nvgpu_init_bar1_vm(struct mm_gk20a *mm)
if (err != 0) { if (err != 0) {
goto clean_up_vm; goto clean_up_vm;
} }
g->ops.mm.init_inst_block_core(inst_block, mm->bar1.vm, big_page_size);
err = g->ops.mm.init_inst_block_core(inst_block, mm->bar1.vm, big_page_size);
if (err != 0) {
nvgpu_free_inst_block(g, inst_block);
goto clean_up_vm;
}
return 0; return 0;
@@ -401,7 +416,11 @@ static int nvgpu_init_engine_ucode_vm(struct gk20a *g,
goto clean_up_va; goto clean_up_va;
} }
g->ops.mm.init_inst_block_core(inst_block, ucode->vm, big_page_size); err = g->ops.mm.init_inst_block_core(inst_block, ucode->vm, big_page_size);
if (err != 0) {
nvgpu_free_inst_block(g, inst_block);
goto clean_up_va;
}
return 0; return 0;

View File

@@ -72,7 +72,12 @@ int nvgpu_perfbuf_init_inst_block(struct gk20a *g)
return err; return err;
} }
g->ops.mm.init_inst_block_core(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); err = g->ops.mm.init_inst_block_core(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
if (err != 0) {
nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
return err;
}
g->ops.perf.init_inst_block(g, &mm->perfbuf.inst_block); g->ops.perf.init_inst_block(g, &mm->perfbuf.inst_block);
return 0; return 0;

View File

@@ -82,9 +82,19 @@ void ga10b_channel_bind(struct nvgpu_channel *ch)
{ {
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_runlist *runlist = NULL; struct nvgpu_runlist *runlist = NULL;
int err;
runlist = ch->runlist; runlist = ch->runlist;
/* Enable subcontext */
if (g->ops.tsg.add_subctx_channel_hw != NULL) {
err = g->ops.tsg.add_subctx_channel_hw(ch, ch->replayable);
if (err != 0) {
nvgpu_err(g, "Subcontext addition failed %d", err);
return;
}
}
/* Enable channel */ /* Enable channel */
nvgpu_chram_bar0_writel(g, runlist, runlist_chram_channel_r(ch->chid), nvgpu_chram_bar0_writel(g, runlist, runlist_chram_channel_r(ch->chid),
runlist_chram_channel_update_f( runlist_chram_channel_update_f(

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -30,6 +30,7 @@ struct nvgpu_channel;
struct nvgpu_channel_hw_state; struct nvgpu_channel_hw_state;
struct nvgpu_debug_context; struct nvgpu_debug_context;
void gv11b_channel_bind(struct nvgpu_channel *ch);
void gv11b_channel_unbind(struct nvgpu_channel *ch); void gv11b_channel_unbind(struct nvgpu_channel *ch);
u32 gv11b_channel_count(struct gk20a *g); u32 gv11b_channel_count(struct gk20a *g);
void gv11b_channel_read_state(struct gk20a *g, struct nvgpu_channel *ch, void gv11b_channel_read_state(struct gk20a *g, struct nvgpu_channel *ch,

View File

@@ -32,6 +32,42 @@
#include <nvgpu/hw/gv11b/hw_ccsr_gv11b.h> #include <nvgpu/hw/gv11b/hw_ccsr_gv11b.h>
void gv11b_channel_bind(struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
int err;
u32 inst_ptr = nvgpu_inst_block_ptr(g, &ch->inst_block);
nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x",
ch->chid, inst_ptr);
/* Enable subcontext */
if (g->ops.tsg.add_subctx_channel_hw != NULL) {
err = g->ops.tsg.add_subctx_channel_hw(ch, ch->replayable);
if (err != 0) {
nvgpu_err(g, "Subcontext addition failed %d", err);
return;
}
}
/* Enable channel */
nvgpu_writel(g, ccsr_channel_inst_r(ch->chid),
ccsr_channel_inst_ptr_f(inst_ptr) |
nvgpu_aperture_mask(g, &ch->inst_block,
ccsr_channel_inst_target_sys_mem_ncoh_f(),
ccsr_channel_inst_target_sys_mem_coh_f(),
ccsr_channel_inst_target_vid_mem_f()) |
ccsr_channel_inst_bind_true_f());
nvgpu_writel(g, ccsr_channel_r(ch->chid),
(nvgpu_readl(g, ccsr_channel_r(ch->chid)) &
~ccsr_channel_enable_set_f(~U32(0U))) |
ccsr_channel_enable_set_true_f());
nvgpu_atomic_set(&ch->bound, 1);
}
void gv11b_channel_unbind(struct nvgpu_channel *ch) void gv11b_channel_unbind(struct nvgpu_channel *ch)
{ {
struct gk20a *g = ch->g; struct gk20a *g = ch->g;

View File

@@ -44,7 +44,6 @@ int ga10b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base,
u32 eng_intr_mask = 0U; u32 eng_intr_mask = 0U;
u32 eng_intr_vector = 0U; u32 eng_intr_vector = 0U;
u32 eng_bitmask = 0U; u32 eng_bitmask = 0U;
bool replayable = false;
(void)flags; (void)flags;
@@ -65,18 +64,9 @@ int ga10b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base,
nvgpu_memset(g, mem, 0U, 0U, ram_fc_size_val_v()); nvgpu_memset(g, mem, 0U, 0U, ram_fc_size_val_v());
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) {
replayable = true;
}
#endif
nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout,
g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); g->ops.pbdma.acquire_val(pbdma_acquire_timeout));
g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem,
replayable, nvgpu_channel_get_max_subctx_count(ch));
nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(),
g->ops.pbdma.get_gp_base(gpfifo_base)); g->ops.pbdma.get_gp_base(gpfifo_base));

View File

@@ -38,7 +38,6 @@ int gv11b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base,
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_mem *mem = &ch->inst_block; struct nvgpu_mem *mem = &ch->inst_block;
u32 data; u32 data;
bool replayable = false;
(void)flags; (void)flags;
@@ -46,18 +45,9 @@ int gv11b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base,
nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v()); nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v());
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) {
replayable = true;
}
#endif
nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout,
g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); g->ops.pbdma.acquire_val(pbdma_acquire_timeout));
g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem,
replayable, nvgpu_channel_get_max_subctx_count(ch));
nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(),
g->ops.pbdma.get_gp_base(gpfifo_base)); g->ops.pbdma.get_gp_base(gpfifo_base));

View File

@@ -39,24 +39,14 @@ int tu104_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base,
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_mem *mem = &ch->inst_block; struct nvgpu_mem *mem = &ch->inst_block;
u32 data; u32 data;
bool replayable = false;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v()); nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v());
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) {
replayable = true;
}
#endif
nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout,
g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); g->ops.pbdma.acquire_val(pbdma_acquire_timeout));
g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem,
replayable, nvgpu_channel_get_max_subctx_count(ch));
nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(),
g->ops.pbdma.get_gp_base(gpfifo_base)); g->ops.pbdma.get_gp_base(gpfifo_base));

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -30,9 +30,15 @@ struct nvgpu_mem;
void gv11b_ramin_set_gr_ptr(struct gk20a *g, void gv11b_ramin_set_gr_ptr(struct gk20a *g,
struct nvgpu_mem *inst_block, u64 gpu_va); struct nvgpu_mem *inst_block, u64 gpu_va);
void gv11b_ramin_set_subctx_pdb_info(struct gk20a *g,
u32 subctx_id, struct nvgpu_mem *pdb_mem,
bool replayable, bool add, u32 *subctx_pdb_map);
void gv11b_ramin_init_subctx_pdb_map(struct gk20a *g,
u32 *subctx_pdb_map);
void gv11b_ramin_init_subctx_valid_mask(struct gk20a *g,
struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask);
void gv11b_ramin_init_subctx_pdb(struct gk20a *g, void gv11b_ramin_init_subctx_pdb(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, struct nvgpu_mem *inst_block, u32 *subctx_pdb_map);
bool replayable, u32 max_subctx_count);
void gv11b_ramin_set_eng_method_buffer(struct gk20a *g, void gv11b_ramin_set_eng_method_buffer(struct gk20a *g,
struct nvgpu_mem *inst_block, u64 gpu_va); struct nvgpu_mem *inst_block, u64 gpu_va);
void gv11b_ramin_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, void gv11b_ramin_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -45,85 +45,89 @@ void gv11b_ramin_set_gr_ptr(struct gk20a *g,
ram_in_engine_wfi_ptr_hi_f(addr_hi)); ram_in_engine_wfi_ptr_hi_f(addr_hi));
} }
static void gv11b_subctx_commit_valid_mask(struct gk20a *g, void gv11b_ramin_set_subctx_pdb_info(struct gk20a *g,
struct nvgpu_mem *inst_block, u32 max_subctx_count) u32 subctx_id, struct nvgpu_mem *pdb_mem,
bool replayable, bool add, u32 *subctx_pdb_map)
{ {
u32 format_word = 0;
u32 pdb_addr_lo = 0;
u32 pdb_addr_hi = 0;
u64 pdb_addr;
u32 aperture;
if (add) {
aperture = nvgpu_aperture_mask(g, pdb_mem,
ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
ram_in_sc_page_dir_base_target_vid_mem_v());
pdb_addr = nvgpu_mem_get_addr(g, pdb_mem);
pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
pdb_addr_hi = u64_hi32(pdb_addr);
format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U) |
ram_in_sc_page_dir_base_vol_f(
ram_in_sc_page_dir_base_vol_true_v(), 0U) |
ram_in_sc_use_ver2_pt_format_f(1U, 0U) |
ram_in_sc_big_page_size_f(1U, 0U) |
ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
if (replayable) {
format_word |=
ram_in_sc_page_dir_base_fault_replay_tex_f(1U, 0U) |
ram_in_sc_page_dir_base_fault_replay_gcc_f(1U, 0U);
}
} else {
aperture = ram_in_sc_page_dir_base_target_invalid_v();
format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U);
}
nvgpu_log(g, gpu_dbg_info, "%s subctx[%u] pdb info lo %x hi %x",
add ? "add" : "remove", subctx_id,
format_word, pdb_addr_hi);
subctx_pdb_map[subctx_id * 4U] = format_word;
subctx_pdb_map[(subctx_id * 4U) + 1U] = pdb_addr_hi;
}
void gv11b_ramin_init_subctx_pdb_map(struct gk20a *g,
u32 *subctx_pdb_map)
{
u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count();
u32 i;
/* Initially, all subcontexts are invalid in the TSG. */
for (i = 0; i < max_subctx_count; i++) {
gv11b_ramin_set_subctx_pdb_info(g, i, NULL, false, false,
subctx_pdb_map);
}
}
void gv11b_ramin_init_subctx_valid_mask(struct gk20a *g,
struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask)
{
u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count();
u32 id; u32 id;
u32 subctx_count = max_subctx_count;
for (id = 0U; id < max_subctx_count; id += 32U) { for (id = 0U; id < max_subctx_count; id += 32U) {
u32 subctx_mask_max_bit = ((subctx_count < 32U) ? u32 subctx_mask = ((u32 *)valid_subctx_mask)[id / 32U];
(subctx_count % 32U) : 0U);
u32 subctx_mask = U32_MAX;
if (subctx_mask_max_bit != 0U) {
subctx_mask = nvgpu_safe_sub_u32(
BIT32(subctx_mask_max_bit), 1U);
}
nvgpu_mem_wr32(g, inst_block, nvgpu_mem_wr32(g, inst_block,
ram_in_sc_pdb_valid_long_w(id), subctx_mask); ram_in_sc_pdb_valid_long_w(id), subctx_mask);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_mig, nvgpu_log(g, gpu_dbg_info | gpu_dbg_mig,
"id[%d] max_subctx_count[%u] subctx_mask_max_bit[%u] " "id[%d] max_subctx_count[%u] subctx_mask[%x] ",
"subctx_count[%u] subctx_mask[%x] ", id, max_subctx_count, subctx_mask);
id, max_subctx_count, subctx_mask_max_bit,
subctx_count, subctx_mask);
if (subctx_count > 32U) {
subctx_count = nvgpu_safe_sub_u32(subctx_count, 32U);
}
}
}
static void gv11b_subctx_commit_pdb(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem,
bool replayable, u32 max_subctx_count)
{
u32 lo, hi;
u32 subctx_id = 0;
u32 format_word;
u32 pdb_addr_lo, pdb_addr_hi;
u64 pdb_addr;
u32 aperture = nvgpu_aperture_mask(g, pdb_mem,
ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
ram_in_sc_page_dir_base_target_vid_mem_v());
pdb_addr = nvgpu_mem_get_addr(g, pdb_mem);
pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
pdb_addr_hi = u64_hi32(pdb_addr);
format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U) |
ram_in_sc_page_dir_base_vol_f(
ram_in_sc_page_dir_base_vol_true_v(), 0U) |
ram_in_sc_use_ver2_pt_format_f(1U, 0U) |
ram_in_sc_big_page_size_f(1U, 0U) |
ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo);
if (replayable) {
format_word |=
ram_in_sc_page_dir_base_fault_replay_tex_f(1U, 0U) |
ram_in_sc_page_dir_base_fault_replay_gcc_f(1U, 0U);
}
nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x",
format_word, pdb_addr_hi);
for (subctx_id = 0U; subctx_id < max_subctx_count; subctx_id++) {
lo = ram_in_sc_page_dir_base_vol_w(subctx_id);
hi = ram_in_sc_page_dir_base_hi_w(subctx_id);
nvgpu_mem_wr32(g, inst_block, lo, format_word);
nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi);
} }
} }
void gv11b_ramin_init_subctx_pdb(struct gk20a *g, void gv11b_ramin_init_subctx_pdb(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, struct nvgpu_mem *inst_block, u32 *subctx_pdb_map)
bool replayable, u32 max_subctx_count)
{ {
gv11b_subctx_commit_pdb(g, inst_block, pdb_mem, replayable, u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count();
max_subctx_count); u32 size = max_subctx_count * 4U * 4U;
gv11b_subctx_commit_valid_mask(g, inst_block, max_subctx_count);
nvgpu_mem_wr_n(g, inst_block, ram_in_sc_page_dir_base_vol_w(0) * 4U,
subctx_pdb_map, size);
} }
void gv11b_ramin_set_eng_method_buffer(struct gk20a *g, void gv11b_ramin_set_eng_method_buffer(struct gk20a *g,

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -38,4 +38,61 @@ void gv11b_tsg_deinit_eng_method_buffers(struct gk20a *g,
void gv11b_tsg_bind_channel_eng_method_buffers(struct nvgpu_tsg *tsg, void gv11b_tsg_bind_channel_eng_method_buffers(struct nvgpu_tsg *tsg,
struct nvgpu_channel *ch); struct nvgpu_channel *ch);
/**
* @brief Initialize subcontext PDB map and valid mask for a TSG.
*
* @param g [in] Pointer to GPU driver struct.
* @param tsg [in] Pointer to TSG struct.
*
* - If subcontexts are enabled:
* - Allocate array of PDB configuration values for maximum supported
* subcontexts.
* - Initialize the array by calling g->ops.ramin.init_subctx_pdb_map.
* - Allocate valid subcontexts bitmask.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int gv11b_tsg_init_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg);
/**
* @brief Deinitialize subcontext PDB map and valid mask for a TSG.
*
* @param g [in] Pointer to GPU driver struct.
* @param tsg [in] Pointer to TSG struct.
*
* - If subcontexts are enabled:
* - Free array of PDB configuration values.
* - Free valid subcontexts bitmask.
*/
void gv11b_tsg_deinit_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg);
/**
* @brief Add a subctx channel to TSG.
*
* @param ch [in] Pointer to Channel struct.
* @param replayable [in] replayable state of the channel.
*
* - If subcontexts are enabled:
* - Update subcontext info in TSG members if this is the first channel
* of a subcontext and update the instance blocks of all channels
* in the TSG with this information.
* - If this is a channel in an existing subctx then:
* - Update the channel instance block with subcontext info.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int gv11b_tsg_add_subctx_channel_hw(struct nvgpu_channel *ch, bool replayable);
/**
* @brief Remove a subctx channel from TSG.
*
* @param ch [in] Pointer to Channel struct.
*
* - If subcontexts are enabled:
* - Update subcontext info in TSG members as this is the last channel
* of a subcontext and update instance blocks of all channels
* in the TSG with this information.
*/
void gv11b_tsg_remove_subctx_channel_hw(struct nvgpu_channel *ch);
#endif /* NVGPU_TSG_GV11B_H */ #endif /* NVGPU_TSG_GV11B_H */
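
As a hedged illustration of how these TSG subcontext HALs can be stubbed
in unit tests (mirroring the stubs in the channel tests further down this
page; the test_stub_ names are invented for this sketch, and the nvgpu
types come from the usual driver headers):

    /* Sketch only: no-op stubs for the new TSG subcontext HALs. */
    static int test_stub_add_subctx_channel_hw(struct nvgpu_channel *ch,
                                                bool replayable)
    {
            (void)ch;
            (void)replayable;
            return 0;
    }

    static void test_stub_remove_subctx_channel_hw(struct nvgpu_channel *ch)
    {
            (void)ch;
    }

    /*
     * Install before exercising channel bind/unbind paths, e.g.:
     * g->ops.tsg.add_subctx_channel_hw = test_stub_add_subctx_channel_hw;
     * g->ops.tsg.remove_subctx_channel_hw = test_stub_remove_subctx_channel_hw;
     */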

View File

@@ -25,6 +25,7 @@
#include <nvgpu/runlist.h> #include <nvgpu/runlist.h>
#include <nvgpu/nvgpu_mem.h> #include <nvgpu/nvgpu_mem.h>
#include <nvgpu/tsg.h> #include <nvgpu/tsg.h>
#include <nvgpu/tsg_subctx.h>
#include <nvgpu/dma.h> #include <nvgpu/dma.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/static_analysis.h> #include <nvgpu/static_analysis.h>
@@ -174,3 +175,192 @@ void gv11b_tsg_deinit_eng_method_buffers(struct gk20a *g,
nvgpu_log_info(g, "eng method buffers de-allocated"); nvgpu_log_info(g, "eng method buffers de-allocated");
} }
int gv11b_tsg_init_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg)
{
u32 max_subctx_count;
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
return 0;
}
max_subctx_count = g->ops.gr.init.get_max_subctx_count();
/*
* Allocate an array of subctx PDB configuration values for all supported
* subcontexts. For each subctx, there will be two registers to be
* configured, ram_in_sc_page_dir_base_lo_w(i) and
* ram_in_sc_page_dir_base_hi_w(i) in the instance block for the channels
* belonging to this TSG. Two more unused registers follow these for each
* subcontext. The same PDB table/array is programmed in the instance
* block of all the channels.
*
* As subcontexts are bound to the TSG, their configuration register
* values are added to the array and the corresponding bit is set in
* the valid_subctxs bitmask. As subcontexts are unbound from the TSG,
* their entries in the array are marked invalid and the corresponding
* bit is cleared in the valid_subctxs bitmask.
*/
tsg->subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U);
if (tsg->subctx_pdb_map == NULL) {
nvgpu_err(g, "subctx_pdb_map alloc failed");
return -ENOMEM;
}
g->ops.ramin.init_subctx_pdb_map(g, tsg->subctx_pdb_map);
tsg->valid_subctxs = nvgpu_kzalloc(g,
BITS_TO_LONGS(max_subctx_count) *
sizeof(unsigned long));
if (tsg->valid_subctxs == NULL) {
nvgpu_err(g, "valid_subctxs bitmap alloc failed");
nvgpu_kfree(g, tsg->subctx_pdb_map);
tsg->subctx_pdb_map = NULL;
return -ENOMEM;
}
return 0;
}
void gv11b_tsg_deinit_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg)
{
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
return;
}
nvgpu_kfree(g, tsg->subctx_pdb_map);
tsg->subctx_pdb_map = NULL;
nvgpu_kfree(g, tsg->valid_subctxs);
tsg->valid_subctxs = NULL;
}
static void gv11b_tsg_update_inst_blocks_subctxs(struct nvgpu_tsg *tsg)
{
struct gk20a *g = tsg->g;
struct nvgpu_channel *ch;
nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) {
g->ops.ramin.init_subctx_pdb(g, &ch->inst_block,
tsg->subctx_pdb_map);
g->ops.ramin.init_subctx_mask(g, &ch->inst_block,
tsg->valid_subctxs);
}
}
static void gv11b_tsg_update_subctxs(struct nvgpu_tsg *tsg, u32 subctx_id,
struct vm_gk20a *vm, bool replayable, bool add)
{
struct gk20a *g = tsg->g;
if (add) {
g->ops.ramin.set_subctx_pdb_info(g, subctx_id, vm->pdb.mem,
replayable, true, tsg->subctx_pdb_map);
nvgpu_set_bit(subctx_id, tsg->valid_subctxs);
} else {
g->ops.ramin.set_subctx_pdb_info(g, subctx_id, NULL,
false, false, tsg->subctx_pdb_map);
nvgpu_clear_bit(subctx_id, tsg->valid_subctxs);
}
gv11b_tsg_update_inst_blocks_subctxs(tsg);
}
static void gv11b_tsg_add_new_subctx_channel_hw(struct nvgpu_channel *ch,
bool replayable)
{
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
struct nvgpu_tsg_subctx *subctx = ch->subctx;
struct vm_gk20a *vm = nvgpu_tsg_subctx_get_vm(subctx);
u32 subctx_id = nvgpu_tsg_subctx_get_id(subctx);
nvgpu_tsg_subctx_set_replayable(subctx, replayable);
gv11b_tsg_update_subctxs(tsg, subctx_id, vm, replayable, true);
}
static void gv11b_tsg_add_existing_subctx_channel_hw(struct nvgpu_channel *ch,
bool replayable)
{
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
struct nvgpu_tsg_subctx *subctx = ch->subctx;
struct gk20a *g = ch->g;
if (nvgpu_tsg_subctx_get_replayable(subctx) != replayable) {
nvgpu_err(g, "subctx replayable mismatch. ignoring.");
}
g->ops.ramin.init_subctx_pdb(g, &ch->inst_block, tsg->subctx_pdb_map);
g->ops.ramin.init_subctx_mask(g, &ch->inst_block, tsg->valid_subctxs);
}
int gv11b_tsg_add_subctx_channel_hw(struct nvgpu_channel *ch, bool replayable)
{
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
struct gk20a *g = tsg->g;
int err;
nvgpu_log(g, gpu_dbg_fn, " ");
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
return 0;
}
/*
* Add new subcontext to the TSG. Sequence for this is below:
* 1. Disable TSG.
* 2. Preempt TSG.
* 3. Program subctx PDBs in instance blocks of all channels in
* the TSG.
* 4. Enable TSG.
* This sequence is executed holding the TSG-level lock ctx_init_lock
* to synchronize with channels from other subcontexts.
* ctx_init_lock is reused here. It is originally there for
* synchronizing the GR context initialization by various
* channels in the TSG.
*/
nvgpu_mutex_acquire(&tsg->ctx_init_lock);
g->ops.tsg.disable(tsg);
err = g->ops.fifo.preempt_tsg(g, tsg);
if (err != 0) {
g->ops.tsg.enable(tsg);
nvgpu_mutex_release(&tsg->ctx_init_lock);
nvgpu_err(g, "preempt failed %d", err);
return err;
}
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
if (!nvgpu_test_bit(ch->subctx_id, tsg->valid_subctxs)) {
gv11b_tsg_add_new_subctx_channel_hw(ch, replayable);
} else {
gv11b_tsg_add_existing_subctx_channel_hw(ch, replayable);
}
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
g->ops.tsg.enable(tsg);
nvgpu_mutex_release(&tsg->ctx_init_lock);
nvgpu_log(g, gpu_dbg_fn, "done");
return 0;
}
void gv11b_tsg_remove_subctx_channel_hw(struct nvgpu_channel *ch)
{
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
struct gk20a *g = tsg->g;
u32 subctx_id;
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
return;
}
subctx_id = nvgpu_tsg_subctx_get_id(ch->subctx);
gv11b_tsg_update_subctxs(tsg, subctx_id, NULL, false, false);
}

View File

@@ -1131,7 +1131,10 @@ static const struct gops_ramin ga100_ops_ramin = {
.set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_gr_ptr = gv11b_ramin_set_gr_ptr,
.set_big_page_size = gm20b_ramin_set_big_page_size, .set_big_page_size = gm20b_ramin_set_big_page_size,
.init_pdb = ga10b_ramin_init_pdb, .init_pdb = ga10b_ramin_init_pdb,
.init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map,
.set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.init_subctx_mask = gv11b_ramin_init_subctx_valid_mask,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift, .base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size, .alloc_size = gk20a_ramin_alloc_size,
@@ -1199,6 +1202,10 @@ static const struct gops_channel ga100_ops_channel = {
static const struct gops_tsg ga100_ops_tsg = { static const struct gops_tsg ga100_ops_tsg = {
.enable = gv11b_tsg_enable, .enable = gv11b_tsg_enable,
.disable = nvgpu_tsg_disable, .disable = nvgpu_tsg_disable,
.init_subctx_state = gv11b_tsg_init_subctx_state,
.deinit_subctx_state = gv11b_tsg_deinit_subctx_state,
.add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw,
.remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw,
.init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers,
.deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers,
.bind_channel = NULL, .bind_channel = NULL,

View File

@@ -1145,7 +1145,10 @@ static const struct gops_ramin ga10b_ops_ramin = {
.set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_gr_ptr = gv11b_ramin_set_gr_ptr,
.set_big_page_size = gm20b_ramin_set_big_page_size, .set_big_page_size = gm20b_ramin_set_big_page_size,
.init_pdb = ga10b_ramin_init_pdb, .init_pdb = ga10b_ramin_init_pdb,
.init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map,
.set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.init_subctx_mask = gv11b_ramin_init_subctx_valid_mask,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift, .base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size, .alloc_size = gk20a_ramin_alloc_size,
@@ -1214,6 +1217,10 @@ static const struct gops_channel ga10b_ops_channel = {
static const struct gops_tsg ga10b_ops_tsg = { static const struct gops_tsg ga10b_ops_tsg = {
.enable = gv11b_tsg_enable, .enable = gv11b_tsg_enable,
.disable = nvgpu_tsg_disable, .disable = nvgpu_tsg_disable,
.init_subctx_state = gv11b_tsg_init_subctx_state,
.deinit_subctx_state = gv11b_tsg_deinit_subctx_state,
.add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw,
.remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw,
.init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers,
.deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers,
.bind_channel = NULL, .bind_channel = NULL,

View File

@@ -803,7 +803,7 @@ static const struct gops_mm gm20b_ops_mm = {
.setup_hw = nvgpu_mm_setup_hw, .setup_hw = nvgpu_mm_setup_hw,
.is_bar1_supported = gm20b_mm_is_bar1_supported, .is_bar1_supported = gm20b_mm_is_bar1_supported,
.init_inst_block = gk20a_mm_init_inst_block, .init_inst_block = gk20a_mm_init_inst_block,
.init_inst_block_core = gk20a_mm_init_inst_block, .init_inst_block_core = gk20a_mm_init_inst_block_core,
.get_default_va_sizes = gm20b_mm_get_default_va_sizes, .get_default_va_sizes = gm20b_mm_get_default_va_sizes,
#ifdef CONFIG_NVGPU_USERD #ifdef CONFIG_NVGPU_USERD
.bar1_map_userd = gk20a_mm_bar1_map_userd, .bar1_map_userd = gk20a_mm_bar1_map_userd,

View File

@@ -969,7 +969,10 @@ static const struct gops_ramin gv11b_ops_ramin = {
.set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_gr_ptr = gv11b_ramin_set_gr_ptr,
.set_big_page_size = gm20b_ramin_set_big_page_size, .set_big_page_size = gm20b_ramin_set_big_page_size,
.init_pdb = gv11b_ramin_init_pdb, .init_pdb = gv11b_ramin_init_pdb,
.init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map,
.set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.init_subctx_mask = gv11b_ramin_init_subctx_valid_mask,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift, .base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size, .alloc_size = gk20a_ramin_alloc_size,
@@ -1013,7 +1016,7 @@ static const struct gops_userd gv11b_ops_userd = {
static const struct gops_channel gv11b_ops_channel = { static const struct gops_channel gv11b_ops_channel = {
.alloc_inst = nvgpu_channel_alloc_inst, .alloc_inst = nvgpu_channel_alloc_inst,
.free_inst = nvgpu_channel_free_inst, .free_inst = nvgpu_channel_free_inst,
.bind = gm20b_channel_bind, .bind = gv11b_channel_bind,
.unbind = gv11b_channel_unbind, .unbind = gv11b_channel_unbind,
.enable = gk20a_channel_enable, .enable = gk20a_channel_enable,
.disable = gk20a_channel_disable, .disable = gk20a_channel_disable,
@@ -1030,6 +1033,10 @@ static const struct gops_channel gv11b_ops_channel = {
static const struct gops_tsg gv11b_ops_tsg = { static const struct gops_tsg gv11b_ops_tsg = {
.enable = gv11b_tsg_enable, .enable = gv11b_tsg_enable,
.disable = nvgpu_tsg_disable, .disable = nvgpu_tsg_disable,
.init_subctx_state = gv11b_tsg_init_subctx_state,
.deinit_subctx_state = gv11b_tsg_deinit_subctx_state,
.add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw,
.remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw,
.init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers,
.deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers,
.bind_channel = NULL, .bind_channel = NULL,

View File

@@ -1026,7 +1026,10 @@ static const struct gops_ramin tu104_ops_ramin = {
.set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_gr_ptr = gv11b_ramin_set_gr_ptr,
.set_big_page_size = gm20b_ramin_set_big_page_size, .set_big_page_size = gm20b_ramin_set_big_page_size,
.init_pdb = gv11b_ramin_init_pdb, .init_pdb = gv11b_ramin_init_pdb,
.init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map,
.set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.init_subctx_mask = gv11b_ramin_init_subctx_valid_mask,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift, .base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size, .alloc_size = gk20a_ramin_alloc_size,
@@ -1068,7 +1071,7 @@ static const struct gops_userd tu104_ops_userd = {
static const struct gops_channel tu104_ops_channel = { static const struct gops_channel tu104_ops_channel = {
.alloc_inst = nvgpu_channel_alloc_inst, .alloc_inst = nvgpu_channel_alloc_inst,
.free_inst = nvgpu_channel_free_inst, .free_inst = nvgpu_channel_free_inst,
.bind = gm20b_channel_bind, .bind = gv11b_channel_bind,
.unbind = gv11b_channel_unbind, .unbind = gv11b_channel_unbind,
.enable = gk20a_channel_enable, .enable = gk20a_channel_enable,
.disable = gk20a_channel_disable, .disable = gk20a_channel_disable,
@@ -1085,6 +1088,10 @@ static const struct gops_channel tu104_ops_channel = {
static const struct gops_tsg tu104_ops_tsg = { static const struct gops_tsg tu104_ops_tsg = {
.enable = gv11b_tsg_enable, .enable = gv11b_tsg_enable,
.disable = nvgpu_tsg_disable, .disable = nvgpu_tsg_disable,
.init_subctx_state = gv11b_tsg_init_subctx_state,
.deinit_subctx_state = gv11b_tsg_deinit_subctx_state,
.add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw,
.remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw,
.init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers,
.deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers,
.bind_channel = NULL, .bind_channel = NULL,

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -44,6 +44,14 @@ void gk20a_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
} }
} }
int gk20a_mm_init_inst_block_core(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size)
{
gk20a_mm_init_inst_block(inst_block, vm, big_page_size);
return 0;
}
#ifdef CONFIG_NVGPU_USERD #ifdef CONFIG_NVGPU_USERD
u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{ {

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -30,6 +30,8 @@ struct vm_gk20a;
void gk20a_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, void gk20a_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
u32 big_page_size); u32 big_page_size);
int gk20a_mm_init_inst_block_core(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size);
u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset); u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
#endif #endif

View File

@@ -51,7 +51,11 @@ int gp10b_mm_init_bar2_vm(struct gk20a *g)
goto clean_up_va; goto clean_up_va;
} }
g->ops.mm.init_inst_block_core(inst_block, mm->bar2.vm, big_page_size); err = g->ops.mm.init_inst_block_core(inst_block, mm->bar2.vm, big_page_size);
if (err != 0) {
nvgpu_free_inst_block(g, inst_block);
goto clean_up_va;
}
return 0; return 0;

View File

@@ -30,7 +30,7 @@ struct vm_gk20a;
void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
u32 big_page_size); u32 big_page_size);
void gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, int gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, struct vm_gk20a *vm,
u32 big_page_size); u32 big_page_size);
bool gv11b_mm_is_bar1_supported(struct gk20a *g); bool gv11b_mm_is_bar1_supported(struct gk20a *g);

View File

@@ -21,6 +21,7 @@
*/ */
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/fifo.h>
#include <nvgpu/gmmu.h> #include <nvgpu/gmmu.h>
#include <nvgpu/mm.h> #include <nvgpu/mm.h>
@@ -42,14 +43,48 @@ void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block,
} }
} }
void gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, int gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size) struct vm_gk20a *vm, u32 big_page_size)
{ {
struct gk20a *g = gk20a_from_vm(vm); struct gk20a *g = gk20a_from_vm(vm);
u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count();
unsigned long *valid_subctxs;
u32 *subctx_pdb_map;
subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U);
if (subctx_pdb_map == NULL) {
nvgpu_err(g, "subctx_pdb_map alloc failed");
return -ENOMEM;
}
valid_subctxs = nvgpu_kzalloc(g,
BITS_TO_LONGS(max_subctx_count) *
sizeof(unsigned long));
if (valid_subctxs == NULL) {
nvgpu_err(g, "valid_subctxs bitmask alloc failed");
nvgpu_kfree(g, subctx_pdb_map);
return -ENOMEM;
}
gv11b_mm_init_inst_block(inst_block, vm, big_page_size); gv11b_mm_init_inst_block(inst_block, vm, big_page_size);
g->ops.ramin.init_subctx_pdb(g, inst_block, vm->pdb.mem, false, 1U); /* Program subctx pdb info in the instance block */
g->ops.ramin.init_subctx_pdb_map(g, subctx_pdb_map);
g->ops.ramin.set_subctx_pdb_info(g, CHANNEL_INFO_VEID0, vm->pdb.mem,
false, true, subctx_pdb_map);
g->ops.ramin.init_subctx_pdb(g, inst_block, subctx_pdb_map);
/*
* Program subctx pdb valid mask in the instance block.
* Only subctx 0 is valid here.
*/
nvgpu_set_bit(CHANNEL_INFO_VEID0, valid_subctxs);
g->ops.ramin.init_subctx_mask(g, inst_block, valid_subctxs);
nvgpu_kfree(g, valid_subctxs);
nvgpu_kfree(g, subctx_pdb_map);
return 0;
} }
bool gv11b_mm_is_bar1_supported(struct gk20a *g) bool gv11b_mm_is_bar1_supported(struct gk20a *g)

View File

@@ -693,7 +693,10 @@ static const struct gops_ramin vgpu_ga10b_ops_ramin = {
.set_gr_ptr = NULL, .set_gr_ptr = NULL,
.set_big_page_size = gm20b_ramin_set_big_page_size, .set_big_page_size = gm20b_ramin_set_big_page_size,
.init_pdb = gv11b_ramin_init_pdb, .init_pdb = gv11b_ramin_init_pdb,
.init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map,
.set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.init_subctx_mask = gv11b_ramin_init_subctx_valid_mask,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift, .base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size, .alloc_size = gk20a_ramin_alloc_size,
@@ -744,6 +747,10 @@ static const struct gops_channel vgpu_ga10b_ops_channel = {
static const struct gops_tsg vgpu_ga10b_ops_tsg = { static const struct gops_tsg vgpu_ga10b_ops_tsg = {
.open = vgpu_tsg_open, .open = vgpu_tsg_open,
.release = vgpu_tsg_release, .release = vgpu_tsg_release,
.init_subctx_state = NULL,
.deinit_subctx_state = NULL,
.add_subctx_channel_hw = NULL,
.remove_subctx_channel_hw = NULL,
.init_eng_method_buffers = NULL, .init_eng_method_buffers = NULL,
.deinit_eng_method_buffers = NULL, .deinit_eng_method_buffers = NULL,
.enable = gv11b_tsg_enable, .enable = gv11b_tsg_enable,

View File

@@ -660,7 +660,10 @@ static const struct gops_ramin vgpu_gv11b_ops_ramin = {
.set_gr_ptr = NULL, .set_gr_ptr = NULL,
.set_big_page_size = gm20b_ramin_set_big_page_size, .set_big_page_size = gm20b_ramin_set_big_page_size,
.init_pdb = gv11b_ramin_init_pdb, .init_pdb = gv11b_ramin_init_pdb,
.init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map,
.set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.init_subctx_mask = gv11b_ramin_init_subctx_valid_mask,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift, .base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size, .alloc_size = gk20a_ramin_alloc_size,
@@ -712,6 +715,10 @@ static const struct gops_channel vgpu_gv11b_ops_channel = {
static const struct gops_tsg vgpu_gv11b_ops_tsg = { static const struct gops_tsg vgpu_gv11b_ops_tsg = {
.open = vgpu_tsg_open, .open = vgpu_tsg_open,
.release = vgpu_tsg_release, .release = vgpu_tsg_release,
.init_subctx_state = NULL,
.deinit_subctx_state = NULL,
.add_subctx_channel_hw = NULL,
.remove_subctx_channel_hw = NULL,
.init_eng_method_buffers = NULL, .init_eng_method_buffers = NULL,
.deinit_eng_method_buffers = NULL, .deinit_eng_method_buffers = NULL,
.enable = gv11b_tsg_enable, .enable = gv11b_tsg_enable,

View File

@@ -501,6 +501,11 @@ struct nvgpu_channel {
/** Runlist the channel will run on. */ /** Runlist the channel will run on. */
struct nvgpu_runlist *runlist; struct nvgpu_runlist *runlist;
/**
* Replayable fault state for the channel.
*/
bool replayable;
/** /**
* Recovery path can be entered twice for the same error in * Recovery path can be entered twice for the same error in
* case of mmu_nack. This flag indicates if we already recovered * case of mmu_nack. This flag indicates if we already recovered
@@ -960,7 +965,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g,
pid_t pid, pid_t tid); pid_t pid, pid_t tid);
/** /**
* @brief Setup and bind the channel * @brief Setup and bind the channel and add subcontext PDB.
* *
* @param ch [in] Channel pointer. * @param ch [in] Channel pointer.
* @param args [in] Setup bind arguments. * @param args [in] Setup bind arguments.
@@ -975,6 +980,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g,
* provided in args. A submit token is passed back to be written in the * provided in args. A submit token is passed back to be written in the
* doorbell register in the usermode region to notify the GPU for new * doorbell register in the usermode region to notify the GPU for new
* work on this channel. * work on this channel.
* Update the instance blocks of all channels to add the subctx pdb.
* *
* @note An address space needs to have been bound to the channel before * @note An address space needs to have been bound to the channel before
* calling this function. * calling this function.

View File

@@ -566,7 +566,7 @@ struct gops_mm {
* - Configures the pdb base, big page size and * - Configures the pdb base, big page size and
* 0th sub context's pdb base in context's instance block memory. * 0th sub context's pdb base in context's instance block memory.
*/ */
void (*init_inst_block_core)(struct nvgpu_mem *inst_block, int (*init_inst_block_core)(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size); struct vm_gk20a *vm, u32 big_page_size);
/** /**

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -90,34 +90,78 @@ struct gops_ramin {
void (*init_pdb)(struct gk20a *g, struct nvgpu_mem *inst_block, void (*init_pdb)(struct gk20a *g, struct nvgpu_mem *inst_block,
u64 pdb_addr, struct nvgpu_mem *pdb_mem); u64 pdb_addr, struct nvgpu_mem *pdb_mem);
/**
* @brief Init subcontext pdb map for a TSG.
*
* @param g [in] Pointer to GPU driver struct.
* @param subctx_pdb_map [in] Memory pointing to pdb map for a TSG.
*
* This HAL initializes the given subcontext PDB map, marking the PDB
* entry of every subcontext invalid.
*/
void (*init_subctx_pdb_map)(struct gk20a *g,
u32 *subctx_pdb_map);
/**
* @brief Update subcontext pdb map for subcontext addition/removal.
*
* @param g [in] Pointer to GPU driver struct.
* @param subctx_id [in] Subcontext ID.
* @param pdb_mem [in] Memory descriptor of PDB.
* @param replayable [in] Indicates if errors are replayable
* for this Subcontext.
* @param add [in] Indicate if subcontext PDB is to be
* added or removed.
* @param subctx_pdb_map [in] Memory pointing to pdb map for a TSG.
*
* This HAL configures PDB for sub-context of Instance Block:
* If adding a subcontext PDB:
* - Get aperture mask from \a pdb_mem.
* - Get physical address of \a pdb_mem.
* - Build PDB entry with defaults for PT version, big page size,
* volatile attribute, and above aperture.
* - If \a replayable is true, set replayable attribute for TEX
* and GCC faults.
* - Set lo and hi 32-bits to point to \a pdb_mem.
* - Program related entry in \a subctx_pdb_map.
* If removing a subcontext PDB:
* - Set aperture as ram_in_sc_page_dir_base_target_invalid_v().
* - Program related entry in \a subctx_pdb_map.
*/
void (*set_subctx_pdb_info)(struct gk20a *g,
u32 subctx_id, struct nvgpu_mem *pdb_mem,
bool replayable, bool add, u32 *subctx_pdb_map);
/** /**
* @brief Init PDB for sub-contexts. * @brief Init PDB for sub-contexts.
* *
* @param g [in] Pointer to GPU driver struct. * @param g [in] Pointer to GPU driver struct.
* @param inst_block [in] Memory descriptor of Instance Block. * @param inst_block [in] Memory descriptor of Instance Block.
* @param pdb_mem [in] Memory descriptor of PDB. * @param subctx_pdb_map [in] Memory pointing to pdb map for a TSG.
* @param replayable [in] Indicates if errors are replayable
* for this Instance Block.
* @param max_subctx_count [in] Max number of sub context.
* *
* This HAL configures PDB for all sub-contexts of Instance Block: * This HAL configures PDB for all sub-contexts of Instance Block.
* - Get max number of sub-contexts from HW. * It copies \a subctx_pdb_map to the offset
* - Get aperture mask from \a pdb_mem. * ram_in_sc_page_dir_base_vol_w(0) * 4U in
* - Get physical address of \a pdb_mem. * the instance block.
* - For each sub-context:
* - Build PDB entry with defaults for PT version, big page size,
* volatile attribute, and above aperture.
* - If \a replayable is true, set replayable attribute for TEX
* and GCC faults.
* - Set lo and hi 32-bits to point to \a pdb_mem.
* - Program related entry in Instance Block.
*
* @see NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE
*/ */
void (*init_subctx_pdb)(struct gk20a *g, void (*init_subctx_pdb)(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *inst_block, u32 *subctx_pdb_map);
struct nvgpu_mem *pdb_mem,
bool replayable, u32 max_subctx_count); /**
* @brief Set the valid subcontexts mask.
*
* @param g [in] Pointer to GPU driver struct.
* @param inst_block [in] Memory descriptor of Instance
* Block.
* @param valid_subctx_mask [in] Bitmask of valid subcontexts.
*
* This HAL configures mask for all sub-contexts of Instance Block:
* - Get max number of sub-contexts from HW.
* - For each set of 32 subcontexts, set the mask from
* \a valid_subctx_mask in ram_in_sc_pdb_valid_long_w().
*/
void (*init_subctx_mask)(struct gk20a *g,
struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask);
/** /**
* @brief Instance Block shift. * @brief Instance Block shift.
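
A hedged sketch of what init_subctx_pdb amounts to (standalone
approximation; memcpy stands in for nvgpu_mem_wr_n() and dst_offset for
the ram_in_sc_page_dir_base_vol_w(0) * 4U offset used by
gv11b_ramin_init_subctx_pdb):

    #include <stdint.h>
    #include <string.h>

    /*
     * Sketch, not nvgpu code: copy the whole per-TSG PDB map (4 words
     * per subcontext) into the instance block image in one shot.
     */
    static void copy_subctx_pdb_map(uint8_t *inst_block_cpu_va,
                                    size_t dst_offset,
                                    const uint32_t *subctx_pdb_map,
                                    uint32_t max_subctx_count)
    {
            size_t size = (size_t)max_subctx_count * 4U * sizeof(uint32_t);

            memcpy(inst_block_cpu_va + dst_offset, subctx_pdb_map, size);
    }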

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -55,6 +55,11 @@ struct gops_tsg {
/** @cond DOXYGEN_SHOULD_SKIP_THIS */ /** @cond DOXYGEN_SHOULD_SKIP_THIS */
int (*open)(struct nvgpu_tsg *tsg); int (*open)(struct nvgpu_tsg *tsg);
void (*release)(struct nvgpu_tsg *tsg); void (*release)(struct nvgpu_tsg *tsg);
int (*init_subctx_state)(struct gk20a *g, struct nvgpu_tsg *tsg);
void (*deinit_subctx_state)(struct gk20a *g, struct nvgpu_tsg *tsg);
int (*add_subctx_channel_hw)(struct nvgpu_channel *ch,
bool replayable);
void (*remove_subctx_channel_hw)(struct nvgpu_channel *ch);
int (*init_eng_method_buffers)(struct gk20a *g, int (*init_eng_method_buffers)(struct gk20a *g,
struct nvgpu_tsg *tsg); struct nvgpu_tsg *tsg);
void (*deinit_eng_method_buffers)(struct gk20a *g, void (*deinit_eng_method_buffers)(struct gk20a *g,

View File

@@ -117,6 +117,10 @@ struct nvgpu_tsg {
*/ */
struct nvgpu_ref refcount; struct nvgpu_ref refcount;
u32 *subctx_pdb_map;
unsigned long *valid_subctxs;
/** /**
* List of subcontexts (#nvgpu_tsg_subctx) bound to this TSG. * List of subcontexts (#nvgpu_tsg_subctx) bound to this TSG.
* Accessed by holding #ch_list_lock from TSG. * Accessed by holding #ch_list_lock from TSG.

View File

@@ -60,6 +60,8 @@ int nvgpu_tsg_subctx_bind_channel(struct nvgpu_tsg *tsg,
* - Validate that #subctx is allocated for the channel #ch. * - Validate that #subctx is allocated for the channel #ch.
* - Remove the channel from the subctx #ch_list. * - Remove the channel from the subctx #ch_list.
* - If the subctx #ch_list is empty * - If the subctx #ch_list is empty
* - Update the instance blocks of all channels to remove the
* subctx pdb.
* - Invoke g->ops.gr.setup.free_subctx to free the GR subcontext * - Invoke g->ops.gr.setup.free_subctx to free the GR subcontext
* struct (and GR subcontext mappings struct). * struct (and GR subcontext mappings struct).
* - Remove the subctx from the TSG #subctx_list. * - Remove the subctx from the TSG #subctx_list.
@@ -120,6 +122,35 @@ struct nvgpu_gr_subctx *nvgpu_tsg_subctx_get_gr_subctx(
*/ */
u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *tsg_subctx); u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *tsg_subctx);
/**
* @brief Set replayable state for a TSG subcontext.
*
* @param subctx [in] Pointer to TSG subcontext.
* @param replayable [in] Replayable state for the subcontext.
*
* - Set #replayable in #nvgpu_tsg_subctx.
*/
void nvgpu_tsg_subctx_set_replayable(struct nvgpu_tsg_subctx *subctx,
bool replayable);
/**
* @brief Get replayable state for a TSG subcontext.
*
* @param subctx [in] Pointer to TSG subcontext.
*
* - Return #replayable from #nvgpu_tsg_subctx.
*/
bool nvgpu_tsg_subctx_get_replayable(struct nvgpu_tsg_subctx *subctx);
/**
* @brief Get VM for a TSG subcontext.
*
* @param subctx [in] Pointer to TSG subcontext.
*
* - Return #vm from #nvgpu_tsg_subctx.
*/
struct vm_gk20a *nvgpu_tsg_subctx_get_vm(struct nvgpu_tsg_subctx *subctx);
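
A short usage illustration for the three accessors above. The callers are hypothetical; ch->replayable is assumed to be the channel's cached replayable-fault setting, and vm->pdb.mem as the PDB backing memory is likewise an assumption.

/* Hypothetical caller: the first bound channel donates its replayable state. */
static void example_cache_subctx_state(struct nvgpu_tsg_subctx *subctx,
		struct nvgpu_channel *first_ch)
{
	nvgpu_tsg_subctx_set_replayable(subctx, first_ch->replayable);
}

/* Hypothetical reader: used when the subctx PDB entry is (re)programmed. */
static void example_program_subctx(struct gk20a *g,
		struct nvgpu_tsg_subctx *subctx, u32 subctx_id, u32 *pdb_map)
{
	struct vm_gk20a *vm = nvgpu_tsg_subctx_get_vm(subctx);
	bool replayable = nvgpu_tsg_subctx_get_replayable(subctx);

	/* Cache this subctx's PDB words for later instance block updates. */
	g->ops.ramin.set_subctx_pdb_info(g, subctx_id, vm->pdb.mem,
					 replayable, true, pdb_map);
}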
/** /**
* @brief Allocate or get the mappings struct for the TSG subcontext. * @brief Allocate or get the mappings struct for the TSG subcontext.
* *

View File

@@ -826,24 +826,24 @@ int test_acr_prepare_ucode_blob(struct unit_module *m,
nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
nvgpu_posix_enable_fault_injection(kmem_fi, true, 17); nvgpu_posix_enable_fault_injection(kmem_fi, true, 19);
unit_info(m, " kmem counter 17\n"); unit_info(m, " kmem counter 19\n");
err = g->acr->prepare_ucode_blob(g); err = g->acr->prepare_ucode_blob(g);
if (err != -ENOENT) { if (err != -ENOENT) {
unit_return_fail(m, "kmem count 17 test did not fail as expected\n"); unit_return_fail(m, "kmem count 19 test did not fail as expected\n");
} }
/* /*
* the kmem counter is decreased after 17th count * the kmem counter is decreased after 19th count
* because in the first attempt new memory is allocated and mapped for * because in the first attempt new memory is allocated and mapped for
* page directories but after that since memory is already allocated it * page directories but after that since memory is already allocated it
* is just mapped. Thus, number of kmallocs decrease. * is just mapped. Thus, number of kmallocs decrease.
*/ */
nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
for (i = 9; i < 17; i++) { for (i = 9; i < 19; i++) {
unit_info(m, "kmem counter %d\n", i); unit_info(m, "kmem counter %d\n", i);
nvgpu_posix_enable_fault_injection(kmem_fi, true, i); nvgpu_posix_enable_fault_injection(kmem_fi, true, i);
err = g->acr->prepare_ucode_blob(g); err = g->acr->prepare_ucode_blob(g);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -69,6 +69,12 @@ struct unit_ctx {
size_t size; size_t size;
}; };
static int stub_add_subctx_channel_hw(struct nvgpu_channel *ch,
bool replayable)
{
return 0;
}
int test_gv11b_channel_unbind(struct unit_module *m, int test_gv11b_channel_unbind(struct unit_module *m,
struct gk20a *g, void *args) struct gk20a *g, void *args)
{ {
@@ -77,6 +83,8 @@ int test_gv11b_channel_unbind(struct unit_module *m,
struct nvgpu_channel *ch; struct nvgpu_channel *ch;
int ret = UNIT_FAIL; int ret = UNIT_FAIL;
g->ops.tsg.add_subctx_channel_hw = stub_add_subctx_channel_hw;
ch = nvgpu_channel_open_new(g, runlist_id, ch = nvgpu_channel_open_new(g, runlist_id,
privileged, getpid(), getpid()); privileged, getpid(), getpid());
unit_assert(ch, goto done); unit_assert(ch, goto done);

View File

@@ -1562,6 +1562,12 @@ static void stub_channel_work_completion_cancel_sync(struct nvgpu_channel *ch)
} }
#endif #endif
static int stub_add_subctx_channel_hw(struct nvgpu_channel *ch,
bool replayable)
{
return 0;
}
int test_channel_suspend_resume_serviceable_chs(struct unit_module *m, int test_channel_suspend_resume_serviceable_chs(struct unit_module *m,
struct gk20a *g, void *vargs) struct gk20a *g, void *vargs)
{ {
@@ -1594,6 +1600,7 @@ int test_channel_suspend_resume_serviceable_chs(struct unit_module *m,
g->ops.fifo.preempt_tsg = stub_fifo_preempt_tsg; g->ops.fifo.preempt_tsg = stub_fifo_preempt_tsg;
g->ops.fifo.preempt_channel = stub_fifo_preempt_channel; g->ops.fifo.preempt_channel = stub_fifo_preempt_channel;
g->ops.runlist.reload = stub_runlist_reload; g->ops.runlist.reload = stub_runlist_reload;
g->ops.tsg.add_subctx_channel_hw = stub_add_subctx_channel_hw;
orig_ch_tsgid = ch->tsgid; orig_ch_tsgid = ch->tsgid;
for (branches = 0U; branches < F_CHANNEL_SUSPEND_RESUME_CHS_LAST; for (branches = 0U; branches < F_CHANNEL_SUSPEND_RESUME_CHS_LAST;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -131,13 +131,6 @@ static int stub_ramfc_commit_userd(struct nvgpu_channel *ch)
return 0; return 0;
} }
static void stub_ramin_init_subctx_pdb(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem,
bool replayable, u32 max_subctx_count)
{
global_count++;
}
#define F_RAMFC_SETUP_PRIVILEDGED_CH BIT(0) #define F_RAMFC_SETUP_PRIVILEDGED_CH BIT(0)
#define F_RAMFC_SETUP_LAST BIT(1) #define F_RAMFC_SETUP_LAST BIT(1)
@@ -156,7 +149,6 @@ int test_gv11b_ramfc_setup(struct unit_module *m, struct gk20a *g, void *args)
g->ops.ramin.alloc_size = gk20a_ramin_alloc_size; g->ops.ramin.alloc_size = gk20a_ramin_alloc_size;
g->ops.pbdma.acquire_val = stub_pbdma_acquire_val; g->ops.pbdma.acquire_val = stub_pbdma_acquire_val;
g->ops.ramin.init_subctx_pdb = stub_ramin_init_subctx_pdb;
g->ops.pbdma.get_gp_base = stub_pbdma_get_gp_base; g->ops.pbdma.get_gp_base = stub_pbdma_get_gp_base;
g->ops.pbdma.get_gp_base_hi = stub_pbdma_get_gp_base_hi; g->ops.pbdma.get_gp_base_hi = stub_pbdma_get_gp_base_hi;
g->ops.pbdma.get_signature = stub_pbdma_get_signature; g->ops.pbdma.get_signature = stub_pbdma_get_signature;
@@ -199,9 +191,9 @@ int test_gv11b_ramfc_setup(struct unit_module *m, struct gk20a *g, void *args)
ram_fc_config_w()) == 5U, goto done); ram_fc_config_w()) == 5U, goto done);
if (branches & F_RAMFC_SETUP_PRIVILEDGED_CH) { if (branches & F_RAMFC_SETUP_PRIVILEDGED_CH) {
unit_assert(global_count == 15U, goto done); unit_assert(global_count == 14U, goto done);
} else { } else {
unit_assert(global_count == 13U, goto done); unit_assert(global_count == 12U, goto done);
} }
} }

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -109,6 +109,24 @@ int test_gv11b_ramin_init_subctx_pdb(struct unit_module *m, struct gk20a *g,
u64 pdb_addr; u64 pdb_addr;
u32 max_subctx_count = ram_in_sc_page_dir_base_target__size_1_v(); u32 max_subctx_count = ram_in_sc_page_dir_base_target__size_1_v();
u32 aperture = ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(); u32 aperture = ram_in_sc_page_dir_base_target_sys_mem_ncoh_v();
unsigned long *valid_subctxs;
u32 *subctx_pdb_map;
subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U);
if (subctx_pdb_map == NULL) {
nvgpu_err(g, "subctx_pdb_map alloc failed");
return UNIT_FAIL;
}
valid_subctxs = nvgpu_kzalloc(g,
BITS_TO_LONGS(max_subctx_count) *
sizeof(unsigned long));
if (valid_subctxs == NULL) {
nvgpu_err(g, "valid_subctxs bitmap alloc failed");
nvgpu_kfree(g, subctx_pdb_map);
subctx_pdb_map = NULL;
return UNIT_FAIL;
}
g->ops.ramin.alloc_size = gk20a_ramin_alloc_size; g->ops.ramin.alloc_size = gk20a_ramin_alloc_size;
@@ -146,8 +164,21 @@ int test_gv11b_ramin_init_subctx_pdb(struct unit_module *m, struct gk20a *g,
1U, 0U); 1U, 0U);
} }
gv11b_ramin_init_subctx_pdb(g, &inst_block, &pdb_mem, g->ops.ramin.init_subctx_pdb_map(g, subctx_pdb_map);
replayable, 64); for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) {
g->ops.ramin.set_subctx_pdb_info(g, subctx_id,
&pdb_mem, replayable, true, subctx_pdb_map);
nvgpu_set_bit(subctx_id, valid_subctxs);
}
/* Program subctx pdb info in the instance block */
g->ops.ramin.init_subctx_pdb(g, &inst_block, subctx_pdb_map);
/*
* Program subctx pdb valid mask in the instance block.
* All subcontexts were marked valid in the loop above.
*/
g->ops.ramin.init_subctx_mask(g, &inst_block, valid_subctxs);
for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) { for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) {
addr_lo = ram_in_sc_page_dir_base_vol_w(subctx_id); addr_lo = ram_in_sc_page_dir_base_vol_w(subctx_id);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -63,7 +63,7 @@ int test_gv11b_ramin_set_gr_ptr(struct unit_module *m, struct gk20a *g,
* Test Type: Feature * Test Type: Feature
* *
* Targets: gops_ramin.init_subctx_pdb, gv11b_ramin_init_subctx_pdb, * Targets: gops_ramin.init_subctx_pdb, gv11b_ramin_init_subctx_pdb,
* gv11b_subctx_commit_pdb, gv11b_subctx_commit_valid_mask * gops_ramin.init_subctx_mask, gv11b_ramin_init_subctx_valid_mask
* *
* Input: None * Input: None
* *