diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 24ac73fe3..31719f365 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -222,6 +222,193 @@ struct nvgpu_tsg *nvgpu_tsg_get_from_id(struct gk20a *g, u32 tsgid)
 	return &f->tsg[tsgid];
 }
 
+/*
+ * Synchronous subcontext. Subcontext of this type may hold the
+ * graphics channel, and multiple copy engine and compute channels.
+ */
+#define NVGPU_TSG_SUBCONTEXT_TYPE_SYNC		(0x0U)
+
+/*
+ * Asynchronous subcontext. Asynchronous subcontext is for compute
+ * and copy engine channels only.
+ */
+#define NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC		(0x1U)
+
+#define MAX_SYNC_SUBCONTEXTS	1U
+
+static int nvgpu_tsg_create_sync_subcontext(struct gk20a *g,
+			struct nvgpu_tsg *tsg, u32 *veid)
+{
+	if (tsg->sync_veid) {
+		nvgpu_err(g, "SYNC VEID not available");
+		return -ENOSPC;
+	}
+
+	tsg->sync_veid = true;
+
+	*veid = 0U;
+
+	return 0;
+}
+
+static int nvgpu_tsg_create_async_subcontext(struct gk20a *g,
+			struct nvgpu_tsg *tsg, u32 max_subctx_count,
+			u32 *veid)
+{
+	u32 max_async_subcontexts = max_subctx_count - MAX_SYNC_SUBCONTEXTS;
+	int err;
+	u32 idx;
+
+	idx = nvgpu_safe_cast_u64_to_u32(
+		find_first_zero_bit(tsg->async_veids,
+				    max_async_subcontexts));
+
+	if (idx == max_async_subcontexts) {
+		nvgpu_log_info(g, "ASYNC VEID not available");
+		err = nvgpu_tsg_create_sync_subcontext(g, tsg, veid);
+		if (err != 0) {
+			nvgpu_err(g, "ASYNC & SYNC VEIDs not available");
+			return err;
+		}
+	} else {
+		nvgpu_set_bit(idx, tsg->async_veids);
+		/* ASYNC VEIDs start from 1. */
+		*veid = idx + MAX_SYNC_SUBCONTEXTS;
+	}
+
+	return 0;
+}
+
+int nvgpu_tsg_create_subcontext(struct gk20a *g, struct nvgpu_tsg *tsg,
+				u32 type, struct vm_gk20a *vm,
+				u32 max_subctx_count, u32 *veid)
+{
+	int err;
+
+	nvgpu_mutex_acquire(&tsg->veid_alloc_lock);
+
+	if (type == NVGPU_TSG_SUBCONTEXT_TYPE_SYNC) {
+		err = nvgpu_tsg_create_sync_subcontext(g, tsg, veid);
+		if (err != 0) {
+			nvgpu_err(g, "Sync VEID not available");
+			nvgpu_mutex_release(&tsg->veid_alloc_lock);
+			return err;
+		}
+	}
+
+	if (type == NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC) {
+		err = nvgpu_tsg_create_async_subcontext(g, tsg,
+				max_subctx_count, veid);
+		if (err != 0) {
+			nvgpu_err(g, "Async/Sync VEID not available");
+			nvgpu_mutex_release(&tsg->veid_alloc_lock);
+			return err;
+		}
+	}
+
+	if (tsg->subctx_vms[*veid] == NULL) {
+		tsg->subctx_vms[*veid] = vm;
+	}
+
+	nvgpu_mutex_release(&tsg->veid_alloc_lock);
+
+	nvgpu_log_info(g, "Allocated VEID %u", *veid);
+
+	return 0;
+}
+
+int nvgpu_tsg_delete_subcontext(struct gk20a *g, struct nvgpu_tsg *tsg,
+				u32 max_subctx_count, u32 veid)
+{
+	if (veid >= max_subctx_count) {
+		nvgpu_err(g, "Invalid VEID specified %u", veid);
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&tsg->veid_alloc_lock);
+
+	if (veid == 0U) {
+		if (!tsg->sync_veid) {
+			nvgpu_err(g, "VEID 0 not allocated");
+			nvgpu_mutex_release(&tsg->veid_alloc_lock);
+			return -EINVAL;
+		}
+
+		tsg->sync_veid = false;
+		tsg->subctx_vms[veid] = NULL;
+	} else {
+		if (!nvgpu_test_bit(veid - MAX_SYNC_SUBCONTEXTS, tsg->async_veids)) {
+			nvgpu_err(g, "VEID %u not allocated", veid);
+			nvgpu_mutex_release(&tsg->veid_alloc_lock);
+			return -EINVAL;
+		}
+		nvgpu_clear_bit(veid - MAX_SYNC_SUBCONTEXTS, tsg->async_veids);
+		tsg->subctx_vms[veid] = NULL;
+	}
+
+	nvgpu_mutex_release(&tsg->veid_alloc_lock);
+
+	nvgpu_log_info(g, "Freed VEID %u", veid);
+
+	return 0;
+}
+
+int nvgpu_tsg_create_sync_subcontext_internal(struct gk20a *g,
+		struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
+{
+	u32 subctx_id = 0U;
+	int err;
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
+		return 0;
+	}
+
+	nvgpu_mutex_acquire(&tsg->veid_alloc_lock);
+
+	subctx_id = ch->subctx_id;
+
+	/*
+	 * If this is the first channel created without creating a subcontext,
+	 * then this channel uses the subcontext with VEID 0 by default.
+	 * Set subctx_vms[0] and reserve VEID 0.
+	 */
+	if ((subctx_id == 0U) && (tsg->subctx_vms[0] == NULL)) {
+		err = nvgpu_tsg_create_sync_subcontext(g, tsg, &subctx_id);
+		if (err != 0) {
+			nvgpu_err(g, "SYNC VEID not available");
+			nvgpu_mutex_release(&tsg->veid_alloc_lock);
+			return err;
+		}
+
+		tsg->subctx_vms[0] = ch->vm;
+	}
+
+	nvgpu_mutex_release(&tsg->veid_alloc_lock);
+
+	return 0;
+}
+
+int nvgpu_tsg_validate_ch_subctx_vm(struct gk20a *g,
+		struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
+{
+	int err = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
+		return 0;
+	}
+
+	nvgpu_mutex_acquire(&tsg->veid_alloc_lock);
+
+	if (ch->vm != tsg->subctx_vms[ch->subctx_id]) {
+		nvgpu_err(g, "channel VM does not match with subcontext VM");
+		err = -EINVAL;
+	}
+
+	nvgpu_mutex_release(&tsg->veid_alloc_lock);
+
+	return err;
+}
+
 /*
  * API to mark channel as part of TSG
  *
@@ -654,6 +841,7 @@ static void nvgpu_tsg_destroy(struct nvgpu_tsg *tsg)
 	nvgpu_mutex_destroy(&tsg->event_id_list_lock);
 #endif
 	nvgpu_mutex_destroy(&tsg->ctx_init_lock);
+	nvgpu_mutex_destroy(&tsg->veid_alloc_lock);
 }
 
 #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
@@ -708,6 +896,7 @@ static void nvgpu_tsg_init_support(struct gk20a *g, u32 tsgid)
 	nvgpu_init_list_node(&tsg->gr_ctx_mappings_list);
 	nvgpu_rwsem_init(&tsg->ch_list_lock);
 	nvgpu_mutex_init(&tsg->ctx_init_lock);
+	nvgpu_mutex_init(&tsg->veid_alloc_lock);
 
 #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
 	nvgpu_init_list_node(&tsg->event_id_list);
@@ -1039,6 +1228,55 @@ static struct nvgpu_tsg *nvgpu_tsg_acquire_unused_tsg(struct nvgpu_fifo *f)
 	return tsg;
 }
 
+static int nvgpu_tsg_alloc_veid_state(struct gk20a *g, struct nvgpu_tsg *tsg)
+{
+	u32 max_async_subcontexts;
+	u32 max_subctx_count;
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
+		return 0;
+	}
+
+	tsg->sync_veid = false;
+
+	max_subctx_count = g->ops.gr.init.get_max_subctx_count();
+	max_async_subcontexts = max_subctx_count - MAX_SYNC_SUBCONTEXTS;
+
+	tsg->async_veids = nvgpu_kzalloc(g,
+			BITS_TO_LONGS(max_async_subcontexts) *
+			sizeof(unsigned long));
+	if (tsg->async_veids == NULL) {
+		nvgpu_err(g, "async veids bitmap alloc failed");
+		return -ENOMEM;
+	}
+
+	tsg->subctx_vms = nvgpu_kzalloc(g,
+			sizeof(struct vm_gk20a *) * max_subctx_count);
+	if (tsg->subctx_vms == NULL) {
+		nvgpu_err(g, "subctx vms alloc failed");
+		nvgpu_kfree(g, tsg->async_veids);
+		tsg->async_veids = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void nvgpu_tsg_free_veid_state(struct gk20a *g, struct nvgpu_tsg *tsg)
+{
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
+		return;
+	}
+
+	nvgpu_kfree(g, tsg->subctx_vms);
+	tsg->subctx_vms = NULL;
+
+	nvgpu_kfree(g, tsg->async_veids);
+	tsg->async_veids = NULL;
+
+	tsg->sync_veid = false;
+}
+
 int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid)
 {
 	u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
@@ -1086,6 +1324,12 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid)
 		}
 	}
 
+	err = nvgpu_tsg_alloc_veid_state(g, tsg);
+	if (err != 0) {
+		nvgpu_err(g, "VEID sw state alloc failed %d", err);
+		goto clean_up;
+	}
+
 #ifdef CONFIG_NVGPU_SM_DIVERSITY
 	nvgpu_gr_ctx_set_sm_diversity_config(tsg->gr_ctx,
 		NVGPU_INVALID_SM_CONFIG_ID);
@@ -1146,6 +1390,8 @@ void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg)
 		g->ops.tsg.release(tsg);
 	}
 
+	nvgpu_tsg_free_veid_state(g, tsg);
+
 	nvgpu_free_gr_ctx_struct(g, tsg->gr_ctx);
 	tsg->gr_ctx = NULL;
 
diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c
index 38c608e5d..a490e95e0 100644
--- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c
@@ -436,6 +436,7 @@ static const struct gops_gr_init vgpu_ga10b_ops_gr_init = {
 	.commit_global_cb_manager = gp10b_gr_init_commit_global_cb_manager,
 	.get_ctx_attrib_cb_size = gp10b_gr_init_get_ctx_attrib_cb_size,
 	.commit_cbes_reserve = gv11b_gr_init_commit_cbes_reserve,
+	.get_max_subctx_count = gv11b_gr_init_get_max_subctx_count,
 	.detect_sm_arch = vgpu_gr_detect_sm_arch,
 	.get_supported__preemption_modes = gp10b_gr_init_get_supported_preemption_modes,
 	.get_default_preemption_modes = gp10b_gr_init_get_default_preemption_modes,
diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
index c68c788b9..4697b48aa 100644
--- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
@@ -407,6 +407,7 @@ static const struct gops_gr_init vgpu_gv11b_ops_gr_init = {
 	.commit_global_cb_manager = gp10b_gr_init_commit_global_cb_manager,
 	.get_ctx_attrib_cb_size = gp10b_gr_init_get_ctx_attrib_cb_size,
 	.commit_cbes_reserve = gv11b_gr_init_commit_cbes_reserve,
+	.get_max_subctx_count = gv11b_gr_init_get_max_subctx_count,
 	.detect_sm_arch = vgpu_gr_detect_sm_arch,
 	.get_supported__preemption_modes = gp10b_gr_init_get_supported_preemption_modes,
 	.get_default_preemption_modes = gp10b_gr_init_get_default_preemption_modes,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
index 2d3afb8cf..69c7d5140 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
@@ -127,6 +127,35 @@ struct nvgpu_tsg {
 	 */
 	struct nvgpu_list_node subctx_list;
 
+	/**
+	 * Mutex to synchronize VEID allocation and free. It is also used to
+	 * access/modify #subctx_vms.
+	 */
+	struct nvgpu_mutex veid_alloc_lock;
+
+	/*
+	 * Set to true if the SYNC VEID is allocated to userspace. When the
+	 * user requests a subcontext of type NVGPU_TSG_SUBCONTEXT_TYPE_SYNC,
+	 * this is set to true if not already set. When the user requests
+	 * a subcontext of type NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC, this is
+	 * set to true if not already set and all ASYNC VEIDs are already
+	 * allocated.
+	 */
+	bool sync_veid;
+
+	/*
+	 * Bitmap of ASYNC VEIDs allocated to userspace. When the user
+	 * requests a subcontext of type NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC,
+	 * an available bit from this bitmap is set.
+	 */
+	unsigned long *async_veids;
+
+	/*
+	 * VMs associated with subcontexts. Currently only a single VM
+	 * association is supported.
+	 */
+	struct vm_gk20a **subctx_vms;
+
 	/** List of channels bound to this TSG. */
 	struct nvgpu_list_node ch_list;
 
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
@@ -246,6 +275,88 @@ struct nvgpu_tsg {
 #endif
 };
 
+/**
+ * @brief Allocate subcontext VEID within a TSG.
+ *
+ * @param g [in]			The GPU driver struct.
+ * @param tsg [in]			Pointer to TSG struct.
+ * @param type [in]			Type of subcontext.
+ * @param vm [in]			Pointer to virtual memory struct.
+ * @param max_subctx_count [in]	Maximum subcontexts supported for the
+ *					GPU instance.
+ * @param veid [out]			VEID allocated.
+ *
+ * - Validate the VM. Since only a single VM is supported per TSG for now,
+ *   return -EINVAL if a VM different from the one already assigned to the
+ *   TSG is specified.
+ * - If a sync subcontext is requested, allocate it if available and set
+ *   the flag #sync_veid.
+ * - If an async subcontext is requested, allocate it from the bitmap
+ *   #async_veids if available. If no async VEID is available but the sync
+ *   VEID is, allocate the sync subcontext by setting the flag #sync_veid.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL if invalid VM.
+ * @retval -ENOSPC if no VEID is available.
+ */
+int nvgpu_tsg_create_subcontext(struct gk20a *g, struct nvgpu_tsg *tsg,
+				u32 type, struct vm_gk20a *vm,
+				u32 max_subctx_count, u32 *veid);
+
+/**
+ * @brief Free subcontext VEID from a TSG.
+ *
+ * @param g [in]			The GPU driver struct.
+ * @param tsg [in]			Pointer to TSG struct.
+ * @param max_subctx_count [in]	Maximum subcontexts supported for the
+ *					GPU instance.
+ * @param veid [in]			VEID to be freed.
+ *
+ * - Validate #veid. If invalid, return -EINVAL.
+ * - Else free the VEID by clearing either #sync_veid or the corresponding
+ *   bit in #async_veids if allocated. If not allocated, return -EINVAL.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL if veid is invalid.
+ */
+int nvgpu_tsg_delete_subcontext(struct gk20a *g, struct nvgpu_tsg *tsg,
+				u32 max_subctx_count, u32 veid);
+
+/**
+ * @brief Mark the sync subcontext created when a channel is opened with an
+ *        implicit subcontext.
+ *
+ * @param g [in]			The GPU driver struct.
+ * @param tsg [in]			Pointer to TSG struct.
+ * @param ch [in]			Pointer to Channel struct.
+ *
+ * - If this is the first channel created without creating a subcontext,
+ *   then this channel uses the subcontext with VEID 0 by default.
+ *   Set #subctx_vms and reserve VEID 0.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ */
+int nvgpu_tsg_create_sync_subcontext_internal(struct gk20a *g,
+		struct nvgpu_tsg *tsg, struct nvgpu_channel *ch);
+
+/**
+ * @brief Validate the VM associated with the channel and TSG subcontext.
+ *
+ * @param g [in]			The GPU driver struct.
+ * @param tsg [in]			Pointer to TSG struct.
+ * @param ch [in]			Pointer to Channel struct.
+ *
+ * - If this is the first channel created without creating a subcontext,
+ *   then this channel uses the subcontext with VEID 0 by default.
+ *   Set #subctx_vms and reserve VEID 0.
+ * - If the channel VM does not match the subcontext VM, return -EINVAL.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL if invalid VM.
+ */
+int nvgpu_tsg_validate_ch_subctx_vm(struct gk20a *g,
+		struct nvgpu_tsg *tsg, struct nvgpu_channel *ch);
+
 /**
  * @brief Initialize given TSG
  *
@@ -371,6 +482,7 @@ void nvgpu_tsg_disable(struct nvgpu_tsg *tsg);
  * @param ch [in]			Pointer to Channel struct.
  *
  * - Make sure channel is not already bound to a TSG.
+ * - Make sure channel VM matches the subcontext VM.
  * - Make sure channel is not part of any runlists.
  * - If channel had ASYNC subctx id, then set runqueue selector to 1.
 * - Set runlist id of TSG to channel's runlist_id if runlist_id of TSG
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index ab770d06c..b98bab052 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -453,6 +454,31 @@ int gk20a_as_dev_release(struct inode *inode, struct file *filp)
 	return gk20a_as_release_share(as_share);
 }
 
+/*
+ * This returns the AS with a reference. The caller must
+ * nvgpu_vm_put() the ref back after use.
+ *
+ * NULL is returned if the AS was not found.
+ */
+struct vm_gk20a *nvgpu_vm_get_from_file(int fd)
+{
+	struct gk20a_as_share *as_share;
+	struct file *f = fget(fd);
+
+	if (!f)
+		return NULL;
+
+	if (f->f_op != &gk20a_as_ops) {
+		fput(f);
+		return NULL;
+	}
+
+	as_share = (struct gk20a_as_share *)f->private_data;
+	nvgpu_vm_get(as_share->vm);
+	fput(f);
+	return as_share->vm;
+}
+
 long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	int err = 0;
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.h b/drivers/gpu/nvgpu/os/linux/ioctl_as.h
index b3de3782f..7df30404a 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.h
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.h
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -17,6 +17,7 @@
 struct inode;
 struct file;
+struct vm_gk20a;
 
 /* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and
 * num_maps */
@@ -25,6 +26,9 @@ struct file;
 
 /* struct file_operations driver interface */
 int gk20a_as_dev_open(struct inode *inode, struct file *filp);
 int gk20a_as_dev_release(struct inode *inode, struct file *filp);
+struct vm_gk20a *nvgpu_vm_get_from_file(int fd);
 long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 
+extern const struct file_operations gk20a_as_ops;
+
 #endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 4a8fc0359..06961fb27 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -78,7 +78,6 @@
 	(u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
 #define MHZ_TO_HZ(a) ((u64)a * MHZ)
 
-extern const struct file_operations gk20a_as_ops;
 extern const struct file_operations gk20a_tsg_ops;
 
 struct gk20a_ctrl_priv {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
index ca06bf517..8085eb359 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
@@ -40,6 +40,7 @@
 #include "platform_gk20a.h"
 #include "ioctl_tsg.h"
+#include "ioctl_as.h"
 #include "ioctl_channel.h"
 #include "ioctl_nvs.h"
 #include "ioctl.h"
@@ -83,6 +84,18 @@ static int nvgpu_tsg_bind_channel_fd(struct nvgpu_tsg *tsg, int ch_fd)
 	if (!ch)
 		return -EINVAL;
 
+	err = nvgpu_tsg_create_sync_subcontext_internal(ch->g, tsg, ch);
+	if (err != 0) {
+		nvgpu_err(ch->g, "sync subctx create failed %d", err);
+		return err;
+	}
+
+	err = nvgpu_tsg_validate_ch_subctx_vm(ch->g, tsg, ch);
+	if (err != 0) {
+		nvgpu_err(ch->g, "channel/subctx VM mismatch");
+		return err;
+	}
+
 	err = nvgpu_tsg_bind_channel(tsg, ch);
 
 	nvgpu_channel_put(ch);
@@ -137,6 +150,18 @@ static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
 	if (ch->subctx_id > CHANNEL_INFO_VEID0)
 		ch->runqueue_sel = 1;
 
+	err = nvgpu_tsg_create_sync_subcontext_internal(g, tsg, ch);
+	if (err != 0) {
+		nvgpu_err(ch->g, "sync subctx create failed %d", err);
+		goto ch_put;
+	}
+
+	err = nvgpu_tsg_validate_ch_subctx_vm(g, tsg, ch);
+	if (err != 0) {
+		nvgpu_err(g, "channel/subctx VM mismatch");
+		goto ch_put;
+	}
+
 	err = nvgpu_tsg_bind_channel(tsg, ch);
 ch_put:
 	nvgpu_channel_put(ch);
@@ -749,6 +774,70 @@ static int nvgpu_gpu_ioctl_set_l2_sector_promotion(struct gk20a *g,
 	return err;
 }
 
+static int nvgpu_tsg_ioctl_create_subcontext(struct gk20a *g,
+		u32 gpu_instance_id, struct nvgpu_tsg *tsg,
+		struct nvgpu_tsg_create_subcontext_args *args)
+{
+	u32 max_subctx_count;
+	struct vm_gk20a *vm;
+	u32 veid;
+	int err;
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
+		return 0;
+	}
+
+	if (args->type != NVGPU_TSG_SUBCONTEXT_TYPE_SYNC &&
+	    args->type != NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC) {
+		nvgpu_err(g, "Invalid subcontext type %u", args->type);
+		return -EINVAL;
+	}
+
+	vm = nvgpu_vm_get_from_file(args->as_fd);
+	if (vm == NULL) {
+		nvgpu_err(g, "Invalid VM (fd = %d) specified for subcontext",
+			  args->as_fd);
+		return -EINVAL;
+	}
+
+	max_subctx_count =
+		nvgpu_grmgr_get_gpu_instance_max_veid_count(g, gpu_instance_id);
+
+	err = nvgpu_tsg_create_subcontext(g, tsg, args->type, vm,
+					  max_subctx_count, &veid);
+	if (err != 0) {
+		nvgpu_err(g, "Create subcontext failed %d", err);
+		nvgpu_vm_put(vm);
+		return err;
+	}
+
+	nvgpu_vm_put(vm);
+
+	args->veid = veid;
+
+	return 0;
+}
+
+static int nvgpu_tsg_ioctl_delete_subcontext(struct gk20a *g,
+		u32 gpu_instance_id, struct nvgpu_tsg *tsg,
+		struct nvgpu_tsg_delete_subcontext_args *args)
+{
+	u32 max_subctx_count;
+	int err;
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
+		return 0;
+	}
+
+	max_subctx_count =
+		nvgpu_grmgr_get_gpu_instance_max_veid_count(g, gpu_instance_id);
+
+	err = nvgpu_tsg_delete_subcontext(g, tsg, max_subctx_count, args->veid);
+	if (err != 0) {
+		nvgpu_err(g, "Delete subcontext failed %d", err);
+	}
+
+	return err;
+}
+
 long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 	unsigned long arg)
 {
@@ -952,6 +1041,18 @@ long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 	}
 
+	case NVGPU_TSG_IOCTL_CREATE_SUBCONTEXT:
+	{
+		err = nvgpu_tsg_ioctl_create_subcontext(g, gpu_instance_id, tsg,
+			(struct nvgpu_tsg_create_subcontext_args *)buf);
+		break;
+	}
+	case NVGPU_TSG_IOCTL_DELETE_SUBCONTEXT:
+	{
+		err = nvgpu_tsg_ioctl_delete_subcontext(g, gpu_instance_id, tsg,
+			(struct nvgpu_tsg_delete_subcontext_args *)buf);
+		break;
+	}
 	default:
 		nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
 			cmd);
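
For reference, below is a minimal userspace sketch (not part of the patch) of how the new
NVGPU_TSG_IOCTL_CREATE_SUBCONTEXT / NVGPU_TSG_IOCTL_DELETE_SUBCONTEXT ioctls might be driven.
It assumes the uapi header exposes the nvgpu_tsg_create_subcontext_args /
nvgpu_tsg_delete_subcontext_args structs with the type, as_fd and veid fields used above, and
that the SYNC/ASYNC type values match the driver-side defines; the header path and all fds are
hypothetical.

/* Sketch only: allocate an ASYNC subcontext VEID, then free it. */
#include <stdio.h>
#include <sys/ioctl.h>
#include <uapi/linux/nvgpu.h>	/* hypothetical location of the TSG ioctl uapi */

static int alloc_async_veid(int tsg_fd, int as_fd, unsigned int *veid)
{
	struct nvgpu_tsg_create_subcontext_args args = {
		.type = NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC,
		.as_fd = as_fd,	/* address space fd to associate with the VEID */
	};

	if (ioctl(tsg_fd, NVGPU_TSG_IOCTL_CREATE_SUBCONTEXT, &args) != 0) {
		perror("create subcontext");
		return -1;
	}

	/* Driver-chosen VEID: 0 is the SYNC VEID, ASYNC VEIDs start from 1. */
	*veid = args.veid;
	return 0;
}

static int free_veid(int tsg_fd, unsigned int veid)
{
	struct nvgpu_tsg_delete_subcontext_args args = { .veid = veid };

	return ioctl(tsg_fd, NVGPU_TSG_IOCTL_DELETE_SUBCONTEXT, &args);
}

Channels opened with the returned VEID and the same address space fd would then pass the
nvgpu_tsg_validate_ch_subctx_vm() check when bound to the TSG.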