gpu: nvgpu: implement VEID alloc/free
Implement the ioctls NVGPU_TSG_IOCTL_CREATE_SUBCONTEXT and
NVGPU_TSG_IOCTL_DELETE_SUBCONTEXT. These allocate and free the VEID
numbers. The address space associated with each VEID is verified so
that the channels' association with VEIDs and address spaces stays
consistent.

Bug 3677982
JIRA NVGPU-8681

Change-Id: I2d913baf61a6bdeec412c58270c0024b80ca15c6
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2766765
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Committed by: mobile promotions
Parent: 9233886943
Commit: d1b28712b6
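
Before the diff, a minimal userspace sketch of the two new ioctls. The ioctl names, the argument structs (nvgpu_tsg_create_subcontext_args with type/as_fd/veid, nvgpu_tsg_delete_subcontext_args with veid) and the type constants are taken from this change; how the TSG fd itself is opened is outside this commit, so tsg_fd and as_fd are assumed to be valid descriptors obtained elsewhere.

    #include <sys/ioctl.h>
    #include <uapi/linux/nvgpu.h>   /* nvgpu UAPI; the include path may differ */

    /* Allocate an ASYNC subcontext VEID backed by the address space as_fd. */
    static int alloc_async_veid(int tsg_fd, int as_fd, unsigned int *veid_out)
    {
        struct nvgpu_tsg_create_subcontext_args args = {
            .type = NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC, /* compute/CE channels only */
            .as_fd = as_fd,
        };

        if (ioctl(tsg_fd, NVGPU_TSG_IOCTL_CREATE_SUBCONTEXT, &args) != 0)
            return -1;

        *veid_out = args.veid;  /* VEID 0 is SYNC; ASYNC VEIDs start at 1 */
        return 0;
    }

    /* Free a previously allocated subcontext VEID. */
    static int free_veid(int tsg_fd, unsigned int veid)
    {
        struct nvgpu_tsg_delete_subcontext_args args = { .veid = veid };

        return ioctl(tsg_fd, NVGPU_TSG_IOCTL_DELETE_SUBCONTEXT, &args);
    }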
@@ -222,6 +222,193 @@ struct nvgpu_tsg *nvgpu_tsg_get_from_id(struct gk20a *g, u32 tsgid)
    return &f->tsg[tsgid];
}

/*
 * Synchronous subcontext. Subcontext of this type may hold the
 * graphics channel, and multiple copy engine and compute channels.
 */
#define NVGPU_TSG_SUBCONTEXT_TYPE_SYNC (0x0U)

/*
 * Asynchronous subcontext. Asynchronous subcontext is for compute
 * and copy engine channels only.
 */
#define NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC (0x1U)

#define MAX_SYNC_SUBCONTEXTS 1U

static int nvgpu_tsg_create_sync_subcontext(struct gk20a *g,
        struct nvgpu_tsg *tsg, u32 *veid)
{
    if (tsg->sync_veid) {
        nvgpu_err(g, "SYNC VEID not available");
        return -ENOSPC;
    }

    tsg->sync_veid = true;

    *veid = 0U;

    return 0;
}

static int nvgpu_tsg_create_async_subcontext(struct gk20a *g,
        struct nvgpu_tsg *tsg, u32 max_subctx_count,
        u32 *veid)
{
    u32 max_async_subcontexts = max_subctx_count - MAX_SYNC_SUBCONTEXTS;
    int err;
    u32 idx;

    idx = nvgpu_safe_cast_u64_to_u32(
            find_first_zero_bit(tsg->async_veids,
                    max_async_subcontexts));

    if (idx == max_async_subcontexts) {
        nvgpu_log_info(g, "ASYNC VEID not available");
        err = nvgpu_tsg_create_sync_subcontext(g, tsg, veid);
        if (err != 0) {
            nvgpu_err(g, "ASYNC & SYNC VEIDs not available");
            return err;
        }
    } else {
        nvgpu_set_bit(idx, tsg->async_veids);
        /* ASYNC VEIDs start from 1. */
        *veid = idx + MAX_SYNC_SUBCONTEXTS;
    }

    return 0;
}

int nvgpu_tsg_create_subcontext(struct gk20a *g, struct nvgpu_tsg *tsg,
        u32 type, struct vm_gk20a *vm,
        u32 max_subctx_count, u32 *veid)
{
    int err;

    nvgpu_mutex_acquire(&tsg->veid_alloc_lock);

    if (type == NVGPU_TSG_SUBCONTEXT_TYPE_SYNC) {
        err = nvgpu_tsg_create_sync_subcontext(g, tsg, veid);
        if (err != 0) {
            nvgpu_err(g, "Sync VEID not available");
            nvgpu_mutex_release(&tsg->veid_alloc_lock);
            return err;
        }
    }

    if (type == NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC) {
        err = nvgpu_tsg_create_async_subcontext(g, tsg,
                max_subctx_count, veid);
        if (err != 0) {
            nvgpu_err(g, "Async/Sync VEID not available");
            nvgpu_mutex_release(&tsg->veid_alloc_lock);
            return err;
        }
    }

    if (tsg->subctx_vms[*veid] == NULL) {
        tsg->subctx_vms[*veid] = vm;
    }

    nvgpu_mutex_release(&tsg->veid_alloc_lock);

    nvgpu_log_info(g, "Allocated VEID %u", *veid);

    return 0;
}
int nvgpu_tsg_delete_subcontext(struct gk20a *g, struct nvgpu_tsg *tsg,
        u32 max_subctx_count, u32 veid)
{
    if (veid >= max_subctx_count) {
        nvgpu_err(g, "Invalid VEID specified %u", veid);
        return -EINVAL;
    }

    nvgpu_mutex_acquire(&tsg->veid_alloc_lock);

    if (veid == 0U) {
        if (!tsg->sync_veid) {
            nvgpu_err(g, "VEID 0 not allocated");
            nvgpu_mutex_release(&tsg->veid_alloc_lock);
            return -EINVAL;
        }

        tsg->sync_veid = false;
        tsg->subctx_vms[veid] = NULL;
    } else {
        if (!nvgpu_test_bit(veid - MAX_SYNC_SUBCONTEXTS,
                    tsg->async_veids)) {
            nvgpu_err(g, "VEID %u not allocated", veid);
            nvgpu_mutex_release(&tsg->veid_alloc_lock);
            return -EINVAL;
        }
        nvgpu_clear_bit(veid - MAX_SYNC_SUBCONTEXTS, tsg->async_veids);
        /*
         * subctx_vms[] is indexed by VEID directly (create stores the
         * VM at *veid, which already includes the sync offset), so
         * clear the entry at veid, not at veid - MAX_SYNC_SUBCONTEXTS.
         */
        tsg->subctx_vms[veid] = NULL;
    }

    nvgpu_mutex_release(&tsg->veid_alloc_lock);

    nvgpu_log_info(g, "Freed VEID %u", veid);

    return 0;
}
int nvgpu_tsg_create_sync_subcontext_internal(struct gk20a *g,
        struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
{
    u32 subctx_id = 0U;
    int err;

    if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
        return 0;
    }

    nvgpu_mutex_acquire(&tsg->veid_alloc_lock);

    subctx_id = ch->subctx_id;

    /*
     * If this is the first channel created without creating a
     * subcontext, then this channel is using the subcontext with
     * VEID 0 by default. Set subctx_vms[0] and reserve VEID 0.
     */
    if ((subctx_id == 0U) && (tsg->subctx_vms[0] == NULL)) {
        err = nvgpu_tsg_create_sync_subcontext(g, tsg, &subctx_id);
        if (err != 0) {
            nvgpu_err(g, "SYNC VEID not available");
            nvgpu_mutex_release(&tsg->veid_alloc_lock);
            return err;
        }

        tsg->subctx_vms[0] = ch->vm;
    }

    nvgpu_mutex_release(&tsg->veid_alloc_lock);

    return 0;
}

int nvgpu_tsg_validate_ch_subctx_vm(struct gk20a *g,
        struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
{
    int err = 0;

    if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
        return 0;
    }

    nvgpu_mutex_acquire(&tsg->veid_alloc_lock);

    if (ch->vm != tsg->subctx_vms[ch->subctx_id]) {
        nvgpu_err(g, "channel VM does not match with subcontext VM");
        err = -EINVAL;
    }

    nvgpu_mutex_release(&tsg->veid_alloc_lock);

    return err;
}

/*
 * API to mark channel as part of TSG
 *
@@ -654,6 +841,7 @@ static void nvgpu_tsg_destroy(struct nvgpu_tsg *tsg)
    nvgpu_mutex_destroy(&tsg->event_id_list_lock);
#endif
    nvgpu_mutex_destroy(&tsg->ctx_init_lock);
    nvgpu_mutex_destroy(&tsg->veid_alloc_lock);
}

#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL

@@ -708,6 +896,7 @@ static void nvgpu_tsg_init_support(struct gk20a *g, u32 tsgid)
    nvgpu_init_list_node(&tsg->gr_ctx_mappings_list);
    nvgpu_rwsem_init(&tsg->ch_list_lock);
    nvgpu_mutex_init(&tsg->ctx_init_lock);
    nvgpu_mutex_init(&tsg->veid_alloc_lock);

#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
    nvgpu_init_list_node(&tsg->event_id_list);

@@ -1039,6 +1228,55 @@ static struct nvgpu_tsg *nvgpu_tsg_acquire_unused_tsg(struct nvgpu_fifo *f)
    return tsg;
}
static int nvgpu_tsg_alloc_veid_state(struct gk20a *g, struct nvgpu_tsg *tsg)
{
    u32 max_async_subcontexts;
    u32 max_subctx_count;

    if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
        return 0;
    }

    tsg->sync_veid = false;

    max_subctx_count = g->ops.gr.init.get_max_subctx_count();
    max_async_subcontexts = max_subctx_count - MAX_SYNC_SUBCONTEXTS;

    tsg->async_veids = nvgpu_kzalloc(g,
            BITS_TO_LONGS(max_async_subcontexts) *
            sizeof(unsigned long));
    if (tsg->async_veids == NULL) {
        nvgpu_err(g, "async veids bitmap alloc failed");
        return -ENOMEM;
    }

    tsg->subctx_vms = nvgpu_kzalloc(g,
            sizeof(struct vm_gk20a *) * max_subctx_count);
    if (tsg->subctx_vms == NULL) {
        nvgpu_err(g, "subctx vms alloc failed");
        nvgpu_kfree(g, tsg->async_veids);
        tsg->async_veids = NULL;
        return -ENOMEM;
    }

    return 0;
}

static void nvgpu_tsg_free_veid_state(struct gk20a *g, struct nvgpu_tsg *tsg)
{
    if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
        return;
    }

    nvgpu_kfree(g, tsg->subctx_vms);
    tsg->subctx_vms = NULL;

    nvgpu_kfree(g, tsg->async_veids);
    tsg->async_veids = NULL;

    tsg->sync_veid = false;
}
int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid)
{
    u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);

@@ -1086,6 +1324,12 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid)
        }
    }

    err = nvgpu_tsg_alloc_veid_state(g, tsg);
    if (err != 0) {
        nvgpu_err(g, "VEID sw state alloc failed %d", err);
        goto clean_up;
    }

#ifdef CONFIG_NVGPU_SM_DIVERSITY
    nvgpu_gr_ctx_set_sm_diversity_config(tsg->gr_ctx,
            NVGPU_INVALID_SM_CONFIG_ID);

@@ -1146,6 +1390,8 @@ void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg)
        g->ops.tsg.release(tsg);
    }

    nvgpu_tsg_free_veid_state(g, tsg);

    nvgpu_free_gr_ctx_struct(g, tsg->gr_ctx);
    tsg->gr_ctx = NULL;
@@ -436,6 +436,7 @@ static const struct gops_gr_init vgpu_ga10b_ops_gr_init = {
    .commit_global_cb_manager = gp10b_gr_init_commit_global_cb_manager,
    .get_ctx_attrib_cb_size = gp10b_gr_init_get_ctx_attrib_cb_size,
    .commit_cbes_reserve = gv11b_gr_init_commit_cbes_reserve,
    .get_max_subctx_count = gv11b_gr_init_get_max_subctx_count,
    .detect_sm_arch = vgpu_gr_detect_sm_arch,
    .get_supported__preemption_modes = gp10b_gr_init_get_supported_preemption_modes,
    .get_default_preemption_modes = gp10b_gr_init_get_default_preemption_modes,

@@ -407,6 +407,7 @@ static const struct gops_gr_init vgpu_gv11b_ops_gr_init = {
    .commit_global_cb_manager = gp10b_gr_init_commit_global_cb_manager,
    .get_ctx_attrib_cb_size = gp10b_gr_init_get_ctx_attrib_cb_size,
    .commit_cbes_reserve = gv11b_gr_init_commit_cbes_reserve,
    .get_max_subctx_count = gv11b_gr_init_get_max_subctx_count,
    .detect_sm_arch = vgpu_gr_detect_sm_arch,
    .get_supported__preemption_modes = gp10b_gr_init_get_supported_preemption_modes,
    .get_default_preemption_modes = gp10b_gr_init_get_default_preemption_modes,
@@ -127,6 +127,35 @@ struct nvgpu_tsg {
     */
    struct nvgpu_list_node subctx_list;

    /**
     * Mutex to synchronize VEID allocation and free. It is also used to
     * access/modify #subctx_vms.
     */
    struct nvgpu_mutex veid_alloc_lock;

    /*
     * Set to true if the SYNC VEID is allocated to userspace. When the
     * user requests a subcontext of type NVGPU_TSG_SUBCONTEXT_TYPE_SYNC,
     * this is set to true if not already set. When the user requests a
     * subcontext of type NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC, this is set
     * to true if not already set and all ASYNC VEIDs are already
     * allocated.
     */
    bool sync_veid;

    /*
     * Bitmap of ASYNC VEIDs allocated to userspace. When the user
     * requests a subcontext of type NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC,
     * a bit is set in this bitmap if one is available. (See the VEID
     * numbering sketch after this struct.)
     */
    unsigned long *async_veids;

    /*
     * VMs associated with subcontexts. Currently only a single VM
     * association is supported.
     */
    struct vm_gk20a **subctx_vms;
    /** List of channels bound to this TSG. */
    struct nvgpu_list_node ch_list;
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL

@@ -246,6 +275,88 @@ struct nvgpu_tsg {
#endif
};
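
As a worked sketch of the numbering these fields encode (illustrative only; veid_to_async_bit() is not part of the driver): with max_subctx_count = 64, VEID 0 is the single SYNC subcontext tracked by #sync_veid, VEIDs 1..63 are ASYNC subcontexts tracked by bits 0..62 of #async_veids, and #subctx_vms is indexed directly by VEID.

    /*
     * VEID 0            -> sync_veid (bool)
     * VEID v in [1, 63] -> bit (v - MAX_SYNC_SUBCONTEXTS) of async_veids
     * subctx_vms[v]     -> VM bound to VEID v
     */
    static inline u32 veid_to_async_bit(u32 veid)
    {
        /* Valid only for ASYNC VEIDs, i.e. veid >= MAX_SYNC_SUBCONTEXTS. */
        return veid - MAX_SYNC_SUBCONTEXTS;
    }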
/**
 * @brief Allocate a subcontext VEID within a TSG.
 *
 * @param g [in]			The GPU driver struct.
 * @param tsg [in]			Pointer to TSG struct.
 * @param type [in]			Type of subcontext.
 * @param vm [in]			Pointer to virtual memory struct.
 * @param max_subctx_count [in]	Maximum subcontexts supported for the
 *					gpu instance.
 * @param veid [out]		VEID allocated.
 *
 * - Validate the VM. Since only a single VM is supported per TSG for
 *   now, return -EINVAL if a VM other than the one already assigned to
 *   the TSG is specified.
 * - If a sync subcontext is requested, allocate it if available and set
 *   the flag #sync_veid.
 * - If an async subcontext is requested, allocate one if available from
 *   the bitmap #async_veids. If no async VEID is available but the sync
 *   VEID is, allocate the sync subcontext instead by setting the flag
 *   #sync_veid. A sketch of this fallback follows the prototype below.
 *
 * @return 0 in case of success, < 0 in case of failure.
 * @retval -EINVAL if invalid VM.
 * @retval -ENOSPC if no VEID is available.
 */
int nvgpu_tsg_create_subcontext(struct gk20a *g, struct nvgpu_tsg *tsg,
        u32 type, struct vm_gk20a *vm,
        u32 max_subctx_count, u32 *veid);
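
A short kernel-side sketch of the fallback described above (assumes a valid tsg and vm; in this change max_subctx_count comes from nvgpu_grmgr_get_gpu_instance_max_veid_count(), as in the ioctl handler further down):

    u32 veid;
    int err;

    /*
     * Request an ASYNC subcontext. If every ASYNC VEID is taken but the
     * SYNC VEID is free, this still succeeds and returns veid == 0.
     */
    err = nvgpu_tsg_create_subcontext(g, tsg,
            NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC, vm,
            max_subctx_count, &veid);
    if (err == -ENOSPC) {
        /* All VEIDs, SYNC and ASYNC, are already allocated. */
    }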
/**
 * @brief Free a subcontext VEID from a TSG.
 *
 * @param g [in]			The GPU driver struct.
 * @param tsg [in]			Pointer to TSG struct.
 * @param max_subctx_count [in]	Maximum subcontexts supported for the
 *					gpu instance.
 * @param veid [in]			VEID to be freed.
 *
 * - Validate #veid. If invalid, return -EINVAL.
 * - Otherwise free the VEID by resetting either #sync_veid or the
 *   corresponding bit in #async_veids, if allocated. If not allocated,
 *   return -EINVAL.
 *
 * @return 0 in case of success, < 0 in case of failure.
 * @retval -EINVAL if veid is invalid.
 */
int nvgpu_tsg_delete_subcontext(struct gk20a *g, struct nvgpu_tsg *tsg,
        u32 max_subctx_count, u32 veid);
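
The matching free, continuing the sketch above (veid is the value returned by nvgpu_tsg_create_subcontext()):

    err = nvgpu_tsg_delete_subcontext(g, tsg, max_subctx_count, veid);
    if (err == -EINVAL) {
        /* veid was out of range or not currently allocated. */
    }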
/**
 * @brief Mark the sync subcontext created if the channel was opened
 *        with an implicit subcontext.
 *
 * @param g [in]	The GPU driver struct.
 * @param tsg [in]	Pointer to TSG struct.
 * @param ch [in]	Pointer to Channel struct.
 *
 * - If this is the first channel created without creating a subcontext,
 *   then this channel is using the subcontext with VEID 0 by default.
 *   Set subctx_vms[0] and reserve VEID 0.
 *
 * @return 0 in case of success, < 0 in case of failure.
 */
int nvgpu_tsg_create_sync_subcontext_internal(struct gk20a *g,
        struct nvgpu_tsg *tsg, struct nvgpu_channel *ch);

/**
 * @brief Validate the VM associated with the Channel and TSG subcontexts.
 *
 * @param g [in]	The GPU driver struct.
 * @param tsg [in]	Pointer to TSG struct.
 * @param ch [in]	Pointer to Channel struct.
 *
 * - If the channel VM does not match the VM of the channel's
 *   subcontext, return -EINVAL.
 *
 * @return 0 in case of success, < 0 in case of failure.
 * @retval -EINVAL if invalid VM.
 */
int nvgpu_tsg_validate_ch_subctx_vm(struct gk20a *g,
        struct nvgpu_tsg *tsg, struct nvgpu_channel *ch);
/**
 * @brief Initialize given TSG
 *

@@ -371,6 +482,7 @@ void nvgpu_tsg_disable(struct nvgpu_tsg *tsg);
 * @param ch [in]	Pointer to Channel struct.
 *
 * - Make sure channel is not already bound to a TSG.
 * - Make sure channel VM matches the subcontext VM.
 * - Make sure channel is not part of any runlists.
 * - If channel had ASYNC subctx id, then set runqueue selector to 1.
 * - Set runlist id of TSG to channel's runlist_id if runlist_id of TSG
@@ -16,6 +16,7 @@
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <nvgpu/trace.h>

#include <uapi/linux/nvgpu.h>

@@ -453,6 +454,31 @@ int gk20a_as_dev_release(struct inode *inode, struct file *filp)
    return gk20a_as_release_share(as_share);
}

/*
 * This returns the AS with a reference. The caller must
 * nvgpu_vm_put() the ref back after use.
 *
 * NULL is returned if the AS was not found.
 */
struct vm_gk20a *nvgpu_vm_get_from_file(int fd)
{
    struct gk20a_as_share *as_share;
    struct file *f = fget(fd);

    if (!f)
        return NULL;

    if (f->f_op != &gk20a_as_ops) {
        fput(f);
        return NULL;
    }

    as_share = (struct gk20a_as_share *)f->private_data;
    nvgpu_vm_get(as_share->vm);
    fput(f);
    return as_share->vm;
}
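
The reference contract in the comment above, as a short usage sketch (fd is an address-space fd passed in from userspace, as in nvgpu_tsg_ioctl_create_subcontext() below):

    struct vm_gk20a *vm = nvgpu_vm_get_from_file(fd);

    if (vm == NULL)
        return -EINVAL;

    /* ... use vm ... */

    nvgpu_vm_put(vm);   /* drop the ref taken by nvgpu_vm_get_from_file() */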
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    int err = 0;
@@ -1,7 +1,7 @@
/*
 * GK20A Address Spaces
 *
- * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,

@@ -17,6 +17,7 @@
struct inode;
struct file;
struct vm_gk20a;

/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and
 * num_maps */

@@ -25,6 +26,9 @@ struct file;
/* struct file_operations driver interface */
int gk20a_as_dev_open(struct inode *inode, struct file *filp);
int gk20a_as_dev_release(struct inode *inode, struct file *filp);
struct vm_gk20a *nvgpu_vm_get_from_file(int fd);
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);

extern const struct file_operations gk20a_as_ops;

#endif
@@ -78,7 +78,6 @@
    (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
#define MHZ_TO_HZ(a) ((u64)a * MHZ)

-extern const struct file_operations gk20a_as_ops;
extern const struct file_operations gk20a_tsg_ops;

struct gk20a_ctrl_priv {
@@ -40,6 +40,7 @@
#include "platform_gk20a.h"
#include "ioctl_tsg.h"
#include "ioctl_as.h"
#include "ioctl_channel.h"
#include "ioctl_nvs.h"
#include "ioctl.h"

@@ -83,6 +84,18 @@ static int nvgpu_tsg_bind_channel_fd(struct nvgpu_tsg *tsg, int ch_fd)
    if (!ch)
        return -EINVAL;

    err = nvgpu_tsg_create_sync_subcontext_internal(ch->g, tsg, ch);
    if (err != 0) {
        nvgpu_err(ch->g, "sync subctx creation failed %d", err);
        /* Drop the channel reference taken above before bailing out. */
        nvgpu_channel_put(ch);
        return err;
    }

    err = nvgpu_tsg_validate_ch_subctx_vm(ch->g, tsg, ch);
    if (err != 0) {
        nvgpu_err(ch->g, "channel/subctx VM mismatch");
        nvgpu_channel_put(ch);
        return err;
    }

    err = nvgpu_tsg_bind_channel(tsg, ch);

    nvgpu_channel_put(ch);
@@ -137,6 +150,18 @@ static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
    if (ch->subctx_id > CHANNEL_INFO_VEID0)
        ch->runqueue_sel = 1;

    err = nvgpu_tsg_create_sync_subcontext_internal(g, tsg, ch);
    if (err != 0) {
        nvgpu_err(ch->g, "sync subctx creation failed %d", err);
        goto ch_put;
    }

    err = nvgpu_tsg_validate_ch_subctx_vm(g, tsg, ch);
    if (err != 0) {
        nvgpu_err(g, "channel/subctx VM mismatch");
        goto ch_put;
    }

    err = nvgpu_tsg_bind_channel(tsg, ch);
ch_put:
    nvgpu_channel_put(ch);
@@ -749,6 +774,70 @@ static int nvgpu_gpu_ioctl_set_l2_sector_promotion(struct gk20a *g,
    return err;
}

static int nvgpu_tsg_ioctl_create_subcontext(struct gk20a *g,
        u32 gpu_instance_id, struct nvgpu_tsg *tsg,
        struct nvgpu_tsg_create_subcontext_args *args)
{
    u32 max_subctx_count;
    struct vm_gk20a *vm;
    u32 veid;
    int err;

    if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
        return 0;
    }

    if (args->type != NVGPU_TSG_SUBCONTEXT_TYPE_SYNC &&
            args->type != NVGPU_TSG_SUBCONTEXT_TYPE_ASYNC) {
        nvgpu_err(g, "Invalid subcontext type %u", args->type);
        return -EINVAL;
    }

    vm = nvgpu_vm_get_from_file(args->as_fd);
    if (vm == NULL) {
        nvgpu_err(g, "Invalid VM (fd = %d) specified for subcontext",
            args->as_fd);
        return -EINVAL;
    }

    max_subctx_count =
        nvgpu_grmgr_get_gpu_instance_max_veid_count(g, gpu_instance_id);

    err = nvgpu_tsg_create_subcontext(g, tsg, args->type, vm,
            max_subctx_count, &veid);
    if (err != 0) {
        nvgpu_err(g, "Create subcontext failed %d", err);
        nvgpu_vm_put(vm);
        return err;
    }

    nvgpu_vm_put(vm);

    args->veid = veid;

    return 0;
}
static int nvgpu_tsg_ioctl_delete_subcontext(struct gk20a *g,
        u32 gpu_instance_id, struct nvgpu_tsg *tsg,
        struct nvgpu_tsg_delete_subcontext_args *args)
{
    u32 max_subctx_count;
    int err;

    if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
        return 0;
    }

    max_subctx_count =
        nvgpu_grmgr_get_gpu_instance_max_veid_count(g, gpu_instance_id);

    err = nvgpu_tsg_delete_subcontext(g, tsg, max_subctx_count, args->veid);
    if (err != 0) {
        nvgpu_err(g, "Delete subcontext failed %d", err);
    }

    return err;
}
long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
        unsigned long arg)
{

@@ -952,6 +1041,18 @@ long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
        break;
    }

    case NVGPU_TSG_IOCTL_CREATE_SUBCONTEXT:
    {
        err = nvgpu_tsg_ioctl_create_subcontext(g, gpu_instance_id, tsg,
            (struct nvgpu_tsg_create_subcontext_args *)buf);
        break;
    }
    case NVGPU_TSG_IOCTL_DELETE_SUBCONTEXT:
    {
        err = nvgpu_tsg_ioctl_delete_subcontext(g, gpu_instance_id, tsg,
            (struct nvgpu_tsg_delete_subcontext_args *)buf);
        break;
    }
    default:
        nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
            cmd);