gpu: nvgpu: implement get and revoke share token ioctls

Add a share token list to gk20a_ctrl_priv and implement the
GET_SHARE_TOKEN and REVOKE_SHARE_TOKEN ioctls. While closing the
TSG, revoke the tokens issued for it on all active devices.
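
As a usage illustration (a minimal sketch, not part of this change;
tsg_fd, src_id and tgt_id are placeholders), the new ioctls can be
driven from userspace roughly as follows:

    struct nvgpu_tsg_get_share_token_args get_args = {0};
    struct nvgpu_tsg_revoke_share_token_args revoke_args = {0};

    get_args.source_device_instance_id = src_id; /* issuing ctrl device */
    get_args.target_device_instance_id = tgt_id; /* device granted access */
    if (ioctl(tsg_fd, NVGPU_TSG_IOCTL_GET_SHARE_TOKEN, &get_args) == 0) {
            /* hand get_args.share_token over to the target process */
    }

    /* The issuer can later revoke the token explicitly. */
    revoke_args.source_device_instance_id = src_id;
    revoke_args.target_device_instance_id = tgt_id;
    revoke_args.share_token = get_args.share_token;
    ioctl(tsg_fd, NVGPU_TSG_IOCTL_REVOKE_SHARE_TOKEN, &revoke_args);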

Bug 3677982
JIRA NVGPU-8681

Change-Id: I74455c21d881d5a0d381729fd695239722599980
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2792081
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Scott Long <scottl@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>

@@ -1266,6 +1266,10 @@ static int nvgpu_tsg_alloc_veid_state(struct gk20a *g, struct nvgpu_tsg *tsg)
return -ENOMEM;
}
#ifdef CONFIG_NVGPU_TSG_SHARING
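/*
 * Start the count at one: GET_SHARE_TOKEN refuses new tokens once the
 * count reaches the maximum subcontext count, so at most
 * (max subcontexts - 1) tokens can be outstanding.
 */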
tsg->share_token_count = 1U;
#endif
return 0;
}


@@ -176,8 +176,16 @@ struct nvgpu_tsg {
* share token).
*/
struct nvgpu_list_node ctrl_devices_list;
/**
 * Share tokens issued for this TSG. The number of share tokens is
 * limited to the maximum number of subcontexts - 1.
 */
u32 share_token_count;
/**
* Mutex used to access/modify #ctrl_devices_list
* and #share_token_count.
*/
struct nvgpu_mutex tsg_share_lock;
#endif


@@ -59,6 +59,7 @@
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/grmgr.h>
#include <nvgpu/string.h>
#include <nvgpu/kmem.h>
#include "ioctl_ctrl.h"
#include "ioctl_dbg.h"
@@ -80,6 +81,15 @@
extern const struct file_operations gk20a_tsg_ops;
#ifdef CONFIG_NVGPU_TSG_SHARING
struct nvgpu_tsg_share_token_node {
u64 token;
u64 target_device_instance_id;
struct nvgpu_tsg *tsg;
struct nvgpu_list_node ctrl_entry;
};
#endif
struct gk20a_ctrl_priv {
struct device *dev;
struct gk20a *g;
@@ -87,6 +97,9 @@ struct gk20a_ctrl_priv {
struct nvgpu_cdev *cdev;
#ifdef CONFIG_NVGPU_TSG_SHARING
u64 device_instance_id;
u64 tsg_share_token;
struct nvgpu_list_node tsg_share_tokens_list;
struct nvgpu_mutex tokens_lock;
#endif
struct nvgpu_list_node list;
@@ -159,6 +172,9 @@ int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
nvgpu_mutex_release(&g->ctrl_dev_id_lock);
nvgpu_log_info(g, "opened ctrl device: %llx", priv->device_instance_id);
nvgpu_init_list_node(&priv->tsg_share_tokens_list);
nvgpu_mutex_init(&priv->tokens_lock);
#endif
if (!g->sw_ready) {
@@ -184,6 +200,7 @@ free_ref:
return err;
}
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
{
struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -2804,4 +2821,182 @@ u64 nvgpu_gpu_get_device_instance_id(struct gk20a_ctrl_priv *priv)
{
return priv ? priv->device_instance_id : 0ULL;
}
static struct gk20a_ctrl_priv *nvgpu_gpu_get_ctrl_priv(
struct gk20a *g,
u64 device_instance_id)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_ctrl_priv *priv;
nvgpu_mutex_acquire(&l->ctrl_privs_lock);
nvgpu_list_for_each_entry(priv, &l->ctrl_privs,
gk20a_ctrl_priv, list) {
if (priv->device_instance_id == device_instance_id) {
nvgpu_mutex_release(&l->ctrl_privs_lock);
return priv;
}
}
nvgpu_mutex_release(&l->ctrl_privs_lock);
return NULL;
}
int nvgpu_gpu_get_share_token(struct gk20a *g,
u64 source_device_instance_id,
u64 target_device_instance_id,
struct nvgpu_tsg *tsg,
u64 *share_token)
{
struct nvgpu_tsg_share_token_node *token_node;
struct gk20a_ctrl_priv *ctrl_priv;
nvgpu_log_fn(g, " ");
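/* Ensure the target control device is still open before issuing a token. */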
ctrl_priv = nvgpu_gpu_get_ctrl_priv(g, target_device_instance_id);
if (ctrl_priv == NULL) {
nvgpu_err(g, "Invalid target device instance id");
return -EINVAL;
}
ctrl_priv = nvgpu_gpu_get_ctrl_priv(g, source_device_instance_id);
if (ctrl_priv == NULL) {
nvgpu_err(g, "Invalid source device instance id");
return -EINVAL;
}
token_node = (struct nvgpu_tsg_share_token_node *) nvgpu_kzalloc(g,
sizeof(struct nvgpu_tsg_share_token_node));
if (token_node == NULL) {
nvgpu_err(g, "token node allocation failed");
return -ENOMEM;
}
nvgpu_init_list_node(&token_node->ctrl_entry);
token_node->target_device_instance_id = target_device_instance_id;
token_node->tsg = tsg;
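/*
 * Token values come from a per-device monotonically increasing
 * counter, so a value is never reused for the lifetime of the
 * source control device.
 */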
nvgpu_mutex_acquire(&ctrl_priv->tokens_lock);
nvgpu_assert(ctrl_priv->tsg_share_token < U64_MAX);
ctrl_priv->tsg_share_token += 1ULL;
token_node->token = ctrl_priv->tsg_share_token;
nvgpu_list_add_tail(&token_node->ctrl_entry,
&ctrl_priv->tsg_share_tokens_list);
nvgpu_mutex_release(&ctrl_priv->tokens_lock);
*share_token = token_node->token;
nvgpu_log_info(g, "Share token issued.");
nvgpu_log_info(g, "Source: %llx Target: %llx Token: %llx", source_device_instance_id,
target_device_instance_id, token_node->token);
nvgpu_log_fn(g, "done");
return 0;
}
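/*
 * Open-coded container_of(): map a ctrl_entry list node back to its
 * enclosing nvgpu_tsg_share_token_node.
 */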
static inline struct nvgpu_tsg_share_token_node *
nvgpu_tsg_share_token_node_from_ctrl_entry(struct nvgpu_list_node *node)
{
return (struct nvgpu_tsg_share_token_node *)
((uintptr_t)node - offsetof(struct nvgpu_tsg_share_token_node,
ctrl_entry));
}
int nvgpu_gpu_revoke_share_token(struct gk20a *g,
u64 source_device_instance_id,
u64 target_device_instance_id,
u64 share_token,
struct nvgpu_tsg *tsg)
{
struct nvgpu_tsg_share_token_node *token_node, *tmp;
struct gk20a_ctrl_priv *ctrl_priv;
bool revoke = false;
nvgpu_log_fn(g, " ");
ctrl_priv = nvgpu_gpu_get_ctrl_priv(g, source_device_instance_id);
if (ctrl_priv == NULL) {
nvgpu_err(g, "Invalid source device instance id");
return -EINVAL;
}
nvgpu_mutex_acquire(&ctrl_priv->tokens_lock);
nvgpu_list_for_each_entry_safe(token_node, tmp,
&ctrl_priv->tsg_share_tokens_list,
nvgpu_tsg_share_token_node, ctrl_entry) {
if ((token_node->token == share_token) &&
(token_node->target_device_instance_id ==
target_device_instance_id) &&
(token_node->tsg == tsg)) {
/*
* Found the token with specified parameters.
* Now, revoke it.
*/
revoke = true;
nvgpu_log_info(g, "Share token revoked.");
nvgpu_log_info(g, "Source: %llx Target: %llx Token: %llx",
source_device_instance_id,
target_device_instance_id,
share_token);
nvgpu_list_del(&token_node->ctrl_entry);
nvgpu_kfree(g, token_node);
break;
}
}
nvgpu_mutex_release(&ctrl_priv->tokens_lock);
nvgpu_log_fn(g, "done");
return revoke ? 0 : -EINVAL;
}
int nvgpu_gpu_tsg_revoke_share_tokens(struct gk20a *g,
u64 source_device_instance_id,
struct nvgpu_tsg *tsg,
u32 *out_count)
{
struct nvgpu_tsg_share_token_node *token_node, *temp;
struct gk20a_ctrl_priv *ctrl_priv;
u32 revoked_count = 0U;
nvgpu_log_fn(g, " ");
*out_count = 0U;
ctrl_priv = nvgpu_gpu_get_ctrl_priv(g, source_device_instance_id);
if (ctrl_priv == NULL) {
nvgpu_err(g, "source device instance id is not available");
return -EINVAL;
}
nvgpu_mutex_acquire(&ctrl_priv->tokens_lock);
nvgpu_list_for_each_entry_safe(token_node, temp,
&ctrl_priv->tsg_share_tokens_list,
nvgpu_tsg_share_token_node, ctrl_entry) {
if (token_node->tsg == tsg) {
/*
 * The token belongs to this TSG. Revoke it.
 */
nvgpu_list_del(&token_node->ctrl_entry);
nvgpu_kfree(g, token_node);
revoked_count++;
}
}
nvgpu_mutex_release(&ctrl_priv->tokens_lock);
*out_count = revoked_count;
nvgpu_log_fn(g, "done");
return 0;
}
#endif


@@ -28,6 +28,20 @@ void nvgpu_restore_usermode_for_poweron(struct gk20a *g);
#ifdef CONFIG_NVGPU_TSG_SHARING
u64 nvgpu_gpu_get_device_instance_id(struct gk20a_ctrl_priv *priv);
int nvgpu_gpu_get_share_token(struct gk20a *g,
u64 source_device_instance_id,
u64 target_device_instance_id,
struct nvgpu_tsg *tsg,
u64 *share_token);
int nvgpu_gpu_revoke_share_token(struct gk20a *g,
u64 source_device_instance_id,
u64 target_device_instance_id,
u64 share_token,
struct nvgpu_tsg *tsg);
int nvgpu_gpu_tsg_revoke_share_tokens(struct gk20a *g,
u64 source_device_instance_id,
struct nvgpu_tsg *tsg,
u32 *out_count);
#endif
#endif


@@ -508,6 +508,124 @@ static int nvgpu_tsg_add_ctrl_dev_inst_id(struct nvgpu_tsg *tsg,
return 0;
}
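/*
 * Only control devices registered on this TSG's device list may issue
 * or revoke share tokens for it.
 */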
static bool nvgpu_tsg_is_authorized_ctrl_device_id(struct nvgpu_tsg *tsg,
u64 device_instance_id)
{
struct nvgpu_tsg_ctrl_dev_node *node;
bool authorized = false;
nvgpu_mutex_acquire(&tsg->tsg_share_lock);
nvgpu_list_for_each_entry(node, &tsg->ctrl_devices_list,
nvgpu_tsg_ctrl_dev_node, tsg_entry) {
if (node->device_instance_id == device_instance_id) {
authorized = true;
break;
}
}
nvgpu_mutex_release(&tsg->tsg_share_lock);
return authorized;
}
static int nvgpu_tsg_ioctl_get_share_token(struct gk20a *g,
struct tsg_private *priv,
struct nvgpu_tsg_get_share_token_args *args)
{
u64 source_device_instance_id = args->source_device_instance_id;
u64 target_device_instance_id = args->target_device_instance_id;
struct nvgpu_tsg *tsg = priv->tsg;
u32 max_subctx_count;
u32 gpu_instance_id;
u64 share_token = 0;
int err;
if ((source_device_instance_id == 0UL) ||
(target_device_instance_id == 0UL)) {
nvgpu_err(g, "Invalid source/target device instance id");
return -EINVAL;
}
if (!nvgpu_tsg_is_authorized_ctrl_device_id(tsg,
source_device_instance_id)) {
nvgpu_err(g, "Unauthorized source device instance id");
return -EINVAL;
}
gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, priv->cdev);
nvgpu_assert(gpu_instance_id < g->mig.num_gpu_instances);
max_subctx_count = nvgpu_grmgr_get_gpu_instance_max_veid_count(g, gpu_instance_id);
nvgpu_mutex_acquire(&tsg->tsg_share_lock);
if (tsg->share_token_count == max_subctx_count) {
nvgpu_err(g, "Maximum share tokens are in use");
nvgpu_mutex_release(&tsg->tsg_share_lock);
return -ENOSPC;
}
err = nvgpu_gpu_get_share_token(g,
source_device_instance_id,
target_device_instance_id,
tsg, &share_token);
if (err != 0) {
nvgpu_err(g, "Share token allocation failed %d", err);
nvgpu_mutex_release(&tsg->tsg_share_lock);
return err;
}
args->share_token = share_token;
tsg->share_token_count++;
nvgpu_mutex_release(&tsg->tsg_share_lock);
return 0;
}
static int nvgpu_tsg_ioctl_revoke_share_token(struct gk20a *g,
struct nvgpu_tsg *tsg,
struct nvgpu_tsg_revoke_share_token_args *args)
{
u64 source_device_instance_id = args->source_device_instance_id;
u64 target_device_instance_id = args->target_device_instance_id;
u64 share_token = args->share_token;
int err;
if ((source_device_instance_id == 0UL) ||
(target_device_instance_id == 0UL) ||
(share_token == 0UL)) {
nvgpu_err(g, "Invalid source/target device instance id or"
" share token");
return -EINVAL;
}
if (!nvgpu_tsg_is_authorized_ctrl_device_id(tsg,
source_device_instance_id)) {
nvgpu_err(g, "Unauthorized source device instance id");
return -EINVAL;
}
nvgpu_mutex_acquire(&tsg->tsg_share_lock);
err = nvgpu_gpu_revoke_share_token(g,
source_device_instance_id,
target_device_instance_id,
share_token, tsg);
if (err != 0) {
nvgpu_err(g, "Share token revocation failed %d", err);
nvgpu_mutex_release(&tsg->tsg_share_lock);
return err;
}
tsg->share_token_count--;
nvgpu_mutex_release(&tsg->tsg_share_lock);
return 0;
}
#endif
int nvgpu_ioctl_tsg_open(struct gk20a *g, struct gk20a_ctrl_priv *ctrl_priv,
@@ -602,6 +720,10 @@ void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref)
struct nvgpu_tsg *tsg = container_of(ref, struct nvgpu_tsg, refcount);
struct gk20a *g = tsg->g;
#ifdef CONFIG_NVGPU_TSG_SHARING
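/*
 * All issued share tokens must have been revoked by now; only the
 * initial count of one (set at VEID state allocation) may remain.
 */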
nvgpu_assert(tsg->share_token_count <= 1U);
#endif
gk20a_sched_ctrl_tsg_removed(g, tsg);
nvgpu_tsg_release(ref);
@@ -612,6 +734,10 @@ int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp)
{
struct tsg_private *priv = filp->private_data;
struct nvgpu_tsg *tsg;
#ifdef CONFIG_NVGPU_TSG_SHARING
u32 count;
int err;
#endif
if (!priv) {
/* open failed, never got a tsg for this file */
@@ -621,6 +747,21 @@ int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp)
tsg = priv->tsg;
#ifdef CONFIG_NVGPU_TSG_SHARING
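/*
 * Revoke every share token this control device issued for the TSG
 * before removing the device from the TSG's authorized list.
 */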
nvgpu_mutex_acquire(&tsg->tsg_share_lock);
err = nvgpu_gpu_tsg_revoke_share_tokens(tsg->g,
nvgpu_gpu_get_device_instance_id(priv->ctrl_priv),
tsg, &count);
if (err != 0) {
nvgpu_err(tsg->g, "revoke tokens for device (%llu) failed %d",
nvgpu_gpu_get_device_instance_id(priv->ctrl_priv),
err);
}
tsg->share_token_count -= count;
nvgpu_mutex_release(&tsg->tsg_share_lock);
nvgpu_tsg_remove_ctrl_dev_inst_id(tsg, priv->ctrl_priv);
#endif
@@ -1129,6 +1270,20 @@ long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
(struct nvgpu_tsg_delete_subcontext_args *)buf);
break;
}
#ifdef CONFIG_NVGPU_TSG_SHARING
case NVGPU_TSG_IOCTL_GET_SHARE_TOKEN:
{
err = nvgpu_tsg_ioctl_get_share_token(g, priv,
(struct nvgpu_tsg_get_share_token_args *)buf);
break;
}
case NVGPU_TSG_IOCTL_REVOKE_SHARE_TOKEN:
{
err = nvgpu_tsg_ioctl_revoke_share_token(g, tsg,
(struct nvgpu_tsg_revoke_share_token_args *)buf);
break;
}
#endif
default:
nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
cmd);