gpu: nvgpu: add ctrl device instance ID

In order to share the TSG across different devices securely, device
instance IDs are to be exchanged for endpoint identification. Add a
device instance ID field to gk20a_ctrl_priv, which is generated from the
gk20a-level device instance ID value. Share this ID with userspace via
the GPU characteristics.

Bug 3677982
JIRA NVGPU-8681

Change-Id: I79d92a81c02272c52e24f5b12c452c8993137037
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2792079
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Scott Long <scottl@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
2025-12-22 09:12:24 +03:00 · 2022-09-26 17:46:58 +05:30
parent 41c874a2d9
commit 6e2b592ab9
8 changed files with 60 additions and 3 deletions
--- a/drivers/gpu/nvgpu/Makefile.linux.configs
+++ b/drivers/gpu/nvgpu/Makefile.linux.configs
@@ -204,6 +204,8 @@ CONFIG_NVGPU_FSI_ERR_INJECTION := y
 endif
 endif
 CONFIG_NVGPU_TSG_SHARING := y
 ifeq ($(CONFIG_GK20A_PMU),y)
 ccflags-y += -DCONFIG_GK20A_PMU
 endif
@@ -321,3 +323,6 @@ endif
 ifeq ($(CONFIG_NVGPU_FSI_ERR_INJECTION),y)
 ccflags-y += -DCONFIG_NVGPU_FSI_ERR_INJECTION
 endif
 ifeq ($(CONFIG_NVGPU_TSG_SHARING),y)
 ccflags-y += -DCONFIG_NVGPU_TSG_SHARING
 endif
--- a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c
@@ -117,6 +117,9 @@ void vgpu_remove_support_common(struct gk20a *g)
 	nvgpu_clk_arb_cleanup_arbiter(g);
 	nvgpu_mutex_destroy(&g->clk_arb_enable_lock);
 #ifdef CONFIG_NVGPU_TSG_SHARING
 	nvgpu_mutex_destroy(&g->ctrl_dev_id_lock);
 #endif
 	nvgpu_mutex_destroy(&priv->vgpu_clk_get_freq_lock);
 	nvgpu_kfree(g, priv->freqs);
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -520,6 +520,22 @@ struct gk20a {
 #ifdef CONFIG_NVGPU_SIM
 	struct sim_nvgpu *sim;
 #endif
 #ifdef CONFIG_NVGPU_TSG_SHARING
 	/**
 	 * Counter used to assign a unique ID to each ctrl device opened by
 	 * an application. It is incremented on every ctrl device open and
 	 * the resulting ID identifies the target for TSG sharing. The ID is
 	 * guaranteed to be unique for every open of the CTRL device node
 	 * over the nvgpu lifespan.
 	 */
 	u64 ctrl_device_instance_id;
 	/**
 	 * Mutex to protect access to ctrl_device_instance_id.
 	 */
 	struct nvgpu_mutex ctrl_dev_id_lock;
 #endif
 	struct nvgpu_device_list *devs;
 	/** Top level struct maintaining MM unit's software state. */
 	struct mm_gk20a mm;
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -96,6 +96,9 @@ static void nvgpu_init_vars(struct gk20a *g)
 #if defined(CONFIG_NVGPU_CYCLESTATS)
 	nvgpu_mutex_init(&g->cs_lock);
 #endif
 #ifdef CONFIG_NVGPU_TSG_SHARING
 	nvgpu_mutex_init(&g->ctrl_dev_id_lock);
 #endif
 	/* Init the clock req count to 0 */
 	nvgpu_atomic_set(&g->clk_arb_global_nr, 0);
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -85,6 +85,9 @@ struct gk20a_ctrl_priv {
 	struct gk20a *g;
 	struct nvgpu_clk_session *clk_session;
 	struct nvgpu_cdev *cdev;
 #ifdef CONFIG_NVGPU_TSG_SHARING
 	u64 device_instance_id;
 #endif
 	struct nvgpu_list_node list;
 	struct {
@@ -146,6 +149,18 @@ int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 	 */
 	priv->g = g;
 #ifdef CONFIG_NVGPU_TSG_SHARING
 	nvgpu_mutex_acquire(&g->ctrl_dev_id_lock);
 	nvgpu_assert(g->ctrl_device_instance_id < U64_MAX);
 	g->ctrl_device_instance_id += 1ULL;
 	priv->device_instance_id = g->ctrl_device_instance_id;
 	nvgpu_mutex_release(&g->ctrl_dev_id_lock);
 	nvgpu_log_info(g, "opened ctrl device: %llx", priv->device_instance_id);
 #endif
 	if (!g->sw_ready) {
 		err = gk20a_busy(g);
 		if (err)
@@ -361,7 +376,7 @@ static void nvgpu_set_preemption_mode_flags(struct gk20a *g,
 static long gk20a_ctrl_ioctl_gpu_characteristics(
 		struct gk20a *g, u32 gpu_instance_id, struct nvgpu_gr_config *gr_config,
-		struct nvgpu_gpu_get_characteristics *request)
+		struct gk20a_ctrl_priv *priv, struct nvgpu_gpu_get_characteristics *request)
 {
 	struct nvgpu_gpu_characteristics gpu;
 	long err = 0;
@@ -377,6 +392,10 @@ static long gk20a_ctrl_ioctl_gpu_characteristics(
 	(void) memset(&gpu, 0, sizeof(gpu));
 	gpu_instance = &g->mig.gpu_instance[gpu_instance_id];
 #ifdef CONFIG_NVGPU_TSG_SHARING
 	gpu.device_instance_id = priv->device_instance_id;
 #endif
 	gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
 	gpu.on_board_video_memory_size = 0; /* integrated GPU */
@@ -2416,7 +2435,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 #endif /* CONFIG_NVGPU_GRAPHICS */
 	case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS:
 		err = gk20a_ctrl_ioctl_gpu_characteristics(g, gpu_instance_id, gr_config,
-			(struct nvgpu_gpu_get_characteristics *)buf);
+			priv, (struct nvgpu_gpu_get_characteristics *)buf);
 		break;
 	case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ:
 		err = gk20a_ctrl_prepare_compressible_read(g,
--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -1082,6 +1082,9 @@ void gk20a_remove_support(struct gk20a *g)
 	nvgpu_fbp_remove_support(g);
 #ifdef CONFIG_NVGPU_TSG_SHARING
 	nvgpu_mutex_destroy(&g->ctrl_dev_id_lock);
 #endif
 	nvgpu_remove_usermode_support(g);
 	nvgpu_free_enabled_flags(g);
--- a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c
+++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c
@@ -148,7 +148,9 @@ static int vgpu_init_support(struct platform_device *pdev)
 #if defined(CONFIG_NVGPU_CYCLESTATS)
 	nvgpu_mutex_init(&g->cs_lock);
 #endif
-
+#ifdef CONFIG_NVGPU_TSG_SHARING
 	nvgpu_mutex_init(&g->ctrl_dev_id_lock);
 #endif
 	nvgpu_init_list_node(&g->profiler_objects);
 #ifdef CONFIG_NVGPU_DEBUGGER
--- a/include/uapi/linux/nvgpu-ctrl.h
+++ b/include/uapi/linux/nvgpu-ctrl.h
@@ -343,6 +343,12 @@ struct nvgpu_gpu_characteristics {
 	__u32 max_dbg_tsg_timeslice;
 	__u32 reserved5;
 	/*
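 	 * Instance ID of the opened ctrl node, assigned when the node is
 	 * opened. Unique number over the nvgpu driver's lifetime (probe to
 	 * unload). Left as zero when the driver is built without
 	 * CONFIG_NVGPU_TSG_SHARING.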
 	 */
 	__u64 device_instance_id;
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary