nvdla: kmd: add bypass exec support with submit

[1] The NVDLA_SUBMIT_FLAGS_BYPASS_EXEC flag allows clients (UMD or nvdla_kmd_sanity) to bypass execution for a submission. This flag is a property of the submission and shall apply directly to all tasks within that submit. [2] With the above flag set, the information is propagated to firmware through the task descriptor, and firmware shall be responsible for bypassing the execution. Jira DLA-4443 Change-Id: I70000ef486905c812fec65f265a378a99844c10a Signed-off-by: Arvind M <am@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2544802 Reviewed-by: svcacv <svcacv@nvidia.com> Reviewed-by: Anup Mahindre <amahindre@nvidia.com> Reviewed-by: Amit Sharma (SW-TEGRA) <amisharma@nvidia.com> Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com> Reviewed-by: Sachin Nikam <snikam@nvidia.com> Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
2025-12-23 17:55:05 +03:00 · 2021-06-14 17:49:07 +05:30
parent ae34c03649
commit 9bf2e99b45
5 changed files with 35 additions and 6 deletions
--- a/drivers/video/tegra/host/nvdla/dla_os_interface.h
+++ b/drivers/video/tegra/host/nvdla/dla_os_interface.h
@@ -25,7 +25,7 @@
 * @brief Jobs to DLA are submitted in form of task and uses @ref dla_task_descriptor
 * @{
 */
-#define DLA_DESCRIPTOR_VERSION	1U
+#define DLA_DESCRIPTOR_VERSION	2U
 /** @} */

 /**
@@ -166,6 +166,13 @@
 *                                               first entry has to be dla_roi_array_desc
 * @num_addresses: Number of addresses in address list
 * @status: Update task status here after completion
+ * @timeout: Timeout (in us), that starts to expire after task is
+ *           scheduled for execution.
+ * @flags: Flags that make minor adjustments to firmware behavior.
+ *      Supported flags:
+ *          - DLA_DESC_FLAGS_BYPASS_EXEC: skips execution
+ * @reserved1: reserved for future use.
+ * @reserved2: reserved for future use.
 */
 struct dla_task_descriptor {
 	/* Common parameters */
@@ -185,6 +192,11 @@ struct dla_task_descriptor {
 	uint16_t num_addresses;
 	uint16_t status;
 	uint64_t timeout;
+#define DLA_DESC_FLAGS_BYPASS_EXEC	(1U << 0)
+	uint16_t flags;
+
+	uint64_t reserved1;
+	uint64_t reserved2;
 } __attribute__ ((packed, aligned(4)));

 struct dla_action_list {
--- a/drivers/video/tegra/host/nvdla/nvdla.h
+++ b/drivers/video/tegra/host/nvdla/nvdla.h
@@ -3,7 +3,7 @@
 *
 * Tegra Graphics Host NVDLA
 *
- * Copyright (c) 2016-2019 NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2016-2021 NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -389,14 +389,15 @@ void nvdla_task_get(struct nvdla_task *task);
 /**
 * nvdla_task_alloc()	allocate task for a give queue
 *
- * @task		Pointer to nvdla_task
+ * @task		Pointer to nvdla_task.
+ * @bypass_exec		Task is marked to bypass its execution.
 *
 * Return		allocated task in success, otherwise pointer to err
 *
 * This function allocates task desc and fills up initial task descriptor as
 * task parameter detais
 */
-int nvdla_fill_task_desc(struct nvdla_task *task);
+int nvdla_fill_task_desc(struct nvdla_task *task, bool bypass_exec);

 /**
 * nvdla_send_postfences()	send back fences to UMD
--- a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
@@ -1029,6 +1029,7 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg)
 	u32 num_tasks;
 	struct nvdla_task *task = NULL; // task under submission
 	int err = 0, i = 0;
+	bool bypass_exec;

 	if (!args || !priv)
 		return -EINVAL;
@@ -1052,6 +1053,9 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg)

 	nvdla_dbg_info(pdev, "num of tasks [%d]", num_tasks);

+	bypass_exec = ((args->flags & NVDLA_SUBMIT_FLAGS_BYPASS_EXEC) != 0U);
+	nvdla_dbg_info(pdev, "submit flags [%u]", args->flags);
+
 	/* IOCTL copy descriptors*/
 	if (copy_from_user(local_tasks, (void __user *)user_tasks,
 			(num_tasks * sizeof(*user_tasks)))) {
@@ -1086,7 +1090,12 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg)
 		nvdla_dbg_info(pdev, "dump task[%d] done", i + 1);

 		/* update task desc fields */
-		err = nvdla_fill_task_desc(task);
+		/*
+		 * The bypass-exec request is a property of the whole
+		 * submit, so it is passed down to every task of that
+		 * submit.
+		 */
+		err = nvdla_fill_task_desc(task, bypass_exec);
 		if (err) {
 			nvdla_dbg_err(pdev, "fail to fill task desc%d", i + 1);
 			goto fail_to_fill_task_desc;
--- a/drivers/video/tegra/host/nvdla/nvdla_queue.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_queue.c
@@ -1223,7 +1223,7 @@ fail:
 	return err;
 }

-int nvdla_fill_task_desc(struct nvdla_task *task)
+int nvdla_fill_task_desc(struct nvdla_task *task, bool bypass_exec)
 {
 	int err;
 	struct dla_task_descriptor *task_desc;
@@ -1239,6 +1239,12 @@ int nvdla_fill_task_desc(struct nvdla_task *task)
 	task_desc->size = nvdla_get_task_desc_size();
 	task_desc->timeout = task->timeout;

+	task_desc->flags = 0U;
+	if (bypass_exec) {
+		task_desc->flags =
+			(task_desc->flags | DLA_DESC_FLAGS_BYPASS_EXEC);
+	}
+
 	/* update current task sequeue, make sure wrap around condition */
 	queue->sequence = queue->sequence + 1;
 	if (unlikely(queue->sequence >= (UINT_MAX - 1)))
--- a/include/uapi/linux/nvhost_nvdla_ioctl.h
+++ b/include/uapi/linux/nvhost_nvdla_ioctl.h
@@ -96,6 +96,7 @@ struct nvdla_submit_args {
 	__u16 num_tasks;
 #define MAX_TASKS_PER_SUBMIT		16
 #define NVDLA_SUBMIT_FLAGS_ATOMIC	(1 << 0)
+#define NVDLA_SUBMIT_FLAGS_BYPASS_EXEC	(1 << 1)
 	__u16 flags;
 	__u32 version;
 };