From 0f2266c47a2b4b62b734cbd276ad1a409ed4f4bd Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Wed, 10 Jan 2018 14:09:22 +0530
Subject: [PATCH] drivers: video: tegra: host: nvdla: add emu task submit

Emulator tasks execute on CCPLEX in the DLA UMD thread, but
these tasks need synchronization with other tasks
running on the DLA engine or on other engines.

Synchronization between DLA and other engines is through
sync point as NvMedia layer does not support semaphore.

This requires assigning and incrementing sync point
value for emulator tasks too.

This change adds an IOCTL to increment sync point
max value and report it back to UMD so that DLA UMD
can communicate it to other engines.

Jira DLA-677

Change-Id: I1c4ce66868e8ab7315f37c0a6b62e1f5335a1c3a
Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1643572
GVS: Gerrit_Virtual_Submit
Reviewed-by: Mitch Harwell <mharwell@nvidia.com>
Tested-by: Mitch Harwell <mharwell@nvidia.com>
Reviewed-by: Ken Adams <kadams@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/video/tegra/host/nvdla/nvdla.h       |  23 ++++
 drivers/video/tegra/host/nvdla/nvdla_ioctl.c | 138 +++++++++++++++++++
 drivers/video/tegra/host/nvdla/nvdla_queue.c |  57 ++++++++
 3 files changed, 218 insertions(+)

diff --git a/drivers/video/tegra/host/nvdla/nvdla.h b/drivers/video/tegra/host/nvdla/nvdla.h
index 45a564fb..48e37ca5 100644
--- a/drivers/video/tegra/host/nvdla/nvdla.h
+++ b/drivers/video/tegra/host/nvdla/nvdla.h
@@ -207,6 +207,26 @@ struct nvdla_device {
 	u32 *gcov_dump_va;
 };
 
+/**
+ * struct nvdla_emu_task - emulator task info
+ *
+ * @queue:		queue the task is submitted on
+ * @sp:			pointer to the host's struct nvhost_syncpt
+ * @postfences:		array of post fences, updated on submit
+ * @num_postfences:	number of entries used in @postfences
+ * @fence:		value returned by nvhost_syncpt_incr_max() for the task
+ * @fence_counter:	number of syncpoint increments counted for the task
+ *
+ */
+struct nvdla_emu_task {
+	struct nvhost_queue *queue;
+	struct nvhost_syncpt *sp;
+	struct nvdla_fence postfences[MAX_NUM_NVDLA_POSTFENCES];
+	u32 num_postfences;
+	u32 fence;
+	u32 fence_counter;
+};
+
 /**
  * struct nvdla_task:	structure for task info
  *
@@ -372,4 +392,7 @@ size_t nvdla_get_max_task_size(void);
 int nvdla_alloc_gcov_region(struct platform_device *pdev);
 int nvdla_free_gcov_region(struct platform_device *pdev, bool update_region);
 
+int nvdla_emulator_submit(struct nvhost_queue *queue,
+				struct nvdla_emu_task *task);
+
 #endif /* End of __NVHOST_NVDLA_H__ */
diff --git a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
index 7e04ec2b..a2e0996e 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
@@ -388,6 +388,61 @@ fail:
 	return err;
 }
 
+/*
+ * Create sync fds for the SYNC_FD postfences of an emulator task, then
+ * copy all of the task's postfences to the user task descriptor.
+ */
+int nvdla_send_emu_postfences(struct nvdla_emu_task *task,
+			struct nvdla_ioctl_emu_submit_task *user_task)
+{
+	struct platform_device *dla_pdev = task->queue->pool->pdev;
+	struct platform_device *host_pdev =
+				to_platform_device(dla_pdev->dev.parent);
+	struct nvdla_fence __user *user_fences =
+		(struct nvdla_fence __user *)(uintptr_t)user_task->postfences;
+	struct nvhost_ctrl_sync_fence_info info;
+	struct nvdla_fence *fence;
+	char fence_name[32];
+	int err, i;
+
+	nvdla_dbg_fn(dla_pdev, "sending post fences");
+
+	for (i = 0; i < task->num_postfences; i++) {
+		fence = &task->postfences[i];
+		if (fence->type != NVDLA_FENCE_TYPE_SYNC_FD)
+			continue;
+
+		info.id = fence->syncpoint_index;
+		info.thresh = fence->syncpoint_value;
+
+		nvdla_dbg_info(dla_pdev, "creating post sync fd [%d]:[%d]\n",
+				info.id, info.thresh);
+
+		/* create fence name format example: nvdla0_1_fence */
+		snprintf(fence_name, sizeof(fence_name), "%s_%d_fence",
+			dev_name(&dla_pdev->dev), fence->syncpoint_index);
+
+		err = nvhost_sync_create_fence_fd(host_pdev, &info, 1,
+						fence_name, &fence->sync_fd);
+		if (err) {
+			nvdla_dbg_err(dla_pdev,
+				"fail to create postfence syncfd\n");
+			return err;
+		}
+	}
+
+	nvdla_dbg_fn(dla_pdev, "copy postfences to user");
+	/* send post fences */
+	if (copy_to_user(user_fences, task->postfences,
+		(task->num_postfences * sizeof(struct nvdla_fence)))) {
+		nvdla_dbg_err(dla_pdev, "failed to send postfences");
+		return -EFAULT;
+	}
+	nvdla_dbg_info(dla_pdev, "postfences sent");
+
+	return 0;
+}
+
 int nvdla_send_postfences(struct nvdla_task *task,
 			struct nvdla_ioctl_submit_task *user_task)
 {
@@ -623,6 +678,86 @@ static void nvdla_dump_task(struct nvdla_task *task)
 	}
 }
 
+/*
+ * NVDLA_IOCTL_EMU_TASK_SUBMIT: for each user task, increment the syncpoint
+ * max value for its postfences and copy the resulting fences back to user
+ * space. Returns 0 on success or the negative errno of the first failure.
+ */
+static int nvdla_emu_task_submit(struct nvdla_private *priv, void *arg)
+{
+	struct nvdla_submit_args *args = arg;
+	struct nvdla_ioctl_emu_submit_task __user *user_tasks;
+	struct nvdla_ioctl_emu_submit_task local_tasks[MAX_TASKS_PER_SUBMIT];
+	struct platform_device *pdev;
+	struct nvhost_queue *queue;
+	struct nvdla_emu_task task;
+	int err = 0, i = 0;
+	u32 num_tasks;
+
+	if (!args || !priv)
+		return -EINVAL;
+
+	pdev = priv->pdev;
+	queue = priv->queue;
+	if (!(queue && pdev))
+		return -EINVAL;
+
+	nvdla_dbg_fn(pdev, "inside emulator task submit");
+
+	task.queue = queue;
+	task.sp = &nvhost_get_host(pdev)->syncpt;
+	user_tasks = (struct nvdla_ioctl_emu_submit_task __user *)
+			(uintptr_t)args->tasks;
+	num_tasks = args->num_tasks;
+	if (!user_tasks || num_tasks == 0 || num_tasks > MAX_TASKS_PER_SUBMIT)
+		return -EINVAL;
+
+	nvdla_dbg_info(pdev, "num of emulator tasks [%d]", num_tasks);
+	/* IOCTL copy descriptors*/
+	if (copy_from_user(local_tasks, (void __user *)user_tasks,
+			(num_tasks * sizeof(*user_tasks)))) {
+		err = -EFAULT;
+		goto exit;
+	}
+	nvdla_dbg_info(pdev, "copy of user tasks done");
+
+	for (i = 0; i < num_tasks; i++) {
+		nvdla_dbg_info(pdev, "submit [%d]th task", i + 1);
+		/* get post fences; the count is user-controlled, bound it */
+		task.num_postfences = local_tasks[i].num_postfences;
+		if (task.num_postfences > MAX_NUM_NVDLA_POSTFENCES) {
+			err = -EINVAL;
+			nvdla_dbg_err(pdev, "too many postfences");
+			goto exit;
+		}
+		if (copy_from_user(task.postfences,
+			(void __user *)(uintptr_t)local_tasks[i].postfences,
+			(task.num_postfences * sizeof(struct nvdla_fence)))) {
+			err = -EFAULT;
+			nvdla_dbg_err(pdev, "failed to copy postfences");
+			goto exit;
+		}
+
+		err = nvdla_emulator_submit(queue, &task);
+		if (err) {
+			nvdla_dbg_err(pdev, "fail to submit task: %d", i + 1);
+			goto exit;
+		}
+		nvdla_dbg_info(pdev, "task[%d] submitted", i + 1);
+
+		/* send fences to user */
+		err = nvdla_send_emu_postfences(&task, local_tasks + i);
+		if (err) {
+			nvdla_dbg_err(pdev, "fail to send postfence%d", i + 1);
+			goto exit;
+		}
+		nvdla_dbg_info(pdev, "postfences of task[%d] sent", i + 1);
+	}
+	nvdla_dbg_fn(pdev, "Emulator task submitted, done!");
+
+exit:
+	return err;
+}
+
 static int nvdla_submit(struct nvdla_private *priv, void *arg)
 {
 	struct nvdla_submit_args *args =
@@ -776,6 +911,9 @@ static long nvdla_ioctl(struct file *file, unsigned int cmd,
 	case NVDLA_IOCTL_GET_QUEUE_STATUS:
 		err = nvdla_get_q_status(priv, (void *)buf);
 		break;
+	case NVDLA_IOCTL_EMU_TASK_SUBMIT:
+		err = nvdla_emu_task_submit(priv, (void *)buf);
+		break;
 	default:
 		nvdla_dbg_err(pdev, "invalid IOCTL CMD");
 		err = -ENOIOCTLCMD;
diff --git a/drivers/video/tegra/host/nvdla/nvdla_queue.c b/drivers/video/tegra/host/nvdla/nvdla_queue.c
index 6ea4ee5c..71af69b2 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_queue.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_queue.c
@@ -942,6 +942,63 @@ done:
 	return 0;
 }
 
+/*
+ * Increment the queue syncpoint max value once per postfence of an emulator
+ * task and store the resulting syncpoint id/value pair in each postfence.
+ * Returns -EINVAL on an unknown postfence type, 0 otherwise.
+ */
+int nvdla_emulator_submit(struct nvhost_queue *queue, struct nvdla_emu_task *task)
+{
+	struct platform_device *pdev = queue->pool->pdev;
+	struct nvdla_fence *pf;
+	uint32_t remaining;
+	int i;
+
+	/* reset fence counter */
+	task->fence_counter = 0;
+
+	/* every valid postfence accounts for one syncpoint increment */
+	for (i = 0; i < task->num_postfences; i++) {
+		pf = &task->postfences[i];
+		if (pf->type != NVDLA_FENCE_TYPE_SYNCPT &&
+		    pf->type != NVDLA_FENCE_TYPE_SYNC_FD) {
+			nvdla_dbg_err(pdev, "Invalid postfence sync type[%d]",
+				pf->type);
+			return -EINVAL;
+		}
+		task->fence_counter++;
+	}
+
+	/* get fence from nvhost */
+	task->fence = nvhost_syncpt_incr_max(task->sp, queue->syncpt_id,
+						task->fence_counter);
+
+	nvdla_dbg_fn(pdev, "syncpt[%d] fence[%d] task[%p] fence_counter[%u]",
+				queue->syncpt_id, task->fence,
+				task, task->fence_counter);
+
+	/* get syncpoint reference */
+	nvhost_syncpt_get_ref(task->sp, queue->syncpt_id);
+
+	/* values ascend per postfence; the last one equals task->fence */
+	remaining = task->fence_counter - 1;
+	for (i = 0; i < task->num_postfences; i++) {
+		pf = &task->postfences[i];
+		if (pf->type != NVDLA_FENCE_TYPE_SYNCPT &&
+		    pf->type != NVDLA_FENCE_TYPE_SYNC_FD)
+			continue;
+
+		pf->syncpoint_index = queue->syncpt_id;
+		pf->syncpoint_value = task->fence - remaining;
+
+		nvdla_dbg_info(pdev, "[%d] postfence set[%u]:[%u]",
+			i, pf->syncpoint_index, pf->syncpoint_value);
+		remaining--;
+	}
+
+	return 0;
+}
+
 /* Queue management API */
 static int nvdla_queue_submit(struct nvhost_queue *queue, void *in_task)
 {