From bca87c76ba07da4f63c08ee9de86364a5f4ff059 Mon Sep 17 00:00:00 2001
From: Shridhar Rasal <srasal@nvidia.com>
Date: Thu, 22 Dec 2016 12:15:13 +0530
Subject: [PATCH] video: tegra: host: dla: add support for input task status

- input task status notifiers are added as preactions; if the task
  status of a preaction matches the status specified in the notifier,
  the task can proceed to execution
- this adds support for input status notifiers: allocate memory for the
  task and task descriptor, copy and pin the user memory, and send the
  IOVA of the status notifier user memory to the engine

Jira DLA-62

Change-Id: Ibfa3ea0a15dd173fc279ac58b1ab85d2c2e77d82
Signed-off-by: Shridhar Rasal <srasal@nvidia.com>
Reviewed-on: http://git-master/r/1275539
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/video/tegra/host/nvdla/nvdla.h       |  3 ++
 drivers/video/tegra/host/nvdla/nvdla_ioctl.c | 33 ++++++++----
 drivers/video/tegra/host/nvdla/nvdla_queue.c | 56 ++++++++++++++------
 3 files changed, 66 insertions(+), 26 deletions(-)

diff --git a/drivers/video/tegra/host/nvdla/nvdla.h b/drivers/video/tegra/host/nvdla/nvdla.h
index ff13e9a8..7ce186d3 100644
--- a/drivers/video/tegra/host/nvdla/nvdla.h
+++ b/drivers/video/tegra/host/nvdla/nvdla.h
@@ -58,6 +58,7 @@
  */
 #define MAX_NUM_NVDLA_PREFENCES		4
 #define MAX_NUM_NVDLA_POSTFENCES	4
+#define MAX_NUM_NVDLA_IN_TASK_STATUS	4
 
 /**
  * keep list of DLA command size here.
@@ -130,8 +131,10 @@ struct nvdla_task {
 	struct nvhost_syncpt *sp;
 	struct nvdla_fence *prefences;
 	struct nvdla_fence *postfences;
+	struct nvdla_status_notify *in_task_status;
 	u32 num_prefences;
 	u32 num_postfences;
+	u32 num_in_task_status;
 	u32 fence;
 	struct kref ref;
 	struct list_head list;
diff --git a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
index 34782d05..f46ee0ea 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
@@ -41,9 +41,10 @@
 #define FLCN_IDLE_TIMEOUT_DEFAULT	10000	/* 10 milliseconds */
 #define ALIGNED_DMA(x) ((x >> 8) & 0xffffffff)
 
-#define MAX_NVDLA_TASK_SIZE sizeof(struct nvdla_task) + \
-		(MAX_NUM_NVDLA_PREFENCES + MAX_NUM_NVDLA_POSTFENCES) * \
-		sizeof(struct nvdla_fence)
+#define MAX_NVDLA_TASK_SIZE (sizeof(struct nvdla_task) + 		\
+		((MAX_NUM_NVDLA_PREFENCES + MAX_NUM_NVDLA_POSTFENCES) *	\
+		sizeof(struct nvdla_fence)) +				\
+		((MAX_NUM_NVDLA_IN_TASK_STATUS) * sizeof(struct nvdla_status_notify)))
 
 /**
  * struct nvdla_private per unique FD private data
@@ -170,13 +171,13 @@ fail_to_on:
 }
 
 /* task management API's */
-static int nvdla_get_fences(struct nvdla_ioctl_submit_task *user_task,
+static int nvdla_get_actions(struct nvdla_ioctl_submit_task *user_task,
 			struct nvdla_task *task)
 {
 	int err = 0;
 	struct platform_device *pdev = task->queue->pool->pdev;
 
-	nvdla_dbg_fn(pdev, "copying fences");
+	nvdla_dbg_fn(pdev, "copying actions");
 
 	/* get pre fences */
 	if (copy_from_user(task->prefences,
@@ -187,6 +188,15 @@ static int nvdla_get_fences(struct nvdla_ioctl_submit_task *user_task,
 		goto fail;
 	}
 
+	/* get input task status */
+	if (copy_from_user(task->in_task_status,
+		(void __user *)user_task->input_task_status,
+		(task->num_in_task_status * sizeof(struct nvdla_status_notify)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(pdev, "failed to copy input task status");
+		goto fail;
+	}
+
 	/* get post fences */
 	if (copy_from_user(task->postfences,
 		(void __user *)user_task->postfences,
@@ -196,7 +206,7 @@ static int nvdla_get_fences(struct nvdla_ioctl_submit_task *user_task,
 		goto fail;
 	}
 
-	nvdla_dbg_info(pdev, "copying fences done");
+	nvdla_dbg_info(pdev, "copying actions done");
 
 fail:
 	return err;
@@ -285,6 +295,7 @@ static int nvdla_fill_task(struct nvhost_queue *queue,
 
 	task->num_prefences = local_task->num_prefences;
 	task->num_postfences = local_task->num_postfences;
+	task->num_in_task_status = local_task->num_input_task_status;
 
 	/* assign memory for local pre and post action lists */
 	mem = task;
@@ -292,12 +303,14 @@ static int nvdla_fill_task(struct nvhost_queue *queue,
 	task->prefences = mem;
 	mem += task->num_prefences * sizeof(struct nvdla_fence);
 	task->postfences = mem;
+	mem += task->num_postfences * sizeof(struct nvdla_fence);
+	task->in_task_status = mem;
 
 	/* update local fences into task */
-	err = nvdla_get_fences(local_task, task);
+	err = nvdla_get_actions(local_task, task);
 	if (err) {
-		nvdla_dbg_err(pdev, "failed to get fences");
-		goto fail_to_get_fences;
+		nvdla_dbg_err(pdev, "failed to get actions");
+		goto fail_to_get_actions;
 	}
 
 	task->num_addresses = local_task->num_addresses;
@@ -309,7 +322,7 @@ static int nvdla_fill_task(struct nvhost_queue *queue,
 
 	return 0;
 
-fail_to_get_fences:
+fail_to_get_actions:
 	kfree(task);
 fail_to_alloc_task:
 	*ptask = NULL;
diff --git a/drivers/video/tegra/host/nvdla/nvdla_queue.c b/drivers/video/tegra/host/nvdla/nvdla_queue.c
index 726a994c..2c8d7315 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_queue.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_queue.c
@@ -44,10 +44,10 @@
 #define NVDLA_QUEUE_ABORT_TIMEOUT	10000	/* 10 sec */
 #define NVDLA_QUEUE_ABORT_RETRY_PERIOD	500	/* 500 ms */
 
-#define NVDLA_MAX_PREACTION_SIZE (MAX_NUM_NVDLA_PREFENCES * \
-				sizeof(struct dla_action_opcode) + \
-				MAX_NUM_NVDLA_PREFENCES * \
-				sizeof(struct dla_action_semaphore) + \
+#define NVDLA_MAX_PREACTION_SIZE (((MAX_NUM_NVDLA_PREFENCES + MAX_NUM_NVDLA_IN_TASK_STATUS) *	\
+				sizeof(struct dla_action_opcode)) + 				\
+				((MAX_NUM_NVDLA_PREFENCES + MAX_NUM_NVDLA_IN_TASK_STATUS) *	\
+				sizeof(struct dla_action_semaphore)) + 				\
 				sizeof(struct dla_action_opcode))
 
 #define NVDLA_MAX_POSTACTION_SIZE (MAX_NUM_NVDLA_POSTFENCES * \
@@ -143,6 +143,12 @@ static void nvdla_task_free_locked(struct nvdla_task *task)
 		}
 	}
 
+	for (i = 0; i < task->num_in_task_status; i++) {
+		if (task->in_task_status[i].handle)
+			nvhost_buffer_submit_unpin(task->buffers,
+				&task->in_task_status[i].handle, 1);
+	}
+
 	for (i = 0; i < task->num_postfences; i++) {
 		if ((task->postfences[i].type == NVDLA_FENCE_TYPE_SEMAPHORE ||
 	          task->postfences[i].type == NVDLA_FENCE_TYPE_TS_SEMAPHORE) &&
@@ -466,7 +472,7 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 	uint16_t preactionlist_of;
 	void *next = NULL;
 	void *mem;
-	int i, pre_cnt = 0;
+	int i, pre_cnt = 0, j;
 
 	/* preaction list offset update */
 	preactionlist_of = task_desc->postactions +
@@ -497,15 +503,28 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 				_opcode, _addr, _val);			\
 	} while (0)
 
-	/* fill all preactions */
-	for (i = 0; i <= task->num_prefences; i++) {
+	for (j = 0; j < task->num_in_task_status; j++) {
+		dma_addr_t dma_addr;
+		size_t dma_size;
 
-		/* update end of action list */
-		if (i == task->num_prefences) {
-			UPDATE_PREACTION(pre_cnt++, PREACTION_TERMINATE,
-						(dma_addr_t)0x0, 0);
-			break;
-		}
+		nvdla_dbg_info(pdev, "i[%d] h[%u] o[%u] status[%d]",
+					j,
+					task->in_task_status[j].handle,
+					task->in_task_status[j].offset,
+					task->in_task_status[j].status);
+
+			if (nvhost_buffer_submit_pin(buffers,
+					&task->in_task_status[j].handle,
+					1, &dma_addr, &dma_size))
+				break;
+
+			UPDATE_PREACTION(pre_cnt++, PREACTION_TASK_STATUS,
+				dma_addr + task->in_task_status[j].offset,
+				task->in_task_status[j].status);
+	}
+
+	/* fill all preactions */
+	for (i = 0; i < task->num_prefences; i++) {
 
 		switch (task->prefences[i].type) {
 		case NVDLA_FENCE_TYPE_SYNC_FD: {
@@ -579,13 +598,18 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 		}
 	}
 
+	/* update end of action list */
+	UPDATE_PREACTION(pre_cnt++, PREACTION_TERMINATE, (dma_addr_t)0x0, 0);
+
 	/* actually update lists data */
 	mem = (char *)task_desc + task_desc->preactions;
 	preactionl = (struct dla_action_list *)mem;
 	preactionl->offset = preactionlist_of;
-	preactionl->size = i * (sizeof(struct dla_action_opcode) +
-			sizeof(struct dla_action_semaphore)) +
-			sizeof(struct dla_action_opcode);
+	preactionl->size = ((i * (sizeof(struct dla_action_opcode) +
+			sizeof(struct dla_action_semaphore))));
+	preactionl->size += ((j * (sizeof(struct dla_action_opcode) +
+			sizeof(struct dla_action_task_status))));
+	preactionl->size += sizeof(struct dla_action_opcode);
 
 	return 0;
 }