From 20a3db1511a4cf59abd0de551c7ff171a1a34eed Mon Sep 17 00:00:00 2001
From: Shridhar Rasal <srasal@nvidia.com>
Date: Mon, 22 Jan 2018 13:44:25 +0530
Subject: [PATCH] video: tegra: host: add DLA channel submit

- Add support to submit DLA tasks through host1x channel
- Add prefence support for channel mode submission
- Allow submit mode to switch between host1x channel and MMIO
- Add debugfs for submit mode
- Enable isolate contexts

Jira DLA-105

Change-Id: I3656fba5a9f7db0c71c0f8b856dbb777513fa32b
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Signed-off-by: Shridhar Rasal <srasal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1610233
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/video/tegra/host/nvdla/nvdla.c       |   8 +-
 drivers/video/tegra/host/nvdla/nvdla.h       |  10 +-
 drivers/video/tegra/host/nvdla/nvdla_debug.c |   8 +-
 drivers/video/tegra/host/nvdla/nvdla_ioctl.c |  18 +--
 drivers/video/tegra/host/nvdla/nvdla_queue.c | 145 ++++++++++++++++---
 5 files changed, 157 insertions(+), 32 deletions(-)

diff --git a/drivers/video/tegra/host/nvdla/nvdla.c b/drivers/video/tegra/host/nvdla/nvdla.c
index 453d97d7..10ec62a6 100644
--- a/drivers/video/tegra/host/nvdla/nvdla.c
+++ b/drivers/video/tegra/host/nvdla/nvdla.c
@@ -1,7 +1,7 @@
 /*
  * NVDLA driver for T194
  *
- * Copyright (c) 2016-2017, NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA Corporation.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -259,7 +259,7 @@ static int nvdla_set_gcov_region(struct platform_device *pdev, bool unset_region
 
 	gcov_region = (struct dla_region_printf *)(gcov_cmd_mem_info.va);
 	gcov_region->region = DLA_REGION_GCOV;
-	if (unset_region)
+	if (pdata->isolate_contexts || unset_region)
 		gcov_region->address = 0;
 	else
 		gcov_region->address = nvdla_dev->gcov_dump_pa;
@@ -382,6 +382,8 @@ static int nvdla_alloc_trace_region(struct platform_device *pdev)
 	trace_region->region = DLA_REGION_TRACE;
 	trace_region->address = nvdla_dev->trace_dump_pa;
 	trace_region->size = TRACE_BUFFER_SIZE;
+	if (pdata->isolate_contexts)
+		trace_region->address = 0;
 
 	cmd_data.method_id = DLA_CMD_SET_REGIONS;
 	cmd_data.method_data = ALIGNED_DMA(trace_cmd_mem_info.pa);
@@ -455,6 +457,8 @@ static int nvdla_alloc_dump_region(struct platform_device *pdev)
 #else
 	region->address = ALIGNED_DMA(nvdla_dev->debug_dump_pa);
 #endif
+	if (pdata->isolate_contexts)
+		region->address = 0;
 
 	/* prepare command data */
 	cmd_data.method_id = DLA_CMD_SET_REGIONS;
diff --git a/drivers/video/tegra/host/nvdla/nvdla.h b/drivers/video/tegra/host/nvdla/nvdla.h
index d4b3aa1c..45a564fb 100644
--- a/drivers/video/tegra/host/nvdla/nvdla.h
+++ b/drivers/video/tegra/host/nvdla/nvdla.h
@@ -3,7 +3,7 @@
  *
  * Tegra Graphics Host NVDLA
  *
- * Copyright (c) 2016-2017 NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2016-2018 NVIDIA Corporation.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -156,6 +156,11 @@ struct nvdla_cmd_data {
 	bool wait;
 };
 
+enum nvdla_submit_mode {
+	NVDLA_SUBMIT_MODE_MMIO		= 0,	/* submit via MMIO (default) */
+	NVDLA_SUBMIT_MODE_CHANNEL	= 1	/* submit via host1x channel */
+};
+
 /**
  * data structure to keep per DLA engine device data
  *
@@ -163,6 +168,8 @@ struct nvdla_cmd_data {
  * @pool		pointer to queue table
  * @dbg_mask		debug mask for print level
  * @en_trace		flag to enable kernel tracing
+ * @submit_mode		flag to enable task submit mode, default is
+ *				NVDLA_SUBMIT_MODE_MMIO
  * @fw_version		saves current firmware version
  * @cmd_mem		structure to hold command memory pool
  * @trace_enable	to enable/disable the DLA firmware trace
@@ -185,6 +192,7 @@ struct nvdla_device {
 	int waiting;
 	u32 dbg_mask;
 	u32 en_trace;
+	u32 submit_mode;
 	u32 fw_version;
 	struct nvdla_cmd_mem cmd_mem;
 	u32 trace_enable;
diff --git a/drivers/video/tegra/host/nvdla/nvdla_debug.c b/drivers/video/tegra/host/nvdla/nvdla_debug.c
index e9918aaa..bc1fcb12 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_debug.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_debug.c
@@ -1,7 +1,7 @@
 /*
  * NVDLA debug utils
  *
- * Copyright (c) 2016 - 2017, NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2016 - 2018, NVIDIA Corporation.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -684,6 +684,12 @@ void nvdla_debug_init(struct platform_device *pdev)
 			&nvdla_dev->dbg_mask);
 	debugfs_create_u32("en_trace", S_IRUGO | S_IWUSR, de,
 			&nvdla_dev->en_trace);
+	debugfs_create_u32("submit_mode", S_IRUGO | S_IWUSR, de,
+			&nvdla_dev->submit_mode);
+
+	/* Check if isolate context enabled if submit mode is CHANNEL */
+	nvdla_dev->submit_mode = nvdla_dev->submit_mode &&
+				pdata->isolate_contexts;
 
 	dla_fw_debugfs_init(pdev);
 }
diff --git a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
index 639580b0..7e04ec2b 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
@@ -1,7 +1,7 @@
 /*
  * NVDLA IOCTL for T194
  *
- * Copyright (c) 2016-2017, NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA Corporation.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -814,20 +814,20 @@ static int nvdla_open(struct inode *inode, struct file *file)
 	if (err < 0)
 		goto err_add_client;
 
-	priv->buffers = nvhost_buffer_init(pdev);
-	if (IS_ERR(priv->buffers)) {
-		err = PTR_ERR(priv->buffers);
-		goto err_alloc_buffer;
-	}
-
 	priv->queue = nvhost_queue_alloc(nvdla_dev->pool,
-					 MAX_NVDLA_TASK_COUNT,
-					 false);
+		MAX_NVDLA_TASK_COUNT,
+		nvdla_dev->submit_mode == NVDLA_SUBMIT_MODE_CHANNEL);
 	if (IS_ERR(priv->queue)) {
 		err = PTR_ERR(priv->queue);
 		goto err_alloc_queue;
 	}
 
+	priv->buffers = nvhost_buffer_init(priv->queue->vm_pdev);
+	if (IS_ERR(priv->buffers)) {
+		err = PTR_ERR(priv->buffers);
+		goto err_alloc_buffer;
+	}
+
 	return nonseekable_open(inode, file);
 
 err_alloc_queue:
diff --git a/drivers/video/tegra/host/nvdla/nvdla_queue.c b/drivers/video/tegra/host/nvdla/nvdla_queue.c
index 8d33f11b..6ea4ee5c 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_queue.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_queue.c
@@ -1,7 +1,7 @@
 /*
  * NVDLA queue and task management for T194
  *
- * Copyright (c) 2016-2017, NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA Corporation.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -36,6 +36,7 @@
 #include "nvdla/nvdla.h"
 #include "nvdla/nvdla_debug.h"
 #include "dla_os_interface.h"
+#include "t194/hardware_t194.h"
 
 /* TODO: 1. revisit timeout post silicon
  *       2. when silicon and sim tests go live at same time,
@@ -503,7 +504,7 @@ static int nvdla_fill_postactions(struct nvdla_task *task)
 			}
 
 			/* For postaction also update MSS addr */
-			syncpt_addr = nvhost_syncpt_address(pdev,
+			syncpt_addr = nvhost_syncpt_address(queue->vm_pdev,
 					queue->syncpt_id);
 			next = add_fence_action(next, POSTACTION_SEM,
 					syncpt_addr, 1);
@@ -664,8 +665,8 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 
 					nvdla_dbg_info(pdev, "pre i:%d GoS missing for syncfd [%d]",
 							i, id);
-					syncpt_addr = nvhost_syncpt_address(pdev,
-							id);
+					syncpt_addr = nvhost_syncpt_address(
+							queue->vm_pdev, id);
 					nvdla_dbg_info(pdev, "pre i:%d syncfd_pt:[%u] mss_dma_addr[%pad]",
 						i, id, &syncpt_addr);
 					next = add_fence_action(next, PREACTION_SEM_GE,
@@ -698,7 +699,8 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 
 				nvdla_dbg_info(pdev, "pre i:%d GoS missing", i);
 
-				syncpt_addr = nvhost_syncpt_address(pdev,
+				syncpt_addr = nvhost_syncpt_address(
+					queue->vm_pdev,
 					task->prefences[i].syncpoint_index);
 				nvdla_dbg_info(pdev, "pre i:%d syncpt:[%u] dma_addr[%pad]",
 					i,
@@ -859,12 +861,95 @@ fail_to_map_mem:
 	return err;
 }
 
+/**
+ * nvdla_send_cmd_channel() - submit one DLA command via the host1x channel
+ * @pdev: DLA platform device
+ * @queue: queue handed to nvhost_queue_submit_to_host1x()
+ * @cmd_data: method id/data to send and whether to wait for completion
+ * @task: source of the syncpoint prefences; &task->fence goes to the submit
+ *
+ * Return: 0 on success, -EINVAL for a non-syncpoint prefence, -ETIMEDOUT
+ * if cmd_completion is not signalled in time, or the host1x submit error.
+ */
+static int nvdla_send_cmd_channel(struct platform_device *pdev,
+			struct nvhost_queue *queue,
+			struct nvdla_cmd_data *cmd_data,
+			struct nvdla_task *task)
+{
+	unsigned long timeout;
+	struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
+	struct nvdla_device *nvdla_dev = pdata->private_data;
+	uint32_t method_id = cmd_data->method_id;
+	uint32_t method_data = cmd_data->method_data;
+	bool wait = cmd_data->wait;
+	u32 syncpt_wait_ids[MAX_NUM_NVDLA_PREFENCES];
+	u32 syncpt_wait_thresh[MAX_NUM_NVDLA_PREFENCES];
+	u32 cmdbuf[3];
+	int err = 0, i;
+
+	nvdla_dbg_info(pdev, "");
+	/* request completion/error interrupts when the caller waits */
+	if (wait)
+		method_id |= (1 << DLA_INT_ON_COMPLETE_SHIFT) |
+				(1 << DLA_INT_ON_ERROR_SHIFT);
+
+	/* Pick up fences... */
+	for (i = 0; i < task->num_prefences; i++) {
+		/* ..and ensure that we have only syncpoints present */
+		if (task->prefences[i].type != NVDLA_FENCE_TYPE_SYNCPT) {
+			nvdla_dbg_err(pdev, "syncpt only supported");
+			return -EINVAL;
+		}
+
+		nvdla_dbg_info(pdev, "presyncpt[%d] value[%d]\n",
+				task->prefences[i].syncpoint_index,
+				task->prefences[i].syncpoint_value);
+
+		/* Put fences into a separate array */
+		syncpt_wait_ids[i] = task->prefences[i].syncpoint_index;
+		syncpt_wait_thresh[i] = task->prefences[i].syncpoint_value;
+	}
+
+	/* raised only after validation so -EINVAL cannot leave it set */
+	nvdla_dev->waiting = 1;
+
+	cmdbuf[0] = nvhost_opcode_incr(NV_DLA_THI_METHOD_ID >> 2, 2);
+	cmdbuf[1] = method_id;
+	cmdbuf[2] = method_data;
+
+	err = nvhost_queue_submit_to_host1x(queue, cmdbuf, ARRAY_SIZE(cmdbuf),
+			1, syncpt_wait_ids, syncpt_wait_thresh,
+			task->num_prefences, &task->fence);
+	if (err) {
+		nvdla_dbg_err(pdev, "channel submit failed");
+		goto done;
+	}
+
+	nvdla_dbg_info(pdev, "task submitted through channel mode");
+
+	if (!wait)
+		goto done;
+
+	timeout = msecs_to_jiffies(CMD_TIMEOUT_MSEC);
+
+	if (!wait_for_completion_timeout(&nvdla_dev->cmd_completion, timeout)) {
+		nvdla_dbg_err(pdev, "channel mode submit timedout");
+		err = -ETIMEDOUT;
+	}
+
+done:
+	nvdla_dev->waiting = 0;
+	return err;
+}
+
 /* Queue management API */
 static int nvdla_queue_submit(struct nvhost_queue *queue, void *in_task)
 {
 	struct nvdla_task *task = (struct nvdla_task *)in_task;
 	struct nvdla_task *last_task = NULL;
 	struct platform_device *pdev = queue->pool->pdev;
+	struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
+	struct nvdla_device *nvdla_dev = pdata->private_data;
 	struct nvdla_cmd_data cmd_data;
 	uint32_t method_data;
 	uint32_t method_id;
@@ -882,6 +967,13 @@ static int nvdla_queue_submit(struct nvhost_queue *queue, void *in_task)
 	/* get task ref and add to list */
 	nvdla_task_get(task);
 
+	/* get fence from nvhost for MMIO mode*/
+	if (nvdla_dev->submit_mode == NVDLA_SUBMIT_MODE_MMIO) {
+		task->fence = nvhost_syncpt_incr_max(task->sp,
+						queue->syncpt_id,
+						task->fence_counter);
+	}
+
 	/* update last task desc's next */
 	if (!list_empty(&queue->tasklist)) {
 		last_task = list_last_entry(&queue->tasklist,
@@ -895,10 +987,6 @@ static int nvdla_queue_submit(struct nvhost_queue *queue, void *in_task)
 
 	nvdla_dbg_info(pdev, "task[%p] added to list", task);
 
-	/* get fence from nvhost */
-	task->fence = nvhost_syncpt_incr_max(task->sp, queue->syncpt_id,
-						task->fence_counter);
-
 	nvdla_dbg_fn(pdev, "syncpt[%d] fence[%d] task[%p] fence_counter[%u]",
 				queue->syncpt_id, task->fence,
 				task, task->fence_counter);
@@ -912,6 +1000,21 @@ static int nvdla_queue_submit(struct nvhost_queue *queue, void *in_task)
 			(1 << DLA_INT_ON_ERROR_SHIFT);
 	method_data = ALIGNED_DMA(task->task_desc_pa);
 
+	/* prepare command for channel submit */
+	if (nvdla_dev->submit_mode == NVDLA_SUBMIT_MODE_CHANNEL) {
+
+		cmd_data.method_id = method_id;
+		cmd_data.method_data = method_data;
+		cmd_data.wait = true;
+
+		/* submit task to engine */
+		err = nvdla_send_cmd_channel(pdev, queue, &cmd_data, task);
+		if (err) {
+			nvdla_dbg_err(pdev, "task[%p] submit failed", task);
+			goto fail_to_channel_submit;
+		}
+	}
+
 	/* register notifier with fence */
 	err = nvhost_intr_register_notifier(pdev, queue->syncpt_id,
 		task->fence, nvdla_queue_update, queue);
@@ -936,20 +1039,24 @@ static int nvdla_queue_submit(struct nvhost_queue *queue, void *in_task)
 		}
 	}
 
-	/* prepare command */
-	cmd_data.method_id = method_id;
-	cmd_data.method_data = method_data;
-	cmd_data.wait = true;
+	/* prepare command for MMIO submit */
+	if (nvdla_dev->submit_mode == NVDLA_SUBMIT_MODE_MMIO) {
+		cmd_data.method_id = method_id;
+		cmd_data.method_data = method_data;
+		cmd_data.wait = true;
 
-	/* submit task to engine */
-	err = nvdla_send_cmd(pdev, &cmd_data);
-	if (err) {
-		nvdla_task_syncpt_reset(task->sp, queue->syncpt_id,
-				task->fence);
-		nvdla_dbg_err(pdev, "task[%p] submit failed", task);
+		/* submit task to engine */
+		err = nvdla_send_cmd(pdev, &cmd_data);
+		if (err) {
+			nvdla_task_syncpt_reset(task->sp, queue->syncpt_id,
+					task->fence);
+			nvdla_dbg_err(pdev, "task[%p] submit failed", task);
+		}
 	}
 
+
 fail_to_register:
+fail_to_channel_submit:
 	mutex_unlock(&queue->list_lock);
 
 	return err;