diff --git a/drivers/video/tegra/host/nvdla/dla_fw_version.h b/drivers/video/tegra/host/nvdla/dla_fw_version.h
index 43d42319..a0f88ace 100644
--- a/drivers/video/tegra/host/nvdla/dla_fw_version.h
+++ b/drivers/video/tegra/host/nvdla/dla_fw_version.h
@@ -20,7 +20,7 @@
 #define _DLA_FW_VERSION_H_
 
 #define FIRMWARE_VERSION_MAJOR		0x1
-#define FIRMWARE_VERSION_MINOR		0x0
+#define FIRMWARE_VERSION_MINOR		0x1
 #define FIRMWARE_VERSION_SUBMINOR	0x0
 
 static inline uint32_t dla_version(void)
diff --git a/drivers/video/tegra/host/nvdla/dla_os_interface.h b/drivers/video/tegra/host/nvdla/dla_os_interface.h
index e153cc00..d6efca15 100644
--- a/drivers/video/tegra/host/nvdla/dla_os_interface.h
+++ b/drivers/video/tegra/host/nvdla/dla_os_interface.h
@@ -74,18 +74,22 @@
 #define DLA_INT_ON_COMPLETE_SHIFT	8
 #define DLA_INT_ON_ERROR_SHIFT		9
 
-#define PREACTION_TERMINATE	0x0
-#define PREACTION_SEM_EQ	0x90
-#define PREACTION_SEM_GE	0x92
-#define PREACTION_GOS_EQ	0xB0
-#define PREACTION_GOS_GE	0xB2
-#define PREACTION_TASK_STATUS	0xC0
+/* control actions */
+#define ACTION_TERMINATE		0x0
 
-#define POSTACTION_TERMINATE	0x0
-#define POSTACTION_SEM		0x80
-#define POSTACTION_TS_SEM	0x83
-#define POSTACTION_GOS		0xA0
-#define POSTACTION_TASK_STATUS	0xC1
+/* conditional actions */
+#define ACTION_SEM_EQ			0x90
+#define ACTION_SEM_GE			0x92
+#define ACTION_GOS_EQ			0xB0
+#define ACTION_GOS_GE			0xB2
+#define ACTION_TASK_STATUS_EQ		0xC0
+
+/* write actions */
+#define ACTION_WRITE_SEM		0x80
+#define ACTION_WRITE_TS_SEM		0x83
+#define ACTION_WRITE_TIMESTAMP		0x87
+#define ACTION_WRITE_GOS		0xA0
+#define ACTION_WRITE_TASK_STATUS	0xC1
 
 #define PING_DATA_SIZE		4
 #define BUFFER_MULTIPLIER	4
@@ -228,6 +232,17 @@ struct dla_action_task_status {
 	uint16_t status;
 } __attribute__ ((packed));
 
+/**
+ * Timestamp update action structure
+ *
+ * OPCODE = 0x87
+ *
+ * @address: Address to write timestamp
+ */
+struct dla_action_timestamp {
+	uint64_t address;
+} __attribute__ ((packed));
+
 /**
  * Status notifier structure
  *
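The action lists consumed by the firmware are flat byte streams: a small opcode (a uint8_t in the helpers later in this patch), followed immediately by a packed payload, with ACTION_TERMINATE (0x0) closing the list. A minimal sketch of how the new 0x87 action would be appended to such a stream — illustrative only; the driver's own add_opcode()/add_timestamp_action() helpers below do the real work:

	#include <stdint.h>

	struct dla_action_timestamp {
		uint64_t address;
	} __attribute__ ((packed));

	/* Append "write timestamp to addr" to an action stream (sketch). */
	static uint8_t *emit_write_timestamp(uint8_t *mem, uint64_t addr)
	{
		*mem++ = 0x87;	/* ACTION_WRITE_TIMESTAMP */
		((struct dla_action_timestamp *)mem)->address = addr;
		return mem + sizeof(struct dla_action_timestamp);
	}

The packed attribute matters here: the 64-bit address lands at whatever byte offset the preceding opcodes left, so the payload struct must carry no padding or alignment requirement.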
diff --git a/drivers/video/tegra/host/nvdla/nvdla.h b/drivers/video/tegra/host/nvdla/nvdla.h
index d656ecee..2aa3a6cc 100644
--- a/drivers/video/tegra/host/nvdla/nvdla.h
+++ b/drivers/video/tegra/host/nvdla/nvdla.h
@@ -99,8 +99,11 @@
  */
 #define MAX_NUM_NVDLA_PREFENCES		32
 #define MAX_NUM_NVDLA_POSTFENCES	32
+#define MAX_NUM_NVDLA_EMU_PREFENCES	16
+#define MAX_NUM_NVDLA_EMU_POSTFENCES	16
 #define MAX_NUM_NVDLA_IN_TASK_STATUS	MAX_NUM_NVDLA_PREFENCES
 #define MAX_NUM_NVDLA_OUT_TASK_STATUS	MAX_NUM_NVDLA_POSTFENCES
+#define MAX_NUM_NVDLA_OUT_TIMESTAMP	32
 #define NUM_PROFILING_POSTACTION	1
 #define MAX_COMMANDS_PER_DEVICE		1
 
@@ -219,7 +222,9 @@ struct nvdla_device {
  *
  * @queue		Queue in which task submitted
  * @sp			pointer to syncpt
+ * @prefences		pointer to pre fences
  * @postfences		pointer to post fences
+ * @num_prefences	Number of prefences in task
  * @num_postfences	Number of postfences in task
  * @fence		Fence tracking for current task
  * @fence_counter	Counter used to track fence value
@@ -228,7 +233,9 @@ struct nvdla_device {
 struct nvdla_emu_task {
 	struct nvdla_queue *queue;
 	struct nvhost_syncpt *sp;
-	struct nvdev_fence postfences[MAX_NUM_NVDLA_POSTFENCES];
+	struct nvdev_fence prefences[MAX_NUM_NVDLA_EMU_PREFENCES];
+	struct nvdev_fence postfences[MAX_NUM_NVDLA_EMU_POSTFENCES];
+	u32 num_prefences;
 	u32 num_postfences;
 	u32 fence;
 	u32 fence_counter;
@@ -259,12 +266,18 @@ struct nvdla_task {
 	struct nvdev_fence prefences[MAX_NUM_NVDLA_PREFENCES];
 	struct nvdev_fence postfences[MAX_NUM_NVDLA_POSTFENCES];
 	struct nvdla_status_notify in_task_status[MAX_NUM_NVDLA_IN_TASK_STATUS];
-	struct nvdla_status_notify out_task_status[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct nvdla_status_notify sof_task_status[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct nvdla_status_notify eof_task_status[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct nvdla_mem_handle sof_timestamps[MAX_NUM_NVDLA_OUT_TIMESTAMP];
+	struct nvdla_mem_handle eof_timestamps[MAX_NUM_NVDLA_OUT_TIMESTAMP];
 	struct nvdla_mem_handle memory_handles[NVDLA_MAX_BUFFERS_PER_TASK];
 	u8 num_prefences;
 	u8 num_postfences;
 	u8 num_in_task_status;
-	u8 num_out_task_status;
+	u8 num_sof_task_status;
+	u8 num_eof_task_status;
+	u8 num_sof_timestamps;
+	u8 num_eof_timestamps;
 	u32 num_addresses;
 	u32 fence;
 	u32 fence_counter;
@@ -280,7 +293,10 @@ struct nvdla_task {
 	struct dma_buf *prefences_sem_dmabuf[MAX_NUM_NVDLA_PREFENCES];
 	struct dma_buf *in_task_status_dmabuf[MAX_NUM_NVDLA_IN_TASK_STATUS];
 	struct dma_buf *postfences_sem_dmabuf[MAX_NUM_NVDLA_POSTFENCES];
-	struct dma_buf *out_task_status_dmabuf[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct dma_buf *sof_task_status_dmabuf[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct dma_buf *eof_task_status_dmabuf[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct dma_buf *sof_timestamps_dmabuf[MAX_NUM_NVDLA_OUT_TIMESTAMP];
+	struct dma_buf *eof_timestamps_dmabuf[MAX_NUM_NVDLA_OUT_TIMESTAMP];
 };
 
 struct dla_mem_addr {
@@ -402,7 +418,7 @@ int nvdla_free_gcov_region(struct platform_device *pdev,
 			bool update_region);
 int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task);
 void task_free(struct kref *ref);
-int nvdla_get_postfences(struct nvdla_queue *queue, void *in_task);
+int nvdla_get_signal_fences(struct nvdla_queue *queue, void *in_task);
 int nvdla_send_gos_region(struct platform_device *pdev);
 
 #endif /* End of __NVHOST_NVDLA_H__ */
diff --git a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
index 290b1b8c..4987ee3b 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
@@ -376,13 +376,41 @@ static int nvdla_get_actions(struct nvdla_ioctl_submit_task *user_task,
 		goto fail;
 	}
 
-	/* get output task status */
-	if (copy_from_user(task->out_task_status,
-		(void __user *)user_task->output_task_status,
-		(task->num_out_task_status *
+	/* get sof task status */
+	if (copy_from_user(task->sof_task_status,
+		(void __user *)user_task->sof_task_status,
+		(task->num_sof_task_status *
 			sizeof(struct nvdla_status_notify)))) {
 		err = -EFAULT;
-		nvdla_dbg_err(pdev, "failed to copy output task status");
+		nvdla_dbg_err(pdev, "failed to copy sof task status");
+		goto fail;
+	}
+
+	/* get eof task status */
+	if (copy_from_user(task->eof_task_status,
+		(void __user *)user_task->eof_task_status,
+		(task->num_eof_task_status *
+			sizeof(struct nvdla_status_notify)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(pdev, "failed to copy eof task status");
+		goto fail;
+	}
+
+	/* get sof timestamps */
+	if (copy_from_user(task->sof_timestamps,
+		(void __user *)user_task->sof_timestamps,
+		(task->num_sof_timestamps * sizeof(struct nvdla_mem_handle)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(pdev, "failed to copy sof timestamps");
+		goto fail;
+	}
+
+	/* get eof timestamps */
+	if (copy_from_user(task->eof_timestamps,
+		(void __user *)user_task->eof_timestamps,
+		(task->num_eof_timestamps * sizeof(struct nvdla_mem_handle)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(pdev, "failed to copy eof timestamps");
 		goto fail;
 	}
 
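Each new copy follows the same shape as the pre-existing ones: a fixed-size kernel array, a user pointer, and a count that nvdla_val_task_submit_input() has already checked against the matching MAX_NUM_* ceiling, so copy_from_user() cannot overrun the destination. A hypothetical condensation of the repeated pattern (copy_notify_list() is not in the driver):

	static int copy_notify_list(struct nvdla_status_notify *dst, u64 src,
				u8 count, struct platform_device *pdev,
				const char *what)
	{
		if (copy_from_user(dst, (void __user *)(uintptr_t)src,
				count * sizeof(*dst))) {
			nvdla_dbg_err(pdev, "failed to copy %s", what);
			return -EFAULT;
		}
		return 0;
	}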
@@ -392,20 +420,66 @@ fail:
 	return err;
 }
 
-static int nvdla_send_emu_postfences(struct nvdla_emu_task *task,
+static int nvdla_send_emu_signal_fences(struct nvdla_emu_task *task,
 		struct nvdla_ioctl_emu_submit_task *user_task)
 {
 	int err = 0, i;
 	struct platform_device *dla_pdev = task->queue->pool->pdev;
 	struct platform_device *host_pdev =
 		to_platform_device(dla_pdev->dev.parent);
+	struct nvdev_fence __user *prefences =
+		(struct nvdev_fence __user *)(uintptr_t)user_task->prefences;
 	struct nvdev_fence __user *postfences =
 		(struct nvdev_fence __user *)(uintptr_t)user_task->postfences;
 	char fence_name[32];
 
-	nvdla_dbg_fn(dla_pdev, "sending post fences");
+	nvdla_dbg_fn(dla_pdev, "sending signal fences");
+
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		if (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD) {
+			struct nvhost_ctrl_sync_fence_info info;
+
+			info.id = task->prefences[i].syncpoint_index;
+			info.thresh = task->prefences[i].syncpoint_value;
+
+			nvdla_dbg_info(dla_pdev,
+				"creating pre sync fd [%d]:[%d]\n",
+				info.id, info.thresh);
+
+			/* create fence name format example: nvdla0_1_fence */
+			snprintf(fence_name, sizeof(fence_name),
+				"%s_%d_%d_prefence", dev_name(&dla_pdev->dev),
+				task->prefences[i].syncpoint_index, i);
+
+			err = nvhost_sync_create_fence_fd(host_pdev,
+				&info, 1, fence_name,
+				&task->prefences[i].sync_fd);
+
+			if (err) {
+				nvdla_dbg_err(dla_pdev,
+					"fail to create prefence syncfd\n");
+				goto fail;
+			}
+		}
+	}
+
+	nvdla_dbg_fn(dla_pdev, "copy prefences to user");
+	/* send pre fences */
+	if (copy_to_user(prefences, task->prefences,
+		(task->num_prefences * sizeof(struct nvdev_fence)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(dla_pdev, "failed to send prefences");
+		goto fail;
+	}
+	nvdla_dbg_info(dla_pdev, "prefences sent");
 
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
 		if (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD) {
 			struct nvhost_ctrl_sync_fence_info info;
 
@@ -418,8 +492,8 @@ static int nvdla_send_emu_postfences(struct nvdla_emu_task *task,
 
 			/* create fence name format example: nvdla0_1_fence */
 			snprintf(fence_name, sizeof(fence_name),
-				"%s_%d_fence", dev_name(&dla_pdev->dev),
-				task->postfences[i].syncpoint_index);
+				"%s_%d_%d_postfence", dev_name(&dla_pdev->dev),
+				task->postfences[i].syncpoint_index, i);
 
 			err = nvhost_sync_create_fence_fd(host_pdev,
 				&info, 1, fence_name,
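Only entries whose action is NVDEV_FENCE_SIGNAL produce a sync fd here; WAIT entries are skipped and flow back to userspace unchanged. The fence name now also embeds the array index, so two signal fences on the same syncpoint get distinct names. For device nvdla0, syncpoint 7, index 2 (hypothetical values):

	/* old scheme produced "nvdla0_7_fence" for every index */
	snprintf(fence_name, sizeof(fence_name), "%s_%d_%d_postfence",
		"nvdla0", 7, 2);	/* -> "nvdla0_7_2_postfence" */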
@@ -447,20 +521,67 @@ fail:
 	return err;
 }
 
-static int nvdla_update_postfences(struct nvdla_task *task,
+static int nvdla_update_signal_fences(struct nvdla_task *task,
 		struct nvdla_ioctl_submit_task *user_task)
 {
 	int err = 0, i;
 	struct platform_device *dla_pdev = task->queue->pool->pdev;
 	struct platform_device *host_pdev =
 		to_platform_device(dla_pdev->dev.parent);
+	struct nvdev_fence __user *prefences =
+		(struct nvdev_fence __user *)(uintptr_t)user_task->prefences;
 	struct nvdev_fence __user *postfences =
 		(struct nvdev_fence __user *)(uintptr_t)user_task->postfences;
 	char fence_name[32];
 
-	nvdla_dbg_fn(dla_pdev, "copy post fences for user");
+	nvdla_dbg_fn(dla_pdev, "copy fences for user");
 
+	/* update pre fence signals to users */
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		if (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD) {
+			struct nvhost_ctrl_sync_fence_info info;
+
+			info.id = task->prefences[i].syncpoint_index;
+			info.thresh = task->prefences[i].syncpoint_value;
+
+			nvdla_dbg_info(dla_pdev,
+				"creating pre sync fd [%d]:[%d]\n",
+				info.id, info.thresh);
+
+			/* create fence name format example: nvdla0_1_fence */
+			snprintf(fence_name, sizeof(fence_name),
+				"%s_%d_%d_prefence", dev_name(&dla_pdev->dev),
+				task->prefences[i].syncpoint_index, i);
+
+			err = nvhost_sync_create_fence_fd(host_pdev,
+				&info, 1, fence_name,
+				&task->prefences[i].sync_fd);
+
+			if (err) {
+				nvdla_dbg_err(dla_pdev,
+					"fail to create prefence syncfd\n");
+				goto fail;
+			}
+		}
+	}
+
+	nvdla_dbg_fn(dla_pdev, "copy prefences to user");
+	/* copy pre fences */
+	if (copy_to_user(prefences, task->prefences,
+		(task->num_prefences * sizeof(struct nvdev_fence)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(dla_pdev, "failed to copy prefences");
+		goto fail;
+	}
+
+	/* update post fence signals to user */
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
 		if (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD) {
 			struct nvhost_ctrl_sync_fence_info info;
 
@@ -473,8 +594,8 @@ static int nvdla_update_postfences(struct nvdla_task *task,
 
 			/* create fence name format example: nvdla0_1_fence */
 			snprintf(fence_name, sizeof(fence_name),
-				"%s_%d_fence", dev_name(&dla_pdev->dev),
-				task->postfences[i].syncpoint_index);
+				"%s_%d_%d_postfence", dev_name(&dla_pdev->dev),
+				task->postfences[i].syncpoint_index, i);
 
 			err = nvhost_sync_create_fence_fd(host_pdev,
 				&info, 1, fence_name,
@@ -525,15 +646,33 @@ static int nvdla_val_task_submit_input(struct nvdla_ioctl_submit_task *in_task)
 			MAX_NUM_NVDLA_IN_TASK_STATUS);
 		return -EINVAL;
 	}
-	if (in_task->num_output_task_status > MAX_NUM_NVDLA_OUT_TASK_STATUS) {
-		pr_err("out task status[%u] crossing expected[%d]\n",
-			in_task->num_output_task_status,
+	if (in_task->num_sof_task_status > MAX_NUM_NVDLA_OUT_TASK_STATUS) {
+		pr_err("sof task status[%u] crossing expected[%d]\n",
+			in_task->num_sof_task_status,
 			MAX_NUM_NVDLA_OUT_TASK_STATUS);
 		return -EINVAL;
 	}
-	if (in_task->num_addresses < 1) {
+	if (in_task->num_eof_task_status > MAX_NUM_NVDLA_OUT_TASK_STATUS) {
+		pr_err("eof task status[%u] crossing expected[%d]\n",
+			in_task->num_eof_task_status,
+			MAX_NUM_NVDLA_OUT_TASK_STATUS);
+		return -EINVAL;
+	}
+	if (in_task->num_sof_timestamps > MAX_NUM_NVDLA_OUT_TIMESTAMP) {
+		pr_err("sof timestamps[%u] crossing expected[%d]\n",
+			in_task->num_sof_timestamps,
+			MAX_NUM_NVDLA_OUT_TIMESTAMP);
+		return -EINVAL;
+	}
+	if (in_task->num_eof_timestamps > MAX_NUM_NVDLA_OUT_TIMESTAMP) {
+		pr_err("eof timestamps[%u] crossing expected[%d]\n",
+			in_task->num_eof_timestamps,
+			MAX_NUM_NVDLA_OUT_TIMESTAMP);
+		return -EINVAL;
+	}
+	if (in_task->num_addresses < 1) {
 		pr_err("num addresses[%u] should be min one\n",
-				in_task->num_addresses);
+			in_task->num_addresses);
 		return -EINVAL;
 	}
 	if (in_task->num_addresses > NVDLA_MAX_BUFFERS_PER_TASK) {
@@ -572,7 +711,10 @@ static int nvdla_fill_task(struct nvdla_queue *queue,
 	task->num_prefences = local_task->num_prefences;
 	task->num_postfences = local_task->num_postfences;
 	task->num_in_task_status = local_task->num_input_task_status;
-	task->num_out_task_status = local_task->num_output_task_status;
+	task->num_sof_task_status = local_task->num_sof_task_status;
+	task->num_eof_task_status = local_task->num_eof_task_status;
+	task->num_sof_timestamps = local_task->num_sof_timestamps;
+	task->num_eof_timestamps = local_task->num_eof_timestamps;
 	task->num_addresses = local_task->num_addresses;
 	task->timeout = local_task->timeout;
 
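nvdla_val_task_submit_input() gains one clause per new count, each a plain compare against a compile-time ceiling evaluated before any copy_from_user() touches the lists. A table-driven equivalent (an illustrative refactor, not the driver's code) shows the invariant at a glance:

	const struct {
		const char *name;
		u32 count;
		int max;
	} bounds[] = {
		{ "sof task status", in_task->num_sof_task_status,
			MAX_NUM_NVDLA_OUT_TASK_STATUS },
		{ "eof task status", in_task->num_eof_task_status,
			MAX_NUM_NVDLA_OUT_TASK_STATUS },
		{ "sof timestamps", in_task->num_sof_timestamps,
			MAX_NUM_NVDLA_OUT_TIMESTAMP },
		{ "eof timestamps", in_task->num_eof_timestamps,
			MAX_NUM_NVDLA_OUT_TIMESTAMP },
	};
	size_t i;

	for (i = 0; i < ARRAY_SIZE(bounds); i++) {
		if (bounds[i].count > bounds[i].max) {
			pr_err("%s[%u] crossing expected[%d]\n",
				bounds[i].name, bounds[i].count,
				bounds[i].max);
			return -EINVAL;
		}
	}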
"num_prefences[%u] num_postfences[%u]", task->num_prefences, task->num_postfences); - nvdla_dbg_info(pdev, "num_in_status[%u] num_out_task_status[%u]", - task->num_in_task_status, task->num_out_task_status); + nvdla_dbg_info(pdev, "num_in_status[%u] num_sof_task_status[%u] " + "num_eof_task_status[%u]", + task->num_in_task_status, + task->num_sof_task_status, + task->num_eof_task_status); + nvdla_dbg_info(pdev, "num_sof_timestamps[%u] num_eof_timestamps[%u]", + task->num_sof_timestamps, + task->num_eof_timestamps); nvdla_dbg_info(pdev, "num_addresses[%u]", task->num_addresses); for (i = 0; i < task->num_prefences; i++) { @@ -654,12 +802,34 @@ static void nvdla_dump_task(struct nvdla_task *task) task->in_task_status[i].status); } - for (i = 0; i < task->num_out_task_status; i++) { - nvdla_dbg_info(pdev, "Output task status[%d]:" + for (i = 0; i < task->num_sof_task_status; i++) { + nvdla_dbg_info(pdev, "SOF task status[%d]:" "handle[%u] offset[%u] status[%u]", - i, task->out_task_status[i].handle, - task->out_task_status[i].offset, - task->out_task_status[i].status); + i, task->sof_task_status[i].handle, + task->sof_task_status[i].offset, + task->sof_task_status[i].status); + } + + for (i = 0; i < task->num_eof_task_status; i++) { + nvdla_dbg_info(pdev, "EOF task status[%d]:" + "handle[%u] offset[%u] status[%u]", + i, task->eof_task_status[i].handle, + task->eof_task_status[i].offset, + task->eof_task_status[i].status); + } + + for (i = 0; i < task->num_sof_timestamps; i++) { + nvdla_dbg_info(pdev, "SOF timestamp[%d]:" + "handle[%u] offset[%u]", + i, task->sof_timestamps[i].handle, + task->sof_timestamps[i].offset); + } + + for (i = 0; i < task->num_eof_timestamps; i++) { + nvdla_dbg_info(pdev, "EOF timestamp[%d]:" + "handle[%u] offset[%u]", + i, task->eof_timestamps[i].handle, + task->eof_timestamps[i].offset); } for (i = 0; i < task->num_addresses; i++) { @@ -718,8 +888,18 @@ static int nvdla_emu_task_submit(struct nvdla_private *priv, void *arg) nvdla_dbg_info(pdev, "submit [%d]th task", i + 1); + task.num_prefences = local_tasks[i].num_prefences; task.num_postfences = local_tasks[i].num_postfences; + /* get pre fences */ + if (copy_from_user(task.prefences, + (void __user *)local_tasks[i].prefences, + (task.num_prefences * sizeof(struct nvdev_fence)))) { + err = -EFAULT; + nvdla_dbg_err(pdev, "failed to copy prefences"); + goto exit; + } + /* get post fences */ if (copy_from_user(task.postfences, (void __user *)local_tasks[i].postfences, @@ -736,13 +916,13 @@ static int nvdla_emu_task_submit(struct nvdla_private *priv, void *arg) } nvdla_dbg_info(pdev, "task[%d] submitted", i + 1); - /* send fences to user */ - err = nvdla_send_emu_postfences(&task, local_tasks + i); + /* send signal fences to user */ + err = nvdla_send_emu_signal_fences(&task, local_tasks + i); if (err) { - nvdla_dbg_err(pdev, "fail to send postfence%d", i + 1); + nvdla_dbg_err(pdev, "fail to send sig fence%d", i + 1); goto exit; } - nvdla_dbg_info(pdev, "postfences of task[%d] sent", i + 1); + nvdla_dbg_info(pdev, "signal fences of task[%d] sent", i + 1); } nvdla_dbg_fn(pdev, "Emulator task submitted, done!"); @@ -825,8 +1005,8 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg) } nvdla_dbg_info(pdev, "task[%d] desc filled", i + 1); - /* get expected postfences prior to submit */ - err = nvdla_get_postfences(queue, task); + /* get expected signal fences prior to submit */ + err = nvdla_get_signal_fences(queue, task); if (err) { nvdla_dbg_err(pdev, "fail to get fences%d", i + 1); goto fail_to_get_fences; 
@@ -825,8 +1005,8 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg)
 		}
 		nvdla_dbg_info(pdev, "task[%d] desc filled", i + 1);
 
-		/* get expected postfences prior to submit */
-		err = nvdla_get_postfences(queue, task);
+		/* get expected signal fences prior to submit */
+		err = nvdla_get_signal_fences(queue, task);
 		if (err) {
 			nvdla_dbg_err(pdev, "fail to get fences%d", i + 1);
 			goto fail_to_get_fences;
@@ -834,7 +1014,7 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg)
 		nvdla_dbg_info(pdev, "task[%d] got fences", i + 1);
 
 		/* update fences to user */
-		err = nvdla_update_postfences(task, local_tasks + i);
+		err = nvdla_update_signal_fences(task, local_tasks + i);
 		if (err) {
 			nvdla_dbg_err(pdev, "fail update postfence%d", i + 1);
 			goto fail_to_update_postfences;
diff --git a/drivers/video/tegra/host/nvdla/nvdla_queue.c b/drivers/video/tegra/host/nvdla/nvdla_queue.c
index 7bafc879..10c4b145 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_queue.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_queue.c
@@ -210,16 +210,43 @@ static int nvdla_unmap_task_memory(struct nvdla_task *task)
 	}
 	nvdla_dbg_fn(pdev, "all postfences unmaped");
 
-	/* unpin input task status memory */
-	for (ii = 0; ii < task->num_out_task_status; ii++) {
-		if (task->out_task_status[ii].handle) {
+	/* unpin output task status memory */
+	for (ii = 0; ii < task->num_sof_task_status; ii++) {
+		if (task->sof_task_status[ii].handle) {
 			nvdla_buffer_submit_unpin(task->buffers,
-				&task->out_task_status_dmabuf[ii], 1);
-			dma_buf_put(task->out_task_status_dmabuf[ii]);
+				&task->sof_task_status_dmabuf[ii], 1);
+			dma_buf_put(task->sof_task_status_dmabuf[ii]);
+		}
+	}
+
+	for (ii = 0; ii < task->num_eof_task_status; ii++) {
+		if (task->eof_task_status[ii].handle) {
+			nvdla_buffer_submit_unpin(task->buffers,
+				&task->eof_task_status_dmabuf[ii], 1);
+			dma_buf_put(task->eof_task_status_dmabuf[ii]);
 		}
 	}
 	nvdla_dbg_fn(pdev, "all out task status unmaped");
 
+	/* unpin output timestamp memory */
+	for (ii = 0; ii < task->num_sof_timestamps; ii++) {
+		if (task->sof_timestamps[ii].handle) {
+			nvdla_buffer_submit_unpin(task->buffers,
+				&task->sof_timestamps_dmabuf[ii], 1);
+			dma_buf_put(task->sof_timestamps_dmabuf[ii]);
+		}
+	}
+
+	for (ii = 0; ii < task->num_eof_timestamps; ii++) {
+		if (task->eof_timestamps[ii].handle) {
+			nvdla_buffer_submit_unpin(task->buffers,
+				&task->eof_timestamps_dmabuf[ii], 1);
+			dma_buf_put(task->eof_timestamps_dmabuf[ii]);
+		}
+	}
+	nvdla_dbg_fn(pdev, "all out timestamps unmapped");
+
 	return 0;
 }
 
@@ -252,12 +279,16 @@ static void nvdla_task_syncpt_reset(struct nvhost_syncpt *syncpt,
 
 static inline int nvdla_get_max_preaction_size(void)
 {
-	return (((MAX_NUM_NVDLA_PREFENCES + MAX_NUM_NVDLA_IN_TASK_STATUS) *
+	return (((MAX_NUM_NVDLA_PREFENCES + MAX_NUM_NVDLA_IN_TASK_STATUS +
+		MAX_NUM_NVDLA_OUT_TASK_STATUS +
+		MAX_NUM_NVDLA_OUT_TIMESTAMP) *
 		sizeof(struct dla_action_opcode)) +
 		(MAX_NUM_NVDLA_PREFENCES *
 			sizeof(struct dla_action_semaphore)) +
-		(MAX_NUM_NVDLA_IN_TASK_STATUS *
+		((MAX_NUM_NVDLA_IN_TASK_STATUS + MAX_NUM_NVDLA_OUT_TASK_STATUS) *
 			sizeof(struct dla_action_task_status)) +
+		(MAX_NUM_NVDLA_OUT_TIMESTAMP *
+			sizeof(struct dla_action_timestamp)) +
 		sizeof(struct dla_action_opcode));
 }
 
@@ -265,6 +296,7 @@ static inline int nvdla_get_max_postaction_size(void)
 {
 	return (((MAX_NUM_NVDLA_POSTFENCES +
 		MAX_NUM_NVDLA_OUT_TASK_STATUS +
+		MAX_NUM_NVDLA_OUT_TIMESTAMP +
 		NUM_PROFILING_POSTACTION) *
 		sizeof(struct dla_action_opcode)) +
 		(MAX_NUM_NVDLA_POSTFENCES *
@@ -272,6 +304,8 @@ static inline int nvdla_get_max_postaction_size(void)
 		((MAX_NUM_NVDLA_OUT_TASK_STATUS +
 			NUM_PROFILING_POSTACTION) *
 			sizeof(struct dla_action_task_status)) +
+		(MAX_NUM_NVDLA_OUT_TIMESTAMP *
+			sizeof(struct dla_action_timestamp)) +
 		sizeof(struct dla_action_opcode));
 }
 
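Both sizing helpers reserve worst-case room in the action buffer; the bound grows by one opcode per sof/eof status and timestamp plus their payloads. Writing PRE, IN_STS, OUT_STS and OUT_TS as shorthand for the MAX_NUM_NVDLA_* limits, the preaction bound computed above is:

	/*
	 * (PRE + IN_STS + OUT_STS + OUT_TS) * sizeof(struct dla_action_opcode)
	 *   + PRE * sizeof(struct dla_action_semaphore)
	 *   + (IN_STS + OUT_STS) * sizeof(struct dla_action_task_status)
	 *   + OUT_TS * sizeof(struct dla_action_timestamp)
	 *   + sizeof(struct dla_action_opcode)    for ACTION_TERMINATE
	 */

Note that OUT_STS and OUT_TS now count toward the preaction list as well, because sof status and sof timestamp writes are emitted as preactions.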
@@ -427,6 +461,18 @@ static u8 *add_status_action(u8 *mem, uint8_t op, uint64_t addr,
 	return mem + sizeof(struct dla_action_task_status);
 }
 
+static u8 *add_timestamp_action(u8 *mem, uint8_t op, uint64_t addr)
+{
+	struct dla_action_timestamp *action;
+
+	mem = add_opcode(mem, op);
+
+	action = (struct dla_action_timestamp *)mem;
+	action->address = addr;
+
+	return mem + sizeof(struct dla_action_timestamp);
+}
+
 static u8 *add_gos_action(u8 *mem, uint8_t op, uint8_t index,
 		uint16_t offset, uint32_t value)
 {
@@ -568,17 +614,403 @@ gos_disabled:
 	return err;
}
 
+static int nvdla_fill_wait_fence_action(struct nvdla_task *task,
+	struct nvdev_fence *fence,
+	struct dma_buf **dma_buf,
+	u8 **mem_next)
+{
+	int err = 0;
+
+	struct nvdla_buffers *buffers = task->buffers;
+	struct nvdla_queue *queue = task->queue;
+	struct platform_device *pdev = queue->pool->pdev;
+	struct nvhost_master *host = nvhost_get_host(pdev);
+	struct nvhost_syncpt *sp = &host->syncpt;
+	u8 *next = *mem_next;
+
+	switch (fence->type) {
+	case NVDEV_FENCE_TYPE_SYNC_FD: {
+		struct sync_fence *f;
+		struct sync_pt *pt;
+		u32 id, thresh, j;
+
+		f = nvhost_sync_fdget(fence->sync_fd);
+		if (!f) {
+			nvdla_dbg_err(pdev, "failed to get sync fd");
+			break;
+		}
+
+		j = id = thresh = 0;
+		for (j = 0; j < f->num_fences; j++) {
+			u32 gos_id, gos_offset;
+
+			pt = sync_pt_from_fence(f->cbs[j].sync_pt);
+			id = nvhost_sync_pt_id(pt);
+			thresh = nvhost_sync_pt_thresh(pt);
+
+			if (!id || !nvhost_syncpt_is_valid_hw_pt(sp, id)) {
+				nvdla_dbg_err(pdev, "Invalid sync_fd");
+				sync_fence_put(f);
+				break;
+			}
+
+			/* check if GoS backing available */
+			if (!nvdla_get_gos(pdev, id, &gos_id, &gos_offset)) {
+				nvdla_dbg_info(pdev, "syncfd_pt:[%u] "
+					"gos_id[%u] gos_offset[%u] val[%u]",
+					id, gos_id, gos_offset, thresh);
+				next = add_gos_action(next, ACTION_GOS_GE,
+					gos_id, gos_offset, thresh);
+			} else {
+				dma_addr_t syncpt_addr;
+
+				nvdla_dbg_info(pdev,
+					"GoS missing for syncfd [%d]", id);
+				syncpt_addr = nvhost_syncpt_address(
+						queue->vm_pdev, id);
+				nvdla_dbg_info(pdev, "syncfd_pt:[%u]"
+					"mss_dma_addr[%pad]",
+					id, &syncpt_addr);
+				next = add_fence_action(next, ACTION_SEM_GE,
+					syncpt_addr, thresh);
+			}
+		}
+
+		break;
+	}
+	case NVDEV_FENCE_TYPE_SYNCPT: {
+		u32 gos_id, gos_offset;
+
+		nvdla_dbg_info(pdev, "id[%d] val[%d]",
+			fence->syncpoint_index,
+			fence->syncpoint_value);
+
+		if (!nvdla_get_gos(pdev, fence->syncpoint_index, &gos_id,
+			&gos_offset)) {
+			nvdla_dbg_info(pdev, "syncpt:[%u] gos_id[%u] "
+				"gos_offset[%u] val[%u]",
+				fence->syncpoint_index, gos_id, gos_offset,
+				fence->syncpoint_value);
+			next = add_gos_action(next, ACTION_GOS_GE,
+				gos_id, gos_offset,
+				fence->syncpoint_value);
+		} else {
+			dma_addr_t syncpt_addr;
+			nvdla_dbg_info(pdev, "GoS missing");
+
+			syncpt_addr = nvhost_syncpt_address(
+				queue->vm_pdev, fence->syncpoint_index);
+			nvdla_dbg_info(pdev, "syncpt:[%u] dma_addr[%pad]",
+				fence->syncpoint_index, &syncpt_addr);
+
+			next = add_fence_action(next, ACTION_SEM_GE,
+				syncpt_addr, fence->syncpoint_value);
+		}
+
+		break;
+	}
+	case NVDEV_FENCE_TYPE_SEMAPHORE:
+	case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
+		dma_addr_t dma_addr;
+		size_t dma_size;
+
+		nvdla_dbg_info(pdev, "semh[%u] semo[%u] val[%d]",
+			fence->semaphore_handle,
+			fence->semaphore_offset,
+			fence->semaphore_value);
+
+		*dma_buf = dma_buf_get(fence->semaphore_handle);
+		if (IS_ERR_OR_NULL(*dma_buf)) {
+			*dma_buf = NULL;
+			nvdla_dbg_err(pdev, "fail to get wait buf");
+			break;
+		}
+
+		if (nvdla_buffer_submit_pin(buffers,
+				dma_buf, 1, &dma_addr, &dma_size, NULL)) {
+			nvdla_dbg_err(pdev, "fail to pin WAIT SEM");
+			break;
+		}
+
+		next = add_fence_action(next, ACTION_SEM_GE,
+			dma_addr + fence->semaphore_offset,
+			fence->semaphore_value);
+		break;
+	}
+	default:
+		nvdla_dbg_err(pdev, "Invalid sync_type[%d]", fence->type);
+		err = -EINVAL;
+		goto fail;
+	}
+
+	*mem_next = next;
+
+fail:
+	return err;
+}
+
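nvdla_fill_wait_fence_action() above folds the three wait flavours into one helper: sync-fd waits are decomposed into their underlying syncpoints, syncpoint waits prefer a GoS (grid of semaphores) compare when nvdla_get_gos() reports a backing slot, and everything else degenerates to a semaphore >= poll on a pinned DMA address. The syncpoint branch reduces to:

	/* Sketch of the wait-site selection (fragment; id/thresh from the fence). */
	if (!nvdla_get_gos(pdev, id, &gos_id, &gos_offset))	/* 0 = GoS backed */
		next = add_gos_action(next, ACTION_GOS_GE,
				gos_id, gos_offset, thresh);
	else
		next = add_fence_action(next, ACTION_SEM_GE,
				nvhost_syncpt_address(queue->vm_pdev, id),
				thresh);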
+static int nvdla_fill_signal_fence_action(struct nvdla_task *task,
+	struct nvdev_fence *fence,
+	struct dma_buf **dma_buf,
+	u8 **mem_next)
+{
+	int err = 0;
+
+	struct nvdla_buffers *buffers = task->buffers;
+	struct nvdla_queue *queue = task->queue;
+	struct platform_device *pdev = queue->pool->pdev;
+	u8 *next = *mem_next;
+
+	switch (fence->type) {
+	case NVDEV_FENCE_TYPE_SYNC_FD:
+	case NVDEV_FENCE_TYPE_SYNCPT: {
+		dma_addr_t syncpt_addr;
+		u32 gos_id, gos_offset;
+
+		/* update GoS backing if available */
+		if (!nvdla_get_gos(pdev, queue->syncpt_id,
+				&gos_id, &gos_offset)) {
+			u32 max;
+
+			/* send incremented max */
+			max = nvhost_syncpt_read_maxval(pdev,
+					queue->syncpt_id);
+			nvdla_dbg_info(pdev, "syncpt:[%u] gos_id[%u] "
+				"gos_offset[%u] val[%u]",
+				queue->syncpt_id, gos_id, gos_offset,
+				max + task->fence_counter + 1);
+			next = add_gos_action(next, ACTION_WRITE_GOS,
+				gos_id, gos_offset,
+				max + task->fence_counter + 1);
+		}
+
+		/* For postaction also update MSS addr */
+		syncpt_addr = nvhost_syncpt_address(queue->vm_pdev,
+				queue->syncpt_id);
+		next = add_fence_action(next, ACTION_WRITE_SEM,
+				syncpt_addr, 1);
+
+		task->fence_counter = task->fence_counter + 1;
+
+		nvdla_dbg_info(pdev, "syncpt:[%u] mss:[%pad]",
+			queue->syncpt_id, &syncpt_addr);
+		break;
+	}
+	case NVDEV_FENCE_TYPE_SEMAPHORE: {
+		dma_addr_t dma_addr;
+		size_t dma_size;
+
+		nvdla_dbg_info(pdev, "semh:%u semo:%u v:%d",
+			fence->semaphore_handle,
+			fence->semaphore_offset,
+			fence->semaphore_value);
+
+		*dma_buf = dma_buf_get(fence->semaphore_handle);
+		if (IS_ERR_OR_NULL(*dma_buf)) {
+			*dma_buf = NULL;
+			nvdla_dbg_err(pdev, "fail to get buf");
+			break;
+		}
+
+		if (nvdla_buffer_submit_pin(buffers,
+				dma_buf, 1, &dma_addr, &dma_size, NULL)) {
+			nvdla_dbg_err(pdev, "fail to pin SIGNAL SEM");
+			break;
+		}
+
+		next = add_fence_action(next, ACTION_WRITE_SEM,
+			dma_addr + fence->semaphore_offset,
+			fence->semaphore_value);
+		break;
+	}
+	case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
+		dma_addr_t dma_addr;
+		size_t dma_size;
+
+		nvdla_dbg_info(pdev, "semh:%u semo:%u v:%d",
+			fence->semaphore_handle,
+			fence->semaphore_offset,
+			fence->semaphore_value);
+
+		*dma_buf = dma_buf_get(fence->semaphore_handle);
+		if (IS_ERR_OR_NULL(*dma_buf)) {
+			*dma_buf = NULL;
+			nvdla_dbg_err(pdev, "fail to get buf");
+			break;
+		}
+
+		if (nvdla_buffer_submit_pin(buffers,
+				dma_buf, 1, &dma_addr, &dma_size, NULL)) {
+			nvdla_dbg_err(pdev, "fail to pin SIGNAL SEM");
+			break;
+		}
+
+		next = add_fence_action(next, ACTION_WRITE_TS_SEM,
+			dma_addr + fence->semaphore_offset,
+			fence->semaphore_value);
+		break;
+	}
+	default:
+		nvdla_dbg_err(pdev, "Invalid sync_type[%d]",
+			fence->type);
+		err = -EINVAL;
+		goto fail;
+	}
+
+	*mem_next = next;
+
+fail:
+	return err;
+}
+
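For syncpoint-backed signals the helper does double bookkeeping: it writes the incremented maximum through GoS when a backing slot exists, always writes a +1 increment to the syncpoint's MSS address, and bumps task->fence_counter so each signal in the task claims its own future threshold. In outline:

	/* Fragment: the i-th syncpoint signal of this task. */
	max = nvhost_syncpt_read_maxval(pdev, queue->syncpt_id);
	next = add_gos_action(next, ACTION_WRITE_GOS, gos_id, gos_offset,
			max + task->fence_counter + 1);	/* future value */
	next = add_fence_action(next, ACTION_WRITE_SEM, syncpt_addr, 1);
	task->fence_counter = task->fence_counter + 1;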
get buf"); + err = -EINVAL; + goto fail; + } + + if (nvdla_buffer_submit_pin(buffers, + dma_buf, 1, &dma_addr, &dma_size, NULL)) { + nvdla_dbg_err(pdev, "fail to pin in status"); + err = -EINVAL; + goto fail; + } + + next = add_status_action(next, ACTION_TASK_STATUS_EQ, + dma_addr + task_status->offset, + task_status->status); + + *mem_next = next; + +fail: + return err; +} + +static int nvdla_fill_taskstatus_write_action(struct nvdla_task *task, + struct nvdla_status_notify *task_status, + struct dma_buf **dma_buf, + u8 **mem_next) +{ + int err = 0; + + struct nvdla_buffers *buffers = task->buffers; + struct nvdla_queue *queue = task->queue; + struct platform_device *pdev = queue->pool->pdev; + dma_addr_t dma_addr; + size_t dma_size; + + u8 *next = *mem_next; + + nvdla_dbg_info(pdev, "h[%u] o[%u] status[%d]", + task_status->handle, + task_status->offset, + task_status->status); + + *dma_buf = dma_buf_get(task_status->handle); + if (IS_ERR_OR_NULL(*dma_buf)) { + *dma_buf = NULL; + nvdla_dbg_err(pdev, "fail to get buf"); + err = -EINVAL; + goto fail; + } + + if (nvdla_buffer_submit_pin(buffers, + dma_buf, 1, &dma_addr, &dma_size, NULL)) { + nvdla_dbg_err(pdev, "fail to pin status"); + err = -EINVAL; + goto fail; + } + + next = add_status_action(next, ACTION_WRITE_TASK_STATUS, + dma_addr + task_status->offset, + task_status->status); + + *mem_next = next; + +fail: + return err; +} + +static int nvdla_fill_timestamp_write_action(struct nvdla_task *task, + struct nvdla_mem_handle *timestamp, + struct dma_buf **dma_buf, + u8 **mem_next) +{ + int err = 0; + + struct nvdla_buffers *buffers = task->buffers; + struct nvdla_queue *queue = task->queue; + struct platform_device *pdev = queue->pool->pdev; + dma_addr_t dma_addr; + size_t dma_size; + + u8 *next = *mem_next; + + nvdla_dbg_info(pdev, "h[%u] o[%u]", + timestamp->handle, + timestamp->offset); + + *dma_buf = dma_buf_get(timestamp->handle); + if (IS_ERR_OR_NULL(*dma_buf)) { + *dma_buf = NULL; + nvdla_dbg_err(pdev, "fail to get buf"); + err = -EINVAL; + goto fail; + } + + if (nvdla_buffer_submit_pin(buffers, + dma_buf, 1, &dma_addr, &dma_size, NULL)) { + nvdla_dbg_err(pdev, "fail to pin timestamp"); + err = -EINVAL; + goto fail; + } + + next = add_timestamp_action(next, ACTION_WRITE_TIMESTAMP, + dma_addr + timestamp->offset); + + *mem_next = next; + +fail: + return err; +} + + static int nvdla_fill_postactions(struct nvdla_task *task) { + int err = 0; + struct dla_task_descriptor *task_desc = task->task_desc; - struct nvdla_buffers *buffers = task->buffers; struct nvdla_queue *queue = task->queue; struct platform_device *pdev = queue->pool->pdev; struct dla_action_list *postactionl; uint16_t postactionlist_of; u8 *next, *start; void *mem; - int i, j = 0; + int i; /* update postaction list offset */ postactionlist_of = task_desc->postactions + @@ -587,176 +1019,74 @@ static int nvdla_fill_postactions(struct nvdla_task *task) start = next = (u8 *)task_desc + postactionlist_of; /* Action to write the status notifier after task finishes (for TSP). 
 static int nvdla_fill_postactions(struct nvdla_task *task)
 {
+	int err = 0;
+
 	struct dla_task_descriptor *task_desc = task->task_desc;
-	struct nvdla_buffers *buffers = task->buffers;
 	struct nvdla_queue *queue = task->queue;
 	struct platform_device *pdev = queue->pool->pdev;
 	struct dla_action_list *postactionl;
 	uint16_t postactionlist_of;
 	u8 *next, *start;
 	void *mem;
-	int i, j = 0;
+	int i;
 
 	/* update postaction list offset */
 	postactionlist_of = task_desc->postactions +
@@ -587,176 +1019,74 @@ static int nvdla_fill_postactions(struct nvdla_task *task)
 
 	start = next = (u8 *)task_desc + postactionlist_of;
 
 	/* Action to write the status notifier after task finishes (for TSP).
 	 */
-	next = add_status_action(next, POSTACTION_TASK_STATUS,
+	next = add_status_action(next, ACTION_WRITE_TASK_STATUS,
 		task->task_desc_pa + nvdla_profile_status_offset(task), 0);
 
-	/* fill output task status */
-	for (j = 0; j < task->num_out_task_status; j++) {
-		dma_addr_t dma_addr;
-		size_t dma_size;
-
-		nvdla_dbg_info(pdev, "i[%d] h[%u] o[%u] status[%d]",
-			j,
-			task->out_task_status[j].handle,
-			task->out_task_status[j].offset,
-			task->out_task_status[j].status);
-
-		task->out_task_status_dmabuf[j] =
-			dma_buf_get(task->out_task_status[j].handle);
-		if (IS_ERR_OR_NULL(task->out_task_status_dmabuf[j])) {
-			task->out_task_status_dmabuf[j] = NULL;
-			nvdla_dbg_err(pdev, "fail to get buf");
-			break;
-		}
-
-		if (nvdla_buffer_submit_pin(buffers,
-				&task->out_task_status_dmabuf[j],
-				1, &dma_addr, &dma_size, NULL)) {
-			nvdla_dbg_err(pdev, "fail to pin out status");
-			break;
-		}
-
-		next = add_status_action(next, POSTACTION_TASK_STATUS,
-			dma_addr + task->out_task_status[j].offset,
-			task->out_task_status[j].status);
+	/* fill eof timestamp actions */
+	for (i = 0; i < task->num_eof_timestamps; i++) {
+		err = nvdla_fill_timestamp_write_action(task,
+			&task->eof_timestamps[i],
+			&task->eof_timestamps_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill eof timestamp[%d]",
+				i);
+			goto fail;
+		}
 	}
 
-	/* reset fence counter */
-	task->fence_counter = 0;
+	/* fill eof task status actions */
+	for (i = 0; i < task->num_eof_task_status; i++) {
+		err = nvdla_fill_taskstatus_write_action(task,
+			&task->eof_task_status[i],
+			&task->eof_task_status_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill eof taskstatus[%d]",
+				i);
+			goto fail;
+		}
+	}
 
 	/* fill all postactions */
 	for (i = 0; i < task->num_postfences; i++) {
 
 		/* update action */
-		switch (task->postfences[i].type) {
-		case NVDEV_FENCE_TYPE_SYNCPT:
-		case NVDEV_FENCE_TYPE_SYNC_FD: {
-			dma_addr_t syncpt_addr;
-			u32 gos_id, gos_offset;
-
-			/* update GoS backing if available */
-			if (!nvdla_get_gos(pdev, queue->syncpt_id,
-					&gos_id, &gos_offset)) {
-				u32 max;
-
-				/* send incremented max */
-				max = nvhost_syncpt_read_maxval(pdev,
-					queue->syncpt_id);
-				nvdla_dbg_info(pdev, "post i:%d syncpt:[%u] gos_id[%u] gos_offset[%u] val[%u]",
-					i, queue->syncpt_id, gos_id,
-					gos_offset,
-					max + task->fence_counter + 1);
-				next = add_gos_action(next, POSTACTION_GOS,
-					gos_id, gos_offset,
-					max + task->fence_counter + 1);
-			}
-
-			/* For postaction also update MSS addr */
-			syncpt_addr = nvhost_syncpt_address(queue->vm_pdev,
-				queue->syncpt_id);
-			next = add_fence_action(next, POSTACTION_SEM,
-				syncpt_addr, 1);
-
-			task->fence_counter = task->fence_counter + 1;
-
-			nvdla_dbg_info(pdev, "post i:%d syncpt:[%u] mss:[%pad]",
-				i, queue->syncpt_id, &syncpt_addr);
-			break;
-		}
-		case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
-			dma_addr_t dma_addr;
-			size_t dma_size;
-
-			nvdla_dbg_info(pdev, "POSTTS i:%d semh:%u semo:%u v:%d",
-				i,
-				task->postfences[i].semaphore_handle,
-				task->postfences[i].semaphore_offset,
-				task->postfences[i].semaphore_value);
-
-			/* TS SEMAPHORE just has extra memory bytes allocated
-			 * to store TS as compared default semaphore.
-			 * override action/opecode type here.
-			 */
-			task->postfences_sem_dmabuf[i] =
-				dma_buf_get(task->postfences[i].semaphore_handle);
-			if (IS_ERR_OR_NULL(task->postfences_sem_dmabuf[i])) {
-				task->postfences_sem_dmabuf[i] = NULL;
-				nvdla_dbg_err(pdev, "fail to get buf");
-				break;
-			}
-
-			if (nvdla_buffer_submit_pin(buffers,
-					&task->postfences_sem_dmabuf[i],
-					1, &dma_addr, &dma_size, NULL)) {
-				nvdla_dbg_err(pdev, "fail to pin OUT TSSEM");
-				break;
-			}
-
-			next = add_fence_action(next, POSTACTION_TS_SEM,
-				dma_addr + task->postfences[i].semaphore_offset,
-				task->postfences[i].semaphore_value);
-			break;
-		}
-		case NVDEV_FENCE_TYPE_SEMAPHORE: {
-			dma_addr_t dma_addr;
-			size_t dma_size;
-
-			nvdla_dbg_info(pdev, "POST i:%d semh:%u semo:%u v:%d",
-				i,
-				task->postfences[i].semaphore_handle,
-				task->postfences[i].semaphore_offset,
-				task->postfences[i].semaphore_value);
-
-			task->postfences_sem_dmabuf[i] =
-				dma_buf_get(task->postfences[i].semaphore_handle);
-			if (IS_ERR_OR_NULL(task->postfences_sem_dmabuf[i])) {
-				task->postfences_sem_dmabuf[i] = NULL;
-				nvdla_dbg_err(pdev, "fail to get buf");
-				break;
-			}
-
-			if (nvdla_buffer_submit_pin(buffers,
-					&task->postfences_sem_dmabuf[i],
-					1, &dma_addr, &dma_size, NULL)) {
-				nvdla_dbg_err(pdev, "fail to pin OUT SEM");
-				break;
-			}
-
-			next = add_fence_action(next, POSTACTION_SEM,
-				dma_addr + task->postfences[i].semaphore_offset,
-				task->postfences[i].semaphore_value);
-			break;
-		}
-		default:
-			nvdla_dbg_err(pdev, "Invalid postfence sync type[%d]",
-				task->postfences[i].type);
-			return -EINVAL;
+		err = nvdla_fill_signal_fence_action(task,
+			&task->postfences[i],
+			&task->postfences_sem_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev, "failed to fill postfences[%u]", i);
+			goto fail;
 		}
 	}
 
 	/* update end of action list */
-	next = add_opcode(next, POSTACTION_TERMINATE);
+	next = add_opcode(next, ACTION_TERMINATE);
 
 	mem = (char *)task_desc + task_desc->postactions;
 	postactionl = (struct dla_action_list *)mem;
 	postactionl->offset = postactionlist_of;
 	postactionl->size = next - start;
 
-	return 0;
+fail:
	return err;
 }
 
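With both fillers reduced to loops over the shared helpers, the emitted streams have a fixed shape: the postaction list above is the profiling status write, then eof timestamps, eof statuses, signal postfences and the terminator; the preaction list built below is waits, input status checks, sof statuses, sof timestamps, signal prefences and the terminator:

	/*
	 * postactions: [WRITE_TASK_STATUS profiling][WRITE_TIMESTAMP eof]...
	 *              [WRITE_TASK_STATUS eof]...[signal fences]...[TERMINATE]
	 * preactions:  [waits]...[TASK_STATUS_EQ in]...[WRITE_TASK_STATUS sof]...
	 *              [WRITE_TIMESTAMP sof]...[signal prefences]...[TERMINATE]
	 */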
 static int nvdla_fill_preactions(struct nvdla_task *task)
 {
+	int err = 0;
+
 	struct dla_task_descriptor *task_desc = task->task_desc;
-	struct nvdla_buffers *buffers = task->buffers;
 	struct nvdla_queue *queue = task->queue;
 	struct platform_device *pdev = queue->pool->pdev;
-	struct nvhost_master *host = nvhost_get_host(pdev);
-	struct nvhost_syncpt *sp = &host->syncpt;
 	struct dla_action_list *preactionl;
 	uint16_t preactionlist_of;
 	u8 *next, *start;
 	void *mem;
-	int i, j;
+	int i;
 
 	/* preaction list offset update */
 	preactionlist_of = task_desc->postactions +
@@ -764,169 +1094,84 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 
 	start = next = (u8 *)task_desc + preactionlist_of;
 
-	/* fill all preactions */
+	/* fill all preactions wait */
 	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_WAIT)
+			continue;
 
-		switch (task->prefences[i].type) {
-		case NVDEV_FENCE_TYPE_SYNC_FD: {
-			struct sync_fence *f;
-			struct sync_pt *pt;
-			u32 id, thresh, j;
-
-			f = nvhost_sync_fdget(task->prefences[i].sync_fd);
-			if (!f) {
-				nvdla_dbg_err(pdev, "failed to get sync fd");
-				break;
-			}
-
-			j = id = thresh = 0;
-
-			for (j = 0; j < f->num_fences; j++) {
-				u32 gos_id, gos_offset;
-
-				pt = sync_pt_from_fence(f->cbs[j].sync_pt);
-				id = nvhost_sync_pt_id(pt);
-				thresh = nvhost_sync_pt_thresh(pt);
-
-				if (!id ||
-					!nvhost_syncpt_is_valid_hw_pt(sp, id)) {
-					nvdla_dbg_err(pdev, "Invalid sync_fd");
-					sync_fence_put(f);
-					break;
-				}
-
-				/* check if GoS backing available */
-				if (!nvdla_get_gos(pdev, id, &gos_id,
-						&gos_offset)) {
-					nvdla_dbg_info(pdev, "pre i:%d syncfd_pt:[%u] gos_id[%u] gos_offset[%u] val[%u]",
-						i, id, gos_id,
-						gos_offset, thresh);
-					next = add_gos_action(next,
-						PREACTION_GOS_GE,
-						gos_id, gos_offset, thresh);
-				} else {
-					dma_addr_t syncpt_addr;
-
-					nvdla_dbg_info(pdev, "pre i:%d GoS missing for syncfd [%d]",
-						i, id);
-					syncpt_addr = nvhost_syncpt_address(
-						queue->vm_pdev, id);
-					nvdla_dbg_info(pdev, "pre i:%d syncfd_pt:[%u] mss_dma_addr[%pad]",
-						i, id, &syncpt_addr);
-					next = add_fence_action(next, PREACTION_SEM_GE,
-						syncpt_addr, thresh);
-				}
-			}
-			break;
-		}
-		case NVDEV_FENCE_TYPE_SYNCPT: {
-			u32 gos_id, gos_offset;
-
-			nvdla_dbg_info(pdev, "i[%d] id[%d] val[%d]",
-				i,
-				task->prefences[i].syncpoint_index,
-				task->prefences[i].syncpoint_value);
-
-			if (!nvdla_get_gos(pdev,
-				task->prefences[i].syncpoint_index, &gos_id,
-				&gos_offset)) {
-				nvdla_dbg_info(pdev, "pre i:%d syncpt:[%u] gos_id[%u] gos_offset[%u] val[%u]",
-					i, task->prefences[i].syncpoint_index,
-					gos_id, gos_offset,
-					task->prefences[i].syncpoint_value);
-				next = add_gos_action(next, PREACTION_GOS_GE,
-					gos_id, gos_offset,
-					task->prefences[i].syncpoint_value);
-			} else {
-				dma_addr_t syncpt_addr;
-
-				nvdla_dbg_info(pdev, "pre i:%d GoS missing", i);
-
-				syncpt_addr = nvhost_syncpt_address(
-					queue->vm_pdev,
-					task->prefences[i].syncpoint_index);
-				nvdla_dbg_info(pdev, "pre i:%d syncpt:[%u] dma_addr[%pad]",
-					i,
-					task->prefences[i].syncpoint_index,
-					&syncpt_addr);
-
-				next = add_fence_action(next, PREACTION_SEM_GE,
-					syncpt_addr,
-					task->prefences[i].syncpoint_value);
-			}
-			break;
-		}
-		case NVDEV_FENCE_TYPE_SEMAPHORE:
-		case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
-			dma_addr_t dma_addr;
-			size_t dma_size;
-
-			nvdla_dbg_info(pdev, "i[%d] semh[%u] semo[%u] val[%d]",
-				i,
-				task->prefences[i].semaphore_handle,
-				task->prefences[i].semaphore_offset,
-				task->prefences[i].semaphore_value);
-
-			task->prefences_sem_dmabuf[i] =
-				dma_buf_get(task->prefences[i].semaphore_handle);
-			if (IS_ERR_OR_NULL(task->prefences_sem_dmabuf[i])) {
-				task->prefences_sem_dmabuf[i] = NULL;
-				nvdla_dbg_err(pdev, "fail to get buf");
-				break;
-			}
-
-			if (nvdla_buffer_submit_pin(buffers,
-					&task->prefences_sem_dmabuf[i],
-					1, &dma_addr, &dma_size, NULL)) {
-				nvdla_dbg_err(pdev, "fail to pin IN SEM");
-				break;
-			}
-
-			next = add_fence_action(next, PREACTION_SEM_GE,
-				dma_addr + task->prefences[i].semaphore_offset,
-				task->prefences[i].semaphore_value);
-			break;
-		}
-		default:
-			nvdla_dbg_err(pdev, "Invalid sync_type[%d]",
-				task->prefences[i].type);
-			return -EINVAL;
+		/* update action */
+		err = nvdla_fill_wait_fence_action(task,
+			&task->prefences[i],
+			&task->prefences_sem_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev, "failed to fill prefences[%u]", i);
+			goto fail;
 		}
 	}
 
-	/* fill input status after filling sem/synpt/gos */
-	for (j = 0; j < task->num_in_task_status; j++) {
-		dma_addr_t dma_addr;
-		size_t dma_size;
+	/* fill input status after filling sem/syncpt/gos */
+	for (i = 0; i < task->num_in_task_status; i++) {
+		err = nvdla_fill_taskstatus_read_action(task,
+			&task->in_task_status[i],
+			&task->in_task_status_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill in taskstatus[%d]",
+				i);
+			goto fail;
+		}
+	}
 
-		nvdla_dbg_info(pdev, "i[%d] h[%u] o[%u] status[%d]",
-			j,
-			task->in_task_status[j].handle,
-			task->in_task_status[j].offset,
-			task->in_task_status[j].status);
+	/* fill sof task status actions */
+	for (i = 0; i < task->num_sof_task_status; i++) {
+		err = nvdla_fill_taskstatus_write_action(task,
+			&task->sof_task_status[i],
+			&task->sof_task_status_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill sof taskstatus[%d]",
+				i);
+			goto fail;
+		}
+	}
 
-		task->in_task_status_dmabuf[j] =
-			dma_buf_get(task->in_task_status[j].handle);
-		if (IS_ERR_OR_NULL(task->in_task_status_dmabuf[j])) {
-			task->in_task_status_dmabuf[j] = NULL;
-			nvdla_dbg_err(pdev, "fail to get buf");
-			break;
-		}
+	/* fill sof timestamp actions */
+	for (i = 0; i < task->num_sof_timestamps; i++) {
+		err = nvdla_fill_timestamp_write_action(task,
+			&task->sof_timestamps[i],
+			&task->sof_timestamps_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill sof timestamp[%d]",
+				i);
+			goto fail;
+		}
+	}
 
-		if (nvdla_buffer_submit_pin(buffers,
-				&task->in_task_status_dmabuf[j],
-				1, &dma_addr, &dma_size, NULL)) {
-			nvdla_dbg_err(pdev, "fail to pin in status");
-			break;
-		}
+	/* fill all preactions signals */
+	for (i = 0; i < task->num_prefences; i++) {
+		/* update action */
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
 
-		next = add_status_action(next, PREACTION_TASK_STATUS,
-			dma_addr + task->in_task_status[j].offset,
-			task->in_task_status[j].status);
+		err = nvdla_fill_signal_fence_action(task,
+			&task->prefences[i],
+			&task->prefences_sem_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"fail to fill fence sig action [%d]",
+				i);
+			goto fail;
+		}
 	}
 
 	/* update end of action list */
-	next = add_opcode(next, PREACTION_TERMINATE);
+	next = add_opcode(next, ACTION_TERMINATE);
 
 	/* actually update lists data */
 	mem = (char *)task_desc + task_desc->preactions;
@@ -934,7 +1179,8 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 	preactionl->offset = preactionlist_of;
 	preactionl->size = next - start;
 
-	return 0;
+fail:
+	return err;
 }
 
 int nvdla_fill_task_desc(struct nvdla_task *task)
@@ -985,6 +1231,9 @@ int nvdla_fill_task_desc(struct nvdla_task *task)
 
 	nvdla_update_gos(pdev);
 
+	/* reset fence counter */
+	task->fence_counter = 0;
+
 	/* fill pre actions */
 	nvdla_fill_preactions(task);
 
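Resetting fence_counter moves out of nvdla_fill_postactions() and up into nvdla_fill_task_desc(), ahead of the preaction pass, because prefences marked NVDEV_FENCE_SIGNAL now consume syncpoint increments too. The task's final fence is then the syncpoint max plus everything both passes accumulated:

	/* Fragment of the submit path after this change. */
	task->fence_counter = 0;
	nvdla_fill_preactions(task);	/* SIGNAL prefences bump the counter */
	nvdla_fill_postactions(task);	/* syncpt postfences bump it again */

	task_fence = nvhost_syncpt_read_maxval(pdev, queue->syncpt_id) +
			task->fence_counter;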
@@ -1095,8 +1344,30 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
 
 	/* reset fence counter */
 	task->fence_counter = 0;
+
+	/* fill all preactions */
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		/* update action */
+		switch (task->prefences[i].type) {
+		case NVDEV_FENCE_TYPE_SYNCPT:
+		case NVDEV_FENCE_TYPE_SYNC_FD: {
+			task->fence_counter = task->fence_counter + 1;
+			break;
+		}
+		default:
+			nvdla_dbg_err(pdev, "Invalid prefence sync type[%d]",
+				task->prefences[i].type);
+			return -EINVAL;
+		}
+	}
+
 	/* fill all postactions */
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
 
 		/* update action */
 		switch (task->postfences[i].type) {
@@ -1120,9 +1391,31 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
 			queue->syncpt_id, task->fence,
 			task, task->fence_counter);
 
-	/* Update postfences for all */
+	/* Update signal fences for all */
 	counter = task->fence_counter - 1;
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
+		    (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
+			task->prefences[i].syncpoint_index =
+				queue->syncpt_id;
+			task->prefences[i].syncpoint_value =
+				task->fence - counter;
+
+			nvdla_dbg_info(pdev, "[%d] prefence set[%u]:[%u]",
+				i, task->prefences[i].syncpoint_index,
+				task->prefences[i].syncpoint_value);
+
+			counter = counter - 1;
+		}
+	}
+
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
 		if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
 		    (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
 			task->postfences[i].syncpoint_index =
@@ -1141,7 +1434,7 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
 	return 0;
 }
 
-int nvdla_get_postfences(struct nvdla_queue *queue, void *in_task)
+int nvdla_get_signal_fences(struct nvdla_queue *queue, void *in_task)
 {
 	struct nvdla_task *task = (struct nvdla_task *)in_task;
 	struct platform_device *pdev = queue->pool->pdev;
@@ -1159,9 +1452,31 @@ int nvdla_get_postfences(struct nvdla_queue *queue, void *in_task)
 	task_fence = nvhost_syncpt_read_maxval(pdev, queue->syncpt_id) +
 			task->fence_counter;
 
-	/* Update postfences for all */
+	/* Update signal fence values for both prefences and postfences */
 	counter = task->fence_counter - 1;
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
+		    (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
+			task->prefences[i].syncpoint_index =
+				queue->syncpt_id;
+			task->prefences[i].syncpoint_value =
+				task_fence - counter;
+
+			nvdla_dbg_info(pdev, "[%d] prefence set[%u]:[%u]",
+				i, task->prefences[i].syncpoint_index,
+				task->prefences[i].syncpoint_value);
+
+			counter = counter - 1;
+		}
+	}
+
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
 		if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
 		    (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
 			task->postfences[i].syncpoint_index =
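Both nvdla_emulator_submit() and nvdla_get_signal_fences() hand out syncpoint thresholds in submission order: counter starts at fence_counter - 1 and decrements per signal fence, prefences first, so earlier entries complete earlier. With, say, three signal fences and a final value F (illustrative numbers), they receive F - 2, F - 1 and F:

	u32 counter = task->fence_counter - 1;		/* 2 for three signals */

	/* for each SIGNAL fence of type SYNCPT/SYNC_FD, in array order: */
	fence->syncpoint_index = queue->syncpt_id;
	fence->syncpoint_value = task_fence - counter;	/* F-2, F-1, then F */
	counter = counter - 1;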
diff --git a/include/uapi/linux/nvhost_nvdla_ioctl.h b/include/uapi/linux/nvhost_nvdla_ioctl.h
index 5cd77743..3d85c544 100644
--- a/include/uapi/linux/nvhost_nvdla_ioctl.h
+++ b/include/uapi/linux/nvhost_nvdla_ioctl.h
@@ -3,7 +3,7 @@
  *
  * Tegra NvDLA Driver
  *
- * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -79,7 +79,7 @@ struct nvdla_pin_unpin_args {
 struct nvdla_submit_args {
 	__u64 tasks;
 	__u16 num_tasks;
-#define MAX_TASKS_PER_SUBMIT	24
+#define MAX_TASKS_PER_SUBMIT	16
 #define NVDLA_SUBMIT_FLAGS_ATOMIC	(1 << 0)
 	__u16 flags;
 	__u32 version;
@@ -125,13 +125,19 @@ struct nvdla_mem_handle {
 * @num_prefences		number of pre-fences in task
 * @num_postfences		number of post-fences in task
 * @num_input_task_status	number of input task status
- * @num_output_task_status	number of output task status
+ * @num_sof_task_status		number of sof task status
+ * @num_eof_task_status		number of eof task status
+ * @num_sof_timestamps		number of sof timestamps
+ * @num_eof_timestamps		number of eof timestamps
 * @flags			flags for bitwise task info embeddeing
 * @reserved			reserved for future use
 * @prefences			pointer to pre-fence struct table
 * @postfences			pointer to post-fence struct table
 * @input_task_status		pointer to input task status struct table
- * @output_task_status		pointer to output task status struct table
+ * @sof_task_status		pointer to sof task status struct table
+ * @eof_task_status		pointer to eof task status struct table
+ * @sof_timestamps		pointer to sof timestamp handle list
+ * @eof_timestamps		pointer to eof timestamp handle list
 * @num_addresses		total number of addressed passed in structure
 * @address_list			pointer to address list
 * @timeout			task timeout
@@ -141,17 +147,24 @@
 struct nvdla_ioctl_submit_task {
 	__u8 num_prefences;
 	__u8 num_postfences;
 	__u8 num_input_task_status;
-	__u8 num_output_task_status;
+	__u8 num_sof_task_status;
+	__u8 num_eof_task_status;
+	__u8 num_sof_timestamps;
+	__u8 num_eof_timestamps;
+	__u8 reserved0[1];
 #define NVDLA_MAX_BUFFERS_PER_TASK (6144)
 	__u32 num_addresses;
 	__u16 flags;
-	__u16 reserved;
+	__u16 reserved1;
 	__u64 prefences;
 	__u64 postfences;
 	__u64 input_task_status;
-	__u64 output_task_status;
+	__u64 sof_task_status;
+	__u64 eof_task_status;
+	__u64 sof_timestamps;
+	__u64 eof_timestamps;
 	__u64 address_list;
 	__u64 timeout;
 };
@@ -160,15 +173,17 @@ struct nvdla_ioctl_submit_task {
 /**
 * struct nvdla_ioctl_emu_submit_task structure for single emulator task
 * information
 *
+ * @num_prefences		number of pre-fences in task
 * @num_postfences		number of post-fences in task
- * @reserved			reserved for padding and future use
- * @postfences			pointer to post-fence struct table
+ * @prefences			pointer to pre-fence struct table
+ * @postfences			pointer to post-fence struct table
 *
 */
 struct nvdla_ioctl_emu_submit_task {
+	__u32 num_prefences;
 	__u32 num_postfences;
-	__u32 reserved;
+	__u64 prefences;
 	__u64 postfences;
 };
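A userspace caller on the new ABI zeroes the struct so the reserved bytes stay 0, sets only the counts it uses, and passes each table as a __u64 pointer. A hedged sketch — the fence, handle and address variables are placeholders, and the surrounding ioctl plumbing is omitted:

	struct nvdla_ioctl_submit_task t = {0};

	t.num_prefences = 1;
	t.prefences = (__u64)(uintptr_t)&wait_fence;	/* struct nvdev_fence */
	t.num_eof_timestamps = 1;
	t.eof_timestamps = (__u64)(uintptr_t)&ts_mem;	/* struct nvdla_mem_handle */
	t.num_addresses = 1;
	t.address_list = (__u64)(uintptr_t)&addr_mem;	/* struct nvdla_mem_handle */
	t.timeout = 0;					/* default timeout */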