diff --git a/drivers/video/tegra/host/nvdla/dla_fw_version.h b/drivers/video/tegra/host/nvdla/dla_fw_version.h
index 43d42319..a0f88ace 100644
--- a/drivers/video/tegra/host/nvdla/dla_fw_version.h
+++ b/drivers/video/tegra/host/nvdla/dla_fw_version.h
@@ -20,7 +20,7 @@
 #define _DLA_FW_VERSION_H_
 
 #define FIRMWARE_VERSION_MAJOR		0x1
-#define FIRMWARE_VERSION_MINOR		0x0
+#define FIRMWARE_VERSION_MINOR		0x1
 #define FIRMWARE_VERSION_SUBMINOR	0x0
 
 static inline uint32_t dla_version(void)
diff --git a/drivers/video/tegra/host/nvdla/dla_os_interface.h b/drivers/video/tegra/host/nvdla/dla_os_interface.h
index e153cc00..d6efca15 100644
--- a/drivers/video/tegra/host/nvdla/dla_os_interface.h
+++ b/drivers/video/tegra/host/nvdla/dla_os_interface.h
@@ -74,18 +74,22 @@
 #define DLA_INT_ON_COMPLETE_SHIFT	8
 #define DLA_INT_ON_ERROR_SHIFT		9
 
-#define PREACTION_TERMINATE	0x0
-#define PREACTION_SEM_EQ	0x90
-#define PREACTION_SEM_GE	0x92
-#define PREACTION_GOS_EQ	0xB0
-#define PREACTION_GOS_GE	0xB2
-#define PREACTION_TASK_STATUS	0xC0
+/* control actions */
+#define ACTION_TERMINATE		0x0
 
-#define POSTACTION_TERMINATE	0x0
-#define POSTACTION_SEM		0x80
-#define POSTACTION_TS_SEM	0x83
-#define POSTACTION_GOS		0xA0
-#define POSTACTION_TASK_STATUS	0xC1
+/* conditional actions */
+#define ACTION_SEM_EQ			0x90
+#define ACTION_SEM_GE			0x92
+#define ACTION_GOS_EQ			0xB0
+#define ACTION_GOS_GE			0xB2
+#define ACTION_TASK_STATUS_EQ		0xC0
+
+/* write actions */
+#define ACTION_WRITE_SEM		0x80
+#define ACTION_WRITE_TS_SEM		0x83
+#define ACTION_WRITE_TIMESTAMP		0x87
+#define ACTION_WRITE_GOS		0xA0
+#define ACTION_WRITE_TASK_STATUS	0xC1
 
 #define PING_DATA_SIZE		4
 #define BUFFER_MULTIPLIER	4
@@ -228,6 +232,17 @@ struct dla_action_task_status {
 	uint16_t status;
 } __attribute__ ((packed));
 
+/**
+ * Timestamp update action structure
+ *
+ * OPCODE = 0x87
+ *
+ * @address: Address to write timestamp
+ */
+struct dla_action_timestamp {
+	uint64_t address;
+} __attribute__ ((packed));
+
 /**
  * Status notifier structure
  *
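The action lists consumed by the firmware are flat byte streams: a small opcode (a uint8_t in the helpers later in this patch), followed immediately by a packed payload, with ACTION_TERMINATE (0x0) closing the list. A minimal sketch of how the new 0x87 action would be appended to such a stream — illustrative only; the driver's own add_opcode()/add_timestamp_action() helpers below do the real work:

	#include <stdint.h>

	struct dla_action_timestamp {
		uint64_t address;
	} __attribute__ ((packed));

	/* Append "write timestamp to addr" to an action stream (sketch). */
	static uint8_t *emit_write_timestamp(uint8_t *mem, uint64_t addr)
	{
		*mem++ = 0x87;	/* ACTION_WRITE_TIMESTAMP */
		((struct dla_action_timestamp *)mem)->address = addr;
		return mem + sizeof(struct dla_action_timestamp);
	}

The packed attribute matters here: the 64-bit address lands at whatever byte offset the preceding opcodes left, so the payload struct must carry no padding or alignment requirement.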
diff --git a/drivers/video/tegra/host/nvdla/nvdla.h b/drivers/video/tegra/host/nvdla/nvdla.h
index d656ecee..2aa3a6cc 100644
--- a/drivers/video/tegra/host/nvdla/nvdla.h
+++ b/drivers/video/tegra/host/nvdla/nvdla.h
@@ -99,8 +99,11 @@
  */
 #define MAX_NUM_NVDLA_PREFENCES		32
 #define MAX_NUM_NVDLA_POSTFENCES	32
+#define MAX_NUM_NVDLA_EMU_PREFENCES	16
+#define MAX_NUM_NVDLA_EMU_POSTFENCES	16
 #define MAX_NUM_NVDLA_IN_TASK_STATUS	MAX_NUM_NVDLA_PREFENCES
 #define MAX_NUM_NVDLA_OUT_TASK_STATUS	MAX_NUM_NVDLA_POSTFENCES
+#define MAX_NUM_NVDLA_OUT_TIMESTAMP	32
 #define NUM_PROFILING_POSTACTION	1
 #define MAX_COMMANDS_PER_DEVICE		1
 
@@ -219,7 +222,9 @@ struct nvdla_device {
  *
  * @queue		Queue in which task submitted
  * @sp			pointer to syncpt
+ * @prefences		pointer to pre fences
  * @postfences		pointer to post fences
+ * @num_prefences	Number of prefences in task
  * @num_postfences	Number of postfences in task
  * @fence		Fence tracking for current task
  * @fence_counter	Counter used to track fence value
@@ -228,7 +233,9 @@ struct nvdla_device {
 struct nvdla_emu_task {
 	struct nvdla_queue *queue;
 	struct nvhost_syncpt *sp;
-	struct nvdev_fence postfences[MAX_NUM_NVDLA_POSTFENCES];
+	struct nvdev_fence prefences[MAX_NUM_NVDLA_EMU_PREFENCES];
+	struct nvdev_fence postfences[MAX_NUM_NVDLA_EMU_POSTFENCES];
+	u32 num_prefences;
 	u32 num_postfences;
 	u32 fence;
 	u32 fence_counter;
@@ -259,12 +266,18 @@ struct nvdla_task {
 	struct nvdev_fence prefences[MAX_NUM_NVDLA_PREFENCES];
 	struct nvdev_fence postfences[MAX_NUM_NVDLA_POSTFENCES];
 	struct nvdla_status_notify in_task_status[MAX_NUM_NVDLA_IN_TASK_STATUS];
-	struct nvdla_status_notify out_task_status[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct nvdla_status_notify sof_task_status[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct nvdla_status_notify eof_task_status[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct nvdla_mem_handle sof_timestamps[MAX_NUM_NVDLA_OUT_TIMESTAMP];
+	struct nvdla_mem_handle eof_timestamps[MAX_NUM_NVDLA_OUT_TIMESTAMP];
 	struct nvdla_mem_handle memory_handles[NVDLA_MAX_BUFFERS_PER_TASK];
 	u8 num_prefences;
 	u8 num_postfences;
 	u8 num_in_task_status;
-	u8 num_out_task_status;
+	u8 num_sof_task_status;
+	u8 num_eof_task_status;
+	u8 num_sof_timestamps;
+	u8 num_eof_timestamps;
 	u32 num_addresses;
 	u32 fence;
 	u32 fence_counter;
@@ -280,7 +293,10 @@ struct nvdla_task {
 	struct dma_buf *prefences_sem_dmabuf[MAX_NUM_NVDLA_PREFENCES];
 	struct dma_buf *in_task_status_dmabuf[MAX_NUM_NVDLA_IN_TASK_STATUS];
 	struct dma_buf *postfences_sem_dmabuf[MAX_NUM_NVDLA_POSTFENCES];
-	struct dma_buf *out_task_status_dmabuf[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct dma_buf *sof_task_status_dmabuf[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct dma_buf *eof_task_status_dmabuf[MAX_NUM_NVDLA_OUT_TASK_STATUS];
+	struct dma_buf *sof_timestamps_dmabuf[MAX_NUM_NVDLA_OUT_TIMESTAMP];
+	struct dma_buf *eof_timestamps_dmabuf[MAX_NUM_NVDLA_OUT_TIMESTAMP];
 };
 
 struct dla_mem_addr {
@@ -402,7 +418,7 @@ int nvdla_free_gcov_region(struct platform_device *pdev,
 			bool update_region);
 int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task);
 void task_free(struct kref *ref);
-int nvdla_get_postfences(struct nvdla_queue *queue, void *in_task);
+int nvdla_get_signal_fences(struct nvdla_queue *queue, void *in_task);
 int nvdla_send_gos_region(struct platform_device *pdev);
 
 #endif /* End of __NVHOST_NVDLA_H__ */
diff --git a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
index 290b1b8c..4987ee3b 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_ioctl.c
@@ -376,13 +376,41 @@ static int nvdla_get_actions(struct nvdla_ioctl_submit_task *user_task,
 		goto fail;
 	}
 
-	/* get output task status */
-	if (copy_from_user(task->out_task_status,
-		(void __user *)user_task->output_task_status,
-		(task->num_out_task_status *
+	/* get sof task status */
+	if (copy_from_user(task->sof_task_status,
+		(void __user *)user_task->sof_task_status,
+		(task->num_sof_task_status *
 			sizeof(struct nvdla_status_notify)))) {
 		err = -EFAULT;
-		nvdla_dbg_err(pdev, "failed to copy output task status");
+		nvdla_dbg_err(pdev, "failed to copy sof task status");
+		goto fail;
+	}
+
+	/* get eof task status */
+	if (copy_from_user(task->eof_task_status,
+		(void __user *)user_task->eof_task_status,
+		(task->num_eof_task_status *
+			sizeof(struct nvdla_status_notify)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(pdev, "failed to copy eof task status");
+		goto fail;
+	}
+
+	/* get sof timestamps */
+	if (copy_from_user(task->sof_timestamps,
+		(void __user *)user_task->sof_timestamps,
+		(task->num_sof_timestamps * sizeof(struct nvdla_mem_handle)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(pdev, "failed to copy sof timestamps");
+		goto fail;
+	}
+
+	/* get eof timestamps */
+	if (copy_from_user(task->eof_timestamps,
+		(void __user *)user_task->eof_timestamps,
+		(task->num_eof_timestamps * sizeof(struct nvdla_mem_handle)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(pdev, "failed to copy eof timestamps");
 		goto fail;
 	}
 
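Each new copy follows the same shape as the pre-existing ones: a fixed-size kernel array, a user pointer, and a count that nvdla_val_task_submit_input() has already checked against the matching MAX_NUM_* ceiling, so copy_from_user() cannot overrun the destination. A hypothetical condensation of the repeated pattern (copy_notify_list() is not in the driver):

	static int copy_notify_list(struct nvdla_status_notify *dst, u64 src,
				u8 count, struct platform_device *pdev,
				const char *what)
	{
		if (copy_from_user(dst, (void __user *)(uintptr_t)src,
				count * sizeof(*dst))) {
			nvdla_dbg_err(pdev, "failed to copy %s", what);
			return -EFAULT;
		}
		return 0;
	}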
@@ -392,20 +420,66 @@ fail:
 	return err;
 }
 
-static int nvdla_send_emu_postfences(struct nvdla_emu_task *task,
+static int nvdla_send_emu_signal_fences(struct nvdla_emu_task *task,
 		struct nvdla_ioctl_emu_submit_task *user_task)
 {
 	int err = 0, i;
 	struct platform_device *dla_pdev = task->queue->pool->pdev;
 	struct platform_device *host_pdev =
 		to_platform_device(dla_pdev->dev.parent);
+	struct nvdev_fence __user *prefences =
+		(struct nvdev_fence __user *)(uintptr_t)user_task->prefences;
 	struct nvdev_fence __user *postfences =
 		(struct nvdev_fence __user *)(uintptr_t)user_task->postfences;
 	char fence_name[32];
 
-	nvdla_dbg_fn(dla_pdev, "sending post fences");
+	nvdla_dbg_fn(dla_pdev, "sending signal fences");
+
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		if (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD) {
+			struct nvhost_ctrl_sync_fence_info info;
+
+			info.id = task->prefences[i].syncpoint_index;
+			info.thresh = task->prefences[i].syncpoint_value;
+
+			nvdla_dbg_info(dla_pdev,
+				"creating pre sync fd [%d]:[%d]\n",
+				info.id, info.thresh);
+
+			/* create fence name format example: nvdla0_1_fence */
+			snprintf(fence_name, sizeof(fence_name),
+				"%s_%d_%d_prefence", dev_name(&dla_pdev->dev),
+				task->prefences[i].syncpoint_index, i);
+
+			err = nvhost_sync_create_fence_fd(host_pdev,
+				&info, 1, fence_name,
+				&task->prefences[i].sync_fd);
+
+			if (err) {
+				nvdla_dbg_err(dla_pdev,
+					"fail to create prefence syncfd\n");
+				goto fail;
+			}
+		}
+	}
+
+	nvdla_dbg_fn(dla_pdev, "copy prefences to user");
+	/* send pre fences */
+	if (copy_to_user(prefences, task->prefences,
+		(task->num_prefences * sizeof(struct nvdev_fence)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(dla_pdev, "failed to send prefences");
+		goto fail;
+	}
+	nvdla_dbg_info(dla_pdev, "prefences sent");
 
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
 		if (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD) {
 			struct nvhost_ctrl_sync_fence_info info;
 
@@ -418,8 +492,8 @@ static int nvdla_send_emu_postfences(struct nvdla_emu_task *task,
 
 			/* create fence name format example: nvdla0_1_fence */
 			snprintf(fence_name, sizeof(fence_name),
-				"%s_%d_fence", dev_name(&dla_pdev->dev),
-				task->postfences[i].syncpoint_index);
+				"%s_%d_%d_postfence", dev_name(&dla_pdev->dev),
+				task->postfences[i].syncpoint_index, i);
 
 			err = nvhost_sync_create_fence_fd(host_pdev,
 				&info, 1, fence_name,
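Only entries whose action is NVDEV_FENCE_SIGNAL produce a sync fd here; WAIT entries are skipped and flow back to userspace unchanged. The fence name now also embeds the array index, so two signal fences on the same syncpoint get distinct names. For device nvdla0, syncpoint 7, index 2 (hypothetical values):

	/* old scheme produced "nvdla0_7_fence" for every index */
	snprintf(fence_name, sizeof(fence_name), "%s_%d_%d_postfence",
		"nvdla0", 7, 2);	/* -> "nvdla0_7_2_postfence" */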
@@ -447,20 +521,67 @@ fail:
 	return err;
 }
 
-static int nvdla_update_postfences(struct nvdla_task *task,
+static int nvdla_update_signal_fences(struct nvdla_task *task,
 		struct nvdla_ioctl_submit_task *user_task)
 {
 	int err = 0, i;
 	struct platform_device *dla_pdev = task->queue->pool->pdev;
 	struct platform_device *host_pdev =
 		to_platform_device(dla_pdev->dev.parent);
+	struct nvdev_fence __user *prefences =
+		(struct nvdev_fence __user *)(uintptr_t)user_task->prefences;
 	struct nvdev_fence __user *postfences =
 		(struct nvdev_fence __user *)(uintptr_t)user_task->postfences;
 	char fence_name[32];
 
-	nvdla_dbg_fn(dla_pdev, "copy post fences for user");
+	nvdla_dbg_fn(dla_pdev, "copy fences for user");
 
+	/* update pre fence signals to users */
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		if (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD) {
+			struct nvhost_ctrl_sync_fence_info info;
+
+			info.id = task->prefences[i].syncpoint_index;
+			info.thresh = task->prefences[i].syncpoint_value;
+
+			nvdla_dbg_info(dla_pdev,
+				"creating pre sync fd [%d]:[%d]\n",
+				info.id, info.thresh);
+
+			/* create fence name format example: nvdla0_1_fence */
+			snprintf(fence_name, sizeof(fence_name),
+				"%s_%d_%d_prefence", dev_name(&dla_pdev->dev),
+				task->prefences[i].syncpoint_index, i);
+
+			err = nvhost_sync_create_fence_fd(host_pdev,
+				&info, 1, fence_name,
+				&task->prefences[i].sync_fd);
+
+			if (err) {
+				nvdla_dbg_err(dla_pdev,
+					"fail to create prefence syncfd\n");
+				goto fail;
+			}
+		}
+	}
+
+	nvdla_dbg_fn(dla_pdev, "copy prefences to user");
+	/* copy pre fences */
+	if (copy_to_user(prefences, task->prefences,
+		(task->num_prefences * sizeof(struct nvdev_fence)))) {
+		err = -EFAULT;
+		nvdla_dbg_err(dla_pdev, "failed to copy prefences");
+		goto fail;
+	}
+
+	/* update post fence signals to user */
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
 		if (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD) {
 			struct nvhost_ctrl_sync_fence_info info;
 
@@ -473,8 +594,8 @@ static int nvdla_update_postfences(struct nvdla_task *task,
 
 			/* create fence name format example: nvdla0_1_fence */
 			snprintf(fence_name, sizeof(fence_name),
-				"%s_%d_fence", dev_name(&dla_pdev->dev),
-				task->postfences[i].syncpoint_index);
+				"%s_%d_%d_postfence", dev_name(&dla_pdev->dev),
+				task->postfences[i].syncpoint_index, i);
 
 			err = nvhost_sync_create_fence_fd(host_pdev,
 				&info, 1, fence_name,
@@ -525,15 +646,33 @@ static int nvdla_val_task_submit_input(struct nvdla_ioctl_submit_task *in_task)
 			MAX_NUM_NVDLA_IN_TASK_STATUS);
 		return -EINVAL;
 	}
-	if (in_task->num_output_task_status > MAX_NUM_NVDLA_OUT_TASK_STATUS) {
-		pr_err("out task status[%u] crossing expected[%d]\n",
-			in_task->num_output_task_status,
+	if (in_task->num_sof_task_status > MAX_NUM_NVDLA_OUT_TASK_STATUS) {
+		pr_err("sof task status[%u] crossing expected[%d]\n",
+			in_task->num_sof_task_status,
 			MAX_NUM_NVDLA_OUT_TASK_STATUS);
 		return -EINVAL;
 	}
-	if (in_task->num_addresses < 1) {
+	if (in_task->num_eof_task_status > MAX_NUM_NVDLA_OUT_TASK_STATUS) {
+		pr_err("eof task status[%u] crossing expected[%d]\n",
+			in_task->num_eof_task_status,
+			MAX_NUM_NVDLA_OUT_TASK_STATUS);
+		return -EINVAL;
+	}
+	if (in_task->num_sof_timestamps > MAX_NUM_NVDLA_OUT_TIMESTAMP) {
+		pr_err("sof timestamps[%u] crossing expected[%d]\n",
+			in_task->num_sof_timestamps,
+			MAX_NUM_NVDLA_OUT_TIMESTAMP);
+		return -EINVAL;
+	}
+	if (in_task->num_eof_timestamps > MAX_NUM_NVDLA_OUT_TIMESTAMP) {
+		pr_err("eof timestamps[%u] crossing expected[%d]\n",
+			in_task->num_eof_timestamps,
+			MAX_NUM_NVDLA_OUT_TIMESTAMP);
+		return -EINVAL;
+	}
+	if (in_task->num_addresses < 1) {
 		pr_err("num addresses[%u] should be min one\n",
-				in_task->num_addresses);
+			in_task->num_addresses);
 		return -EINVAL;
 	}
 	if (in_task->num_addresses > NVDLA_MAX_BUFFERS_PER_TASK) {
@@ -572,7 +711,10 @@ static int nvdla_fill_task(struct nvdla_queue *queue,
 	task->num_prefences = local_task->num_prefences;
 	task->num_postfences = local_task->num_postfences;
 	task->num_in_task_status = local_task->num_input_task_status;
-	task->num_out_task_status = local_task->num_output_task_status;
+	task->num_sof_task_status = local_task->num_sof_task_status;
+	task->num_eof_task_status = local_task->num_eof_task_status;
+	task->num_sof_timestamps = local_task->num_sof_timestamps;
+	task->num_eof_timestamps = local_task->num_eof_timestamps;
 	task->num_addresses = local_task->num_addresses;
 	task->timeout = local_task->timeout;
 
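nvdla_val_task_submit_input() gains one clause per new count, each a plain compare against a compile-time ceiling evaluated before any copy_from_user() touches the lists. A table-driven equivalent (an illustrative refactor, not the driver's code) shows the invariant at a glance:

	const struct {
		const char *name;
		u32 count;
		int max;
	} bounds[] = {
		{ "sof task status", in_task->num_sof_task_status,
			MAX_NUM_NVDLA_OUT_TASK_STATUS },
		{ "eof task status", in_task->num_eof_task_status,
			MAX_NUM_NVDLA_OUT_TASK_STATUS },
		{ "sof timestamps", in_task->num_sof_timestamps,
			MAX_NUM_NVDLA_OUT_TIMESTAMP },
		{ "eof timestamps", in_task->num_eof_timestamps,
			MAX_NUM_NVDLA_OUT_TIMESTAMP },
	};
	size_t i;

	for (i = 0; i < ARRAY_SIZE(bounds); i++) {
		if (bounds[i].count > bounds[i].max) {
			pr_err("%s[%u] crossing expected[%d]\n",
				bounds[i].name, bounds[i].count,
				bounds[i].max);
			return -EINVAL;
		}
	}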
"num_prefences[%u] num_postfences[%u]", task->num_prefences, task->num_postfences); - nvdla_dbg_info(pdev, "num_in_status[%u] num_out_task_status[%u]", - task->num_in_task_status, task->num_out_task_status); + nvdla_dbg_info(pdev, "num_in_status[%u] num_sof_task_status[%u] " + "num_eof_task_status[%u]", + task->num_in_task_status, + task->num_sof_task_status, + task->num_eof_task_status); + nvdla_dbg_info(pdev, "num_sof_timestamps[%u] num_eof_timestamps[%u]", + task->num_sof_timestamps, + task->num_eof_timestamps); nvdla_dbg_info(pdev, "num_addresses[%u]", task->num_addresses); for (i = 0; i < task->num_prefences; i++) { @@ -654,12 +802,34 @@ static void nvdla_dump_task(struct nvdla_task *task) task->in_task_status[i].status); } - for (i = 0; i < task->num_out_task_status; i++) { - nvdla_dbg_info(pdev, "Output task status[%d]:" + for (i = 0; i < task->num_sof_task_status; i++) { + nvdla_dbg_info(pdev, "SOF task status[%d]:" "handle[%u] offset[%u] status[%u]", - i, task->out_task_status[i].handle, - task->out_task_status[i].offset, - task->out_task_status[i].status); + i, task->sof_task_status[i].handle, + task->sof_task_status[i].offset, + task->sof_task_status[i].status); + } + + for (i = 0; i < task->num_eof_task_status; i++) { + nvdla_dbg_info(pdev, "EOF task status[%d]:" + "handle[%u] offset[%u] status[%u]", + i, task->eof_task_status[i].handle, + task->eof_task_status[i].offset, + task->eof_task_status[i].status); + } + + for (i = 0; i < task->num_sof_timestamps; i++) { + nvdla_dbg_info(pdev, "SOF timestamp[%d]:" + "handle[%u] offset[%u]", + i, task->sof_timestamps[i].handle, + task->sof_timestamps[i].offset); + } + + for (i = 0; i < task->num_eof_timestamps; i++) { + nvdla_dbg_info(pdev, "EOF timestamp[%d]:" + "handle[%u] offset[%u]", + i, task->eof_timestamps[i].handle, + task->eof_timestamps[i].offset); } for (i = 0; i < task->num_addresses; i++) { @@ -718,8 +888,18 @@ static int nvdla_emu_task_submit(struct nvdla_private *priv, void *arg) nvdla_dbg_info(pdev, "submit [%d]th task", i + 1); + task.num_prefences = local_tasks[i].num_prefences; task.num_postfences = local_tasks[i].num_postfences; + /* get pre fences */ + if (copy_from_user(task.prefences, + (void __user *)local_tasks[i].prefences, + (task.num_prefences * sizeof(struct nvdev_fence)))) { + err = -EFAULT; + nvdla_dbg_err(pdev, "failed to copy prefences"); + goto exit; + } + /* get post fences */ if (copy_from_user(task.postfences, (void __user *)local_tasks[i].postfences, @@ -736,13 +916,13 @@ static int nvdla_emu_task_submit(struct nvdla_private *priv, void *arg) } nvdla_dbg_info(pdev, "task[%d] submitted", i + 1); - /* send fences to user */ - err = nvdla_send_emu_postfences(&task, local_tasks + i); + /* send signal fences to user */ + err = nvdla_send_emu_signal_fences(&task, local_tasks + i); if (err) { - nvdla_dbg_err(pdev, "fail to send postfence%d", i + 1); + nvdla_dbg_err(pdev, "fail to send sig fence%d", i + 1); goto exit; } - nvdla_dbg_info(pdev, "postfences of task[%d] sent", i + 1); + nvdla_dbg_info(pdev, "signal fences of task[%d] sent", i + 1); } nvdla_dbg_fn(pdev, "Emulator task submitted, done!"); @@ -825,8 +1005,8 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg) } nvdla_dbg_info(pdev, "task[%d] desc filled", i + 1); - /* get expected postfences prior to submit */ - err = nvdla_get_postfences(queue, task); + /* get expected signal fences prior to submit */ + err = nvdla_get_signal_fences(queue, task); if (err) { nvdla_dbg_err(pdev, "fail to get fences%d", i + 1); goto fail_to_get_fences; 
@@ -825,8 +1005,8 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg)
 		}
 		nvdla_dbg_info(pdev, "task[%d] desc filled", i + 1);
 
-		/* get expected postfences prior to submit */
-		err = nvdla_get_postfences(queue, task);
+		/* get expected signal fences prior to submit */
+		err = nvdla_get_signal_fences(queue, task);
 		if (err) {
 			nvdla_dbg_err(pdev, "fail to get fences%d", i + 1);
 			goto fail_to_get_fences;
@@ -834,7 +1014,7 @@ static int nvdla_submit(struct nvdla_private *priv, void *arg)
 		nvdla_dbg_info(pdev, "task[%d] got fences", i + 1);
 
 		/* update fences to user */
-		err = nvdla_update_postfences(task, local_tasks + i);
+		err = nvdla_update_signal_fences(task, local_tasks + i);
 		if (err) {
 			nvdla_dbg_err(pdev, "fail update postfence%d", i + 1);
 			goto fail_to_update_postfences;
diff --git a/drivers/video/tegra/host/nvdla/nvdla_queue.c b/drivers/video/tegra/host/nvdla/nvdla_queue.c
index 7bafc879..10c4b145 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_queue.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_queue.c
@@ -210,16 +210,43 @@ static int nvdla_unmap_task_memory(struct nvdla_task *task)
 	}
 	nvdla_dbg_fn(pdev, "all postfences unmaped");
 
-	/* unpin input task status memory */
-	for (ii = 0; ii < task->num_out_task_status; ii++) {
-		if (task->out_task_status[ii].handle) {
+	/* unpin output task status memory */
+	for (ii = 0; ii < task->num_sof_task_status; ii++) {
+		if (task->sof_task_status[ii].handle) {
 			nvdla_buffer_submit_unpin(task->buffers,
-				&task->out_task_status_dmabuf[ii], 1);
-			dma_buf_put(task->out_task_status_dmabuf[ii]);
+				&task->sof_task_status_dmabuf[ii], 1);
+			dma_buf_put(task->sof_task_status_dmabuf[ii]);
+		}
+	}
+
+	for (ii = 0; ii < task->num_eof_task_status; ii++) {
+		if (task->eof_task_status[ii].handle) {
+			nvdla_buffer_submit_unpin(task->buffers,
+				&task->eof_task_status_dmabuf[ii], 1);
+			dma_buf_put(task->eof_task_status_dmabuf[ii]);
 		}
 	}
 	nvdla_dbg_fn(pdev, "all out task status unmaped");
 
+	/* unpin output timestamp memory */
+	for (ii = 0; ii < task->num_sof_timestamps; ii++) {
+		if (task->sof_timestamps[ii].handle) {
+			nvdla_buffer_submit_unpin(task->buffers,
+				&task->sof_timestamps_dmabuf[ii], 1);
+			dma_buf_put(task->sof_timestamps_dmabuf[ii]);
+		}
+	}
+
+	for (ii = 0; ii < task->num_eof_timestamps; ii++) {
+		if (task->eof_timestamps[ii].handle) {
+			nvdla_buffer_submit_unpin(task->buffers,
+				&task->eof_timestamps_dmabuf[ii], 1);
+			dma_buf_put(task->eof_timestamps_dmabuf[ii]);
+		}
+	}
+	nvdla_dbg_fn(pdev, "all out timestamps unmapped");
+
 	return 0;
 }
 
@@ -252,12 +279,16 @@ static void nvdla_task_syncpt_reset(struct nvhost_syncpt *syncpt,
 
 static inline int nvdla_get_max_preaction_size(void)
 {
-	return (((MAX_NUM_NVDLA_PREFENCES + MAX_NUM_NVDLA_IN_TASK_STATUS) *
+	return (((MAX_NUM_NVDLA_PREFENCES + MAX_NUM_NVDLA_IN_TASK_STATUS +
+		MAX_NUM_NVDLA_OUT_TASK_STATUS +
+		MAX_NUM_NVDLA_OUT_TIMESTAMP) *
 		sizeof(struct dla_action_opcode)) +
 		(MAX_NUM_NVDLA_PREFENCES *
 			sizeof(struct dla_action_semaphore)) +
-		(MAX_NUM_NVDLA_IN_TASK_STATUS *
+		((MAX_NUM_NVDLA_IN_TASK_STATUS + MAX_NUM_NVDLA_OUT_TASK_STATUS) *
 			sizeof(struct dla_action_task_status)) +
+		(MAX_NUM_NVDLA_OUT_TIMESTAMP *
+			sizeof(struct dla_action_timestamp)) +
 		sizeof(struct dla_action_opcode));
 }
 
@@ -265,6 +296,7 @@ static inline int nvdla_get_max_postaction_size(void)
 {
 	return (((MAX_NUM_NVDLA_POSTFENCES +
 		MAX_NUM_NVDLA_OUT_TASK_STATUS +
+		MAX_NUM_NVDLA_OUT_TIMESTAMP +
 		NUM_PROFILING_POSTACTION) *
 		sizeof(struct dla_action_opcode)) +
 		(MAX_NUM_NVDLA_POSTFENCES *
@@ -272,6 +304,8 @@ static inline int nvdla_get_max_postaction_size(void)
 		((MAX_NUM_NVDLA_OUT_TASK_STATUS +
 			NUM_PROFILING_POSTACTION) *
 			sizeof(struct dla_action_task_status)) +
+		(MAX_NUM_NVDLA_OUT_TIMESTAMP *
+			sizeof(struct dla_action_timestamp)) +
 		sizeof(struct dla_action_opcode));
 }
 
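Both sizing helpers reserve worst-case room in the action buffer; the bound grows by one opcode per sof/eof status and timestamp plus their payloads. Writing PRE, IN_STS, OUT_STS and OUT_TS as shorthand for the MAX_NUM_NVDLA_* limits, the preaction bound computed above is:

	/*
	 * (PRE + IN_STS + OUT_STS + OUT_TS) * sizeof(struct dla_action_opcode)
	 *   + PRE * sizeof(struct dla_action_semaphore)
	 *   + (IN_STS + OUT_STS) * sizeof(struct dla_action_task_status)
	 *   + OUT_TS * sizeof(struct dla_action_timestamp)
	 *   + sizeof(struct dla_action_opcode)    for ACTION_TERMINATE
	 */

Note that OUT_STS and OUT_TS now count toward the preaction list as well, because sof status and sof timestamp writes are emitted as preactions.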
@@ -427,6 +461,18 @@ static u8 *add_status_action(u8 *mem, uint8_t op, uint64_t addr,
 	return mem + sizeof(struct dla_action_task_status);
 }
 
+static u8 *add_timestamp_action(u8 *mem, uint8_t op, uint64_t addr)
+{
+	struct dla_action_timestamp *action;
+
+	mem = add_opcode(mem, op);
+
+	action = (struct dla_action_timestamp *)mem;
+	action->address = addr;
+
+	return mem + sizeof(struct dla_action_timestamp);
+}
+
 static u8 *add_gos_action(u8 *mem, uint8_t op, uint8_t index,
 		uint16_t offset, uint32_t value)
 {
@@ -568,17 +614,403 @@ gos_disabled:
 	return err;
}
 
+static int nvdla_fill_wait_fence_action(struct nvdla_task *task,
+	struct nvdev_fence *fence,
+	struct dma_buf **dma_buf,
+	u8 **mem_next)
+{
+	int err = 0;
+
+	struct nvdla_buffers *buffers = task->buffers;
+	struct nvdla_queue *queue = task->queue;
+	struct platform_device *pdev = queue->pool->pdev;
+	struct nvhost_master *host = nvhost_get_host(pdev);
+	struct nvhost_syncpt *sp = &host->syncpt;
+	u8 *next = *mem_next;
+
+	switch (fence->type) {
+	case NVDEV_FENCE_TYPE_SYNC_FD: {
+		struct sync_fence *f;
+		struct sync_pt *pt;
+		u32 id, thresh, j;
+
+		f = nvhost_sync_fdget(fence->sync_fd);
+		if (!f) {
+			nvdla_dbg_err(pdev, "failed to get sync fd");
+			break;
+		}
+
+		j = id = thresh = 0;
+		for (j = 0; j < f->num_fences; j++) {
+			u32 gos_id, gos_offset;
+
+			pt = sync_pt_from_fence(f->cbs[j].sync_pt);
+			id = nvhost_sync_pt_id(pt);
+			thresh = nvhost_sync_pt_thresh(pt);
+
+			if (!id || !nvhost_syncpt_is_valid_hw_pt(sp, id)) {
+				nvdla_dbg_err(pdev, "Invalid sync_fd");
+				sync_fence_put(f);
+				break;
+			}
+
+			/* check if GoS backing available */
+			if (!nvdla_get_gos(pdev, id, &gos_id, &gos_offset)) {
+				nvdla_dbg_info(pdev, "syncfd_pt:[%u] "
+					"gos_id[%u] gos_offset[%u] val[%u]",
+					id, gos_id, gos_offset, thresh);
+				next = add_gos_action(next, ACTION_GOS_GE,
+					gos_id, gos_offset, thresh);
+			} else {
+				dma_addr_t syncpt_addr;
+
+				nvdla_dbg_info(pdev,
+					"GoS missing for syncfd [%d]", id);
+				syncpt_addr = nvhost_syncpt_address(
+						queue->vm_pdev, id);
+				nvdla_dbg_info(pdev, "syncfd_pt:[%u]"
+					"mss_dma_addr[%pad]",
+					id, &syncpt_addr);
+				next = add_fence_action(next, ACTION_SEM_GE,
+					syncpt_addr, thresh);
+			}
+		}
+
+		break;
+	}
+	case NVDEV_FENCE_TYPE_SYNCPT: {
+		u32 gos_id, gos_offset;
+
+		nvdla_dbg_info(pdev, "id[%d] val[%d]",
+			fence->syncpoint_index,
+			fence->syncpoint_value);
+
+		if (!nvdla_get_gos(pdev, fence->syncpoint_index, &gos_id,
+			&gos_offset)) {
+			nvdla_dbg_info(pdev, "syncpt:[%u] gos_id[%u] "
+				"gos_offset[%u] val[%u]",
+				fence->syncpoint_index, gos_id, gos_offset,
+				fence->syncpoint_value);
+			next = add_gos_action(next, ACTION_GOS_GE,
+				gos_id, gos_offset,
+				fence->syncpoint_value);
+		} else {
+			dma_addr_t syncpt_addr;
+			nvdla_dbg_info(pdev, "GoS missing");
+
+			syncpt_addr = nvhost_syncpt_address(
+				queue->vm_pdev, fence->syncpoint_index);
+			nvdla_dbg_info(pdev, "syncpt:[%u] dma_addr[%pad]",
+				fence->syncpoint_index, &syncpt_addr);
+
+			next = add_fence_action(next, ACTION_SEM_GE,
+				syncpt_addr, fence->syncpoint_value);
+		}
+
+		break;
+	}
+	case NVDEV_FENCE_TYPE_SEMAPHORE:
+	case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
+		dma_addr_t dma_addr;
+		size_t dma_size;
+
+		nvdla_dbg_info(pdev, "semh[%u] semo[%u] val[%d]",
+			fence->semaphore_handle,
+			fence->semaphore_offset,
+			fence->semaphore_value);
+
+		*dma_buf = dma_buf_get(fence->semaphore_handle);
+		if (IS_ERR_OR_NULL(*dma_buf)) {
+			*dma_buf = NULL;
+			nvdla_dbg_err(pdev, "fail to get wait buf");
+			break;
+		}
+
+		if (nvdla_buffer_submit_pin(buffers,
+				dma_buf, 1, &dma_addr, &dma_size, NULL)) {
+			nvdla_dbg_err(pdev, "fail to pin WAIT SEM");
+			break;
+		}
+
+		next = add_fence_action(next, ACTION_SEM_GE,
+			dma_addr + fence->semaphore_offset,
+			fence->semaphore_value);
+		break;
+	}
+	default:
+		nvdla_dbg_err(pdev, "Invalid sync_type[%d]", fence->type);
+		err = -EINVAL;
+		goto fail;
+	}
+
+	*mem_next = next;
+
+fail:
+	return err;
+}
+
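nvdla_fill_wait_fence_action() above folds the three wait flavours into one helper: sync-fd waits are decomposed into their underlying syncpoints, syncpoint waits prefer a GoS (grid of semaphores) compare when nvdla_get_gos() reports a backing slot, and everything else degenerates to a semaphore >= poll on a pinned DMA address. The syncpoint branch reduces to:

	/* Sketch of the wait-site selection (fragment; id/thresh from the fence). */
	if (!nvdla_get_gos(pdev, id, &gos_id, &gos_offset))	/* 0 = GoS backed */
		next = add_gos_action(next, ACTION_GOS_GE,
				gos_id, gos_offset, thresh);
	else
		next = add_fence_action(next, ACTION_SEM_GE,
				nvhost_syncpt_address(queue->vm_pdev, id),
				thresh);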
+static int nvdla_fill_signal_fence_action(struct nvdla_task *task,
+	struct nvdev_fence *fence,
+	struct dma_buf **dma_buf,
+	u8 **mem_next)
+{
+	int err = 0;
+
+	struct nvdla_buffers *buffers = task->buffers;
+	struct nvdla_queue *queue = task->queue;
+	struct platform_device *pdev = queue->pool->pdev;
+	u8 *next = *mem_next;
+
+	switch (fence->type) {
+	case NVDEV_FENCE_TYPE_SYNC_FD:
+	case NVDEV_FENCE_TYPE_SYNCPT: {
+		dma_addr_t syncpt_addr;
+		u32 gos_id, gos_offset;
+
+		/* update GoS backing if available */
+		if (!nvdla_get_gos(pdev, queue->syncpt_id,
+				&gos_id, &gos_offset)) {
+			u32 max;
+
+			/* send incremented max */
+			max = nvhost_syncpt_read_maxval(pdev,
+					queue->syncpt_id);
+			nvdla_dbg_info(pdev, "syncpt:[%u] gos_id[%u] "
+				"gos_offset[%u] val[%u]",
+				queue->syncpt_id, gos_id, gos_offset,
+				max + task->fence_counter + 1);
+			next = add_gos_action(next, ACTION_WRITE_GOS,
+				gos_id, gos_offset,
+				max + task->fence_counter + 1);
+		}
+
+		/* For postaction also update MSS addr */
+		syncpt_addr = nvhost_syncpt_address(queue->vm_pdev,
+				queue->syncpt_id);
+		next = add_fence_action(next, ACTION_WRITE_SEM,
+				syncpt_addr, 1);
+
+		task->fence_counter = task->fence_counter + 1;
+
+		nvdla_dbg_info(pdev, "syncpt:[%u] mss:[%pad]",
+			queue->syncpt_id, &syncpt_addr);
+		break;
+	}
+	case NVDEV_FENCE_TYPE_SEMAPHORE: {
+		dma_addr_t dma_addr;
+		size_t dma_size;
+
+		nvdla_dbg_info(pdev, "semh:%u semo:%u v:%d",
+			fence->semaphore_handle,
+			fence->semaphore_offset,
+			fence->semaphore_value);
+
+		*dma_buf = dma_buf_get(fence->semaphore_handle);
+		if (IS_ERR_OR_NULL(*dma_buf)) {
+			*dma_buf = NULL;
+			nvdla_dbg_err(pdev, "fail to get buf");
+			break;
+		}
+
+		if (nvdla_buffer_submit_pin(buffers,
+				dma_buf, 1, &dma_addr, &dma_size, NULL)) {
+			nvdla_dbg_err(pdev, "fail to pin SIGNAL SEM");
+			break;
+		}
+
+		next = add_fence_action(next, ACTION_WRITE_SEM,
+			dma_addr + fence->semaphore_offset,
+			fence->semaphore_value);
+		break;
+	}
+	case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
+		dma_addr_t dma_addr;
+		size_t dma_size;
+
+		nvdla_dbg_info(pdev, "semh:%u semo:%u v:%d",
+			fence->semaphore_handle,
+			fence->semaphore_offset,
+			fence->semaphore_value);
+
+		*dma_buf = dma_buf_get(fence->semaphore_handle);
+		if (IS_ERR_OR_NULL(*dma_buf)) {
+			*dma_buf = NULL;
+			nvdla_dbg_err(pdev, "fail to get buf");
+			break;
+		}
+
+		if (nvdla_buffer_submit_pin(buffers,
+				dma_buf, 1, &dma_addr, &dma_size, NULL)) {
+			nvdla_dbg_err(pdev, "fail to pin SIGNAL SEM");
+			break;
+		}
+
+		next = add_fence_action(next, ACTION_WRITE_TS_SEM,
+			dma_addr + fence->semaphore_offset,
+			fence->semaphore_value);
+		break;
+	}
+	default:
+		nvdla_dbg_err(pdev, "Invalid sync_type[%d]",
+			fence->type);
+		err = -EINVAL;
+		goto fail;
+	}
+
+	*mem_next = next;
+
+fail:
+	return err;
+}
+
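For syncpoint-backed signals the helper does double bookkeeping: it writes the incremented maximum through GoS when a backing slot exists, always writes a +1 increment to the syncpoint's MSS address, and bumps task->fence_counter so each signal in the task claims its own future threshold. In outline:

	/* Fragment: the i-th syncpoint signal of this task. */
	max = nvhost_syncpt_read_maxval(pdev, queue->syncpt_id);
	next = add_gos_action(next, ACTION_WRITE_GOS, gos_id, gos_offset,
			max + task->fence_counter + 1);	/* future value */
	next = add_fence_action(next, ACTION_WRITE_SEM, syncpt_addr, 1);
	task->fence_counter = task->fence_counter + 1;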
get buf"); + err = -EINVAL; + goto fail; + } + + if (nvdla_buffer_submit_pin(buffers, + dma_buf, 1, &dma_addr, &dma_size, NULL)) { + nvdla_dbg_err(pdev, "fail to pin in status"); + err = -EINVAL; + goto fail; + } + + next = add_status_action(next, ACTION_TASK_STATUS_EQ, + dma_addr + task_status->offset, + task_status->status); + + *mem_next = next; + +fail: + return err; +} + +static int nvdla_fill_taskstatus_write_action(struct nvdla_task *task, + struct nvdla_status_notify *task_status, + struct dma_buf **dma_buf, + u8 **mem_next) +{ + int err = 0; + + struct nvdla_buffers *buffers = task->buffers; + struct nvdla_queue *queue = task->queue; + struct platform_device *pdev = queue->pool->pdev; + dma_addr_t dma_addr; + size_t dma_size; + + u8 *next = *mem_next; + + nvdla_dbg_info(pdev, "h[%u] o[%u] status[%d]", + task_status->handle, + task_status->offset, + task_status->status); + + *dma_buf = dma_buf_get(task_status->handle); + if (IS_ERR_OR_NULL(*dma_buf)) { + *dma_buf = NULL; + nvdla_dbg_err(pdev, "fail to get buf"); + err = -EINVAL; + goto fail; + } + + if (nvdla_buffer_submit_pin(buffers, + dma_buf, 1, &dma_addr, &dma_size, NULL)) { + nvdla_dbg_err(pdev, "fail to pin status"); + err = -EINVAL; + goto fail; + } + + next = add_status_action(next, ACTION_WRITE_TASK_STATUS, + dma_addr + task_status->offset, + task_status->status); + + *mem_next = next; + +fail: + return err; +} + +static int nvdla_fill_timestamp_write_action(struct nvdla_task *task, + struct nvdla_mem_handle *timestamp, + struct dma_buf **dma_buf, + u8 **mem_next) +{ + int err = 0; + + struct nvdla_buffers *buffers = task->buffers; + struct nvdla_queue *queue = task->queue; + struct platform_device *pdev = queue->pool->pdev; + dma_addr_t dma_addr; + size_t dma_size; + + u8 *next = *mem_next; + + nvdla_dbg_info(pdev, "h[%u] o[%u]", + timestamp->handle, + timestamp->offset); + + *dma_buf = dma_buf_get(timestamp->handle); + if (IS_ERR_OR_NULL(*dma_buf)) { + *dma_buf = NULL; + nvdla_dbg_err(pdev, "fail to get buf"); + err = -EINVAL; + goto fail; + } + + if (nvdla_buffer_submit_pin(buffers, + dma_buf, 1, &dma_addr, &dma_size, NULL)) { + nvdla_dbg_err(pdev, "fail to pin timestamp"); + err = -EINVAL; + goto fail; + } + + next = add_timestamp_action(next, ACTION_WRITE_TIMESTAMP, + dma_addr + timestamp->offset); + + *mem_next = next; + +fail: + return err; +} + + static int nvdla_fill_postactions(struct nvdla_task *task) { + int err = 0; + struct dla_task_descriptor *task_desc = task->task_desc; - struct nvdla_buffers *buffers = task->buffers; struct nvdla_queue *queue = task->queue; struct platform_device *pdev = queue->pool->pdev; struct dla_action_list *postactionl; uint16_t postactionlist_of; u8 *next, *start; void *mem; - int i, j = 0; + int i; /* update postaction list offset */ postactionlist_of = task_desc->postactions + @@ -587,176 +1019,74 @@ static int nvdla_fill_postactions(struct nvdla_task *task) start = next = (u8 *)task_desc + postactionlist_of; /* Action to write the status notifier after task finishes (for TSP). 
 static int nvdla_fill_postactions(struct nvdla_task *task)
 {
+	int err = 0;
+
 	struct dla_task_descriptor *task_desc = task->task_desc;
-	struct nvdla_buffers *buffers = task->buffers;
 	struct nvdla_queue *queue = task->queue;
 	struct platform_device *pdev = queue->pool->pdev;
 	struct dla_action_list *postactionl;
 	uint16_t postactionlist_of;
 	u8 *next, *start;
 	void *mem;
-	int i, j = 0;
+	int i;
 
 	/* update postaction list offset */
 	postactionlist_of = task_desc->postactions +
@@ -587,176 +1019,74 @@ static int nvdla_fill_postactions(struct nvdla_task *task)
 
 	start = next = (u8 *)task_desc + postactionlist_of;
 
 	/* Action to write the status notifier after task finishes (for TSP).
 	 */
-	next = add_status_action(next, POSTACTION_TASK_STATUS,
+	next = add_status_action(next, ACTION_WRITE_TASK_STATUS,
 		task->task_desc_pa + nvdla_profile_status_offset(task), 0);
 
-	/* fill output task status */
-	for (j = 0; j < task->num_out_task_status; j++) {
-		dma_addr_t dma_addr;
-		size_t dma_size;
-
-		nvdla_dbg_info(pdev, "i[%d] h[%u] o[%u] status[%d]",
-			j,
-			task->out_task_status[j].handle,
-			task->out_task_status[j].offset,
-			task->out_task_status[j].status);
-
-		task->out_task_status_dmabuf[j] =
-			dma_buf_get(task->out_task_status[j].handle);
-		if (IS_ERR_OR_NULL(task->out_task_status_dmabuf[j])) {
-			task->out_task_status_dmabuf[j] = NULL;
-			nvdla_dbg_err(pdev, "fail to get buf");
-			break;
-		}
-
-		if (nvdla_buffer_submit_pin(buffers,
-				&task->out_task_status_dmabuf[j],
-				1, &dma_addr, &dma_size, NULL)) {
-			nvdla_dbg_err(pdev, "fail to pin out status");
-			break;
-		}
-
-		next = add_status_action(next, POSTACTION_TASK_STATUS,
-			dma_addr + task->out_task_status[j].offset,
-			task->out_task_status[j].status);
+	/* fill eof timestamp actions */
+	for (i = 0; i < task->num_eof_timestamps; i++) {
+		err = nvdla_fill_timestamp_write_action(task,
+			&task->eof_timestamps[i],
+			&task->eof_timestamps_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill eof timestamp[%d]",
+				i);
+			goto fail;
+		}
 	}
 
-	/* reset fence counter */
-	task->fence_counter = 0;
+	/* fill eof task status actions */
+	for (i = 0; i < task->num_eof_task_status; i++) {
+		err = nvdla_fill_taskstatus_write_action(task,
+			&task->eof_task_status[i],
+			&task->eof_task_status_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill eof taskstatus[%d]",
+				i);
+			goto fail;
+		}
+	}
 
 	/* fill all postactions */
 	for (i = 0; i < task->num_postfences; i++) {
 
 		/* update action */
-		switch (task->postfences[i].type) {
-		case NVDEV_FENCE_TYPE_SYNCPT:
-		case NVDEV_FENCE_TYPE_SYNC_FD: {
-			dma_addr_t syncpt_addr;
-			u32 gos_id, gos_offset;
-
-			/* update GoS backing if available */
-			if (!nvdla_get_gos(pdev, queue->syncpt_id,
-					&gos_id, &gos_offset)) {
-				u32 max;
-
-				/* send incremented max */
-				max = nvhost_syncpt_read_maxval(pdev,
-					queue->syncpt_id);
-				nvdla_dbg_info(pdev, "post i:%d syncpt:[%u] gos_id[%u] gos_offset[%u] val[%u]",
-					i, queue->syncpt_id, gos_id,
-					gos_offset,
-					max + task->fence_counter + 1);
-				next = add_gos_action(next, POSTACTION_GOS,
-					gos_id, gos_offset,
-					max + task->fence_counter + 1);
-			}
-
-			/* For postaction also update MSS addr */
-			syncpt_addr = nvhost_syncpt_address(queue->vm_pdev,
-				queue->syncpt_id);
-			next = add_fence_action(next, POSTACTION_SEM,
-				syncpt_addr, 1);
-
-			task->fence_counter = task->fence_counter + 1;
-
-			nvdla_dbg_info(pdev, "post i:%d syncpt:[%u] mss:[%pad]",
-				i, queue->syncpt_id, &syncpt_addr);
-			break;
-		}
-		case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
-			dma_addr_t dma_addr;
-			size_t dma_size;
-
-			nvdla_dbg_info(pdev, "POSTTS i:%d semh:%u semo:%u v:%d",
-				i,
-				task->postfences[i].semaphore_handle,
-				task->postfences[i].semaphore_offset,
-				task->postfences[i].semaphore_value);
-
-			/* TS SEMAPHORE just has extra memory bytes allocated
-			 * to store TS as compared default semaphore.
-			 * override action/opecode type here.
-			 */
-			task->postfences_sem_dmabuf[i] =
-				dma_buf_get(task->postfences[i].semaphore_handle);
-			if (IS_ERR_OR_NULL(task->postfences_sem_dmabuf[i])) {
-				task->postfences_sem_dmabuf[i] = NULL;
-				nvdla_dbg_err(pdev, "fail to get buf");
-				break;
-			}
-
-			if (nvdla_buffer_submit_pin(buffers,
-					&task->postfences_sem_dmabuf[i],
-					1, &dma_addr, &dma_size, NULL)) {
-				nvdla_dbg_err(pdev, "fail to pin OUT TSSEM");
-				break;
-			}
-
-			next = add_fence_action(next, POSTACTION_TS_SEM,
-				dma_addr + task->postfences[i].semaphore_offset,
-				task->postfences[i].semaphore_value);
-			break;
-		}
-		case NVDEV_FENCE_TYPE_SEMAPHORE: {
-			dma_addr_t dma_addr;
-			size_t dma_size;
-
-			nvdla_dbg_info(pdev, "POST i:%d semh:%u semo:%u v:%d",
-				i,
-				task->postfences[i].semaphore_handle,
-				task->postfences[i].semaphore_offset,
-				task->postfences[i].semaphore_value);
-
-			task->postfences_sem_dmabuf[i] =
-				dma_buf_get(task->postfences[i].semaphore_handle);
-			if (IS_ERR_OR_NULL(task->postfences_sem_dmabuf[i])) {
-				task->postfences_sem_dmabuf[i] = NULL;
-				nvdla_dbg_err(pdev, "fail to get buf");
-				break;
-			}
-
-			if (nvdla_buffer_submit_pin(buffers,
-					&task->postfences_sem_dmabuf[i],
-					1, &dma_addr, &dma_size, NULL)) {
-				nvdla_dbg_err(pdev, "fail to pin OUT SEM");
-				break;
-			}
-
-			next = add_fence_action(next, POSTACTION_SEM,
-				dma_addr + task->postfences[i].semaphore_offset,
-				task->postfences[i].semaphore_value);
-			break;
-		}
-		default:
-			nvdla_dbg_err(pdev, "Invalid postfence sync type[%d]",
-				task->postfences[i].type);
-			return -EINVAL;
+		err = nvdla_fill_signal_fence_action(task,
+			&task->postfences[i],
+			&task->postfences_sem_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev, "failed to fill postfences[%u]", i);
+			goto fail;
 		}
 	}
 
 	/* update end of action list */
-	next = add_opcode(next, POSTACTION_TERMINATE);
+	next = add_opcode(next, ACTION_TERMINATE);
 
 	mem = (char *)task_desc + task_desc->postactions;
 	postactionl = (struct dla_action_list *)mem;
 	postactionl->offset = postactionlist_of;
 	postactionl->size = next - start;
 
-	return 0;
+fail:
	return err;
 }
 
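With both fillers reduced to loops over the shared helpers, the emitted streams have a fixed shape: the postaction list above is the profiling status write, then eof timestamps, eof statuses, signal postfences and the terminator; the preaction list built below is waits, input status checks, sof statuses, sof timestamps, signal prefences and the terminator:

	/*
	 * postactions: [WRITE_TASK_STATUS profiling][WRITE_TIMESTAMP eof]...
	 *              [WRITE_TASK_STATUS eof]...[signal fences]...[TERMINATE]
	 * preactions:  [waits]...[TASK_STATUS_EQ in]...[WRITE_TASK_STATUS sof]...
	 *              [WRITE_TIMESTAMP sof]...[signal prefences]...[TERMINATE]
	 */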
 static int nvdla_fill_preactions(struct nvdla_task *task)
 {
+	int err = 0;
+
 	struct dla_task_descriptor *task_desc = task->task_desc;
-	struct nvdla_buffers *buffers = task->buffers;
 	struct nvdla_queue *queue = task->queue;
 	struct platform_device *pdev = queue->pool->pdev;
-	struct nvhost_master *host = nvhost_get_host(pdev);
-	struct nvhost_syncpt *sp = &host->syncpt;
 	struct dla_action_list *preactionl;
 	uint16_t preactionlist_of;
 	u8 *next, *start;
 	void *mem;
-	int i, j;
+	int i;
 
 	/* preaction list offset update */
 	preactionlist_of = task_desc->postactions +
@@ -764,169 +1094,84 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 
 	start = next = (u8 *)task_desc + preactionlist_of;
 
-	/* fill all preactions */
+	/* fill all preactions wait */
 	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_WAIT)
+			continue;
 
-		switch (task->prefences[i].type) {
-		case NVDEV_FENCE_TYPE_SYNC_FD: {
-			struct sync_fence *f;
-			struct sync_pt *pt;
-			u32 id, thresh, j;
-
-			f = nvhost_sync_fdget(task->prefences[i].sync_fd);
-			if (!f) {
-				nvdla_dbg_err(pdev, "failed to get sync fd");
-				break;
-			}
-
-			j = id = thresh = 0;
-
-			for (j = 0; j < f->num_fences; j++) {
-				u32 gos_id, gos_offset;
-
-				pt = sync_pt_from_fence(f->cbs[j].sync_pt);
-				id = nvhost_sync_pt_id(pt);
-				thresh = nvhost_sync_pt_thresh(pt);
-
-				if (!id ||
-					!nvhost_syncpt_is_valid_hw_pt(sp, id)) {
-					nvdla_dbg_err(pdev, "Invalid sync_fd");
-					sync_fence_put(f);
-					break;
-				}
-
-				/* check if GoS backing available */
-				if (!nvdla_get_gos(pdev, id, &gos_id,
-						&gos_offset)) {
-					nvdla_dbg_info(pdev, "pre i:%d syncfd_pt:[%u] gos_id[%u] gos_offset[%u] val[%u]",
-						i, id, gos_id,
-						gos_offset, thresh);
-					next = add_gos_action(next,
-						PREACTION_GOS_GE,
-						gos_id, gos_offset, thresh);
-				} else {
-					dma_addr_t syncpt_addr;
-
-					nvdla_dbg_info(pdev, "pre i:%d GoS missing for syncfd [%d]",
-						i, id);
-					syncpt_addr = nvhost_syncpt_address(
-						queue->vm_pdev, id);
-					nvdla_dbg_info(pdev, "pre i:%d syncfd_pt:[%u] mss_dma_addr[%pad]",
-						i, id, &syncpt_addr);
-					next = add_fence_action(next, PREACTION_SEM_GE,
-						syncpt_addr, thresh);
-				}
-			}
-			break;
-		}
-		case NVDEV_FENCE_TYPE_SYNCPT: {
-			u32 gos_id, gos_offset;
-
-			nvdla_dbg_info(pdev, "i[%d] id[%d] val[%d]",
-				i,
-				task->prefences[i].syncpoint_index,
-				task->prefences[i].syncpoint_value);
-
-			if (!nvdla_get_gos(pdev,
-				task->prefences[i].syncpoint_index, &gos_id,
-				&gos_offset)) {
-				nvdla_dbg_info(pdev, "pre i:%d syncpt:[%u] gos_id[%u] gos_offset[%u] val[%u]",
-					i, task->prefences[i].syncpoint_index,
-					gos_id, gos_offset,
-					task->prefences[i].syncpoint_value);
-				next = add_gos_action(next, PREACTION_GOS_GE,
-					gos_id, gos_offset,
-					task->prefences[i].syncpoint_value);
-			} else {
-				dma_addr_t syncpt_addr;
-
-				nvdla_dbg_info(pdev, "pre i:%d GoS missing", i);
-
-				syncpt_addr = nvhost_syncpt_address(
-					queue->vm_pdev,
-					task->prefences[i].syncpoint_index);
-				nvdla_dbg_info(pdev, "pre i:%d syncpt:[%u] dma_addr[%pad]",
-					i,
-					task->prefences[i].syncpoint_index,
-					&syncpt_addr);
-
-				next = add_fence_action(next, PREACTION_SEM_GE,
-					syncpt_addr,
-					task->prefences[i].syncpoint_value);
-			}
-			break;
-		}
-		case NVDEV_FENCE_TYPE_SEMAPHORE:
-		case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
-			dma_addr_t dma_addr;
-			size_t dma_size;
-
-			nvdla_dbg_info(pdev, "i[%d] semh[%u] semo[%u] val[%d]",
-				i,
-				task->prefences[i].semaphore_handle,
-				task->prefences[i].semaphore_offset,
-				task->prefences[i].semaphore_value);
-
-			task->prefences_sem_dmabuf[i] =
-				dma_buf_get(task->prefences[i].semaphore_handle);
-			if (IS_ERR_OR_NULL(task->prefences_sem_dmabuf[i])) {
-				task->prefences_sem_dmabuf[i] = NULL;
-				nvdla_dbg_err(pdev, "fail to get buf");
-				break;
-			}
-
-			if (nvdla_buffer_submit_pin(buffers,
-					&task->prefences_sem_dmabuf[i],
-					1, &dma_addr, &dma_size, NULL)) {
-				nvdla_dbg_err(pdev, "fail to pin IN SEM");
-				break;
-			}
-
-			next = add_fence_action(next, PREACTION_SEM_GE,
-				dma_addr + task->prefences[i].semaphore_offset,
-				task->prefences[i].semaphore_value);
-			break;
-		}
-		default:
-			nvdla_dbg_err(pdev, "Invalid sync_type[%d]",
-				task->prefences[i].type);
-			return -EINVAL;
+		/* update action */
+		err = nvdla_fill_wait_fence_action(task,
+			&task->prefences[i],
+			&task->prefences_sem_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev, "failed to fill prefences[%u]", i);
+			goto fail;
 		}
 	}
 
-	/* fill input status after filling sem/synpt/gos */
-	for (j = 0; j < task->num_in_task_status; j++) {
-		dma_addr_t dma_addr;
-		size_t dma_size;
+	/* fill input status after filling sem/syncpt/gos */
+	for (i = 0; i < task->num_in_task_status; i++) {
+		err = nvdla_fill_taskstatus_read_action(task,
+			&task->in_task_status[i],
+			&task->in_task_status_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill in taskstatus[%d]",
+				i);
+			goto fail;
+		}
+	}
 
-		nvdla_dbg_info(pdev, "i[%d] h[%u] o[%u] status[%d]",
-			j,
-			task->in_task_status[j].handle,
-			task->in_task_status[j].offset,
-			task->in_task_status[j].status);
+	/* fill sof task status actions */
+	for (i = 0; i < task->num_sof_task_status; i++) {
+		err = nvdla_fill_taskstatus_write_action(task,
+			&task->sof_task_status[i],
+			&task->sof_task_status_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill sof taskstatus[%d]",
+				i);
+			goto fail;
+		}
+	}
 
-		task->in_task_status_dmabuf[j] =
-			dma_buf_get(task->in_task_status[j].handle);
-		if (IS_ERR_OR_NULL(task->in_task_status_dmabuf[j])) {
-			task->in_task_status_dmabuf[j] = NULL;
-			nvdla_dbg_err(pdev, "fail to get buf");
-			break;
-		}
+	/* fill sof timestamp actions */
+	for (i = 0; i < task->num_sof_timestamps; i++) {
+		err = nvdla_fill_timestamp_write_action(task,
+			&task->sof_timestamps[i],
+			&task->sof_timestamps_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"failed to fill sof timestamp[%d]",
+				i);
+			goto fail;
+		}
+	}
 
-		if (nvdla_buffer_submit_pin(buffers,
-				&task->in_task_status_dmabuf[j],
-				1, &dma_addr, &dma_size, NULL)) {
-			nvdla_dbg_err(pdev, "fail to pin in status");
-			break;
-		}
+	/* fill all preactions signals */
+	for (i = 0; i < task->num_prefences; i++) {
+		/* update action */
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
 
-		next = add_status_action(next, PREACTION_TASK_STATUS,
-			dma_addr + task->in_task_status[j].offset,
-			task->in_task_status[j].status);
+		err = nvdla_fill_signal_fence_action(task,
+			&task->prefences[i],
+			&task->prefences_sem_dmabuf[i],
+			&next);
+		if (err < 0) {
+			nvdla_dbg_err(pdev,
+				"fail to fill fence sig action [%d]",
+				i);
+			goto fail;
+		}
 	}
 
 	/* update end of action list */
-	next = add_opcode(next, PREACTION_TERMINATE);
+	next = add_opcode(next, ACTION_TERMINATE);
 
 	/* actually update lists data */
 	mem = (char *)task_desc + task_desc->preactions;
@@ -934,7 +1179,8 @@ static int nvdla_fill_preactions(struct nvdla_task *task)
 	preactionl->offset = preactionlist_of;
 	preactionl->size = next - start;
 
-	return 0;
+fail:
+	return err;
 }
 
 int nvdla_fill_task_desc(struct nvdla_task *task)
@@ -985,6 +1231,9 @@ int nvdla_fill_task_desc(struct nvdla_task *task)
 
 	nvdla_update_gos(pdev);
 
+	/* reset fence counter */
+	task->fence_counter = 0;
+
 	/* fill pre actions */
 	nvdla_fill_preactions(task);
 
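Resetting fence_counter moves out of nvdla_fill_postactions() and up into nvdla_fill_task_desc(), ahead of the preaction pass, because prefences marked NVDEV_FENCE_SIGNAL now consume syncpoint increments too. The task's final fence is then the syncpoint max plus everything both passes accumulated:

	/* Fragment of the submit path after this change. */
	task->fence_counter = 0;
	nvdla_fill_preactions(task);	/* SIGNAL prefences bump the counter */
	nvdla_fill_postactions(task);	/* syncpt postfences bump it again */

	task_fence = nvhost_syncpt_read_maxval(pdev, queue->syncpt_id) +
			task->fence_counter;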
@@ -1095,8 +1344,30 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
 
 	/* reset fence counter */
 	task->fence_counter = 0;
+
+	/* fill all preactions */
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		/* update action */
+		switch (task->prefences[i].type) {
+		case NVDEV_FENCE_TYPE_SYNCPT:
+		case NVDEV_FENCE_TYPE_SYNC_FD: {
+			task->fence_counter = task->fence_counter + 1;
+			break;
+		}
+		default:
+			nvdla_dbg_err(pdev, "Invalid prefence sync type[%d]",
+				task->prefences[i].type);
+			return -EINVAL;
+		}
+	}
+
 	/* fill all postactions */
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
 
 		/* update action */
 		switch (task->postfences[i].type) {
@@ -1120,9 +1391,31 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
 			queue->syncpt_id, task->fence,
 			task, task->fence_counter);
 
-	/* Update postfences for all */
+	/* Update signal fences for all */
 	counter = task->fence_counter - 1;
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
+		    (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
+			task->prefences[i].syncpoint_index =
+				queue->syncpt_id;
+			task->prefences[i].syncpoint_value =
+				task->fence - counter;
+
+			nvdla_dbg_info(pdev, "[%d] prefence set[%u]:[%u]",
+				i, task->prefences[i].syncpoint_index,
+				task->prefences[i].syncpoint_value);
+
+			counter = counter - 1;
+		}
+	}
+
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
 		if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
 		    (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
 			task->postfences[i].syncpoint_index =
@@ -1141,7 +1434,7 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
 	return 0;
 }
 
-int nvdla_get_postfences(struct nvdla_queue *queue, void *in_task)
+int nvdla_get_signal_fences(struct nvdla_queue *queue, void *in_task)
 {
 	struct nvdla_task *task = (struct nvdla_task *)in_task;
 	struct platform_device *pdev = queue->pool->pdev;
@@ -1159,9 +1452,31 @@ int nvdla_get_postfences(struct nvdla_queue *queue, void *in_task)
 	task_fence = nvhost_syncpt_read_maxval(pdev, queue->syncpt_id) +
 			task->fence_counter;
 
-	/* Update postfences for all */
+	/* Update signal fence values for both prefences and postfences */
 	counter = task->fence_counter - 1;
+	for (i = 0; i < task->num_prefences; i++) {
+		if (task->prefences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
+		if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
+		    (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
+			task->prefences[i].syncpoint_index =
+				queue->syncpt_id;
+			task->prefences[i].syncpoint_value =
+				task_fence - counter;
+
+			nvdla_dbg_info(pdev, "[%d] prefence set[%u]:[%u]",
+				i, task->prefences[i].syncpoint_index,
+				task->prefences[i].syncpoint_value);
+
+			counter = counter - 1;
+		}
+	}
+
 	for (i = 0; i < task->num_postfences; i++) {
+		if (task->postfences[i].action != NVDEV_FENCE_SIGNAL)
+			continue;
+
 		if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
 		    (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
 			task->postfences[i].syncpoint_index =
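Both nvdla_emulator_submit() and nvdla_get_signal_fences() hand out syncpoint thresholds in submission order: counter starts at fence_counter - 1 and decrements per signal fence, prefences first, so earlier entries complete earlier. With, say, three signal fences and a final value F (illustrative numbers), they receive F - 2, F - 1 and F:

	u32 counter = task->fence_counter - 1;		/* 2 for three signals */

	/* for each SIGNAL fence of type SYNCPT/SYNC_FD, in array order: */
	fence->syncpoint_index = queue->syncpt_id;
	fence->syncpoint_value = task_fence - counter;	/* F-2, F-1, then F */
	counter = counter - 1;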
diff --git a/include/uapi/linux/nvhost_nvdla_ioctl.h b/include/uapi/linux/nvhost_nvdla_ioctl.h
index 5cd77743..3d85c544 100644
--- a/include/uapi/linux/nvhost_nvdla_ioctl.h
+++ b/include/uapi/linux/nvhost_nvdla_ioctl.h
@@ -3,7 +3,7 @@
  *
  * Tegra NvDLA Driver
  *
- * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -79,7 +79,7 @@ struct nvdla_pin_unpin_args {
 struct nvdla_submit_args {
 	__u64 tasks;
 	__u16 num_tasks;
-#define MAX_TASKS_PER_SUBMIT	24
+#define MAX_TASKS_PER_SUBMIT	16
 #define NVDLA_SUBMIT_FLAGS_ATOMIC	(1 << 0)
 	__u16 flags;
 	__u32 version;
@@ -125,13 +125,19 @@ struct nvdla_mem_handle {
 * @num_prefences		number of pre-fences in task
 * @num_postfences		number of post-fences in task
 * @num_input_task_status	number of input task status
- * @num_output_task_status	number of output task status
+ * @num_sof_task_status		number of sof task status
+ * @num_eof_task_status		number of eof task status
+ * @num_sof_timestamps		number of sof timestamps
+ * @num_eof_timestamps		number of eof timestamps
 * @flags			flags for bitwise task info embeddeing
 * @reserved			reserved for future use
 * @prefences			pointer to pre-fence struct table
 * @postfences			pointer to post-fence struct table
 * @input_task_status		pointer to input task status struct table
- * @output_task_status		pointer to output task status struct table
+ * @sof_task_status		pointer to sof task status struct table
+ * @eof_task_status		pointer to eof task status struct table
+ * @sof_timestamps		pointer to sof timestamp handle list
+ * @eof_timestamps		pointer to eof timestamp handle list
 * @num_addresses		total number of addressed passed in structure
 * @address_list			pointer to address list
 * @timeout			task timeout
@@ -141,17 +147,24 @@
 struct nvdla_ioctl_submit_task {
 	__u8 num_prefences;
 	__u8 num_postfences;
 	__u8 num_input_task_status;
-	__u8 num_output_task_status;
+	__u8 num_sof_task_status;
+	__u8 num_eof_task_status;
+	__u8 num_sof_timestamps;
+	__u8 num_eof_timestamps;
+	__u8 reserved0[1];
 #define NVDLA_MAX_BUFFERS_PER_TASK (6144)
 	__u32 num_addresses;
 	__u16 flags;
-	__u16 reserved;
+	__u16 reserved1;
 	__u64 prefences;
 	__u64 postfences;
 	__u64 input_task_status;
-	__u64 output_task_status;
+	__u64 sof_task_status;
+	__u64 eof_task_status;
+	__u64 sof_timestamps;
+	__u64 eof_timestamps;
 	__u64 address_list;
 	__u64 timeout;
 };
@@ -160,15 +173,17 @@ struct nvdla_ioctl_submit_task {
 /**
 * struct nvdla_ioctl_emu_submit_task structure for single emulator task
 * information
 *
+ * @num_prefences		number of pre-fences in task
 * @num_postfences		number of post-fences in task
- * @reserved			reserved for padding and future use
- * @postfences			pointer to post-fence struct table
+ * @prefences			pointer to pre-fence struct table
+ * @postfences			pointer to post-fence struct table
 *
 */
 struct nvdla_ioctl_emu_submit_task {
+	__u32 num_prefences;
 	__u32 num_postfences;
-	__u32 reserved;
+	__u64 prefences;
 	__u64 postfences;
 };
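A userspace caller on the new ABI zeroes the struct so the reserved bytes stay 0, sets only the counts it uses, and passes each table as a __u64 pointer. A hedged sketch — the fence, handle and address variables are placeholders, and the surrounding ioctl plumbing is omitted:

	struct nvdla_ioctl_submit_task t = {0};

	t.num_prefences = 1;
	t.prefences = (__u64)(uintptr_t)&wait_fence;	/* struct nvdev_fence */
	t.num_eof_timestamps = 1;
	t.eof_timestamps = (__u64)(uintptr_t)&ts_mem;	/* struct nvdla_mem_handle */
	t.num_addresses = 1;
	t.address_list = (__u64)(uintptr_t)&addr_mem;	/* struct nvdla_mem_handle */
	t.timeout = 0;					/* default timeout */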