diff --git a/drivers/video/tegra/host/pva/nvpva_buffer.c b/drivers/video/tegra/host/pva/nvpva_buffer.c index 47369126..c8cf0751 100644 --- a/drivers/video/tegra/host/pva/nvpva_buffer.c +++ b/drivers/video/tegra/host/pva/nvpva_buffer.c @@ -40,6 +40,7 @@ struct nvpva_vm_buffer { dma_addr_t user_addr; u64 user_offset; u64 user_size; + u64 user_serial_id; struct rb_node rb_node; struct rb_node rb_node_id; struct list_head list_head; @@ -221,6 +222,7 @@ nvpva_buffer_map(struct platform_device *pdev, struct dma_buf *dmabuf, u64 offset, u64 size, + u64 serial_id, struct nvpva_vm_buffer *vm, bool is_user) { @@ -273,6 +275,7 @@ nvpva_buffer_map(struct platform_device *pdev, vm->size = dmabuf->size; vm->user_offset = offset; vm->user_size = size; + vm->user_serial_id = serial_id; vm->user_map_count = 1; if (is_user) @@ -359,6 +362,7 @@ int nvpva_buffer_submit_pin_id(struct nvpva_buffers *nvpva_buffers, struct dma_buf **dmabuf, dma_addr_t *paddr, u64 *psize, + u64 *serial_ids, enum nvpva_buffers_heap *heap) { struct nvpva_vm_buffer *vm; @@ -377,6 +381,7 @@ int nvpva_buffer_submit_pin_id(struct nvpva_buffers *nvpva_buffers, paddr[i] = vm->user_addr; dmabuf[i] = vm->dmabuf; psize[i] = vm->user_size; + serial_ids[i] = vm->user_serial_id; /* Return heap only if requested */ if (heap != NULL) @@ -400,6 +405,7 @@ int nvpva_buffer_pin(struct nvpva_buffers *nvpva_buffers, struct dma_buf **dmabufs, u64 *offset, u64 *size, + u64 *serial_id, u32 segment, u32 count, u32 *id, @@ -460,6 +466,7 @@ int nvpva_buffer_pin(struct nvpva_buffers *nvpva_buffers, dmabufs[i], offset[i], size[i], + serial_id[i], vm, (segment == NVPVA_SEGMENT_USER)); if (err) { diff --git a/drivers/video/tegra/host/pva/nvpva_buffer.h b/drivers/video/tegra/host/pva/nvpva_buffer.h index 4fed213b..c2bee81b 100644 --- a/drivers/video/tegra/host/pva/nvpva_buffer.h +++ b/drivers/video/tegra/host/pva/nvpva_buffer.h @@ -76,6 +76,7 @@ int nvpva_buffer_pin(struct nvpva_buffers *nvpva_buffers, struct dma_buf **dmabufs, u64 
*offset, u64 *size, + u64 *serial_id, u32 segment, u32 count, u32 *id, @@ -144,6 +145,7 @@ int nvpva_buffer_submit_pin(struct nvpva_buffers *nvpva_buffers, * @param count Number of memhandles in the list * @param paddr Pointer to IOVA list * @param psize Pointer to size of buffer to return + * @param serial_ids Pointer to unique serial id to return * @param heap Pointer to a list of heaps. This is * filled by the routine. * @@ -156,6 +158,7 @@ int nvpva_buffer_submit_pin_id(struct nvpva_buffers *nvpva_buffers, struct dma_buf **dmabuf, dma_addr_t *paddr, u64 *psize, + u64 *serial_ids, enum nvpva_buffers_heap *heap); /** diff --git a/drivers/video/tegra/host/pva/pva_ioctl.c b/drivers/video/tegra/host/pva/pva_ioctl.c index 11ff423e..bc7e69e3 100644 --- a/drivers/video/tegra/host/pva/pva_ioctl.c +++ b/drivers/video/tegra/host/pva/pva_ioctl.c @@ -496,6 +496,7 @@ static int pva_pin(struct pva_private *priv, void *arg) struct dma_buf *dmabuf[1]; struct nvpva_pin_in_arg *in_arg = (struct nvpva_pin_in_arg *)arg; struct nvpva_pin_out_arg *out_arg = (struct nvpva_pin_out_arg *)arg; + u64 serial_id = 0xFFFFFFFFFFFFFFFF; dmabuf[0] = dma_buf_get(in_arg->pin.handle); if (IS_ERR_OR_NULL(dmabuf[0])) { @@ -509,6 +510,36 @@ static int pva_pin(struct pva_private *priv, void *arg) &dmabuf[0], &in_arg->pin.offset, &in_arg->pin.size, + &serial_id, + in_arg->pin.segment, + 1, + &out_arg->pin_id, + &out_arg->error_code); + dma_buf_put(dmabuf[0]); +out: + return err; +} + +static int pva_pin_ex(struct pva_private *priv, void *arg) +{ + int err = 0; + struct dma_buf *dmabuf[1]; + struct nvpva_pin_in_arg_ex *in_arg = (struct nvpva_pin_in_arg_ex *)arg; + struct nvpva_pin_out_arg *out_arg = (struct nvpva_pin_out_arg *)arg; + + dmabuf[0] = dma_buf_get(in_arg->pin.handle); + if (IS_ERR_OR_NULL(dmabuf[0])) { + dev_err(&priv->pva->pdev->dev, "invalid handle to pin: %u", + in_arg->pin.handle); + err = -EFAULT; + goto out; + } + + err = nvpva_buffer_pin(priv->client->buffers, + &dmabuf[0], + 
&in_arg->pin.offset, + &in_arg->pin.size, + &in_arg->pin.serial_id, in_arg->pin.segment, 1, &out_arg->pin_id, @@ -910,6 +941,9 @@ static long pva_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case NVPVA_IOCTL_PIN: err = pva_pin(priv, buf); break; + case NVPVA_IOCTL_PIN_EX: + err = pva_pin_ex(priv, buf); + break; case NVPVA_IOCTL_UNPIN: err = pva_unpin(priv, buf); break; diff --git a/drivers/video/tegra/host/pva/pva_queue.c b/drivers/video/tegra/host/pva/pva_queue.c index b2e22152..a1c92f13 100644 --- a/drivers/video/tegra/host/pva/pva_queue.c +++ b/drivers/video/tegra/host/pva/pva_queue.c @@ -166,9 +166,9 @@ struct pva_pinned_memory *pva_task_pin_mem(struct pva_submit_task *task, mem->id = id; err = nvpva_buffer_submit_pin_id(task->client->buffers, &mem->id, 1, &mem->dmabuf, &mem->dma_addr, - &mem->size, &mem->heap); + &mem->size, &mem->serial_id, &mem->heap); if (err) { - task_err(task, "submit pin failed; Is the handled pinned?"); + task_err(task, "submit pin failed; Is the handle pinned?"); goto err_out; } @@ -183,7 +183,8 @@ err_out: static int pva_task_pin_fence(struct pva_submit_task *task, struct nvpva_submit_fence *fence, - dma_addr_t *addr) + dma_addr_t *addr, + u64 *serial_id) { int err = 0; @@ -197,6 +198,8 @@ pva_task_pin_fence(struct pva_submit_task *task, err = PTR_ERR(mem); - } else + } else { *addr = mem->dma_addr + fence->obj.sem.mem.offset; + *serial_id = mem->serial_id; + } break; } case NVPVA_FENCE_OBJ_SYNCPT: { @@ -339,6 +342,8 @@ pva_task_process_fence_actions(struct pva_submit_task *task, dma_addr_t fence_addr = 0; u32 fence_value; dma_addr_t timestamp_addr; + u64 serial_id; + switch (fence_action->fence.type) { case NVPVA_FENCE_OBJ_SYNCPT: { @@ -366,13 +371,16 @@ pva_task_process_fence_actions(struct pva_submit_task *task, { err = pva_task_pin_fence(task, &fence_action->fence, - &fence_addr); + &fence_addr, + &serial_id); if (err) goto out; + task->sem_num += 1; task->sem_thresh += 1; fence_value = task->sem_thresh; 
fence_action->fence.obj.sem.value = fence_value; + task->fence_act_serial_ids[fence_type][i] = serial_id; break; } default: @@ -424,7 +432,10 @@ static int pva_task_process_prefences(struct pva_submit_task *task, dma_addr_t fence_addr = 0; u32 fence_val; - err = pva_task_pin_fence(task, fence, &fence_addr); + err = pva_task_pin_fence(task, + fence, + &fence_addr, + &task->prefences_serial_ids[i]); if (err) goto out; @@ -812,7 +823,7 @@ out: static void pva_trace_log_fill_fence(struct nvdev_fence *dst_fence, - struct nvpva_submit_fence *src_fence) + struct nvpva_submit_fence *src_fence) { static u32 obj_type[] = {NVDEV_FENCE_TYPE_SYNCPT, NVDEV_FENCE_TYPE_SEMAPHORE, @@ -828,7 +839,6 @@ pva_trace_log_fill_fence(struct nvdev_fence *dst_fence, break; case NVPVA_FENCE_OBJ_SEM: case NVPVA_FENCE_OBJ_SEMAPHORE_TS: - dst_fence->semaphore_handle = src_fence->obj.sem.mem.pin_id; dst_fence->semaphore_offset = src_fence->obj.sem.mem.offset; dst_fence->semaphore_value = src_fence->obj.sem.value; break; @@ -855,12 +865,20 @@ pva_trace_log_record_task_states(struct platform_device *pdev, /* Record task postfences */ for (i = 0 ; i < task->num_pva_fence_actions[NVPVA_FENCE_POST]; i++) { + u64 serial_id = task->fence_act_serial_ids[NVPVA_FENCE_POST][i]; + fence = &(task->pva_fence_actions[NVPVA_FENCE_POST][i].fence); pva_trace_log_fill_fence(&post_fence, fence); - trace_job_postfence(task->id, - post_fence.syncpoint_index, - post_fence.syncpoint_value); - + if (post_fence.type == NVDEV_FENCE_TYPE_SYNCPT) + trace_job_postfence(task->id, + post_fence.syncpoint_index, + post_fence.syncpoint_value); + else if ((post_fence.type == NVDEV_FENCE_TYPE_SEMAPHORE) + || (post_fence.type == NVDEV_FENCE_TYPE_SEMAPHORE_TS)) + trace_job_postfence_semaphore(task->id, + serial_id, + post_fence.semaphore_offset, + post_fence.semaphore_value); } if (task->pva->profiling_level == 1) { @@ -1174,11 +1192,17 @@ static int pva_task_submit(const struct pva_submit_tasks *task_header) timestamp); for (j = 0; j 
< task->num_prefences; j++) { pva_trace_log_fill_fence(&pre_fence, - &task->prefences[j]); - trace_job_prefence(task->id, - pre_fence.syncpoint_index, - pre_fence.syncpoint_value); - + &task->prefences[j]); + if (pre_fence.type == NVDEV_FENCE_TYPE_SYNCPT) + trace_job_prefence(task->id, + pre_fence.syncpoint_index, + pre_fence.syncpoint_value); + else if ((pre_fence.type == NVDEV_FENCE_TYPE_SEMAPHORE) + || (pre_fence.type == NVDEV_FENCE_TYPE_SEMAPHORE_TS)) + trace_job_prefence_semaphore(task->id, + task->prefences_serial_ids[j], + pre_fence.semaphore_offset, + pre_fence.semaphore_value); } } out: diff --git a/drivers/video/tegra/host/pva/pva_queue.h b/drivers/video/tegra/host/pva/pva_queue.h index af1852f3..3dfc8020 100644 --- a/drivers/video/tegra/host/pva/pva_queue.h +++ b/drivers/video/tegra/host/pva/pva_queue.h @@ -23,6 +23,7 @@ extern struct nvpva_queue_ops pva_queue_ops; struct pva_pinned_memory { u64 size; + u64 serial_id; dma_addr_t dma_addr; struct dma_buf *dmabuf; int id; @@ -155,6 +156,9 @@ struct pva_submit_task { struct nvpva_fence_action pva_fence_actions[NVPVA_MAX_FENCE_TYPES] [NVPVA_TASK_MAX_FENCEACTIONS]; + u64 fence_act_serial_ids[NVPVA_MAX_FENCE_TYPES] + [NVPVA_TASK_MAX_FENCEACTIONS]; + u64 prefences_serial_ids[NVPVA_TASK_MAX_PREFENCES]; struct pva_hwseq_priv_s hwseq_info[NVPVA_TASK_MAX_DMA_CHANNELS_T23X]; int8_t desc_block_height_log2[NVPVA_TASK_MAX_DMA_DESCRIPTORS]; struct pva_dma_task_buffer_info_s task_buff_info[NVPVA_TASK_MAX_DMA_DESCRIPTORS]; diff --git a/include/trace/events/nvpva_ftrace.h b/include/trace/events/nvpva_ftrace.h index 88e0e152..6a57774e 100644 --- a/include/trace/events/nvpva_ftrace.h +++ b/include/trace/events/nvpva_ftrace.h @@ -79,12 +79,12 @@ TRACE_EVENT(pva_job_ext_event, u64 queue_begin_timestamp, u64 queue_end_timestamp, u64 prepare_begin_timestamp, u64 prepare_end_timestamp, u64 vpu_begin_timestamp, u64 vpu_end_timestamp, - u64 post_begin_timestamp, u64 post_end_stamp), + u64 post_begin_timestamp, u64 
post_end_timestamp), TP_ARGS(job_id, syncpt_id, threshold, vpu_id, queue_begin_timestamp, queue_end_timestamp, prepare_begin_timestamp, prepare_end_timestamp, vpu_begin_timestamp, vpu_end_timestamp, - post_begin_timestamp, post_end_stamp), + post_begin_timestamp, post_end_timestamp), TP_STRUCT__entry( __field(u64, queue_begin_timestamp) __field(u64, queue_end_timestamp) @@ -93,7 +93,7 @@ TRACE_EVENT(pva_job_ext_event, __field(u64, vpu_begin_timestamp) __field(u64, vpu_end_timestamp) __field(u64, post_begin_timestamp) - __field(u64, post_end_stamp) + __field(u64, post_end_timestamp) __field(u32, job_id) __field(u32, syncpt_id) __field(u32, threshold) @@ -111,7 +111,7 @@ TRACE_EVENT(pva_job_ext_event, __entry->vpu_begin_timestamp = vpu_begin_timestamp; __entry->vpu_end_timestamp = vpu_end_timestamp; __entry->post_begin_timestamp = post_begin_timestamp; - __entry->post_end_stamp = post_end_stamp; + __entry->post_end_timestamp = post_end_timestamp; __entry->queue_id = (job_id >> 8); __entry->vpu_id = vpu_id; ), @@ -125,7 +125,7 @@ TRACE_EVENT(pva_job_ext_event, __entry->queue_begin_timestamp, __entry->queue_end_timestamp, __entry->prepare_begin_timestamp, __entry->prepare_end_timestamp, __entry->vpu_begin_timestamp, __entry->vpu_end_timestamp, - __entry->post_begin_timestamp, __entry->post_end_stamp + __entry->post_begin_timestamp, __entry->post_end_timestamp ) ); @@ -155,6 +155,39 @@ DEFINE_EVENT(job_fence, job_postfence, TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold), TP_ARGS(job_id, syncpt_id, threshold)); +DECLARE_EVENT_CLASS(job_fence_semaphore, + TP_PROTO(u32 job_id, u64 semaphore_id, + u32 semaphore_offset, u32 semaphore_value), + TP_ARGS(job_id, semaphore_id, semaphore_offset, semaphore_value), + TP_STRUCT__entry( + __field(u64, semaphore_id) + __field(u32, job_id) + __field(u32, semaphore_offset) + __field(u32, semaphore_value) + ), + TP_fast_assign( + __entry->job_id = job_id; + __entry->semaphore_id = semaphore_id; + __entry->semaphore_offset = 
semaphore_offset; + __entry->semaphore_value = semaphore_value; + ), + TP_printk("job_id=%u semaphore_id=%llu semaphore_offset=%u semaphore_value=%u", + __entry->job_id, __entry->semaphore_id, + __entry->semaphore_offset, __entry->semaphore_value + ) +); + +DEFINE_EVENT(job_fence_semaphore, job_prefence_semaphore, + TP_PROTO(u32 job_id, u64 semaphore_id, + u32 semaphore_offset, u32 semaphore_value), + TP_ARGS(job_id, semaphore_id, semaphore_offset, semaphore_value)); + +DEFINE_EVENT(job_fence_semaphore, job_postfence_semaphore, + TP_PROTO(u32 job_id, u64 semaphore_id, + u32 semaphore_offset, u32 semaphore_value), + TP_ARGS(job_id, semaphore_id, semaphore_offset, semaphore_value)); + + TRACE_EVENT(job_timestamps, TP_PROTO(u32 job_id, u64 begin, u64 end), TP_ARGS(job_id, begin, end), diff --git a/include/uapi/linux/nvpva_ioctl.h b/include/uapi/linux/nvpva_ioctl.h index dbdda254..4e846560 100644 --- a/include/uapi/linux/nvpva_ioctl.h +++ b/include/uapi/linux/nvpva_ioctl.h @@ -157,10 +157,23 @@ struct nvpva_pin_handle { uint32_t type; }; +struct nvpva_pin_handle_ex { + uint64_t offset; + uint64_t size; + uint64_t serial_id; + int32_t handle; + uint32_t access; + uint32_t segment; + uint32_t type; +}; + struct nvpva_pin_in_arg { struct nvpva_pin_handle pin; }; +struct nvpva_pin_in_arg_ex { + struct nvpva_pin_handle_ex pin; +}; struct nvpva_pin_out_arg { uint32_t pin_id; /* Unique ID assigned by KMD for the Pin */ uint32_t error_code; @@ -171,6 +184,11 @@ union nvpva_pin_args { struct nvpva_pin_out_arg out; }; + +union nvpva_pin_args_ex { + struct nvpva_pin_in_arg_ex in; + struct nvpva_pin_out_arg out; +}; struct nvpva_unpin_in_arg { uint32_t pin_id; }; @@ -546,7 +564,10 @@ union nvpva_set_vpu_print_buffer_size_args { #define NVPVA_IOCTL_SET_VPU_PRINT_BUFFER_SIZE \ _IOW(NVPVA_IOCTL_MAGIC, 11, union nvpva_set_vpu_print_buffer_size_args) -#define NVPVA_IOCTL_NUMBER_MAX 11 +#define NVPVA_IOCTL_PIN_EX \ + _IOWR(NVPVA_IOCTL_MAGIC, 12, union nvpva_pin_args_ex) + +#define 
NVPVA_IOCTL_NUMBER_MAX 12 #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NVPVA_IOCTL_MAX_SIZE \