diff --git a/drivers/video/tegra/host/pva/nvpva_queue.c b/drivers/video/tegra/host/pva/nvpva_queue.c index cc942dfa..e8bfa55e 100644 --- a/drivers/video/tegra/host/pva/nvpva_queue.c +++ b/drivers/video/tegra/host/pva/nvpva_queue.c @@ -283,6 +283,7 @@ struct nvpva_queue_pool *nvpva_queue_init(struct platform_device *pdev, queue->pool = pool; queue->task_pool = (void *)&task_pool[i]; queue->batch_id = 0U; + queue->task_idx = 0U; nvpva_queue_get_task_size(queue); } diff --git a/drivers/video/tegra/host/pva/nvpva_queue.h b/drivers/video/tegra/host/pva/nvpva_queue.h index c069ac72..426acac6 100644 --- a/drivers/video/tegra/host/pva/nvpva_queue.h +++ b/drivers/video/tegra/host/pva/nvpva_queue.h @@ -61,7 +61,7 @@ struct nvpva_queue { struct nvpva_queue_pool *pool; struct kref kref; u32 id; - + u32 task_idx; /*wait list for task mem requester*/ struct semaphore task_pool_sem; diff --git a/drivers/video/tegra/host/pva/pva_ioctl.c b/drivers/video/tegra/host/pva/pva_ioctl.c index 3869cba1..11ff423e 100644 --- a/drivers/video/tegra/host/pva/pva_ioctl.c +++ b/drivers/video/tegra/host/pva/pva_ioctl.c @@ -170,6 +170,8 @@ static int pva_copy_task(struct nvpva_ioctl_task *ioctl_task, * copy them. 
*/ task->exe_id = ioctl_task->exe_id; + task->stream_id = ioctl_task->stream_id; + task->prog_id = ioctl_task->prog_id; task->l2_alloc_size = ioctl_task->l2_alloc_size; task->symbol_payload_size = ioctl_task->symbol_payload.size; task->flags = ioctl_task->flags; diff --git a/drivers/video/tegra/host/pva/pva_queue.c b/drivers/video/tegra/host/pva/pva_queue.c index 68470a9c..e5b7fbdf 100644 --- a/drivers/video/tegra/host/pva/pva_queue.c +++ b/drivers/video/tegra/host/pva/pva_queue.c @@ -18,11 +18,7 @@ #include #include -#ifdef CONFIG_EVENTLIB -#include #include -#include -#endif #include #if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE @@ -34,6 +30,8 @@ #include #include #include +#define CREATE_TRACE_POINTS +#include #include "pva.h" #include "nvpva_buffer.h" @@ -812,10 +810,9 @@ out: return err; } -#ifdef CONFIG_EVENTLIB static void -pva_eventlib_fill_fence(struct nvdev_fence *dst_fence, +pva_trace_log_fill_fence(struct nvdev_fence *dst_fence, struct nvpva_submit_fence *src_fence) { static u32 obj_type[] = {NVDEV_FENCE_TYPE_SYNCPT, @@ -844,106 +841,53 @@ pva_eventlib_fill_fence(struct nvdev_fence *dst_fence, } static void -pva_eventlib_record_r5_states(struct platform_device *pdev, +pva_trace_log_record_task_states(struct platform_device *pdev, u32 syncpt_id, u32 syncpt_thresh, struct pva_task_statistics_s *stats, struct pva_submit_task *task) { - struct nvhost_device_data *pdata = platform_get_drvdata(pdev); - struct nvhost_pva_task_state state; struct nvdev_fence post_fence; struct nvpva_submit_fence *fence; u8 i; - if ((task->pva->profiling_level == 0) || (!pdata->eventlib_id)) + if ((task->pva->profiling_level == 0) || (!IS_ENABLED(CONFIG_TRACING))) return; /* Record task postfences */ for (i = 0 ; i < task->num_pva_fence_actions[NVPVA_FENCE_POST]; i++) { fence = &(task->pva_fence_actions[NVPVA_FENCE_POST][i].fence); - pva_eventlib_fill_fence(&post_fence, fence); - nvhost_eventlib_log_fences(pdev, - syncpt_id, - syncpt_thresh, - &post_fence, - 1, - 
NVDEV_FENCE_KIND_POST, - stats->complete_time); + pva_trace_log_fill_fence(&post_fence, fence); + trace_job_postfence(task->id, + post_fence.syncpoint_index, + post_fence.syncpoint_value); + } - state.class_id = pdata->class; - state.syncpt_id = syncpt_id; - state.syncpt_thresh = syncpt_thresh; - state.vpu_id = stats->vpu_assigned; - state.queue_id = stats->queue_id; - state.iova = task->dma_addr; - - keventlib_write(pdata->eventlib_id, - &state, - sizeof(state), - stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_BEGIN - : NVHOST_PVA_VPU1_BEGIN, - stats->vpu_start_time); - - keventlib_write(pdata->eventlib_id, - &state, - sizeof(state), - stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_END - : NVHOST_PVA_VPU1_END, - stats->vpu_complete_time); - keventlib_write(pdata->eventlib_id, - &state, - sizeof(state), - NVHOST_PVA_PREPARE_END, - stats->vpu_start_time); - keventlib_write(pdata->eventlib_id, - &state, - sizeof(state), - NVHOST_PVA_POST_BEGIN, - stats->vpu_complete_time); - - if (task->pva->profiling_level >= 2) { - keventlib_write(pdata->eventlib_id, - &state, - sizeof(state), - NVHOST_PVA_QUEUE_BEGIN, - stats->queued_time); - - keventlib_write(pdata->eventlib_id, - &state, - sizeof(state), - NVHOST_PVA_QUEUE_END, - stats->vpu_assigned_time); - - keventlib_write(pdata->eventlib_id, - &state, - sizeof(state), - NVHOST_PVA_PREPARE_BEGIN, - stats->vpu_assigned_time); - - keventlib_write(pdata->eventlib_id, - &state, - sizeof(state), - NVHOST_PVA_POST_END, - stats->complete_time); + if (task->pva->profiling_level == 1) { + trace_pva_job_base_event(task->id, + syncpt_id, + syncpt_thresh, + stats->vpu_assigned, + stats->queue_id, + stats->vpu_start_time, + stats->vpu_complete_time); + } else if (task->pva->profiling_level >= 2) { + trace_pva_job_ext_event(task->id, + syncpt_id, + syncpt_thresh, + stats->vpu_assigned, + stats->queue_id, + stats->queued_time, + stats->vpu_assigned_time, + stats->vpu_assigned_time, + stats->vpu_start_time, + stats->vpu_start_time, + 
stats->vpu_complete_time, + stats->vpu_complete_time, + stats->complete_time); } } -#else -static void -pva_eventlib_fill_fence(struct nvdev_fence *dst_fence, - struct nvpva_submit_fence *src_fence) -{ -} -static void -pva_eventlib_record_r5_states(struct platform_device *pdev, - u32 syncpt_id, - u32 syncpt_thresh, - struct pva_task_statistics_s *stats, - struct pva_submit_task *task) -{ -} -#endif void pva_task_free(struct kref *ref) { @@ -1053,19 +997,17 @@ static void update_one_task(struct pva *pva) stats->vpu_assigned, r5_overhead); prof: - if (task->pva->profiling_level == 0) + if ((task->pva->profiling_level == 0) || (!IS_ENABLED(CONFIG_TRACING))) goto out; - nvhost_eventlib_log_task(pdev, - queue->syncpt_id, - task->local_sync_counter, + trace_job_timestamps(task->id, stats->vpu_assigned_time, stats->complete_time); - pva_eventlib_record_r5_states(pdev, - queue->syncpt_id, - task->local_sync_counter, - stats, - task); + pva_trace_log_record_task_states(pdev, + queue->syncpt_id, + task->local_sync_counter, + stats, + task); out: /* Not linked anymore so drop the reference */ kref_put(&task->ref, pva_task_free); @@ -1156,6 +1098,8 @@ static int pva_task_submit(const struct pva_submit_tasks *task_header) { struct pva_submit_task *first_task = task_header->tasks[0]; struct nvpva_queue *queue = first_task->queue; + struct nvhost_device_data *pdata = platform_get_drvdata(first_task->pva->pdev); + u64 timestamp; int err = 0; u32 i; @@ -1225,22 +1169,20 @@ static int pva_task_submit(const struct pva_submit_tasks *task_header) struct nvdev_fence pre_fence; struct pva_submit_task *task = task_header->tasks[i]; + trace_job_submit(&task->pva->pdev->dev, + pdata->class, task->id, + task->num_prefences, + task->prog_id, + task->stream_id, + timestamp); for (j = 0; j < task->num_prefences; j++) { - pva_eventlib_fill_fence(&pre_fence, + pva_trace_log_fill_fence(&pre_fence, &task->prefences[j]); - nvhost_eventlib_log_fences(task->pva->pdev, - queue->syncpt_id, - 
task->local_sync_counter, - &pre_fence, - 1, - NVDEV_FENCE_KIND_PRE, - timestamp); - } + trace_job_prefence(task->id, + pre_fence.syncpoint_index, + pre_fence.syncpoint_value); - nvhost_eventlib_log_submit(task->pva->pdev, - queue->syncpt_id, - task->local_sync_counter, - timestamp); + } } out: return 0; @@ -1296,7 +1238,9 @@ set_task_parameters(const struct pva_submit_tasks *task_header) status_interface = (task->queue->id + 1U); for (idx = 0U; idx < task_header->num_tasks; idx++) { + queue->task_idx = (queue->task_idx + 1) & 0xFFFFFF; task = task_header->tasks[idx]; + task->id = queue->task_idx | (queue->id << 24); hw_task = task->va; WARN_ON(task->pool_index > 0xFF); hw_task->task.task_id = task->pool_index; diff --git a/drivers/video/tegra/host/pva/pva_queue.h b/drivers/video/tegra/host/pva/pva_queue.h index 83230e03..825c0a4c 100644 --- a/drivers/video/tegra/host/pva/pva_queue.h +++ b/drivers/video/tegra/host/pva/pva_queue.h @@ -93,6 +93,8 @@ struct pva_submit_task { bool pinned_app; u32 exe_id; + u64 stream_id; + u64 prog_id; u32 l2_alloc_size; /* Not applicable for Xavier */ struct pva_cb *stdout; @@ -116,7 +118,7 @@ struct pva_submit_task { u32 sem_thresh; u32 sem_num; - + u32 id; /* Data provided by userspace "as is" */ struct nvpva_submit_fence prefences[NVPVA_TASK_MAX_PREFENCES]; struct nvpva_fence_action diff --git a/include/trace/events/nvpva_ftrace.h b/include/trace/events/nvpva_ftrace.h new file mode 100644 index 00000000..3d1611b5 --- /dev/null +++ b/include/trace/events/nvpva_ftrace.h @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, NVIDIA Corporation. All rights reserved. 
+ *
+ * NVPVA event logging to ftrace
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nvpva_ftrace
+
+#if !defined(_TRACE_NVPVA_FTRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NVPVA_FTRACE_H
+
+#include <linux/device.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(job_submit,
+	TP_PROTO(struct device *dev, u32 class_id, u32 job_id, u32 num_fences,
+		u64 prog_id, u64 stream_id, u64 hw_timestamp),
+	TP_ARGS(dev, class_id, job_id, num_fences, prog_id, stream_id, hw_timestamp),
+	TP_STRUCT__entry(
+		/* Copy the device name; storing a struct device * and reading
+		 * it at trace-output time would dereference a possibly-stale
+		 * pointer.
+		 */
+		__string(dev, dev_name(dev))
+		__field(u32, class_id)
+		__field(u32, job_id)
+		__field(u32, num_fences)
+		__field(u64, prog_id)
+		__field(u64, stream_id)
+		__field(u64, hw_timestamp)
+	),
+	TP_fast_assign(
+		__assign_str(dev, dev_name(dev));
+		__entry->class_id = class_id;
+		__entry->job_id = job_id;
+		__entry->num_fences = num_fences;
+		__entry->prog_id = prog_id;
+		__entry->stream_id = stream_id;
+		__entry->hw_timestamp = hw_timestamp;
+	),
+	/* Labels must follow argument order: prog_id before stream_id. */
+	TP_printk("%s class=%02x id=%u fences=%u prog_id=%llu stream_id=%llu ts=%llu",
+		__get_str(dev), __entry->class_id, __entry->job_id,
+		__entry->num_fences, __entry->prog_id, __entry->stream_id,
+		__entry->hw_timestamp
+	)
+);
+
+TRACE_EVENT(pva_job_base_event,
+	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold, u32 vpu_id,
+		u32 queue_id, u64 vpu_begin_timestamp, u64 vpu_end_timestamp),
+	TP_ARGS(job_id, syncpt_id, threshold, vpu_id, queue_id,
+		vpu_begin_timestamp, vpu_end_timestamp),
+	TP_STRUCT__entry(
+		__field(u64, vpu_begin_timestamp)
+		__field(u64, vpu_end_timestamp)
+		__field(u32, job_id)
+		__field(u32, syncpt_id)
+		__field(u32, threshold)
+		__field(u32, vpu_id)
+		__field(u32, queue_id)
+	),
+	TP_fast_assign(
+		__entry->job_id = job_id;
+		__entry->syncpt_id = syncpt_id;
+		__entry->threshold = threshold;
+		__entry->vpu_begin_timestamp = vpu_begin_timestamp;
+		__entry->vpu_end_timestamp = vpu_end_timestamp;
+		__entry->queue_id = queue_id;
+		__entry->vpu_id = vpu_id;
+	),
+	TP_printk("job_id=%u syncpt_id=%u threshold=%u vpu_id=%u "
+		"queue_id=%u vpu_begin=%llu vpu_end=%llu",
+		__entry->job_id, __entry->syncpt_id, __entry->threshold,
+		__entry->vpu_id, __entry->queue_id, __entry->vpu_begin_timestamp,
+		__entry->vpu_end_timestamp
+	)
+);
+
+TRACE_EVENT(pva_job_ext_event,
+	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold, u32 vpu_id, u32 queue_id,
+		u64 queue_begin_timestamp, u64 queue_end_timestamp,
+		u64 prepare_begin_timestamp, u64 prepare_end_timestamp,
+		u64 vpu_begin_timestamp, u64 vpu_end_timestamp,
+		u64 post_begin_timestamp, u64 post_end_timestamp),
+	TP_ARGS(job_id, syncpt_id, threshold, vpu_id, queue_id,
+		queue_begin_timestamp, queue_end_timestamp,
+		prepare_begin_timestamp, prepare_end_timestamp,
+		vpu_begin_timestamp, vpu_end_timestamp,
+		post_begin_timestamp, post_end_timestamp),
+	TP_STRUCT__entry(
+		__field(u64, queue_begin_timestamp)
+		__field(u64, queue_end_timestamp)
+		__field(u64, prepare_begin_timestamp)
+		__field(u64, prepare_end_timestamp)
+		__field(u64, vpu_begin_timestamp)
+		__field(u64, vpu_end_timestamp)
+		__field(u64, post_begin_timestamp)
+		__field(u64, post_end_timestamp)
+		__field(u32, job_id)
+		__field(u32, syncpt_id)
+		__field(u32, threshold)
+		__field(u32, vpu_id)
+		__field(u32, queue_id)
+	),
+	TP_fast_assign(
+		__entry->job_id = job_id;
+		__entry->syncpt_id = syncpt_id;
+		__entry->threshold = threshold;
+		__entry->queue_begin_timestamp = queue_begin_timestamp;
+		__entry->queue_end_timestamp = queue_end_timestamp;
+		__entry->prepare_begin_timestamp = prepare_begin_timestamp;
+		__entry->prepare_end_timestamp = prepare_end_timestamp;
+		__entry->vpu_begin_timestamp = vpu_begin_timestamp;
+		__entry->vpu_end_timestamp = vpu_end_timestamp;
+		__entry->post_begin_timestamp = post_begin_timestamp;
+		__entry->post_end_timestamp = post_end_timestamp;
+		__entry->queue_id = queue_id;
+		__entry->vpu_id = vpu_id;
+	),
+	TP_printk("job_id=%u syncpt_id=%u threshold=%u vpu_id=%u queue_id=%u "
+		"queue_begin=%llu queue_end=%llu "
+		"prepare_begin=%llu prepare_end=%llu "
+		"vpu_begin=%llu vpu_end=%llu "
+		"post_begin=%llu post_end=%llu",
+		__entry->job_id, __entry->syncpt_id, __entry->threshold,
+		__entry->vpu_id, __entry->queue_id,
+		__entry->queue_begin_timestamp, __entry->queue_end_timestamp,
+		__entry->prepare_begin_timestamp, __entry->prepare_end_timestamp,
+		__entry->vpu_begin_timestamp, __entry->vpu_end_timestamp,
+		__entry->post_begin_timestamp, __entry->post_end_timestamp
+	)
+);
+
+DECLARE_EVENT_CLASS(job_fence,
+	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
+	TP_ARGS(job_id, syncpt_id, threshold),
+	TP_STRUCT__entry(
+		__field(u32, job_id)
+		__field(u32, syncpt_id)
+		__field(u32, threshold)
+	),
+	TP_fast_assign(
+		__entry->job_id = job_id;
+		__entry->syncpt_id = syncpt_id;
+		__entry->threshold = threshold;
+	),
+	TP_printk("job_id=%u syncpt_id=%u threshold=%u",
+		__entry->job_id, __entry->syncpt_id, __entry->threshold
+	)
+);
+
+DEFINE_EVENT(job_fence, job_prefence,
+	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
+	TP_ARGS(job_id, syncpt_id, threshold));
+
+DEFINE_EVENT(job_fence, job_postfence,
+	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
+	TP_ARGS(job_id, syncpt_id, threshold));
+
+TRACE_EVENT(job_timestamps,
+	TP_PROTO(u32 job_id, u64 begin, u64 end),
+	TP_ARGS(job_id, begin, end),
+	TP_STRUCT__entry(
+		__field(u32, job_id)
+		__field(u64, begin)
+		__field(u64, end)
+	),
+	TP_fast_assign(
+		__entry->job_id = job_id;
+		__entry->begin = begin;
+		__entry->end = end;
+	),
+	TP_printk("job_id=%u begin=%llu end=%llu",
+		__entry->job_id, __entry->begin, __entry->end
+	)
+);
+
+#endif /* End of _TRACE_NVPVA_FTRACE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>