diff --git a/drivers/video/tegra/host/nvdla/nvdla_queue.c b/drivers/video/tegra/host/nvdla/nvdla_queue.c
index 61dd1d3a..5f4aa075 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_queue.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_queue.c
@@ -22,6 +22,9 @@
 #include "nvdla_debug.h"
 #include "dla_os_interface.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/nvdla_ftrace.h>
+
 #define NVDLA_QUEUE_ABORT_TIMEOUT	10000	/* 10 sec */
 #define NVDLA_QUEUE_ABORT_RETRY_PERIOD	500	/* 500 ms */
 
@@ -31,6 +34,12 @@ struct nvdla_add_fence_action_cb_args {
 	u8 **mem;
 };
 
+/* Computing a unique id to identify a task in a particular queue */
+static uint32_t nvdla_compute_task_id(u16 sequence_id, u16 queue_id)
+{
+	return (queue_id << 16 | sequence_id);
+}
+
 /* task management API's */
 static void nvdla_queue_dump_op(struct nvdla_queue *queue, struct seq_file *s)
 {
@@ -330,14 +339,16 @@ static void nvdla_queue_update(void *priv)
 	u64 timestamp_start, timestamp_end;
 	u64 *timestamp_ptr;
 	int n_tasks_completed = 0;
-
+	uint32_t task_id;
+	int i;
 
 	mutex_lock(&queue->list_lock);
 
 	nvdla_dbg_fn(pdev, "");
 
 	/* check which task(s) finished */
 	list_for_each_entry_safe(task, safe, &queue->tasklist, list) {
-
+		task_id = nvdla_compute_task_id(task->task_desc->sequence,
+						task->task_desc->queue_id);
 		task_complete = nvhost_syncpt_is_expired_ext(pdev,
 					queue->syncpt_id, task->fence);
@@ -354,21 +365,16 @@ static void nvdla_queue_update(void *priv)
 			timestamp_end = *timestamp_ptr >> 5;
 			timestamp_start = (*timestamp_ptr -
 					(tsp_notifier->info32 * 1000)) >> 5;
-			nvhost_eventlib_log_task(pdev,
-						queue->syncpt_id,
-						task->fence,
-						timestamp_start,
-						timestamp_end);
+
+			if (IS_ENABLED(CONFIG_TRACING)) {
+				trace_job_timestamps(task_id, timestamp_start, timestamp_end);
 
 			/* Record task postfences */
-			nvhost_eventlib_log_fences(pdev,
-						queue->syncpt_id,
-						task->fence,
-						task->postfences,
-						task->num_postfences,
-						NVDEV_FENCE_KIND_POST,
-						timestamp_end);
-
+				for (i = 0; i < task->num_postfences; i++) {
+					trace_job_postfence(task_id, task->postfences[i].syncpoint_index,
+							task->postfences[i].syncpoint_value);
+				}
+			}
 			nvdla_task_free_locked(task);
 			n_tasks_completed++;
 		}
@@ -1179,7 +1185,7 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
 			continue;
 
 		if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
-			(task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
+		    (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
 			task->prefences[i].syncpoint_index =
 				queue->syncpt_id;
 			task->prefences[i].syncpoint_value =
@@ -1198,7 +1204,7 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
 			continue;
 
 		if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
-			(task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
+		    (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
 			task->postfences[i].syncpoint_index =
 				queue->syncpt_id;
 			task->postfences[i].syncpoint_value =
@@ -1238,7 +1244,7 @@ int nvdla_get_signal_fences(struct nvdla_queue *queue, void *in_task)
 			continue;
 
 		if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
-			(task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
+		    (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
 			task->prefences[i].syncpoint_index =
 				queue->syncpt_id;
 			task->prefences[i].syncpoint_value =
@@ -1257,7 +1263,7 @@ int nvdla_get_signal_fences(struct nvdla_queue *queue, void *in_task)
 			continue;
 
 		if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
-			(task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
+		    (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
 			task->postfences[i].syncpoint_index =
 				queue->syncpt_id;
 			task->postfences[i].syncpoint_value =
@@ -1285,7 +1291,9 @@ static int nvdla_queue_submit_op(struct nvdla_queue *queue, void *in_task)
 	struct nvdla_cmd_data cmd_data;
 	uint32_t method_data;
 	uint32_t method_id;
+	uint32_t task_id;
 	int err = 0;
+	int i;
 	u64 timestamp;
 
 	nvdla_dbg_fn(pdev, "");
@@ -1295,6 +1303,8 @@ static int nvdla_queue_submit_op(struct nvdla_queue *queue, void *in_task)
 	/* Get a reference before registration or submission */
 	nvdla_task_get(task);
 
+	task_id = nvdla_compute_task_id(task->task_desc->sequence, task->task_desc->queue_id);
+
 	/* get fence from nvhost for MMIO mode*/
 	if (nvdla_dev->submit_mode == NVDLA_SUBMIT_MODE_MMIO) {
 		task->fence = nvhost_syncpt_incr_max_ext(pdev,
@@ -1365,24 +1375,21 @@ static int nvdla_queue_submit_op(struct nvdla_queue *queue, void *in_task)
 			nvdla_dbg_err(pdev, "task[%p] submit failed", task);
 			/* deletes invalid task from queue, puts refs */
 			nvhost_syncpt_set_min_update(pdev, queue->syncpt_id,
-				task->fence);
+						     task->fence);
 		}
 	}
 
-	if (!err) {
-		/* If submitted, record task submit and prefences */
-		nvhost_eventlib_log_submit(pdev,
-					   queue->syncpt_id,
-					   task->fence,
-					   timestamp);
+	if (IS_ENABLED(CONFIG_TRACING)) {
+		if (!err) {
+			/* If submitted, record task submit and prefences */
+			trace_job_submit(&pdev->dev, pdata->class, task_id, task->num_prefences, timestamp);
 
-		nvhost_eventlib_log_fences(pdev,
-					   queue->syncpt_id,
-					   task->fence,
-					   task->prefences,
-					   task->num_prefences,
-					   NVDEV_FENCE_KIND_PRE,
-					   timestamp);
+			/* Record task prefences */
+			for (i = 0; i < task->num_prefences; i++) {
+				trace_job_prefence(task_id, task->prefences[i].syncpoint_index,
+						   task->prefences[i].syncpoint_value);
+			}
+		}
 	}
 
 	mutex_unlock(&queue->list_lock);
diff --git a/include/trace/events/nvdla_ftrace.h b/include/trace/events/nvdla_ftrace.h
new file mode 100644
index 00000000..6500ff72
--- /dev/null
+++ b/include/trace/events/nvdla_ftrace.h
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023, NVIDIA Corporation. All rights reserved.
+ *
+ * NVDLA event logging to ftrace
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nvdla_ftrace
+
+#if !defined(_TRACE_NVDLA_FTRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NVDLA_FTRACE_H
+
+#include <linux/device.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(job_submit,
+	TP_PROTO(struct device *dev, u32 class_id, u32 job_id, u32 num_fences, u64 hw_timestamp),
+	TP_ARGS(dev, class_id, job_id, num_fences, hw_timestamp),
+	TP_STRUCT__entry(
+		__field(struct device *, dev)
+		__field(u32, class_id)
+		__field(u32, job_id)
+		__field(u32, num_fences)
+		__field(u64, hw_timestamp)
+	),
+	TP_fast_assign(
+		__entry->dev = dev;
+		__entry->class_id = class_id;
+		__entry->job_id = job_id;
+		__entry->num_fences = num_fences;
+		__entry->hw_timestamp = hw_timestamp;
+	),
+	TP_printk("%s class=%02x id=%u fences=%u ts=%llu",
+		dev_name(__entry->dev), __entry->class_id, __entry->job_id,
+		__entry->num_fences, __entry->hw_timestamp
+	)
+);
+
+DECLARE_EVENT_CLASS(job_fence,
+	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
+	TP_ARGS(job_id, syncpt_id, threshold),
+	TP_STRUCT__entry(
+		__field(u32, job_id)
+		__field(u32, syncpt_id)
+		__field(u32, threshold)
+	),
+	TP_fast_assign(
+		__entry->job_id = job_id;
+		__entry->syncpt_id = syncpt_id;
+		__entry->threshold = threshold;
+	),
+	TP_printk("job_id=%u syncpt_id=%u threshold=%u",
+		__entry->job_id, __entry->syncpt_id, __entry->threshold
+	)
+);
+
+DEFINE_EVENT(job_fence, job_prefence,
+	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
+	TP_ARGS(job_id, syncpt_id, threshold));
+
+DEFINE_EVENT(job_fence, job_postfence,
+	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
+	TP_ARGS(job_id, syncpt_id, threshold));
+
+TRACE_EVENT(job_timestamps,
+	TP_PROTO(u32 job_id, u64 begin, u64 end),
+	TP_ARGS(job_id, begin, end),
+	TP_STRUCT__entry(
+		__field(u32, job_id)
+		__field(u64, begin)
+		__field(u64, end)
+	),
+	TP_fast_assign(
+		__entry->job_id = job_id;
+		__entry->begin = begin;
+		__entry->end = end;
+	),
+	TP_printk("job_id=%u begin=%llu end=%llu",
+		__entry->job_id, __entry->begin, __entry->end
+	)
+);
+#endif /* End of _TRACE_NVDLA_FTRACE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
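
A note for anyone post-processing these events: nvdla_compute_task_id() packs queue_id into the upper 16 bits and sequence_id into the lower 16 bits, so the job_id carried by job_submit, job_prefence, job_postfence and job_timestamps can be split back into its components. Below is a minimal user-space sketch of that decode step; it is illustrative only, the helper names and sample values are made up and are not part of the patch.

/*
 * Illustrative sketch, not part of the patch: mirrors the bit layout used
 * by nvdla_compute_task_id() and shows how a trace post-processor could
 * recover queue_id and sequence_id from a job_id. Helper names and the
 * sample values in main() are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t pack_task_id(uint16_t sequence_id, uint16_t queue_id)
{
	/* same layout as nvdla_compute_task_id(): queue_id in bits 31..16 */
	return ((uint32_t)queue_id << 16) | sequence_id;
}

static void unpack_task_id(uint32_t task_id, uint16_t *sequence_id,
			   uint16_t *queue_id)
{
	*queue_id = task_id >> 16;
	*sequence_id = task_id & 0xffff;
}

int main(void)
{
	uint32_t job_id = pack_task_id(42, 3);
	uint16_t seq, q;

	unpack_task_id(job_id, &seq, &q);
	printf("job_id=0x%08x queue=%u seq=%u\n", job_id, q, seq);
	return 0;
}

With CONFIG_TRACING enabled, the new tracepoints would normally show up under the nvdla_ftrace group in tracefs (e.g. events/nvdla_ftrace/job_submit), where this decoding can be applied to the captured job_id values to correlate submit, fence and timestamp events for a given queue and sequence number.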