nvdla: kmd: Add ftrace support

+ Replace eventlib with ftrace for the Linux-OOT kernel
+ Add event logging for:
 * job_submit
 * job_prefence
 * job_postfence
 * job_timestamps

JIRA DLA-6820

Change-Id: I4d1ad1b535b2422257e9e1dce8d94ee317c65feb
Signed-off-by: Akshata Bhat <akshatab@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2906273
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Amit Sharma (SW-TEGRA) <amisharma@nvidia.com>
Reviewed-by: Arvind M <am@nvidia.com>
Reviewed-by: Ken Adams <kadams@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
Akshata
2023-05-17 20:49:51 +00:00
committed by mobile promotions
parent a4136b7e63
commit 2e59846460
2 changed files with 126 additions and 33 deletions

View File

@@ -22,6 +22,9 @@
#include "nvdla_debug.h" #include "nvdla_debug.h"
#include "dla_os_interface.h" #include "dla_os_interface.h"
#define CREATE_TRACE_POINTS
#include <trace/events/nvdla_ftrace.h>
#define NVDLA_QUEUE_ABORT_TIMEOUT 10000 /* 10 sec */ #define NVDLA_QUEUE_ABORT_TIMEOUT 10000 /* 10 sec */
#define NVDLA_QUEUE_ABORT_RETRY_PERIOD 500 /* 500 ms */ #define NVDLA_QUEUE_ABORT_RETRY_PERIOD 500 /* 500 ms */
@@ -31,6 +34,12 @@ struct nvdla_add_fence_action_cb_args {
u8 **mem; u8 **mem;
}; };
/*
 * Compute a unique id identifying a task within a particular queue.
 *
 * Layout: queue_id in bits [31:16], sequence_id in bits [15:0].
 *
 * Note: the u16 operands undergo integer promotion to signed int, so the
 * shift must be performed on an explicit uint32_t — shifting a promoted
 * value of 0x8000 or greater left by 16 would overflow signed int, which
 * is undefined behavior in C.
 */
static uint32_t nvdla_compute_task_id(u16 sequence_id, u16 queue_id)
{
	return ((uint32_t)queue_id << 16) | (uint32_t)sequence_id;
}
/* task management API's */ /* task management API's */
static void nvdla_queue_dump_op(struct nvdla_queue *queue, struct seq_file *s) static void nvdla_queue_dump_op(struct nvdla_queue *queue, struct seq_file *s)
{ {
@@ -330,14 +339,16 @@ static void nvdla_queue_update(void *priv)
u64 timestamp_start, timestamp_end; u64 timestamp_start, timestamp_end;
u64 *timestamp_ptr; u64 *timestamp_ptr;
int n_tasks_completed = 0; int n_tasks_completed = 0;
uint32_t task_id;
int i;
mutex_lock(&queue->list_lock); mutex_lock(&queue->list_lock);
nvdla_dbg_fn(pdev, ""); nvdla_dbg_fn(pdev, "");
/* check which task(s) finished */ /* check which task(s) finished */
list_for_each_entry_safe(task, safe, &queue->tasklist, list) { list_for_each_entry_safe(task, safe, &queue->tasklist, list) {
task_id = nvdla_compute_task_id(task->task_desc->sequence,
task->task_desc->queue_id);
task_complete = nvhost_syncpt_is_expired_ext(pdev, task_complete = nvhost_syncpt_is_expired_ext(pdev,
queue->syncpt_id, task->fence); queue->syncpt_id, task->fence);
@@ -354,21 +365,16 @@ static void nvdla_queue_update(void *priv)
timestamp_end = *timestamp_ptr >> 5; timestamp_end = *timestamp_ptr >> 5;
timestamp_start = (*timestamp_ptr - timestamp_start = (*timestamp_ptr -
(tsp_notifier->info32 * 1000)) >> 5; (tsp_notifier->info32 * 1000)) >> 5;
nvhost_eventlib_log_task(pdev,
queue->syncpt_id, if (IS_ENABLED(CONFIG_TRACING)) {
task->fence, trace_job_timestamps(task_id, timestamp_start, timestamp_end);
timestamp_start,
timestamp_end);
/* Record task postfences */ /* Record task postfences */
nvhost_eventlib_log_fences(pdev, for (i = 0; i < task->num_postfences; i++) {
queue->syncpt_id, trace_job_postfence(task_id, task->postfences[i].syncpoint_index,
task->fence, task->postfences[i].syncpoint_value);
task->postfences, }
task->num_postfences, }
NVDEV_FENCE_KIND_POST,
timestamp_end);
nvdla_task_free_locked(task); nvdla_task_free_locked(task);
n_tasks_completed++; n_tasks_completed++;
} }
@@ -1179,7 +1185,7 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
continue; continue;
if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) || if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
(task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) { (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
task->prefences[i].syncpoint_index = task->prefences[i].syncpoint_index =
queue->syncpt_id; queue->syncpt_id;
task->prefences[i].syncpoint_value = task->prefences[i].syncpoint_value =
@@ -1198,7 +1204,7 @@ int nvdla_emulator_submit(struct nvdla_queue *queue, struct nvdla_emu_task *task
continue; continue;
if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) || if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
(task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) { (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
task->postfences[i].syncpoint_index = task->postfences[i].syncpoint_index =
queue->syncpt_id; queue->syncpt_id;
task->postfences[i].syncpoint_value = task->postfences[i].syncpoint_value =
@@ -1238,7 +1244,7 @@ int nvdla_get_signal_fences(struct nvdla_queue *queue, void *in_task)
continue; continue;
if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) || if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
(task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) { (task->prefences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
task->prefences[i].syncpoint_index = task->prefences[i].syncpoint_index =
queue->syncpt_id; queue->syncpt_id;
task->prefences[i].syncpoint_value = task->prefences[i].syncpoint_value =
@@ -1257,7 +1263,7 @@ int nvdla_get_signal_fences(struct nvdla_queue *queue, void *in_task)
continue; continue;
if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) || if ((task->postfences[i].type == NVDEV_FENCE_TYPE_SYNCPT) ||
(task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) { (task->postfences[i].type == NVDEV_FENCE_TYPE_SYNC_FD)) {
task->postfences[i].syncpoint_index = task->postfences[i].syncpoint_index =
queue->syncpt_id; queue->syncpt_id;
task->postfences[i].syncpoint_value = task->postfences[i].syncpoint_value =
@@ -1285,7 +1291,9 @@ static int nvdla_queue_submit_op(struct nvdla_queue *queue, void *in_task)
struct nvdla_cmd_data cmd_data; struct nvdla_cmd_data cmd_data;
uint32_t method_data; uint32_t method_data;
uint32_t method_id; uint32_t method_id;
uint32_t task_id;
int err = 0; int err = 0;
int i;
u64 timestamp; u64 timestamp;
nvdla_dbg_fn(pdev, ""); nvdla_dbg_fn(pdev, "");
@@ -1295,6 +1303,8 @@ static int nvdla_queue_submit_op(struct nvdla_queue *queue, void *in_task)
/* Get a reference before registration or submission */ /* Get a reference before registration or submission */
nvdla_task_get(task); nvdla_task_get(task);
task_id = nvdla_compute_task_id(task->task_desc->sequence, task->task_desc->queue_id);
/* get fence from nvhost for MMIO mode*/ /* get fence from nvhost for MMIO mode*/
if (nvdla_dev->submit_mode == NVDLA_SUBMIT_MODE_MMIO) { if (nvdla_dev->submit_mode == NVDLA_SUBMIT_MODE_MMIO) {
task->fence = nvhost_syncpt_incr_max_ext(pdev, task->fence = nvhost_syncpt_incr_max_ext(pdev,
@@ -1365,24 +1375,21 @@ static int nvdla_queue_submit_op(struct nvdla_queue *queue, void *in_task)
nvdla_dbg_err(pdev, "task[%p] submit failed", task); nvdla_dbg_err(pdev, "task[%p] submit failed", task);
/* deletes invalid task from queue, puts refs */ /* deletes invalid task from queue, puts refs */
nvhost_syncpt_set_min_update(pdev, queue->syncpt_id, nvhost_syncpt_set_min_update(pdev, queue->syncpt_id,
task->fence); task->fence);
} }
} }
if (!err) { if (IS_ENABLED(CONFIG_TRACING)) {
/* If submitted, record task submit and prefences */ if (!err) {
nvhost_eventlib_log_submit(pdev, /* If submitted, record task submit and prefences */
queue->syncpt_id, trace_job_submit(&pdev->dev, pdata->class, task_id, task->num_prefences, timestamp);
task->fence,
timestamp);
nvhost_eventlib_log_fences(pdev, /* Record task prefences */
queue->syncpt_id, for (i = 0; i < task->num_prefences; i++) {
task->fence, trace_job_prefence(task_id, task->prefences[i].syncpoint_index,
task->prefences, task->prefences[i].syncpoint_value);
task->num_prefences, }
NVDEV_FENCE_KIND_PRE, }
timestamp);
} }
mutex_unlock(&queue->list_lock); mutex_unlock(&queue->list_lock);

View File

@@ -0,0 +1,86 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023, NVIDIA Corporation. All rights reserved.
*
* NVDLA event logging to ftrace
*
* Tracepoint definitions for NVDLA job lifecycle events. This header is
* included from the queue code with CREATE_TRACE_POINTS defined, which
* instantiates the tracepoint bodies there.
*/
/* Trace subsystem name; events appear under events/nvdla_ftrace/ in tracefs. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM nvdla_ftrace
/*
* Non-standard include guard: TRACE_HEADER_MULTI_READ lets the tracepoint
* machinery re-read this header multiple times (required by
* <trace/define_trace.h> at the bottom of the file).
*/
#if !defined(_TRACE_NVDLA_FTRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NVDLA_FTRACE_H
#include <linux/tracepoint.h>
#include <linux/device.h>
/*
* job_submit: emitted when a task is submitted to the DLA queue.
* Records the submitting device, engine class, unique job id
* (queue_id << 16 | sequence_id), prefence count, and HW timestamp.
* NOTE(review): event names carry no "nvdla_" prefix; the TRACE_SYSTEM
* namespace disambiguates them in tracefs, but the global tracepoint
* symbol names could collide with other subsystems — confirm acceptable.
*/
TRACE_EVENT(job_submit,
TP_PROTO(struct device *dev, u32 class_id, u32 job_id, u32 num_fences, u64 hw_timestamp),
TP_ARGS(dev, class_id, job_id, num_fences, hw_timestamp),
TP_STRUCT__entry(
__field(struct device *, dev)
__field(u32, class_id)
__field(u32, job_id)
__field(u32, num_fences)
__field(u64, hw_timestamp)
),
TP_fast_assign(
__entry->dev = dev;
__entry->class_id = class_id;
__entry->job_id = job_id;
__entry->num_fences = num_fences;
__entry->hw_timestamp = hw_timestamp;
),
TP_printk("%s class=%02x id=%u fences=%u ts=%llu",
dev_name(__entry->dev), __entry->class_id, __entry->job_id,
__entry->num_fences, __entry->hw_timestamp
)
);
/*
* job_fence: shared event class for pre/post fence logging — one record
* per fence, keyed by job id, with the syncpoint index and threshold.
*/
DECLARE_EVENT_CLASS(job_fence,
TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
TP_ARGS(job_id, syncpt_id, threshold),
TP_STRUCT__entry(
__field(u32, job_id)
__field(u32, syncpt_id)
__field(u32, threshold)
),
TP_fast_assign(
__entry->job_id = job_id;
__entry->syncpt_id = syncpt_id;
__entry->threshold = threshold;
),
TP_printk("job_id=%u syncpt_id=%u threshold=%u",
__entry->job_id, __entry->syncpt_id, __entry->threshold
)
);
/* job_prefence: one event per prefence recorded at submit time. */
DEFINE_EVENT(job_fence, job_prefence,
TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
TP_ARGS(job_id, syncpt_id, threshold));
/* job_postfence: one event per postfence recorded at task completion. */
DEFINE_EVENT(job_fence, job_postfence,
TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
TP_ARGS(job_id, syncpt_id, threshold));
/*
* job_timestamps: emitted on task completion with the HW begin/end
* timestamps for the job.
*/
TRACE_EVENT(job_timestamps,
TP_PROTO(u32 job_id, u64 begin, u64 end),
TP_ARGS(job_id, begin, end),
TP_STRUCT__entry(
__field(u32, job_id)
__field(u64, begin)
__field(u64, end)
),
TP_fast_assign(
__entry->job_id = job_id;
__entry->begin = begin;
__entry->end = end;
),
TP_printk("job_id=%u begin=%llu end=%llu",
__entry->job_id, __entry->begin, __entry->end
)
);
#endif /* End of _TRACE_NVDLA_FTRACE_H */
/* This part must be outside protection */
#include <trace/define_trace.h>