drivers: pva: use ftrace instead of eventlib

Deprecate the use of eventlib and report task statistics
through ftrace instead.

Pass the program ID and stream ID to the ftrace events.

Bug 4080222

Change-Id: Id304baba109cde5392f3f297a745c8084b07613a
Signed-off-by: omar <onemri@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2911268
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Nikita Chumakov <nchumakov@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
omar
2023-05-26 15:05:11 +00:00
committed by mobile promotions
parent cde5ffd7c5
commit 3fda4da5d8
6 changed files with 240 additions and 112 deletions

View File

@@ -283,6 +283,7 @@ struct nvpva_queue_pool *nvpva_queue_init(struct platform_device *pdev,
queue->pool = pool; queue->pool = pool;
queue->task_pool = (void *)&task_pool[i]; queue->task_pool = (void *)&task_pool[i];
queue->batch_id = 0U; queue->batch_id = 0U;
queue->task_idx = 0U;
nvpva_queue_get_task_size(queue); nvpva_queue_get_task_size(queue);
} }

View File

@@ -61,7 +61,7 @@ struct nvpva_queue {
struct nvpva_queue_pool *pool; struct nvpva_queue_pool *pool;
struct kref kref; struct kref kref;
u32 id; u32 id;
u32 task_idx;
/*wait list for task mem requester*/ /*wait list for task mem requester*/
struct semaphore task_pool_sem; struct semaphore task_pool_sem;

View File

@@ -170,6 +170,8 @@ static int pva_copy_task(struct nvpva_ioctl_task *ioctl_task,
* copy them. * copy them.
*/ */
task->exe_id = ioctl_task->exe_id; task->exe_id = ioctl_task->exe_id;
task->stream_id = ioctl_task->stream_id;
task->prog_id = ioctl_task->prog_id;
task->l2_alloc_size = ioctl_task->l2_alloc_size; task->l2_alloc_size = ioctl_task->l2_alloc_size;
task->symbol_payload_size = ioctl_task->symbol_payload.size; task->symbol_payload_size = ioctl_task->symbol_payload.size;
task->flags = ioctl_task->flags; task->flags = ioctl_task->flags;

View File

@@ -18,11 +18,7 @@
#include <linux/nvhost.h> #include <linux/nvhost.h>
#include <linux/host1x.h> #include <linux/host1x.h>
#ifdef CONFIG_EVENTLIB
#include <linux/keventlib.h>
#include <uapi/linux/nvdev_fence.h> #include <uapi/linux/nvdev_fence.h>
#include <uapi/linux/nvhost_events.h>
#endif
#include <linux/version.h> #include <linux/version.h>
#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE #if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE
@@ -34,6 +30,8 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <uapi/linux/nvpva_ioctl.h> #include <uapi/linux/nvpva_ioctl.h>
#include <trace/events/nvhost_pva.h> #include <trace/events/nvhost_pva.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nvpva_ftrace.h>
#include "pva.h" #include "pva.h"
#include "nvpva_buffer.h" #include "nvpva_buffer.h"
@@ -812,10 +810,9 @@ out:
return err; return err;
} }
#ifdef CONFIG_EVENTLIB
static void static void
pva_eventlib_fill_fence(struct nvdev_fence *dst_fence, pva_trace_log_fill_fence(struct nvdev_fence *dst_fence,
struct nvpva_submit_fence *src_fence) struct nvpva_submit_fence *src_fence)
{ {
static u32 obj_type[] = {NVDEV_FENCE_TYPE_SYNCPT, static u32 obj_type[] = {NVDEV_FENCE_TYPE_SYNCPT,
@@ -844,106 +841,53 @@ pva_eventlib_fill_fence(struct nvdev_fence *dst_fence,
} }
static void static void
pva_eventlib_record_r5_states(struct platform_device *pdev, pva_trace_log_record_task_states(struct platform_device *pdev,
u32 syncpt_id, u32 syncpt_id,
u32 syncpt_thresh, u32 syncpt_thresh,
struct pva_task_statistics_s *stats, struct pva_task_statistics_s *stats,
struct pva_submit_task *task) struct pva_submit_task *task)
{ {
struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
struct nvhost_pva_task_state state;
struct nvdev_fence post_fence; struct nvdev_fence post_fence;
struct nvpva_submit_fence *fence; struct nvpva_submit_fence *fence;
u8 i; u8 i;
if ((task->pva->profiling_level == 0) || (!pdata->eventlib_id)) if ((task->pva->profiling_level == 0) || (!IS_ENABLED(CONFIG_TRACING)))
return; return;
/* Record task postfences */ /* Record task postfences */
for (i = 0 ; i < task->num_pva_fence_actions[NVPVA_FENCE_POST]; i++) { for (i = 0 ; i < task->num_pva_fence_actions[NVPVA_FENCE_POST]; i++) {
fence = &(task->pva_fence_actions[NVPVA_FENCE_POST][i].fence); fence = &(task->pva_fence_actions[NVPVA_FENCE_POST][i].fence);
pva_eventlib_fill_fence(&post_fence, fence); pva_trace_log_fill_fence(&post_fence, fence);
nvhost_eventlib_log_fences(pdev, trace_job_postfence(task->id,
post_fence.syncpoint_index,
post_fence.syncpoint_value);
}
if (task->pva->profiling_level == 1) {
trace_pva_job_base_event(task->id,
syncpt_id, syncpt_id,
syncpt_thresh, syncpt_thresh,
&post_fence, stats->vpu_assigned,
1, stats->queue_id,
NVDEV_FENCE_KIND_POST, stats->vpu_start_time,
stats->complete_time);
}
state.class_id = pdata->class;
state.syncpt_id = syncpt_id;
state.syncpt_thresh = syncpt_thresh;
state.vpu_id = stats->vpu_assigned;
state.queue_id = stats->queue_id;
state.iova = task->dma_addr;
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_BEGIN
: NVHOST_PVA_VPU1_BEGIN,
stats->vpu_start_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_END
: NVHOST_PVA_VPU1_END,
stats->vpu_complete_time); stats->vpu_complete_time);
keventlib_write(pdata->eventlib_id, } else if (task->pva->profiling_level >= 2) {
&state, trace_pva_job_ext_event(task->id,
sizeof(state), syncpt_id,
NVHOST_PVA_PREPARE_END, syncpt_thresh,
stats->vpu_start_time); stats->vpu_assigned,
keventlib_write(pdata->eventlib_id, stats->queue_id,
&state, stats->queued_time,
sizeof(state), stats->vpu_assigned_time,
NVHOST_PVA_POST_BEGIN, stats->vpu_assigned_time,
stats->vpu_complete_time); stats->vpu_start_time,
stats->vpu_start_time,
if (task->pva->profiling_level >= 2) { stats->vpu_complete_time,
keventlib_write(pdata->eventlib_id, stats->vpu_complete_time,
&state,
sizeof(state),
NVHOST_PVA_QUEUE_BEGIN,
stats->queued_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_QUEUE_END,
stats->vpu_assigned_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_PREPARE_BEGIN,
stats->vpu_assigned_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_POST_END,
stats->complete_time); stats->complete_time);
} }
} }
#else
static void
pva_eventlib_fill_fence(struct nvdev_fence *dst_fence,
struct nvpva_submit_fence *src_fence)
{
}
static void
pva_eventlib_record_r5_states(struct platform_device *pdev,
u32 syncpt_id,
u32 syncpt_thresh,
struct pva_task_statistics_s *stats,
struct pva_submit_task *task)
{
}
#endif
void pva_task_free(struct kref *ref) void pva_task_free(struct kref *ref)
{ {
@@ -1053,15 +997,13 @@ static void update_one_task(struct pva *pva)
stats->vpu_assigned, stats->vpu_assigned,
r5_overhead); r5_overhead);
prof: prof:
if (task->pva->profiling_level == 0) if ((task->pva->profiling_level == 0) || (!IS_ENABLED(CONFIG_TRACING)))
goto out; goto out;
nvhost_eventlib_log_task(pdev, trace_job_timestamps(task->id,
queue->syncpt_id,
task->local_sync_counter,
stats->vpu_assigned_time, stats->vpu_assigned_time,
stats->complete_time); stats->complete_time);
pva_eventlib_record_r5_states(pdev, pva_trace_log_record_task_states(pdev,
queue->syncpt_id, queue->syncpt_id,
task->local_sync_counter, task->local_sync_counter,
stats, stats,
@@ -1156,6 +1098,8 @@ static int pva_task_submit(const struct pva_submit_tasks *task_header)
{ {
struct pva_submit_task *first_task = task_header->tasks[0]; struct pva_submit_task *first_task = task_header->tasks[0];
struct nvpva_queue *queue = first_task->queue; struct nvpva_queue *queue = first_task->queue;
struct nvhost_device_data *pdata = platform_get_drvdata(first_task->pva->pdev);
u64 timestamp; u64 timestamp;
int err = 0; int err = 0;
u32 i; u32 i;
@@ -1225,22 +1169,20 @@ static int pva_task_submit(const struct pva_submit_tasks *task_header)
struct nvdev_fence pre_fence; struct nvdev_fence pre_fence;
struct pva_submit_task *task = task_header->tasks[i]; struct pva_submit_task *task = task_header->tasks[i];
trace_job_submit(&task->pva->pdev->dev,
pdata->class, task->id,
task->num_prefences,
task->prog_id,
task->stream_id,
timestamp);
for (j = 0; j < task->num_prefences; j++) { for (j = 0; j < task->num_prefences; j++) {
pva_eventlib_fill_fence(&pre_fence, pva_trace_log_fill_fence(&pre_fence,
&task->prefences[j]); &task->prefences[j]);
nvhost_eventlib_log_fences(task->pva->pdev, trace_job_prefence(task->id,
queue->syncpt_id, pre_fence.syncpoint_index,
task->local_sync_counter, pre_fence.syncpoint_value);
&pre_fence,
1,
NVDEV_FENCE_KIND_PRE,
timestamp);
}
nvhost_eventlib_log_submit(task->pva->pdev, }
queue->syncpt_id,
task->local_sync_counter,
timestamp);
} }
out: out:
return 0; return 0;
@@ -1296,7 +1238,9 @@ set_task_parameters(const struct pva_submit_tasks *task_header)
status_interface = (task->queue->id + 1U); status_interface = (task->queue->id + 1U);
for (idx = 0U; idx < task_header->num_tasks; idx++) { for (idx = 0U; idx < task_header->num_tasks; idx++) {
queue->task_idx = (queue->task_idx + 1) & 0xFFFFFF;
task = task_header->tasks[idx]; task = task_header->tasks[idx];
task->id = queue->task_idx | (queue->id << 24);
hw_task = task->va; hw_task = task->va;
WARN_ON(task->pool_index > 0xFF); WARN_ON(task->pool_index > 0xFF);
hw_task->task.task_id = task->pool_index; hw_task->task.task_id = task->pool_index;

View File

@@ -93,6 +93,8 @@ struct pva_submit_task {
bool pinned_app; bool pinned_app;
u32 exe_id; u32 exe_id;
u64 stream_id;
u64 prog_id;
u32 l2_alloc_size; /* Not applicable for Xavier */ u32 l2_alloc_size; /* Not applicable for Xavier */
struct pva_cb *stdout; struct pva_cb *stdout;
@@ -116,7 +118,7 @@ struct pva_submit_task {
u32 sem_thresh; u32 sem_thresh;
u32 sem_num; u32 sem_num;
u32 id;
/* Data provided by userspace "as is" */ /* Data provided by userspace "as is" */
struct nvpva_submit_fence prefences[NVPVA_TASK_MAX_PREFENCES]; struct nvpva_submit_fence prefences[NVPVA_TASK_MAX_PREFENCES];
struct nvpva_fence_action struct nvpva_fence_action

View File

@@ -0,0 +1,179 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023, NVIDIA Corporation. All rights reserved.
*
* NVPVA event logging to ftrace
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM nvpva_ftrace
#if !defined(_TRACE_NVPVA_FTRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NVPVA_FTRACE_H
#include <linux/tracepoint.h>
#include <linux/device.h>
/*
 * job_submit - trace event describing one submitted job.
 *
 * Records the submitting device, its class ID, the job ID, the number of
 * fences, the program ID, the stream ID and a hardware timestamp.
 *
 * NOTE(review): only the raw struct device pointer is stored in the record,
 * and dev_name() is evaluated on it inside TP_printk. Confirm the device is
 * guaranteed to outlive every reader of the trace buffer, or consider
 * storing the name itself (e.g. with __string()).
 */
TRACE_EVENT(job_submit,
	TP_PROTO(struct device *dev, u32 class_id, u32 job_id, u32 num_fences,
		 u64 prog_id, u64 stream_id, u64 hw_timestamp),
	TP_ARGS(dev, class_id, job_id, num_fences, prog_id, stream_id, hw_timestamp),
	TP_STRUCT__entry(
		__field(struct device *, dev)
		__field(u32, class_id)
		__field(u32, job_id)
		__field(u32, num_fences)
		__field(u64, prog_id)
		__field(u64, stream_id)
		__field(u64, hw_timestamp)
	),
	TP_fast_assign(
		__entry->dev = dev;
		__entry->class_id = class_id;
		__entry->job_id = job_id;
		__entry->num_fences = num_fences;
		__entry->prog_id = prog_id;
		__entry->stream_id = stream_id;
		__entry->hw_timestamp = hw_timestamp;
	),
	/*
	 * The format string prints stream_id before prog_id, so the arguments
	 * must be passed in that same order to land under the right label.
	 */
	TP_printk("%s class=%02x id=%u fences=%u stream_id=%llu prog_id=%llu ts=%llu",
		dev_name(__entry->dev), __entry->class_id, __entry->job_id,
		__entry->num_fences, __entry->stream_id, __entry->prog_id,
		__entry->hw_timestamp
	)
);
/*
 * pva_job_base_event - trace event with the basic timing of one job.
 *
 * Records the job ID, syncpoint ID and threshold, the VPU and queue IDs,
 * and the VPU begin/end timestamps. The 64-bit timestamp fields are
 * declared ahead of the 32-bit ID fields in the record.
 */
TRACE_EVENT(pva_job_base_event,
	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold, u32 vpu_id,
		 u32 queue_id, u64 vpu_begin_timestamp, u64 vpu_end_timestamp),
	TP_ARGS(job_id, syncpt_id, threshold, vpu_id, queue_id,
		vpu_begin_timestamp, vpu_end_timestamp),
	TP_STRUCT__entry(
		__field(u64, vpu_begin_timestamp)
		__field(u64, vpu_end_timestamp)
		__field(u32, job_id)
		__field(u32, syncpt_id)
		__field(u32, threshold)
		__field(u32, vpu_id)
		__field(u32, queue_id)
	),
	TP_fast_assign(
		/* Timestamps */
		__entry->vpu_begin_timestamp = vpu_begin_timestamp;
		__entry->vpu_end_timestamp = vpu_end_timestamp;
		/* Identifiers */
		__entry->job_id = job_id;
		__entry->syncpt_id = syncpt_id;
		__entry->threshold = threshold;
		__entry->vpu_id = vpu_id;
		__entry->queue_id = queue_id;
	),
	TP_printk("job_id=%u syncpt_id=%u threshold=%u vpu_id=%u "
		  "queue_id=%u vpu_begin=%llu vpu_end=%llu ",
		__entry->job_id,
		__entry->syncpt_id,
		__entry->threshold,
		__entry->vpu_id,
		__entry->queue_id,
		__entry->vpu_begin_timestamp,
		__entry->vpu_end_timestamp
	)
);
/*
 * pva_job_ext_event - trace event with the extended timing of one job.
 *
 * Records the job ID, syncpoint ID and threshold, the VPU and queue IDs,
 * and a begin/end timestamp pair for each of four phases: queue, prepare,
 * VPU and post. The 64-bit timestamp fields are declared ahead of the
 * 32-bit ID fields in the record.
 */
TRACE_EVENT(pva_job_ext_event,
	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold, u32 vpu_id, u32 queue_id,
		 u64 queue_begin_timestamp, u64 queue_end_timestamp,
		 u64 prepare_begin_timestamp, u64 prepare_end_timestamp,
		 u64 vpu_begin_timestamp, u64 vpu_end_timestamp,
		 u64 post_begin_timestamp, u64 post_end_timestamp),
	TP_ARGS(job_id, syncpt_id, threshold, vpu_id, queue_id,
		queue_begin_timestamp, queue_end_timestamp,
		prepare_begin_timestamp, prepare_end_timestamp,
		vpu_begin_timestamp, vpu_end_timestamp,
		post_begin_timestamp, post_end_timestamp),
	TP_STRUCT__entry(
		__field(u64, queue_begin_timestamp)
		__field(u64, queue_end_timestamp)
		__field(u64, prepare_begin_timestamp)
		__field(u64, prepare_end_timestamp)
		__field(u64, vpu_begin_timestamp)
		__field(u64, vpu_end_timestamp)
		__field(u64, post_begin_timestamp)
		__field(u64, post_end_timestamp)
		__field(u32, job_id)
		__field(u32, syncpt_id)
		__field(u32, threshold)
		__field(u32, vpu_id)
		__field(u32, queue_id)
	),
	TP_fast_assign(
		/* Per-phase timestamps, in record order */
		__entry->queue_begin_timestamp = queue_begin_timestamp;
		__entry->queue_end_timestamp = queue_end_timestamp;
		__entry->prepare_begin_timestamp = prepare_begin_timestamp;
		__entry->prepare_end_timestamp = prepare_end_timestamp;
		__entry->vpu_begin_timestamp = vpu_begin_timestamp;
		__entry->vpu_end_timestamp = vpu_end_timestamp;
		__entry->post_begin_timestamp = post_begin_timestamp;
		__entry->post_end_timestamp = post_end_timestamp;
		/* Identifiers */
		__entry->job_id = job_id;
		__entry->syncpt_id = syncpt_id;
		__entry->threshold = threshold;
		__entry->vpu_id = vpu_id;
		__entry->queue_id = queue_id;
	),
	TP_printk("job_id=%u syncpt_id=%u threshold=%u vpu_id=%u queue_id=%u "
		  "queue_begin=%llu queue_end=%llu "
		  "prepare_begin=%llu prepare_end=%llu "
		  "vpu_begin=%llu vpu_end=%llu "
		  "post_begin=%llu post_end=%llu",
		__entry->job_id,
		__entry->syncpt_id,
		__entry->threshold,
		__entry->vpu_id,
		__entry->queue_id,
		__entry->queue_begin_timestamp,
		__entry->queue_end_timestamp,
		__entry->prepare_begin_timestamp,
		__entry->prepare_end_timestamp,
		__entry->vpu_begin_timestamp,
		__entry->vpu_end_timestamp,
		__entry->post_begin_timestamp,
		__entry->post_end_timestamp
	)
);
/*
 * job_fence - event class for fence events attached to a job.
 *
 * Each record carries the job ID plus the syncpoint ID and threshold
 * of one fence.
 */
DECLARE_EVENT_CLASS(job_fence,
	TP_PROTO(u32 job_id, u32 syncpt_id, u32 threshold),
	TP_ARGS(job_id, syncpt_id, threshold),
	TP_STRUCT__entry(
		__field(u32, job_id)
		__field(u32, syncpt_id)
		__field(u32, threshold)
	),
	TP_fast_assign(
		__entry->threshold = threshold;
		__entry->syncpt_id = syncpt_id;
		__entry->job_id = job_id;
	),
	TP_printk("job_id=%u syncpt_id=%u threshold=%u",
		__entry->job_id,
		__entry->syncpt_id,
		__entry->threshold
	)
);
/* job_prefence - job_fence class event; takes job ID, syncpoint ID, threshold. */
DEFINE_EVENT(job_fence, job_prefence,
	TP_PROTO(u32 job_id,
		 u32 syncpt_id,
		 u32 threshold),
	TP_ARGS(job_id, syncpt_id, threshold)
);
/* job_postfence - job_fence class event; takes job ID, syncpoint ID, threshold. */
DEFINE_EVENT(job_fence, job_postfence,
	TP_PROTO(u32 job_id,
		 u32 syncpt_id,
		 u32 threshold),
	TP_ARGS(job_id, syncpt_id, threshold)
);
/*
 * job_timestamps - trace event with the begin and end timestamps of a job.
 *
 * Records the job ID together with one begin and one end timestamp.
 */
TRACE_EVENT(job_timestamps,
	TP_PROTO(u32 job_id, u64 begin, u64 end),
	TP_ARGS(job_id, begin, end),
	TP_STRUCT__entry(
		__field(u32, job_id)
		__field(u64, begin)
		__field(u64, end)
	),
	TP_fast_assign(
		__entry->end = end;
		__entry->begin = begin;
		__entry->job_id = job_id;
	),
	TP_printk("job_id=%u begin=%llu end=%llu",
		__entry->job_id,
		__entry->begin,
		__entry->end
	)
);
#endif /* End of _TRACE_NVPVA_FTRACE_H */
/* This part must be outside protection */
#include <trace/define_trace.h>