From 4efe441de5929eb3a69c8d6acbed0546366d8df1 Mon Sep 17 00:00:00 2001 From: abhinayaa Date: Tue, 26 Aug 2025 01:07:34 +0000 Subject: [PATCH] pva: redesign tracepoints for V3 Add new tracepoints to support profiling of PVA V3 workloads. These tracepoints will be supported only in non-safety builds. Change-Id: I5519c31e2f7db7bb3beab8a0dc1eb5d1d073a345 Signed-off-by: abhinayaa Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3439807 Tested-by: Nan Wang GVS: buildbot_gerritrpt Reviewed-by: Nan Wang Reviewed-by: Omar Nemri --- drivers/video/tegra/host/pva/Makefile | 1 + .../tegra/host/pva/src/fw/include/pva_fw.h | 74 +++++--- .../src/kmd/common/pva_kmd_shared_buffer.c | 25 ++- .../common/shim/pva_kmd_shim_trace_event.h | 55 +++++- .../kmd/linux/include/pva_kmd_linux_ftrace.h | 164 ++++++++++++++++++ .../src/kmd/linux/pva_kmd_linux_event_trace.c | 118 ++++--------- .../host/pva/src/private_api/pva_version.h | 2 +- 7 files changed, 322 insertions(+), 117 deletions(-) create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux_ftrace.h diff --git a/drivers/video/tegra/host/pva/Makefile b/drivers/video/tegra/host/pva/Makefile index a04faad4..e70ab8bb 100644 --- a/drivers/video/tegra/host/pva/Makefile +++ b/drivers/video/tegra/host/pva/Makefile @@ -82,6 +82,7 @@ pva_def_flags += \ -DPVA_BUILD_MODE_SIM=4 \ -DPVA_DEV_MAIN_COMPATIBLE=1 \ -DPVA_ENABLE_CUDA=1 \ + -DPVA_ENABLE_NSYS_PROFILING=1 \ -DPVA_ENABLE_R5_OCD=0 \ -DPVA_IS_DEBUG=0 \ -DPVA_SAFETY=0 \ diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h b/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h index 202e0461..dcf2c810 100644 --- a/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h @@ -212,7 +212,17 @@ struct pva_fw_cmdbuf_submit_info { uint32_t execution_timeout_ms; struct pva_fw_memory_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS]; struct pva_fw_postfence postfences[PVA_MAX_NUM_POSTFENCES]; + + /* + * 
The following fields are used for tracing by Nsight Systems (Nsys) + */ + // ID of the submission provided by the user uint64_t submit_id; + // Timestamp of the command buffer submission + uint64_t timestamp_submit; + // Process ID of the command buffer submission + // Used by Nsys to identify the process that submitted the command buffer + int32_t process_id; }; /* This is the header of the circular buffer */ @@ -419,7 +429,7 @@ enum pva_fw_timestamp_t { * message. KMD can further parse these messages to extract the exact size of the * message. */ -#define PVA_KMD_FW_BUF_ELEMENT_SIZE sizeof(struct pva_kmd_fw_msg_vpu_trace) +#define PVA_KMD_FW_BUF_ELEMENT_SIZE sizeof(struct pva_kmd_fw_msg_cmdbuf_trace) // TODO: remove element size and buffer size fields from this struct. // This struct is shared between KMD and FW. FW should not be able to change @@ -436,45 +446,57 @@ struct pva_fw_shared_buffer_header { struct pva_kmd_fw_buffer_msg_header { #define PVA_KMD_FW_BUF_MSG_TYPE_FW_EVENT 0 -#define PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE 1 -#define PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE 2 -#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 3 -#define PVA_KMD_FW_BUF_MSG_TYPE_FW_TRACEPOINT 4 +#define PVA_KMD_FW_BUF_MSG_TYPE_CMD_BUF_TRACE 1 +#define PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE 2 +#define PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE 3 +#define PVA_KMD_FW_BUF_MSG_TYPE_ENGINE_ACQUIRE_TRACE 4 +#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 5 +#define PVA_KMD_FW_BUF_MSG_TYPE_FW_TRACEPOINT 6 uint32_t type : 8; // Size of payload in bytes. Includes the size of the header. uint32_t size : 24; }; -// Tracing information for NSIGHT -struct pva_kmd_fw_msg_vpu_trace { - // VPU ID on which the job was executed - uint8_t engine_id; - // CCQ ID through which the job was submitted - uint8_t ccq_id; - // Queue ID through which the job was submitted - // This is not relative to a context. 
It ranges from 0 to 55 +// Tracing information for Nsight Systems (Nsys) +struct pva_kmd_fw_msg_cmdbuf_trace { + // FW assigns each command buffer a unique ID. + // This is used by Nsys to associate different traces with a command buffer. + uint64_t cmdbuf_id; + uint64_t submit_id; + uint64_t cmdbuf_submit_time; + uint64_t cmdbuf_start_time; + uint64_t cmdbuf_end_time; + int32_t process_id; + int32_t thread_id; + uint8_t context_id; uint8_t queue_id; - // Number of prefences in the cmdbuf - uint8_t num_prefences; - // Program ID of the VPU program executed. + uint8_t status; +}; + +struct pva_kmd_fw_msg_vpu_exec_trace { + uint64_t cmdbuf_id; + // Identification of the VPU kernel executed. // Not supported today as CUPVA does not fully support this yet. // The intent is to for user applications to be able to assign // an identification to a VPU kernel. This ID will then be forwarded // by the FW to the KMD for tracing. - uint64_t prog_id; - // Start time of the VPU execution + uint64_t exec_id; uint64_t vpu_start_time; - // End time of the VPU execution uint64_t vpu_end_time; - // Submit ID of the cmdbuf - // User applications can assign distinct identifiers to command buffers. - // FW will forward this identifier to the KMD for tracing. 
- uint64_t submit_id; + uint8_t engine_id; + uint8_t status; +}; + +struct pva_kmd_fw_msg_engine_acquire_trace { + uint64_t cmdbuf_id; + uint64_t engine_acquire_time; + uint64_t engine_release_time; + uint8_t engine_id; }; struct pva_kmd_fw_msg_fence_trace { - uint64_t submit_id; - uint64_t timestamp; + uint64_t cmdbuf_id; + uint64_t fence_timestamp; // For syncpt fences, fence_id is the syncpt index // For semaphore fences, fence_id is the serial ID of the semaphore NvRM memory uint64_t fence_id; @@ -482,8 +504,6 @@ struct pva_kmd_fw_msg_fence_trace { // This is only valid for semaphore fences uint64_t offset; uint32_t value; - uint8_t ccq_id; - uint8_t queue_id; #define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT 0U #define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL 1U uint8_t action; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c index 057d09c6..2e3ad8cd 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c @@ -124,7 +124,9 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva, { enum pva_error err = PVA_SUCCESS; struct pva_kmd_fw_buffer_msg_header header; - struct pva_kmd_fw_msg_vpu_trace vpu_trace; + struct pva_kmd_fw_msg_cmdbuf_trace cmdbuf_trace; + struct pva_kmd_fw_msg_vpu_exec_trace vpu_trace; + struct pva_kmd_fw_msg_engine_acquire_trace engine_acquire_trace; struct pva_kmd_fw_msg_fence_trace fence_trace; struct pva_kmd_fw_msg_res_unreg unreg_data; struct pva_kmd_context *ctx = NULL; @@ -162,19 +164,34 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva, pva_kmd_process_fw_tracepoint(pva, &tracepoint); break; } + case PVA_KMD_FW_BUF_MSG_TYPE_CMD_BUF_TRACE: { + ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_cmdbuf_trace)); + memcpy(&cmdbuf_trace, msg_body, sizeof(cmdbuf_trace)); + pva_kmd_nsys_cmdbuf_trace(pva, &cmdbuf_trace); + break; 
+ } case PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE: { - ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_vpu_trace)); + ASSERT(msg_size == + sizeof(struct pva_kmd_fw_msg_vpu_exec_trace)); memcpy(&vpu_trace, msg_body, sizeof(vpu_trace)); // We do not check the profiling level here. FW checks profiling level while logging // the trace event. If the profiling level was high enough for FW to log the event, // KMD should trace it. The profiling level might have changed since FW logged the event. - pva_kmd_shim_add_trace_vpu_exec(pva, &vpu_trace); + pva_kmd_nsys_vpu_exec_trace(pva, &vpu_trace); break; } case PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE: { ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_fence_trace)); memcpy(&fence_trace, msg_body, sizeof(fence_trace)); - pva_kmd_shim_add_trace_fence(pva, &fence_trace); + pva_kmd_nsys_fence_trace(pva, &fence_trace); + break; + } + case PVA_KMD_FW_BUF_MSG_TYPE_ENGINE_ACQUIRE_TRACE: { + ASSERT(msg_size == + sizeof(struct pva_kmd_fw_msg_engine_acquire_trace)); + memcpy(&engine_acquire_trace, msg_body, + sizeof(engine_acquire_trace)); + pva_kmd_nsys_engine_acquire_trace(pva, &engine_acquire_trace); break; } case PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG: { diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h index 72ed37a8..2e2f1bb9 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h @@ -5,12 +5,59 @@ #include "pva_kmd_device.h" -void pva_kmd_shim_add_trace_vpu_exec( - struct pva_kmd_device *pva, - struct pva_kmd_fw_msg_vpu_trace const *trace_info); +#if PVA_ENABLE_NSYS_PROFILING -void pva_kmd_shim_add_trace_fence( +void pva_kmd_nsys_cmdbuf_trace( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_cmdbuf_trace const *trace_info); + +void pva_kmd_nsys_vpu_exec_trace( + struct pva_kmd_device *pva, + struct 
pva_kmd_fw_msg_vpu_exec_trace const *trace_info); + +void pva_kmd_nsys_engine_acquire_trace( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_engine_acquire_trace const *trace_info); + +void pva_kmd_nsys_fence_trace( struct pva_kmd_device *pva, struct pva_kmd_fw_msg_fence_trace const *trace_info); +#else /* PVA_ENABLE_NSYS_PROFILING */ + +/* Dummy inline functions when Nsight Systems profiling is disabled */ +static inline void +pva_kmd_nsys_cmdbuf_trace(struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_cmdbuf_trace const *trace_info) +{ + (void)pva; + (void)trace_info; +} + +static inline void pva_kmd_nsys_vpu_exec_trace( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_vpu_exec_trace const *trace_info) +{ + (void)pva; + (void)trace_info; +} + +static inline void pva_kmd_nsys_engine_acquire_trace( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_engine_acquire_trace const *trace_info) +{ + (void)pva; + (void)trace_info; +} + +static inline void +pva_kmd_nsys_fence_trace(struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_fence_trace const *trace_info) +{ + (void)pva; + (void)trace_info; +} + +#endif /* PVA_ENABLE_NSYS_PROFILING */ + #endif // PVA_KMD_SHIM_TRACE_EVENT_H diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux_ftrace.h b/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux_ftrace.h new file mode 100644 index 00000000..73d9199c --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux_ftrace.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ +#ifndef PVA_KMD_LINUX_FTRACE_H +#define PVA_KMD_LINUX_FTRACE_H +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tegra_pva + +// clang-format off + +#if !defined(_TRACE_PVA_FTRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PVA_FTRACE_H + +#include +#include + +TRACE_EVENT(pva_cmdbuf, + TP_PROTO(u64 cmdbuf_id, u64 submit_id, u64 cmdbuf_submit_timestamp, + u64 cmdbuf_begin_timestamp, u64 cmdbuf_end_timestamp, u32 class_id, u32 process_id, + u32 thread_id, u8 context_id, u8 queue_id, u8 status), + TP_ARGS(cmdbuf_id, submit_id, cmdbuf_submit_timestamp, cmdbuf_begin_timestamp, + cmdbuf_end_timestamp, class_id, process_id, thread_id, context_id, queue_id, status), + TP_STRUCT__entry( + __field(u64, cmdbuf_id) + __field(u64, submit_id) + __field(u64, cmdbuf_submit_timestamp) + __field(u64, cmdbuf_begin_timestamp) + __field(u64, cmdbuf_end_timestamp) + __field(u32, class_id) + __field(u32, process_id) + __field(u32, thread_id) + __field(u8, context_id) + __field(u8, queue_id) + __field(u8, status) + ), + TP_fast_assign( + __entry->cmdbuf_id = cmdbuf_id; + __entry->submit_id = submit_id; + __entry->cmdbuf_submit_timestamp = cmdbuf_submit_timestamp; + __entry->cmdbuf_begin_timestamp = cmdbuf_begin_timestamp; + __entry->cmdbuf_end_timestamp = cmdbuf_end_timestamp; + __entry->class_id = class_id; + __entry->process_id = process_id; + __entry->thread_id = thread_id; + __entry->context_id = context_id; + __entry->queue_id = queue_id; + __entry->status = status; + ), + TP_printk("cmdbuf_id=%llu submit_id=%llu submit_timestamp=%llu begin_timestamp=%llu " + "end_timestamp=%llu class_id=%u process_id=%u thread_id=%u context_id=%u queue_id=%u status=%u", + __entry->cmdbuf_id, __entry->submit_id, __entry->cmdbuf_submit_timestamp, + __entry->cmdbuf_begin_timestamp, __entry->cmdbuf_end_timestamp, + __entry->class_id, + __entry->process_id, __entry->thread_id, __entry->context_id, + __entry->queue_id, __entry->status) +); + +TRACE_EVENT(pva_vpu_exec, + TP_PROTO(u64 
cmdbuf_id, u64 exec_id, u64 vpu_begin_timestamp, + u64 vpu_end_timestamp, u8 engine_id, u8 status), + TP_ARGS(cmdbuf_id, exec_id, vpu_begin_timestamp, vpu_end_timestamp, engine_id, status), + TP_STRUCT__entry( + __field(u64, cmdbuf_id) + __field(u64, exec_id) + __field(u64, vpu_begin_timestamp) + __field(u64, vpu_end_timestamp) + __field(u8, engine_id) + __field(u8, status) + ), + TP_fast_assign( + __entry->cmdbuf_id = cmdbuf_id; + __entry->exec_id = exec_id; + __entry->vpu_begin_timestamp = vpu_begin_timestamp; + __entry->vpu_end_timestamp = vpu_end_timestamp; + __entry->engine_id = engine_id; + __entry->status = status; + ), + TP_printk("cmdbuf_id=%llu exec_id=%llu vpu_begin_timestamp=%llu vpu_end_timestamp=%llu " + "engine_id=%u status=%u", + __entry->cmdbuf_id, __entry->exec_id, __entry->vpu_begin_timestamp, + __entry->vpu_end_timestamp, __entry->engine_id, __entry->status) +); + +TRACE_EVENT(pva_engine_acquire, + TP_PROTO(u64 cmdbuf_id, u64 engine_acquire_timestamp, u64 engine_release_timestamp, + u8 engine_id), + TP_ARGS(cmdbuf_id, engine_acquire_timestamp, engine_release_timestamp, engine_id), + TP_STRUCT__entry( + __field(u64, cmdbuf_id) + __field(u64, engine_acquire_timestamp) + __field(u64, engine_release_timestamp) + __field(u8, engine_id) + ), + TP_fast_assign( + __entry->cmdbuf_id = cmdbuf_id; + __entry->engine_acquire_timestamp = engine_acquire_timestamp; + __entry->engine_release_timestamp = engine_release_timestamp; + __entry->engine_id = engine_id; + ), + TP_printk("cmdbuf_id=%llu engine_acquire_timestamp=%llu engine_release_timestamp=%llu " + "engine_id=0x%x", + __entry->cmdbuf_id, __entry->engine_acquire_timestamp, + __entry->engine_release_timestamp, __entry->engine_id) +); + +TRACE_EVENT(pva_fence, + TP_PROTO(u64 cmdbuf_id, u32 class_id, u32 fence_kind, u32 fence_type, + u32 fence_handle, u64 fence_unique_id, u64 fence_offset, u32 fence_threshold, u64 timestamp), + TP_ARGS(cmdbuf_id, class_id, fence_kind, fence_type, fence_handle, 
fence_unique_id, + fence_offset, fence_threshold, timestamp), + TP_STRUCT__entry( + __field(u64, cmdbuf_id) + __field(u32, class_id) + __field(u32, fence_kind) + __field(u32, fence_type) + __field(u32, fence_handle) + __field(u64, fence_unique_id) + __field(u64, fence_offset) + __field(u32, fence_threshold) + __field(u64, timestamp) + ), + TP_fast_assign( + __entry->cmdbuf_id = cmdbuf_id; + __entry->class_id = class_id; + __entry->fence_kind = fence_kind; + __entry->fence_type = fence_type; + __entry->fence_handle = fence_handle; + __entry->fence_unique_id = fence_unique_id; + __entry->fence_offset = fence_offset; + __entry->fence_threshold = fence_threshold; + __entry->timestamp = timestamp; + ), + TP_printk("cmdbuf_id=%llu class_id=%u fence_kind=%u fence_type=%u fence_handle=%u fence_unique_id=%llu fence_offset=%llu fence_threshold=%u timestamp=%llu", + __entry->cmdbuf_id, __entry->class_id, __entry->fence_kind, __entry->fence_type, + __entry->fence_handle, __entry->fence_unique_id, __entry->fence_offset, + __entry->fence_threshold, __entry->timestamp + ) +); + +#endif /* End of _TRACE_PVA_FTRACE_H */ + +/* This part must be outside header guards */ + +/* + * 'define_trace.h' creates trace events from the macros defined in this file. + * To do so, it needs to include this pva trace header file. For this, we declare + * TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE macros. Note that we are using a long + * form for path here to minimize the risk of path collisions. + * + * Also, some build systems define 'linux' macro, which may expand and render + * TRACE_INCLUDE_PATH incorrect. To avoid this, we undefine 'linux' macro before + * defining TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE macros. This should not have + * any impact on the rest of the code.
+ */ +#undef linux +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../kmd/linux/include +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE pva_kmd_linux_ftrace + +// clang-format on + +#include <trace/define_trace.h> diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c index 5c37ca52..b99e575a 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c @@ -3,94 +3,50 @@ #include "pva_kmd_shim_trace_event.h" #include "pva_kmd_linux_device.h" #define CREATE_TRACE_POINTS -#include "trace/events/nvpva_ftrace.h" -static uint32_t get_job_id(uint32_t queue_id, uint64_t submit_id) -{ - return (queue_id & 0x000000FF) << 24 | (submit_id & 0xFFFFFFU); -} +#include "pva_kmd_linux_ftrace.h" -void pva_kmd_shim_add_trace_vpu_exec( +void pva_kmd_nsys_cmdbuf_trace( struct pva_kmd_device *pva, - struct pva_kmd_fw_msg_vpu_trace const *trace_info) + struct pva_kmd_fw_msg_cmdbuf_trace const *trace_info) { - uint64_t vpu_start = trace_info->vpu_start_time; - uint64_t vpu_end = trace_info->vpu_end_time; - - // Unlike in PVA V2 stack, submissions do not go through KMD. - // So, the concept of a task being enqueued by KMD does not exist. - // We can request FW to record timestamps of when command buffers - // were submitted to it, but that would introduce a lot of overhead. - uint64_t queue_start = vpu_start; - uint64_t queue_end = vpu_start; - - // In V2, each kernel launch is independent and has a distinct setup - // and teardown phase. In V3, several kernels may share a command buffer - // and it is difficult to distincitly determine the setup and teardown - // phase for each kernel. - // So, we use the vpu_start time as the prepare_start and prepare_end time. - uint64_t prepare_start = vpu_start; - uint64_t prepare_end = vpu_start; - - // In V2, each kernel launch has a distinct postfence.
- // In V3, several kernel launches may share a command buffer and therefore - // the same postfence. Using this postfence time for all kernel launches - // may be confusing for the user. So, we use vpu_end time instead. - uint64_t post_start = vpu_end; - uint64_t post_end = vpu_end; - - // In V2, Job ID is a 32-bit value with the top 8 bits being the queue ID - // and the bottom 24 bits being a per-task counter. In V3, we only use the - // queue ID. - uint32_t job_id = - get_job_id(trace_info->queue_id, trace_info->submit_id); - - trace_pva_job_ext_event(job_id, trace_info->ccq_id, - 0, // syncpt_thresh, - trace_info->engine_id, queue_start, queue_end, - prepare_start, prepare_end, vpu_start, vpu_end, - post_start, post_end); - - trace_job_submit(NULL, pva_kmd_get_device_class_id(pva), job_id, - trace_info->num_prefences, trace_info->prog_id, - trace_info->submit_id, vpu_start); + trace_pva_cmdbuf(trace_info->cmdbuf_id, trace_info->submit_id, + trace_info->cmdbuf_submit_time, + trace_info->cmdbuf_start_time, + trace_info->cmdbuf_end_time, + pva_kmd_get_device_class_id(pva), + trace_info->process_id, trace_info->thread_id, + trace_info->context_id, trace_info->queue_id, + trace_info->status); } -void pva_kmd_shim_add_trace_fence( +void pva_kmd_nsys_vpu_exec_trace( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_vpu_exec_trace const *trace_info) +{ + trace_pva_vpu_exec(trace_info->cmdbuf_id, trace_info->exec_id, + trace_info->vpu_start_time, trace_info->vpu_end_time, + trace_info->engine_id, trace_info->status); +} + +void pva_kmd_nsys_engine_acquire_trace( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_engine_acquire_trace const *trace_info) +{ + trace_pva_engine_acquire(trace_info->cmdbuf_id, + trace_info->engine_acquire_time, + trace_info->engine_release_time, + trace_info->engine_id); +} + +void pva_kmd_nsys_fence_trace( struct pva_kmd_device *pva, struct pva_kmd_fw_msg_fence_trace const *trace_info) { - uint32_t job_id; - - // We want to log events 
only for user workloads - if (trace_info->ccq_id == PVA_PRIV_CCQ_ID) { - return; - } - - job_id = get_job_id(trace_info->queue_id, trace_info->submit_id); - - if (trace_info->action == PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT) { - if (trace_info->type == PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT) { - trace_job_prefence(job_id, trace_info->fence_id, - trace_info->value); - } else if (trace_info->type == - PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) { - trace_job_prefence_semaphore( - job_id, trace_info->fence_id, - PVA_LOW32(trace_info->offset), - trace_info->value); - } - } else if (trace_info->action == - PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL) { - if (trace_info->type == PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT) { - trace_job_postfence(job_id, trace_info->fence_id, - trace_info->value); - } else if (trace_info->type == - PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) { - trace_job_postfence_semaphore( - job_id, trace_info->fence_id, - PVA_LOW32(trace_info->offset), - trace_info->value); - } - } + /* Use new unified fence tracepoint */ + trace_pva_fence(trace_info->cmdbuf_id, pva_kmd_get_device_class_id(pva), + trace_info->action, trace_info->type, + 0, // fence_handle (not used today) + trace_info->fence_id, trace_info->offset, + trace_info->value, trace_info->fence_timestamp); } diff --git a/drivers/video/tegra/host/pva/src/private_api/pva_version.h b/drivers/video/tegra/host/pva/src/private_api/pva_version.h index f42b6575..779ebf74 100644 --- a/drivers/video/tegra/host/pva/src/private_api/pva_version.h +++ b/drivers/video/tegra/host/pva/src/private_api/pva_version.h @@ -9,7 +9,7 @@ extern "C" { #endif /** \brief Specifies the PVA system software commit ID. */ -#define PVA_SYSSW_COMMIT_ID "b284345610cebb0cc1a9cd85a91298d50e4a7d68" +#define PVA_SYSSW_COMMIT_ID "ba7f61c59af1d2660746b86ef00f00a7f5cf9028" #ifdef __cplusplus }