mirror of
git://nv-tegra.nvidia.com/linux-nv-oot.git
synced 2025-12-22 09:11:26 +03:00
pva: redesign tracepoints for V3
Add new tracepoints to support profiling of PVA V3 workloads. These tracepoints will be supported only in non-safety builds.

Change-Id: I5519c31e2f7db7bb3beab8a0dc1eb5d1d073a345
Signed-off-by: abhinayaa <abhinayaa@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3439807
Tested-by: Nan Wang <nanwa@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Nan Wang <nanwa@nvidia.com>
Reviewed-by: Omar Nemri <onemri@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
f0c437e616
commit
4efe441de5
@@ -82,6 +82,7 @@ pva_def_flags += \
|
|||||||
-DPVA_BUILD_MODE_SIM=4 \
|
-DPVA_BUILD_MODE_SIM=4 \
|
||||||
-DPVA_DEV_MAIN_COMPATIBLE=1 \
|
-DPVA_DEV_MAIN_COMPATIBLE=1 \
|
||||||
-DPVA_ENABLE_CUDA=1 \
|
-DPVA_ENABLE_CUDA=1 \
|
||||||
|
-DPVA_ENABLE_NSYS_PROFILING=1 \
|
||||||
-DPVA_ENABLE_R5_OCD=0 \
|
-DPVA_ENABLE_R5_OCD=0 \
|
||||||
-DPVA_IS_DEBUG=0 \
|
-DPVA_IS_DEBUG=0 \
|
||||||
-DPVA_SAFETY=0 \
|
-DPVA_SAFETY=0 \
|
||||||
|
|||||||
@@ -212,7 +212,17 @@ struct pva_fw_cmdbuf_submit_info {
|
|||||||
uint32_t execution_timeout_ms;
|
uint32_t execution_timeout_ms;
|
||||||
struct pva_fw_memory_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS];
|
struct pva_fw_memory_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS];
|
||||||
struct pva_fw_postfence postfences[PVA_MAX_NUM_POSTFENCES];
|
struct pva_fw_postfence postfences[PVA_MAX_NUM_POSTFENCES];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The following fields are used for tracing by Nsight Systems (Nsys)
|
||||||
|
*/
|
||||||
|
// ID of the submission provided by the user
|
||||||
uint64_t submit_id;
|
uint64_t submit_id;
|
||||||
|
// Timestamp of the command buffer submission
|
||||||
|
uint64_t timestamp_submit;
|
||||||
|
// Process ID of the command buffer submission
|
||||||
|
// Used by Nsys to identify the process that submitted the command buffer
|
||||||
|
int32_t process_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* This is the header of the circular buffer */
|
/* This is the header of the circular buffer */
|
||||||
@@ -419,7 +429,7 @@ enum pva_fw_timestamp_t {
|
|||||||
* message. KMD can further parse these messages to extract the exact size of the
|
* message. KMD can further parse these messages to extract the exact size of the
|
||||||
* message.
|
* message.
|
||||||
*/
|
*/
|
||||||
#define PVA_KMD_FW_BUF_ELEMENT_SIZE sizeof(struct pva_kmd_fw_msg_vpu_trace)
|
#define PVA_KMD_FW_BUF_ELEMENT_SIZE sizeof(struct pva_kmd_fw_msg_cmdbuf_trace)
|
||||||
|
|
||||||
// TODO: remove element size and buffer size fields from this struct.
|
// TODO: remove element size and buffer size fields from this struct.
|
||||||
// This struct is shared between KMD and FW. FW should not be able to change
|
// This struct is shared between KMD and FW. FW should not be able to change
|
||||||
@@ -436,45 +446,57 @@ struct pva_fw_shared_buffer_header {
|
|||||||
|
|
||||||
struct pva_kmd_fw_buffer_msg_header {
|
struct pva_kmd_fw_buffer_msg_header {
|
||||||
#define PVA_KMD_FW_BUF_MSG_TYPE_FW_EVENT 0
|
#define PVA_KMD_FW_BUF_MSG_TYPE_FW_EVENT 0
|
||||||
#define PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE 1
|
#define PVA_KMD_FW_BUF_MSG_TYPE_CMD_BUF_TRACE 1
|
||||||
#define PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE 2
|
#define PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE 2
|
||||||
#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 3
|
#define PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE 3
|
||||||
#define PVA_KMD_FW_BUF_MSG_TYPE_FW_TRACEPOINT 4
|
#define PVA_KMD_FW_BUF_MSG_TYPE_ENGINE_ACQUIRE_TRACE 4
|
||||||
|
#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 5
|
||||||
|
#define PVA_KMD_FW_BUF_MSG_TYPE_FW_TRACEPOINT 6
|
||||||
uint32_t type : 8;
|
uint32_t type : 8;
|
||||||
// Size of payload in bytes. Includes the size of the header.
|
// Size of payload in bytes. Includes the size of the header.
|
||||||
uint32_t size : 24;
|
uint32_t size : 24;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Tracing information for NSIGHT
|
// Tracing information for Nsight Systems (Nsys)
|
||||||
struct pva_kmd_fw_msg_vpu_trace {
|
struct pva_kmd_fw_msg_cmdbuf_trace {
|
||||||
// VPU ID on which the job was executed
|
// FW assigns each command buffer a unique ID.
|
||||||
uint8_t engine_id;
|
// This is used by Nsys to associate different traces with a command buffer.
|
||||||
// CCQ ID through which the job was submitted
|
uint64_t cmdbuf_id;
|
||||||
uint8_t ccq_id;
|
uint64_t submit_id;
|
||||||
// Queue ID through which the job was submitted
|
uint64_t cmdbuf_submit_time;
|
||||||
// This is not relative to a context. It ranges from 0 to 55
|
uint64_t cmdbuf_start_time;
|
||||||
|
uint64_t cmdbuf_end_time;
|
||||||
|
int32_t process_id;
|
||||||
|
int32_t thread_id;
|
||||||
|
uint8_t context_id;
|
||||||
uint8_t queue_id;
|
uint8_t queue_id;
|
||||||
// Number of prefences in the cmdbuf
|
uint8_t status;
|
||||||
uint8_t num_prefences;
|
};
|
||||||
// Program ID of the VPU program executed.
|
|
||||||
|
struct pva_kmd_fw_msg_vpu_exec_trace {
|
||||||
|
uint64_t cmdbuf_id;
|
||||||
|
// Identification of the VPU kernel executed.
|
||||||
// Not supported today as CUPVA does not fully support this yet.
|
// Not supported today as CUPVA does not fully support this yet.
|
||||||
// The intent is for user applications to be able to assign
|
// The intent is for user applications to be able to assign
|
||||||
// an identification to a VPU kernel. This ID will then be forwarded
|
// an identification to a VPU kernel. This ID will then be forwarded
|
||||||
// by the FW to the KMD for tracing.
|
// by the FW to the KMD for tracing.
|
||||||
uint64_t prog_id;
|
uint64_t exec_id;
|
||||||
// Start time of the VPU execution
|
|
||||||
uint64_t vpu_start_time;
|
uint64_t vpu_start_time;
|
||||||
// End time of the VPU execution
|
|
||||||
uint64_t vpu_end_time;
|
uint64_t vpu_end_time;
|
||||||
// Submit ID of the cmdbuf
|
uint8_t engine_id;
|
||||||
// User applications can assign distinct identifiers to command buffers.
|
uint8_t status;
|
||||||
// FW will forward this identifier to the KMD for tracing.
|
};
|
||||||
uint64_t submit_id;
|
|
||||||
|
struct pva_kmd_fw_msg_engine_acquire_trace {
|
||||||
|
uint64_t cmdbuf_id;
|
||||||
|
uint64_t engine_acquire_time;
|
||||||
|
uint64_t engine_release_time;
|
||||||
|
uint8_t engine_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pva_kmd_fw_msg_fence_trace {
|
struct pva_kmd_fw_msg_fence_trace {
|
||||||
uint64_t submit_id;
|
uint64_t cmdbuf_id;
|
||||||
uint64_t timestamp;
|
uint64_t fence_timestamp;
|
||||||
// For syncpt fences, fence_id is the syncpt index
|
// For syncpt fences, fence_id is the syncpt index
|
||||||
// For semaphore fences, fence_id is the serial ID of the semaphore NvRM memory
|
// For semaphore fences, fence_id is the serial ID of the semaphore NvRM memory
|
||||||
uint64_t fence_id;
|
uint64_t fence_id;
|
||||||
@@ -482,8 +504,6 @@ struct pva_kmd_fw_msg_fence_trace {
|
|||||||
// This is only valid for semaphore fences
|
// This is only valid for semaphore fences
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
uint8_t ccq_id;
|
|
||||||
uint8_t queue_id;
|
|
||||||
#define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT 0U
|
#define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT 0U
|
||||||
#define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL 1U
|
#define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL 1U
|
||||||
uint8_t action;
|
uint8_t action;
|
||||||
|
|||||||
@@ -124,7 +124,9 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva,
|
|||||||
{
|
{
|
||||||
enum pva_error err = PVA_SUCCESS;
|
enum pva_error err = PVA_SUCCESS;
|
||||||
struct pva_kmd_fw_buffer_msg_header header;
|
struct pva_kmd_fw_buffer_msg_header header;
|
||||||
struct pva_kmd_fw_msg_vpu_trace vpu_trace;
|
struct pva_kmd_fw_msg_cmdbuf_trace cmdbuf_trace;
|
||||||
|
struct pva_kmd_fw_msg_vpu_exec_trace vpu_trace;
|
||||||
|
struct pva_kmd_fw_msg_engine_acquire_trace engine_acquire_trace;
|
||||||
struct pva_kmd_fw_msg_fence_trace fence_trace;
|
struct pva_kmd_fw_msg_fence_trace fence_trace;
|
||||||
struct pva_kmd_fw_msg_res_unreg unreg_data;
|
struct pva_kmd_fw_msg_res_unreg unreg_data;
|
||||||
struct pva_kmd_context *ctx = NULL;
|
struct pva_kmd_context *ctx = NULL;
|
||||||
@@ -162,19 +164,34 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva,
|
|||||||
pva_kmd_process_fw_tracepoint(pva, &tracepoint);
|
pva_kmd_process_fw_tracepoint(pva, &tracepoint);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PVA_KMD_FW_BUF_MSG_TYPE_CMD_BUF_TRACE: {
|
||||||
|
ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_cmdbuf_trace));
|
||||||
|
memcpy(&cmdbuf_trace, msg_body, sizeof(cmdbuf_trace));
|
||||||
|
pva_kmd_nsys_cmdbuf_trace(pva, &cmdbuf_trace);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE: {
|
case PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE: {
|
||||||
ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_vpu_trace));
|
ASSERT(msg_size ==
|
||||||
|
sizeof(struct pva_kmd_fw_msg_vpu_exec_trace));
|
||||||
memcpy(&vpu_trace, msg_body, sizeof(vpu_trace));
|
memcpy(&vpu_trace, msg_body, sizeof(vpu_trace));
|
||||||
// We do not check the profiling level here. FW checks profiling level while logging
|
// We do not check the profiling level here. FW checks profiling level while logging
|
||||||
// the trace event. If the profiling level was high enough for FW to log the event,
|
// the trace event. If the profiling level was high enough for FW to log the event,
|
||||||
// KMD should trace it. The profiling level might have changed since FW logged the event.
|
// KMD should trace it. The profiling level might have changed since FW logged the event.
|
||||||
pva_kmd_shim_add_trace_vpu_exec(pva, &vpu_trace);
|
pva_kmd_nsys_vpu_exec_trace(pva, &vpu_trace);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE: {
|
case PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE: {
|
||||||
ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_fence_trace));
|
ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_fence_trace));
|
||||||
memcpy(&fence_trace, msg_body, sizeof(fence_trace));
|
memcpy(&fence_trace, msg_body, sizeof(fence_trace));
|
||||||
pva_kmd_shim_add_trace_fence(pva, &fence_trace);
|
pva_kmd_nsys_fence_trace(pva, &fence_trace);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PVA_KMD_FW_BUF_MSG_TYPE_ENGINE_ACQUIRE_TRACE: {
|
||||||
|
ASSERT(msg_size ==
|
||||||
|
sizeof(struct pva_kmd_fw_msg_engine_acquire_trace));
|
||||||
|
memcpy(&engine_acquire_trace, msg_body,
|
||||||
|
sizeof(engine_acquire_trace));
|
||||||
|
pva_kmd_nsys_engine_acquire_trace(pva, &engine_acquire_trace);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG: {
|
case PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG: {
|
||||||
|
|||||||
@@ -5,12 +5,59 @@
|
|||||||
|
|
||||||
#include "pva_kmd_device.h"
|
#include "pva_kmd_device.h"
|
||||||
|
|
||||||
void pva_kmd_shim_add_trace_vpu_exec(
|
#if PVA_ENABLE_NSYS_PROFILING
|
||||||
struct pva_kmd_device *pva,
|
|
||||||
struct pva_kmd_fw_msg_vpu_trace const *trace_info);
|
|
||||||
|
|
||||||
void pva_kmd_shim_add_trace_fence(
|
void pva_kmd_nsys_cmdbuf_trace(
|
||||||
|
struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_cmdbuf_trace const *trace_info);
|
||||||
|
|
||||||
|
void pva_kmd_nsys_vpu_exec_trace(
|
||||||
|
struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_vpu_exec_trace const *trace_info);
|
||||||
|
|
||||||
|
void pva_kmd_nsys_engine_acquire_trace(
|
||||||
|
struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_engine_acquire_trace const *trace_info);
|
||||||
|
|
||||||
|
void pva_kmd_nsys_fence_trace(
|
||||||
struct pva_kmd_device *pva,
|
struct pva_kmd_device *pva,
|
||||||
struct pva_kmd_fw_msg_fence_trace const *trace_info);
|
struct pva_kmd_fw_msg_fence_trace const *trace_info);
|
||||||
|
|
||||||
|
#else /* PVA_ENABLE_NSYS_PROFILING */
|
||||||
|
|
||||||
|
/* Dummy inline functions when Nsight Systems profiling is disabled */
|
||||||
|
static inline void
|
||||||
|
pva_kmd_nsys_cmdbuf_trace(struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_cmdbuf_trace const *trace_info)
|
||||||
|
{
|
||||||
|
(void)pva;
|
||||||
|
(void)trace_info;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void pva_kmd_nsys_vpu_exec_trace(
|
||||||
|
struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_vpu_exec_trace const *trace_info)
|
||||||
|
{
|
||||||
|
(void)pva;
|
||||||
|
(void)trace_info;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void pva_kmd_nsys_engine_acquire_trace(
|
||||||
|
struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_engine_acquire_trace const *trace_info)
|
||||||
|
{
|
||||||
|
(void)pva;
|
||||||
|
(void)trace_info;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
pva_kmd_nsys_fence_trace(struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_fence_trace const *trace_info)
|
||||||
|
{
|
||||||
|
(void)pva;
|
||||||
|
(void)trace_info;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* PVA_ENABLE_NSYS_PROFILING */
|
||||||
|
|
||||||
#endif // PVA_KMD_SHIM_TRACE_EVENT_H
|
#endif // PVA_KMD_SHIM_TRACE_EVENT_H
|
||||||
|
|||||||
@@ -0,0 +1,164 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||||
|
/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
|
||||||
|
#ifndef PVA_KMD_LINUX_FTRACE_H
|
||||||
|
#define PVA_KMD_LINUX_FTRACE_H
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#undef TRACE_SYSTEM
|
||||||
|
#define TRACE_SYSTEM tegra_pva
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
|
||||||
|
#if !defined(_TRACE_PVA_FTRACE_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||||
|
#define _TRACE_PVA_FTRACE_H
|
||||||
|
|
||||||
|
#include <linux/tracepoint.h>
|
||||||
|
#include <linux/device.h>
|
||||||
|
|
||||||
|
TRACE_EVENT(pva_cmdbuf,
|
||||||
|
TP_PROTO(u64 cmdbuf_id, u64 submit_id, u64 cmdbuf_submit_timestamp,
|
||||||
|
u64 cmdbuf_begin_timestamp, u64 cmdbuf_end_timestamp, u32 class_id, u32 process_id,
|
||||||
|
u32 thread_id, u8 context_id, u8 queue_id, u8 status),
|
||||||
|
TP_ARGS(cmdbuf_id, submit_id, cmdbuf_submit_timestamp, cmdbuf_begin_timestamp,
|
||||||
|
cmdbuf_end_timestamp, class_id, process_id, thread_id, context_id, queue_id, status),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(u64, cmdbuf_id)
|
||||||
|
__field(u64, submit_id)
|
||||||
|
__field(u64, cmdbuf_submit_timestamp)
|
||||||
|
__field(u64, cmdbuf_begin_timestamp)
|
||||||
|
__field(u64, cmdbuf_end_timestamp)
|
||||||
|
__field(u32, class_id)
|
||||||
|
__field(u32, process_id)
|
||||||
|
__field(u32, thread_id)
|
||||||
|
__field(u8, context_id)
|
||||||
|
__field(u8, queue_id)
|
||||||
|
__field(u8, status)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->cmdbuf_id = cmdbuf_id;
|
||||||
|
__entry->submit_id = submit_id;
|
||||||
|
__entry->cmdbuf_submit_timestamp = cmdbuf_submit_timestamp;
|
||||||
|
__entry->cmdbuf_begin_timestamp = cmdbuf_begin_timestamp;
|
||||||
|
__entry->cmdbuf_end_timestamp = cmdbuf_end_timestamp;
|
||||||
|
__entry->class_id = class_id;
|
||||||
|
__entry->process_id = process_id;
|
||||||
|
__entry->thread_id = thread_id;
|
||||||
|
__entry->context_id = context_id;
|
||||||
|
__entry->queue_id = queue_id;
|
||||||
|
__entry->status = status;
|
||||||
|
),
|
||||||
|
TP_printk("cmdbuf_id=%llu submit_id=%llu submit_timestamp=%llu begin_timestamp=%llu "
|
||||||
|
"end_timestamp=%llu class_id=%u process_id=%u thread_id=%u context_id=%u queue_id=%u status=%u",
|
||||||
|
__entry->cmdbuf_id, __entry->submit_id, __entry->cmdbuf_submit_timestamp,
|
||||||
|
__entry->cmdbuf_begin_timestamp, __entry->cmdbuf_end_timestamp,
|
||||||
|
__entry->class_id,
|
||||||
|
__entry->process_id, __entry->thread_id, __entry->context_id,
|
||||||
|
__entry->queue_id, __entry->status)
|
||||||
|
);
|
||||||
|
|
||||||
|
TRACE_EVENT(pva_vpu_exec,
|
||||||
|
TP_PROTO(u64 cmdbuf_id, u64 exec_id, u64 vpu_begin_timestamp,
|
||||||
|
u64 vpu_end_timestamp, u8 engine_id, u8 status),
|
||||||
|
TP_ARGS(cmdbuf_id, exec_id, vpu_begin_timestamp, vpu_end_timestamp, engine_id, status),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(u64, cmdbuf_id)
|
||||||
|
__field(u64, exec_id)
|
||||||
|
__field(u64, vpu_begin_timestamp)
|
||||||
|
__field(u64, vpu_end_timestamp)
|
||||||
|
__field(u8, engine_id)
|
||||||
|
__field(u8, status)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->cmdbuf_id = cmdbuf_id;
|
||||||
|
__entry->exec_id = exec_id;
|
||||||
|
__entry->vpu_begin_timestamp = vpu_begin_timestamp;
|
||||||
|
__entry->vpu_end_timestamp = vpu_end_timestamp;
|
||||||
|
__entry->engine_id = engine_id;
|
||||||
|
__entry->status = status;
|
||||||
|
),
|
||||||
|
TP_printk("cmdbuf_id=%llu exec_id=%llu vpu_begin_timestamp=%llu vpu_end_timestamp=%llu "
|
||||||
|
"engine_id=%u status=%u",
|
||||||
|
__entry->cmdbuf_id, __entry->exec_id, __entry->vpu_begin_timestamp,
|
||||||
|
__entry->vpu_end_timestamp, __entry->engine_id, __entry->status)
|
||||||
|
);
|
||||||
|
|
||||||
|
TRACE_EVENT(pva_engine_acquire,
|
||||||
|
TP_PROTO(u64 cmdbuf_id, u64 engine_acquire_timestamp, u64 engine_release_timestamp,
|
||||||
|
u8 engine_id),
|
||||||
|
TP_ARGS(cmdbuf_id, engine_acquire_timestamp, engine_release_timestamp, engine_id),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(u64, cmdbuf_id)
|
||||||
|
__field(u64, engine_acquire_timestamp)
|
||||||
|
__field(u64, engine_release_timestamp)
|
||||||
|
__field(u8, engine_id)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->cmdbuf_id = cmdbuf_id;
|
||||||
|
__entry->engine_acquire_timestamp = engine_acquire_timestamp;
|
||||||
|
__entry->engine_release_timestamp = engine_release_timestamp;
|
||||||
|
__entry->engine_id = engine_id;
|
||||||
|
),
|
||||||
|
TP_printk("cmdbuf_id=%llu engine_acquire_timestamp=%llu engine_release_timestamp=%llu "
|
||||||
|
"engine_id=0x%x",
|
||||||
|
__entry->cmdbuf_id, __entry->engine_acquire_timestamp,
|
||||||
|
__entry->engine_release_timestamp, __entry->engine_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
TRACE_EVENT(pva_fence,
|
||||||
|
TP_PROTO(u64 cmdbuf_id, u32 class_id, u32 fence_kind, u32 fence_type,
|
||||||
|
u32 fence_handle, u64 fence_unique_id, u64 fence_offset, u32 fence_threshold, u64 timestamp),
|
||||||
|
TP_ARGS(cmdbuf_id, class_id, fence_kind, fence_type, fence_handle, fence_unique_id,
|
||||||
|
fence_offset, fence_threshold, timestamp),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(u64, cmdbuf_id)
|
||||||
|
__field(u32, class_id)
|
||||||
|
__field(u32, fence_kind)
|
||||||
|
__field(u32, fence_type)
|
||||||
|
__field(u32, fence_handle)
|
||||||
|
__field(u64, fence_unique_id)
|
||||||
|
__field(u64, fence_offset)
|
||||||
|
__field(u32, fence_threshold)
|
||||||
|
__field(u64, timestamp)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->cmdbuf_id = cmdbuf_id;
|
||||||
|
__entry->class_id = class_id;
|
||||||
|
__entry->fence_kind = fence_kind;
|
||||||
|
__entry->fence_type = fence_type;
|
||||||
|
__entry->fence_handle = fence_handle;
|
||||||
|
__entry->fence_unique_id = fence_unique_id;
|
||||||
|
__entry->fence_offset = fence_offset;
|
||||||
|
__entry->fence_threshold = fence_threshold;
|
||||||
|
__entry->timestamp = timestamp;
|
||||||
|
),
|
||||||
|
TP_printk("cmdbuf_id=%llu class_id=%u fence_kind=%u fence_type=%u fence_handle=%u fence_unique_id=%llu fence_offset=%llu fence_threshold=%u timestamp=%llu",
|
||||||
|
__entry->cmdbuf_id, __entry->class_id, __entry->fence_kind, __entry->fence_type,
|
||||||
|
__entry->fence_handle, __entry->fence_unique_id, __entry->fence_offset,
|
||||||
|
__entry->fence_threshold, __entry->timestamp
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
#endif /* End of _TRACE_PVA_FTRACE_H */
|
||||||
|
|
||||||
|
/* This part must be outside header guards */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 'define_trace.h' creates trace events from the macros defined in this file.
|
||||||
|
* To do so, it needs to include this pva trace header file. For this, we declare
|
||||||
|
* TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE macros. Note that we are using a long
|
||||||
|
* form for path here to minimize the risk of path collisions.
|
||||||
|
*
|
||||||
|
* Also, some build systems define 'linux' macro, which may expand and render
|
||||||
|
* TRACE_INCLUDE_PATH incorrect. To avoid this, we undefine 'linux' macro before
|
||||||
|
* defining TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE macros. This should not have
|
||||||
|
* any impact on the rest of the code.
|
||||||
|
*/
|
||||||
|
#undef linux
|
||||||
|
#undef TRACE_INCLUDE_PATH
|
||||||
|
#define TRACE_INCLUDE_PATH ../../kmd/linux/include
|
||||||
|
#undef TRACE_INCLUDE_FILE
|
||||||
|
#define TRACE_INCLUDE_FILE pva_kmd_linux_ftrace
|
||||||
|
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
#include <trace/define_trace.h>
|
||||||
@@ -3,94 +3,50 @@
|
|||||||
#include "pva_kmd_shim_trace_event.h"
|
#include "pva_kmd_shim_trace_event.h"
|
||||||
#include "pva_kmd_linux_device.h"
|
#include "pva_kmd_linux_device.h"
|
||||||
#define CREATE_TRACE_POINTS
|
#define CREATE_TRACE_POINTS
|
||||||
#include "trace/events/nvpva_ftrace.h"
|
|
||||||
|
|
||||||
static uint32_t get_job_id(uint32_t queue_id, uint64_t submit_id)
|
#include "pva_kmd_linux_ftrace.h"
|
||||||
{
|
|
||||||
return (queue_id & 0x000000FF) << 24 | (submit_id & 0xFFFFFFU);
|
|
||||||
}
|
|
||||||
|
|
||||||
void pva_kmd_shim_add_trace_vpu_exec(
|
void pva_kmd_nsys_cmdbuf_trace(
|
||||||
struct pva_kmd_device *pva,
|
struct pva_kmd_device *pva,
|
||||||
struct pva_kmd_fw_msg_vpu_trace const *trace_info)
|
struct pva_kmd_fw_msg_cmdbuf_trace const *trace_info)
|
||||||
{
|
{
|
||||||
uint64_t vpu_start = trace_info->vpu_start_time;
|
trace_pva_cmdbuf(trace_info->cmdbuf_id, trace_info->submit_id,
|
||||||
uint64_t vpu_end = trace_info->vpu_end_time;
|
trace_info->cmdbuf_submit_time,
|
||||||
|
trace_info->cmdbuf_start_time,
|
||||||
// Unlike in PVA V2 stack, submissions do not go through KMD.
|
trace_info->cmdbuf_end_time,
|
||||||
// So, the concept of a task being enqueued by KMD does not exist.
|
pva_kmd_get_device_class_id(pva),
|
||||||
// We can request FW to record timestamps of when command buffers
|
trace_info->process_id, trace_info->thread_id,
|
||||||
// were submitted to it, but that would introduce a lot of overhead.
|
trace_info->context_id, trace_info->queue_id,
|
||||||
uint64_t queue_start = vpu_start;
|
trace_info->status);
|
||||||
uint64_t queue_end = vpu_start;
|
|
||||||
|
|
||||||
// In V2, each kernel launch is independent and has a distinct setup
|
|
||||||
// and teardown phase. In V3, several kernels may share a command buffer
|
|
||||||
// and it is difficult to distinctly determine the setup and teardown
|
|
||||||
// phase for each kernel.
|
|
||||||
// So, we use the vpu_start time as the prepare_start and prepare_end time.
|
|
||||||
uint64_t prepare_start = vpu_start;
|
|
||||||
uint64_t prepare_end = vpu_start;
|
|
||||||
|
|
||||||
// In V2, each kernel launch has a distinct postfence.
|
|
||||||
// In V3, several kernel launches may share a command buffer and therefore
|
|
||||||
// the same postfence. Using this postfence time for all kernel launches
|
|
||||||
// may be confusing for the user. So, we use vpu_end time instead.
|
|
||||||
uint64_t post_start = vpu_end;
|
|
||||||
uint64_t post_end = vpu_end;
|
|
||||||
|
|
||||||
// In V2, Job ID is a 32-bit value with the top 8 bits being the queue ID
|
|
||||||
// and the bottom 24 bits being a per-task counter. In V3, we only use the
|
|
||||||
// queue ID.
|
|
||||||
uint32_t job_id =
|
|
||||||
get_job_id(trace_info->queue_id, trace_info->submit_id);
|
|
||||||
|
|
||||||
trace_pva_job_ext_event(job_id, trace_info->ccq_id,
|
|
||||||
0, // syncpt_thresh,
|
|
||||||
trace_info->engine_id, queue_start, queue_end,
|
|
||||||
prepare_start, prepare_end, vpu_start, vpu_end,
|
|
||||||
post_start, post_end);
|
|
||||||
|
|
||||||
trace_job_submit(NULL, pva_kmd_get_device_class_id(pva), job_id,
|
|
||||||
trace_info->num_prefences, trace_info->prog_id,
|
|
||||||
trace_info->submit_id, vpu_start);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void pva_kmd_shim_add_trace_fence(
|
void pva_kmd_nsys_vpu_exec_trace(
|
||||||
|
struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_vpu_exec_trace const *trace_info)
|
||||||
|
{
|
||||||
|
trace_pva_vpu_exec(trace_info->cmdbuf_id, trace_info->exec_id,
|
||||||
|
trace_info->vpu_start_time, trace_info->vpu_end_time,
|
||||||
|
trace_info->engine_id, trace_info->status);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pva_kmd_nsys_engine_acquire_trace(
|
||||||
|
struct pva_kmd_device *pva,
|
||||||
|
struct pva_kmd_fw_msg_engine_acquire_trace const *trace_info)
|
||||||
|
{
|
||||||
|
trace_pva_engine_acquire(trace_info->cmdbuf_id,
|
||||||
|
trace_info->engine_acquire_time,
|
||||||
|
trace_info->engine_release_time,
|
||||||
|
trace_info->engine_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pva_kmd_nsys_fence_trace(
|
||||||
struct pva_kmd_device *pva,
|
struct pva_kmd_device *pva,
|
||||||
struct pva_kmd_fw_msg_fence_trace const *trace_info)
|
struct pva_kmd_fw_msg_fence_trace const *trace_info)
|
||||||
{
|
{
|
||||||
uint32_t job_id;
|
/* Use new unified fence tracepoint */
|
||||||
|
trace_pva_fence(trace_info->cmdbuf_id, pva_kmd_get_device_class_id(pva),
|
||||||
// We want to log events only for user workloads
|
trace_info->action, trace_info->type,
|
||||||
if (trace_info->ccq_id == PVA_PRIV_CCQ_ID) {
|
0, // fence_handle (not used today)
|
||||||
return;
|
trace_info->fence_id, trace_info->offset,
|
||||||
}
|
trace_info->value, trace_info->fence_timestamp);
|
||||||
|
|
||||||
job_id = get_job_id(trace_info->queue_id, trace_info->submit_id);
|
|
||||||
|
|
||||||
if (trace_info->action == PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT) {
|
|
||||||
if (trace_info->type == PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT) {
|
|
||||||
trace_job_prefence(job_id, trace_info->fence_id,
|
|
||||||
trace_info->value);
|
|
||||||
} else if (trace_info->type ==
|
|
||||||
PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) {
|
|
||||||
trace_job_prefence_semaphore(
|
|
||||||
job_id, trace_info->fence_id,
|
|
||||||
PVA_LOW32(trace_info->offset),
|
|
||||||
trace_info->value);
|
|
||||||
}
|
|
||||||
} else if (trace_info->action ==
|
|
||||||
PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL) {
|
|
||||||
if (trace_info->type == PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT) {
|
|
||||||
trace_job_postfence(job_id, trace_info->fence_id,
|
|
||||||
trace_info->value);
|
|
||||||
} else if (trace_info->type ==
|
|
||||||
PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) {
|
|
||||||
trace_job_postfence_semaphore(
|
|
||||||
job_id, trace_info->fence_id,
|
|
||||||
PVA_LOW32(trace_info->offset),
|
|
||||||
trace_info->value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** \brief Specifies the PVA system software commit ID. */
|
/** \brief Specifies the PVA system software commit ID. */
|
||||||
#define PVA_SYSSW_COMMIT_ID "b284345610cebb0cc1a9cd85a91298d50e4a7d68"
|
#define PVA_SYSSW_COMMIT_ID "ba7f61c59af1d2660746b86ef00f00a7f5cf9028"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user