mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: Add support for FECS ctxsw tracing
bug 1648908 This commit adds support for FECS ctxsw tracing. Code is compiled conditionally under CONFIG_GK20_CTXSW_TRACE. This feature requires an updated FECS ucode that writes one record to a ring buffer on each context switch. On the RM/Kernel side, the GPU driver reads records from the master ring buffer and generates trace entries into a user-facing VM ring buffer. For each record in the master ring buffer, RM/Kernel has to retrieve the vmid+pid of the user process that submitted the related work. Features currently implemented: - master ring buffer allocation - debugfs to dump master ring buffer - FECS record per context switch (with both current and new contexts) - dedicated device for ctxsw tracing (access to VM ring buffer) - SOF generation (and access to PTIMER) - VM ring buffer allocation, and reconfiguration - enable/disable tracing at user level - event-based trace filtering - context_ptr to vmid+pid mapping - read system call for ctxsw dev - mmap system call for ctxsw dev (direct access to VM ring buffer) - poll system call for ctxsw dev - save/restore register on ELPG/CG6 - separate user ring from FECS ring handling Features requiring ucode changes: - enable/disable tracing at FECS level - actual busy time on engine (bug 1642354) - master ring buffer threshold interrupt (P1) - API for GPU to CPU timestamp conversion (P1) - vmid/pid/uid based filtering (P1) Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3 Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: http://git-master/r/1022737 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Terje Bergstrom
parent
82da6ed595
commit
1c40d09c4c
@@ -1215,4 +1215,94 @@ struct nvgpu_as_map_buffer_batch_args {
|
||||
/*
 * Expands to sizeof(struct nvgpu_as_map_buffer_ex_args).
 * NOTE(review): presumably that struct is the largest argument of any
 * NVGPU_AS ioctl - confirm against the full ioctl list, which is outside
 * this excerpt.
 */
#define NVGPU_AS_IOCTL_MAX_ARG_SIZE	\
	sizeof(struct nvgpu_as_map_buffer_ex_args)
|
||||
|
||||
|
||||
/*
 * /dev/nvhost-ctxsw-gpu device
 *
 * Opening a '/dev/nvhost-ctxsw-gpu' device node provides a way to trace
 * context switches on the GR engine.
 */
|
||||
|
||||
/* ioctl type character passed as first argument to _IO/_IOR/_IOW/_IOWR by every NVGPU_CTXSW_IOCTL_* request */
#define NVGPU_CTXSW_IOCTL_MAGIC 'C'
|
||||
|
||||
/*
 * Context-switch trace tags.
 *
 * The values are not numerically contiguous: the FE_ACK_* variants occupy
 * 0x0a..0x0d while SAVE_END..CONTEXT_START continue at 0x03..0x05.
 * NVGPU_CTXSW_TAG_LAST aliases the highest tag value (0xff) and is what
 * NVGPU_CTXSW_FILTER_SIZE is derived from, so every tag value in
 * 0..NVGPU_CTXSW_TAG_LAST must have a bit in the filter bitmap.
 */
#define NVGPU_CTXSW_TAG_SOF			0x00
#define NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST	0x01
#define NVGPU_CTXSW_TAG_FE_ACK			0x02
#define NVGPU_CTXSW_TAG_FE_ACK_WFI		0x0a
#define NVGPU_CTXSW_TAG_FE_ACK_GFXP		0x0b
#define NVGPU_CTXSW_TAG_FE_ACK_CTAP		0x0c
#define NVGPU_CTXSW_TAG_FE_ACK_CILP		0x0d
#define NVGPU_CTXSW_TAG_SAVE_END		0x03
#define NVGPU_CTXSW_TAG_RESTORE_START		0x04
#define NVGPU_CTXSW_TAG_CONTEXT_START		0x05
#define NVGPU_CTXSW_TAG_INVALID_TIMESTAMP	0xff
#define NVGPU_CTXSW_TAG_LAST	\
	NVGPU_CTXSW_TAG_INVALID_TIMESTAMP
|
||||
|
||||
/*
 * One context-switch trace record.
 *
 * The members are ordered so that each one is naturally aligned without
 * padding: 1 + 1 + 2 + 4 + 8 + 8 = 24 bytes.  Keep the order and widths
 * unchanged; this layout is shared with user space.
 */
struct nvgpu_ctxsw_trace_entry {
	__u8 tag;		/* NOTE(review): presumably an NVGPU_CTXSW_TAG_* value - confirm */
	__u8 vmid;
	__u16 seqno;		/* sequence number to detect drops */
	__u32 context_id;	/* context_id as allocated by FECS */
	__u64 pid;		/* 64-bit is max bits of different OS pid */
	__u64 timestamp;	/* 64-bit time */
};
|
||||
|
||||
/*
 * Identification constants for the ring header.
 * NOTE(review): presumably stored in nvgpu_ctxsw_ring_header.magic and
 * .version respectively - confirm against the code that fills the header.
 */
#define NVGPU_CTXSW_RING_HEADER_MAGIC	0x7000fade
#define NVGPU_CTXSW_RING_HEADER_VERSION	0

/*
 * Header describing a trace ring buffer: eight 32-bit words, 32 bytes total.
 *
 * The last four members are declared volatile, so every access to them is
 * performed as written and is never cached in a register by the compiler.
 * NOTE(review): volatile alone provides no atomicity or ordering between
 * producer and consumer - confirm what synchronisation the users rely on.
 */
struct nvgpu_ctxsw_ring_header {
	__u32 magic;
	__u32 version;
	__u32 num_ents;
	__u32 ent_size;
	volatile __u32 drop_count;	/* excluding filtered out events */
	volatile __u32 write_seqno;
	volatile __u32 write_idx;
	volatile __u32 read_idx;
};
|
||||
|
||||
/* Argument of NVGPU_CTXSW_IOCTL_RING_SETUP. */
struct nvgpu_ctxsw_ring_setup_args {
	/*
	 * [in/out] Size of the ring buffer in bytes, including the header.
	 * The requested size is rounded to the page size, and this field is
	 * updated with the size actually allocated.
	 */
	__u32 size;
};
|
||||
|
||||
/*
 * Tag filter bitmap helpers.
 *
 * The filter holds one bit per tag value in 0..NVGPU_CTXSW_TAG_LAST, packed
 * into 64-bit words: tag n lives in word n / 64, bit n & 63.
 *
 * The single-bit mask is built from 1ULL rather than the int constant 1.
 * Shifting an int by 31 or more positions is undefined behaviour, so with a
 * plain 1 every tag whose bit position is >= 31 (including
 * NVGPU_CTXSW_TAG_INVALID_TIMESTAMP, bit 63 of word 3) touched the wrong
 * bits of the 64-bit word.
 *
 * Usage notes:
 *  - 'n' and 'p' are evaluated more than once; do not pass expressions with
 *    side effects.
 *  - 'p' must point to an object with a 64-bit 'tag_bits' array member
 *    (struct nvgpu_ctxsw_trace_filter).
 *  - NVGPU_CTXSW_FILTER_ISSET yields the masked 64-bit word, not 0/1; test
 *    it for non-zero and do not narrow it to int.
 *  - The *_ALL helpers call memset(), so <string.h> (or the kernel
 *    equivalent) must be in scope at the point of use.
 */
#define NVGPU_CTXSW_FILTER_SIZE	(NVGPU_CTXSW_TAG_LAST + 1)
#define NVGPU_CTXSW_FILTER_SET(n, p)	\
	((p)->tag_bits[(n) / 64] |=  (1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_CLR(n, p)	\
	((p)->tag_bits[(n) / 64] &= ~(1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_ISSET(n, p)	\
	((p)->tag_bits[(n) / 64] &   (1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_CLR_ALL(p)	memset((void *)(p), 0, sizeof(*(p)))
#define NVGPU_CTXSW_FILTER_SET_ALL(p)	memset((void *)(p), ~0, sizeof(*(p)))
|
||||
|
||||
struct nvgpu_ctxsw_trace_filter {
|
||||
__u64 tag_bits[(NVGPU_CTXSW_FILTER_SIZE + 63) / 64];
|
||||
};
|
||||
|
||||
struct nvgpu_ctxsw_trace_filter_args {
|
||||
struct nvgpu_ctxsw_trace_filter filter;
|
||||
};
|
||||
|
||||
/*
 * ioctl requests of the ctxsw trace device, all of type
 * NVGPU_CTXSW_IOCTL_MAGIC, numbered 1..6.
 *
 *  TRACE_ENABLE / TRACE_DISABLE / POLL  - _IO: carry no argument struct.
 *  RING_SETUP  - _IOWR: struct nvgpu_ctxsw_ring_setup_args is both read
 *                and written back.
 *  SET_FILTER  - _IOW: struct nvgpu_ctxsw_trace_filter_args is passed in.
 *  GET_FILTER  - _IOR: struct nvgpu_ctxsw_trace_filter_args is returned.
 *
 * NVGPU_CTXSW_IOCTL_LAST is the highest request number; keep it pointing
 * at the last request when new ones are added.
 */
#define NVGPU_CTXSW_IOCTL_TRACE_ENABLE	\
	_IO(NVGPU_CTXSW_IOCTL_MAGIC, 1)
#define NVGPU_CTXSW_IOCTL_TRACE_DISABLE	\
	_IO(NVGPU_CTXSW_IOCTL_MAGIC, 2)
#define NVGPU_CTXSW_IOCTL_RING_SETUP	\
	_IOWR(NVGPU_CTXSW_IOCTL_MAGIC, 3, struct nvgpu_ctxsw_ring_setup_args)
#define NVGPU_CTXSW_IOCTL_SET_FILTER	\
	_IOW(NVGPU_CTXSW_IOCTL_MAGIC, 4, struct nvgpu_ctxsw_trace_filter_args)
#define NVGPU_CTXSW_IOCTL_GET_FILTER	\
	_IOR(NVGPU_CTXSW_IOCTL_MAGIC, 5, struct nvgpu_ctxsw_trace_filter_args)
#define NVGPU_CTXSW_IOCTL_POLL	\
	_IO(NVGPU_CTXSW_IOCTL_MAGIC, 6)

#define NVGPU_CTXSW_IOCTL_LAST	\
	_IOC_NR(NVGPU_CTXSW_IOCTL_POLL)
|
||||
|
||||
/*
 * Expands to sizeof(struct nvgpu_ctxsw_trace_filter_args), the larger of
 * the two argument structs used by the NVGPU_CTXSW ioctls (the other,
 * struct nvgpu_ctxsw_ring_setup_args, is a single __u32).
 */
#define NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE	\
	sizeof(struct nvgpu_ctxsw_trace_filter_args)
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user