mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: Add support for FECS ctxsw tracing
bug 1648908 This commit adds support for FECS ctxsw tracing. Code is compiled conditionally under CONFIG_GK20A_CTXSW_TRACE. This feature requires an updated FECS ucode that writes one record to a ring buffer on each context switch. On RM/Kernel side, the GPU driver reads records from the master ring buffer and generates trace entries into a user-facing VM ring buffer. For each record in the master ring buffer, RM/Kernel has to retrieve the vmid+pid of the user process that submitted related work. Features currently implemented: - master ring buffer allocation - debugfs to dump master ring buffer - FECS record per context switch (with both current and new contexts) - dedicated device for ctxsw tracing (access to VM ring buffer) - SOF generation (and access to PTIMER) - VM ring buffer allocation, and reconfiguration - enable/disable tracing at user level - event-based trace filtering - context_ptr to vmid+pid mapping - read system call for ctxsw dev - mmap system call for ctxsw dev (direct access to VM ring buffer) - poll system call for ctxsw dev - save/restore register on ELPG/CG6 - separate user ring from FECS ring handling Features requiring ucode changes: - enable/disable tracing at FECS level - actual busy time on engine (bug 1642354) - master ring buffer threshold interrupt (P1) - API for GPU to CPU timestamp conversion (P1) - vmid/pid/uid based filtering (P1) Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3 Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: http://git-master/r/1022737 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Terje Bergstrom
parent
82da6ed595
commit
1c40d09c4c
@@ -54,6 +54,16 @@ config GK20A_CYCLE_STATS
|
||||
help
|
||||
Say Y here to enable the cycle stats debugging features.
|
||||
|
||||
config GK20A_CTXSW_TRACE
|
||||
bool "Support GK20A Context Switch tracing"
|
||||
depends on GK20A
|
||||
default n
|
||||
help
|
||||
Enable support for the GK20A Context Switch Tracing. In this mode,
|
||||
FECS collects timestamps for contexts loaded on GR engine. This
|
||||
allows tracking context switches on GR engine, as well as
|
||||
identifying processes that submitted work.
|
||||
|
||||
config TEGRA_GK20A
|
||||
bool "Enable the GK20A GPU on Tegra"
|
||||
depends on TEGRA_GRHOST || TEGRA_HOST1X
|
||||
|
||||
@@ -46,6 +46,8 @@ nvgpu-y := \
|
||||
gk20a/cde_gk20a.o \
|
||||
gk20a/platform_gk20a_generic.o \
|
||||
gk20a/tsg_gk20a.o \
|
||||
gk20a/ctxsw_trace_gk20a.o \
|
||||
gk20a/fecs_trace_gk20a.o \
|
||||
gk20a/mc_gk20a.o \
|
||||
gm20b/hal_gm20b.o \
|
||||
gm20b/ltc_gm20b.o \
|
||||
@@ -64,7 +66,6 @@ nvgpu-y := \
|
||||
gm20b/debug_gm20b.o \
|
||||
gm20b/cde_gm20b.o \
|
||||
gm20b/therm_gm20b.o
|
||||
|
||||
nvgpu-$(CONFIG_TEGRA_GK20A) += gk20a/platform_gk20a_tegra.o
|
||||
nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
|
||||
|
||||
@@ -78,6 +79,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
|
||||
vgpu/debug_vgpu.o \
|
||||
vgpu/vgpu.o \
|
||||
vgpu/dbg_vgpu.o \
|
||||
vgpu/fecs_trace_vgpu.o \
|
||||
vgpu/gk20a/vgpu_hal_gk20a.o \
|
||||
vgpu/gk20a/vgpu_gr_gk20a.o \
|
||||
vgpu/gm20b/vgpu_hal_gm20b.o \
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include "debug_gk20a.h"
|
||||
#include "ctxsw_trace_gk20a.h"
|
||||
|
||||
#include "gk20a.h"
|
||||
#include "dbg_gpu_gk20a.h"
|
||||
@@ -920,6 +921,9 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
|
||||
|
||||
gk20a_free_error_notifiers(ch);
|
||||
|
||||
if (g->ops.fecs_trace.unbind_channel)
|
||||
g->ops.fecs_trace.unbind_channel(g, ch);
|
||||
|
||||
/* release channel ctx */
|
||||
g->ops.gr.free_channel_ctx(ch);
|
||||
|
||||
|
||||
586
drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
Normal file
586
drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
Normal file
@@ -0,0 +1,586 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/circ_buf.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/nvgpu.h>
|
||||
#include <linux/hashtable.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/log2.h>
|
||||
#include <uapi/linux/nvgpu.h>
|
||||
#include "ctxsw_trace_gk20a.h"
|
||||
#include "gk20a.h"
|
||||
#include "gr_gk20a.h"
|
||||
#include "hw_ctxsw_prog_gk20a.h"
|
||||
#include "hw_gr_gk20a.h"
|
||||
|
||||
#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
|
||||
|
||||
/* Userland-facing FIFO (one global + eventually one per VM) */
|
||||
struct gk20a_ctxsw_dev {
|
||||
struct gk20a *g;
|
||||
|
||||
struct nvgpu_ctxsw_ring_header *hdr;
|
||||
struct nvgpu_ctxsw_trace_entry *ents;
|
||||
struct nvgpu_ctxsw_trace_filter filter;
|
||||
bool write_enabled;
|
||||
wait_queue_head_t readout_wq;
|
||||
size_t size;
|
||||
|
||||
atomic_t vma_ref;
|
||||
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
|
||||
struct gk20a_ctxsw_trace {
|
||||
struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
|
||||
};
|
||||
|
||||
/*
 * Ring index helpers for the user-facing VM ring buffer.
 *
 * The header lives in memory that is also mmap'd to userspace; the
 * kernel updates read_idx (under dev->lock in the read path) and
 * write_idx (under dev->lock in the write path).
 *
 * NOTE(review): ring_len()/ring_space() rely on '%' yielding a
 * non-negative result; that only holds if the header index fields are
 * unsigned (declared in uapi/linux/nvgpu.h, not visible here) -- confirm.
 */
static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx == hdr->read_idx);
}

/* one slot is always left unused so that full != empty */
static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}

/* number of entries currently queued */
static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}

/* number of free slots (usable capacity is num_ents - 1) */
static inline int ring_space(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->read_idx - hdr->write_idx - 1) % hdr->num_ents;
}
|
||||
|
||||
/*
 * read(2) handler for the ctxsw trace device.
 *
 * Copies whole nvgpu_ctxsw_trace_entry records from the kernel-side
 * ring to the user buffer. Blocks until at least one entry is
 * available unless O_NONBLOCK is set. Returns the number of bytes
 * copied (a multiple of the entry size) or a negative errno.
 */
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry *) buf;
	size_t copied = 0;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	mutex_lock(&dev->lock);
	/* wait (or bail out) until the writer has queued something */
	while (ring_is_empty(hdr)) {
		mutex_unlock(&dev->lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		/* lockless condition check is fine: re-tested under the
		 * lock at the top of the loop */
		err = wait_event_interruptible(dev->readout_wq,
			!ring_is_empty(hdr));
		if (err)
			return err;
		mutex_lock(&dev->lock);
	}

	/* drain as many whole entries as fit in the user buffer */
	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
			sizeof(*entry))) {
			mutex_unlock(&dev->lock);
			return -EFAULT;
		}

		/* consume the slot, wrapping at the end of the ring */
		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	/* file offset is (ab)used to report the current read index */
	*off = hdr->read_idx;
	mutex_unlock(&dev->lock);

	return copied;
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
|
||||
{
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
|
||||
dev->write_enabled = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
|
||||
{
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
|
||||
dev->write_enabled = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev,
|
||||
size_t size)
|
||||
{
|
||||
struct nvgpu_ctxsw_ring_header *hdr;
|
||||
|
||||
if (atomic_read(&dev->vma_ref))
|
||||
return -EBUSY;
|
||||
|
||||
if ((dev->write_enabled) || (atomic_read(&dev->vma_ref)))
|
||||
return -EBUSY;
|
||||
|
||||
size = roundup(size, PAGE_SIZE);
|
||||
hdr = vmalloc_user(size);
|
||||
if (!hdr)
|
||||
return -ENOMEM;
|
||||
|
||||
if (dev->hdr)
|
||||
vfree(dev->hdr);
|
||||
|
||||
dev->hdr = hdr;
|
||||
dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
|
||||
dev->size = size;
|
||||
|
||||
hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
|
||||
hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
|
||||
hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header))
|
||||
/ sizeof(struct nvgpu_ctxsw_trace_entry);
|
||||
hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
|
||||
hdr->drop_count = 0;
|
||||
hdr->read_idx = 0;
|
||||
hdr->write_idx = 0;
|
||||
hdr->write_seqno = 0;
|
||||
|
||||
gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
|
||||
dev->size, dev->hdr, dev->ents, hdr->num_ents);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
|
||||
struct nvgpu_ctxsw_ring_setup_args *args)
|
||||
{
|
||||
size_t size = args->size;
|
||||
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
|
||||
|
||||
if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
return gk20a_ctxsw_dev_ring_alloc(dev, size);
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
|
||||
struct nvgpu_ctxsw_trace_filter_args *args)
|
||||
{
|
||||
dev->filter = args->filter;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
|
||||
struct nvgpu_ctxsw_trace_filter_args *args)
|
||||
{
|
||||
args->filter = dev->filter;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
|
||||
{
|
||||
struct gk20a *g = dev->g;
|
||||
int err;
|
||||
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
|
||||
|
||||
err = gk20a_busy(g->dev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (g->ops.fecs_trace.flush(g))
|
||||
err = g->ops.fecs_trace.flush(g);
|
||||
|
||||
if (likely(!err))
|
||||
err = g->ops.fecs_trace.poll(g);
|
||||
|
||||
gk20a_idle(g->dev);
|
||||
return err;
|
||||
}
|
||||
|
||||
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct gk20a *g;
|
||||
struct gk20a_ctxsw_trace *trace;
|
||||
struct gk20a_ctxsw_dev *dev;
|
||||
int err;
|
||||
size_t size;
|
||||
u32 n;
|
||||
|
||||
/* only one VM for now */
|
||||
const int vmid = 0;
|
||||
|
||||
g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
err = gk20a_busy(g->dev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
trace = g->ctxsw_trace;
|
||||
if (!trace) {
|
||||
err = -ENODEV;
|
||||
goto idle;
|
||||
}
|
||||
|
||||
/* Allow only one user for this device */
|
||||
dev = &trace->devs[vmid];
|
||||
mutex_lock(&dev->lock);
|
||||
if (dev->hdr) {
|
||||
err = -EBUSY;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* By default, allocate ring buffer big enough to accommodate
|
||||
* FECS records with default event filter */
|
||||
|
||||
/* enable all traces by default */
|
||||
NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
|
||||
|
||||
/* compute max number of entries generated with this filter */
|
||||
n = g->ops.fecs_trace.max_entries(g, &dev->filter);
|
||||
|
||||
size = sizeof(struct nvgpu_ctxsw_ring_header) +
|
||||
n * sizeof(struct nvgpu_ctxsw_trace_entry);
|
||||
gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
|
||||
size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
|
||||
|
||||
err = gk20a_ctxsw_dev_ring_alloc(dev, size);
|
||||
if (!err) {
|
||||
filp->private_data = dev;
|
||||
gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
|
||||
filp, dev, size);
|
||||
}
|
||||
|
||||
err = g->ops.fecs_trace.enable(g);
|
||||
|
||||
done:
|
||||
mutex_unlock(&dev->lock);
|
||||
|
||||
idle:
|
||||
gk20a_idle(g->dev);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = filp->private_data;
|
||||
struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);
|
||||
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
|
||||
|
||||
mutex_lock(&dev->lock);
|
||||
dev->write_enabled = false;
|
||||
if (dev->hdr) {
|
||||
vfree(dev->hdr);
|
||||
dev->hdr = NULL;
|
||||
}
|
||||
|
||||
g->ops.fecs_trace.disable(g);
|
||||
|
||||
mutex_unlock(&dev->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = filp->private_data;
|
||||
struct gk20a *g = dev->g;
|
||||
u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
|
||||
int err = 0;
|
||||
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
|
||||
|
||||
if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
|
||||
|| (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST))
|
||||
return -EINVAL;
|
||||
|
||||
BUG_ON(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE);
|
||||
|
||||
memset(buf, 0, sizeof(buf));
|
||||
if (_IOC_DIR(cmd) & _IOC_WRITE) {
|
||||
if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
mutex_lock(&dev->lock);
|
||||
|
||||
switch (cmd) {
|
||||
case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
|
||||
err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
|
||||
err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_RING_SETUP:
|
||||
err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
|
||||
(struct nvgpu_ctxsw_ring_setup_args *) buf);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_SET_FILTER:
|
||||
err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
|
||||
(struct nvgpu_ctxsw_trace_filter_args *) buf);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_GET_FILTER:
|
||||
err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
|
||||
(struct nvgpu_ctxsw_trace_filter_args *) buf);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_POLL:
|
||||
mutex_unlock(&dev->lock);
|
||||
err = gk20a_ctxsw_dev_ioctl_poll(dev);
|
||||
mutex_lock(&dev->lock);
|
||||
break;
|
||||
default:
|
||||
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
|
||||
cmd);
|
||||
err = -ENOTTY;
|
||||
}
|
||||
|
||||
mutex_unlock(&dev->lock);
|
||||
|
||||
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
|
||||
err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* poll(2) handler: readable whenever the ring holds at least one entry. */
unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	unsigned int mask;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	mutex_lock(&dev->lock);
	poll_wait(filp, &dev->readout_wq, wait);
	mask = ring_is_empty(hdr) ? 0 : (POLLIN | POLLRDNORM);
	mutex_unlock(&dev->lock);

	return mask;
}
|
||||
|
||||
static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
|
||||
|
||||
atomic_inc(&dev->vma_ref);
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
|
||||
atomic_read(&dev->vma_ref));
|
||||
}
|
||||
|
||||
static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
|
||||
|
||||
atomic_dec(&dev->vma_ref);
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
|
||||
atomic_read(&dev->vma_ref));
|
||||
}
|
||||
|
||||
static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
|
||||
.open = gk20a_ctxsw_dev_vma_open,
|
||||
.close = gk20a_ctxsw_dev_vma_close,
|
||||
};
|
||||
|
||||
/*
 * mmap(2) handler: map the vmalloc'd ring (header + entries)
 * into userspace. The vma open/close ops maintain vma_ref, which
 * blocks ring reallocation while a mapping exists.
 */
int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	int ret;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
		vma->vm_start, vma->vm_end);

	ret = remap_vmalloc_range(vma, dev->hdr, 0);
	if (likely(!ret)) {
		vma->vm_private_data = dev;
		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
		/* take the initial vma_ref explicitly; the close op
		 * will drop it when the mapping goes away */
		vma->vm_ops->open(vma);
	}

	return ret;
}
|
||||
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
static int gk20a_ctxsw_init_devs(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
|
||||
struct gk20a_ctxsw_dev *dev = trace->devs;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
|
||||
dev->g = g;
|
||||
dev->hdr = NULL;
|
||||
dev->write_enabled = false;
|
||||
init_waitqueue_head(&dev->readout_wq);
|
||||
mutex_init(&dev->lock);
|
||||
atomic_set(&dev->vma_ref, 0);
|
||||
dev++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Allocate the per-GPU ctxsw trace state (idempotent) and initialize
 * the per-VM devices plus the chip-specific FECS trace backend.
 * Returns 0 on success, or when tracing is compiled out.
 */
int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* already initialized: nothing to do */
	if (likely(trace))
		return 0;

	trace = kzalloc(sizeof(*trace), GFP_KERNEL);
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	/* undo the allocation so a later init can retry from scratch */
	kfree(trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}
|
||||
|
||||
/* Tear down ctxsw trace state; counterpart of gk20a_ctxsw_trace_init(). */
void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	/*
	 * NOTE(review): the trace state is freed *before* the FECS
	 * backend is deinitialized; this is safe only if deinit() never
	 * touches g->ctxsw_trace -- confirm, or swap the order.
	 */
	kfree(g->ctxsw_trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}
|
||||
|
||||
/*
 * Push one trace entry into the per-VM userspace ring.
 *
 * Returns 0 on success; -ENODEV for a bad vmid or a released device;
 * -EBUSY when tracing is disabled; -ENOSPC when the ring is full.
 * Dropped entries bump hdr->drop_count; filtered entries are skipped
 * silently (return 0).
 */
int gk20a_ctxsw_trace_write(struct gk20a *g,
		struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	mutex_lock(&dev->lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	/* NOTE(review): a sequence number is consumed even for entries
	 * that end up dropped or filtered, so readers can detect gaps --
	 * presumably intentional; confirm against the uapi contract */
	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	gk20a_dbg(gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[hdr->write_idx] = *entry;

	/* ensure record is written before updating write index */
	smp_wmb();

	hdr->write_idx++;
	if (unlikely(hdr->write_idx >= hdr->num_ents))
		hdr->write_idx = 0;
	gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	mutex_unlock(&dev->lock);
	return ret;

drop:
	hdr->drop_count++;

filter:
	gk20a_dbg(gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	mutex_unlock(&dev->lock);
	return ret;
}
|
||||
|
||||
void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[vmid];
|
||||
|
||||
wake_up_interruptible(&dev->readout_wq);
|
||||
}
|
||||
41
drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
Normal file
41
drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
#ifndef __CTXSW_TRACE_GK20A_H
|
||||
#define __CTXSW_TRACE_GK20A_H
|
||||
|
||||
#define GK20A_CTXSW_TRACE_NUM_DEVS 1
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_ctxsw_trace_entry;
|
||||
struct channel_gk20a;
|
||||
struct channel_ctx_gk20a;
|
||||
struct gk20a_ctxsw_dev;
|
||||
struct gk20a_fecs_trace;
|
||||
|
||||
|
||||
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
|
||||
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
|
||||
long gk20a_ctxsw_dev_ioctl(struct file *filp,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *);
|
||||
unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *);
|
||||
int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *);
|
||||
|
||||
int gk20a_ctxsw_trace_init(struct gk20a *);
|
||||
int gk20a_ctxsw_trace_setup(struct gk20a *, void *ctx_ptr);
|
||||
void gk20a_ctxsw_trace_cleanup(struct gk20a *);
|
||||
int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *);
|
||||
void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid);
|
||||
|
||||
#endif /* __CTXSW_TRACE_GK20A_H */
|
||||
763
drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
Normal file
763
drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
Normal file
@@ -0,0 +1,763 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/circ_buf.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/nvgpu.h>
|
||||
#include <linux/hashtable.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/log2.h>
|
||||
#include <uapi/linux/nvgpu.h>
|
||||
#include "ctxsw_trace_gk20a.h"
|
||||
#include "fecs_trace_gk20a.h"
|
||||
#include "gk20a.h"
|
||||
#include "gr_gk20a.h"
|
||||
#include "hw_ctxsw_prog_gk20a.h"
|
||||
#include "hw_gr_gk20a.h"
|
||||
|
||||
/*
|
||||
* If HW circular buffer is getting too many "buffer full" conditions,
|
||||
* increasing this constant should help (it drives Linux' internal buffer size).
|
||||
*/
|
||||
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6)
|
||||
#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
|
||||
#define GK20A_FECS_TRACE_FRAME_PERIOD_NS (1000000000ULL/60ULL)
|
||||
#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
|
||||
|
||||
struct gk20a_fecs_trace_record {
|
||||
u32 magic_lo;
|
||||
u32 magic_hi;
|
||||
u32 context_id;
|
||||
u32 context_ptr;
|
||||
u32 new_context_id;
|
||||
u32 new_context_ptr;
|
||||
u64 ts[];
|
||||
};
|
||||
|
||||
struct gk20a_fecs_trace_hash_ent {
|
||||
u32 context_ptr;
|
||||
pid_t pid;
|
||||
struct hlist_node node;
|
||||
};
|
||||
|
||||
struct gk20a_fecs_trace {
|
||||
|
||||
struct mem_desc trace_buf;
|
||||
DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
|
||||
struct mutex hash_lock;
|
||||
struct mutex poll_lock;
|
||||
u64 sof;
|
||||
u32 sof_mask; /* did we already send a SOF for this VM */
|
||||
|
||||
struct task_struct *poll_task;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
/* Extract the tag field from the hi word of a FECS timestamp entry. */
static inline u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
{
	u32 hi = (u32) (ts >> 32);

	return ctxsw_prog_record_timestamp_timestamp_hi_tag_v(hi);
}
|
||||
|
||||
/* Mask off the tag bits in the hi word, leaving the raw timestamp. */
static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
{
	u64 tag_mask =
		((u64) ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32;

	return ts & ~tag_mask;
}
|
||||
|
||||
|
||||
/*
 * Compute the context_ptr value FECS reports for a channel: the
 * physical address of the channel's instance block shifted right by
 * 12 -- presumably its 4KB page frame number, matching the format in
 * the FECS records; confirm against the ucode interface.
 */
static u32 gk20a_fecs_trace_fecs_context_ptr(struct channel_gk20a *ch)
{
	return (u32) (sg_phys(ch->inst_block.sgt->sgl) >> 12LL);
}
|
||||
|
||||
static inline int gk20a_fecs_trace_num_ts(void)
|
||||
{
|
||||
return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
|
||||
- sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
|
||||
}
|
||||
|
||||
/*
 * Return a pointer to record 'idx' inside the FECS master ring buffer.
 * Records have a fixed, HW-defined size, so this is plain indexing.
 * NOTE(review): not static although every use visible here is local --
 * confirm whether another file needs it, otherwise make it static.
 */
struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
	struct gk20a_fecs_trace *trace, int idx)
{
	return (struct gk20a_fecs_trace_record *)
		((u8 *) trace->trace_buf.cpu_va
		+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
}
|
||||
|
||||
static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
|
||||
{
|
||||
/*
|
||||
* testing magic_hi should suffice. magic_lo is sometimes used
|
||||
* as a sequence number in experimental ucode.
|
||||
*/
|
||||
return (r->magic_hi
|
||||
== ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
|
||||
}
|
||||
|
||||
/* Read the SW (kernel) read pointer FECS mirrors in mailbox 1,
 * under ELPG protection so the GR engine is powered for the access. */
static int gk20a_fecs_trace_get_read_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
		gk20a_readl(g, gr_fecs_mailbox1_r()));
}
|
||||
|
||||
/* Read the FECS write pointer published in mailbox 0, under ELPG
 * protection so the GR engine is powered for the register access. */
static int gk20a_fecs_trace_get_write_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
		gk20a_readl(g, gr_fecs_mailbox0_r()));
}
|
||||
|
||||
/* Publish a new read pointer to FECS via mailbox 1. The comma
 * expression makes the register write fit the value-returning
 * protected-call macro while yielding 0 as the result. */
static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
{
	gk20a_dbg(gpu_dbg_ctxsw, "set read=%d", index);
	return gr_gk20a_elpg_protected_call(g,
		(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
}
|
||||
|
||||
void gk20a_fecs_trace_hash_dump(struct gk20a *g)
|
||||
{
|
||||
u32 bkt;
|
||||
struct gk20a_fecs_trace_hash_ent *ent;
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table");
|
||||
|
||||
mutex_lock(&trace->hash_lock);
|
||||
hash_for_each(trace->pid_hash_table, bkt, ent, node)
|
||||
{
|
||||
gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
|
||||
ent, bkt, ent->context_ptr, ent->pid);
|
||||
|
||||
}
|
||||
mutex_unlock(&trace->hash_lock);
|
||||
}
|
||||
|
||||
/* Record the pid that owns a FECS context_ptr (hash-table insert). */
static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct gk20a_fecs_trace_hash_ent *ent;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);

	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
	if (unlikely(!ent)) {
		gk20a_warn(dev_from_gk20a(g),
			"can't alloc new hash entry for context_ptr=%x pid=%d",
			context_ptr, pid);
		return -ENOMEM;
	}

	ent->context_ptr = context_ptr;
	ent->pid = pid;

	mutex_lock(&trace->hash_lock);
	hash_add(trace->pid_hash_table, &ent->node, context_ptr);
	mutex_unlock(&trace->hash_lock);

	return 0;
}
|
||||
|
||||
/* Remove (and free) the pid mapping for context_ptr, if one exists. */
static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct gk20a_fecs_trace_hash_ent *he;
	struct hlist_node *n;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"freeing hash entry context_ptr=%x", context_ptr);

	mutex_lock(&trace->hash_lock);
	hash_for_each_possible_safe(trace->pid_hash_table, he, n, node,
		context_ptr) {
		if (he->context_ptr != context_ptr)
			continue;
		hash_del(&he->node);
		gk20a_dbg(gpu_dbg_ctxsw,
			"freed hash entry=%p context_ptr=%x", he,
			he->context_ptr);
		kfree(he);
		break;
	}
	mutex_unlock(&trace->hash_lock);
}
|
||||
|
||||
static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
|
||||
{
|
||||
u32 bkt;
|
||||
struct hlist_node *tmp;
|
||||
struct gk20a_fecs_trace_hash_ent *ent;
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);
|
||||
|
||||
mutex_lock(&trace->hash_lock);
|
||||
hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
|
||||
hash_del(&ent->node);
|
||||
kfree(ent);
|
||||
}
|
||||
mutex_unlock(&trace->hash_lock);
|
||||
|
||||
}
|
||||
|
||||
/* Look up the pid that owns context_ptr; returns 0 when unknown. */
static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct gk20a_fecs_trace_hash_ent *he;
	pid_t pid = 0;

	mutex_lock(&trace->hash_lock);
	hash_for_each_possible(trace->pid_hash_table, he, node, context_ptr) {
		if (he->context_ptr != context_ptr)
			continue;
		gk20a_dbg(gpu_dbg_ctxsw,
			"found context_ptr=%x -> pid=%d",
			he->context_ptr, he->pid);
		pid = he->pid;
		break;
	}
	mutex_unlock(&trace->hash_lock);

	return pid;
}
|
||||
|
||||
/*
 * Converts HW entry format to userspace-facing format and pushes it to the
 * queue.
 *
 * One FECS record describes one context switch and carries up to
 * gk20a_fecs_trace_num_ts() tagged timestamps; each valid timestamp is
 * expanded into one nvgpu_ctxsw_trace_entry. Returns 0 on success,
 * -EINVAL if the record's magic words are invalid.
 */
static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
{
	int i;
	struct nvgpu_ctxsw_trace_entry entry = { };
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t cur_pid;
	pid_t new_pid;

	/* for now, only one VM */
	const int vmid = 0;

	struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(
		trace, index);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"consuming record trace=%p read=%d record=%p", trace, index, r);

	/* reject records whose magic markers were not written by FECS */
	if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
		gk20a_warn(dev_from_gk20a(g),
			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
			trace, index, r, r->magic_lo, r->magic_hi);
		return -EINVAL;
	}

	/* map both the outgoing and incoming context to submitting pids */
	cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
	new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
		r->context_ptr, cur_pid, r->new_context_ptr, new_pid);

	entry.context_id = r->context_id;
	entry.vmid = vmid;

	/*
	 * insert SOF event if needed: sof_mask is cleared once per poll
	 * cycle (see gk20a_fecs_trace_poll), so the SOF entry is emitted
	 * at most once per vmid per frame.
	 */
	if (!(trace->sof_mask & BIT(vmid))) {
		entry.tag = NVGPU_CTXSW_TAG_SOF;
		entry.timestamp = trace->sof;
		entry.context_id = 0;
		entry.pid = 0;

		gk20a_dbg(gpu_dbg_ctxsw, "SOF time=%llx", entry.timestamp);
		gk20a_ctxsw_trace_write(g, &entry);
		trace->sof_mask |= BIT(vmid);
	}

	/* break out FECS record into trace events */
	for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {

		entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
		entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
		/* HW stores a truncated PTIMER value; restore the scale */
		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;

		gk20a_dbg(gpu_dbg_ctxsw,
			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
			entry.tag, entry.timestamp, r->context_id,
			r->new_context_id);

		/* the tag determines whether the event belongs to the
		 * context being restored (new) or saved (current) */
		switch (entry.tag) {
		case NVGPU_CTXSW_TAG_RESTORE_START:
		case NVGPU_CTXSW_TAG_CONTEXT_START:
			entry.context_id = r->new_context_id;
			entry.pid = new_pid;
			break;

		case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
		case NVGPU_CTXSW_TAG_FE_ACK:
		case NVGPU_CTXSW_TAG_FE_ACK_WFI:
		case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
		case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
		case NVGPU_CTXSW_TAG_FE_ACK_CILP:
		case NVGPU_CTXSW_TAG_SAVE_END:
			entry.context_id = r->context_id;
			entry.pid = cur_pid;
			break;

		default:
			/* tags are not guaranteed to start at the beginning */
			WARN_ON(entry.tag && (entry.tag != NVGPU_CTXSW_TAG_INVALID_TIMESTAMP));
			continue;
		}

		gk20a_dbg(gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
			entry.tag, entry.context_id, entry.pid);

		/* entries without a context id carry no information */
		if (!entry.context_id)
			continue;

		gk20a_ctxsw_trace_write(g, &entry);
	}

	/* notify any poll()ing readers of the user-facing ring */
	gk20a_ctxsw_trace_wake_up(g, vmid);
	return 0;
}
|
||||
|
||||
static int gk20a_fecs_trace_poll(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
int read = 0;
|
||||
int write = 0;
|
||||
int cnt;
|
||||
int err;
|
||||
|
||||
err = gk20a_busy(g->dev);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
mutex_lock(&trace->poll_lock);
|
||||
write = gk20a_fecs_trace_get_write_index(g);
|
||||
if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"failed to acquire write index, write=%d", write);
|
||||
err = write;
|
||||
goto done;
|
||||
}
|
||||
|
||||
read = gk20a_fecs_trace_get_read_index(g);
|
||||
|
||||
cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
if (!cnt)
|
||||
goto done;
|
||||
|
||||
gk20a_dbg(gpu_dbg_ctxsw,
|
||||
"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
|
||||
read, gk20a_fecs_trace_get_read_index(g), write, cnt);
|
||||
|
||||
/* we did not send any SOF yet */
|
||||
trace->sof_mask = 0;
|
||||
|
||||
/* consume all records */
|
||||
while (read != write) {
|
||||
gk20a_fecs_trace_ring_read(g, read);
|
||||
|
||||
/* Get to next record. */
|
||||
read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
|
||||
gk20a_fecs_trace_set_read_index(g, read);
|
||||
}
|
||||
|
||||
done:
|
||||
/*
|
||||
* OK, we read out all the entries... a new "frame" starts here.
|
||||
* We remember the Start Of Frame time and insert it on the next
|
||||
* iteration.
|
||||
*/
|
||||
trace->sof = gk20a_read_ptimer(g);
|
||||
|
||||
mutex_unlock(&trace->poll_lock);
|
||||
gk20a_idle(g->dev);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gk20a_fecs_trace_periodic_polling(void *arg)
|
||||
{
|
||||
struct gk20a *g = (struct gk20a *)arg;
|
||||
struct timespec ts = ns_to_timespec(GK20A_FECS_TRACE_FRAME_PERIOD_NS);
|
||||
|
||||
pr_info("%s: running\n", __func__);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
|
||||
hrtimer_nanosleep(&ts, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
|
||||
|
||||
gk20a_fecs_trace_poll(g);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_fecs_trace_alloc_ring(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
return gk20a_gmmu_alloc(g, GK20A_FECS_TRACE_NUM_RECORDS
|
||||
* ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
|
||||
&trace->trace_buf);
|
||||
}
|
||||
|
||||
static void gk20a_fecs_trace_free_ring(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
gk20a_gmmu_free(g, &trace->trace_buf);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
/*
|
||||
* The sequence iterator functions. We simply use the count of the
|
||||
* next line as our internal position.
|
||||
*/
|
||||
static void *gk20a_fecs_trace_debugfs_ring_seq_start(
|
||||
struct seq_file *s, loff_t *pos)
|
||||
{
|
||||
if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
|
||||
return NULL;
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
/* Advance the iterator; NULL once the whole ring has been visited. */
static void *gk20a_fecs_trace_debugfs_ring_seq_next(
		struct seq_file *s, void *v, loff_t *pos)
{
	(*pos)++;
	return (*pos < GK20A_FECS_TRACE_NUM_RECORDS) ? pos : NULL;
}
|
||||
|
||||
/* Nothing to release at end of a dump: the iterator holds no state. */
static void gk20a_fecs_trace_debugfs_ring_seq_stop(
		struct seq_file *s, void *v)
{
}
|
||||
|
||||
static int gk20a_fecs_trace_debugfs_ring_seq_show(
|
||||
struct seq_file *s, void *v)
|
||||
{
|
||||
loff_t *pos = (loff_t *) v;
|
||||
struct gk20a *g = *(struct gk20a **)s->private;
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos);
|
||||
int i;
|
||||
const u32 invalid_tag =
|
||||
ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
|
||||
u32 tag;
|
||||
u64 timestamp;
|
||||
|
||||
seq_printf(s, "record #%lld (%p)\n", *pos, r);
|
||||
seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo);
|
||||
seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi);
|
||||
if (gk20a_fecs_trace_is_valid_record(r)) {
|
||||
seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr);
|
||||
seq_printf(s, "\tcontext_id=%08x\n", r->context_id);
|
||||
seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr);
|
||||
seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id);
|
||||
for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
|
||||
tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
|
||||
if (tag == invalid_tag)
|
||||
continue;
|
||||
timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
|
||||
timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
|
||||
seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Tie them all together into a set of seq_operations.
 * Used by gk20a_ctxsw_debugfs_ring_open() to iterate the whole ring,
 * one record per show() call.
 */
const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = {
	.start = gk20a_fecs_trace_debugfs_ring_seq_start,
	.next = gk20a_fecs_trace_debugfs_ring_seq_next,
	.stop = gk20a_fecs_trace_debugfs_ring_seq_stop,
	.show = gk20a_fecs_trace_debugfs_ring_seq_show
};
|
||||
|
||||
/*
|
||||
* Time to set up the file operations for our /proc file. In this case,
|
||||
* all we need is an open function which sets up the sequence ops.
|
||||
*/
|
||||
|
||||
static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode,
|
||||
struct file *file)
|
||||
{
|
||||
struct gk20a **p;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops,
|
||||
sizeof(struct gk20a *));
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
*p = (struct gk20a *)inode->i_private;
|
||||
return 0;
|
||||
};
|
||||
|
||||
/*
 * The file operations structure contains our open function along with
 * set of the canned seq_ ops. seq_release_private frees the private
 * gk20a pointer allocated in __seq_open_private().
 */
const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_ctxsw_debugfs_ring_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private
};
|
||||
|
||||
/* debugfs getter: current FECS ring read index. */
static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val)
{
	struct gk20a *g = (struct gk20a *)arg;

	*val = gk20a_fecs_trace_get_read_index(g);
	return 0;
}
|
||||
/* Read-only debugfs attribute exposing the FECS ring read index. */
DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops,
	gk20a_fecs_trace_debugfs_read, NULL, "%llu\n");
|
||||
|
||||
/* debugfs getter: current FECS ring write index. */
static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val)
{
	struct gk20a *g = (struct gk20a *)arg;

	*val = gk20a_fecs_trace_get_write_index(g);
	return 0;
}
|
||||
/* Read-only debugfs attribute exposing the FECS ring write index. */
DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops,
	gk20a_fecs_trace_debugfs_write, NULL, "%llu\n");
|
||||
|
||||
/*
 * Expose the FECS ring state under the platform debugfs directory:
 * read index, write index, and a full ring dump. Root-only (0600).
 */
static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *plat = platform_get_drvdata(g->dev);

	debugfs_create_file("ctxsw_trace_read", 0600, plat->debugfs, g,
		&gk20a_fecs_trace_debugfs_read_fops);
	debugfs_create_file("ctxsw_trace_write", 0600, plat->debugfs, g,
		&gk20a_fecs_trace_debugfs_write_fops);
	debugfs_create_file("ctxsw_trace_ring", 0600, plat->debugfs, g,
		&gk20a_fecs_trace_debugfs_ring_fops);
}
|
||||
|
||||
/* Tear down the debugfs entries created in gk20a_fecs_trace_debugfs_init. */
static void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g)
{
	struct gk20a_platform *plat = platform_get_drvdata(g->dev);

	/*
	 * NOTE(review): this removes the *entire* platform debugfs
	 * directory recursively, not just the three ctxsw_trace_* files
	 * created above — verify no other entries under plat->debugfs
	 * are still needed when the trace is deinitialized.
	 */
	debugfs_remove_recursive(plat->debugfs);
}
|
||||
|
||||
#else
|
||||
|
||||
/* Stub when CONFIG_DEBUG_FS is disabled. */
static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
{
}
|
||||
|
||||
/* Stub when CONFIG_DEBUG_FS is disabled. */
static inline void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g)
{
}
|
||||
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
static int gk20a_fecs_trace_init(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_fecs_trace *trace;
|
||||
int err;
|
||||
|
||||
trace = kzalloc(sizeof(struct gk20a_fecs_trace), GFP_KERNEL);
|
||||
if (!trace) {
|
||||
gk20a_warn(dev_from_gk20a(g), "failed to allocate fecs_trace");
|
||||
return -ENOMEM;
|
||||
}
|
||||
g->fecs_trace = trace;
|
||||
|
||||
BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
|
||||
err = gk20a_fecs_trace_alloc_ring(g);
|
||||
if (err) {
|
||||
gk20a_warn(dev_from_gk20a(g), "failed to allocate FECS ring");
|
||||
goto clean;
|
||||
}
|
||||
|
||||
mutex_init(&trace->poll_lock);
|
||||
mutex_init(&trace->hash_lock);
|
||||
hash_init(trace->pid_hash_table);
|
||||
|
||||
gk20a_fecs_trace_debugfs_init(g);
|
||||
return 0;
|
||||
|
||||
clean:
|
||||
kfree(trace);
|
||||
g->fecs_trace = NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Bind a channel to the FECS trace: write the master ring buffer's
 * physical address and record count into the channel's context image so
 * FECS can log context switches, and register the channel's pid for the
 * context_ptr so later records can be attributed to the submitting
 * process. Returns 0 on success, -ENOMEM on any allocation/map failure.
 */
static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
		struct channel_gk20a *ch)
{
	/*
	 * map our circ_buf to the context space and store the GPU VA
	 * in the context header.
	 */

	u32 lo;
	u32 hi;
	phys_addr_t pa;
	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	void *ctx_ptr;
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"hw_chid=%d context_ptr=%x inst_block=%llx",
		ch->hw_chid, context_ptr, gk20a_mem_phys(&ch->inst_block));

	/* trace may not have been initialized (init failed or disabled) */
	if (!trace)
		return -ENOMEM;

	pa = gk20a_mem_phys(&trace->trace_buf);
	if (!pa)
		return -ENOMEM;

	/* map the gr context image so we can patch its header fields */
	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
		PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0,
		pgprot_writecombine(PAGE_KERNEL));
	if (!ctx_ptr)
		return -ENOMEM;

	lo = u64_lo32(pa);
	hi = u64_hi32(pa);

	gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
		lo, GK20A_FECS_TRACE_NUM_RECORDS);

	/* write ring buffer address (lo/hi) and size into the ctx header */
	gk20a_mem_wr32(ctx_ptr
		+ ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
		0, lo);
	gk20a_mem_wr32(ctx_ptr
		+ ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
		0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi));
	gk20a_mem_wr32(ctx_ptr
		+ ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
		0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
			GK20A_FECS_TRACE_NUM_RECORDS));

	vunmap(ctx_ptr);
	/* remember who submitted work for this context */
	gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid);

	return 0;
}
|
||||
|
||||
static int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
|
||||
{
|
||||
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);
|
||||
|
||||
gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
|
||||
"ch=%p context_ptr=%x", ch, context_ptr);
|
||||
|
||||
if (g->ops.fecs_trace.flush)
|
||||
g->ops.fecs_trace.flush(g);
|
||||
gk20a_fecs_trace_poll(g);
|
||||
gk20a_fecs_trace_hash_del(g, context_ptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Flush (when the HAL provides it) and drain the FECS ring, then rewind
 * the read index to 0. Called before engine reset, which invalidates
 * the HW read/write indices.
 */
static int gk20a_fecs_trace_reset(struct gk20a *g)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	if (g->ops.fecs_trace.flush)
		g->ops.fecs_trace.flush(g);
	gk20a_fecs_trace_poll(g);
	return gk20a_fecs_trace_set_read_index(g, 0);
}
|
||||
|
||||
static int gk20a_fecs_trace_deinit(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
gk20a_fecs_trace_debugfs_cleanup(g);
|
||||
kthread_stop(trace->poll_task);
|
||||
gk20a_fecs_trace_free_ring(g);
|
||||
gk20a_fecs_trace_free_hash_table(g);
|
||||
|
||||
kfree(g->fecs_trace);
|
||||
g->fecs_trace = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_gr_max_entries(struct gk20a *g,
|
||||
struct nvgpu_ctxsw_trace_filter *filter)
|
||||
{
|
||||
int n;
|
||||
int tag;
|
||||
|
||||
/* Compute number of entries per record, with given filter */
|
||||
for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
|
||||
n += (NVGPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
|
||||
|
||||
/* Return max number of entries generated for the whole ring */
|
||||
return n * GK20A_FECS_TRACE_NUM_RECORDS;
|
||||
}
|
||||
|
||||
static int gk20a_fecs_trace_enable(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
struct task_struct *task;
|
||||
|
||||
if (!trace->poll_task) {
|
||||
task = kthread_run(gk20a_fecs_trace_periodic_polling, g, __func__);
|
||||
if (unlikely(IS_ERR(task))) {
|
||||
gk20a_warn(dev_from_gk20a(g), "failed to create FECS polling task");
|
||||
return PTR_ERR(task);
|
||||
}
|
||||
trace->poll_task = task;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Stop tracing: halt the periodic polling kthread if it is running.
 *
 * NOTE(review): this always returns -EPERM, even after successfully
 * stopping the poll task — presumably because disabling tracing at the
 * FECS/ucode level is not yet supported (listed as pending ucode work
 * in the commit message). Confirm callers rely on this before changing
 * it to return 0.
 */
static int gk20a_fecs_trace_disable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	if (trace->poll_task) {
		kthread_stop(trace->poll_task);
		trace->poll_task = NULL;
	}

	return -EPERM;
}
|
||||
|
||||
/*
 * Wire up the gk20a implementation of the fecs_trace HAL.
 * .flush is deliberately NULL: gk20a has no FECS flush method, and
 * every call site (unbind_channel, reset) checks for NULL first.
 */
void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
{
	ops->fecs_trace.init = gk20a_fecs_trace_init;
	ops->fecs_trace.deinit = gk20a_fecs_trace_deinit;
	ops->fecs_trace.enable = gk20a_fecs_trace_enable;
	ops->fecs_trace.disable = gk20a_fecs_trace_disable;
	ops->fecs_trace.reset = gk20a_fecs_trace_reset;
	ops->fecs_trace.flush = NULL;
	ops->fecs_trace.poll = gk20a_fecs_trace_poll;
	ops->fecs_trace.bind_channel = gk20a_fecs_trace_bind_channel;
	ops->fecs_trace.unbind_channel = gk20a_fecs_trace_unbind_channel;
	ops->fecs_trace.max_entries = gk20a_gr_max_entries;
}
|
||||
#else
|
||||
/* Stub when CONFIG_GK20A_CTXSW_TRACE is disabled: leave all ops unset. */
void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
{
}
|
||||
#endif /* CONFIG_GK20A_CTXSW_TRACE */
|
||||
20
drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
Normal file
20
drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
#ifndef __FECS_TRACE_GK20A_H
|
||||
#define __FECS_TRACE_GK20A_H
|
||||
|
||||
struct gpu_ops;
|
||||
void gk20a_init_fecs_trace_ops(struct gpu_ops *ops);
|
||||
|
||||
#endif /* __FECS_TRACE_GK20A_H */
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
#include "gk20a.h"
|
||||
#include "debug_gk20a.h"
|
||||
#include "ctxsw_trace_gk20a.h"
|
||||
#include "semaphore_gk20a.h"
|
||||
#include "hw_fifo_gk20a.h"
|
||||
#include "hw_pbdma_gk20a.h"
|
||||
@@ -778,8 +779,12 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
|
||||
gk20a_pmu_disable_elpg(g);
|
||||
/*HALT_PIPELINE method, halt GR engine*/
|
||||
if (gr_gk20a_halt_pipe(g))
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"failed to HALT gr pipe");
|
||||
gk20a_err(dev_from_gk20a(g), "failed to HALT gr pipe");
|
||||
/* resetting engine will alter read/write index.
|
||||
* need to flush circular buffer before re-enabling FECS.
|
||||
*/
|
||||
if (g->ops.fecs_trace.reset)
|
||||
g->ops.fecs_trace.reset(g);
|
||||
/* resetting engine using mc_enable_r() is not
|
||||
enough, we do full init sequence */
|
||||
gk20a_gr_reset(g);
|
||||
|
||||
@@ -60,6 +60,7 @@
|
||||
#include "hw_gr_gk20a.h"
|
||||
#include "hw_fb_gk20a.h"
|
||||
#include "gk20a_scale.h"
|
||||
#include "ctxsw_trace_gk20a.h"
|
||||
#include "dbg_gpu_gk20a.h"
|
||||
#include "gk20a_allocator.h"
|
||||
#include "hal.h"
|
||||
@@ -80,7 +81,7 @@
|
||||
/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
|
||||
#define INTERFACE_NAME "nvhost%s-gpu"
|
||||
|
||||
#define GK20A_NUM_CDEVS 6
|
||||
#define GK20A_NUM_CDEVS 7
|
||||
|
||||
#define EMC3D_DEFAULT_RATIO 750
|
||||
|
||||
@@ -169,6 +170,19 @@ static const struct file_operations gk20a_tsg_ops = {
|
||||
.unlocked_ioctl = gk20a_tsg_dev_ioctl,
|
||||
};
|
||||
|
||||
static const struct file_operations gk20a_ctxsw_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.release = gk20a_ctxsw_dev_release,
|
||||
.open = gk20a_ctxsw_dev_open,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = gk20a_ctxsw_dev_ioctl,
|
||||
#endif
|
||||
.unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
|
||||
.poll = gk20a_ctxsw_dev_poll,
|
||||
.read = gk20a_ctxsw_dev_read,
|
||||
.mmap = gk20a_ctxsw_dev_mmap,
|
||||
};
|
||||
|
||||
static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
|
||||
{
|
||||
writel(v, g->sim.regs+r);
|
||||
@@ -881,6 +895,10 @@ static int gk20a_pm_finalize_poweron(struct device *dev)
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = gk20a_ctxsw_trace_init(g);
|
||||
if (err)
|
||||
gk20a_warn(dev, "could not initialize ctxsw tracing");
|
||||
|
||||
/* Restore the debug setting */
|
||||
g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl);
|
||||
|
||||
@@ -1009,6 +1027,11 @@ void gk20a_user_deinit(struct platform_device *dev)
|
||||
cdev_del(&g->tsg.cdev);
|
||||
}
|
||||
|
||||
if (g->ctxsw.node) {
|
||||
device_destroy(g->class, g->ctxsw.cdev.dev);
|
||||
cdev_del(&g->ctxsw.cdev);
|
||||
}
|
||||
|
||||
if (g->cdev_region)
|
||||
unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);
|
||||
|
||||
@@ -1074,6 +1097,15 @@ int gk20a_user_init(struct platform_device *dev)
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
err = gk20a_create_device(dev, devno++, "-ctxsw",
|
||||
&g->ctxsw.cdev, &g->ctxsw.node,
|
||||
&gk20a_ctxsw_ops);
|
||||
if (err)
|
||||
goto fail;
|
||||
#endif
|
||||
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
gk20a_user_deinit(dev);
|
||||
@@ -1554,6 +1586,8 @@ static int __exit gk20a_remove(struct platform_device *dev)
|
||||
if (platform->has_cde)
|
||||
gk20a_cde_destroy(g);
|
||||
|
||||
gk20a_ctxsw_trace_cleanup(g);
|
||||
|
||||
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
|
||||
gk20a_scale_exit(dev);
|
||||
|
||||
@@ -2091,6 +2125,19 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name)
|
||||
return fw;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Read the 64-bit PTIMER value.
 *
 * The counter is exposed as two 32-bit registers, so the high word is
 * sampled twice around the low word: when the low word has its MSB set
 * it cannot have wrapped between the first high-word read and the
 * low-word read, so the first sample (time_hi0) is paired with it;
 * otherwise a wrap may have occurred and the second sample (time_hi1)
 * is used instead.
 */
u64 gk20a_read_ptimer(struct gk20a *g)
{
	u32 time_hi0 = gk20a_readl(g, timer_time_1_r());
	u32 time_lo = gk20a_readl(g, timer_time_0_r());
	u32 time_hi1 = gk20a_readl(g, timer_time_1_r());
	u32 time_hi = (time_lo & (1L << 31)) ? time_hi0 : time_hi1;
	u64 time = ((u64)time_hi << 32) | time_lo;

	return time;
}
|
||||
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
module_init(gk20a_init);
|
||||
module_exit(gk20a_exit);
|
||||
|
||||
@@ -25,6 +25,8 @@ struct channel_gk20a;
|
||||
struct gr_gk20a;
|
||||
struct sim_gk20a;
|
||||
struct gk20a_ctxsw_ucode_segments;
|
||||
struct gk20a_fecs_trace;
|
||||
struct gk20a_ctxsw_trace;
|
||||
struct acr_gm20b;
|
||||
|
||||
#include <linux/sched.h>
|
||||
@@ -372,6 +374,19 @@ struct gpu_ops {
|
||||
bool (*is_fw_defined)(void);
|
||||
bool use_dma_for_fw_bootstrap;
|
||||
} gr_ctx;
|
||||
struct {
|
||||
int (*init)(struct gk20a *g);
|
||||
int (*max_entries)(struct gk20a *,
|
||||
struct nvgpu_ctxsw_trace_filter *);
|
||||
int (*flush)(struct gk20a *g);
|
||||
int (*poll)(struct gk20a *g);
|
||||
int (*enable)(struct gk20a *g);
|
||||
int (*disable)(struct gk20a *g);
|
||||
int (*reset)(struct gk20a *g);
|
||||
int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
|
||||
int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
|
||||
int (*deinit)(struct gk20a *g);
|
||||
} fecs_trace;
|
||||
struct {
|
||||
bool (*support_sparse)(struct gk20a *g);
|
||||
bool (*is_debug_mode_enabled)(struct gk20a *g);
|
||||
@@ -613,6 +628,11 @@ struct gk20a {
|
||||
struct device *node;
|
||||
} tsg;
|
||||
|
||||
struct {
|
||||
struct cdev cdev;
|
||||
struct device *node;
|
||||
} ctxsw;
|
||||
|
||||
struct mutex client_lock;
|
||||
int client_refcount; /* open channels and ctrl nodes */
|
||||
|
||||
@@ -639,6 +659,9 @@ struct gk20a {
|
||||
|
||||
struct gk20a_scale_profile *scale_profile;
|
||||
|
||||
struct gk20a_ctxsw_trace *ctxsw_trace;
|
||||
struct gk20a_fecs_trace *fecs_trace;
|
||||
|
||||
struct device_dma_parameters dma_parms;
|
||||
|
||||
struct gk20a_cde_app cde_app;
|
||||
@@ -716,6 +739,7 @@ enum gk20a_dbg_categories {
|
||||
gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */
|
||||
gpu_dbg_cde = BIT(10), /* cde info messages */
|
||||
gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */
|
||||
gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */
|
||||
gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
|
||||
};
|
||||
|
||||
@@ -962,4 +986,6 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
|
||||
else
|
||||
return (timeout * 10) / scale10x;
|
||||
}
|
||||
|
||||
u64 gk20a_read_ptimer(struct gk20a *g);
|
||||
#endif /* GK20A_H */
|
||||
|
||||
@@ -56,6 +56,7 @@
|
||||
#include "debug_gk20a.h"
|
||||
#include "semaphore_gk20a.h"
|
||||
#include "platform_gk20a.h"
|
||||
#include "ctxsw_trace_gk20a.h"
|
||||
|
||||
#define BLK_SIZE (256)
|
||||
|
||||
@@ -2855,6 +2856,13 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
|
||||
"fail to load golden ctx image");
|
||||
goto out;
|
||||
}
|
||||
if (g->ops.fecs_trace.bind_channel) {
|
||||
err = g->ops.fecs_trace.bind_channel(g, c);
|
||||
if (err) {
|
||||
gk20a_warn(dev_from_gk20a(g),
|
||||
"fail to bind channel for ctxsw trace");
|
||||
}
|
||||
}
|
||||
c->first_init = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "gk20a_gating_reglist.h"
|
||||
#include "channel_gk20a.h"
|
||||
#include "gr_ctx_gk20a.h"
|
||||
#include "fecs_trace_gk20a.h"
|
||||
#include "mm_gk20a.h"
|
||||
#include "mc_gk20a.h"
|
||||
#include "pmu_gk20a.h"
|
||||
@@ -57,6 +58,7 @@ int gk20a_init_hal(struct gk20a *g)
|
||||
gk20a_init_mc(gops);
|
||||
gk20a_init_ltc(gops);
|
||||
gk20a_init_gr_ops(gops);
|
||||
gk20a_init_fecs_trace_ops(gops);
|
||||
gk20a_init_fb(gops);
|
||||
gk20a_init_fifo(gops);
|
||||
gk20a_init_ce2(gops);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2012-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -246,4 +246,192 @@ static inline u32 ctxsw_prog_main_image_context_id_o(void)
|
||||
{
|
||||
return 0x000000f0;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_o(void)
|
||||
{
|
||||
return 0x000000ac;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(u32 v)
|
||||
{
|
||||
return (v & 0xffff) << 0;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(void)
|
||||
{
|
||||
return 0x000000b0;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_v_m(void)
|
||||
{
|
||||
return 0xfffffff << 0;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_m(void)
|
||||
{
|
||||
return 0x3 << 28;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(void)
|
||||
{
|
||||
return 0x20000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(void)
|
||||
{
|
||||
return 0x30000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(void)
|
||||
{
|
||||
return 0x000000b4;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u32 v)
|
||||
{
|
||||
return (v & 0xffffffff) << 0;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_record_size_in_bytes_v(void)
|
||||
{
|
||||
return 0x00000080;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_record_size_in_words_v(void)
|
||||
{
|
||||
return 0x00000020;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_o(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_v_value_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_o(void)
|
||||
{
|
||||
return 0x00000004;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_v_value_v(void)
|
||||
{
|
||||
return 0x600dbeef;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_context_id_o(void)
|
||||
{
|
||||
return 0x00000008;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_context_ptr_o(void)
|
||||
{
|
||||
return 0x0000000c;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_new_context_id_o(void)
|
||||
{
|
||||
return 0x00000010;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_new_context_ptr_o(void)
|
||||
{
|
||||
return 0x00000014;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_lo_o(void)
|
||||
{
|
||||
return 0x00000018;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_o(void)
|
||||
{
|
||||
return 0x0000001c;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_f(u32 v)
|
||||
{
|
||||
return (v & 0xffffff) << 0;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_v(u32 r)
|
||||
{
|
||||
return (r >> 0) & 0xffffff;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_f(u32 v)
|
||||
{
|
||||
return (v & 0xff) << 24;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_m(void)
|
||||
{
|
||||
return 0xff << 24;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_v(u32 r)
|
||||
{
|
||||
return (r >> 24) & 0xff;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_v(void)
|
||||
{
|
||||
return 0x00000001;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_f(void)
|
||||
{
|
||||
return 0x1000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_v(void)
|
||||
{
|
||||
return 0x00000002;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_f(void)
|
||||
{
|
||||
return 0x2000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_v(void)
|
||||
{
|
||||
return 0x0000000a;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_f(void)
|
||||
{
|
||||
return 0xa000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_v(void)
|
||||
{
|
||||
return 0x0000000b;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_f(void)
|
||||
{
|
||||
return 0xb000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_v(void)
|
||||
{
|
||||
return 0x0000000c;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_f(void)
|
||||
{
|
||||
return 0xc000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_v(void)
|
||||
{
|
||||
return 0x0000000d;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_f(void)
|
||||
{
|
||||
return 0xd000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_v(void)
|
||||
{
|
||||
return 0x00000003;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_f(void)
|
||||
{
|
||||
return 0x3000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_v(void)
|
||||
{
|
||||
return 0x00000004;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_f(void)
|
||||
{
|
||||
return 0x4000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_v(void)
|
||||
{
|
||||
return 0x00000005;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_f(void)
|
||||
{
|
||||
return 0x5000000;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(void)
|
||||
{
|
||||
return 0x000000ff;
|
||||
}
|
||||
static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_f(void)
|
||||
{
|
||||
return 0xff000000;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2013-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
|
||||
21
drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
Normal file
21
drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
#include <linux/string.h>
|
||||
#include "gk20a/gk20a.h"
|
||||
#include "fecs_trace_vgpu.h"
|
||||
|
||||
void vgpu_init_fecs_trace_ops(struct gpu_ops *ops)
|
||||
{
|
||||
memset(&ops->fecs_trace, 0, sizeof(ops->fecs_trace));
|
||||
}
|
||||
20
drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h
Normal file
20
drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
#ifndef __FECS_TRACE_VGPU_H
|
||||
#define __FECS_TRACE_VGPU_H
|
||||
|
||||
struct gpu_ops;
|
||||
void vgpu_init_fecs_trace_ops(struct gpu_ops *ops);
|
||||
|
||||
#endif /* __FECS_TRACE_VGPU_H */
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include "vgpu/vgpu.h"
|
||||
#include "vgpu/fecs_trace_vgpu.h"
|
||||
#include "gk20a/debug_gk20a.h"
|
||||
#include "gk20a/hal_gk20a.h"
|
||||
#include "gk20a/hw_mc_gk20a.h"
|
||||
@@ -259,6 +260,7 @@ void vgpu_init_hal_common(struct gk20a *g)
|
||||
vgpu_init_ltc_ops(gops);
|
||||
vgpu_init_mm_ops(gops);
|
||||
vgpu_init_debug_ops(gops);
|
||||
vgpu_init_fecs_trace_ops(gops);
|
||||
}
|
||||
|
||||
static int vgpu_init_hal(struct gk20a *g)
|
||||
|
||||
@@ -387,7 +387,7 @@ TRACE_EVENT(gk20a_as_ioctl_get_va_regions,
|
||||
TRACE_EVENT(gk20a_mmu_fault,
|
||||
TP_PROTO(u32 fault_hi, u32 fault_lo,
|
||||
u32 fault_info,
|
||||
u32 instance,
|
||||
u64 instance,
|
||||
u32 engine_id,
|
||||
const char *engine,
|
||||
const char *client,
|
||||
@@ -398,7 +398,7 @@ TRACE_EVENT(gk20a_mmu_fault,
|
||||
__field(u32, fault_hi)
|
||||
__field(u32, fault_lo)
|
||||
__field(u32, fault_info)
|
||||
__field(u32, instance)
|
||||
__field(u64, instance)
|
||||
__field(u32, engine_id)
|
||||
__field(const char *, engine)
|
||||
__field(const char *, client)
|
||||
@@ -414,7 +414,7 @@ TRACE_EVENT(gk20a_mmu_fault,
|
||||
__entry->client = client;
|
||||
__entry->fault_type = fault_type;
|
||||
),
|
||||
TP_printk("fault=0x%x,%08x info=0x%x instance=0x%x engine_id=%d engine=%s client=%s type=%s",
|
||||
TP_printk("fault=0x%x,%08x info=0x%x instance=0x%llx engine_id=%d engine=%s client=%s type=%s",
|
||||
__entry->fault_hi, __entry->fault_lo,
|
||||
__entry->fault_info, __entry->instance, __entry->engine_id,
|
||||
__entry->engine, __entry->client, __entry->fault_type)
|
||||
|
||||
@@ -1215,4 +1215,94 @@ struct nvgpu_as_map_buffer_batch_args {
|
||||
#define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
|
||||
sizeof(struct nvgpu_as_map_buffer_ex_args)
|
||||
|
||||
|
||||
/*
|
||||
* /dev/nvhost-ctxsw-gpu device
|
||||
*
|
||||
* Opening a '/dev/nvhost-ctxsw-gpu' device node creates a way to trace
|
||||
* context switches on GR engine
|
||||
*/
|
||||
|
||||
/* ioctl magic byte for the /dev/nvhost-ctxsw-gpu device */
#define NVGPU_CTXSW_IOCTL_MAGIC 'C'

/*
 * Event tags reported in nvgpu_ctxsw_trace_entry::tag.  The values match
 * the FECS record tag encoding of the hw ctxsw_prog timestamp_hi tag
 * field (e.g. save_end 0x03, fe_ack_cilp 0x0d, invalid_timestamp 0xff).
 */
#define NVGPU_CTXSW_TAG_SOF 0x00
#define NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST 0x01
#define NVGPU_CTXSW_TAG_FE_ACK 0x02
#define NVGPU_CTXSW_TAG_FE_ACK_WFI 0x0a
#define NVGPU_CTXSW_TAG_FE_ACK_GFXP 0x0b
#define NVGPU_CTXSW_TAG_FE_ACK_CTAP 0x0c
#define NVGPU_CTXSW_TAG_FE_ACK_CILP 0x0d
#define NVGPU_CTXSW_TAG_SAVE_END 0x03
#define NVGPU_CTXSW_TAG_RESTORE_START 0x04
#define NVGPU_CTXSW_TAG_CONTEXT_START 0x05
#define NVGPU_CTXSW_TAG_INVALID_TIMESTAMP 0xff

/* highest tag value; used to size the trace filter bitmask */
#define NVGPU_CTXSW_TAG_LAST \
	NVGPU_CTXSW_TAG_INVALID_TIMESTAMP
|
||||
|
||||
/*
 * One context-switch trace event as read from, or mmap'ed out of, the
 * ctxsw trace device ring buffer.
 */
struct nvgpu_ctxsw_trace_entry {
	__u8 tag;	/* event type, one of NVGPU_CTXSW_TAG_* */
	__u8 vmid;
	__u16 seqno;	/* sequence number to detect drops */
	__u32 context_id;	/* context_id as allocated by FECS */
	__u64 pid;	/* 64-bit is max bits of different OS pid */
	__u64 timestamp;	/* 64-bit time */
};
||||
|
||||
#define NVGPU_CTXSW_RING_HEADER_MAGIC 0x7000fade
#define NVGPU_CTXSW_RING_HEADER_VERSION 0

/*
 * Header at the start of the trace ring buffer.  The volatile fields are
 * updated at runtime while the buffer is shared between kernel and user
 * space.
 */
struct nvgpu_ctxsw_ring_header {
	__u32 magic;	/* NVGPU_CTXSW_RING_HEADER_MAGIC */
	__u32 version;	/* NVGPU_CTXSW_RING_HEADER_VERSION */
	__u32 num_ents;	/* number of entries in the ring */
	__u32 ent_size;	/* size of one entry, in bytes */
	volatile __u32 drop_count; /* excluding filtered out events */
	volatile __u32 write_seqno;
	volatile __u32 write_idx; /* NOTE(review): presumably the kernel-side
				     producer index -- confirm */
	volatile __u32 read_idx;  /* NOTE(review): presumably the user-side
				     consumer index -- confirm */
};
||||
|
||||
/* Argument for NVGPU_CTXSW_IOCTL_RING_SETUP: (re)configure the ring. */
struct nvgpu_ctxsw_ring_setup_args {
	__u32 size; /* [in/out] requested size of the ring buffer in bytes
		       (including the header); it is rounded to page size
		       and this parameter is updated with the actual
		       allocated size. */
};
|
||||
|
||||
/* one filter bit per possible tag value */
#define NVGPU_CTXSW_FILTER_SIZE (NVGPU_CTXSW_TAG_LAST + 1)
/*
 * Tag filter bit operations on nvgpu_ctxsw_trace_filter::tag_bits.
 * The shifted constant must be 1ULL: tag_bits words are 64-bit, and
 * shifting a plain int by 32..63 bits is undefined behavior, so with a
 * bare 1 the upper half of each word could never be set or tested
 * correctly.
 */
#define NVGPU_CTXSW_FILTER_SET(n, p) \
	((p)->tag_bits[(n) / 64] |= (1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_CLR(n, p) \
	((p)->tag_bits[(n) / 64] &= ~(1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_ISSET(n, p) \
	((p)->tag_bits[(n) / 64] & (1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_CLR_ALL(p) memset((void *)(p), 0, sizeof(*(p)))
#define NVGPU_CTXSW_FILTER_SET_ALL(p) memset((void *)(p), ~0, sizeof(*(p)))
|
||||
|
||||
/*
 * Bitmask of trace tags to keep: one bit per possible tag value,
 * manipulated with the NVGPU_CTXSW_FILTER_* macros.
 */
struct nvgpu_ctxsw_trace_filter {
	__u64 tag_bits[(NVGPU_CTXSW_FILTER_SIZE + 63) / 64];
};
|
||||
|
||||
/* Argument for NVGPU_CTXSW_IOCTL_SET_FILTER / NVGPU_CTXSW_IOCTL_GET_FILTER. */
struct nvgpu_ctxsw_trace_filter_args {
	struct nvgpu_ctxsw_trace_filter filter;
};
||||
|
||||
/* ioctls accepted by the ctxsw trace device */
#define NVGPU_CTXSW_IOCTL_TRACE_ENABLE \
	_IO(NVGPU_CTXSW_IOCTL_MAGIC, 1)
#define NVGPU_CTXSW_IOCTL_TRACE_DISABLE \
	_IO(NVGPU_CTXSW_IOCTL_MAGIC, 2)
#define NVGPU_CTXSW_IOCTL_RING_SETUP \
	_IOWR(NVGPU_CTXSW_IOCTL_MAGIC, 3, struct nvgpu_ctxsw_ring_setup_args)
#define NVGPU_CTXSW_IOCTL_SET_FILTER \
	_IOW(NVGPU_CTXSW_IOCTL_MAGIC, 4, struct nvgpu_ctxsw_trace_filter_args)
#define NVGPU_CTXSW_IOCTL_GET_FILTER \
	_IOR(NVGPU_CTXSW_IOCTL_MAGIC, 5, struct nvgpu_ctxsw_trace_filter_args)
#define NVGPU_CTXSW_IOCTL_POLL \
	_IO(NVGPU_CTXSW_IOCTL_MAGIC, 6)

/* highest ioctl command number in use */
#define NVGPU_CTXSW_IOCTL_LAST \
	_IOC_NR(NVGPU_CTXSW_IOCTL_POLL)

/* size of the largest ioctl argument structure */
#define NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE \
	sizeof(struct nvgpu_ctxsw_trace_filter_args)
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user