mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
MISRA Advisory Directive 4.5 states that identifiers in the same name space with overlapping visibility should be typographically unambiguous. The presence of both the roundup(x,y) and round_up(x,y) macros in the posix utils.h header incurs a violation of this rule. These macros were added to keep in sync with the linux kernel variants. However, there is a key distinction between how these two macros work in the linux kernel; roundup(x,y) can handle any y alignment while round_up(x,y) is intended to work only when y is a power-of-two. Passing a non-power-of-two alignment to round_up(x,y) results in an incorrect value being returned (silently). Because all current uses of roundup(x,y) and round_up(x,y) in nvgpu specify a y value that is a power-of-two and the underlying posix macro implementations assume as much, it is best to remove roundup(x,y) from nvgpu altogether to avoid any confusion. So this change converts all uses of roundup(x,y) to round_up(x,y). Jira NVGPU-3178 Change-Id: I0ee974d3e088fa704e251a38f6b7ada5a7600aec Signed-off-by: Scott Long <scottl@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2271385 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
795 lines
19 KiB
C
795 lines
19 KiB
C
/*
|
|
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <linux/wait.h>
|
|
#include <linux/ktime.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/poll.h>
|
|
#ifdef CONFIG_NVGPU_TRACE
|
|
#include <trace/events/gk20a.h>
|
|
#endif
|
|
#include <uapi/linux/nvgpu.h>
|
|
#include <nvgpu/kmem.h>
|
|
#include <nvgpu/log.h>
|
|
#include <nvgpu/atomic.h>
|
|
#include <nvgpu/barrier.h>
|
|
#include <nvgpu/gk20a.h>
|
|
#include <nvgpu/channel.h>
|
|
#include <nvgpu/gr/fecs_trace.h>
|
|
#include <nvgpu/string.h>
|
|
#include <nvgpu/nvgpu_init.h>
|
|
|
|
#include "platform_gk20a.h"
|
|
#include "os_linux.h"
|
|
#include "fecs_trace_linux.h"
|
|
|
|
/* Userland-facing FIFO (one global + eventually one per VM) */
|
|
struct gk20a_ctxsw_dev {
|
|
struct gk20a *g;
|
|
|
|
struct nvgpu_ctxsw_ring_header *hdr;
|
|
struct nvgpu_gpu_ctxsw_trace_entry *ents;
|
|
struct nvgpu_gpu_ctxsw_trace_filter filter;
|
|
bool write_enabled;
|
|
struct nvgpu_cond readout_wq;
|
|
size_t size;
|
|
u32 num_ents;
|
|
|
|
nvgpu_atomic_t vma_ref;
|
|
|
|
struct nvgpu_mutex write_lock;
|
|
};
|
|
|
|
|
|
struct gk20a_ctxsw_trace {
|
|
struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
|
|
};
|
|
|
|
static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
|
|
{
|
|
return (hdr->write_idx == hdr->read_idx);
|
|
}
|
|
|
|
static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
|
|
{
|
|
return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
|
|
}
|
|
|
|
static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
|
|
{
|
|
return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
|
|
}
|
|
|
|
static void nvgpu_set_ctxsw_trace_entry(
|
|
struct nvgpu_ctxsw_trace_entry *entry_dst,
|
|
struct nvgpu_gpu_ctxsw_trace_entry *entry_src)
|
|
{
|
|
entry_dst->tag = entry_src->tag;
|
|
entry_dst->vmid = entry_src->vmid;
|
|
entry_dst->seqno = entry_src->seqno;
|
|
entry_dst->context_id = entry_src->context_id;
|
|
entry_dst->pid = entry_src->pid;
|
|
entry_dst->timestamp = entry_src->timestamp;
|
|
}
|
|
|
|
/*
 * read() handler of the ctxsw trace device.
 *
 * Waits until the ring holds at least one entry (or returns -EAGAIN for a
 * non-blocking file), then copies as many whole entries as fit into the
 * user buffer and advances the ring read index.
 *
 * Returns the number of bytes copied, -ENODEV when the device has no ring
 * buffer, -EAGAIN when non-blocking and empty, the wait error when the
 * wait fails, -EINVAL when the ring read index is out of range, or -EFAULT
 * when the user buffer cannot be written.
 */
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry *) buf;
	struct nvgpu_ctxsw_trace_entry user_entry;
	size_t copied = 0;
	u32 read_idx;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	/*
	 * A failed buffer (re)allocation leaves dev->hdr NULL while the file
	 * is still open; do not dereference it.
	 */
	if (!hdr)
		return -ENODEV;

	nvgpu_mutex_acquire(&dev->write_lock);
	while (ring_is_empty(hdr)) {
		nvgpu_mutex_release(&dev->write_lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
			!ring_is_empty(hdr), 0U);
		if (err)
			return err;
		nvgpu_mutex_acquire(&dev->write_lock);
	}

	/* Make sure no stale stack bytes can reach user space. */
	(void) memset(&user_entry, 0, sizeof(user_entry));

	/* Gate on the size that is actually copied for each entry. */
	while (size >= sizeof(*entry)) {
		if (ring_is_empty(hdr))
			break;

		/*
		 * The header lives in memory that can be mapped by user space,
		 * so take one snapshot of the index and validate it before it
		 * is used to address the entry array.
		 */
		read_idx = hdr->read_idx;
		if (read_idx >= dev->num_ents) {
			nvgpu_err(g, "read_idx=%u out of range [0..%u]",
				read_idx, dev->num_ents);
			nvgpu_mutex_release(&dev->write_lock);
			return -EINVAL;
		}

		nvgpu_set_ctxsw_trace_entry(&user_entry, &dev->ents[read_idx]);
		if (copy_to_user(entry, &user_entry, sizeof(*entry))) {
			nvgpu_mutex_release(&dev->write_lock);
			return -EFAULT;
		}

		read_idx++;
		if (read_idx >= dev->num_ents)
			read_idx = 0;
		hdr->read_idx = read_idx;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	*off = hdr->read_idx;
	nvgpu_mutex_release(&dev->write_lock);

	return copied;
}
|
|
|
|
static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
|
|
{
|
|
struct gk20a *g = dev->g;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
|
|
nvgpu_mutex_acquire(&dev->write_lock);
|
|
dev->write_enabled = true;
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
dev->g->ops.gr.fecs_trace.enable(dev->g);
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
|
|
{
|
|
struct gk20a *g = dev->g;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
|
|
dev->g->ops.gr.fecs_trace.disable(dev->g);
|
|
nvgpu_mutex_acquire(&dev->write_lock);
|
|
dev->write_enabled = false;
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
|
|
size_t *size)
|
|
{
|
|
struct gk20a *g = dev->g;
|
|
void *buf;
|
|
int err;
|
|
|
|
if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
|
|
return -EBUSY;
|
|
|
|
if (dev->hdr) {
|
|
g->ops.gr.fecs_trace.free_user_buffer(g);
|
|
dev->hdr = NULL;
|
|
}
|
|
|
|
err = g->ops.gr.fecs_trace.alloc_user_buffer(g, &buf, size);
|
|
if (err)
|
|
return err;
|
|
|
|
|
|
dev->hdr = buf;
|
|
dev->ents = (struct nvgpu_gpu_ctxsw_trace_entry *) (dev->hdr + 1);
|
|
dev->size = *size;
|
|
dev->num_ents = dev->hdr->num_ents;
|
|
|
|
nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
|
|
dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Allocate and initialise a trace ring that can be mapped into user space.
 *
 * *size is rounded up to a multiple of PAGE_SIZE and written back. On
 * success *buf points at the ring header, whose entry count is derived
 * from the space left after the header.
 *
 * Returns 0 on success, -ENOMEM when the allocation fails.
 */
int nvgpu_gr_fecs_trace_ring_alloc(struct gk20a *g,
	void **buf, size_t *size)
{
	struct nvgpu_ctxsw_ring_header *hdr;

	*size = round_up(*size, PAGE_SIZE);

	hdr = vmalloc_user(*size);
	if (hdr == NULL)
		return -ENOMEM;

	/* Identification. */
	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;

	/* Geometry. */
	hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
		/ sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
	hdr->ent_size = sizeof(struct nvgpu_gpu_ctxsw_trace_entry);

	/* The ring starts out empty. */
	hdr->drop_count = 0;
	hdr->read_idx = 0;
	hdr->write_idx = 0;
	hdr->write_seqno = 0;

	*buf = hdr;

	return 0;
}
|
|
|
|
int nvgpu_gr_fecs_trace_ring_free(struct gk20a *g)
|
|
{
|
|
struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
|
|
|
|
nvgpu_vfree(g, dev->hdr);
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
|
|
struct nvgpu_ctxsw_ring_setup_args *args)
|
|
{
|
|
struct gk20a *g = dev->g;
|
|
size_t size = args->size;
|
|
int ret;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
|
|
|
|
if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
|
|
return -EINVAL;
|
|
|
|
nvgpu_mutex_acquire(&dev->write_lock);
|
|
ret = gk20a_ctxsw_dev_alloc_buffer(dev, &size);
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
|
|
args->size = size;
|
|
return ret;
|
|
}
|
|
|
|
static void nvgpu_set_ctxsw_trace_filter_args(
|
|
struct nvgpu_gpu_ctxsw_trace_filter *filter_dst,
|
|
struct nvgpu_ctxsw_trace_filter *filter_src)
|
|
{
|
|
nvgpu_memcpy((u8 *)filter_dst->tag_bits, (u8 *)filter_src->tag_bits,
|
|
(NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
|
|
}
|
|
|
|
static void nvgpu_get_ctxsw_trace_filter_args(
|
|
struct nvgpu_ctxsw_trace_filter *filter_dst,
|
|
struct nvgpu_gpu_ctxsw_trace_filter *filter_src)
|
|
{
|
|
nvgpu_memcpy((u8 *)filter_dst->tag_bits, (u8 *)filter_src->tag_bits,
|
|
(NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
|
|
}
|
|
|
|
static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
|
|
struct nvgpu_ctxsw_trace_filter_args *args)
|
|
{
|
|
struct gk20a *g = dev->g;
|
|
|
|
nvgpu_mutex_acquire(&dev->write_lock);
|
|
nvgpu_set_ctxsw_trace_filter_args(&dev->filter, &args->filter);
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
|
|
if (g->ops.gr.fecs_trace.set_filter)
|
|
g->ops.gr.fecs_trace.set_filter(g, &dev->filter);
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
|
|
struct nvgpu_ctxsw_trace_filter_args *args)
|
|
{
|
|
nvgpu_mutex_acquire(&dev->write_lock);
|
|
nvgpu_get_ctxsw_trace_filter_args(&args->filter, &dev->filter);
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
|
|
{
|
|
struct gk20a *g = dev->g;
|
|
int err;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
|
|
|
|
err = gk20a_busy(g);
|
|
if (err)
|
|
return err;
|
|
|
|
if (g->ops.gr.fecs_trace.flush)
|
|
err = g->ops.gr.fecs_trace.flush(g);
|
|
|
|
if (likely(!err))
|
|
err = g->ops.gr.fecs_trace.poll(g);
|
|
|
|
gk20a_idle(g);
|
|
return err;
|
|
}
|
|
|
|
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
|
|
{
|
|
struct nvgpu_os_linux *l;
|
|
struct gk20a *g;
|
|
struct gk20a_ctxsw_trace *trace;
|
|
struct gk20a_ctxsw_dev *dev;
|
|
int err;
|
|
size_t size;
|
|
u32 n;
|
|
|
|
/* only one VM for now */
|
|
const int vmid = 0;
|
|
|
|
l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
|
|
g = nvgpu_get(&l->g);
|
|
if (!g)
|
|
return -ENODEV;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
|
|
|
|
if (!capable(CAP_SYS_ADMIN)) {
|
|
err = -EPERM;
|
|
goto free_ref;
|
|
}
|
|
|
|
err = gk20a_busy(g);
|
|
if (err)
|
|
goto free_ref;
|
|
|
|
trace = g->ctxsw_trace;
|
|
if (!trace) {
|
|
err = -ENODEV;
|
|
goto idle;
|
|
}
|
|
|
|
/* Allow only one user for this device */
|
|
dev = &trace->devs[vmid];
|
|
nvgpu_mutex_acquire(&dev->write_lock);
|
|
if (dev->hdr) {
|
|
err = -EBUSY;
|
|
goto done;
|
|
}
|
|
|
|
/* By default, allocate ring buffer big enough to accommodate
|
|
* FECS records with default event filter */
|
|
|
|
/* enable all traces by default */
|
|
NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
|
|
|
|
/* compute max number of entries generated with this filter */
|
|
n = g->ops.gr.fecs_trace.max_entries(g, &dev->filter);
|
|
|
|
size = sizeof(struct nvgpu_ctxsw_ring_header) +
|
|
n * sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
|
|
nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
|
|
size, n, sizeof(struct nvgpu_gpu_ctxsw_trace_entry));
|
|
|
|
err = gk20a_ctxsw_dev_alloc_buffer(dev, &size);
|
|
if (!err) {
|
|
filp->private_data = dev;
|
|
nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
|
|
filp, dev, size);
|
|
}
|
|
|
|
done:
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
|
|
idle:
|
|
gk20a_idle(g);
|
|
free_ref:
|
|
if (err)
|
|
nvgpu_put(g);
|
|
return err;
|
|
}
|
|
|
|
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
|
|
{
|
|
struct gk20a_ctxsw_dev *dev = filp->private_data;
|
|
struct gk20a *g = dev->g;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
|
|
|
|
nvgpu_mutex_acquire(&dev->write_lock);
|
|
if (dev->write_enabled) {
|
|
dev->write_enabled = false;
|
|
g->ops.gr.fecs_trace.disable(g);
|
|
}
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
|
|
if (dev->hdr) {
|
|
dev->g->ops.gr.fecs_trace.free_user_buffer(dev->g);
|
|
dev->hdr = NULL;
|
|
}
|
|
nvgpu_put(g);
|
|
return 0;
|
|
}
|
|
|
|
long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
|
|
unsigned long arg)
|
|
{
|
|
struct gk20a_ctxsw_dev *dev = filp->private_data;
|
|
struct gk20a *g = dev->g;
|
|
u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
|
|
int err = 0;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
|
|
|
|
if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
|
|
(_IOC_NR(cmd) == 0) ||
|
|
(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
|
|
(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
|
|
return -EINVAL;
|
|
|
|
(void) memset(buf, 0, sizeof(buf));
|
|
if (_IOC_DIR(cmd) & _IOC_WRITE) {
|
|
if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
|
|
return -EFAULT;
|
|
}
|
|
|
|
switch (cmd) {
|
|
case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
|
|
err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
|
|
break;
|
|
case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
|
|
err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
|
|
break;
|
|
case NVGPU_CTXSW_IOCTL_RING_SETUP:
|
|
err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
|
|
(struct nvgpu_ctxsw_ring_setup_args *) buf);
|
|
break;
|
|
case NVGPU_CTXSW_IOCTL_SET_FILTER:
|
|
err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
|
|
(struct nvgpu_ctxsw_trace_filter_args *) buf);
|
|
break;
|
|
case NVGPU_CTXSW_IOCTL_GET_FILTER:
|
|
err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
|
|
(struct nvgpu_ctxsw_trace_filter_args *) buf);
|
|
break;
|
|
case NVGPU_CTXSW_IOCTL_POLL:
|
|
err = gk20a_ctxsw_dev_ioctl_poll(dev);
|
|
break;
|
|
default:
|
|
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
|
|
cmd);
|
|
err = -ENOTTY;
|
|
}
|
|
|
|
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
|
|
err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
 * poll() handler of the ctxsw trace device.
 *
 * Registers the caller on the readout wait queue and reports the file as
 * readable (POLLIN | POLLRDNORM) when the ring holds at least one entry.
 *
 * The ring header pointer is sampled under write_lock and checked for
 * NULL: a failed buffer reallocation leaves the device without a ring
 * while the file is still open, and the reallocation itself swaps the
 * pointer under this lock.
 */
unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	struct nvgpu_ctxsw_ring_header *hdr;
	unsigned int mask = 0;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	nvgpu_mutex_acquire(&dev->write_lock);
	poll_wait(filp, &dev->readout_wq.wq, wait);
	hdr = dev->hdr;
	if (hdr && !ring_is_empty(hdr))
		mask |= POLLIN | POLLRDNORM;
	nvgpu_mutex_release(&dev->write_lock);

	return mask;
}
|
|
|
|
static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
|
|
{
|
|
struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
|
|
struct gk20a *g = dev->g;
|
|
|
|
nvgpu_atomic_inc(&dev->vma_ref);
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
|
|
nvgpu_atomic_read(&dev->vma_ref));
|
|
}
|
|
|
|
static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
|
|
{
|
|
struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
|
|
struct gk20a *g = dev->g;
|
|
|
|
nvgpu_atomic_dec(&dev->vma_ref);
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
|
|
nvgpu_atomic_read(&dev->vma_ref));
|
|
}
|
|
|
|
static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
|
|
.open = gk20a_ctxsw_dev_vma_open,
|
|
.close = gk20a_ctxsw_dev_vma_close,
|
|
};
|
|
|
|
/*
 * Describe the buffer to map for trace device 0: the address is the ring
 * header and the size is reported as 0, which the mmap handler treats as
 * "map the vmalloc area".
 */
void nvgpu_gr_fecs_trace_get_mmap_buffer_info(struct gk20a *g,
	void **mmapaddr, size_t *mmapsize)
{
	*mmapsize = 0;
	*mmapaddr = g->ctxsw_trace->devs[0].hdr;
}
|
|
|
|
int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
|
|
{
|
|
struct gk20a_ctxsw_dev *dev = filp->private_data;
|
|
struct gk20a *g = dev->g;
|
|
size_t mmapsize = 0;
|
|
void *mmapaddr;
|
|
int ret;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
|
|
vma->vm_start, vma->vm_end);
|
|
|
|
dev->g->ops.gr.fecs_trace.get_mmap_user_buffer_info(dev->g,
|
|
&mmapaddr, &mmapsize);
|
|
if (mmapsize) {
|
|
unsigned long size = 0;
|
|
unsigned long vsize = vma->vm_end - vma->vm_start;
|
|
|
|
size = min(mmapsize, vsize);
|
|
size = round_up(size, PAGE_SIZE);
|
|
|
|
ret = remap_pfn_range(vma, vma->vm_start,
|
|
(unsigned long) mmapaddr,
|
|
size,
|
|
vma->vm_page_prot);
|
|
|
|
} else {
|
|
ret = remap_vmalloc_range(vma, mmapaddr, 0);
|
|
}
|
|
|
|
if (likely(!ret)) {
|
|
vma->vm_private_data = dev;
|
|
vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
|
|
vma->vm_ops->open(vma);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_ctxsw_init_devs(struct gk20a *g)
|
|
{
|
|
struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
|
|
struct gk20a_ctxsw_dev *dev = trace->devs;
|
|
int i;
|
|
|
|
for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
|
|
dev->g = g;
|
|
dev->hdr = NULL;
|
|
dev->write_enabled = false;
|
|
nvgpu_cond_init(&dev->readout_wq);
|
|
nvgpu_mutex_init(&dev->write_lock);
|
|
nvgpu_atomic_set(&dev->vma_ref, 0);
|
|
dev++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int gk20a_ctxsw_trace_init(struct gk20a *g)
|
|
{
|
|
struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
|
|
int err;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
|
|
|
|
/* if tracing is not supported, skip this */
|
|
if (!g->ops.gr.fecs_trace.init)
|
|
return 0;
|
|
|
|
if (likely(trace))
|
|
return 0;
|
|
|
|
trace = nvgpu_kzalloc(g, sizeof(*trace));
|
|
if (unlikely(!trace))
|
|
return -ENOMEM;
|
|
g->ctxsw_trace = trace;
|
|
|
|
err = gk20a_ctxsw_init_devs(g);
|
|
if (err)
|
|
goto fail;
|
|
|
|
err = g->ops.gr.fecs_trace.init(g);
|
|
if (unlikely(err))
|
|
goto fail;
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
(void) memset(&g->ops.gr.fecs_trace, 0, sizeof(g->ops.gr.fecs_trace));
|
|
nvgpu_kfree(g, trace);
|
|
g->ctxsw_trace = NULL;
|
|
return err;
|
|
}
|
|
|
|
void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
|
|
{
|
|
struct gk20a_ctxsw_trace *trace;
|
|
struct gk20a_ctxsw_dev *dev;
|
|
int i;
|
|
|
|
if (!g->ctxsw_trace)
|
|
return;
|
|
|
|
trace = g->ctxsw_trace;
|
|
dev = trace->devs;
|
|
|
|
for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
|
|
nvgpu_mutex_destroy(&dev->write_lock);
|
|
dev++;
|
|
}
|
|
|
|
nvgpu_kfree(g, g->ctxsw_trace);
|
|
g->ctxsw_trace = NULL;
|
|
|
|
g->ops.gr.fecs_trace.deinit(g);
|
|
}
|
|
|
|
int nvgpu_gr_fecs_trace_write_entry(struct gk20a *g,
|
|
struct nvgpu_gpu_ctxsw_trace_entry *entry)
|
|
{
|
|
struct nvgpu_ctxsw_ring_header *hdr;
|
|
struct gk20a_ctxsw_dev *dev;
|
|
int ret = 0;
|
|
const char *reason;
|
|
u32 write_idx;
|
|
|
|
if (!g->ctxsw_trace)
|
|
return 0;
|
|
|
|
if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
|
|
return -ENODEV;
|
|
|
|
dev = &g->ctxsw_trace->devs[entry->vmid];
|
|
hdr = dev->hdr;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
|
"dev=%p hdr=%p", dev, hdr);
|
|
|
|
nvgpu_mutex_acquire(&dev->write_lock);
|
|
|
|
if (unlikely(!hdr)) {
|
|
/* device has been released */
|
|
ret = -ENODEV;
|
|
goto done;
|
|
}
|
|
|
|
write_idx = hdr->write_idx;
|
|
if (write_idx >= dev->num_ents) {
|
|
nvgpu_err(dev->g,
|
|
"write_idx=%u out of range [0..%u]",
|
|
write_idx, dev->num_ents);
|
|
ret = -ENOSPC;
|
|
reason = "write_idx out of range";
|
|
goto disable;
|
|
}
|
|
|
|
entry->seqno = hdr->write_seqno++;
|
|
|
|
if (!dev->write_enabled) {
|
|
ret = -EBUSY;
|
|
reason = "write disabled";
|
|
goto drop;
|
|
}
|
|
|
|
if (unlikely(ring_is_full(hdr))) {
|
|
ret = -ENOSPC;
|
|
reason = "user fifo full";
|
|
goto drop;
|
|
}
|
|
|
|
if (!NVGPU_GPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
|
|
reason = "filtered out";
|
|
goto filter;
|
|
}
|
|
|
|
nvgpu_log(g, gpu_dbg_ctxsw,
|
|
"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
|
|
entry->seqno, entry->context_id, entry->pid,
|
|
entry->tag, entry->timestamp);
|
|
|
|
dev->ents[write_idx] = *entry;
|
|
|
|
/* ensure record is written before updating write index */
|
|
nvgpu_smp_wmb();
|
|
|
|
write_idx++;
|
|
if (unlikely(write_idx >= hdr->num_ents))
|
|
write_idx = 0;
|
|
hdr->write_idx = write_idx;
|
|
nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
|
|
hdr->read_idx, hdr->write_idx, ring_len(hdr));
|
|
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
return ret;
|
|
|
|
disable:
|
|
g->ops.gr.fecs_trace.disable(g);
|
|
|
|
drop:
|
|
hdr->drop_count++;
|
|
|
|
filter:
|
|
nvgpu_log(g, gpu_dbg_ctxsw,
|
|
"dropping seqno=%d context_id=%08x pid=%lld "
|
|
"tag=%x time=%llx (%s)",
|
|
entry->seqno, entry->context_id, entry->pid,
|
|
entry->tag, entry->timestamp, reason);
|
|
|
|
done:
|
|
nvgpu_mutex_release(&dev->write_lock);
|
|
return ret;
|
|
}
|
|
|
|
void nvgpu_gr_fecs_trace_wake_up(struct gk20a *g, int vmid)
|
|
{
|
|
struct gk20a_ctxsw_dev *dev;
|
|
|
|
if (!g->ctxsw_trace)
|
|
return;
|
|
|
|
dev = &g->ctxsw_trace->devs[vmid];
|
|
nvgpu_cond_signal_interruptible(&dev->readout_wq);
|
|
}
|
|
|
|
void nvgpu_gr_fecs_trace_add_tsg_reset(struct gk20a *g, struct nvgpu_tsg *tsg)
|
|
{
|
|
struct nvgpu_gpu_ctxsw_trace_entry entry = {
|
|
.vmid = 0,
|
|
.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
|
|
.context_id = 0,
|
|
.pid = tsg->tgid,
|
|
};
|
|
|
|
if (!g->ctxsw_trace)
|
|
return;
|
|
|
|
g->ops.ptimer.read_ptimer(g, &entry.timestamp);
|
|
nvgpu_gr_fecs_trace_write_entry(g, &entry);
|
|
nvgpu_gr_fecs_trace_wake_up(g, 0);
|
|
|
|
#ifdef CONFIG_NVGPU_TRACE
|
|
trace_gk20a_channel_reset(~0, tsg->tsgid);
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Convert linux nvgpu ctxsw tags type of the form of NVGPU_CTXSW_TAG_*
|
|
* into common nvgpu ctxsw tags type of the form of NVGPU_GPU_CTXSW_TAG_*
|
|
*/
|
|
|
|
u8 nvgpu_gpu_ctxsw_tags_to_common_tags(u8 tags)
|
|
{
|
|
switch (tags) {
|
|
case NVGPU_CTXSW_TAG_SOF:
|
|
return NVGPU_GPU_CTXSW_TAG_SOF;
|
|
case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
|
|
return NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST;
|
|
case NVGPU_CTXSW_TAG_FE_ACK:
|
|
return NVGPU_GPU_CTXSW_TAG_FE_ACK;
|
|
case NVGPU_CTXSW_TAG_FE_ACK_WFI:
|
|
return NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI;
|
|
case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
|
|
return NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP;
|
|
case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
|
|
return NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP;
|
|
case NVGPU_CTXSW_TAG_FE_ACK_CILP:
|
|
return NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP;
|
|
case NVGPU_CTXSW_TAG_SAVE_END:
|
|
return NVGPU_GPU_CTXSW_TAG_SAVE_END;
|
|
case NVGPU_CTXSW_TAG_RESTORE_START:
|
|
return NVGPU_GPU_CTXSW_TAG_RESTORE_START;
|
|
case NVGPU_CTXSW_TAG_CONTEXT_START:
|
|
return NVGPU_GPU_CTXSW_TAG_CONTEXT_START;
|
|
case NVGPU_CTXSW_TAG_ENGINE_RESET:
|
|
return NVGPU_GPU_CTXSW_TAG_ENGINE_RESET;
|
|
case NVGPU_CTXSW_TAG_INVALID_TIMESTAMP:
|
|
return NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP;
|
|
}
|
|
|
|
WARN_ON(1);
|
|
return tags;
|
|
}
|