Implement kmem abstraction and tracking in nvgpu

The abstraction helps move nvgpu's core code away from being Linux
dependent and allows kmem allocation tracking to be done for Linux and
any other OS supported by nvgpu.

Bug 1799159
Bug 1823380

Change-Id: Ieaae4ca1bbd1d4db4a1546616ab8b9fc53a4079d
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1283828
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
/*
 * Copyright (C) 2011-2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#ifdef CONFIG_TEGRA_GK20A
#include <linux/nvhost.h>
#endif

#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/io.h>

#include <nvgpu/semaphore.h>
#include <nvgpu/kmem.h>

#include "gk20a.h"
#include "debug_gk20a.h"

#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

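/* Exposed read/write via the "trace_cmdbuf" debugfs node created in
 * gk20a_debug_init(). */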
unsigned int gk20a_debug_trace_cmdbuf;

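/* Per-channel snapshot taken by gk20a_debug_show_dump(): owner PID and
 * reference count, followed by a copy of the channel's instance block
 * in the flexible array member. */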
struct ch_state {
	int pid;
	int refs;
	u32 inst_block[0];
};

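/* Human-readable decodings of the CCSR channel status, PBDMA channel
 * status and engine context status fields printed below. */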
static const char * const ccsr_chan_status_str[] = {
	"idle",
	"pending",
	"pending_ctx_reload",
	"pending_acquire",
	"pending_acq_ctx_reload",
	"on_pbdma",
	"on_pbdma_and_eng",
	"on_eng",
	"on_eng_pending_acquire",
	"on_eng_pending",
	"on_pbdma_ctx_reload",
	"on_pbdma_and_eng_ctx_reload",
	"on_eng_ctx_reload",
	"on_eng_pending_ctx_reload",
	"on_eng_pending_acq_ctx_reload",
};

static const char * const chan_status_str[] = {
	"invalid",
	"valid",
	"chsw_load",
	"chsw_save",
	"chsw_switch",
};

static const char * const ctx_status_str[] = {
	"invalid",
	"valid",
	NULL,
	NULL,
	NULL,
	"ctxsw_load",
	"ctxsw_save",
	"ctxsw_switch",
};

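/* Output sinks for gk20a_debug_output(): one writes to the kernel log,
 * the other into the seq_file backing the debugfs nodes. */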
static inline void gk20a_debug_write_printk(void *ctx, const char *str,
					    size_t len)
{
	pr_info("%s", str);
}

static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
						size_t len)
{
	seq_write((struct seq_file *)ctx, str, len);
}

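/* printf-style front end: format into the output buffer and hand the
 * result to the configured sink. */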
void gk20a_debug_output(struct gk20a_debug_output *o,
					const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
	va_end(args);
	o->fn(o->ctx, o->buf, len);
}

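/* Print the state of a single channel from the snapshotted instance
 * block: pushbuffer TOP/PUT/GET/FETCH pointers, syncpoint and semaphore
 * words, the backing hardware semaphore (if any) and, on Tegra, the
 * host1x syncpoint the channel is waiting on. */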
static void gk20a_debug_show_channel(struct gk20a *g,
				     struct gk20a_debug_output *o,
				     u32 hw_chid,
				     struct ch_state *ch_state)
{
	u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid));
	u32 status = ccsr_channel_status_v(channel);
	u32 syncpointa, syncpointb;
	u32 *inst_mem;
	struct channel_gk20a *c = g->fifo.channel + hw_chid;
	struct nvgpu_semaphore_int *hw_sema = NULL;

	if (c->hw_sema)
		hw_sema = c->hw_sema;

	if (!ch_state)
		return;

	inst_mem = &ch_state->inst_block[0];

	syncpointa = inst_mem[ram_fc_syncpointa_w()];
	syncpointb = inst_mem[ram_fc_syncpointb_w()];

	gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid,
			dev_name(g->dev),
			ch_state->pid,
			ch_state->refs);
	gk20a_debug_output(o, "%s in use %s %s\n",
			ccsr_channel_enable_v(channel) ? "" : "not",
			ccsr_chan_status_str[status],
			ccsr_channel_busy_v(channel) ? "busy" : "not busy");
	gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx "
			"FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
			"SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
		(u64)inst_mem[ram_fc_pb_top_level_get_w()] +
		((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL),
		(u64)inst_mem[ram_fc_pb_put_w()] +
		((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL),
		(u64)inst_mem[ram_fc_pb_get_w()] +
		((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL),
		(u64)inst_mem[ram_fc_pb_fetch_w()] +
		((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL),
		inst_mem[ram_fc_pb_header_w()],
		inst_mem[ram_fc_pb_count_w()],
		syncpointa,
		syncpointb,
		inst_mem[ram_fc_semaphorea_w()],
		inst_mem[ram_fc_semaphoreb_w()],
		inst_mem[ram_fc_semaphorec_w()],
		inst_mem[ram_fc_semaphored_w()]);
	if (hw_sema)
		gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
				   "next_val: 0x%08x addr: 0x%010llx\n",
				   readl(hw_sema->value),
				   atomic_read(&hw_sema->next_value),
				   nvgpu_hw_sema_addr(hw_sema));

#ifdef CONFIG_TEGRA_GK20A
	if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
		&& (pbdma_syncpointb_wait_switch_v(syncpointb) ==
			pbdma_syncpointb_wait_switch_en_v()))
		gk20a_debug_output(o, "%s on syncpt %u (%s) val %u\n",
			(status == 3 || status == 8) ? "Waiting" : "Waited",
			pbdma_syncpointb_syncpt_index_v(syncpointb),
			nvhost_syncpt_get_name(g->host1x_dev,
				pbdma_syncpointb_syncpt_index_v(syncpointb)),
			pbdma_syncpointa_payload_v(syncpointa));
#endif

	gk20a_debug_output(o, "\n");
}

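/* Dump the whole FIFO: per-PBDMA and per-engine status registers first,
 * then a snapshot of every referenced channel's instance block printed
 * via gk20a_debug_show_channel(). */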
void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	unsigned int i;
	u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
	u32 host_num_engines = nvgpu_get_litter_value(g,
				GPU_LIT_HOST_NUM_ENGINES);

	struct ch_state **ch_state;

	for (i = 0; i < host_num_pbdma; i++) {
		u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
		u32 chan_status = fifo_pbdma_status_chan_status_v(status);

		gk20a_debug_output(o, "%s pbdma %d: ", dev_name(g->dev), i);
		gk20a_debug_output(o,
				"id: %d (%s), next_id: %d (%s) status: %s\n",
				fifo_pbdma_status_id_v(status),
				fifo_pbdma_status_id_type_v(status) ?
					"tsg" : "channel",
				fifo_pbdma_status_next_id_v(status),
				fifo_pbdma_status_next_id_type_v(status) ?
					"tsg" : "channel",
				chan_status_str[chan_status]);
		gk20a_debug_output(o, "PUT: %016llx GET: %016llx "
				"FETCH: %08x HEADER: %08x\n",
			(u64)gk20a_readl(g, pbdma_put_r(i)) +
			((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
			(u64)gk20a_readl(g, pbdma_get_r(i)) +
			((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
			gk20a_readl(g, pbdma_gp_fetch_r(i)),
			gk20a_readl(g, pbdma_pb_header_r(i)));
	}
	gk20a_debug_output(o, "\n");

	for (i = 0; i < host_num_engines; i++) {
		u32 status = gk20a_readl(g, fifo_engine_status_r(i));
		u32 ctx_status = fifo_engine_status_ctx_status_v(status);

		gk20a_debug_output(o, "%s eng %d: ", dev_name(g->dev), i);
		gk20a_debug_output(o,
				"id: %d (%s), next_id: %d (%s), ctx: %s ",
				fifo_engine_status_id_v(status),
				fifo_engine_status_id_type_v(status) ?
					"tsg" : "channel",
				fifo_engine_status_next_id_v(status),
				fifo_engine_status_next_id_type_v(status) ?
					"tsg" : "channel",
				ctx_status_str[ctx_status]);

		if (fifo_engine_status_faulted_v(status))
			gk20a_debug_output(o, "faulted ");
		if (fifo_engine_status_engine_v(status))
			gk20a_debug_output(o, "busy ");
		gk20a_debug_output(o, "\n");
	}
	gk20a_debug_output(o, "\n");

	ch_state = kzalloc(sizeof(*ch_state)
			   * f->num_channels, GFP_KERNEL);
	if (!ch_state) {
		gk20a_debug_output(o, "cannot alloc memory for channels\n");
		return;
	}

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];
		if (gk20a_channel_get(ch)) {
			ch_state[chid] =
				kmalloc(sizeof(struct ch_state) +
					ram_in_alloc_size_v(), GFP_KERNEL);
			/* ref taken stays to below loop with
			 * successful allocs */
			if (!ch_state[chid])
				gk20a_channel_put(ch);
		}
	}

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];
		if (!ch_state[chid])
			continue;

		ch_state[chid]->pid = ch->pid;
		ch_state[chid]->refs = atomic_read(&ch->ref_count);
		gk20a_mem_rd_n(g, &ch->inst_block, 0,
			       &ch_state[chid]->inst_block[0],
			       ram_in_alloc_size_v());
		gk20a_channel_put(ch);
	}
	for (chid = 0; chid < f->num_channels; chid++) {
		if (ch_state[chid]) {
			gk20a_debug_show_channel(g, o, chid, ch_state[chid]);
			kfree(ch_state[chid]);
		}
	}
	kfree(ch_state);
}

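/* Dump the GR registers through the HAL, wrapped in an ELPG protected
 * call so the engine stays powered while it is read. */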
static int gk20a_gr_dump_regs(struct device *dev,
		struct gk20a_debug_output *o)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;

	if (g->ops.gr.dump_gr_regs)
		gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));

	return 0;
}

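/* Dump GR registers to the kernel log. */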
int gk20a_gr_debug_dump(struct device *dev)
{
	struct gk20a_debug_output o = {
		.fn = gk20a_debug_write_printk
	};

	gk20a_gr_dump_regs(dev, &o);

	return 0;
}

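/* debugfs "gr_status" show callback: power on the GPU, dump GR
 * registers into the seq_file, then drop the power reference. */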
static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
{
	struct device *dev = s->private;
	struct gk20a_debug_output o = {
		.fn = gk20a_debug_write_to_seqfile,
		.ctx = s,
	};
	int err;

	err = gk20a_busy(dev);
	if (err) {
		gk20a_err(dev, "failed to power on gpu: %d", err);
		return -EINVAL;
	}

	gk20a_gr_dump_regs(dev, &o);

	gk20a_idle(dev);

	return 0;
}

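/* Full debug dump to the kernel log: platform dependencies first, then
 * the HAL's show_dump (which is set up only after the first power-on). */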
void gk20a_debug_dump(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;
	struct gk20a_debug_output o = {
		.fn = gk20a_debug_write_printk
	};

	if (platform->dump_platform_dependencies)
		platform->dump_platform_dependencies(dev);

	/* HAL only initialized after 1st power-on */
	if (g->ops.debug.show_dump)
		g->ops.debug.show_dump(g, &o);
}

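/* debugfs "status" show callback: power the GPU on, run the HAL's
 * show_dump into the seq_file and idle the GPU again. */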
static int gk20a_debug_show(struct seq_file *s, void *unused)
{
	struct device *dev = s->private;
	struct gk20a_debug_output o = {
		.fn = gk20a_debug_write_to_seqfile,
		.ctx = s,
	};
	struct gk20a *g;
	int err;

	g = gk20a_get_platform(dev)->g;

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(g->dev, "failed to power on gpu: %d", err);
		return -EFAULT;
	}

	/* HAL only initialized after 1st power-on */
	if (g->ops.debug.show_dump)
		g->ops.debug.show_dump(g, &o);

	gk20a_idle(g->dev);
	return 0;
}

static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_gr_debug_show, inode->i_private);
}

static int gk20a_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_debug_show, inode->i_private);
}

static const struct file_operations gk20a_gr_debug_fops = {
	.open = gk20a_gr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static const struct file_operations gk20a_debug_fops = {
	.open = gk20a_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

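/* Hook the gk20a implementation of show_dump into the HAL debug ops. */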
void gk20a_init_debug_ops(struct gpu_ops *gops)
{
	gops->debug.show_dump = gk20a_debug_show_dump;
}

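/* Create the per-device debugfs directory (plus an optional symlink),
 * populate it with the status/gr_status dumps and the various tuning
 * knobs, then let each subsystem add its own nodes. */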
void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
#ifdef CONFIG_DEBUG_FS
	struct gk20a *g = platform->g;
#endif

	platform->debugfs = debugfs_create_dir(dev_name(dev), NULL);
	if (!platform->debugfs)
		return;

	if (debugfs_symlink)
		platform->debugfs_alias =
			debugfs_create_symlink(debugfs_symlink,
					NULL, dev_name(dev));

	debugfs_create_file("status", S_IRUGO, platform->debugfs,
		dev, &gk20a_debug_fops);
	debugfs_create_file("gr_status", S_IRUGO, platform->debugfs,
		dev, &gk20a_gr_debug_fops);
	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
		platform->debugfs, &gk20a_debug_trace_cmdbuf);

	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
		platform->debugfs, &platform->ch_wdt_timeout_ms);

	debugfs_create_bool("disable_syncpoints", S_IRUGO|S_IWUSR,
		platform->debugfs, &platform->disable_syncpoints);

#if defined(GK20A_DEBUG)
	debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR,
		platform->debugfs, &gk20a_dbg_mask);
	debugfs_create_u32("dbg_ftrace", S_IRUGO|S_IWUSR,
		platform->debugfs, &gk20a_dbg_ftrace);
#endif

#ifdef CONFIG_DEBUG_FS
	nvgpu_spinlock_init(&g->debugfs_lock);

	g->mm.ltc_enabled = true;
	g->mm.ltc_enabled_debug = true;

	g->debugfs_ltc_enabled =
		debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->mm.ltc_enabled_debug);

	g->debugfs_gr_idle_timeout_default =
		debugfs_create_u32("gr_idle_timeout_default_us",
				S_IRUGO|S_IWUSR, platform->debugfs,
				&g->gr_idle_timeout_default);
	g->debugfs_timeouts_enabled =
		debugfs_create_bool("timeouts_enabled",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeouts_enabled);

	g->debugfs_bypass_smmu =
		debugfs_create_bool("bypass_smmu",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->mm.bypass_smmu);
	g->debugfs_disable_bigpage =
		debugfs_create_bool("disable_bigpage",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->mm.disable_bigpage);

	g->debugfs_timeslice_low_priority_us =
		debugfs_create_u32("timeslice_low_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_low_priority_us);
	g->debugfs_timeslice_medium_priority_us =
		debugfs_create_u32("timeslice_medium_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_medium_priority_us);
	g->debugfs_timeslice_high_priority_us =
		debugfs_create_u32("timeslice_high_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_high_priority_us);
	g->debugfs_runlist_interleave =
		debugfs_create_bool("runlist_interleave",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->runlist_interleave);

	gr_gk20a_debugfs_init(g);
	gk20a_pmu_debugfs_init(g->dev);
	gk20a_railgating_debugfs_init(g->dev);
	gk20a_cde_debugfs_init(g->dev);
	gk20a_ce_debugfs_init(g->dev);
	nvgpu_alloc_debugfs_init(g->dev);
	gk20a_mm_debugfs_init(g->dev);
	gk20a_fifo_debugfs_init(g->dev);
	gk20a_sched_debugfs_init(g->dev);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	nvgpu_kmem_debugfs_init(g->dev);
#endif
#endif

}