diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index e7ea3c5df..4b6a8e879 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -66,7 +66,6 @@ nvgpu-y := \ gk20a/fifo_gk20a.o \ gk20a/channel_gk20a.o \ gk20a/channel_sync_gk20a.o \ - gk20a/debug_gk20a.o \ gk20a/dbg_gpu_gk20a.o \ gk20a/regops_gk20a.o \ gk20a/gr_gk20a.o \ @@ -107,7 +106,6 @@ nvgpu-y := \ gm20b/mm_gm20b.o \ gm20b/regops_gm20b.o \ gm20b/mc_gm20b.o \ - gm20b/debug_gm20b.o \ gm20b/cde_gm20b.o \ gm20b/therm_gm20b.o \ gm206/bios_gm206.o \ @@ -117,6 +115,18 @@ nvgpu-y := \ boardobj/boardobjgrp_e255.o \ boardobj/boardobjgrp_e32.o +nvgpu-$(CONFIG_DEBUG_FS) += \ + common/linux/debug.o \ + common/linux/debug_gr.o \ + common/linux/debug_fifo.o \ + common/linux/debug_cde.o \ + common/linux/debug_ce.o \ + common/linux/debug_pmu.o \ + common/linux/debug_sched.o \ + common/linux/debug_mm.o \ + common/linux/debug_allocator.o \ + common/linux/debug_kmem.o + nvgpu-$(CONFIG_TEGRA_GK20A) += tegra/linux/platform_gk20a_tegra.o nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/common/linux/debug.c similarity index 73% rename from drivers/gpu/nvgpu/gk20a/debug_gk20a.c rename to drivers/gpu/nvgpu/common/linux/debug.c index ac435046e..2962a4673 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/common/linux/debug.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2017 NVIDIA Corporation. All rights reserved. + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -12,26 +12,23 @@ * */ -#ifdef CONFIG_DEBUG_FS -#include -#endif -#include -#include -#include +#include "debug_cde.h" +#include "debug_ce.h" +#include "debug_fifo.h" +#include "debug_gr.h" +#include "debug_mm.h" +#include "debug_allocator.h" +#include "debug_kmem.h" +#include "debug_pmu.h" +#include "debug_sched.h" -#include -#include -#include -#include - -#include "gk20a.h" +#include "gk20a/gk20a.h" #include "gk20a/platform_gk20a.h" -#include "debug_gk20a.h" -#include -#include -#include -#include +#include +#include + +#include unsigned int gk20a_debug_trace_cmdbuf; @@ -59,81 +56,22 @@ void gk20a_debug_output(struct gk20a_debug_output *o, o->fn(o->ctx, o->buf, len); } -static void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, - struct gk20a_debug_output *o) -{ - struct fifo_gk20a *f = &g->fifo; - u32 chid; - struct ch_state **ch_state; - - ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels); - if (!ch_state) { - gk20a_debug_output(o, "cannot alloc memory for channels\n"); - return; - } - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - if (gk20a_channel_get(ch)) { - ch_state[chid] = - nvgpu_kmalloc(g, sizeof(struct ch_state) + - ram_in_alloc_size_v()); - /* ref taken stays to below loop with - * successful allocs */ - if (!ch_state[chid]) - gk20a_channel_put(ch); - } - } - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - if (!ch_state[chid]) - continue; - - ch_state[chid]->pid = ch->pid; - ch_state[chid]->refs = atomic_read(&ch->ref_count); - nvgpu_mem_rd_n(g, &ch->inst_block, 0, - &ch_state[chid]->inst_block[0], - ram_in_alloc_size_v()); - gk20a_channel_put(ch); - } - for (chid = 0; chid < f->num_channels; chid++) { - if (ch_state[chid]) { - g->ops.fifo.dump_channel_status_ramfc(g, o, chid, 
- ch_state[chid]); - nvgpu_kfree(g, ch_state[chid]); - } - } - nvgpu_kfree(g, ch_state); -} - -void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) -{ - g->ops.fifo.dump_pbdma_status(g, o); - g->ops.fifo.dump_eng_status(g, o); - - gk20a_debug_dump_all_channel_status_ramfc(g, o); -} - -static int gk20a_gr_dump_regs(struct device *dev, +static int gk20a_gr_dump_regs(struct gk20a *g, struct gk20a_debug_output *o) { - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - if (g->ops.gr.dump_gr_regs) gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); return 0; } -int gk20a_gr_debug_dump(struct device *dev) +int gk20a_gr_debug_dump(struct gk20a *g) { struct gk20a_debug_output o = { .fn = gk20a_debug_write_printk }; - gk20a_gr_dump_regs(dev, &o); + gk20a_gr_dump_regs(g, &o); return 0; } @@ -154,23 +92,22 @@ static int gk20a_gr_debug_show(struct seq_file *s, void *unused) return -EINVAL; } - gk20a_gr_dump_regs(dev, &o); + gk20a_gr_dump_regs(g, &o); gk20a_idle(g); return 0; } -void gk20a_debug_dump(struct device *dev) +void gk20a_debug_dump(struct gk20a *g) { - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; + struct gk20a_platform *platform = gk20a_get_platform(g->dev); struct gk20a_debug_output o = { .fn = gk20a_debug_write_printk }; if (platform->dump_platform_dependencies) - platform->dump_platform_dependencies(dev); + platform->dump_platform_dependencies(g->dev); /* HAL only initialized after 1st power-on */ if (g->ops.debug.show_dump) @@ -227,22 +164,28 @@ static const struct file_operations gk20a_debug_fops = { .release = single_release, }; +void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) +{ + g->ops.fifo.dump_pbdma_status(g, o); + g->ops.fifo.dump_eng_status(g, o); + + gk20a_debug_dump_all_channel_status_ramfc(g, o); +} + void gk20a_init_debug_ops(struct gpu_ops *gops) { gops->debug.show_dump = gk20a_debug_show_dump; } -#ifdef 
CONFIG_DEBUG_FS static int railgate_residency_show(struct seq_file *s, void *data) { - struct device *dev = s->private; - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); + struct gk20a *g = s->private; + struct gk20a_platform *platform = dev_get_drvdata(g->dev); unsigned long time_since_last_state_transition_ms; unsigned long total_rail_gate_time_ms; unsigned long total_rail_ungate_time_ms; - if (platform->is_railgated(dev)) { + if (platform->is_railgated(g->dev)) { time_since_last_state_transition_ms = jiffies_to_msecs(jiffies - g->pstats.last_rail_gate_complete); @@ -282,30 +225,27 @@ static const struct file_operations railgate_residency_fops = { .release = single_release, }; -int gk20a_railgating_debugfs_init(struct device *dev) +static int gk20a_railgating_debugfs_init(struct gk20a *g) { + struct gk20a_platform *platform = dev_get_drvdata(g->dev); struct dentry *d; - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); if (!g->can_railgate) return 0; d = debugfs_create_file( - "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, dev, + "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, g, &railgate_residency_fops); if (!d) return -ENOMEM; return 0; } -#endif -void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) +void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) { -#ifdef CONFIG_DEBUG_FS + struct device *dev = g->dev; struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; platform->debugfs = debugfs_create_dir(dev_name(dev), NULL); if (!platform->debugfs) @@ -409,17 +349,28 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) #endif gr_gk20a_debugfs_init(g); - gk20a_pmu_debugfs_init(g->dev); - gk20a_railgating_debugfs_init(g->dev); - gk20a_cde_debugfs_init(g->dev); - gk20a_ce_debugfs_init(g->dev); - nvgpu_alloc_debugfs_init(g->dev); - gk20a_mm_debugfs_init(g->dev); - 
gk20a_fifo_debugfs_init(g->dev); - gk20a_sched_debugfs_init(g->dev); + gk20a_pmu_debugfs_init(g); + gk20a_railgating_debugfs_init(g); + gk20a_cde_debugfs_init(g); + gk20a_ce_debugfs_init(g); + nvgpu_alloc_debugfs_init(g); + gk20a_mm_debugfs_init(g); + gk20a_fifo_debugfs_init(g); + gk20a_sched_debugfs_init(g); #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - nvgpu_kmem_debugfs_init(g->dev); + nvgpu_kmem_debugfs_init(g); #endif -#endif - +} + +void gk20a_debug_deinit(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(g->dev); + + if (!platform->debugfs) + return; + + gk20a_fifo_debugfs_deinit(g); + + debugfs_remove_recursive(platform->debugfs); + debugfs_remove_recursive(platform->debugfs_alias); } diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c new file mode 100644 index 000000000..3d4a2bb23 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_allocator.h" +#include "gk20a/platform_gk20a.h" + +#include +#include + +#include + +u32 nvgpu_alloc_tracing_on; + +void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a, + struct seq_file *s, int lock) +{ + __a->ops->print_stats(__a, s, lock); +} + +static int __alloc_show(struct seq_file *s, void *unused) +{ + struct nvgpu_allocator *a = s->private; + + nvgpu_alloc_print_stats(a, s, 1); + + return 0; +} + +static int __alloc_open(struct inode *inode, struct file *file) +{ + return single_open(file, __alloc_show, inode->i_private); +} + +static const struct file_operations __alloc_fops = { + .open = __alloc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) +{ + if (!g->debugfs_allocators) + return; + + a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, + g->debugfs_allocators, + a, &__alloc_fops); +} + +void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) +{ + if (!IS_ERR_OR_NULL(a->debugfs_entry)) + debugfs_remove(a->debugfs_entry); +} + +void nvgpu_alloc_debugfs_init(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(g->dev); + + g->debugfs_allocators = debugfs_create_dir("allocators", platform->debugfs); + if (IS_ERR_OR_NULL(g->debugfs_allocators)) { + g->debugfs_allocators = NULL; + return; + } + + debugfs_create_u32("tracing", 0664, g->debugfs_allocators, + &nvgpu_alloc_tracing_on); +} diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h new file mode 100644 index 000000000..1b21cfc5d --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_ALLOCATOR_H__ +#define __NVGPU_DEBUG_ALLOCATOR_H__ + +struct gk20a; +void nvgpu_alloc_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c new file mode 100644 index 000000000..eb7c33e23 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_cde.h" +#include "gk20a/platform_gk20a.h" + +#include + + +static ssize_t gk20a_cde_reload_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *ppos) +{ + struct gk20a *g = file->private_data; + gk20a_cde_reload(g); + return count; +} + +static const struct file_operations gk20a_cde_reload_fops = { + .open = simple_open, + .write = gk20a_cde_reload_write, +}; + +void gk20a_cde_debugfs_init(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(g->dev); + + if (!platform->has_cde) + return; + + debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, + platform->debugfs, &g->cde_app.shader_parameter); + debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, + platform->debugfs, &g->cde_app.ctx_count); + debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, + platform->debugfs, &g->cde_app.ctx_usecount); + debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, + platform->debugfs, &g->cde_app.ctx_count_top); + debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs, + g, &gk20a_cde_reload_fops); +} diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h new file mode 100644 index 000000000..4895edd69 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_cde.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __NVGPU_DEBUG_CDE_H__ +#define __NVGPU_DEBUG_CDE_H__ + +struct gk20a; +void gk20a_cde_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_CDE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c new file mode 100644 index 000000000..9c50870eb --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_ce.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_ce.h" +#include "gk20a/platform_gk20a.h" + +#include + +void gk20a_ce_debugfs_init(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(g->dev); + + debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, + platform->debugfs, &g->ce_app.ctx_count); + debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, + platform->debugfs, &g->ce_app.app_state); + debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, + platform->debugfs, &g->ce_app.next_ctx_id); +} diff --git a/drivers/gpu/nvgpu/gm20b/debug_gm20b.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h similarity index 68% rename from drivers/gpu/nvgpu/gm20b/debug_gm20b.h rename to drivers/gpu/nvgpu/common/linux/debug_ce.h index c3c5fed6c..2a8750c4f 100644 --- a/drivers/gpu/nvgpu/gm20b/debug_gm20b.h +++ b/drivers/gpu/nvgpu/common/linux/debug_ce.h @@ -1,7 +1,5 @@ /* - * GM20B Debug functionality - * - * Copyright (C) 2015 NVIDIA CORPORATION. All rights reserved. + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -14,11 +12,10 @@ * */ -#ifndef _DEBUG_GM20B_H_ -#define _DEBUG_GM20B_H_ +#ifndef __NVGPU_DEBUG_CE_H__ +#define __NVGPU_DEBUG_CE_H__ -struct gpu_ops; +struct gk20a; +void gk20a_ce_debugfs_init(struct gk20a *g); -void gm20b_init_debug_ops(struct gpu_ops *gops); - -#endif +#endif /* __NVGPU_DEBUG_CE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c new file mode 100644 index 000000000..6a28b1a52 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_fifo.h" +#include "gk20a/platform_gk20a.h" + +#include +#include + +#include + +void __gk20a_fifo_profile_free(struct kref *ref); + +static void *gk20a_fifo_sched_debugfs_seq_start( + struct seq_file *s, loff_t *pos) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + + if (*pos >= f->num_channels) + return NULL; + + return &f->channel[*pos]; +} + +static void *gk20a_fifo_sched_debugfs_seq_next( + struct seq_file *s, void *v, loff_t *pos) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + + ++(*pos); + if (*pos >= f->num_channels) + return NULL; + + return &f->channel[*pos]; +} + +static void gk20a_fifo_sched_debugfs_seq_stop( + struct seq_file *s, void *v) +{ +} + +static int gk20a_fifo_sched_debugfs_seq_show( + struct seq_file *s, void *v) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = v; + struct tsg_gk20a *tsg = NULL; + + struct fifo_engine_info_gk20a *engine_info; + struct fifo_runlist_info_gk20a *runlist; + u32 runlist_id; + int ret = SEQ_SKIP; + u32 engine_id; + + engine_id = gk20a_fifo_get_gr_engine_id(g); + engine_info = (f->engine_info + engine_id); + runlist_id = engine_info->runlist_id; + runlist = &f->runlist_info[runlist_id]; + + if (ch == f->channel) { + seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n"); + seq_puts(s, " (usecs) (msecs)\n"); + ret = 0; + } + + if (!test_bit(ch->hw_chid, runlist->active_channels)) + return ret; + + if (gk20a_channel_get(ch)) { + if (gk20a_is_channel_marked_as_tsg(ch)) + tsg = &f->tsg[ch->tsgid]; + + seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n", + ch->hw_chid, + ch->tsgid, + ch->tgid, + tsg ? tsg->timeslice_us : ch->timeslice_us, + ch->timeout_ms_max, + tsg ? tsg->interleave_level : ch->interleave_level, + ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX, + ch->ch_ctx.gr_ctx ? 
ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX); + gk20a_channel_put(ch); + } + return 0; +} + +static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { + .start = gk20a_fifo_sched_debugfs_seq_start, + .next = gk20a_fifo_sched_debugfs_seq_next, + .stop = gk20a_fifo_sched_debugfs_seq_stop, + .show = gk20a_fifo_sched_debugfs_seq_show +}; + +static int gk20a_fifo_sched_debugfs_open(struct inode *inode, + struct file *file) +{ + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); + if (err) + return err; + + gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private); + + ((struct seq_file *)file->private_data)->private = inode->i_private; + return 0; +}; + +/* + * The file operations structure contains our open function along with + * the set of canned seq_ ops. + */ +static const struct file_operations gk20a_fifo_sched_debugfs_fops = { + .owner = THIS_MODULE, + .open = gk20a_fifo_sched_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static int gk20a_fifo_profile_enable(void *data, u64 val) +{ + struct gk20a *g = (struct gk20a *) data; + struct fifo_gk20a *f = &g->fifo; + + + nvgpu_mutex_acquire(&f->profile.lock); + if (val == 0) { + if (f->profile.enabled) { + f->profile.enabled = false; + kref_put(&f->profile.ref, __gk20a_fifo_profile_free); + } + } else { + if (!f->profile.enabled) { + /* not kref init as it can have a race condition if + * we enable/disable/enable while kickoff is happening + */ + if (!kref_get_unless_zero(&f->profile.ref)) { + f->profile.data = vzalloc( + FIFO_PROFILING_ENTRIES * + sizeof(struct fifo_profile_gk20a)); + f->profile.sorted = vzalloc( + FIFO_PROFILING_ENTRIES * + sizeof(u64)); + if (!(f->profile.data && f->profile.sorted)) { + nvgpu_vfree(g, f->profile.data); + nvgpu_vfree(g, f->profile.sorted); + nvgpu_mutex_release(&f->profile.lock); + return -ENOMEM; + } + kref_init(&f->profile.ref); + } + 
atomic_set(&f->profile.get, 0); + f->profile.enabled = true; + } + } + nvgpu_mutex_release(&f->profile.lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE( + gk20a_fifo_profile_enable_debugfs_fops, + NULL, + gk20a_fifo_profile_enable, + "%llu\n" +); + +static int __profile_cmp(const void *a, const void *b) +{ + return *((unsigned long long *) a) - *((unsigned long long *) b); +} + +/* + * This uses about 800b in the stack, but the function using it is not part + * of a callstack where much memory is being used, so it is fine + */ +#define PERCENTILE_WIDTH 5 +#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) + +static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, + u64 *percentiles, u32 index_end, u32 index_start) +{ + unsigned int nelem = 0; + unsigned int index; + struct fifo_profile_gk20a *profile; + + for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { + profile = &g->fifo.profile.data[index]; + + if (profile->timestamp[index_end] > + profile->timestamp[index_start]) { + /* This is a valid element */ + g->fifo.profile.sorted[nelem] = + profile->timestamp[index_end] - + profile->timestamp[index_start]; + nelem++; + } + } + + /* sort it */ + sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), + __profile_cmp, NULL); + + /* build ranges */ + for (index = 0; index < PERCENTILE_RANGES; index++) + percentiles[index] = + g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * + nelem)/100 - 1]; + return nelem; +} + +static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + unsigned int get, nelem, index; + /* + * 800B in the stack, but function is declared statically and only + * called from debugfs handler + */ + u64 percentiles_ioctl[PERCENTILE_RANGES]; + u64 percentiles_kickoff[PERCENTILE_RANGES]; + u64 percentiles_jobtracking[PERCENTILE_RANGES]; + u64 percentiles_append[PERCENTILE_RANGES]; + u64 percentiles_userd[PERCENTILE_RANGES]; + + if (!kref_get_unless_zero(&g->fifo.profile.ref)) { + 
seq_printf(s, "Profiling disabled\n"); + return 0; + } + + get = atomic_read(&g->fifo.profile.get); + + __gk20a_fifo_create_stats(g, percentiles_ioctl, + PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_kickoff, + PROFILE_END, PROFILE_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_jobtracking, + PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_append, + PROFILE_APPEND, PROFILE_JOB_TRACKING); + nelem = __gk20a_fifo_create_stats(g, percentiles_userd, + PROFILE_END, PROFILE_APPEND); + + seq_printf(s, "Number of kickoffs: %d\n", nelem); + seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); + + for (index = 0; index < PERCENTILE_RANGES; index++) + seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", + PERCENTILE_WIDTH * (index+1), + percentiles_ioctl[index], + percentiles_kickoff[index], + percentiles_append[index], + percentiles_jobtracking[index], + percentiles_userd[index]); + + kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); + + return 0; +} + +static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_fifo_profile_stats, inode->i_private); +} + +static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { + .open = gk20a_fifo_profile_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +void gk20a_fifo_debugfs_init(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(g->dev); + + struct dentry *gpu_root = platform->debugfs; + struct dentry *fifo_root; + struct dentry *profile_root; + + fifo_root = debugfs_create_dir("fifo", gpu_root); + if (IS_ERR_OR_NULL(fifo_root)) + return; + + gk20a_dbg(gpu_dbg_info, "g=%p", g); + + debugfs_create_file("sched", 0600, fifo_root, g, + &gk20a_fifo_sched_debugfs_fops); + + profile_root = debugfs_create_dir("profile", fifo_root); + if 
(IS_ERR_OR_NULL(profile_root)) + return; + + nvgpu_mutex_init(&g->fifo.profile.lock); + g->fifo.profile.enabled = false; + atomic_set(&g->fifo.profile.get, 0); + atomic_set(&g->fifo.profile.ref.refcount, 0); + + debugfs_create_file("enable", 0600, profile_root, g, + &gk20a_fifo_profile_enable_debugfs_fops); + + debugfs_create_file("stats", 0600, profile_root, g, + &gk20a_fifo_profile_stats_debugfs_fops); + +} + +void __gk20a_fifo_profile_free(struct kref *ref) +{ + struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, + profile.ref); + nvgpu_vfree(f->g, f->profile.data); + nvgpu_vfree(f->g, f->profile.sorted); +} + +/* Get the next element in the ring buffer of profile entries + * and grab a reference to the structure + */ +struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + struct fifo_profile_gk20a *profile; + unsigned int index; + + /* If kref is zero, profiling is not enabled */ + if (!kref_get_unless_zero(&f->profile.ref)) + return NULL; + index = atomic_inc_return(&f->profile.get); + profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; + + return profile; +} + +/* Free the reference to the structure. This allows deferred cleanups */ +void gk20a_fifo_profile_release(struct gk20a *g, + struct fifo_profile_gk20a *profile) +{ + kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); +} + +void gk20a_fifo_debugfs_deinit(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + + nvgpu_mutex_acquire(&f->profile.lock); + if (f->profile.enabled) { + f->profile.enabled = false; + kref_put(&f->profile.ref, __gk20a_fifo_profile_free); + } + nvgpu_mutex_release(&f->profile.lock); +} diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h new file mode 100644 index 000000000..46ac853e6 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_FIFO_H__ +#define __NVGPU_DEBUG_FIFO_H__ + +struct gk20a; +void gk20a_fifo_debugfs_init(struct gk20a *g); +void gk20a_fifo_debugfs_deinit(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_FIFO_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c new file mode 100644 index 000000000..56b8612ee --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_gr.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_gr.h" +#include "gk20a/platform_gk20a.h" + +#include + +int gr_gk20a_debugfs_init(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(g->dev); + + g->debugfs_gr_default_attrib_cb_size = + debugfs_create_u32("gr_default_attrib_cb_size", + S_IRUGO|S_IWUSR, platform->debugfs, + &g->gr.attrib_cb_default_size); + + return 0; +} + diff --git a/drivers/gpu/nvgpu/gm20b/debug_gm20b.c b/drivers/gpu/nvgpu/common/linux/debug_gr.h similarity index 68% rename from drivers/gpu/nvgpu/gm20b/debug_gm20b.c rename to drivers/gpu/nvgpu/common/linux/debug_gr.h index b266200c0..4b46acbb1 100644 --- a/drivers/gpu/nvgpu/gm20b/debug_gm20b.c +++ b/drivers/gpu/nvgpu/common/linux/debug_gr.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 NVIDIA Corporation. All rights reserved. + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -12,10 +12,10 @@ * */ -#include "gk20a/gk20a.h" -#include "debug_gm20b.h" +#ifndef __NVGPU_DEBUG_GR_H__ +#define __NVGPU_DEBUG_GR_H__ -void gm20b_init_debug_ops(struct gpu_ops *gops) -{ - gops->debug.show_dump = gk20a_debug_show_dump; -} +struct gk20a; +int gr_gk20a_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_GR_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c new file mode 100644 index 000000000..2ee542a83 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c @@ -0,0 +1,315 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_kmem.h" +#include "kmem_priv.h" +#include "gk20a/platform_gk20a.h" + +#include +#include + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE +/** + * __to_human_readable_bytes - Determine suffix for passed size. + * + * @bytes - Number of bytes to generate a suffix for. + * @hr_bytes [out] - The human readable number of bytes. + * @hr_suffix [out] - The suffix for the HR number of bytes. + * + * Computes a human readable decomposition of the passed number of bytes. The + * suffix for the bytes is passed back through the @hr_suffix pointer. The right + * number of bytes is then passed back in @hr_bytes. This returns the following + * ranges: + * + * 0 - 1023 B + * 1 - 1023 KB + * 1 - 1023 MB + * 1 - 1023 GB + * 1 - 1023 TB + * 1 - ... PB + */ +static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, + const char **hr_suffix) +{ + static const char *suffixes[] = + { "B", "KB", "MB", "GB", "TB", "PB" }; + + u64 suffix_ind = 0; + + while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) { + bytes >>= 10; + suffix_ind++; + } + + /* + * Handle case where bytes > 1023PB. + */ + suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? + suffix_ind : ARRAY_SIZE(suffixes) - 1; + + *hr_bytes = bytes; + *hr_suffix = suffixes[suffix_ind]; +} + +/** + * print_hr_bytes - Print human readable bytes + * + * @s - A seq_file to print to. May be NULL. + * @msg - A message to print before the bytes. + * @bytes - Number of bytes. + * + * Print @msg followed by the human readable decomposition of the passed number + * of bytes. + * + * If @s is NULL then the output is printed to the kernel log. 
+ */
+static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
+{
+	u64 hr_bytes;
+	const char *hr_suffix;
+
+	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
+	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
+}
+
+/**
+ * print_histogram - Build a histogram of the memory usage.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
+ *
+ * Sorts every alloc currently held in @tracker into power-of-two sized
+ * buckets, spanning min_alloc rounded down to max_alloc rounded up, and prints
+ * one line per bucket. Nothing is plotted when min_alloc is 0 or larger than
+ * max_alloc.
+ */
+static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	int i;
+	u64 pot_min, pot_max;
+	u64 nr_buckets;
+	unsigned int *buckets;
+	unsigned int total_allocs;
+	struct nvgpu_rbtree_node *node;
+	static const char histogram_line[] =
+		"++++++++++++++++++++++++++++++++++++++++";
+
+	/*
+	 * The power-of-two rounding helpers are undefined for 0 and an
+	 * inverted range cannot be bucketed, so bail out before using them.
+	 */
+	if (!tracker->min_alloc || tracker->max_alloc < tracker->min_alloc) {
+		__pstat(s, "Alloc histogram: no allocs to plot\n");
+		return;
+	}
+
+	/*
+	 * pot_min is essentially a round down to the nearest power of 2. This
+	 * is the start of the histogram. pot_max is just a round up to the
+	 * nearest power of two. Each histogram bucket is one power of two so
+	 * the histogram buckets are exponential.
+	 */
+	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
+	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
+
+	/*
+	 * If min_alloc and max_alloc are the same power of two the range is
+	 * empty; keep one bucket so those allocs still have somewhere to go.
+	 */
+	nr_buckets = __ffs(pot_max) - __ffs(pot_min);
+	if (!nr_buckets)
+		nr_buckets = 1;
+
+	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
+	if (!buckets) {
+		__pstat(s, "OOM: could not allocate bucket storage!?\n");
+		return;
+	}
+
+	/*
+	 * Iterate across all of the allocs and determine what bucket they
+	 * should go in. Round the size down to the nearest power of two to
+	 * find the right bucket.
+	 */
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		int b;
+		u64 bucket_min;
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		/* A zero size has no power of two; file it in the first bucket. */
+		bucket_min = alloc->size ?
+			(u64)rounddown_pow_of_two(alloc->size) : pot_min;
+
+		/*
+		 * Clamp against pot_min rather than min_alloc: __ffs() only
+		 * yields log2 for a power of two, and min_alloc need not be
+		 * one.
+		 */
+		if (bucket_min < pot_min)
+			bucket_min = pot_min;
+
+		b = __ffs(bucket_min) - __ffs(pot_min);
+
+		/*
+		 * Handle the one case where there's an alloc exactly as big as
+		 * the maximum bucket size of the largest bucket. Most of the
+		 * buckets have an inclusive minimum and exclusive maximum. But
+		 * the largest bucket needs to have an _inclusive_ maximum as
+		 * well.
+		 */
+		if (b >= (int)nr_buckets)
+			b = (int)nr_buckets - 1;
+
+		buckets[b]++;
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	total_allocs = 0;
+	for (i = 0; i < (int)nr_buckets; i++)
+		total_allocs += buckets[i];
+
+	__pstat(s, "Alloc histogram:\n");
+
+	/*
+	 * Actually compute the histogram lines.
+	 */
+	for (i = 0; i < (int)nr_buckets; i++) {
+		char this_line[sizeof(histogram_line) + 1];
+		u64 line_length;
+		u64 hr_bytes;
+		const char *hr_suffix;
+
+		memset(this_line, 0, sizeof(this_line));
+
+		/*
+		 * Compute the normalized line length. Can't use floating point
+		 * so we will just multiply everything by 1000 and use fixed
+		 * point. With no outstanding allocs every line is empty.
+		 */
+		line_length = total_allocs ?
+			(1000 * buckets[i]) / total_allocs : 0;
+		line_length *= sizeof(histogram_line);
+		line_length /= 1000;
+
+		memset(this_line, '+', line_length);
+
+		/* 1ULL: the bucket base can exceed what an int shift can hold. */
+		__to_human_readable_bytes(1ULL << (__ffs(pot_min) + i),
+					  &hr_bytes, &hr_suffix);
+		__pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
+			hr_bytes, hr_bytes << 1,
+			hr_suffix, buckets[i], this_line);
+	}
+
+	kfree(buckets);
+}
+
+/**
+ * nvgpu_kmem_print_stats - Print kmem tracking stats.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
+ *
+ * Print stats from a tracker. If @s is non-null then seq_printf() will be
+ * used with @s. Otherwise the stats are pr_info()ed.
+ */
+void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	/* The tracker lock is held across the whole dump, histogram included. */
+	nvgpu_lock_tracker(tracker);
+
+	__pstat(s, "Mem tracker: %s\n\n", tracker->name);
+
+	__pstat(s, "Basic Stats:\n");
+	__pstat(s, " Number of allocs %lld\n",
+		tracker->nr_allocs);
+	__pstat(s, " Number of frees %lld\n",
+		tracker->nr_frees);
+	print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
+	print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
+	print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
+	print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
+	print_hr_bytes(s, " Bytes allocated (real) ",
+		       tracker->bytes_alloced_real);
+	print_hr_bytes(s, " Bytes freed (real) ",
+		       tracker->bytes_freed_real);
+	__pstat(s, "\n");
+
+	print_histogram(tracker, s);
+
+	nvgpu_unlock_tracker(tracker);
+}
+
+/* seq_file show callback: s->private is the tracker passed at file creation. */
+static int __kmem_tracking_show(struct seq_file *s, void *unused)
+{
+	struct nvgpu_mem_alloc_tracker *tracker = s->private;
+
+	nvgpu_kmem_print_stats(tracker, s);
+
+	return 0;
+}
+
+static int __kmem_tracking_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __kmem_tracking_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_tracking_fops = {
+	.open		= __kmem_tracking_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Print every alloc stored in @tracker's rbtree to @s. Does not take the
+ * tracker lock itself; the caller in this file holds it around the call.
+ * Always returns 0.
+ */
+static int __kmem_traces_dump_tracker(struct gk20a *g,
+				      struct nvgpu_mem_alloc_tracker *tracker,
+				      struct seq_file *s)
+{
+	struct nvgpu_rbtree_node *node;
+
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		kmem_print_mem_alloc(g, alloc, s);
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	return 0;
+}
+
+/*
+ * seq_file show callback for the "traces" node: dumps the vmalloc tracker and
+ * then the kmalloc tracker, each under its own tracker lock.
+ */
+static int __kmem_traces_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+
+	nvgpu_lock_tracker(g->vmallocs);
+	seq_puts(s, "Oustanding vmallocs:\n");
+	__kmem_traces_dump_tracker(g, g->vmallocs, s);
+	seq_puts(s, "\n");
+	nvgpu_unlock_tracker(g->vmallocs);
+
+	nvgpu_lock_tracker(g->kmallocs);
+	seq_puts(s, "Oustanding kmallocs:\n");
+	__kmem_traces_dump_tracker(g, g->kmallocs, s);
+	nvgpu_unlock_tracker(g->kmallocs);
+
+	return 0;
+}
+
+static int __kmem_traces_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __kmem_traces_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_traces_fops = {
+	.open		= __kmem_traces_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/**
+ * nvgpu_kmem_debugfs_init - Create the kmem tracking debugfs nodes.
+ *
+ * @g - The GPU whose vmalloc/kmalloc trackers are exposed.
+ *
+ * Creates the "kmem_tracking" directory under the platform debugfs directory
+ * and, inside it, one stats file per tracker (named after the tracker) plus a
+ * "traces" file. Returns silently if the directory cannot be created; the
+ * individual file dentries are not kept or checked.
+ */
+void nvgpu_kmem_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
+	if (IS_ERR_OR_NULL(g->debugfs_kmem))
+		return;
+
+	debugfs_create_file(g->vmallocs->name, S_IRUGO,
+			    g->debugfs_kmem,
+			    g->vmallocs, &__kmem_tracking_fops);
+	debugfs_create_file(g->kmallocs->name, S_IRUGO,
+			    g->debugfs_kmem,
+			    g->kmallocs, &__kmem_tracking_fops);
+	debugfs_create_file("traces", S_IRUGO,
+			    g->debugfs_kmem,
+			    g, &__kmem_traces_fops);
+}
+#endif
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
new file mode 100644
index 000000000..44322b533
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_KMEM_H__
+#define __NVGPU_DEBUG_KMEM_H__
+
+struct gk20a;
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+/* Only declared when memory-usage tracking is configured in. */
+void nvgpu_kmem_debugfs_init(struct gk20a *g);
+#endif
+
+#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.c b/drivers/gpu/nvgpu/common/linux/debug_mm.c
new file mode 100644
index 000000000..1e260f898
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_mm.h"
+#include "gk20a/platform_gk20a.h"
+
+#include
+
+/**
+ * gk20a_mm_debugfs_init - Expose g->mm.force_pramin as a debugfs bool.
+ *
+ * @g - The GPU whose mm.force_pramin flag is exported.
+ *
+ * Creates "force_pramin" (mode 0664) in the platform debugfs directory. The
+ * result of the creation is not checked.
+ */
+void gk20a_mm_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	debugfs_create_bool("force_pramin", 0664, platform->debugfs,
+			    &g->mm.force_pramin);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.h b/drivers/gpu/nvgpu/common/linux/debug_mm.h
new file mode 100644
index 000000000..bf7bc9851
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_MM_H__
+#define __NVGPU_DEBUG_MM_H__
+
+struct gk20a;
+void gk20a_mm_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_MM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
new file mode 100644
index 000000000..f19f51398
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
@@ -0,0 +1,479 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_pmu.h"
+#include "gk20a/platform_gk20a.h"
+
+#include
+#include
+#include
+
+/*
+ * seq_file show callback for "lpwr_debug".
+ *
+ * When the pmu_pg_engines_feature_list op exists and its result for the
+ * graphics engine differs from PMU_PG_FEATURE_GR_POWER_GATING_ENABLED, the
+ * pstate/RPPG/MSCG fields are printed; otherwise only the ELPG fields are.
+ * Always returns 0.
+ *
+ * NOTE(review): g->ops.clk_arb.get_current_pstate is called without a NULL
+ * check in the first branch - confirm it is always populated on such chips.
+ */
+static int lpwr_debug_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+
+	if (g->ops.pmu.pmu_pg_engines_feature_list &&
+		g->ops.pmu.pmu_pg_engines_feature_list(g,
+			PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
+		PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
+		seq_printf(s, "PSTATE: %u\n"
+			"RPPG Enabled: %u\n"
+			"RPPG ref count: %u\n"
+			"RPPG state: %u\n"
+			"MSCG Enabled: %u\n"
+			"MSCG pstate state: %u\n"
+			"MSCG transition state: %u\n",
+			g->ops.clk_arb.get_current_pstate(g),
+			g->elpg_enabled, g->pmu.elpg_refcnt,
+			g->pmu.elpg_stat, g->mscg_enabled,
+			g->pmu.mscg_stat, g->pmu.mscg_transition_state);
+
+	} else
+		seq_printf(s, "ELPG Enabled: %u\n"
+			"ELPG ref count: %u\n"
+			"ELPG state: %u\n",
+			g->elpg_enabled, g->pmu.elpg_refcnt,
+			g->pmu.elpg_stat);
+
+	return 0;
+
+}
+
+static int lpwr_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, lpwr_debug_show, inode->i_private);
+}
+
+static const struct file_operations lpwr_debug_fops = {
+	.open		= lpwr_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * seq_file show callback for "mscg_residency".
+ *
+ * Adds the PMU's current PG stats for the MS engine (fetched only when the GPU
+ * is already powered on; zero otherwise) to the totals accumulated in @g, and
+ * prints the in/out times, the residency on a 0..1000 scale, and the entry
+ * count and latencies. Returns the gk20a_busy() error if that fails, else 0.
+ */
+static int mscg_stat_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	u64 total_ingating, total_ungating, residency, divisor, dividend;
+	struct pmu_pg_stats_data pg_stat_data = { 0 };
+	int err;
+
+	/* Don't unnecessarily power on the device */
+	if (g->power_on) {
+		err = gk20a_busy(g);
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
+		gk20a_idle(g);
+	}
+	total_ingating = g->pg_ingating_time_us +
+			(u64)pg_stat_data.ingating_time;
+	total_ungating = g->pg_ungating_time_us +
+			(u64)pg_stat_data.ungating_time;
+
+	divisor = total_ingating + total_ungating;
+
+	/* We compute the residency on a scale of 1000 */
+	dividend = total_ingating * 1000;
+
+	/* No time recorded at all: report 0 instead of dividing by zero. */
+	if (divisor)
+		residency = div64_u64(dividend, divisor);
+	else
+		residency = 0;
+
+	seq_printf(s,
+		"Time in MSCG: %llu us\n"
+		"Time out of MSCG: %llu us\n"
+		"MSCG residency ratio: %llu\n"
+		"MSCG Entry Count: %u\n"
+		"MSCG Avg Entry latency %u\n"
+		"MSCG Avg Exit latency %u\n",
+		total_ingating, total_ungating,
+		residency, pg_stat_data.gating_cnt,
+		pg_stat_data.avg_entry_latency_us,
+		pg_stat_data.avg_exit_latency_us);
+	return 0;
+
+}
+
+static int mscg_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mscg_stat_show, inode->i_private);
+}
+
+static const struct file_operations mscg_stat_fops = {
+	.open		= mscg_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * seq_file show callback for "mscg_transitions": prints g->pg_gating_cnt plus
+ * the MS engine gating count read from the PMU (read only when the GPU is
+ * already powered on).
+ */
+static int mscg_transitions_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data pg_stat_data = { 0 };
+	u32 total_gating_cnt;
+	int err;
+
+	if (g->power_on) {
+		err = gk20a_busy(g);
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_MS,
&pg_stat_data);
+		gk20a_idle(g);
+	}
+	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
+
+	seq_printf(s, "%u\n", total_gating_cnt);
+	return 0;
+
+}
+
+static int mscg_transitions_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mscg_transitions_show, inode->i_private);
+}
+
+static const struct file_operations mscg_transitions_fops = {
+	.open		= mscg_transitions_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * seq_file show callback for "elpg_residency".
+ *
+ * Same computation as the MSCG residency node, but the PG stats are read for
+ * PMU_PG_ELPG_ENGINE_ID_GRAPHICS. The stats are fetched only when the GPU is
+ * already powered on; the residency is reported on a 0..1000 scale. Returns
+ * the gk20a_busy() error if that fails, else 0.
+ */
+static int elpg_stat_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data pg_stat_data = { 0 };
+	u64 total_ingating, total_ungating, residency, divisor, dividend;
+	int err;
+
+	/* Don't unnecessarily power on the device */
+	if (g->power_on) {
+		err = gk20a_busy(g);
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
+		gk20a_idle(g);
+	}
+	total_ingating = g->pg_ingating_time_us +
+			(u64)pg_stat_data.ingating_time;
+	total_ungating = g->pg_ungating_time_us +
+			(u64)pg_stat_data.ungating_time;
+	divisor = total_ingating + total_ungating;
+
+	/* We compute the residency on a scale of 1000 */
+	dividend = total_ingating * 1000;
+
+	/* No time recorded at all: report 0 instead of dividing by zero. */
+	if (divisor)
+		residency = div64_u64(dividend, divisor);
+	else
+		residency = 0;
+
+	seq_printf(s,
+		"Time in ELPG: %llu us\n"
+		"Time out of ELPG: %llu us\n"
+		"ELPG residency ratio: %llu\n"
+		"ELPG Entry Count: %u\n"
+		"ELPG Avg Entry latency %u us\n"
+		"ELPG Avg Exit latency %u us\n",
+		total_ingating, total_ungating,
+		residency, pg_stat_data.gating_cnt,
+		pg_stat_data.avg_entry_latency_us,
+		pg_stat_data.avg_exit_latency_us);
+	return 0;
+
+}
+
+static int elpg_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, elpg_stat_show, inode->i_private);
+}
+
+static const struct file_operations elpg_stat_fops = {
+	.open		= elpg_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int
elpg_transitions_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data pg_stat_data = { 0 };
+	u32 total_gating_cnt;
+	int err;
+
+	if (g->power_on) {
+		err = gk20a_busy(g);
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
+		gk20a_idle(g);
+	}
+	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
+
+	seq_printf(s, "%u\n", total_gating_cnt);
+	return 0;
+
+}
+
+static int elpg_transitions_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, elpg_transitions_show, inode->i_private);
+}
+
+static const struct file_operations elpg_transitions_fops = {
+	.open		= elpg_transitions_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * seq_file show callback for "falc_trace".
+ *
+ * Copies GK20A_PMU_TRACE_BUFSIZE bytes of pmu->trace_buf into a temporary
+ * buffer and walks it in 0x40-byte steps. For each step it prints the first
+ * word as the index, then the text starting 20 bytes in, with each position
+ * reported by nvgpu_find_hex_in_string() replaced by the next trace word.
+ * Dumping stops at the first step whose scan window holds only zero words.
+ *
+ * Returns -ENOMEM if the temporary buffer cannot be allocated, else 0.
+ */
+static int falc_trace_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_gk20a *pmu = &g->pmu;
+	const u32 nr_words = GK20A_PMU_TRACE_BUFSIZE / sizeof(u32);
+	u32 i = 0, j = 0, k, l, m;
+	char part_str[40];
+	void *tracebuffer;
+	char *trace;
+	u32 *trace1;
+
+	/* allocate system memory to copy pmu trace buffer */
+	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
+	if (tracebuffer == NULL)
+		return -ENOMEM;
+
+	/* read pmu traces into system memory buffer */
+	nvgpu_mem_rd_n(g, &pmu->trace_buf,
+		       0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);
+
+	trace = (char *)tracebuffer;
+	trace1 = (u32 *)tracebuffer;
+
+	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
+		const u32 first = i / 4;
+
+		/*
+		 * The scan window is 0x40 u32 words, which is wider than the
+		 * 0x40-byte step, so near the end of the buffer it has to be
+		 * cut short to avoid reading past the copy.
+		 */
+		for (j = 0; j < 0x40 && first + j < nr_words; j++)
+			if (trace1[first + j])
+				break;
+		/* Nothing but zeroes in the (possibly shortened) window. */
+		if (j == 0x40 || first + j >= nr_words)
+			break;
+		seq_printf(s, "Index %x: ", trace1[first]);
+		l = 0;
+		m = 0;
+		/*
+		 * NOTE(review): m and l are not bounded against the end of the
+		 * buffer here; this relies on nvgpu_find_hex_in_string() and on
+		 * the trace text being NUL terminated - confirm.
+		 */
+		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
+			/* part_str holds at most 39 characters plus the NUL */
+			if (k >= 40)
+				break;
+			strncpy(part_str, (trace+i+20+m), k);
+			part_str[k] = 0;
+			seq_printf(s, "%s0x%x", part_str,
+					trace1[first + 1 + l]);
+			l++;
+			m += k + 2;
+		}
+		seq_printf(s, "%s", (trace+i+20+m));
+	}
+
+	nvgpu_kfree(g, tracebuffer);
+	return 0;
+}
+
+static int falc_trace_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, falc_trace_show, inode->i_private);
+}
+
+static const struct file_operations falc_trace_fops = {
+	.open		= falc_trace_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* seq_file show callback: prints 1 or 0 for g->pmu.perfmon_sampling_enabled. */
+static int perfmon_events_enable_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+
+	seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
+	return 0;
+
+}
+
+static int perfmon_events_enable_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, perfmon_events_enable_show, inode->i_private);
+}
+
+/*
+ * Write handler for "perfmon_events_enable".
+ *
+ * Parses at most 39 bytes of user input as a base-10 unsigned long. With the
+ * GPU powered on, sampling is started or stopped only when the requested
+ * state differs from the current one; with the GPU off only the flag is
+ * updated.
+ *
+ * Returns @count on success, -EFAULT if the user copy fails, -EINVAL if the
+ * input does not parse, or the gk20a_busy() error.
+ */
+static ssize_t perfmon_events_enable_write(struct file *file,
+	const char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct gk20a *g = s->private;
+	unsigned long val = 0;
+	char buf[40];
+	int buf_size;
+	int err;
+
+	/* Zero-fill first so the copied text is always NUL terminated. */
+	memset(buf, 0, sizeof(buf));
+	buf_size = min(count, (sizeof(buf)-1));
+
+	if (copy_from_user(buf, userbuf, buf_size))
+		return -EFAULT;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	/* Don't turn on gk20a unnecessarily */
+	if (g->power_on) {
+		err = gk20a_busy(g);
+		if (err)
+			return err;
+
+		if (val && !g->pmu.perfmon_sampling_enabled) {
+			g->pmu.perfmon_sampling_enabled = true;
+			nvgpu_pmu_perfmon_start_sampling(&(g->pmu));
+		} else if (!val && g->pmu.perfmon_sampling_enabled) {
+			g->pmu.perfmon_sampling_enabled = false;
+			nvgpu_pmu_perfmon_stop_sampling(&(g->pmu));
+		}
+		gk20a_idle(g);
+	} else {
+		g->pmu.perfmon_sampling_enabled = val ? true : false;
+	}
+
+	return count;
+}
+
+static const struct file_operations perfmon_events_enable_fops = {
+	.open		= perfmon_events_enable_open,
+	.read		= seq_read,
+	.write		= perfmon_events_enable_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* seq_file show callback: prints g->pmu.perfmon_events_cnt. */
+static int perfmon_events_count_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+
+	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
+	return 0;
+
+}
+
+static int perfmon_events_count_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, perfmon_events_count_show, inode->i_private);
+}
+
+static const struct file_operations perfmon_events_count_fops = {
+	.open		= perfmon_events_count_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* seq_file show callback: prints g->pmu.pmu_mode as a signed integer. */
+static int security_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+
+	seq_printf(s, "%d\n", g->pmu.pmu_mode);
+	return 0;
+
+}
+
+static int security_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, security_show, inode->i_private);
+}
+
+static const struct file_operations security_fops = {
+	.open		= security_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/**
+ * gk20a_pmu_debugfs_init - Create the PMU debugfs nodes.
+ *
+ * @g - The GPU passed as private data to every node.
+ *
+ * Creates the nodes below in the platform debugfs directory. If any creation
+ * returns NULL, an error is logged, the whole platform debugfs directory is
+ * removed recursively and -ENOMEM is returned. Returns 0 otherwise.
+ *
+ * NOTE(review): "perfmon_events_enable" has a write handler but is created
+ * S_IRUGO, while several read-only nodes are created with S_IWUSR - confirm
+ * the intended modes.
+ */
+int gk20a_pmu_debugfs_init(struct gk20a *g)
+{
+	struct dentry *d;
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	d = debugfs_create_file(
+		"lpwr_debug", S_IRUGO|S_IWUSR, platform->debugfs, g,
+						&lpwr_debug_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file(
+		"mscg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
+						&mscg_stat_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file(
+		"mscg_transitions", S_IRUGO, platform->debugfs, g,
+						&mscg_transitions_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file(
+		"elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
+						&elpg_stat_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file(
+		"elpg_transitions", S_IRUGO, platform->debugfs, g,
+						&elpg_transitions_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file(
+		"falc_trace", S_IRUGO, platform->debugfs, g,
+						&falc_trace_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file(
+		"perfmon_events_enable", S_IRUGO, platform->debugfs, g,
+						&perfmon_events_enable_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file(
+		"perfmon_events_count", S_IRUGO, platform->debugfs, g,
+						&perfmon_events_count_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file(
+		"pmu_security", S_IRUGO, platform->debugfs, g,
+						&security_fops);
+	if (!d)
+		goto err_out;
+	return 0;
+err_out:
+	pr_err("%s: Failed to make debugfs node\n", __func__);
+	debugfs_remove_recursive(platform->debugfs);
+	return -ENOMEM;
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
new file mode 100644
index 000000000..c4e3243d8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_PMU_H__
+#define __NVGPU_DEBUG_PMU_H__
+
+struct gk20a;
+int gk20a_pmu_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c
new file mode 100644
index 000000000..40b93149c
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_sched.h"
+#include "gk20a/platform_gk20a.h"
+
+#include
+#include
+
+/*
+ * seq_file show callback for "sched_ctrl".
+ *
+ * Prints control_locked, whether busy_lock is currently held by someone else,
+ * bitmap_size, and the active/recent TSG bitmaps as 64-bit words. The bitmaps
+ * are read under status_lock. Unlike the PMU nodes, this takes gk20a_busy()
+ * unconditionally; its error is returned if it fails, else 0.
+ */
+static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	bool sched_busy = true;
+
+	/* bitmap_size is divided by sizeof(u64) to get the word count */
+	int n = sched->bitmap_size / sizeof(u64);
+	int i;
+	int err;
+
+	err = gk20a_busy(g);
+	if (err)
+		return err;
+
+	/* Probe only: if the lock can be taken it is released again at once. */
+	if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
+		sched_busy = false;
+		nvgpu_mutex_release(&sched->busy_lock);
+	}
+
+	seq_printf(s, "control_locked=%d\n", sched->control_locked);
+	seq_printf(s, "busy=%d\n", sched_busy);
+	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
+
+	nvgpu_mutex_acquire(&sched->status_lock);
+
+	seq_puts(s, "active_tsg_bitmap\n");
+	for (i = 0; i < n; i++)
+		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
+
+	seq_puts(s, "recent_tsg_bitmap\n");
+	for (i = 0; i < n; i++)
+		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
+
+	nvgpu_mutex_release(&sched->status_lock);
+
+	gk20a_idle(g);
+
+	return 0;
+}
+
+static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations gk20a_sched_debugfs_fops = {
+	.open		= gk20a_sched_debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* Creates the read-only "sched_ctrl" node in the platform debugfs directory. */
+void gk20a_sched_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+
debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs, + g, &gk20a_sched_debugfs_fops); +} diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h new file mode 100644 index 000000000..34a8f55f4 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_sched.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_SCHED_H__ +#define __NVGPU_DEBUG_SCHED_H__ + +struct gk20a; +void gk20a_sched_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_SCHED_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c index 80e7698b7..f85016d4c 100644 --- a/drivers/gpu/nvgpu/common/linux/driver_common.c +++ b/drivers/gpu/nvgpu/common/linux/driver_common.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "gk20a/gk20a_scale.h" #include "gk20a/gk20a.h" @@ -182,7 +183,7 @@ int nvgpu_probe(struct gk20a *g, nvgpu_init_mm_vars(g); gk20a_create_sysfs(g->dev); - gk20a_debug_init(g->dev, debugfs_symlink); + gk20a_debug_init(g, debugfs_symlink); g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); if (!g->dbg_regops_tmp_buf) { diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 2502ff30b..d81328f06 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c @@ -26,9 +26,9 @@ #include #include #include +#include #include 
"gk20a/gk20a.h" -#include "gk20a/debug_gk20a.h" #include "gk20a/ctxsw_trace_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" #include "gk20a/fence_gk20a.h" diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c index d058eba5e..41aaa7294 100644 --- a/drivers/gpu/nvgpu/common/linux/kmem.c +++ b/drivers/gpu/nvgpu/common/linux/kmem.c @@ -134,19 +134,19 @@ void __nvgpu_vfree(struct gk20a *g, void *addr) #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE -static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) +void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) { nvgpu_mutex_acquire(&tracker->lock); } -static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) +void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) { nvgpu_mutex_release(&tracker->lock); } -static void kmem_print_mem_alloc(struct gk20a *g, - struct nvgpu_mem_alloc *alloc, - struct seq_file *s) +void kmem_print_mem_alloc(struct gk20a *g, + struct nvgpu_mem_alloc *alloc, + struct seq_file *s) { #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES int i; @@ -231,7 +231,7 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, alloc->stack_length = stack_trace.nr_entries; #endif - lock_tracker(tracker); + nvgpu_lock_tracker(tracker); tracker->bytes_alloced += size; tracker->bytes_alloced_real += real_size; tracker->nr_allocs++; @@ -246,10 +246,10 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, if (ret) { WARN(1, "Duplicate alloc??? 
0x%llx\n", addr); kfree(alloc); - unlock_tracker(tracker); + nvgpu_unlock_tracker(tracker); return ret; } - unlock_tracker(tracker); + nvgpu_unlock_tracker(tracker); return 0; } @@ -259,17 +259,17 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, { struct nvgpu_mem_alloc *alloc; - lock_tracker(tracker); + nvgpu_lock_tracker(tracker); alloc = nvgpu_rem_alloc(tracker, addr); if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { - unlock_tracker(tracker); + nvgpu_unlock_tracker(tracker); return -EINVAL; } tracker->nr_frees++; tracker->bytes_freed += alloc->size; tracker->bytes_freed_real += alloc->real_size; - unlock_tracker(tracker); + nvgpu_unlock_tracker(tracker); return 0; } @@ -407,307 +407,6 @@ void __nvgpu_track_kfree(struct gk20a *g, void *addr) __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); } -/** - * to_human_readable_bytes - Determine suffix for passed size. - * - * @bytes - Number of bytes to generate a suffix for. - * @hr_bytes [out] - The human readable number of bytes. - * @hr_suffix [out] - The suffix for the HR number of bytes. - * - * Computes a human readable decomposition of the passed number of bytes. The - * suffix for the bytes is passed back through the @hr_suffix pointer. The right - * number of bytes is then passed back in @hr_bytes. This returns the following - * ranges: - * - * 0 - 1023 B - * 1 - 1023 KB - * 1 - 1023 MB - * 1 - 1023 GB - * 1 - 1023 TB - * 1 - ... PB - */ -static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, - const char **hr_suffix) -{ - static const char *suffixes[] = - { "B", "KB", "MB", "GB", "TB", "PB" }; - - u64 suffix_ind = 0; - - while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) { - bytes >>= 10; - suffix_ind++; - } - - /* - * Handle case where bytes > 1023PB. - */ - suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? 
- suffix_ind : ARRAY_SIZE(suffixes) - 1; - - *hr_bytes = bytes; - *hr_suffix = suffixes[suffix_ind]; -} - -/** - * print_hr_bytes - Print human readable bytes - * - * @s - A seq_file to print to. May be NULL. - * @msg - A message to print before the bytes. - * @bytes - Number of bytes. - * - * Print @msg followed by the human readable decomposition of the passed number - * of bytes. - * - * If @s is NULL then this prints will be made to the kernel log. - */ -static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) -{ - u64 hr_bytes; - const char *hr_suffix; - - __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); - __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); -} - -/** - * print_histogram - Build a histogram of the memory usage. - * - * @tracker The tracking to pull data from. - * @s A seq_file to dump info into. - */ -static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - int i; - u64 pot_min, pot_max; - u64 nr_buckets; - unsigned int *buckets; - unsigned int total_allocs; - struct nvgpu_rbtree_node *node; - static const char histogram_line[] = - "++++++++++++++++++++++++++++++++++++++++"; - - /* - * pot_min is essentially a round down to the nearest power of 2. This - * is the start of the histogram. pot_max is just a round up to the - * nearest power of two. Each histogram bucket is one power of two so - * the histogram buckets are exponential. - */ - pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); - pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); - - nr_buckets = __ffs(pot_max) - __ffs(pot_min); - - buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); - if (!buckets) { - __pstat(s, "OOM: could not allocate bucket storage!?\n"); - return; - } - - /* - * Iterate across all of the allocs and determine what bucket they - * should go in. Round the size down to the nearest power of two to - * find the right bucket. 
- */ - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - int b; - u64 bucket_min; - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - bucket_min = (u64)rounddown_pow_of_two(alloc->size); - if (bucket_min < tracker->min_alloc) - bucket_min = tracker->min_alloc; - - b = __ffs(bucket_min) - __ffs(pot_min); - - /* - * Handle the one case were there's an alloc exactly as big as - * the maximum bucket size of the largest bucket. Most of the - * buckets have an inclusive minimum and exclusive maximum. But - * the largest bucket needs to have an _inclusive_ maximum as - * well. - */ - if (b == (int)nr_buckets) - b--; - - buckets[b]++; - - nvgpu_rbtree_enum_next(&node, node); - } - - total_allocs = 0; - for (i = 0; i < (int)nr_buckets; i++) - total_allocs += buckets[i]; - - __pstat(s, "Alloc histogram:\n"); - - /* - * Actually compute the histogram lines. - */ - for (i = 0; i < (int)nr_buckets; i++) { - char this_line[sizeof(histogram_line) + 1]; - u64 line_length; - u64 hr_bytes; - const char *hr_suffix; - - memset(this_line, 0, sizeof(this_line)); - - /* - * Compute the normalized line length. Cant use floating point - * so we will just multiply everything by 1000 and use fixed - * point. - */ - line_length = (1000 * buckets[i]) / total_allocs; - line_length *= sizeof(histogram_line); - line_length /= 1000; - - memset(this_line, '+', line_length); - - __to_human_readable_bytes(1 << (__ffs(pot_min) + i), - &hr_bytes, &hr_suffix); - __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", - hr_bytes, hr_bytes << 1, - hr_suffix, buckets[i], this_line); - } -} - -#ifdef CONFIG_DEBUG_FS -/** - * nvgpu_kmem_print_stats - Print kmem tracking stats. - * - * @tracker The tracking to pull data from. - * @s A seq_file to dump info into. - * - * Print stats from a tracker. If @s is non-null then seq_printf() will be - * used with @s. Otherwise the stats are pr_info()ed. 
- */ -void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - lock_tracker(tracker); - - __pstat(s, "Mem tracker: %s\n\n", tracker->name); - - __pstat(s, "Basic Stats:\n"); - __pstat(s, " Number of allocs %lld\n", - tracker->nr_allocs); - __pstat(s, " Number of frees %lld\n", - tracker->nr_frees); - print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); - print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); - print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); - print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); - print_hr_bytes(s, " Bytes allocated (real) ", - tracker->bytes_alloced_real); - print_hr_bytes(s, " Bytes freed (real) ", - tracker->bytes_freed_real); - __pstat(s, "\n"); - - print_histogram(tracker, s); - - unlock_tracker(tracker); -} - -static int __kmem_tracking_show(struct seq_file *s, void *unused) -{ - struct nvgpu_mem_alloc_tracker *tracker = s->private; - - nvgpu_kmem_print_stats(tracker, s); - - return 0; -} - -static int __kmem_tracking_open(struct inode *inode, struct file *file) -{ - return single_open(file, __kmem_tracking_show, inode->i_private); -} - -static const struct file_operations __kmem_tracking_fops = { - .open = __kmem_tracking_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __kmem_traces_dump_tracker(struct gk20a *g, - struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - struct nvgpu_rbtree_node *node; - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - kmem_print_mem_alloc(g, alloc, s); - - nvgpu_rbtree_enum_next(&node, node); - } - - return 0; -} - -static int __kmem_traces_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - - lock_tracker(g->vmallocs); - seq_puts(s, "Oustanding vmallocs:\n"); - __kmem_traces_dump_tracker(g, g->vmallocs, s); - seq_puts(s, "\n"); - 
unlock_tracker(g->vmallocs); - - lock_tracker(g->kmallocs); - seq_puts(s, "Oustanding kmallocs:\n"); - __kmem_traces_dump_tracker(g, g->kmallocs, s); - unlock_tracker(g->kmallocs); - - return 0; -} - -static int __kmem_traces_open(struct inode *inode, struct file *file) -{ - return single_open(file, __kmem_traces_show, inode->i_private); -} - -static const struct file_operations __kmem_traces_fops = { - .open = __kmem_traces_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void nvgpu_kmem_debugfs_init(struct device *dev) -{ - struct gk20a_platform *plat = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); - struct dentry *gpu_root = plat->debugfs; - struct dentry *node; - - g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root); - if (IS_ERR_OR_NULL(g->debugfs_kmem)) - return; - - node = debugfs_create_file(g->vmallocs->name, S_IRUGO, - g->debugfs_kmem, - g->vmallocs, &__kmem_tracking_fops); - node = debugfs_create_file(g->kmallocs->name, S_IRUGO, - g->debugfs_kmem, - g->kmallocs, &__kmem_tracking_fops); - node = debugfs_create_file("traces", S_IRUGO, - g->debugfs_kmem, - g, &__kmem_traces_fops); -} -#else -void nvgpu_kmem_debugfs_init(struct device *dev) -{ -} -#endif - static int __do_check_for_outstanding_allocs( struct gk20a *g, struct nvgpu_mem_alloc_tracker *tracker, diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h index d3abb3784..a41762af8 100644 --- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h +++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h @@ -20,6 +20,8 @@ #include #include +struct seq_file; + #define __pstat(s, fmt, msg...) 
\ do { \ if (s) \ @@ -92,6 +94,12 @@ struct nvgpu_mem_alloc_tracker { unsigned long max_alloc; }; +void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker); +void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker); + +void kmem_print_mem_alloc(struct gk20a *g, + struct nvgpu_mem_alloc *alloc, + struct seq_file *s); #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ #endif /* __KMEM_PRIV_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index d5fc40de4..4f7fc3fa9 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/platform_gk20a.h" @@ -970,10 +971,7 @@ static int __exit gk20a_remove(struct platform_device *pdev) gk20a_user_deinit(dev, &nvgpu_class); -#ifdef CONFIG_DEBUG_FS - debugfs_remove_recursive(platform->debugfs); - debugfs_remove_recursive(platform->debugfs_alias); -#endif + gk20a_debug_deinit(g); gk20a_remove_sysfs(dev); diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c index 40ee199a3..eae0475ac 100644 --- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c @@ -411,7 +411,9 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, wmb(); a->inited = true; +#ifdef CONFIG_DEBUG_FS nvgpu_init_alloc_debug(g, __a); +#endif alloc_dbg(__a, "New allocator: type bitmap\n"); alloc_dbg(__a, " base 0x%llx\n", a->base); alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs); diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index 34bc51dfe..0ef94c10f 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c @@ -251,7 +251,9 @@ static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a) alloc_lock(__a); +#ifdef CONFIG_DEBUG_FS 
nvgpu_fini_alloc_debug(__a); +#endif /* * Free the fixed allocs first. @@ -1290,7 +1292,9 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, wmb(); a->initialized = 1; +#ifdef CONFIG_DEBUG_FS nvgpu_init_alloc_debug(g, __a); +#endif alloc_dbg(__a, "New allocator: type buddy\n"); alloc_dbg(__a, " base 0x%llx\n", a->base); alloc_dbg(__a, " size 0x%llx\n", a->length); diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c index 234ae4a39..944b4b0ff 100644 --- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c @@ -99,7 +99,9 @@ static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a) { struct nvgpu_lockless_allocator *pa = a->priv; +#ifdef CONFIG_DEBUG_FS nvgpu_fini_alloc_debug(a); +#endif nvgpu_vfree(a->g, pa->next); nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa); @@ -191,7 +193,9 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, wmb(); a->inited = true; +#ifdef CONFIG_DEBUG_FS nvgpu_init_alloc_debug(g, __a); +#endif alloc_dbg(__a, "New allocator: type lockless\n"); alloc_dbg(__a, " base 0x%llx\n", a->base); alloc_dbg(__a, " nodes %d\n", a->nr_nodes); diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c index 211b353bb..1646d2b1a 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c @@ -20,11 +20,6 @@ #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" -#ifdef CONFIG_DEBUG_FS -#include "gk20a/platform_gk20a.h" -#endif - -u32 nvgpu_alloc_tracing_on; u64 nvgpu_alloc_length(struct nvgpu_allocator *a) { @@ -151,68 +146,3 @@ int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g, return 0; } - -#ifdef CONFIG_DEBUG_FS -void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a, - struct seq_file *s, int lock) -{ - __a->ops->print_stats(__a, s, lock); -} - -static int 
__alloc_show(struct seq_file *s, void *unused) -{ - struct nvgpu_allocator *a = s->private; - - nvgpu_alloc_print_stats(a, s, 1); - - return 0; -} - -static int __alloc_open(struct inode *inode, struct file *file) -{ - return single_open(file, __alloc_show, inode->i_private); -} - -static const struct file_operations __alloc_fops = { - .open = __alloc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - -void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) -{ -#ifdef CONFIG_DEBUG_FS - if (!g->debugfs_allocators) - return; - - a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, - g->debugfs_allocators, - a, &__alloc_fops); -#endif -} - -void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) -{ -#ifdef CONFIG_DEBUG_FS - if (!IS_ERR_OR_NULL(a->debugfs_entry)) - debugfs_remove(a->debugfs_entry); -#endif -} - -#ifdef CONFIG_DEBUG_FS -void nvgpu_alloc_debugfs_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct dentry *gpu_root = platform->debugfs; - struct gk20a *g = get_gk20a(dev); - - g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root); - if (IS_ERR_OR_NULL(g->debugfs_allocators)) - return; - - debugfs_create_u32("tracing", 0664, g->debugfs_allocators, - &nvgpu_alloc_tracing_on); -} -#endif diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c index 14b5da3c5..3f4f37069 100644 --- a/drivers/gpu/nvgpu/common/mm/page_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c @@ -916,7 +916,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, if (err) goto fail; +#ifdef CONFIG_DEBUG_FS nvgpu_init_alloc_debug(g, __a); +#endif palloc_dbg(a, "New allocator: type page\n"); palloc_dbg(a, " base 0x%llx\n", a->base); palloc_dbg(a, " size 0x%llx\n", a->length); diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index a01602748..084f17934 100644 
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -18,9 +18,6 @@ #include #include -#ifdef CONFIG_DEBUG_FS -#include -#endif #include #include @@ -40,8 +37,6 @@ #include "cde_gk20a.h" #include "fence_gk20a.h" #include "gr_gk20a.h" -#include "debug_gk20a.h" -#include "platform_gk20a.h" #include #include @@ -1585,8 +1580,7 @@ int gk20a_prepare_compressible_read( if (IS_ERR(dmabuf)) return -EINVAL; - err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), - offset, &state); + err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); if (err) { dma_buf_put(dmabuf); return err; @@ -1650,7 +1644,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, return -EINVAL; } - err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), offset, &state); + err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); if (err) { nvgpu_err(g, "could not get state from dmabuf"); dma_buf_put(dmabuf); @@ -1671,38 +1665,3 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, dma_buf_put(dmabuf); return 0; } - -#ifdef CONFIG_DEBUG_FS -static ssize_t gk20a_cde_reload_write(struct file *file, - const char __user *userbuf, size_t count, loff_t *ppos) -{ - struct gk20a *g = file->private_data; - gk20a_cde_reload(g); - return count; -} - -static const struct file_operations gk20a_cde_reload_fops = { - .open = simple_open, - .write = gk20a_cde_reload_write, -}; - -void gk20a_cde_debugfs_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); - - if (!platform->has_cde) - return; - - debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, - platform->debugfs, &g->cde_app.shader_parameter); - debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, - platform->debugfs, &g->cde_app.ctx_count); - debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, - platform->debugfs, &g->cde_app.ctx_usecount); - debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, - 
platform->debugfs, &g->cde_app.ctx_count_top); - debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs, - g, &gk20a_cde_reload_fops); -} -#endif diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h index ffd55b4dd..4f400bf3b 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h @@ -295,7 +295,6 @@ int gk20a_cde_convert(struct gk20a *g, struct nvgpu_fence *fence, u32 __flags, struct gk20a_cde_param *params, int num_params, struct gk20a_fence **fence_out); -void gk20a_cde_debugfs_init(struct device *dev); int gk20a_prepare_compressible_read( struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 1ed90b145..c905bedb2 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -13,15 +13,10 @@ * more details. */ -#ifdef CONFIG_DEBUG_FS -#include -#endif - #include #include #include "gk20a.h" -#include "debug_gk20a.h" #include @@ -33,10 +28,6 @@ #include #include -#ifdef CONFIG_DEBUG_FS -#include "platform_gk20a.h" -#endif - static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) { gk20a_dbg(gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n"); @@ -728,18 +719,3 @@ void gk20a_ce_delete_context_priv(struct gk20a *g, return; } EXPORT_SYMBOL(gk20a_ce_delete_context); - -#ifdef CONFIG_DEBUG_FS -void gk20a_ce_debugfs_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); - - debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, - platform->debugfs, &g->ce_app.ctx_count); - debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, - platform->debugfs, &g->ce_app.app_state); - debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, - platform->debugfs, &g->ce_app.next_ctx_id); -} -#endif diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h index 
dfd190195..f972e1758 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h @@ -156,10 +156,4 @@ void gk20a_ce_delete_context_priv(struct gk20a *g, void gk20a_ce_delete_context(struct gk20a *g, u32 ce_ctx_id); - -#ifdef CONFIG_DEBUG_FS -/* CE app debugfs api */ -void gk20a_ce_debugfs_init(struct device *dev); -#endif - #endif /*__CE2_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 571570d8d..13abed955 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -30,9 +30,9 @@ #include #include #include +#include #include "gk20a.h" -#include "debug_gk20a.h" #include "ctxsw_trace_gk20a.h" #include "dbg_gpu_gk20a.h" #include "fence_gk20a.h" @@ -1403,6 +1403,7 @@ static u32 get_gp_free_count(struct channel_gk20a *c) return gp_free_count(c); } +#ifdef CONFIG_DEBUG_FS static void trace_write_pushbuffer(struct channel_gk20a *c, struct nvgpu_gpfifo *g) { @@ -1439,6 +1440,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, dma_buf_vunmap(dmabuf, mem); } } +#endif static void trace_write_pushbuffer_range(struct channel_gk20a *c, struct nvgpu_gpfifo *g, @@ -1446,6 +1448,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, int offset, int count) { +#ifdef CONFIG_DEBUG_FS u32 size; int i; struct nvgpu_gpfifo *gp; @@ -1478,6 +1481,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, if (gpfifo_allocated) nvgpu_big_free(c->g, g); +#endif } static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) @@ -1629,8 +1633,8 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) nvgpu_err(g, "Job on channel %d timed out", ch->hw_chid); - gk20a_debug_dump(g->dev); - gk20a_gr_debug_dump(g->dev); + gk20a_debug_dump(g); + gk20a_gr_debug_dump(g); g->ops.fifo.force_reset_ch(ch, NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c 
b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index ac3a3d57a..46560a560 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -29,12 +29,11 @@ #include #include #include -#include #include #include +#include #include "gk20a.h" -#include "debug_gk20a.h" #include "ctxsw_trace_gk20a.h" #include "mm_gk20a.h" @@ -46,10 +45,6 @@ #include #include -#ifdef CONFIG_DEBUG_FS -#include "platform_gk20a.h" -#endif - #define FECS_METHOD_WFI_RESTORE 0x80000 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, @@ -57,10 +52,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, bool wait_for_finish); static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg); -#ifdef CONFIG_DEBUG_FS -static void __gk20a_fifo_profile_free(struct kref *ref); -#endif - u32 gk20a_fifo_get_engine_ids(struct gk20a *g, u32 engine_id[], u32 engine_id_sz, u32 engine_enum) @@ -562,14 +553,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) f->engine_info = NULL; nvgpu_kfree(g, f->active_engines_list); f->active_engines_list = NULL; -#ifdef CONFIG_DEBUG_FS - nvgpu_mutex_acquire(&f->profile.lock); - if (f->profile.enabled) { - f->profile.enabled = false; - kref_put(&f->profile.ref, __gk20a_fifo_profile_free); - } - nvgpu_mutex_release(&f->profile.lock); -#endif } /* reads info from hardware and fills in pbmda exception info record */ @@ -1543,7 +1526,7 @@ static bool gk20a_fifo_handle_mmu_fault( } else { fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); fake_fault = false; - gk20a_debug_dump(g->dev); + gk20a_debug_dump(g); } @@ -1833,7 +1816,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) gk20a_channel_abort(ch, false); if (gk20a_fifo_error_ch(g, ch)) - gk20a_debug_dump(g->dev); + gk20a_debug_dump(g); gk20a_channel_put(ch); } @@ -1860,7 +1843,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; if 
(gk20a_fifo_error_tsg(g, tsg)) - gk20a_debug_dump(g->dev); + gk20a_debug_dump(g); gk20a_fifo_abort_tsg(g, tsgid, false); } @@ -1957,7 +1940,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, unsigned int id_type; if (verbose) - gk20a_debug_dump(g->dev); + gk20a_debug_dump(g); if (g->ops.ltc.flush) g->ops.ltc.flush(g); @@ -3441,345 +3424,6 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, return NULL; } -#ifdef CONFIG_DEBUG_FS - -/* Get the next element in the ring buffer of profile entries - * and grab a reference to the structure - */ -struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - struct fifo_profile_gk20a *profile; - unsigned int index; - - /* If kref is zero, profiling is not enabled */ - if (!kref_get_unless_zero(&f->profile.ref)) - return NULL; - index = atomic_inc_return(&f->profile.get); - profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; - - return profile; -} - -/* Free the reference to the structure. 
This allows deferred cleanups */ -void gk20a_fifo_profile_release(struct gk20a *g, - struct fifo_profile_gk20a *profile) -{ - kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); -} - -static void *gk20a_fifo_sched_debugfs_seq_start( - struct seq_file *s, loff_t *pos) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - - if (*pos >= f->num_channels) - return NULL; - - return &f->channel[*pos]; -} - -static void *gk20a_fifo_sched_debugfs_seq_next( - struct seq_file *s, void *v, loff_t *pos) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - - ++(*pos); - if (*pos >= f->num_channels) - return NULL; - - return &f->channel[*pos]; -} - -static void gk20a_fifo_sched_debugfs_seq_stop( - struct seq_file *s, void *v) -{ -} - -static int gk20a_fifo_sched_debugfs_seq_show( - struct seq_file *s, void *v) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - struct channel_gk20a *ch = v; - struct tsg_gk20a *tsg = NULL; - - struct fifo_engine_info_gk20a *engine_info; - struct fifo_runlist_info_gk20a *runlist; - u32 runlist_id; - int ret = SEQ_SKIP; - u32 engine_id; - - engine_id = gk20a_fifo_get_gr_engine_id(g); - engine_info = (f->engine_info + engine_id); - runlist_id = engine_info->runlist_id; - runlist = &f->runlist_info[runlist_id]; - - if (ch == f->channel) { - seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n"); - seq_puts(s, " (usecs) (msecs)\n"); - ret = 0; - } - - if (!test_bit(ch->hw_chid, runlist->active_channels)) - return ret; - - if (gk20a_channel_get(ch)) { - if (gk20a_is_channel_marked_as_tsg(ch)) - tsg = &f->tsg[ch->tsgid]; - - seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n", - ch->hw_chid, - ch->tsgid, - ch->tgid, - tsg ? tsg->timeslice_us : ch->timeslice_us, - ch->timeout_ms_max, - tsg ? tsg->interleave_level : ch->interleave_level, - ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX, - ch->ch_ctx.gr_ctx ? 
ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX); - gk20a_channel_put(ch); - } - return 0; -} - -static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { - .start = gk20a_fifo_sched_debugfs_seq_start, - .next = gk20a_fifo_sched_debugfs_seq_next, - .stop = gk20a_fifo_sched_debugfs_seq_stop, - .show = gk20a_fifo_sched_debugfs_seq_show -}; - -static int gk20a_fifo_sched_debugfs_open(struct inode *inode, - struct file *file) -{ - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); - if (err) - return err; - - gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private); - - ((struct seq_file *)file->private_data)->private = inode->i_private; - return 0; -}; - -/* - * The file operations structure contains our open function along with - * set of the canned seq_ ops. - */ -static const struct file_operations gk20a_fifo_sched_debugfs_fops = { - .owner = THIS_MODULE, - .open = gk20a_fifo_sched_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - -static void __gk20a_fifo_profile_free(struct kref *ref) -{ - struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, - profile.ref); - nvgpu_vfree(f->g, f->profile.data); - nvgpu_vfree(f->g, f->profile.sorted); -} - -static int gk20a_fifo_profile_enable(void *data, u64 val) -{ - struct gk20a *g = (struct gk20a *) data; - struct fifo_gk20a *f = &g->fifo; - - - nvgpu_mutex_acquire(&f->profile.lock); - if (val == 0) { - if (f->profile.enabled) { - f->profile.enabled = false; - kref_put(&f->profile.ref, __gk20a_fifo_profile_free); - } - } else { - if (!f->profile.enabled) { - /* not kref init as it can have a running condition if - * we enable/disable/enable while kickoff is happening - */ - if (!kref_get_unless_zero(&f->profile.ref)) { - f->profile.data = vzalloc( - FIFO_PROFILING_ENTRIES * - sizeof(struct fifo_profile_gk20a)); - f->profile.sorted = vzalloc( - FIFO_PROFILING_ENTRIES * - sizeof(u64)); - if 
(!(f->profile.data && f->profile.sorted)) { - nvgpu_vfree(g, f->profile.data); - nvgpu_vfree(g, f->profile.sorted); - nvgpu_mutex_release(&f->profile.lock); - return -ENOMEM; - } - kref_init(&f->profile.ref); - } - atomic_set(&f->profile.get, 0); - f->profile.enabled = true; - } - } - nvgpu_mutex_release(&f->profile.lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE( - gk20a_fifo_profile_enable_debugfs_fops, - NULL, - gk20a_fifo_profile_enable, - "%llu\n" -); - -static int __profile_cmp(const void *a, const void *b) -{ - return *((unsigned long long *) a) - *((unsigned long long *) b); -} - -/* - * This uses about 800b in the stack, but the function using it is not part - * of a callstack where much memory is being used, so it is fine - */ -#define PERCENTILE_WIDTH 5 -#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) - -static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, - u64 *percentiles, u32 index_end, u32 index_start) -{ - unsigned int nelem = 0; - unsigned int index; - struct fifo_profile_gk20a *profile; - - for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { - profile = &g->fifo.profile.data[index]; - - if (profile->timestamp[index_end] > - profile->timestamp[index_start]) { - /* This is a valid element */ - g->fifo.profile.sorted[nelem] = - profile->timestamp[index_end] - - profile->timestamp[index_start]; - nelem++; - } - } - - /* sort it */ - sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), - __profile_cmp, NULL); - - /* build ranges */ - for (index = 0; index < PERCENTILE_RANGES; index++) - percentiles[index] = - g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * - nelem)/100 - 1]; - return nelem; -} - -static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - unsigned int get, nelem, index; - /* - * 800B in the stack, but function is declared statically and only - * called from debugfs handler - */ - u64 percentiles_ioctl[PERCENTILE_RANGES]; - u64 
percentiles_kickoff[PERCENTILE_RANGES]; - u64 percentiles_jobtracking[PERCENTILE_RANGES]; - u64 percentiles_append[PERCENTILE_RANGES]; - u64 percentiles_userd[PERCENTILE_RANGES]; - - if (!kref_get_unless_zero(&g->fifo.profile.ref)) { - seq_printf(s, "Profiling disabled\n"); - return 0; - } - - get = atomic_read(&g->fifo.profile.get); - - __gk20a_fifo_create_stats(g, percentiles_ioctl, - PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_kickoff, - PROFILE_END, PROFILE_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_jobtracking, - PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_append, - PROFILE_APPEND, PROFILE_JOB_TRACKING); - nelem = __gk20a_fifo_create_stats(g, percentiles_userd, - PROFILE_END, PROFILE_APPEND); - - seq_printf(s, "Number of kickoffs: %d\n", nelem); - seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); - - for (index = 0; index < PERCENTILE_RANGES; index++) - seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", - PERCENTILE_WIDTH * (index+1), - percentiles_ioctl[index], - percentiles_kickoff[index], - percentiles_append[index], - percentiles_jobtracking[index], - percentiles_userd[index]); - - kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); - - return 0; -} - -static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_fifo_profile_stats, inode->i_private); -} - -static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { - .open = gk20a_fifo_profile_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - -void gk20a_fifo_debugfs_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); - - struct dentry *gpu_root = platform->debugfs; - struct dentry *fifo_root; - struct dentry *profile_root; - - - fifo_root = debugfs_create_dir("fifo", 
gpu_root); - if (IS_ERR_OR_NULL(fifo_root)) - return; - - gk20a_dbg(gpu_dbg_info, "g=%p", g); - - debugfs_create_file("sched", 0600, fifo_root, g, - &gk20a_fifo_sched_debugfs_fops); - - profile_root = debugfs_create_dir("profile", fifo_root); - if (IS_ERR_OR_NULL(profile_root)) - return; - - nvgpu_mutex_init(&g->fifo.profile.lock); - g->fifo.profile.enabled = false; - atomic_set(&g->fifo.profile.get, 0); - atomic_set(&g->fifo.profile.ref.refcount, 0); - - debugfs_create_file("enable", 0600, profile_root, g, - &gk20a_fifo_profile_enable_debugfs_fops); - - debugfs_create_file("stats", 0600, profile_root, g, - &gk20a_fifo_profile_stats_debugfs_fops); - -} -#endif /* CONFIG_DEBUG_FS */ - static const char * const ccsr_chan_status_str[] = { "idle", "pending", @@ -3901,6 +3545,54 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, gk20a_debug_output(o, "\n"); } +void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, + struct gk20a_debug_output *o) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + struct ch_state **ch_state; + + ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels); + if (!ch_state) { + gk20a_debug_output(o, "cannot alloc memory for channels\n"); + return; + } + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + if (gk20a_channel_get(ch)) { + ch_state[chid] = + nvgpu_kmalloc(g, sizeof(struct ch_state) + + ram_in_alloc_size_v()); + /* ref taken stays to below loop with + * successful allocs */ + if (!ch_state[chid]) + gk20a_channel_put(ch); + } + } + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + if (!ch_state[chid]) + continue; + + ch_state[chid]->pid = ch->pid; + ch_state[chid]->refs = atomic_read(&ch->ref_count); + nvgpu_mem_rd_n(g, &ch->inst_block, 0, + &ch_state[chid]->inst_block[0], + ram_in_alloc_size_v()); + gk20a_channel_put(ch); + } + for (chid = 0; chid < f->num_channels; chid++) { + if (ch_state[chid]) { + 
g->ops.fifo.dump_channel_status_ramfc(g, o, chid, + ch_state[chid]); + nvgpu_kfree(g, ch_state[chid]); + } + } + nvgpu_kfree(g, ch_state); +} + void gk20a_dump_pbdma_status(struct gk20a *g, struct gk20a_debug_output *o) { diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 6c8868a24..228e5130c 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -23,10 +23,11 @@ #include "channel_gk20a.h" #include "tsg_gk20a.h" -#include "debug_gk20a.h" #include +struct gk20a_debug_output; + #define MAX_RUNLIST_BUFFERS 2 #define FIFO_INVAL_ENGINE_ID ((u32)~0) @@ -287,8 +288,6 @@ int gk20a_fifo_set_runlist_interleave(struct gk20a *g, int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice); -void gk20a_fifo_debugfs_init(struct device *dev); - const char *gk20a_fifo_interleave_level_name(u32 interleave_level); int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type, @@ -341,6 +340,8 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, struct gk20a_debug_output *o, u32 hw_chid, struct ch_state *ch_state); +void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, + struct gk20a_debug_output *o); void gk20a_dump_pbdma_status(struct gk20a *g, struct gk20a_debug_output *o); void gk20a_dump_eng_status(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 689fafb11..899c1d6a3 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -30,6 +30,7 @@ struct acr_desc; struct nvgpu_mem_alloc_tracker; struct dbg_profiler_object_data; struct ecc_gk20a; +struct gk20a_debug_output; #include #include @@ -61,7 +62,6 @@ struct ecc_gk20a; #include "therm_gk20a.h" #include "gm20b/acr_gm20b.h" #include "cde_gk20a.h" -#include "debug_gk20a.h" #include "sched_gk20a.h" #ifdef CONFIG_ARCH_TEGRA_18x_SOC #include "clk/clk.h" @@ -1544,10 +1544,6 @@ void nvgpu_wait_for_deferred_interrupts(struct gk20a *g); struct 
gk20a * __must_check gk20a_get(struct gk20a *g); void gk20a_put(struct gk20a *g); -#ifdef CONFIG_DEBUG_FS -int gk20a_railgating_debugfs_init(struct device *dev); -#endif - static inline bool gk20a_platform_has_syncpoints(struct gk20a *g) { #ifdef CONFIG_TEGRA_GK20A_NVHOST diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 2188618c0..982cfac86 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "gk20a.h" #include "kind_gk20a.h" @@ -37,13 +38,8 @@ #include "gr_pri_gk20a.h" #include "regops_gk20a.h" #include "dbg_gpu_gk20a.h" -#include "debug_gk20a.h" #include "ctxsw_trace_gk20a.h" -#ifdef CONFIG_DEBUG_FS -#include "platform_gk20a.h" -#endif - #include #include #include @@ -514,7 +510,7 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, nvgpu_err(g, "timeout waiting on ucode response"); gk20a_fecs_dump_falcon_stats(g); - gk20a_gr_debug_dump(g->dev); + gk20a_gr_debug_dump(g); return -1; } else if (check == WAIT_UCODE_ERROR) { nvgpu_err(g, @@ -9032,20 +9028,6 @@ static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, return 0; } -#ifdef CONFIG_DEBUG_FS -int gr_gk20a_debugfs_init(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(g->dev); - - g->debugfs_gr_default_attrib_cb_size = - debugfs_create_u32("gr_default_attrib_cb_size", - S_IRUGO|S_IWUSR, platform->debugfs, - &g->gr.attrib_cb_default_size); - - return 0; -} -#endif - static void gr_gk20a_init_cyclestats(struct gk20a *g) { #if defined(CONFIG_GK20A_CYCLE_STATS) diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 79aeb42f3..deb8ea9c0 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -653,7 +653,6 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, void gr_gk20a_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); int gr_gk20a_halt_pipe(struct 
gk20a *g); -int gr_gk20a_debugfs_init(struct gk20a *g); #if defined(CONFIG_GK20A_CYCLE_STATS) int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */ diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 8a3beb39e..b19398a6a 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -37,6 +37,7 @@ #include "pramin_gk20a.h" #include "priv_ring_gk20a.h" +#include #include #include diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 53d22a7d3..08e2e9cc1 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -2563,13 +2563,13 @@ priv_exist_or_err: return 0; } -int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev, +int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, u64 offset, struct gk20a_buffer_state **state) { int err = 0; struct gk20a_dmabuf_priv *priv; struct gk20a_buffer_state *s; - struct gk20a *g = get_gk20a(dev); + struct device *dev = g->dev; if (WARN_ON(offset >= (u64)dmabuf->size)) return -EINVAL; @@ -3123,18 +3123,6 @@ static bool gk20a_mm_is_bar1_supported(struct gk20a *g) return true; } -#ifdef CONFIG_DEBUG_FS -void gk20a_mm_debugfs_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct dentry *gpu_root = platform->debugfs; - struct gk20a *g = gk20a_get_platform(dev)->g; - - debugfs_create_bool("force_pramin", 0664, gpu_root, - &g->mm.force_pramin); -} -#endif - void gk20a_init_mm(struct gpu_ops *gops) { gops->mm.gmmu_map = gk20a_locked_gmmu_map; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 79b553712..5d90cbf6c 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -146,7 +146,6 @@ struct channel_gk20a; int gk20a_init_mm_support(struct gk20a *g); int gk20a_init_mm_setup_sw(struct gk20a *g); int gk20a_init_mm_setup_hw(struct gk20a 
*g); -void gk20a_mm_debugfs_init(struct device *dev); void gk20a_init_mm_ce_context(struct gk20a *g); int gk20a_mm_fb_flush(struct gk20a *g); @@ -437,7 +436,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); -int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev, +int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, u64 offset, struct gk20a_buffer_state **state); int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry); diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index a9e039437..552d5d735 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -35,12 +35,6 @@ #include "nvgpu_gpuid_t19x.h" #endif -#ifdef CONFIG_DEBUG_FS -#include -#include -#include "platform_gk20a.h" -#endif - #define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" #define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000 @@ -49,7 +43,7 @@ #define gk20a_dbg_pmu(fmt, arg...) 
\ gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) -static int gk20a_pmu_get_pg_stats(struct gk20a *g, +int gk20a_pmu_get_pg_stats(struct gk20a *g, u32 pg_engine_id, struct pmu_pg_stats_data *pg_stat_data); static void ap_callback_init_and_enable_ctrl( @@ -281,7 +275,7 @@ static void set_pmu_cmdline_args_falctracesize_v1( pmu->args_v1.falc_trace_size = size; } -static bool find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos) +bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos) { u32 i = 0, j = strlen(strings); for (; i < j; i++) { @@ -326,7 +320,7 @@ static void printtrace(struct pmu_gk20a *pmu) count = scnprintf(buf, 0x40, "Index %x: ", trace1[(i / 4)]); l = 0; m = 0; - while (find_hex_in_string((trace+i+20+m), g, &k)) { + while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { if (k >= 40) break; strncpy(part_str, (trace+i+20+m), k); @@ -4141,7 +4135,7 @@ void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) nvgpu_err(g, "ZBC save timeout"); } -static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu) +int nvgpu_pmu_perfmon_start_sampling(struct pmu_gk20a *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_v *pv = &g->ops.pmu_ver; @@ -4185,7 +4179,7 @@ static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu) return 0; } -static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu) +int nvgpu_pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_cmd cmd; @@ -4231,7 +4225,7 @@ static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu, /* restart sampling */ if (pmu->perfmon_sampling_enabled) - return pmu_perfmon_start_sampling(pmu); + return nvgpu_pmu_perfmon_start_sampling(pmu); return 0; } @@ -5173,9 +5167,9 @@ int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable) gk20a_dbg_fn(""); if (enable) - err = pmu_perfmon_start_sampling(pmu); + err = nvgpu_pmu_perfmon_start_sampling(pmu); else - err = pmu_perfmon_stop_sampling(pmu); + err = nvgpu_pmu_perfmon_stop_sampling(pmu); return 
err; } @@ -5293,7 +5287,7 @@ void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, pg_stat_data->avg_exit_latency_us = stats.pg_avg_exit_time_us; } -static int gk20a_pmu_get_pg_stats(struct gk20a *g, +int gk20a_pmu_get_pg_stats(struct gk20a *g, u32 pg_engine_id, struct pmu_pg_stats_data *pg_stat_data) { @@ -5463,466 +5457,3 @@ int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id) status = gk20a_pmu_ap_send_command(g, &ap_cmd, true); return status; } - -#ifdef CONFIG_DEBUG_FS -static int lpwr_debug_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - if (g->ops.pmu.pmu_pg_engines_feature_list && - g->ops.pmu.pmu_pg_engines_feature_list(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != - PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) { - seq_printf(s, "PSTATE: %u\n" - "RPPG Enabled: %u\n" - "RPPG ref count: %u\n" - "RPPG state: %u\n" - "MSCG Enabled: %u\n" - "MSCG pstate state: %u\n" - "MSCG transition state: %u\n", - g->ops.clk_arb.get_current_pstate(g), - g->elpg_enabled, g->pmu.elpg_refcnt, - g->pmu.elpg_stat, g->mscg_enabled, - g->pmu.mscg_stat, g->pmu.mscg_transition_state); - - } else - seq_printf(s, "ELPG Enabled: %u\n" - "ELPG ref count: %u\n" - "ELPG state: %u\n", - g->elpg_enabled, g->pmu.elpg_refcnt, - g->pmu.elpg_stat); - - return 0; - -} - -static int lpwr_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, lpwr_debug_show, inode->i_private); -} - -static const struct file_operations lpwr_debug_fops = { - .open = lpwr_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mscg_stat_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - u64 total_ingating, total_ungating, residency, divisor, dividend; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - int err; - - /* Don't unnecessarily power on the device */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_MS, 
&pg_stat_data); - gk20a_idle(g); - } - total_ingating = g->pg_ingating_time_us + - (u64)pg_stat_data.ingating_time; - total_ungating = g->pg_ungating_time_us + - (u64)pg_stat_data.ungating_time; - - divisor = total_ingating + total_ungating; - - /* We compute the residency on a scale of 1000 */ - dividend = total_ingating * 1000; - - if (divisor) - residency = div64_u64(dividend, divisor); - else - residency = 0; - - seq_printf(s, - "Time in MSCG: %llu us\n" - "Time out of MSCG: %llu us\n" - "MSCG residency ratio: %llu\n" - "MSCG Entry Count: %u\n" - "MSCG Avg Entry latency %u\n" - "MSCG Avg Exit latency %u\n", - total_ingating, total_ungating, - residency, pg_stat_data.gating_cnt, - pg_stat_data.avg_entry_latency_us, - pg_stat_data.avg_exit_latency_us); - return 0; - -} - -static int mscg_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, mscg_stat_show, inode->i_private); -} - -static const struct file_operations mscg_stat_fops = { - .open = mscg_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mscg_transitions_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - u32 total_gating_cnt; - int err; - - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); - gk20a_idle(g); - } - total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; - - seq_printf(s, "%u\n", total_gating_cnt); - return 0; - -} - -static int mscg_transitions_open(struct inode *inode, struct file *file) -{ - return single_open(file, mscg_transitions_show, inode->i_private); -} - -static const struct file_operations mscg_transitions_fops = { - .open = mscg_transitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int elpg_stat_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct 
pmu_pg_stats_data pg_stat_data = { 0 }; - u64 total_ingating, total_ungating, residency, divisor, dividend; - int err; - - /* Don't unnecessarily power on the device */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); - gk20a_idle(g); - } - total_ingating = g->pg_ingating_time_us + - (u64)pg_stat_data.ingating_time; - total_ungating = g->pg_ungating_time_us + - (u64)pg_stat_data.ungating_time; - divisor = total_ingating + total_ungating; - - /* We compute the residency on a scale of 1000 */ - dividend = total_ingating * 1000; - - if (divisor) - residency = div64_u64(dividend, divisor); - else - residency = 0; - - seq_printf(s, - "Time in ELPG: %llu us\n" - "Time out of ELPG: %llu us\n" - "ELPG residency ratio: %llu\n" - "ELPG Entry Count: %u\n" - "ELPG Avg Entry latency %u us\n" - "ELPG Avg Exit latency %u us\n", - total_ingating, total_ungating, - residency, pg_stat_data.gating_cnt, - pg_stat_data.avg_entry_latency_us, - pg_stat_data.avg_exit_latency_us); - return 0; - -} - -static int elpg_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, elpg_stat_show, inode->i_private); -} - -static const struct file_operations elpg_stat_fops = { - .open = elpg_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int elpg_transitions_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - u32 total_gating_cnt; - int err; - - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); - gk20a_idle(g); - } - total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; - - seq_printf(s, "%u\n", total_gating_cnt); - return 0; - -} - -static int elpg_transitions_open(struct inode *inode, struct file *file) -{ - return single_open(file, elpg_transitions_show, 
inode->i_private); -} - -static const struct file_operations elpg_transitions_fops = { - .open = elpg_transitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int falc_trace_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct pmu_gk20a *pmu = &g->pmu; - u32 i = 0, j = 0, k, l, m; - char part_str[40]; - void *tracebuffer; - char *trace; - u32 *trace1; - - /* allocate system memory to copy pmu trace buffer */ - tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE); - if (tracebuffer == NULL) - return -ENOMEM; - - /* read pmu traces into system memory buffer */ - nvgpu_mem_rd_n(g, &pmu->trace_buf, - 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE); - - trace = (char *)tracebuffer; - trace1 = (u32 *)tracebuffer; - - for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { - for (j = 0; j < 0x40; j++) - if (trace1[(i / 4) + j]) - break; - if (j == 0x40) - break; - seq_printf(s, "Index %x: ", trace1[(i / 4)]); - l = 0; - m = 0; - while (find_hex_in_string((trace+i+20+m), g, &k)) { - if (k >= 40) - break; - strncpy(part_str, (trace+i+20+m), k); - part_str[k] = 0; - seq_printf(s, "%s0x%x", part_str, - trace1[(i / 4) + 1 + l]); - l++; - m += k + 2; - } - seq_printf(s, "%s", (trace+i+20+m)); - } - - nvgpu_kfree(g, tracebuffer); - return 0; -} - -static int falc_trace_open(struct inode *inode, struct file *file) -{ - return single_open(file, falc_trace_show, inode->i_private); -} - -static const struct file_operations falc_trace_fops = { - .open = falc_trace_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int perfmon_events_enable_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 
1 : 0); - return 0; - -} - -static int perfmon_events_enable_open(struct inode *inode, struct file *file) -{ - return single_open(file, perfmon_events_enable_show, inode->i_private); -} - -static ssize_t perfmon_events_enable_write(struct file *file, - const char __user *userbuf, size_t count, loff_t *ppos) -{ - struct seq_file *s = file->private_data; - struct gk20a *g = s->private; - unsigned long val = 0; - char buf[40]; - int buf_size; - int err; - - memset(buf, 0, sizeof(buf)); - buf_size = min(count, (sizeof(buf)-1)); - - if (copy_from_user(buf, userbuf, buf_size)) - return -EFAULT; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - /* Don't turn on gk20a unnecessarily */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - if (val && !g->pmu.perfmon_sampling_enabled) { - g->pmu.perfmon_sampling_enabled = true; - pmu_perfmon_start_sampling(&(g->pmu)); - } else if (!val && g->pmu.perfmon_sampling_enabled) { - g->pmu.perfmon_sampling_enabled = false; - pmu_perfmon_stop_sampling(&(g->pmu)); - } - gk20a_idle(g); - } else { - g->pmu.perfmon_sampling_enabled = val ? 
true : false; - } - - return count; -} - -static const struct file_operations perfmon_events_enable_fops = { - .open = perfmon_events_enable_open, - .read = seq_read, - .write = perfmon_events_enable_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int perfmon_events_count_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); - return 0; - -} - -static int perfmon_events_count_open(struct inode *inode, struct file *file) -{ - return single_open(file, perfmon_events_count_show, inode->i_private); -} - -static const struct file_operations perfmon_events_count_fops = { - .open = perfmon_events_count_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int security_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%d\n", g->pmu.pmu_mode); - return 0; - -} - -static int security_open(struct inode *inode, struct file *file) -{ - return single_open(file, security_show, inode->i_private); -} - -static const struct file_operations security_fops = { - .open = security_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int gk20a_pmu_debugfs_init(struct device *dev) -{ - struct dentry *d; - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); - - d = debugfs_create_file( - "lpwr_debug", S_IRUGO|S_IWUSR, platform->debugfs, g, - &lpwr_debug_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "mscg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g, - &mscg_stat_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "mscg_transitions", S_IRUGO, platform->debugfs, g, - &mscg_transitions_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g, - &elpg_stat_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "elpg_transitions", S_IRUGO, 
platform->debugfs, g, - &elpg_transitions_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "falc_trace", S_IRUGO, platform->debugfs, g, - &falc_trace_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "perfmon_events_enable", S_IRUGO, platform->debugfs, g, - &perfmon_events_enable_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "perfmon_events_count", S_IRUGO, platform->debugfs, g, - &perfmon_events_count_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "pmu_security", S_IRUGO, platform->debugfs, g, - &security_fops); - if (!d) - goto err_out; - return 0; -err_out: - pr_err("%s: Failed to make debugfs node\n", __func__); - debugfs_remove_recursive(platform->debugfs); - return -ENOMEM; -} - -#endif diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index e7a8b7c27..cefb6577e 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -426,7 +426,6 @@ int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token); int gk20a_pmu_destroy(struct gk20a *g); int gk20a_pmu_load_norm(struct gk20a *g, u32 *load); int gk20a_pmu_load_update(struct gk20a *g); -int gk20a_pmu_debugfs_init(struct device *dev); void gk20a_pmu_reset_load_counters(struct gk20a *g); void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, u32 *total_cycles); @@ -468,5 +467,11 @@ int gk20a_pmu_vidmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem, u32 size); int gk20a_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem, u32 size); +int gk20a_pmu_get_pg_stats(struct gk20a *g, + u32 pg_engine_id, struct pmu_pg_stats_data *pg_stat_data); +bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos); + +int nvgpu_pmu_perfmon_start_sampling(struct pmu_gk20a *pmu); +int nvgpu_pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu); #endif /*__PMU_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c 
b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c index b7edf3f05..3f3119afd 100644 --- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c @@ -13,10 +13,6 @@ #include #include -#ifdef CONFIG_DEBUG_FS -#include -#include "platform_gk20a.h" -#endif #include #include #include @@ -523,69 +519,6 @@ int gk20a_sched_dev_release(struct inode *inode, struct file *filp) return 0; } -#ifdef CONFIG_DEBUG_FS -static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) -{ - struct device *dev = s->private; - struct gk20a *g = gk20a_get_platform(dev)->g; - struct gk20a_sched_ctrl *sched = &g->sched_ctrl; - bool sched_busy = true; - - int n = sched->bitmap_size / sizeof(u64); - int i; - int err; - - err = gk20a_busy(g); - if (err) - return err; - - if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { - sched_busy = false; - nvgpu_mutex_release(&sched->busy_lock); - } - - seq_printf(s, "control_locked=%d\n", sched->control_locked); - seq_printf(s, "busy=%d\n", sched_busy); - seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); - - nvgpu_mutex_acquire(&sched->status_lock); - - seq_puts(s, "active_tsg_bitmap\n"); - for (i = 0; i < n; i++) - seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]); - - seq_puts(s, "recent_tsg_bitmap\n"); - for (i = 0; i < n; i++) - seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); - - nvgpu_mutex_release(&sched->status_lock); - - gk20a_idle(g); - - return 0; -} - -static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_sched_debugfs_show, inode->i_private); -} - -static const struct file_operations gk20a_sched_debugfs_fops = { - .open = gk20a_sched_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void gk20a_sched_debugfs_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - - debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs, - dev, &gk20a_sched_debugfs_fops); -} 
-#endif /* CONFIG_DEBUG_FS */ - void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) { struct gk20a_sched_ctrl *sched = &g->sched_ctrl; diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h index 4f6d15105..776f689dd 100644 --- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h @@ -48,7 +48,6 @@ void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *); void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *); int gk20a_sched_ctrl_init(struct gk20a *); -void gk20a_sched_debugfs_init(struct device *dev); void gk20a_sched_ctrl_cleanup(struct gk20a *g); #endif /* __SCHED_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 82c587f95..c6e451e1f 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index f5328f035..831fd5dad 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -33,11 +33,11 @@ #include "clk_gm20b.h" #include "mc_gm20b.h" #include "regops_gm20b.h" -#include "debug_gm20b.h" #include "cde_gm20b.h" #include "therm_gm20b.h" #include "hal_gm20b.h" +#include #include #include @@ -234,7 +234,7 @@ int gm20b_init_hal(struct gk20a *g) gm20b_init_pmu_ops(gops); gm20b_init_clk_ops(gops); gm20b_init_regops(gops); - gm20b_init_debug_ops(gops); + gk20a_init_debug_ops(gops); gk20a_init_dbg_session_ops(gops); gm20b_init_cde_ops(gops); gm20b_init_therm_ops(gops); diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index f28ff45f0..d923e5e92 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -53,6 +53,7 @@ #include "hal_gp106.h" +#include #include #include diff --git 
a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 98a8be2ff..9a30ad7c2 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index e2a931bed..a1906a088 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -44,6 +44,7 @@ #include "gp10b.h" #include "hal_gp10b.h" +#include #include #include diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h index 3579b0fb5..567c44220 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h +++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h @@ -256,11 +256,13 @@ static inline struct gk20a *nvgpu_alloc_to_gpu(struct nvgpu_allocator *a) return a->g; } +#ifdef CONFIG_DEBUG_FS /* * Common functionality for the internals of the allocators. 
*/ void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a); void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a); +#endif int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g, const char *name, void *priv, bool dbg, @@ -281,11 +283,6 @@ static inline void nvgpu_alloc_disable_dbg(struct nvgpu_allocator *a) */ extern u32 nvgpu_alloc_tracing_on; -#ifdef CONFIG_DEBUG_FS -struct device; -void nvgpu_alloc_debugfs_init(struct device *dev); -#endif - #define nvgpu_alloc_trace_func() \ do { \ if (nvgpu_alloc_tracing_on) \ diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/debug.h similarity index 54% rename from drivers/gpu/nvgpu/gk20a/debug_gk20a.h rename to drivers/gpu/nvgpu/include/nvgpu/debug.h index 213922b33..70a039786 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/debug.h @@ -14,28 +14,42 @@ * */ -#ifndef _DEBUG_GK20A_H_ -#define _DEBUG_GK20A_H_ +#ifndef __NVGPU_DEBUG_H__ +#define __NVGPU_DEBUG_H__ -struct platform_device; struct gk20a; struct gpu_ops; -extern unsigned int gk20a_debug_trace_cmdbuf; - struct gk20a_debug_output { void (*fn)(void *ctx, const char *str, size_t len); void *ctx; char buf[256]; }; +#ifdef CONFIG_DEBUG_FS +extern unsigned int gk20a_debug_trace_cmdbuf; + void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...); -void gk20a_debug_dump(struct device *pdev); +void gk20a_debug_dump(struct gk20a *g); void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o); -int gk20a_gr_debug_dump(struct device *pdev); -void gk20a_debug_init(struct device *dev, const char *debugfs_symlink); +int gk20a_gr_debug_dump(struct gk20a *g); void gk20a_init_debug_ops(struct gpu_ops *gops); -void gk20a_debug_dump_device(void *dev); + +void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink); +void gk20a_debug_deinit(struct gk20a *g); +#else +static inline void gk20a_debug_output(struct gk20a_debug_output *o, + 
const char *fmt, ...) {} + +static inline void gk20a_debug_dump(struct gk20a *g) {} +static inline void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) {} +static inline int gk20a_gr_debug_dump(struct gk20a *g) { return 0;} +static inline void gk20a_init_debug_ops(struct gpu_ops *gops) {} + +static inline void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) {} +static inline void gk20a_debug_deinit(struct gk20a *g) {} #endif + +#endif /* __NVGPU_DEBUG_H__ */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h index dc198a04c..611854f2b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h @@ -31,12 +31,6 @@ void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip); void __nvgpu_track_vfree(struct gk20a *g, void *addr); void __nvgpu_track_kfree(struct gk20a *g, void *addr); - -void nvgpu_kmem_debugfs_init(struct device *dev); -#else -static inline void nvgpu_kmem_debugfs_init(struct device *dev) -{ -} #endif /** diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 02cc5b474..cdd0d378c 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -22,10 +22,10 @@ #include #include #include +#include #include "vgpu/vgpu.h" #include "vgpu/fecs_trace_vgpu.h" -#include "gk20a/debug_gk20a.h" #include "gk20a/hal_gk20a.h" #include "gk20a/ctxsw_trace_gk20a.h" #include "gk20a/tsg_gk20a.h" @@ -667,7 +667,7 @@ int vgpu_probe(struct platform_device *pdev) if (err) return err; - gk20a_debug_init(dev, "gpu.0"); + gk20a_debug_init(gk20a, "gpu.0"); /* Set DMA parameters to allow larger sgt lists */ dev->dma_parms = &gk20a->dma_parms;