mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
Since all debugfs code is Linux specific, remove it from common code and
move it to the Linux module. Debugfs code is now divided into the below
module-specific files:

  common/linux/debug.c
  common/linux/debug_cde.c
  common/linux/debug_ce.c
  common/linux/debug_fifo.c
  common/linux/debug_gr.c
  common/linux/debug_mm.c
  common/linux/debug_allocator.c
  common/linux/debug_kmem.c
  common/linux/debug_pmu.c
  common/linux/debug_sched.c

Add corresponding header files for the above modules too, and compile all
of the above files only if CONFIG_DEBUG_FS is set.

Some more details of the changes made:
- Move and rename gk20a/debug_gk20a.c to common/linux/debug.c
- Move and rename gk20a/debug_gk20a.h to include/nvgpu/debug.h
- Remove gm20b/debug_gm20b.c and gm20b/debug_gm20b.h and call
  gk20a_init_debug_ops() directly from gm20b_init_hal()
- Update all debug APIs to receive a struct gk20a pointer instead of a
  struct device pointer
- Update API gk20a_dmabuf_get_state() to receive a struct gk20a pointer
  instead of a struct device pointer
- Include <nvgpu/debug.h> explicitly in all files where debug operations
  are used
- Remove the "gk20a/platform_gk20a.h" include from HAL files which no
  longer need this include
- Add new API gk20a_debug_deinit() to deinitialize debugfs and call it
  from gk20a_remove()
- Move API gk20a_debug_dump_all_channel_status_ramfc() to
  gk20a/fifo_gk20a.c

Jira NVGPU-62

Change-Id: I076975d3d7f669bdbe9212fa33d98529377feeb6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1488902
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
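A rough sketch of the signature change described above (the helper name
example_debug_show is a hypothetical placeholder, not an actual nvgpu
declaration): debug entry points that previously took a Linux device pointer
now operate on struct gk20a directly, and only the Linux layer under
common/linux/ still resolves the device.

    /* Before: common code needed the Linux device to reach the GPU. */
    void example_debug_show(struct device *dev, struct seq_file *s);

    /* After: common code receives struct gk20a; struct device stays
     * confined to the Linux-specific debugfs files listed above. */
    void example_debug_show(struct gk20a *g, struct seq_file *s);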
316 lines
8.5 KiB
C
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include "debug_kmem.h"
#include "kmem_priv.h"
#include "gk20a/platform_gk20a.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
/**
 * __to_human_readable_bytes - Determine suffix for passed size.
 *
 * @bytes - Number of bytes to generate a suffix for.
 * @hr_bytes [out] - The human readable number of bytes.
 * @hr_suffix [out] - The suffix for the HR number of bytes.
 *
 * Computes a human readable decomposition of the passed number of bytes. The
 * suffix for the bytes is passed back through the @hr_suffix pointer. The
 * right number of bytes is then passed back in @hr_bytes. This returns the
 * following ranges:
 *
 *   0 - 1023 B
 *   1 - 1023 KB
 *   1 - 1023 MB
 *   1 - 1023 GB
 *   1 - 1023 TB
 *   1 - ...  PB
 */
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
				      const char **hr_suffix)
{
	static const char *suffixes[] =
		{ "B", "KB", "MB", "GB", "TB", "PB" };

	u64 suffix_ind = 0;

	while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
		bytes >>= 10;
		suffix_ind++;
	}

	/*
	 * Handle the case where bytes > 1023 PB.
	 */
	suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
		suffix_ind : ARRAY_SIZE(suffixes) - 1;

	*hr_bytes = bytes;
	*hr_suffix = suffixes[suffix_ind];
}
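
/*
 * Illustrative example (values chosen for illustration only): passing
 * 1500000 bytes shifts right by 10 twice (1500000 -> 1464 -> 1), so
 * *hr_bytes ends up as 1 with *hr_suffix "MB"; the decomposition truncates
 * rather than rounds.
 */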

/**
 * print_hr_bytes - Print human readable bytes
 *
 * @s - A seq_file to print to. May be NULL.
 * @msg - A message to print before the bytes.
 * @bytes - Number of bytes.
 *
 * Print @msg followed by the human readable decomposition of the passed
 * number of bytes.
 *
 * If @s is NULL then the prints are made to the kernel log.
 */
static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
{
	u64 hr_bytes;
	const char *hr_suffix;

	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
}
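
/*
 * For instance, a hypothetical call (not one made by the driver) such as
 * print_hr_bytes(NULL, "Total: ", 3145728) would print "Total: 3 MB" to
 * the kernel log, since 3145728 is 3 << 20 and @s is NULL.
 */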

/**
 * print_histogram - Build a histogram of the memory usage.
 *
 * @tracker The tracking to pull data from.
 * @s       A seq_file to dump info into.
 */
static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
			    struct seq_file *s)
{
	int i;
	u64 pot_min, pot_max;
	u64 nr_buckets;
	unsigned int *buckets;
	unsigned int total_allocs;
	struct nvgpu_rbtree_node *node;
	static const char histogram_line[] =
		"++++++++++++++++++++++++++++++++++++++++";

	/*
	 * pot_min is essentially a round down to the nearest power of 2. This
	 * is the start of the histogram. pot_max is just a round up to the
	 * nearest power of two. Each histogram bucket is one power of two so
	 * the histogram buckets are exponential.
	 */
	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);

	nr_buckets = __ffs(pot_max) - __ffs(pot_min);

	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
	if (!buckets) {
		__pstat(s, "OOM: could not allocate bucket storage!?\n");
		return;
	}

	/*
	 * Iterate across all of the allocs and determine what bucket they
	 * should go in. Round the size down to the nearest power of two to
	 * find the right bucket.
	 */
	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		int b;
		u64 bucket_min;
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		bucket_min = (u64)rounddown_pow_of_two(alloc->size);
		if (bucket_min < tracker->min_alloc)
			bucket_min = tracker->min_alloc;

		b = __ffs(bucket_min) - __ffs(pot_min);

		/*
		 * Handle the one case where there's an alloc exactly as big as
		 * the maximum bucket size of the largest bucket. Most of the
		 * buckets have an inclusive minimum and exclusive maximum. But
		 * the largest bucket needs to have an _inclusive_ maximum as
		 * well.
		 */
		if (b == (int)nr_buckets)
			b--;

		buckets[b]++;

		nvgpu_rbtree_enum_next(&node, node);
	}

	total_allocs = 0;
	for (i = 0; i < (int)nr_buckets; i++)
		total_allocs += buckets[i];

	__pstat(s, "Alloc histogram:\n");

	/*
	 * Actually compute the histogram lines.
	 */
	for (i = 0; i < (int)nr_buckets; i++) {
		char this_line[sizeof(histogram_line) + 1];
		u64 line_length;
		u64 hr_bytes;
		const char *hr_suffix;

		memset(this_line, 0, sizeof(this_line));

		/*
		 * Compute the normalized line length. Can't use floating
		 * point, so just multiply everything by 1000 and use fixed
		 * point.
		 */
		line_length = (1000 * buckets[i]) / total_allocs;
		line_length *= sizeof(histogram_line);
		line_length /= 1000;
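
		/*
		 * Worked example with made-up numbers: if buckets[i] is 5 and
		 * total_allocs is 10, the scaled ratio is (1000 * 5) / 10 ==
		 * 500; multiplying by sizeof(histogram_line) and dividing by
		 * 1000 then yields a bar roughly half the maximum width.
		 */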

		memset(this_line, '+', line_length);

		__to_human_readable_bytes(1 << (__ffs(pot_min) + i),
					  &hr_bytes, &hr_suffix);
		__pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
			hr_bytes, hr_bytes << 1,
			hr_suffix, buckets[i], this_line);
	}
}

/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker The tracking to pull data from.
 * @s       A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-null then seq_printf() will be
 * used with @s. Otherwise the stats are pr_info()ed.
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
			    struct seq_file *s)
{
	nvgpu_lock_tracker(tracker);

	__pstat(s, "Mem tracker: %s\n\n", tracker->name);

	__pstat(s, "Basic Stats:\n");
	__pstat(s, " Number of allocs %lld\n",
		tracker->nr_allocs);
	__pstat(s, " Number of frees %lld\n",
		tracker->nr_frees);
	print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
	print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
	print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
	print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
	print_hr_bytes(s, " Bytes allocated (real) ",
		       tracker->bytes_alloced_real);
	print_hr_bytes(s, " Bytes freed (real) ",
		       tracker->bytes_freed_real);
	__pstat(s, "\n");

	print_histogram(tracker, s);

	nvgpu_unlock_tracker(tracker);
}

static int __kmem_tracking_show(struct seq_file *s, void *unused)
{
	struct nvgpu_mem_alloc_tracker *tracker = s->private;

	nvgpu_kmem_print_stats(tracker, s);

	return 0;
}

static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_tracking_show, inode->i_private);
}

static const struct file_operations __kmem_tracking_fops = {
	.open = __kmem_tracking_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __kmem_traces_dump_tracker(struct gk20a *g,
				      struct nvgpu_mem_alloc_tracker *tracker,
				      struct seq_file *s)
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		kmem_print_mem_alloc(g, alloc, s);

		nvgpu_rbtree_enum_next(&node, node);
	}

	return 0;
}

static int __kmem_traces_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;

	nvgpu_lock_tracker(g->vmallocs);
	seq_puts(s, "Outstanding vmallocs:\n");
	__kmem_traces_dump_tracker(g, g->vmallocs, s);
	seq_puts(s, "\n");
	nvgpu_unlock_tracker(g->vmallocs);

	nvgpu_lock_tracker(g->kmallocs);
	seq_puts(s, "Outstanding kmallocs:\n");
	__kmem_traces_dump_tracker(g, g->kmallocs, s);
	nvgpu_unlock_tracker(g->kmallocs);

	return 0;
}

static int __kmem_traces_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_traces_show, inode->i_private);
}

static const struct file_operations __kmem_traces_fops = {
	.open = __kmem_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void nvgpu_kmem_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
	struct dentry *node;

	g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
	if (IS_ERR_OR_NULL(g->debugfs_kmem))
		return;

	node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
				   g->debugfs_kmem,
				   g->vmallocs, &__kmem_tracking_fops);
	node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
				   g->debugfs_kmem,
				   g->kmallocs, &__kmem_tracking_fops);
	node = debugfs_create_file("traces", S_IRUGO,
				   g->debugfs_kmem,
				   g, &__kmem_traces_fops);
}
#endif