diff --git a/drivers/gpu/nvgpu/Kconfig.nvgpu b/drivers/gpu/nvgpu/Kconfig.nvgpu index 3e3607e0d..8baf6897c 100644 --- a/drivers/gpu/nvgpu/Kconfig.nvgpu +++ b/drivers/gpu/nvgpu/Kconfig.nvgpu @@ -47,6 +47,17 @@ config GK20A_DEVFREQ endchoice +config NVGPU_TRACK_MEM_USAGE + bool "Track the usage of system memory in nvgpu" + depends on GK20A + default n + help + Say Y here to allow nvgpu to track and keep statistics on + the system memory used by the driver. This does recreate + some of the kmem_leak tracking but this is also applicable + to other OSes which do not have Linux' kmem_leak. + + config GK20A_CYCLE_STATS bool "Support GK20A GPU CYCLE STATS" depends on GK20A diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c index 24e0ca5df..60e793480 100644 --- a/drivers/gpu/nvgpu/common/linux/kmem.c +++ b/drivers/gpu/nvgpu/common/linux/kmem.c @@ -15,11 +15,22 @@ */ #include +#include #include #include +#include +#include +#include +#include +#include +#include #include +#include "gk20a/gk20a.h" + +#include "kmem_priv.h" + /* * Statically declared because this needs to be shared across all nvgpu driver * instances. This makes sure that all kmem caches are _definitely_ uniquely @@ -27,26 +38,793 @@ */ static atomic_t kmem_cache_id; -/* - * Linux specific version of the nvgpu_kmem_cache struct. This type is - * completely opaque to the rest of the driver. 
- */ -struct nvgpu_kmem_cache { - struct gk20a *g; - struct kmem_cache *cache; +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + +static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) +{ + mutex_lock(&tracker->lock); +} + +static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) +{ + mutex_unlock(&tracker->lock); +} + +static void kmem_print_mem_alloc(struct gk20a *g, + struct nvgpu_mem_alloc *alloc, + struct seq_file *s) +{ +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + int i; + + __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", + alloc->addr, alloc->size); + for (i = 0; i < alloc->stack_length; i++) + __pstat(s, " %3d [<%p>] %pS\n", i, + (void *)alloc->stack[i], + (void *)alloc->stack[i]); + __pstat(s, "\n"); +#else + __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", + alloc->addr, alloc->size, alloc->ip); +#endif +} + +static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker, + struct nvgpu_mem_alloc *alloc) +{ + struct rb_node **new = &tracker->allocs.rb_node; + struct rb_node *parent = NULL; + + while (*new) { + struct nvgpu_mem_alloc *tmp = rb_entry(*new, + struct nvgpu_mem_alloc, + allocs_entry); + + parent = *new; + + if (alloc->addr < tmp->addr) + new = &(*new)->rb_left; + else if (alloc->addr > tmp->addr) + new = &(*new)->rb_right; + else + return -EINVAL; + } + + /* Put the new node there */ + rb_link_node(&alloc->allocs_entry, parent, new); + rb_insert_color(&alloc->allocs_entry, &tracker->allocs); + + return 0; +} + +static struct nvgpu_mem_alloc *nvgpu_rem_alloc( + struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr) +{ + struct rb_node *node = tracker->allocs.rb_node; + struct nvgpu_mem_alloc *alloc; + + while (node) { + alloc = container_of(node, + struct nvgpu_mem_alloc, allocs_entry); + + if (alloc_addr < alloc->addr) + node = node->rb_left; + else if (alloc_addr > alloc->addr) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &tracker->allocs); + + return alloc; +} + 
+static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, + unsigned long size, unsigned long real_size, + u64 addr, unsigned long ip) +{ + int ret; + struct nvgpu_mem_alloc *alloc; +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + struct stack_trace stack_trace; +#endif + + alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); + if (!alloc) + return -ENOMEM; + + alloc->owner = tracker; + alloc->size = size; + alloc->real_size = real_size; + alloc->addr = addr; + alloc->ip = (void *)(uintptr_t)ip; + +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + stack_trace.max_entries = MAX_STACK_TRACE; + stack_trace.nr_entries = 0; + stack_trace.entries = alloc->stack; + /* + * This 4 here skips the 2 function calls that happen for all traced + * allocs due to nvgpu: + * + * __nvgpu_save_kmem_alloc+0x7c/0x128 + * __nvgpu_track_kzalloc+0xcc/0xf8 + * + * And the function calls that get made by the stack trace code itself. + * If the trace savings code changes this will likely have to change + * as well. + */ + stack_trace.skip = 4; + save_stack_trace(&stack_trace); + alloc->stack_length = stack_trace.nr_entries; +#endif + + lock_tracker(tracker); + tracker->bytes_alloced += size; + tracker->bytes_alloced_real += real_size; + tracker->nr_allocs++; + + /* Keep track of this for building a histogram later on. */ + if (tracker->max_alloc < size) + tracker->max_alloc = size; + if (tracker->min_alloc > size) + tracker->min_alloc = size; + + ret = nvgpu_add_alloc(tracker, alloc); + if (ret) { + WARN(1, "Duplicate alloc??? 
0x%llx\n", addr); + kfree(alloc); + unlock_tracker(tracker); + return ret; + } + unlock_tracker(tracker); + + return 0; +} + +static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, + u64 addr) +{ + struct nvgpu_mem_alloc *alloc; + + lock_tracker(tracker); + alloc = nvgpu_rem_alloc(tracker, addr); + if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { + unlock_tracker(tracker); + return -EINVAL; + } + + tracker->nr_frees++; + tracker->bytes_freed += alloc->size; + tracker->bytes_freed_real += alloc->real_size; + unlock_tracker(tracker); + + return 0; +} + +static void __nvgpu_check_valloc_size(unsigned long size) +{ + WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size); +} + +static void __nvgpu_check_kalloc_size(size_t size) +{ + WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size); +} + +void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ + void *alloc = vmalloc(size); + + if (!alloc) + return NULL; + + kmem_dbg("vmalloc: size=%-6ld addr=0x%p", size, alloc); + __nvgpu_check_valloc_size(size); /* - * Memory to hold the kmem_cache unique name. Only necessary on our - * k3.10 kernel when not using the SLUB allocator but it's easier to - * just carry this on to newer kernels. + * Ignore the return message. If this fails let's not cause any issues + * for the rest of the driver. */ - char name[128]; + __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ + void *alloc = vzalloc(size); + + if (!alloc) + return NULL; + + kmem_dbg("vzalloc: size=%-6ld addr=0x%p", size, alloc); + __nvgpu_check_valloc_size(size); + + /* + * Ignore the return message. If this fails let's not cause any issues + * for the rest of the driver. 
+ */ + __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc = kmalloc(size, GFP_KERNEL); + + if (!alloc) + return NULL; + + kmem_dbg("kmalloc: size=%-6ld addr=0x%p gfp=0x%08x", + size, alloc, GFP_KERNEL); + __nvgpu_check_kalloc_size(size); + + __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc = kzalloc(size, GFP_KERNEL); + + if (!alloc) + return NULL; + + kmem_dbg("kzalloc: size=%-6ld addr=0x%p gfp=0x%08x", + size, alloc, GFP_KERNEL); + __nvgpu_check_kalloc_size(size); + + __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, + unsigned long ip) +{ + void *alloc = kcalloc(n, size, GFP_KERNEL); + + if (!alloc) + return NULL; + + kmem_dbg("kcalloc: size=%-6ld addr=0x%p gfp=0x%08x", + n * size, alloc, GFP_KERNEL); + __nvgpu_check_kalloc_size(n * size); + + __nvgpu_save_kmem_alloc(g->kmallocs, n * size, + roundup_pow_of_two(n * size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void __nvgpu_track_vfree(struct gk20a *g, void *addr) +{ + /* + * Often it is accepted practice to pass NULL pointers into free + * functions to save code. + */ + if (!addr) + return; + + vfree(addr); + + kmem_dbg("vfree: addr=0x%p", addr); + + __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); +} + +void __nvgpu_track_kfree(struct gk20a *g, void *addr) +{ + if (!addr) + return; + + kfree(addr); + + kmem_dbg("kfree: addr=0x%p", addr); + + __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); +} + +/** + * to_human_readable_bytes - Determine suffix for passed size. 
+ * + * @bytes - Number of bytes to generate a suffix for. + * @hr_bytes [out] - The human readable number of bytes. + * @hr_suffix [out] - The suffix for the HR number of bytes. + * + * Computes a human readable decomposition of the passed number of bytes. The + * suffix for the bytes is passed back through the @hr_suffix pointer. The right + * number of bytes is then passed back in @hr_bytes. This returns the following + * ranges: + * + * 0 - 1023 B + * 1 - 1023 KB + * 1 - 1023 MB + * 1 - 1023 GB + * 1 - 1023 TB + * 1 - ... PB + */ +static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, + const char **hr_suffix) +{ + static const char *suffixes[] = + { "B", "KB", "MB", "GB", "TB", "PB" }; + + u64 suffix_ind = 0; + + while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) { + bytes >>= 10; + suffix_ind++; + } + + /* + * Handle case where bytes > 1023PB. + */ + suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? + suffix_ind : ARRAY_SIZE(suffixes) - 1; + + *hr_bytes = bytes; + *hr_suffix = suffixes[suffix_ind]; +} + +/** + * print_hr_bytes - Print human readable bytes + * + * @s - A seq_file to print to. May be NULL. + * @msg - A message to print before the bytes. + * @bytes - Number of bytes. + * + * Print @msg followed by the human readable decomposition of the passed number + * of bytes. + * + * If @s is NULL then this prints will be made to the kernel log. + */ +static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) +{ + u64 hr_bytes; + const char *hr_suffix; + + __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); + __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); +} + +/** + * print_histogram - Build a histogram of the memory usage. + * + * @tracker The tracking to pull data from. + * @s A seq_file to dump info into. 
+ */ +static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, + struct seq_file *s) +{ + int i; + u64 pot_min, pot_max; + u64 nr_buckets; + unsigned int *buckets; + unsigned int total_allocs; + struct rb_node *node; + static const char histogram_line[] = + "++++++++++++++++++++++++++++++++++++++++"; + + /* + * pot_min is essentially a round down to the nearest power of 2. This + * is the start of the histogram. pot_max is just a round up to the + * nearest power of two. Each histogram bucket is one power of two so + * the histogram buckets are exponential. + */ + pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); + pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); + + nr_buckets = __ffs(pot_max) - __ffs(pot_min); + + buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); + if (!buckets) { + __pstat(s, "OOM: could not allocate bucket storage!?\n"); + return; + } + + /* + * Iterate across all of the allocs and determine what bucket they + * should go in. Round the size down to the nearest power of two to + * find the right bucket. + */ + for (node = rb_first(&tracker->allocs); + node != NULL; + node = rb_next(node)) { + int b; + u64 bucket_min; + struct nvgpu_mem_alloc *alloc; + + alloc = container_of(node, struct nvgpu_mem_alloc, + allocs_entry); + bucket_min = (u64)rounddown_pow_of_two(alloc->size); + if (bucket_min < tracker->min_alloc) + bucket_min = tracker->min_alloc; + + b = __ffs(bucket_min) - __ffs(pot_min); + + /* + * Handle the one case were there's an alloc exactly as big as + * the maximum bucket size of the largest bucket. Most of the + * buckets have an inclusive minimum and exclusive maximum. But + * the largest bucket needs to have an _inclusive_ maximum as + * well. + */ + if (b == (int)nr_buckets) + b--; + + buckets[b]++; + } + + total_allocs = 0; + for (i = 0; i < (int)nr_buckets; i++) + total_allocs += buckets[i]; + + __pstat(s, "Alloc histogram:\n"); + + /* + * Actually compute the histogram lines. 
+ */ + for (i = 0; i < (int)nr_buckets; i++) { + char this_line[sizeof(histogram_line) + 1]; + u64 line_length; + u64 hr_bytes; + const char *hr_suffix; + + memset(this_line, 0, sizeof(this_line)); + + /* + * Compute the normalized line length. Cant use floating point + * so we will just multiply everything by 1000 and use fixed + * point. + */ + line_length = (1000 * buckets[i]) / total_allocs; + line_length *= sizeof(histogram_line); + line_length /= 1000; + + memset(this_line, '+', line_length); + + __to_human_readable_bytes(1 << (__ffs(pot_min) + i), + &hr_bytes, &hr_suffix); + __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", + hr_bytes, hr_bytes << 1, + hr_suffix, buckets[i], this_line); + } +} + +/** + * nvgpu_kmem_print_stats - Print kmem tracking stats. + * + * @tracker The tracking to pull data from. + * @s A seq_file to dump info into. + * + * Print stats from a tracker. If @s is non-null then seq_printf() will be + * used with @s. Otherwise the stats are pr_info()ed. + */ +void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, + struct seq_file *s) +{ + lock_tracker(tracker); + + __pstat(s, "Mem tracker: %s\n\n", tracker->name); + + __pstat(s, "Basic Stats:\n"); + __pstat(s, " Number of allocs %lld\n", + tracker->nr_allocs); + __pstat(s, " Number of frees %lld\n", + tracker->nr_frees); + print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); + print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); + print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); + print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); + print_hr_bytes(s, " Bytes allocated (real) ", + tracker->bytes_alloced_real); + print_hr_bytes(s, " Bytes freed (real) ", + tracker->bytes_freed_real); + __pstat(s, "\n"); + + print_histogram(tracker, s); + + unlock_tracker(tracker); +} + +#if defined(CONFIG_DEBUG_FS) +static int __kmem_tracking_show(struct seq_file *s, void *unused) +{ + struct nvgpu_mem_alloc_tracker *tracker = s->private; + + 
nvgpu_kmem_print_stats(tracker, s); + + return 0; +} + +static int __kmem_tracking_open(struct inode *inode, struct file *file) +{ + return single_open(file, __kmem_tracking_show, inode->i_private); +} + +static const struct file_operations __kmem_tracking_fops = { + .open = __kmem_tracking_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; +static int __kmem_traces_dump_tracker(struct gk20a *g, + struct nvgpu_mem_alloc_tracker *tracker, + struct seq_file *s) +{ + struct rb_node *node; + + for (node = rb_first(&tracker->allocs); + node != NULL; + node = rb_next(node)) { + struct nvgpu_mem_alloc *alloc; + + alloc = container_of(node, struct nvgpu_mem_alloc, + allocs_entry); + + kmem_print_mem_alloc(g, alloc, s); + } + + return 0; +} + +static int __kmem_traces_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + + lock_tracker(g->vmallocs); + seq_puts(s, "Oustanding vmallocs:\n"); + __kmem_traces_dump_tracker(g, g->vmallocs, s); + seq_puts(s, "\n"); + unlock_tracker(g->vmallocs); + + lock_tracker(g->kmallocs); + seq_puts(s, "Oustanding kmallocs:\n"); + __kmem_traces_dump_tracker(g, g->kmallocs, s); + unlock_tracker(g->kmallocs); + + return 0; +} + +static int __kmem_traces_open(struct inode *inode, struct file *file) +{ + return single_open(file, __kmem_traces_show, inode->i_private); +} + +static const struct file_operations __kmem_traces_fops = { + .open = __kmem_traces_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void nvgpu_kmem_debugfs_init(struct device *dev) +{ + struct gk20a_platform *plat = dev_get_drvdata(dev); + struct gk20a *g = get_gk20a(dev); + struct dentry *gpu_root = plat->debugfs; + struct dentry *node; + + g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root); + if (IS_ERR_OR_NULL(g->debugfs_kmem)) + return; + + node = debugfs_create_file(g->vmallocs->name, S_IRUGO, + g->debugfs_kmem, + g->vmallocs, &__kmem_tracking_fops); + node = 
debugfs_create_file(g->kmallocs->name, S_IRUGO, + g->debugfs_kmem, + g->kmallocs, &__kmem_tracking_fops); + node = debugfs_create_file("traces", S_IRUGO, + g->debugfs_kmem, + g, &__kmem_traces_fops); +} +#else +void nvgpu_kmem_debugfs_init(struct device *dev) +{ +} +#endif + +static int __do_check_for_outstanding_allocs( + struct gk20a *g, + struct nvgpu_mem_alloc_tracker *tracker, + const char *type, bool silent) +{ + struct rb_node *node; + int count = 0; + + for (node = rb_first(&tracker->allocs); + node != NULL; + node = rb_next(node)) { + struct nvgpu_mem_alloc *alloc; + + alloc = container_of(node, struct nvgpu_mem_alloc, + allocs_entry); + + if (!silent) + kmem_print_mem_alloc(g, alloc, NULL); + + count++; + } + + return count; +} + +/** + * check_for_outstanding_allocs - Count and display outstanding allocs + * + * @g - The GPU. + * @silent - If set don't print anything about the allocs. + * + * Dump (or just count) the number of allocations left outstanding. + */ +static int check_for_outstanding_allocs(struct gk20a *g, bool silent) +{ + int count = 0; + + count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", + silent); + count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", + silent); + + return count; +} + +static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, + void (*force_free_func)(const void *)) +{ + struct rb_node *node; + + while ((node = rb_first(&tracker->allocs)) != NULL) { + struct nvgpu_mem_alloc *alloc; + + alloc = container_of(node, struct nvgpu_mem_alloc, + allocs_entry); + if (force_free_func) + force_free_func((void *)alloc->addr); + + kfree(alloc); + } +} + +/** + * nvgpu_kmem_cleanup - Cleanup the kmem tracking + * + * @g - The GPU. + * @force_free - If set will also free leaked objects if possible. + * + * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free + * is non-zero then the allocation made by nvgpu is also freed. 
This is risky, + * though, as it is possible that the memory is still in use by other parts of + * the GPU driver not aware that this has happened. + * + * In theory it should be fine if the GPU driver has been deinitialized and + * there are no bugs in that code. However, if there are any bugs in that code + * then they could likely manifest as odd crashes indeterminate amounts of time + * in the future. So use @force_free at your own risk. + */ +static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free) +{ + do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL); + do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL); +} + +void nvgpu_kmem_fini(struct gk20a *g, int flags) +{ + int count; + bool silent, force_free; + + if (!flags) + return; + + silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS); + force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP); + + count = check_for_outstanding_allocs(g, silent); + nvgpu_kmem_cleanup(g, force_free); + + /* + * If we leak objects we can either BUG() out or just WARN(). In general + * it doesn't make sense to BUG() on here since leaking a few objects + * won't crash the kernel but it can be helpful for development. + * + * If neither flag is set then we just silently do nothing. 
+ */ + if (count > 0) { + if (flags & NVGPU_KMEM_FINI_WARN) { + WARN(1, "Letting %d allocs leak!!\n", count); + } else if (flags & NVGPU_KMEM_FINI_BUG) { + gk20a_err(g->dev, "Letting %d allocs leak!!\n", count); + BUG(); + } + } +} + +int nvgpu_kmem_init(struct gk20a *g) +{ + int err; + + g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL); + g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL); + + if (!g->vmallocs || !g->kmallocs) { + err = -ENOMEM; + goto fail; + } + + g->vmallocs->name = "vmalloc"; + g->kmallocs->name = "kmalloc"; + + g->vmallocs->allocs = RB_ROOT; + g->kmallocs->allocs = RB_ROOT; + + mutex_init(&g->vmallocs->lock); + mutex_init(&g->kmallocs->lock); + + g->vmallocs->min_alloc = PAGE_SIZE; + g->kmallocs->min_alloc = KMALLOC_MIN_SIZE; + + /* + * This needs to go after all the other initialization since they use + * the nvgpu_kzalloc() API. + */ + g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, + sizeof(struct nvgpu_mem_alloc)); + g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, + sizeof(struct nvgpu_mem_alloc)); + + if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { + err = -ENOMEM; + if (g->vmallocs->allocs_cache) + nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); + if (g->kmallocs->allocs_cache) + nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); + goto fail; + } + + return 0; + +fail: + if (g->vmallocs) + kfree(g->vmallocs); + if (g->kmallocs) + kfree(g->kmallocs); + return err; +} + +#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ + +int nvgpu_kmem_init(struct gk20a *g) +{ + return 0; +} + +void nvgpu_kmem_fini(struct gk20a *g, int flags) +{ +} +#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ + struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) { struct nvgpu_kmem_cache *cache = - kzalloc(sizeof(struct nvgpu_kmem_cache), GFP_KERNEL); + nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); if (!cache) return NULL; @@ -59,7 +837,7 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct 
gk20a *g, size_t size) cache->cache = kmem_cache_create(cache->name, size, size, 0, NULL); if (!cache->cache) { - kfree(cache); + nvgpu_kfree(g, cache); return NULL; } @@ -68,8 +846,10 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) { + struct gk20a *g = cache->g; + kmem_cache_destroy(cache->cache); - kfree(cache); + nvgpu_kfree(g, cache); } void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h new file mode 100644 index 000000000..5e38ad5d1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __KMEM_PRIV_H__ +#define __KMEM_PRIV_H__ + +#include + +#define __pstat(s, fmt, msg...) \ + do { \ + if (s) \ + seq_printf(s, fmt, ##msg); \ + else \ + pr_info(fmt, ##msg); \ + } while (0) + +#define MAX_STACK_TRACE 20 + +/* + * Linux specific version of the nvgpu_kmem_cache struct. This type is + * completely opaque to the rest of the driver. + */ +struct nvgpu_kmem_cache { + struct gk20a *g; + struct kmem_cache *cache; + + /* + * Memory to hold the kmem_cache unique name. 
Only necessary on our + * k3.10 kernel when not using the SLUB allocator but it's easier to + * just carry this on to newer kernels. + */ + char name[128]; +}; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + +struct nvgpu_mem_alloc { + struct nvgpu_mem_alloc_tracker *owner; + + void *ip; +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + unsigned long stack[MAX_STACK_TRACE]; + int stack_length; +#endif + + u64 addr; + + unsigned long size; + unsigned long real_size; + + /* Ugh - linux specific. Will need to be abstracted. */ + struct rb_node allocs_entry; +}; + +/* + * Linux specific tracking of vmalloc, kmalloc, etc. + */ +struct nvgpu_mem_alloc_tracker { + const char *name; + struct nvgpu_kmem_cache *allocs_cache; + struct rb_root allocs; + struct mutex lock; + + u64 bytes_alloced; + u64 bytes_freed; + u64 bytes_alloced_real; + u64 bytes_freed_real; + u64 nr_allocs; + u64 nr_frees; + + unsigned long min_alloc; + unsigned long max_alloc; +}; + +#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ + +#endif /* __KMEM_PRIV_H__ */ diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index f228110eb..68e432599 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -986,7 +986,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); - nvgpu_big_free(ch->gpfifo.pipe); + nvgpu_big_free(g, ch->gpfifo.pipe); memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); #if defined(CONFIG_GK20A_CYCLE_STATS) @@ -1856,7 +1856,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, } if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) { - c->gpfifo.pipe = nvgpu_big_malloc( + c->gpfifo.pipe = nvgpu_big_malloc(g, gpfifo_size * sizeof(struct nvgpu_gpfifo)); if (!c->gpfifo.pipe) { err = -ENOMEM; @@ -1927,7 +1927,7 @@ clean_up_sync: c->sync = NULL; } clean_up_unmap: - 
nvgpu_big_free(c->gpfifo.pipe); + nvgpu_big_free(g, c->gpfifo.pipe); gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); clean_up: memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); @@ -2057,12 +2057,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, if (!g) { size = count * sizeof(struct nvgpu_gpfifo); if (size) { - g = nvgpu_big_malloc(size); + g = nvgpu_big_malloc(c->g, size); if (!g) return; if (copy_from_user(g, user_gpfifo, size)) { - nvgpu_big_free(g); + nvgpu_big_free(c->g, g); return; } } @@ -2074,7 +2074,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, trace_write_pushbuffer(c, gp); if (gpfifo_allocated) - nvgpu_big_free(g); + nvgpu_big_free(c->g, g); } static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 4a42e03fb..0a0aada7a 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -819,7 +819,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, goto fail_dmabuf_put; } - buffer = nvgpu_big_zalloc(access_limit_size); + buffer = nvgpu_big_zalloc(g, access_limit_size); if (!buffer) { err = -ENOMEM; goto fail_dmabuf_put; @@ -865,7 +865,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, fail_idle: gk20a_idle(g->dev); fail_free_buffer: - nvgpu_big_free(buffer); + nvgpu_big_free(g, buffer); fail_dmabuf_put: dma_buf_put(dmabuf); diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index 67f9b5320..6341a962b 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c @@ -21,6 +21,7 @@ #include #include +#include #include "gk20a.h" #include "debug_gk20a.h" @@ -485,6 +486,9 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) gk20a_mm_debugfs_init(g->dev); gk20a_fifo_debugfs_init(g->dev); 
gk20a_sched_debugfs_init(g->dev); +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + nvgpu_kmem_debugfs_init(g->dev); +#endif #endif } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 3504a32f5..6b026ee23 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -1598,6 +1599,8 @@ static int gk20a_probe(struct platform_device *dev) set_gk20a(dev, gk20a); gk20a->dev = &dev->dev; + nvgpu_kmem_init(gk20a); + gk20a->irq_stall = platform_get_irq(dev, 0); gk20a->irq_nonstall = platform_get_irq(dev, 1); if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 8006a4fe3..69528c1f6 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -27,6 +27,7 @@ struct gk20a_ctxsw_ucode_segments; struct gk20a_fecs_trace; struct gk20a_ctxsw_trace; struct acr_desc; +struct nvgpu_mem_alloc_tracker; #include #include @@ -915,6 +916,7 @@ struct gk20a { struct dentry *debugfs_runlist_interleave; struct dentry *debugfs_allocators; struct dentry *debugfs_xve; + struct dentry *debugfs_kmem; #endif struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; @@ -1055,6 +1057,10 @@ struct gk20a { /* Check if msi is enabled */ bool msi_enabled; #endif +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + struct nvgpu_mem_alloc_tracker *vmallocs; + struct nvgpu_mem_alloc_tracker *kmallocs; +#endif }; static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) @@ -1131,6 +1137,7 @@ enum gk20a_dbg_categories { gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */ gpu_dbg_xv = BIT(18), /* XVE debugging */ gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */ + gpu_dbg_kmem = BIT(20), /* Kmem tracking debugging */ gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ }; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 36b85f3b3..e695f02ed 
100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3424,7 +3424,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) gr->ctx_vars.local_golden_image = NULL; if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) - nvgpu_big_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); + nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; gk20a_comptag_allocator_destroy(&gr->comp_tags); @@ -8055,7 +8055,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); - map = nvgpu_big_zalloc(map_size); + map = nvgpu_big_zalloc(g, map_size); if (!map) return -ENOMEM; @@ -8145,7 +8145,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) return 0; cleanup: gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); - nvgpu_big_free(map); + nvgpu_big_free(g, map); return -EINVAL; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 7a64f79b5..2ff546536 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -1487,8 +1487,8 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm, nvgpu_mutex_acquire(&vm->update_gmmu_lock); - buffer_list = nvgpu_big_zalloc(sizeof(*buffer_list) * - vm->num_user_mapped_buffers); + buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) * + vm->num_user_mapped_buffers); if (!buffer_list) { nvgpu_mutex_release(&vm->update_gmmu_lock); return -ENOMEM; @@ -1572,7 +1572,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, gk20a_vm_mapping_batch_finish_locked(vm, &batch); nvgpu_mutex_release(&vm->update_gmmu_lock); - nvgpu_big_free(mapped_buffers); + nvgpu_big_free(vm->mm->g, mapped_buffers); } static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, diff --git a/drivers/gpu/nvgpu/include/nvgpu/kmem.h 
b/drivers/gpu/nvgpu/include/nvgpu/kmem.h index c08e40a60..591925252 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/kmem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/kmem.h @@ -14,18 +14,21 @@ * along with this program. If not, see . */ -#ifndef NVGPU_KMEM_H -#define NVGPU_KMEM_H +#ifndef __NVGPU_KMEM_H__ +#define __NVGPU_KMEM_H__ -#include -#include -#include - -#include +/* + * Incase this isn't defined already. + */ +#ifndef _THIS_IP_ +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif struct gk20a; -/* +/** + * DOC: Kmem cache support + * * In Linux there is support for the notion of a kmem_cache. It gives better * memory usage characteristics for lots of allocations of the same size. Think * structs that get allocated over and over. Normal kmalloc() type routines @@ -37,26 +40,200 @@ struct gk20a; */ struct nvgpu_kmem_cache; +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE +/* + * Uncomment this if you want to enable stack traces in the memory profiling. + * Since this is a fairly high overhead operation and is only necessary for + * debugging actual bugs it's left here for developers to enable. + */ +/* #define __NVGPU_SAVE_KALLOC_STACK_TRACES */ + +/* + * Defined per-OS. + */ +struct nvgpu_mem_alloc_tracker; +#endif + + +/** + * nvgpu_kmem_cache_create - create an nvgpu kernel memory cache. + * + * @g The GPU driver struct using this cache. + * @size Size of the object allocated by the cache. + * + * This cache can be used to allocate objects of size @size. Common usage would + * be for a struct that gets allocated a lot. In that case @size should be + * sizeof(struct my_struct). + * + * A given implementation of this need not do anything special. The allocation + * routines can simply be passed on to nvgpu_kzalloc() if desired so packing + * and alignment of the structs cannot be assumed. 
+ */ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size); + +/** + * nvgpu_kmem_cache_destroy - destroy a cache created by + * nvgpu_kmem_cache_create(). + * + * @cache The cache to destroy. + */ void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache); +/** + * nvgpu_kmem_cache_alloc - Allocate an object from the cache + * + * @cache The cache to alloc from. + */ void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache); + +/** + * nvgpu_kmem_cache_free - Free an object back to a cache + * + * @cache The cache to return the object to. + * @ptr Pointer to the object to free. + */ void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr); -static inline void *__nvgpu_big_alloc(size_t size, bool clear) +/** + * nvgpu_kmalloc - Allocate from the kernel's allocator. + * + * @g: Current GPU. + * @size: Size of the allocation. + * + * Allocate a chunk of system memory from the kernel. Allocations larger than 1 + * page may fail even when there may appear to be enough memory. + * + * This function may sleep so cannot be used in IRQs. + */ +#define nvgpu_kmalloc(g, size) __nvgpu_kmalloc(g, size, _THIS_IP_) + +/** + * nvgpu_kzalloc - Allocate from the kernel's allocator. + * + * @g: Current GPU. + * @size: Size of the allocation. + * + * Identical to nvgpu_kmalloc() except the memory will be zeroed before being + * returned. + */ +#define nvgpu_kzalloc(g, size) __nvgpu_kzalloc(g, size, _THIS_IP_) + +/** + * nvgpu_kcalloc - Allocate from the kernel's allocator. + * + * @g: Current GPU. + * @n: Number of objects. + * @size: Size of each object. + * + * Identical to nvgpu_kmalloc() except the size of the memory chunk returned is + * @n * @size. + */ +#define nvgpu_kcalloc(g, n, size) __nvgpu_kcalloc(g, n, size, _THIS_IP_) + +/** + * nvgpu_vmalloc - Allocate memory and return a map to it. + * + * @g: Current GPU. + * @size: Size of the allocation. 
+ * + * Allocate some memory and return a pointer to a virtual memory mapping of + * that memory in the kernel's virtual address space. The underlying physical + * memory is not guaranteed to be contiguous (and indeed likely isn't). This + * allows for much larger allocations to be done without worrying as much + * about physical memory fragmentation. + * + * This function may sleep. + */ +#define nvgpu_vmalloc(g, size) __nvgpu_vmalloc(g, size, _THIS_IP_) + +/** + * nvgpu_vzalloc - Allocate memory and return a map to it. + * + * @g: Current GPU. + * @size: Size of the allocation. + * + * Identical to nvgpu_vmalloc() except this will return zero'ed memory. + */ +#define nvgpu_vzalloc(g, size) __nvgpu_vzalloc(g, size, _THIS_IP_) + +/** + * nvgpu_kfree - Frees an alloc from nvgpu_kmalloc, nvgpu_kzalloc, + * nvgpu_kcalloc. + * + * @g: Current GPU. + * @addr: Address of object to free. + */ +#define nvgpu_kfree(g, addr) __nvgpu_kfree(g, addr) + +/** + * nvgpu_vfree - Frees an alloc from nvgpu_vmalloc, nvgpu_vzalloc. + * + * @g: Current GPU. + * @addr: Address of object to free. + */ +#define nvgpu_vfree(g, addr) __nvgpu_vfree(g, addr) + +#define kmem_dbg(fmt, args...) \ + gk20a_dbg(gpu_dbg_kmem, fmt, ##args) + +/** + * nvgpu_kmem_init - Initialize the kmem tracking stuff. + * + * @g: The driver to init. + * + * Returns non-zero on failure. + */ +int nvgpu_kmem_init(struct gk20a *g); + +/** + * nvgpu_kmem_fini - Finalize the kmem tracking code + * + * @g - The GPU. + * @flags - Flags that control operation of this finalization. + * + * Cleanup resources used by nvgpu_kmem. Available flags for cleanup are: + * + * %NVGPU_KMEM_FINI_DO_NOTHING + * %NVGPU_KMEM_FINI_FORCE_CLEANUP + * %NVGPU_KMEM_FINI_DUMP_ALLOCS + * %NVGPU_KMEM_FINI_WARN + * %NVGPU_KMEM_FINI_BUG + * + * %NVGPU_KMEM_FINI_DO_NOTHING will be overridden by anything else specified. + * Put another way don't just add %NVGPU_KMEM_FINI_DO_NOTHING and expect that + * to suppress other flags from doing anything. 
+ */ +void nvgpu_kmem_fini(struct gk20a *g, int flags); + +/* + * These will simply be ignored if CONFIG_NVGPU_TRACK_MEM_USAGE is not defined. + */ +#define NVGPU_KMEM_FINI_DO_NOTHING 0 +#define NVGPU_KMEM_FINI_FORCE_CLEANUP (1 << 0) +#define NVGPU_KMEM_FINI_DUMP_ALLOCS (1 << 1) +#define NVGPU_KMEM_FINI_WARN (1 << 2) +#define NVGPU_KMEM_FINI_BUG (1 << 3) + +/* + * When there's other implementations make sure they are included instead of + * Linux when not compiling on Linux! + */ +#include + +static inline void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear) { void *p; if (size > PAGE_SIZE) { if (clear) - p = vzalloc(size); + p = nvgpu_vzalloc(g, size); else - p = vmalloc(size); + p = nvgpu_vmalloc(g, size); } else { if (clear) - p = kzalloc(size, GFP_KERNEL); + p = nvgpu_kzalloc(g, size); else - p = kmalloc(size, GFP_KERNEL); + p = nvgpu_kmalloc(g, size); } return p; @@ -65,6 +242,7 @@ static inline void *__nvgpu_big_alloc(size_t size, bool clear) /** * nvgpu_big_malloc - Pick virtual or physical alloc based on @size * + * @g - The GPU. * @size - Size of the allocation. * * On some platforms (i.e Linux) it is possible to allocate memory directly @@ -83,30 +261,31 @@ static inline void *__nvgpu_big_alloc(size_t size, bool clear) * Returns a pointer to a virtual address range that the kernel can access or * %NULL on failure. */ -static inline void *nvgpu_big_malloc(size_t size) +static inline void *nvgpu_big_malloc(struct gk20a *g, size_t size) { - return __nvgpu_big_alloc(size, false); + return __nvgpu_big_alloc(g, size, false); } /** * nvgpu_big_malloc - Pick virtual or physical alloc based on @size * + * @g - The GPU. * @size - Size of the allocation. * * Zeroed memory version of nvgpu_big_malloc(). 
*/ -static inline void *nvgpu_big_zalloc(size_t size) +static inline void *nvgpu_big_zalloc(struct gk20a *g, size_t size) { - return __nvgpu_big_alloc(size, true); + return __nvgpu_big_alloc(g, size, true); } /** * nvgpu_big_free - Free and alloc from nvgpu_big_zalloc() or * nvgpu_big_malloc(). - * + * @g - The GPU. * @p - A pointer allocated by nvgpu_big_zalloc() or nvgpu_big_malloc(). */ -static inline void nvgpu_big_free(void *p) +static inline void nvgpu_big_free(struct gk20a *g, void *p) { /* * This will have to be fixed eventually. Allocs that use @@ -114,9 +293,9 @@ static inline void nvgpu_big_free(void *p) * when freeing. */ if (virt_addr_valid(p)) - kfree(p); + nvgpu_kfree(g, p); else - vfree(p); + nvgpu_vfree(g, p); } -#endif +#endif /* __NVGPU_KMEM_H__ */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h b/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h new file mode 100644 index 000000000..d1cd27f31 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __NVGPU_KMEM_LINUX_H__ +#define __NVGPU_KMEM_LINUX_H__ + +#include +#include +#include + +#include + +struct gk20a; +struct device; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE +void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, + unsigned long ip); +void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, + unsigned long ip); +void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip); +void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip); +void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, + unsigned long ip); +void __nvgpu_track_vfree(struct gk20a *g, void *addr); +void __nvgpu_track_kfree(struct gk20a *g, void *addr); + +void nvgpu_kmem_debugfs_init(struct device *dev); +#else +static inline void nvgpu_kmem_debugfs_init(struct device *dev) +{ +} +#endif + +/** + * DOC: Linux pass through kmem implementation. + * + * These are the Linux implementations of the various kmem functions defined by + * nvgpu. This should not be included directly - instead include . 
+ */ + +static inline void *__nvgpu_kmalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + return __nvgpu_track_kmalloc(g, size, ip); +#else + return kmalloc(size, GFP_KERNEL); +#endif +} + +static inline void *__nvgpu_kzalloc(struct gk20a *g, size_t size, + unsigned long ip) +{ +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + return __nvgpu_track_kzalloc(g, size, ip); +#else + return kzalloc(size, GFP_KERNEL); +#endif +} + +static inline void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, + unsigned long ip) +{ +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + return __nvgpu_track_kcalloc(g, n, size, ip); +#else + return kcalloc(n, size, GFP_KERNEL); +#endif +} + +static inline void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + return __nvgpu_track_vmalloc(g, size, ip); +#else + return vmalloc(size); +#endif +} + +static inline void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + return __nvgpu_track_vzalloc(g, size, ip); +#else + return vzalloc(size); +#endif +} + +static inline void __nvgpu_kfree(struct gk20a *g, void *addr) +{ +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + __nvgpu_track_kfree(g, addr); +#else + kfree(addr); +#endif +} + +static inline void __nvgpu_vfree(struct gk20a *g, void *addr) +{ +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + __nvgpu_track_vfree(g, addr); +#else + vfree(addr); +#endif +} + +#endif diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c index 3677b02d6..39559dacc 100644 --- a/drivers/gpu/nvgpu/pci.c +++ b/drivers/gpu/nvgpu/pci.c @@ -19,6 +19,7 @@ #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/platform_gk20a.h" @@ -358,6 +359,8 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, platform->g = g; g->dev = &pdev->dev; + nvgpu_kmem_init(g); + err = pci_enable_device(pdev); if (err) return err; diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c 
b/drivers/gpu/nvgpu/vgpu/vgpu.c index d8e0dfa1f..37b4633bf 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -19,6 +19,8 @@ #include #include +#include + #include "vgpu/vgpu.h" #include "vgpu/fecs_trace_vgpu.h" #include "gk20a/debug_gk20a.h" @@ -562,6 +564,8 @@ int vgpu_probe(struct platform_device *pdev) platform->vgpu_priv = priv; gk20a->dev = dev; + nvgpu_kmem_init(gk20a); + err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); if (err) return err;