linux-nv-oot/drivers/video/tegra/nvmap/nvmap_pp.c
Ketan Patil ec9ee786c4 nvmap: Add traces for big pages in page pool
There are no traces present for allocating big pages from the page pool,
which limits the ability to debug some of the memory leak bugs.
Hence, add traces for such operations.

Change-Id: I6cece3fd796c5c3e37f87647a1635274a654d727
Signed-off-by: Ketan Patil <ketanp@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2820607
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
2023-04-11 05:54:22 +00:00
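The big-page trace points used in this file (for example trace_get_page_list_page_bp() and trace_nvmap_pp_alloc_lots_bp()) are declared in the included trace/events/nvmap.h, which is not shown here. Purely as a sketch of what such a declaration might look like, assuming the standard TRACE_EVENT() machinery and the two u32 arguments passed from nvmap_page_pool_alloc_lots_bp():

/* Hypothetical sketch only -- the real event is defined in trace/events/nvmap.h. */
TRACE_EVENT(nvmap_pp_alloc_lots_bp,
	TP_PROTO(u32 allocated, u32 requested),
	TP_ARGS(allocated, requested),
	TP_STRUCT__entry(
		__field(u32, allocated)
		__field(u32, requested)
	),
	TP_fast_assign(
		__entry->allocated = allocated;
		__entry->requested = requested;
	),
	TP_printk("allocated=%u requested=%u",
		  __entry->allocated, __entry->requested)
);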


/*
* drivers/video/tegra/nvmap/nvmap_pp.c
*
* Manage page pools to speed up page allocation.
*
* Copyright (c) 2009-2022, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#define pr_fmt(fmt) "%s: " fmt, __func__
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/moduleparam.h>
#include <linux/nodemask.h>
#include <linux/shrinker.h>
#include <linux/kthread.h>
#include <linux/debugfs.h>
#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#endif
#include <trace/events/nvmap.h>
#include "nvmap_priv.h"
#define NVMAP_TEST_PAGE_POOL_SHRINKER 1
#define PENDING_PAGES_SIZE (SZ_1M / PAGE_SIZE)
static bool enable_pp = true;
static u32 pool_size;
static struct task_struct *background_allocator;
static DECLARE_WAIT_QUEUE_HEAD(nvmap_bg_wait);
#ifdef NVMAP_CONFIG_PAGE_POOL_DEBUG
static inline void __pp_dbg_var_add(u64 *dbg_var, u32 nr)
{
*dbg_var += nr;
}
#else
#define __pp_dbg_var_add(dbg_var, nr)
#endif
#define pp_alloc_add(pool, nr) __pp_dbg_var_add(&(pool)->allocs, nr)
#define pp_fill_add(pool, nr) __pp_dbg_var_add(&(pool)->fills, nr)
#define pp_hit_add(pool, nr) __pp_dbg_var_add(&(pool)->hits, nr)
#define pp_miss_add(pool, nr) __pp_dbg_var_add(&(pool)->misses, nr)
static int __nvmap_page_pool_fill_lots_locked(struct nvmap_page_pool *pool,
struct page **pages, u32 nr);
static inline struct page *get_zero_list_page(struct nvmap_page_pool *pool)
{
struct page *page;
trace_get_zero_list_page(pool->to_zero);
if (list_empty(&pool->zero_list))
return NULL;
page = list_first_entry(&pool->zero_list, struct page, lru);
list_del(&page->lru);
pool->to_zero--;
return page;
}
static inline struct page *get_page_list_page(struct nvmap_page_pool *pool)
{
struct page *page;
trace_get_page_list_page(pool->count);
if (list_empty(&pool->page_list))
return NULL;
page = list_first_entry(&pool->page_list, struct page, lru);
list_del(&page->lru);
pool->count--;
return page;
}
#ifdef CONFIG_ARM64_4K_PAGES
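/*
 * Take one big page (pool->pages_per_big_pg physically contiguous small
 * pages) off the big-page list. The small-page counter is decremented as
 * well, since big pages are also accounted for in pool->count.
 */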
static inline struct page *get_page_list_page_bp(struct nvmap_page_pool *pool)
{
struct page *page;
trace_get_page_list_page_bp(pool->big_page_count);
if (list_empty(&pool->page_list_bp))
return NULL;
page = list_first_entry(&pool->page_list_bp, struct page, lru);
list_del(&page->lru);
pool->count -= pool->pages_per_big_pg;
pool->big_page_count -= pool->pages_per_big_pg;
return page;
}
#endif /* CONFIG_ARM64_4K_PAGES */
static inline bool nvmap_bg_should_run(struct nvmap_page_pool *pool)
{
return !list_empty(&pool->zero_list);
}
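/* Zero the given pages and clean them out of the CPU cache. */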
static void nvmap_pp_zero_pages(struct page **pages, int nr)
{
int i;
for (i = 0; i < nr; i++) {
clear_highpage(pages[i]);
nvmap_clean_cache_page(pages[i]);
}
trace_nvmap_pp_zero_pages(nr);
}
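/*
 * Pull up to PENDING_PAGES_SIZE pages off the zero list, zero them with the
 * pool lock dropped, then move them onto the page list(s). Pages the pool
 * can no longer accommodate are returned to the system.
 */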
static void nvmap_pp_do_background_zero_pages(struct nvmap_page_pool *pool)
{
int i;
struct page *page;
int ret;
/*
* Statically declared array of pages to be zeroed in a batch,
* local to this thread but too big for the stack.
*/
static struct page *pending_zero_pages[PENDING_PAGES_SIZE];
rt_mutex_lock(&pool->lock);
for (i = 0; i < PENDING_PAGES_SIZE; i++) {
page = get_zero_list_page(pool);
if (page == NULL)
break;
pending_zero_pages[i] = page;
pool->under_zero++;
}
rt_mutex_unlock(&pool->lock);
nvmap_pp_zero_pages(pending_zero_pages, i);
rt_mutex_lock(&pool->lock);
ret = __nvmap_page_pool_fill_lots_locked(pool, pending_zero_pages, i);
pool->under_zero -= i;
rt_mutex_unlock(&pool->lock);
trace_nvmap_pp_do_background_zero_pages(ret, i);
for (; ret < i; ret++)
__free_page(pending_zero_pages[ret]);
}
/*
* This thread fills the page pools with zeroed pages. We avoid releasing the
* pages directly back into the page pools since we would then have to zero
* them ourselves. Instead it is easier to just reallocate zeroed pages. This
* happens in the background so that the overhead of allocating zeroed pages is
* not directly seen by userspace. Of course if the page pools are empty user
* space will suffer.
*/
static int nvmap_background_zero_thread(void *arg)
{
struct nvmap_page_pool *pool = &nvmap_dev->pool;
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)
struct sched_param param = { .sched_priority = 0 };
#endif
pr_info("PP zeroing thread starting.\n");
set_freezable();
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)
sched_setscheduler(current, SCHED_NORMAL, &param);
#else
sched_set_normal(current, MAX_NICE);
#endif
while (!kthread_should_stop()) {
while (nvmap_bg_should_run(pool))
nvmap_pp_do_background_zero_pages(pool);
wait_event_freezable(nvmap_bg_wait,
nvmap_bg_should_run(pool) ||
kthread_should_stop());
}
return 0;
}
#ifdef NVMAP_CONFIG_PAGE_POOL_DEBUG
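/*
 * Debug helper: take or drop an extra reference on @page so that the
 * expected refcount can be asserted when pages enter or leave the pool.
 */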
static void nvmap_pgcount(struct page *page, bool incr)
{
page_ref_add(page, incr ? 1 : -1);
}
#endif /* NVMAP_CONFIG_PAGE_POOL_DEBUG */
/*
* Free the passed number of pages from the page pool. This happens regardless
* of whether the page pools are enabled. This lets one disable the page pools
* and then free all the memory therein.
*
* FIXME: Pages in pending_zero_pages[] can still be unreleased.
*/
static ulong nvmap_page_pool_free_pages_locked(struct nvmap_page_pool *pool,
ulong nr_pages)
{
struct page *page;
bool use_page_list = false;
#ifdef CONFIG_ARM64_4K_PAGES
bool use_page_list_bp = false;
int i;
#endif /* CONFIG_ARM64_4K_PAGES */
pr_debug("req to release pages=%ld\n", nr_pages);
while (nr_pages) {
#ifdef CONFIG_ARM64_4K_PAGES
if (use_page_list_bp)
page = get_page_list_page_bp(pool);
else if (use_page_list)
#else
if (use_page_list)
#endif /* CONFIG_ARM64_4K_PAGES */
page = get_page_list_page(pool);
else
page = get_zero_list_page(pool);
if (!page) {
if (!use_page_list) {
use_page_list = true;
continue;
}
#ifdef CONFIG_ARM64_4K_PAGES
else if (!use_page_list_bp) {
use_page_list_bp = true;
continue;
}
#endif /* CONFIG_ARM64_4K_PAGES */
break;
}
#ifdef CONFIG_ARM64_4K_PAGES
if (use_page_list_bp) {
for (i = 0; i < pool->pages_per_big_pg; i++)
__free_page(nth_page(page, i));
pr_debug("released %d pages\n", pool->pages_per_big_pg);
if (nr_pages > pool->pages_per_big_pg)
nr_pages -= pool->pages_per_big_pg;
else
nr_pages = 0;
} else {
#endif /* CONFIG_ARM64_4K_PAGES */
__free_page(page);
nr_pages--;
pr_debug("released 1 page\n");
#ifdef CONFIG_ARM64_4K_PAGES
}
#endif /* CONFIG_ARM64_4K_PAGES */
}
pr_debug("remaining pages to release=%ld\n", nr_pages);
return nr_pages;
}
/*
* Alloc a bunch of pages from the page pool. This will alloc as many as it can
* and return the number of pages allocated. Pages are placed into the passed
* array in a linear fashion starting from index 0.
*/
int nvmap_page_pool_alloc_lots(struct nvmap_page_pool *pool,
struct page **pages, u32 nr)
{
u32 ind = 0;
u32 non_zero_idx;
u32 non_zero_cnt = 0;
if (!enable_pp || !nr)
return 0;
rt_mutex_lock(&pool->lock);
while (ind < nr) {
struct page *page = NULL;
if (!non_zero_cnt)
page = get_page_list_page(pool);
if (!page) {
page = get_zero_list_page(pool);
if (!page)
break;
if (!non_zero_cnt)
non_zero_idx = ind;
non_zero_cnt++;
}
pages[ind++] = page;
#ifdef NVMAP_CONFIG_PAGE_POOL_DEBUG
nvmap_pgcount(page, false);
BUG_ON(page_count(page) != 1);
#endif /* NVMAP_CONFIG_PAGE_POOL_DEBUG */
}
rt_mutex_unlock(&pool->lock);
/* Zero non-zeroed pages, if any */
if (non_zero_cnt)
nvmap_pp_zero_pages(&pages[non_zero_idx], non_zero_cnt);
pp_alloc_add(pool, ind);
pp_hit_add(pool, ind);
pp_miss_add(pool, nr - ind);
trace_nvmap_pp_alloc_lots(ind, nr);
return ind;
}
#ifdef CONFIG_ARM64_4K_PAGES
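/*
 * Allocate only whole big pages from the page pool. Returns the number of
 * small pages placed into the passed array, which is always a multiple of
 * pool->pages_per_big_pg; 0 is returned if the request is smaller than one
 * big page or big pages are not in use.
 */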
int nvmap_page_pool_alloc_lots_bp(struct nvmap_page_pool *pool,
struct page **pages, u32 nr)
{
u32 ind = 0, nr_pages = nr;
struct page *page;
if (!enable_pp || pool->pages_per_big_pg <= 1 ||
nr_pages < pool->pages_per_big_pg)
return 0;
rt_mutex_lock(&pool->lock);
while (nr_pages - ind >= pool->pages_per_big_pg) {
int i;
page = get_page_list_page_bp(pool);
if (!page)
break;
for (i = 0; i < pool->pages_per_big_pg; i++)
pages[ind + i] = nth_page(page, i);
ind += pool->pages_per_big_pg;
}
rt_mutex_unlock(&pool->lock);
trace_nvmap_pp_alloc_lots_bp(ind, nr);
return ind;
}
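/*
 * Return true if the pool->pages_per_big_pg pages starting at @idx are
 * physically contiguous and begin on a big page boundary, so they can be
 * queued on the big-page list as a single unit.
 */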
static bool nvmap_is_big_page(struct nvmap_page_pool *pool,
struct page **pages, int idx, int nr)
{
int i;
struct page *page = pages[idx];
if (pool->pages_per_big_pg <= 1)
return false;
if (nr - idx < pool->pages_per_big_pg)
return false;
/* Allow coalescing pages at big page boundary only */
if (page_to_phys(page) & (pool->big_pg_sz - 1))
return false;
for (i = 1; i < pool->pages_per_big_pg; i++)
if (pages[idx + i] != nth_page(page, i))
break;
return i == pool->pages_per_big_pg;
}
#endif /* CONFIG_ARM64_4K_PAGES */
/*
* Fill a bunch of pages into the page pool. This will fill as many as it can
* and return the number of pages filled. Pages are used from the start of the
* passed page pointer array in a linear fashion.
*
* You must lock the page pool before using this.
*/
static int __nvmap_page_pool_fill_lots_locked(struct nvmap_page_pool *pool,
struct page **pages, u32 nr)
{
int real_nr;
int pages_to_fill;
int ind = 0;
if (!enable_pp)
return 0;
BUG_ON(pool->count > pool->max);
real_nr = min_t(u32, pool->max - pool->count, nr);
pages_to_fill = real_nr;
if (real_nr == 0)
return 0;
while (real_nr > 0) {
#ifdef NVMAP_CONFIG_PAGE_POOL_DEBUG
nvmap_pgcount(pages[ind], true);
BUG_ON(page_count(pages[ind]) != 2);
#endif /* NVMAP_CONFIG_PAGE_POOL_DEBUG */
#ifdef CONFIG_ARM64_4K_PAGES
if (nvmap_is_big_page(pool, pages, ind, pages_to_fill)) {
list_add_tail(&pages[ind]->lru, &pool->page_list_bp);
ind += pool->pages_per_big_pg;
real_nr -= pool->pages_per_big_pg;
pool->big_page_count += pool->pages_per_big_pg;
} else {
#endif /* CONFIG_ARM64_4K_PAGES */
list_add_tail(&pages[ind++]->lru, &pool->page_list);
real_nr--;
#ifdef CONFIG_ARM64_4K_PAGES
}
#endif /* CONFIG_ARM64_4K_PAGES */
}
pool->count += ind;
BUG_ON(pool->count > pool->max);
pp_fill_add(pool, ind);
return ind;
}
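/*
 * Hand pages over to the pool for background zeroing and wake the zeroing
 * thread. Pages that still carry extra references are freed immediately
 * instead of being queued. Returns the number of pages consumed from the
 * passed array.
 */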
u32 nvmap_page_pool_fill_lots(struct nvmap_page_pool *pool,
struct page **pages, u32 nr)
{
u32 ret = 0;
u32 i;
u32 save_to_zero;
rt_mutex_lock(&pool->lock);
save_to_zero = pool->to_zero;
ret = min(nr, pool->max - pool->count - pool->to_zero - pool->under_zero);
for (i = 0; i < ret; i++) {
/* If the page has additional references, don't add it to the
* page pool. get_user_pages() on an mmap'ed nvmap handle can
* hold a refcount on the page. These pages can't be
* reused till the additional refs are dropped.
*/
if (page_count(pages[i]) > 1) {
__free_page(pages[i]);
} else {
list_add_tail(&pages[i]->lru, &pool->zero_list);
pool->to_zero++;
}
}
if (pool->to_zero)
wake_up_interruptible(&nvmap_bg_wait);
ret = i;
trace_nvmap_pp_fill_zero_lots(save_to_zero, pool->to_zero,
ret, nr);
rt_mutex_unlock(&pool->lock);
return ret;
}
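/* Pages currently held by the pool: ready-to-use plus not-yet-zeroed. */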
ulong nvmap_page_pool_get_unused_pages(void)
{
unsigned long total = 0;
if (!nvmap_dev)
return 0;
total = nvmap_dev->pool.count + nvmap_dev->pool.to_zero;
return total;
}
/*
* Remove and free to the system all the pages currently in the page
* pool. This operation will happen even if the page pools are disabled.
*/
int nvmap_page_pool_clear(void)
{
struct nvmap_page_pool *pool = &nvmap_dev->pool;
rt_mutex_lock(&pool->lock);
(void)nvmap_page_pool_free_pages_locked(pool, pool->count + pool->to_zero);
/* If, for some reason, an error occurred... */
if (!list_empty(&pool->page_list) || !list_empty(&pool->zero_list)) {
rt_mutex_unlock(&pool->lock);
return -ENOMEM;
}
rt_mutex_unlock(&pool->lock);
return 0;
}
/*
* Resizes the page pool to the passed size. If the passed size is 0 then
* all associated resources are released back to the system. This operation
* will only occur if the page pools are enabled.
*/
static void nvmap_page_pool_resize(struct nvmap_page_pool *pool, u32 size)
{
u64 curr;
rt_mutex_lock(&pool->lock);
curr = nvmap_page_pool_get_unused_pages();
if (curr > size)
(void)nvmap_page_pool_free_pages_locked(pool, curr - size);
pr_debug("page pool resized to %d from %d pages\n", size, pool->max);
pool->max = size;
rt_mutex_unlock(&pool->lock);
}
static unsigned long nvmap_page_pool_count_objects(struct shrinker *shrinker,
struct shrink_control *sc)
{
return nvmap_page_pool_get_unused_pages();
}
static unsigned long nvmap_page_pool_scan_objects(struct shrinker *shrinker,
struct shrink_control *sc)
{
unsigned long remaining;
pr_debug("sh_pages=%lu", sc->nr_to_scan);
rt_mutex_lock(&nvmap_dev->pool.lock);
remaining = nvmap_page_pool_free_pages_locked(
&nvmap_dev->pool, sc->nr_to_scan);
rt_mutex_unlock(&nvmap_dev->pool.lock);
return (remaining == sc->nr_to_scan) ?
SHRINK_STOP : (sc->nr_to_scan - remaining);
}
static struct shrinker nvmap_page_pool_shrinker = {
.count_objects = nvmap_page_pool_count_objects,
.scan_objects = nvmap_page_pool_scan_objects,
.seeks = 1,
};
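/*
 * Shrink the pools by *total_pages (or by everything currently unused when
 * *total_pages is 0) and report how many unused pages remain afterwards.
 */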
static void shrink_page_pools(unsigned long *total_pages, unsigned long *available_pages)
{
struct shrink_control sc;
if (*total_pages == 0) {
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = 0;
*total_pages = nvmap_page_pool_count_objects(NULL, &sc);
}
sc.nr_to_scan = *total_pages;
nvmap_page_pool_scan_objects(NULL, &sc);
*available_pages = nvmap_page_pool_count_objects(NULL, &sc);
}
#if NVMAP_TEST_PAGE_POOL_SHRINKER
static int shrink_pp;
static int shrink_set(const char *arg, const struct kernel_param *kp)
{
int cpu = smp_processor_id();
unsigned long long t1, t2;
unsigned long total_pages, available_pages;
param_set_int(arg, kp);
if (shrink_pp) {
total_pages = shrink_pp;
t1 = cpu_clock(cpu);
shrink_page_pools(&total_pages, &available_pages);
t2 = cpu_clock(cpu);
pr_debug("shrink page pools: time=%lldns, "
"total_pages_released=%lu, free_pages_available=%lu",
t2-t1, total_pages, available_pages);
}
return 0;
}
static int shrink_get(char *buff, const struct kernel_param *kp)
{
return param_get_int(buff, kp);
}
static struct kernel_param_ops shrink_ops = {
.get = shrink_get,
.set = shrink_set,
};
module_param_cb(shrink_page_pools, &shrink_ops, &shrink_pp, 0644);
#endif
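/*
 * enable_page_pools module parameter: disabling the pools also releases
 * every page they currently hold.
 */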
static int enable_pp_set(const char *arg, const struct kernel_param *kp)
{
int ret;
ret = param_set_bool(arg, kp);
if (ret)
return ret;
if (!enable_pp)
nvmap_page_pool_clear();
return 0;
}
static int enable_pp_get(char *buff, const struct kernel_param *kp)
{
return param_get_bool(buff, kp);
}
static struct kernel_param_ops enable_pp_ops = {
.get = enable_pp_get,
.set = enable_pp_set,
};
module_param_cb(enable_page_pools, &enable_pp_ops, &enable_pp, 0644);
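/*
 * pool_size module parameter: writing a new size (in pages) resizes the
 * pool immediately.
 */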
static int pool_size_set(const char *arg, const struct kernel_param *kp)
{
int ret = param_set_uint(arg, kp);
if (!ret && (pool_size != nvmap_dev->pool.max))
nvmap_page_pool_resize(&nvmap_dev->pool, pool_size);
return ret;
}
static int pool_size_get(char *buff, const struct kernel_param *kp)
{
return param_get_int(buff, kp);
}
static struct kernel_param_ops pool_size_ops = {
.get = pool_size_get,
.set = pool_size_set,
};
module_param_cb(pool_size, &pool_size_ops, &pool_size, 0644);
int nvmap_page_pool_debugfs_init(struct dentry *nvmap_root)
{
struct dentry *pp_root;
if (!nvmap_root)
return -ENODEV;
pp_root = debugfs_create_dir("pagepool", nvmap_root);
if (!pp_root)
return -ENODEV;
debugfs_create_u32("page_pool_available_pages",
S_IRUGO, pp_root,
&nvmap_dev->pool.count);
debugfs_create_u32("page_pool_pages_to_zero",
S_IRUGO, pp_root,
&nvmap_dev->pool.to_zero);
#ifdef CONFIG_ARM64_4K_PAGES
debugfs_create_u32("page_pool_available_big_pages",
S_IRUGO, pp_root,
&nvmap_dev->pool.big_page_count);
debugfs_create_u32("page_pool_big_page_size",
S_IRUGO, pp_root,
&nvmap_dev->pool.big_pg_sz);
debugfs_create_u64("total_big_page_allocs",
S_IRUGO, pp_root,
&nvmap_big_page_allocs);
#endif /* CONFIG_ARM64_4K_PAGES */
debugfs_create_u64("total_page_allocs",
S_IRUGO, pp_root,
&nvmap_total_page_allocs);
#ifdef NVMAP_CONFIG_PAGE_POOL_DEBUG
debugfs_create_u64("page_pool_allocs",
S_IRUGO, pp_root,
&nvmap_dev->pool.allocs);
debugfs_create_u64("page_pool_fills",
S_IRUGO, pp_root,
&nvmap_dev->pool.fills);
debugfs_create_u64("page_pool_hits",
S_IRUGO, pp_root,
&nvmap_dev->pool.hits);
debugfs_create_u64("page_pool_misses",
S_IRUGO, pp_root,
&nvmap_dev->pool.misses);
#endif
return 0;
}
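/*
 * Size the pool either from NVMAP_CONFIG_PAGE_POOL_SIZE or as a fraction of
 * total RAM, start the background zeroing thread and register the shrinker.
 */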
int nvmap_page_pool_init(struct nvmap_device *dev)
{
struct sysinfo info;
struct nvmap_page_pool *pool = &dev->pool;
memset(pool, 0x0, sizeof(*pool));
rt_mutex_init(&pool->lock);
INIT_LIST_HEAD(&pool->page_list);
INIT_LIST_HEAD(&pool->zero_list);
#ifdef CONFIG_ARM64_4K_PAGES
INIT_LIST_HEAD(&pool->page_list_bp);
pool->big_pg_sz = NVMAP_PP_BIG_PAGE_SIZE;
pool->pages_per_big_pg = NVMAP_PP_BIG_PAGE_SIZE >> PAGE_SHIFT;
#endif /* CONFIG_ARM64_4K_PAGES */
si_meminfo(&info);
pr_info("Total RAM pages: %lu\n", info.totalram);
if (!NVMAP_CONFIG_PAGE_POOL_SIZE)
/* NVMAP_PP_POOL_SIZE is the number of pool pages per 1K RAM pages,
* hence the >> 10. */
pool->max = (info.totalram * NVMAP_PP_POOL_SIZE) >> 10;
else
pool->max = NVMAP_CONFIG_PAGE_POOL_SIZE;
if (pool->max >= info.totalram)
goto fail;
pool_size = pool->max;
pr_info("nvmap page pool size: %u pages (%u MB)\n", pool->max,
(pool->max * info.mem_unit) >> 20);
background_allocator = kthread_run(nvmap_background_zero_thread,
NULL, "nvmap-bz");
if (IS_ERR(background_allocator))
goto fail;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 0, 0)
register_shrinker(&nvmap_page_pool_shrinker, "nvmap_pp_shrinker");
#else
register_shrinker(&nvmap_page_pool_shrinker);
#endif
return 0;
fail:
nvmap_page_pool_fini(dev);
return -ENOMEM;
}
int nvmap_page_pool_fini(struct nvmap_device *dev)
{
struct nvmap_page_pool *pool = &dev->pool;
/*
* If the background allocator was not created or failed to start,
* then the shrinker was never registered either.
*/
if (!IS_ERR_OR_NULL(background_allocator)) {
unregister_shrinker(&nvmap_page_pool_shrinker);
kthread_stop(background_allocator);
background_allocator = NULL;
}
WARN_ON(!list_empty(&pool->page_list));
return 0;
}