From 858d73775a5b6851187268ccd7ebaede31cd4ccc Mon Sep 17 00:00:00 2001 From: Ketan Patil Date: Thu, 31 Jul 2025 09:30:36 +0000 Subject: [PATCH] video: tegra: nvmap: Account NvMap memory for OOM Decisions Account NvMap allocated memory into both RSS and CG tracking to make efficient OOM kill decisions during memory pressure. NvMap allocates memory via kernel APIs like alloc_pages, the kernel memory is not accounted on behalf of process who requests the allocation. Hence in case OOM, the OOM killer never kills the process who has allocated memory via NvMap even though this process might be holding most of the memory. Solve this issue using following approach: - Use __GFP_ACCOUNT and __GFP_NORETRY flag - __GFP_NORETRY will not let the current allocation flow to go into OOM path, so that it will never trigger OOM. - __GFP_ACCOUNT causes the allocation to be accounted to kmemcg. So any allocation done by NvMap will be definitely accounted to kmemcg and cgroups can be used to define memory limits. - Add RSS counting for the process which allocates by NvMap, so that OOM score for that process will get updated and OOM killer can pick this process based upon the OOM score. - Every process that has a reference to NvMap Handle would have the memory size accounted into its RSS. On releasing the reference to handle, the RSS would be reduced. 
Bug 5222690 Change-Id: I3fa9b76ec9fc8d7f805111cb96e11e2ab1db42ce Signed-off-by: Ketan Patil Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3447072 GVS: buildbot_gerritrpt Reviewed-by: Krishna Reddy Reviewed-by: Jon Hunter Reviewed-by: Ajay Nandakumar Mannargudi --- drivers/video/tegra/nvmap/nvmap_alloc.c | 46 ++++++++++++++++++++- drivers/video/tegra/nvmap/nvmap_alloc_int.h | 2 +- drivers/video/tegra/nvmap/nvmap_dev.c | 11 +++++ drivers/video/tegra/nvmap/nvmap_handle.c | 39 +++++++++++++++-- drivers/video/tegra/nvmap/nvmap_handle.h | 18 +++++++- scripts/conftest/Makefile | 1 + scripts/conftest/conftest.sh | 17 ++++++++ 7 files changed, 127 insertions(+), 7 deletions(-) diff --git a/drivers/video/tegra/nvmap/nvmap_alloc.c b/drivers/video/tegra/nvmap/nvmap_alloc.c index 33f41b7c..6016b1a8 100644 --- a/drivers/video/tegra/nvmap/nvmap_alloc.c +++ b/drivers/video/tegra/nvmap/nvmap_alloc.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "nvmap_stats.h" #include "nvmap_dev.h" #include "nvmap_alloc.h" @@ -112,6 +113,8 @@ static int handle_page_alloc(struct nvmap_client *client, int pages_per_big_pg = 0; #endif #endif /* CONFIG_ARM64_4K_PAGES */ + struct mm_struct *mm = current->mm; + struct nvmap_handle_ref *ref; if (check_mul_overflow(nr_page, sizeof(*pages), &tot_size)) return -EOVERFLOW; @@ -120,6 +123,13 @@ static int handle_page_alloc(struct nvmap_client *client, if (!pages) return -ENOMEM; + /* + * Get refcount on mm_struct, so that it won't be freed until + * nvmap reduces refcount after it reduces the RSS counter. + */ + if (!mmget_not_zero(mm)) + goto page_free; + if (contiguous) { struct page *page; page = nvmap_alloc_pages_exact(gfp, size, h->numa_id); @@ -198,6 +208,12 @@ static int handle_page_alloc(struct nvmap_client *client, nvmap_total_page_allocs = result; + /* + * Increment the RSS counter of the allocating process by number of pages allocated. 
+ */ + h->anon_count = nr_page; + nvmap_add_mm_counter(mm, MM_ANONPAGES, nr_page); + /* * Make sure any data in the caches is cleaned out before * passing these pages to userspace. Many nvmap clients assume that @@ -211,11 +227,28 @@ static int handle_page_alloc(struct nvmap_client *client, h->pgalloc.pages = pages; h->pgalloc.contig = contiguous; atomic_set(&h->pgalloc.ndirty, 0); + + nvmap_ref_lock(client); + ref = __nvmap_validate_locked(client, h, false); + if (ref) { + ref->mm = mm; + ref->anon_count = h->anon_count; + } else { + nvmap_add_mm_counter(mm, MM_ANONPAGES, -nr_page); + mmput(mm); + } + + nvmap_ref_unlock(client); return 0; fail: while (i--) __free_page(pages[i]); + + /* Incase of failure, release the reference on mm_struct. */ + mmput(mm); + +page_free: nvmap_altfree(pages, tot_size); wmb(); return -ENOMEM; @@ -679,9 +712,18 @@ void nvmap_alloc_free(struct page **pages, unsigned int nr_page, bool from_va, pages[i] = nvmap_to_page(pages[i]); #ifdef NVMAP_CONFIG_PAGE_POOLS - if (!from_va && !is_subhandle) - page_index = nvmap_page_pool_fill_lots(nvmap_dev->pool, + if (!from_va && !is_subhandle) { + /* + * When the process is exiting with kill signal pending, don't release the memory + * back into page pool. So that memory would be released back to the kernel and OOM + * killer would be able to actually free the memory. 
+ */ + if (fatal_signal_pending(current) == 0 && + sigismember(&current->signal->shared_pending.signal, SIGKILL) == 0) { + page_index = nvmap_page_pool_fill_lots(nvmap_dev->pool, pages, nr_page); + } + } #endif for (i = page_index; i < nr_page; i++) { diff --git a/drivers/video/tegra/nvmap/nvmap_alloc_int.h b/drivers/video/tegra/nvmap/nvmap_alloc_int.h index 8960f265..b8eb4d0b 100644 --- a/drivers/video/tegra/nvmap/nvmap_alloc_int.h +++ b/drivers/video/tegra/nvmap/nvmap_alloc_int.h @@ -8,7 +8,7 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0) -#define GFP_NVMAP (GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN) +#define GFP_NVMAP (GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN | __GFP_ACCOUNT | __GFP_NORETRY) #ifdef CONFIG_ARM64_4K_PAGES #define NVMAP_PP_BIG_PAGE_SIZE (0x10000) diff --git a/drivers/video/tegra/nvmap/nvmap_dev.c b/drivers/video/tegra/nvmap/nvmap_dev.c index 9b03e1dd..6650870e 100644 --- a/drivers/video/tegra/nvmap/nvmap_dev.c +++ b/drivers/video/tegra/nvmap/nvmap_dev.c @@ -270,6 +270,17 @@ static void destroy_client(struct nvmap_client *client) if (ref->handle->owner == client) ref->handle->owner = NULL; + /* + * When a reference is freed, decrement rss counter of the process corresponding + * to this ref and do mmput so that mm_struct can be freed, if required. + */ + if (ref->mm != NULL && ref->anon_count != 0) { + nvmap_add_mm_counter(ref->mm, MM_ANONPAGES, -ref->anon_count); + mmput(ref->mm); + ref->mm = NULL; + ref->anon_count = 0; + } + if (ref->is_ro) dma_buf_put(ref->handle->dmabuf_ro); else diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c index f5010ae9..1d37de25 100644 --- a/drivers/video/tegra/nvmap/nvmap_handle.c +++ b/drivers/video/tegra/nvmap/nvmap_handle.c @@ -197,7 +197,7 @@ static inline void nvmap_lru_del(struct nvmap_handle *h) * * Note: to call this function make sure you own the client ref lock. 
*/ -static struct nvmap_handle_ref *__nvmap_validate_locked(struct nvmap_client *c, +struct nvmap_handle_ref *__nvmap_validate_locked(struct nvmap_client *c, struct nvmap_handle *h, bool is_ro) { @@ -588,17 +588,31 @@ struct nvmap_handle_ref *nvmap_duplicate_handle(struct nvmap_client *client, atomic_set(&ref->dupes, 1); ref->handle = h; + + /* + * When a new reference is created to the handle, save mm, anon_count in ref and + * increment ref count of mm. + */ + ref->mm = current->mm; + ref->anon_count = h->anon_count; add_handle_ref(client, ref); + if (ref->anon_count != 0 && ref->mm != NULL) { + if (!mmget_not_zero(ref->mm)) + goto exit; + + nvmap_add_mm_counter(ref->mm, MM_ANONPAGES, ref->anon_count); + } + if (is_ro) { ref->is_ro = true; if (!h->dmabuf_ro) - goto exit; + goto exit_mm; get_dma_buf(h->dmabuf_ro); } else { ref->is_ro = false; if (!h->dmabuf) - goto exit; + goto exit_mm; get_dma_buf(h->dmabuf); } @@ -607,6 +621,14 @@ out: NVMAP_TP_ARGS_CHR(client, h, ref)); return ref; +exit_mm: + if (ref->anon_count != 0 && ref->mm != NULL) { + nvmap_add_mm_counter(ref->mm, MM_ANONPAGES, -ref->anon_count); + mmput(ref->mm); + ref->mm = NULL; + ref->anon_count = 0; + } + exit: remove_handle_ref(client, ref); pr_err("dmabuf is NULL\n"); @@ -776,6 +798,17 @@ void nvmap_free_handle(struct nvmap_client *client, if (h->owner == client) h->owner = NULL; + /* + * When a reference is freed, decrement rss counter of the process corresponding + * to this ref and do mmput so that mm_struct can be freed, if required. 
+ */ + if (ref->mm != NULL && ref->anon_count != 0) { + nvmap_add_mm_counter(ref->mm, MM_ANONPAGES, -ref->anon_count); + mmput(ref->mm); + ref->mm = NULL; + ref->anon_count = 0; + } + if (is_ro) dma_buf_put(ref->handle->dmabuf_ro); else diff --git a/drivers/video/tegra/nvmap/nvmap_handle.h b/drivers/video/tegra/nvmap/nvmap_handle.h index 7d279892..a29571bc 100644 --- a/drivers/video/tegra/nvmap/nvmap_handle.h +++ b/drivers/video/tegra/nvmap/nvmap_handle.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only - * SPDX-FileCopyrightText: Copyright (c) 2009-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * GPU memory management driver for Tegra */ @@ -7,6 +7,7 @@ #ifndef _NVMAP_HANDLE_H_ #define _NVMAP_HANDLE_H_ +#include #include #include @@ -72,6 +73,7 @@ struct nvmap_handle { int numa_id; u64 serial_id; bool has_hugetlbfs_pages; + u64 anon_count; }; struct nvmap_handle_info { @@ -90,6 +92,8 @@ struct nvmap_handle_ref { struct rb_node node; atomic_t dupes; /* number of times to free on file close */ bool is_ro; + struct mm_struct *mm; + u64 anon_count; }; struct handles_range { @@ -380,6 +384,10 @@ void *__nvmap_mmap(struct nvmap_handle *h); void __nvmap_munmap(struct nvmap_handle *h, void *addr); +struct nvmap_handle_ref *__nvmap_validate_locked(struct nvmap_client *c, + struct nvmap_handle *h, + bool is_ro); + #ifdef NVMAP_CONFIG_SCIIPC int nvmap_sci_ipc_init(void); void nvmap_sci_ipc_exit(void); @@ -426,4 +434,12 @@ static inline struct dma_buf *nvmap_id_array_id_release(struct xarray *xarr, u32 } #endif /* NVMAP_CONFIG_HANDLE_AS_ID */ +static inline void nvmap_add_mm_counter(struct mm_struct *mm, int member, long value) +{ +#if defined(NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT) /* Linux v6.2 */ + percpu_counter_add(&mm->rss_stat[member], value); +#else + atomic_long_add_return(value, &mm->rss_stat.count[member]); +#endif +} #endif /* 
_NVMAP_HANDLE_H_ */ diff --git a/scripts/conftest/Makefile b/scripts/conftest/Makefile index 13531c3e..91a881cf 100644 --- a/scripts/conftest/Makefile +++ b/scripts/conftest/Makefile @@ -171,6 +171,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_complete_and_exit NV_CONFTEST_FUNCTION_COMPILE_TESTS += mii_bus_struct_has_read_c45 NV_CONFTEST_FUNCTION_COMPILE_TESTS += mii_bus_struct_has_write_c45 NV_CONFTEST_FUNCTION_COMPILE_TESTS += media_entity_remote_pad +NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_struct_struct_has_percpu_counter_rss_stat NV_CONFTEST_FUNCTION_COMPILE_TESTS += module_import_ns_calls_stringify NV_CONFTEST_FUNCTION_COMPILE_TESTS += mrq_pcie_request_struct_present NV_CONFTEST_FUNCTION_COMPILE_TESTS += msi_get_virq diff --git a/scripts/conftest/conftest.sh b/scripts/conftest/conftest.sh index 512b3818..911a2f84 100755 --- a/scripts/conftest/conftest.sh +++ b/scripts/conftest/conftest.sh @@ -7933,6 +7933,23 @@ compile_test() { compile_check_conftest "$CODE" "NV_MII_BUS_STRUCT_HAS_WRITE_C45" "" "types" ;; + mm_struct_struct_has_percpu_counter_rss_stat) + # + # Determine if the 'rss_stat' member of the 'mm_struct' structure is + # defined with 'percpu_counter'. + # + # This change was made in Linux v6.2 by commit f1a7941243c1 ("mm: + # convert mm's rss stats into percpu_counter2"). + # + CODE=" + #include + void conftest_mm_struct_struct_has_percpu_counter_rss_stat(struct mm_struct *mm) { + percpu_counter_add(&mm->rss_stat[0], 0); + }" + + compile_check_conftest "$CODE" "NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT" "" "types" + ;; + module_import_ns_calls_stringify) # # Determine if the MODULE_IMPORT_NS macro takes a string literal as