From fcb299a9cd9c7ecb58331a8150c74dec3e5116ec Mon Sep 17 00:00:00 2001
From: Ketan Patil <ketanp@nvidia.com>
Date: Mon, 12 Aug 2024 11:28:50 +0000
Subject: [PATCH] video: tegra: nvmap: Add flag to skip cache cleaning at alloc

For large buffer allocations (e.g. 4GB, 5GB) from the IOMMU heap, 70% of
the total time is spent in cache cleaning. The CUDA team confirmed that
it is not always necessary to clean the CPU cache during the allocation
flow. Hence, add a flag (NVMAP_HANDLE_SKIP_CACHE_CLEAN) that lets users
of libnvrm_mem skip cache cleaning at allocation when it is not needed.

Bug 4628529

Change-Id: I9f4cdc930fcc673b69344f0167c8bc1378ec8d61
Signed-off-by: Ketan Patil <ketanp@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3192376
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
---
 drivers/video/tegra/nvmap/nvmap_alloc.c | 24 ++++++++++++++----------
 include/linux/nvmap.h                   |  1 +
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/video/tegra/nvmap/nvmap_alloc.c b/drivers/video/tegra/nvmap/nvmap_alloc.c
index f2924fbf..4391ff37 100644
--- a/drivers/video/tegra/nvmap/nvmap_alloc.c
+++ b/drivers/video/tegra/nvmap/nvmap_alloc.c
@@ -124,7 +124,9 @@ static int handle_page_alloc(struct nvmap_client *client,
 
 			for (idx = 0; idx < pages_per_big_pg; idx++)
 				pages[i + idx] = nth_page(page, idx);
-			nvmap_clean_cache(&pages[i], pages_per_big_pg);
+
+			if ((h->userflags & NVMAP_HANDLE_SKIP_CACHE_CLEAN) == 0)
+				nvmap_clean_cache(&pages[i], pages_per_big_pg);
 		}
 		nvmap_big_page_allocs += page_index;
 #endif /* CONFIG_ARM64_4K_PAGES */
@@ -151,15 +153,17 @@ static int handle_page_alloc(struct nvmap_client *client,
 		nvmap_total_page_allocs += nr_page;
 	}
 
-	/*
-	 * Make sure any data in the caches is cleaned out before
-	 * passing these pages to userspace. Many nvmap clients assume that
-	 * the buffers are clean as soon as they are allocated. nvmap
-	 * clients can pass the buffer to hardware as it is without any
-	 * explicit cache maintenance.
-	 */
-	if (page_index < nr_page)
-		nvmap_clean_cache(&pages[page_index], nr_page - page_index);
+	if ((h->userflags & NVMAP_HANDLE_SKIP_CACHE_CLEAN) == 0) {
+		/*
+		 * Make sure any data in the caches is cleaned out before
+		 * passing these pages to userspace. Many nvmap clients assume that
+		 * the buffers are clean as soon as they are allocated. nvmap
+		 * clients can pass the buffer to hardware as it is without any
+		 * explicit cache maintenance.
+		 */
+		if (page_index < nr_page)
+			nvmap_clean_cache(&pages[page_index], nr_page - page_index);
+	}
 
 	h->pgalloc.pages = pages;
 	h->pgalloc.contig = contiguous;
diff --git a/include/linux/nvmap.h b/include/linux/nvmap.h
index 9117b97d..d0f05a7c 100644
--- a/include/linux/nvmap.h
+++ b/include/linux/nvmap.h
@@ -43,6 +43,7 @@
 #define NVMAP_HANDLE_CACHE_SYNC      (0x1ul << 7)
 #define NVMAP_HANDLE_CACHE_SYNC_AT_RESERVE      (0x1ul << 8)
 #define NVMAP_HANDLE_RO	             (0x1ul << 9)
+#define NVMAP_HANDLE_SKIP_CACHE_CLEAN (0x1ul << 10)
 
 #ifdef NVMAP_CONFIG_PAGE_POOLS
 ulong nvmap_page_pool_get_unused_pages(void);