From faa604087504f623c43bc05c5164c42bdbeeef68 Mon Sep 17 00:00:00 2001
From: Ketan Patil
Date: Wed, 3 May 2023 15:01:32 +0000
Subject: [PATCH] video: tegra: nvmap: Switch to configurable granule size

In the compression carveout, the granule size should be configurable via
device tree. The earlier code assumed a fixed granule size of 2MB; update
it to use the configurable granule size read from the DT.

Bug 3956637

Change-Id: Ib1e966117b2bd9511cbcde37a6011c17f38f22e2
Signed-off-by: Ketan Patil
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2899865
Reviewed-by: svc-mobile-coverity
Reviewed-by: Pritesh Raithatha
Reviewed-by: Krishna Reddy
GVS: Gerrit_Virtual_Submit
---
 drivers/video/tegra/nvmap/nvmap_alloc.c | 18 ++++--
 drivers/video/tegra/nvmap/nvmap_heap.c  | 29 ++++------
 drivers/video/tegra/nvmap/nvmap_heap.h  | 12 ++++
 drivers/video/tegra/nvmap/nvmap_init.c  | 74 +++++++++++++++++--------
 drivers/video/tegra/nvmap/nvmap_ioctl.c | 13 +++--
 drivers/video/tegra/nvmap/nvmap_priv.h  | 15 +++--
 include/linux/nvmap.h                   |  1 +
 7 files changed, 105 insertions(+), 57 deletions(-)

diff --git a/drivers/video/tegra/nvmap/nvmap_alloc.c b/drivers/video/tegra/nvmap/nvmap_alloc.c
index 3a5a31e5..2e07eeba 100644
--- a/drivers/video/tegra/nvmap/nvmap_alloc.c
+++ b/drivers/video/tegra/nvmap/nvmap_alloc.c
@@ -719,22 +719,28 @@ static void alloc_handle(struct nvmap_client *client,
 	if (h->pgalloc.pages &&
 	    h->heap_type == NVMAP_HEAP_CARVEOUT_COMPRESSION) {
 		unsigned long page_count;
+		u32 granule_size = 0;
 		int i;
+		struct list_block *lb;
 
+		lb = container_of(b, struct list_block, block);
+		granule_size = lb->heap->granule_size;
 		page_count = h->size >> PAGE_SHIFT;
-		/* Iterate over 2MB chunks */
-		for (i = 0; i < page_count; i += PAGES_PER_2MB) {
+		/* Iterate over granules */
+		for (i = 0; i < page_count;
+		     i += PAGES_PER_GRANULE(granule_size)) {
 			cpu_addr = memremap(page_to_phys(
 						h->pgalloc.pages[i]),
-					SIZE_2MB, MEMREMAP_WB);
+					granule_size,
+					MEMREMAP_WB);
 			if (cpu_addr != NULL) {
-				memset(cpu_addr, 0, SIZE_2MB);
+				memset(cpu_addr, 0, granule_size);
 #ifdef NVMAP_UPSTREAM_KERNEL
 				arch_invalidate_pmem(cpu_addr,
-						SIZE_2MB);
+						granule_size);
 #else
 				__dma_flush_area(cpu_addr,
-						SIZE_2MB);
+						granule_size);
 #endif
 				memunmap(cpu_addr);
 			}
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c
index e137543b..27f92f03 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.c
+++ b/drivers/video/tegra/nvmap/nvmap_heap.c
@@ -52,17 +52,6 @@
 
 static struct kmem_cache *heap_block_cache;
 
-struct list_block {
-	struct nvmap_heap_block block;
-	struct list_head all_list;
-	unsigned int mem_prot;
-	phys_addr_t orig_addr;
-	size_t size;
-	size_t align;
-	struct nvmap_heap *heap;
-	struct list_head free_list;
-};
-
 struct device *dma_dev_from_handle(unsigned long type)
 {
 	int i;
@@ -161,9 +150,9 @@ static phys_addr_t nvmap_alloc_mem(struct nvmap_heap *h, size_t len,
 	err = nvmap_dma_alloc_attrs(dev, len, &pa,
 			GFP_KERNEL, DMA_ATTR_ALLOC_EXACT_SIZE);
 	/*
-	 * In case of Compression carveout, try to allocate the entire chunk in physically
+	 * In case of Compression carveout, try to allocate the entire buffer in physically
 	 * contiguous manner. If it returns error, then try to allocate the memory in
-	 * 2MB chunks.
+	 * granules of the specified granule size.
 	 */
 	if (h->is_compression_co && IS_ERR(err)) {
 		err = nvmap_dma_alloc_attrs(dev, len, &pa,
@@ -493,7 +482,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 					DMA_MEMORY_NOMAP);
 #else
 	err = nvmap_dma_declare_coherent_memory(h->dma_dev, 0, base, len,
-			DMA_MEMORY_NOMAP, co->is_compression_co);
+			DMA_MEMORY_NOMAP, co->is_compression_co, co->granule_size);
 #endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
 	if (!err) {
@@ -517,6 +506,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 	h->can_alloc = !!co->can_alloc;
 	h->is_ivm = co->is_ivm;
 	h->is_compression_co = co->is_compression_co;
+	h->granule_size = co->granule_size;
 	h->len = len;
 	h->free_size = len;
 	h->peer = co->peer;
@@ -645,15 +635,18 @@ int nvmap_flush_heap_block(struct nvmap_client *client,
 	h = block->handle;
 	if (h->pgalloc.pages) {
 		unsigned long page_count, i;
+		u32 granule_size = 0;
+		struct list_block *b = container_of(block, struct list_block, block);
 
 		/*
-		 * For Compression carveout with physically discontiguous 2MB chunks,
-		 * iterate over 2MB chunks and do cache maint for it.
+		 * For Compression carveout with physically discontiguous granules,
+		 * iterate over the granules and do cache maintenance on each.
 		 */
 		page_count = h->size >> PAGE_SHIFT;
-		for (i = 0; i < page_count; i += PAGES_PER_2MB) {
+		granule_size = b->heap->granule_size;
+		for (i = 0; i < page_count; i += PAGES_PER_GRANULE(granule_size)) {
 			phys = page_to_phys(h->pgalloc.pages[i]);
-			end = phys + SIZE_2MB;
+			end = phys + granule_size;
 			ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV, phys,
 					end, true, prot != NVMAP_HANDLE_INNER_CACHEABLE);
 			if (ret)
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.h b/drivers/video/tegra/nvmap/nvmap_heap.h
index b0f6a4cb..c9d574d7 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.h
+++ b/drivers/video/tegra/nvmap/nvmap_heap.h
@@ -32,6 +32,7 @@ struct nvmap_heap {
 	struct device *dma_dev;
 	bool is_ivm;
 	bool is_compression_co;
+	u32 granule_size;
 	bool can_alloc; /* Used only if is_ivm == true */
 	unsigned int peer; /* Used only if is_ivm == true */
 	unsigned int vm_id; /* Used only if is_ivm == true */
@@ -41,6 +42,17 @@ struct nvmap_heap {
 #endif /* NVMAP_CONFIG_DEBUG_MAPS */
 };
 
+struct list_block {
+	struct nvmap_heap_block block;
+	struct list_head all_list;
+	unsigned int mem_prot;
+	phys_addr_t orig_addr;
+	size_t size;
+	size_t align;
+	struct nvmap_heap *heap;
+	struct list_head free_list;
+};
+
 struct nvmap_heap *nvmap_heap_create(struct device *parent,
 				     const struct nvmap_platform_carveout *co,
 				     phys_addr_t base, size_t len, void *arg);
diff --git a/drivers/video/tegra/nvmap/nvmap_init.c b/drivers/video/tegra/nvmap/nvmap_init.c
index 5030c2b7..a38a239a 100644
--- a/drivers/video/tegra/nvmap/nvmap_init.c
+++ b/drivers/video/tegra/nvmap/nvmap_init.c
@@ -364,6 +364,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	int *bitmap_nos = NULL;
 	const char *device_name;
 	bool is_compression = false;
+	u32 granule_size = 0;
 
 	device_name = dev_name(dev);
 	if (!device_name) {
@@ -371,12 +372,17 @@
 		return NULL;
 	}
 
-	if (!strncmp(device_name, "compression", 11))
+	if (!strncmp(device_name, "compression", 11)) {
+		struct nvmap_platform_carveout *co;
+
 		is_compression = true;
+		co = nvmap_get_carveout_pdata("compression");
+		granule_size = co->granule_size;
+	}
 
 	if (is_compression) {
-		/* Calculation for Compression carveout should consider 2MB chunks */
-		count = size >> PAGE_SHIFT_2MB;
+		/* Calculation for Compression carveout should consider the granule size */
+		count = size >> PAGE_SHIFT_GRANULE(granule_size);
 	} else {
 		if (dma_get_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs)) {
 			page_count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -404,7 +410,7 @@
 	if (!is_compression)
 		pages = nvmap_kvzalloc_pages(count);
 	else
-		pages = nvmap_kvzalloc_pages(count * PAGES_PER_2MB);
+		pages = nvmap_kvzalloc_pages(count * PAGES_PER_GRANULE(granule_size));
 
 	if (!pages) {
 		kvfree(bitmap_nos);
@@ -418,7 +424,8 @@
 
 	if (!is_compression && unlikely(size > ((u64)mem->size << PAGE_SHIFT)))
 		goto err;
-	else if (is_compression && unlikely(size > ((u64)mem->size << PAGE_SHIFT_2MB)))
+	else if (is_compression &&
+		 unlikely(size > ((u64)mem->size << PAGE_SHIFT_GRANULE(granule_size))))
 		goto err;
 
 	if (((mem->flags & DMA_MEMORY_NOMAP) &&
@@ -447,10 +454,10 @@
 		if (!is_compression)
 			pages[i++] = pfn_to_page(mem->pfn_base + pageno);
 		else {
-			/* Handle 2MB chunks */
-			for (k = 0; k < (alloc_size * PAGES_PER_2MB); k++)
-				pages[i++] = pfn_to_page(mem->pfn_base +
-						pageno * PAGES_PER_2MB + k);
+			/* Handle granules */
+			for (k = 0; k < (alloc_size * PAGES_PER_GRANULE(granule_size)); k++)
+				pages[i++] = pfn_to_page(mem->pfn_base + pageno *
+						PAGES_PER_GRANULE(granule_size) + k);
 		}
 	}
 
@@ -464,7 +471,7 @@
 	if (!is_compression)
 		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT);
 	else
-		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT_2MB);
+		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT_GRANULE(granule_size));
 
 	if (!(mem->flags & DMA_MEMORY_NOMAP)) {
 		addr = mem->virt_base + (first_pageno << PAGE_SHIFT);
@@ -517,6 +524,7 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	struct dma_coherent_mem_replica *mem;
 	bool is_compression = false;
 	const char *device_name;
+	u32 granule_size = 0;
 
 	if (!dev || !dev->dma_mem)
 		return;
@@ -527,8 +535,13 @@
 		return;
 	}
 
-	if (!strncmp(device_name, "compression", 11))
+	if (!strncmp(device_name, "compression", 11)) {
+		struct nvmap_platform_carveout *co;
+
 		is_compression = true;
+		co = nvmap_get_carveout_pdata("compression");
+		granule_size = co->granule_size;
+	}
 
 	mem = (struct dma_coherent_mem_replica *)(dev->dma_mem);
 	if ((mem->flags & DMA_MEMORY_NOMAP) &&
@@ -546,8 +559,9 @@
 			bitmap_clear(mem->bitmap, pageno, 1);
 		}
 	} else {
-		for (i = 0; i < (size >> PAGE_SHIFT); i += PAGES_PER_2MB) {
-			pageno = (page_to_pfn(pages[i]) - mem->pfn_base) / PAGES_PER_2MB;
+		for (i = 0; i < (size >> PAGE_SHIFT); i += PAGES_PER_GRANULE(granule_size)) {
+			pageno = (page_to_pfn(pages[i]) - mem->pfn_base) /
+				 PAGES_PER_GRANULE(granule_size);
 			if (WARN_ONCE(pageno > mem->size,
 				      "invalid pageno:%d\n", pageno))
 				continue;
@@ -564,7 +578,7 @@
 	else
 		mem_addr = mem->virt_base;
 
-	page_shift_val = is_compression ? PAGE_SHIFT_2MB : PAGE_SHIFT;
+	page_shift_val = is_compression ? PAGE_SHIFT_GRANULE(granule_size) : PAGE_SHIFT;
 	if (mem && cpu_addr >= mem_addr &&
 	    cpu_addr - mem_addr < (u64)mem->size << page_shift_val) {
 		unsigned int page = (cpu_addr - mem_addr) >> page_shift_val;
@@ -573,7 +587,7 @@
 
 	if (DMA_ATTR_ALLOC_EXACT_SIZE & attrs) {
 		if (is_compression)
-			count = ALIGN_2MB(size) >> page_shift_val;
+			count = ALIGN_GRANULE_SIZE(size, granule_size) >> page_shift_val;
 		else
 			count = PAGE_ALIGN(size) >> page_shift_val;
 	}
@@ -664,7 +678,7 @@ static int nvmap_dma_assign_coherent_memory(struct device *dev,
 
 static int nvmap_dma_init_coherent_memory(
 	phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
-	struct dma_coherent_mem_replica **mem, bool is_compression)
+	struct dma_coherent_mem_replica **mem, bool is_compression, u32 granule_size)
 {
 	struct dma_coherent_mem_replica *dma_mem = NULL;
 	void *mem_base = NULL;
@@ -676,7 +690,7 @@ static int nvmap_dma_init_coherent_memory(
 		return -EINVAL;
 
 	if (is_compression)
-		pages = size >> PAGE_SHIFT_2MB;
+		pages = size >> PAGE_SHIFT_GRANULE(granule_size);
 	else
 		pages = size >> PAGE_SHIFT;
 
@@ -719,13 +733,14 @@ err_memunmap:
 }
 
 int nvmap_dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-		dma_addr_t device_addr, size_t size, int flags, bool is_compression)
+		dma_addr_t device_addr, size_t size, int flags, bool is_compression,
+		u32 granule_size)
 {
 	struct dma_coherent_mem_replica *mem;
 	int ret;
 
 	ret = nvmap_dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem,
-			is_compression);
+			is_compression, granule_size);
 	if (ret)
 		return ret;
 
@@ -757,7 +772,8 @@ static int __init nvmap_co_device_init(struct reserved_mem *rmem,
 #else
 	err = nvmap_dma_declare_coherent_memory(co->dma_dev, 0,
 			co->base, co->size,
-			DMA_MEMORY_NOMAP, co->is_compression_co);
+			DMA_MEMORY_NOMAP, co->is_compression_co,
+			co->granule_size);
 #endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
 	if (!err) {
@@ -871,7 +887,7 @@ finish:
 	return ret;
 }
 #else
-int __init nvmap_co_setup(struct reserved_mem *rmem)
+int __init nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size)
 {
 	struct nvmap_platform_carveout *co;
 	ulong start = sched_clock();
@@ -887,16 +903,18 @@ int __init nvmap_co_setup(struct reserved_mem *rmem)
 	co->base = rmem->base;
 	co->size = rmem->size;
 	co->cma_dev = NULL;
-	if (!strncmp(co->name, "compression", 11))
+	if (!strncmp(co->name, "compression", 11)) {
 		co->is_compression_co = true;
+		co->granule_size = granule_size;
+	}
 
 	nvmap_init_time += sched_clock() - start;
 	return ret;
 }
 #endif /* !NVMAP_LOADABLE_MODULE */
 
-RESERVEDMEM_OF_DECLARE(nvmap_co, "nvidia,generic_carveout", nvmap_co_setup);
 #ifndef NVMAP_LOADABLE_MODULE
+RESERVEDMEM_OF_DECLARE(nvmap_co, "nvidia,generic_carveout", nvmap_co_setup);
 RESERVEDMEM_OF_DECLARE(nvmap_vpr_co, "nvidia,vpr-carveout", nvmap_co_setup);
 RESERVEDMEM_OF_DECLARE(nvmap_fsi_co, "nvidia,fsi-carveout", nvmap_co_setup);
 #endif /* !NVMAP_LOADABLE_MODULE */
@@ -909,7 +927,9 @@ int __init nvmap_init(struct platform_device *pdev)
 {
 	int err;
 	struct reserved_mem rmem;
+
 #ifdef NVMAP_LOADABLE_MODULE
+	u32 granule_size = 0;
 	struct reserved_mem *rmem2;
 	struct device_node *np = pdev->dev.of_node;
 	struct of_phandle_iterator it;
@@ -919,6 +939,12 @@ int __init nvmap_init(struct platform_device *pdev)
 	while (!of_phandle_iterator_next(&it) && it.node) {
 		if (of_device_is_available(it.node) &&
 		    !of_device_is_compatible(it.node, "nvidia,ivm_carveout")) {
+			/* Read granule size in case of compression carveout */
+			if (of_device_is_compatible(it.node, "nvidia,compression_carveout")
+			    && of_property_read_u32(it.node, "granule-size", &granule_size)) {
+				pr_err("granule-size property is missing\n");
+				return -EINVAL;
+			}
 			rmem2 = of_reserved_mem_lookup(it.node);
 			if (!rmem2) {
 				if (!of_property_read_string(it.node, "compatible", &compp))
 					pr_err("of_reserved_mem_lookup() failed for %s\n",
 					       compp);
 				return -EINVAL;
 			}
-			nvmap_co_setup(rmem2);
+			nvmap_co_setup(rmem2, granule_size);
 		}
 	}
 }
diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c
index d3cc422a..4519a46a 100644
--- a/drivers/video/tegra/nvmap/nvmap_ioctl.c
+++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c
@@ -181,7 +181,7 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 	struct nvmap_handle *handle;
 	struct dma_buf *dmabuf = NULL;
 	bool is_ro;
-	int err;
+	int err, i;
 	unsigned int page_sz = PAGE_SIZE;
 
 	if (copy_from_user(&op, arg, sizeof(op)))
@@ -203,11 +203,16 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 		return -EINVAL;
 
 	/*
-	 * In case of Compression carveout, the handle size needs to be aligned to 2MB.
+	 * In case of Compression carveout, the handle size needs to be aligned to the granule size.
 	 */
 	if (op.heap_mask & NVMAP_HEAP_CARVEOUT_COMPRESSION) {
-		handle->size = ALIGN_2MB(handle->size);
-		page_sz = SIZE_2MB;
+		u32 granule_size = 0;
+
+		for (i = 0; i < nvmap_dev->nr_carveouts; i++)
+			if (nvmap_dev->heaps[i].heap_bit & NVMAP_HEAP_CARVEOUT_COMPRESSION)
+				granule_size = nvmap_dev->heaps[i].carveout->granule_size;
+		handle->size = ALIGN_GRANULE_SIZE(handle->size, granule_size);
+		page_sz = granule_size;
 	}
 
 	if (!is_nvmap_memory_available(handle->size, op.heap_mask)) {
diff --git a/drivers/video/tegra/nvmap/nvmap_priv.h b/drivers/video/tegra/nvmap/nvmap_priv.h
index 729ea9aa..27de4899 100644
--- a/drivers/video/tegra/nvmap/nvmap_priv.h
+++ b/drivers/video/tegra/nvmap/nvmap_priv.h
@@ -41,10 +41,9 @@
 
 #include
 
-#define SIZE_2MB (2*1024*1024)
-#define ALIGN_2MB(size) ((size + SIZE_2MB - 1) & ~(SIZE_2MB - 1))
-#define PAGE_SHIFT_2MB 21
-#define PAGES_PER_2MB (SIZE_2MB / PAGE_SIZE)
+#define ALIGN_GRANULE_SIZE(size, GRANULE_SIZE) ((size + GRANULE_SIZE - 1) & ~(GRANULE_SIZE - 1))
+#define PAGE_SHIFT_GRANULE(GRANULE_SIZE) (order_base_2(GRANULE_SIZE))
+#define PAGES_PER_GRANULE(GRANULE_SIZE) (GRANULE_SIZE / PAGE_SIZE)
 
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
 
@@ -487,14 +486,20 @@ struct dma_coherent_mem_replica {
 };
 
 int nvmap_dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-		dma_addr_t device_addr, size_t size, int flags, bool is_compression);
+		dma_addr_t device_addr, size_t size, int flags, bool is_compression,
+		u32 granule_size);
 #endif
 
 int nvmap_probe(struct platform_device *pdev);
 int nvmap_remove(struct platform_device *pdev);
 int nvmap_init(struct platform_device *pdev);
 int nvmap_create_carveout(const struct nvmap_platform_carveout *co);
+
+#ifdef NVMAP_LOADABLE_MODULE
+int nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size);
+#else
 int nvmap_co_setup(struct reserved_mem *rmem);
+#endif
 
 struct device *dma_dev_from_handle(unsigned long type);
 struct nvmap_heap_block *nvmap_carveout_alloc(struct nvmap_client *dev,
diff --git a/include/linux/nvmap.h b/include/linux/nvmap.h
index d5e0e9c6..ad9ac6a6 100644
--- a/include/linux/nvmap.h
+++ b/include/linux/nvmap.h
@@ -95,6 +95,7 @@ struct nvmap_platform_carveout {
 	bool init_done;	/* FIXME: remove once all caveouts use reserved-memory */
 	struct nvmap_pm_ops pm_ops;
 	bool is_compression_co; /* Compression carveout is treated differently */
+	u32 granule_size; /* Granule size for compression carveout */
 };
 
 struct nvmap_platform_data {
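
For reference, the granule-size value consumed by nvmap_init() above is read with
of_property_read_u32() from the compression carveout's reserved-memory node. The
sketch below shows what such a node might look like; only the
"nvidia,compression_carveout" compatible string and the granule-size property name
come from this patch, while the node name, label, addresses, sizes, and the 4MB
value are illustrative assumptions:

	reserved-memory {
		#address-cells = <2>;
		#size-cells = <2>;
		ranges;

		/* Hypothetical example node; base address and sizes are placeholders. */
		compression_co: compression_carveout@f0000000 {
			compatible = "nvidia,compression_carveout";
			reg = <0x0 0xf0000000 0x0 0x10000000>;	/* 256MB carveout */
			granule-size = <0x400000>;		/* 4MB granules */
			status = "okay";
		};
	};

Note that PAGE_SHIFT_GRANULE() is implemented with order_base_2(), which rounds up
for non-power-of-two values, and ALIGN_GRANULE_SIZE() relies on power-of-two
masking, so the granule size is expected to be a power-of-two multiple of
PAGE_SIZE (the previously hardcoded 2MB value satisfied this).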