diff --git a/drivers/video/tegra/nvmap/nvmap_alloc.c b/drivers/video/tegra/nvmap/nvmap_alloc.c
index 3a5a31e5..2e07eeba 100644
--- a/drivers/video/tegra/nvmap/nvmap_alloc.c
+++ b/drivers/video/tegra/nvmap/nvmap_alloc.c
@@ -719,22 +719,28 @@ static void alloc_handle(struct nvmap_client *client,
 		if (h->pgalloc.pages &&
 		    h->heap_type == NVMAP_HEAP_CARVEOUT_COMPRESSION) {
 			unsigned long page_count;
+			u32 granule_size = 0;
 			int i;
+			struct list_block *lb;
 
+			lb = container_of(b, struct list_block, block);
+			granule_size = lb->heap->granule_size;
 			page_count = h->size >> PAGE_SHIFT;
-			/* Iterate over 2MB chunks */
-			for (i = 0; i < page_count; i += PAGES_PER_2MB) {
+			/* Iterate over granules */
+			for (i = 0; i < page_count;
+			     i += PAGES_PER_GRANULE(granule_size)) {
 				cpu_addr = memremap(page_to_phys(
 						h->pgalloc.pages[i]),
-						SIZE_2MB, MEMREMAP_WB);
+						granule_size,
+						MEMREMAP_WB);
 				if (cpu_addr != NULL) {
-					memset(cpu_addr, 0, SIZE_2MB);
+					memset(cpu_addr, 0, granule_size);
 #ifdef NVMAP_UPSTREAM_KERNEL
 					arch_invalidate_pmem(cpu_addr,
-							SIZE_2MB);
+							granule_size);
 #else
 					__dma_flush_area(cpu_addr,
-							SIZE_2MB);
+							granule_size);
 #endif
 					memunmap(cpu_addr);
 				}
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c
index e137543b..27f92f03 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.c
+++ b/drivers/video/tegra/nvmap/nvmap_heap.c
@@ -52,17 +52,6 @@
 
 static struct kmem_cache *heap_block_cache;
 
-struct list_block {
-	struct nvmap_heap_block block;
-	struct list_head all_list;
-	unsigned int mem_prot;
-	phys_addr_t orig_addr;
-	size_t size;
-	size_t align;
-	struct nvmap_heap *heap;
-	struct list_head free_list;
-};
-
 struct device *dma_dev_from_handle(unsigned long type)
 {
 	int i;
@@ -161,9 +150,9 @@ static phys_addr_t nvmap_alloc_mem(struct nvmap_heap *h, size_t len,
 	err = nvmap_dma_alloc_attrs(dev, len, &pa, GFP_KERNEL,
 			DMA_ATTR_ALLOC_EXACT_SIZE);
 	/*
-	 * In case of Compression carveout, try to allocate the entire chunk in physically
+	 * In case of Compression carveout, try to allocate the entire granule in a physically
 	 * contiguous manner. If it returns error, then try to allocate the memory in
-	 * 2MB chunks.
+	 * granules of the specified size.
 	 */
 	if (h->is_compression_co && IS_ERR(err)) {
 		err = nvmap_dma_alloc_attrs(dev, len, &pa,
@@ -493,7 +482,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 					    DMA_MEMORY_NOMAP);
 #else
 	err = nvmap_dma_declare_coherent_memory(h->dma_dev, 0, base, len,
-			DMA_MEMORY_NOMAP, co->is_compression_co);
+			DMA_MEMORY_NOMAP, co->is_compression_co, co->granule_size);
 #endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
 	if (!err) {
@@ -517,6 +506,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 	h->can_alloc = !!co->can_alloc;
 	h->is_ivm = co->is_ivm;
 	h->is_compression_co = co->is_compression_co;
+	h->granule_size = co->granule_size;
 	h->len = len;
 	h->free_size = len;
 	h->peer = co->peer;
@@ -645,15 +635,18 @@ int nvmap_flush_heap_block(struct nvmap_client *client,
 	h = block->handle;
 	if (h->pgalloc.pages) {
 		unsigned long page_count, i;
+		u32 granule_size = 0;
+		struct list_block *b = container_of(block, struct list_block, block);
 
 		/*
-		 * For Compression carveout with physically discontiguous 2MB chunks,
-		 * iterate over 2MB chunks and do cache maint for it.
+		 * For Compression carveout with physically discontiguous granules,
+		 * iterate over granules and do cache maint for each.
 		 */
 		page_count = h->size >> PAGE_SHIFT;
-		for (i = 0; i < page_count; i += PAGES_PER_2MB) {
+		granule_size = b->heap->granule_size;
+		for (i = 0; i < page_count; i += PAGES_PER_GRANULE(granule_size)) {
			phys = page_to_phys(h->pgalloc.pages[i]);
-			end = phys + SIZE_2MB;
+			end = phys + granule_size;
 			ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV,
 					phys, end, true, prot != NVMAP_HANDLE_INNER_CACHEABLE);
 			if (ret)
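Both hunks above recover the owning heap, and with it the granule size, from a plain struct nvmap_heap_block pointer via container_of(). That only works because struct list_block becomes visible outside nvmap_heap.c in the nvmap_heap.h change below. A minimal userspace sketch of the pattern, with stand-in types in place of the real nvmap structures:

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for the nvmap types; only the fields used here. */
struct nvmap_heap { unsigned int granule_size; };
struct nvmap_heap_block { int dummy; };

struct list_block {
	struct nvmap_heap_block block;	/* embedded member */
	struct nvmap_heap *heap;	/* back-pointer to the owning heap */
};

/* container_of(): recover the enclosing struct from a member pointer. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct nvmap_heap heap = { .granule_size = 2 * 1024 * 1024 };
	struct list_block lb = { .heap = &heap };
	struct nvmap_heap_block *b = &lb.block;	/* what alloc_handle() sees */
	struct list_block *owner = container_of(b, struct list_block, block);

	printf("granule_size = %u\n", owner->heap->granule_size);
	return 0;
}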
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.h b/drivers/video/tegra/nvmap/nvmap_heap.h
index b0f6a4cb..c9d574d7 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.h
+++ b/drivers/video/tegra/nvmap/nvmap_heap.h
@@ -32,6 +32,7 @@ struct nvmap_heap {
 	struct device *dma_dev;
 	bool is_ivm;
 	bool is_compression_co;
+	u32 granule_size;
 	bool can_alloc; /* Used only if is_ivm == true */
 	unsigned int peer; /* Used only if is_ivm == true */
 	unsigned int vm_id; /* Used only if is_ivm == true */
@@ -41,6 +42,17 @@ struct nvmap_heap {
 #endif /* NVMAP_CONFIG_DEBUG_MAPS */
 };
 
+struct list_block {
+	struct nvmap_heap_block block;
+	struct list_head all_list;
+	unsigned int mem_prot;
+	phys_addr_t orig_addr;
+	size_t size;
+	size_t align;
+	struct nvmap_heap *heap;
+	struct list_head free_list;
+};
+
 struct nvmap_heap *nvmap_heap_create(struct device *parent,
 				     const struct nvmap_platform_carveout *co,
 				     phys_addr_t base, size_t len, void *arg);
diff --git a/drivers/video/tegra/nvmap/nvmap_init.c b/drivers/video/tegra/nvmap/nvmap_init.c
index 5030c2b7..a38a239a 100644
--- a/drivers/video/tegra/nvmap/nvmap_init.c
+++ b/drivers/video/tegra/nvmap/nvmap_init.c
@@ -364,6 +364,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	int *bitmap_nos = NULL;
 	const char *device_name;
 	bool is_compression = false;
+	u32 granule_size = 0;
 
 	device_name = dev_name(dev);
 	if (!device_name) {
@@ -371,12 +372,17 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 		return NULL;
 	}
 
-	if (!strncmp(device_name, "compression", 11))
+	if (!strncmp(device_name, "compression", 11)) {
+		struct nvmap_platform_carveout *co;
+
 		is_compression = true;
+		co = nvmap_get_carveout_pdata("compression");
+		granule_size = co->granule_size;
+	}
 
 	if (is_compression) {
-		/* Calculation for Compression carveout should consider 2MB chunks */
-		count = size >> PAGE_SHIFT_2MB;
+		/* Calculation for Compression carveout should consider granule size */
+		count = size >> PAGE_SHIFT_GRANULE(granule_size);
 	} else {
 		if (dma_get_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs)) {
 			page_count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -404,7 +410,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	if (!is_compression)
 		pages = nvmap_kvzalloc_pages(count);
 	else
-		pages = nvmap_kvzalloc_pages(count * PAGES_PER_2MB);
+		pages = nvmap_kvzalloc_pages(count * PAGES_PER_GRANULE(granule_size));
 
 	if (!pages) {
 		kvfree(bitmap_nos);
@@ -418,7 +424,8 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 
 	if (!is_compression && unlikely(size > ((u64)mem->size << PAGE_SHIFT)))
 		goto err;
-	else if (is_compression && unlikely(size > ((u64)mem->size << PAGE_SHIFT_2MB)))
+	else if (is_compression &&
+		 unlikely(size > ((u64)mem->size << PAGE_SHIFT_GRANULE(granule_size))))
 		goto err;
 
 	if (((mem->flags & DMA_MEMORY_NOMAP) &&
@@ -447,10 +454,10 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 		if (!is_compression)
 			pages[i++] = pfn_to_page(mem->pfn_base + pageno);
 		else {
-			/* Handle 2MB chunks */
-			for (k = 0; k < (alloc_size * PAGES_PER_2MB); k++)
-				pages[i++] = pfn_to_page(mem->pfn_base +
-						pageno * PAGES_PER_2MB + k);
+			/* Handle granules */
+			for (k = 0; k < (alloc_size * PAGES_PER_GRANULE(granule_size)); k++)
+				pages[i++] = pfn_to_page(mem->pfn_base + pageno *
+						PAGES_PER_GRANULE(granule_size) + k);
 		}
 	}
 
@@ -464,7 +471,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	if (!is_compression)
 		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT);
 	else
-		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT_2MB);
+		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT_GRANULE(granule_size));
 
 	if (!(mem->flags & DMA_MEMORY_NOMAP)) {
 		addr = mem->virt_base + (first_pageno << PAGE_SHIFT);
@@ -517,6 +524,7 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	struct dma_coherent_mem_replica *mem;
 	bool is_compression = false;
 	const char *device_name;
+	u32 granule_size = 0;
 
 	if (!dev || !dev->dma_mem)
 		return;
@@ -527,8 +535,13 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 		return;
 	}
 
-	if (!strncmp(device_name, "compression", 11))
+	if (!strncmp(device_name, "compression", 11)) {
+		struct nvmap_platform_carveout *co;
+
 		is_compression = true;
+		co = nvmap_get_carveout_pdata("compression");
+		granule_size = co->granule_size;
+	}
 
 	mem = (struct dma_coherent_mem_replica *)(dev->dma_mem);
 	if ((mem->flags & DMA_MEMORY_NOMAP) &&
@@ -546,8 +559,9 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 			bitmap_clear(mem->bitmap, pageno, 1);
 		}
 	} else {
-		for (i = 0; i < (size >> PAGE_SHIFT); i += PAGES_PER_2MB) {
-			pageno = (page_to_pfn(pages[i]) - mem->pfn_base) / PAGES_PER_2MB;
+		for (i = 0; i < (size >> PAGE_SHIFT); i += PAGES_PER_GRANULE(granule_size)) {
+			pageno = (page_to_pfn(pages[i]) - mem->pfn_base) /
+					PAGES_PER_GRANULE(granule_size);
 			if (WARN_ONCE(pageno > mem->size, "invalid pageno:%d\n",
 					pageno))
 				continue;
@@ -564,7 +578,7 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	else
 		mem_addr = mem->virt_base;
 
-	page_shift_val = is_compression ? PAGE_SHIFT_2MB : PAGE_SHIFT;
+	page_shift_val = is_compression ? PAGE_SHIFT_GRANULE(granule_size) : PAGE_SHIFT;
 	if (mem && cpu_addr >= mem_addr &&
 	    cpu_addr - mem_addr < (u64)mem->size << page_shift_val) {
 		unsigned int page = (cpu_addr - mem_addr) >> page_shift_val;
@@ -573,7 +587,7 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 
 		if (DMA_ATTR_ALLOC_EXACT_SIZE & attrs) {
 			if (is_compression)
-				count = ALIGN_2MB(size) >> page_shift_val;
+				count = ALIGN_GRANULE_SIZE(size, granule_size) >> page_shift_val;
 			else
 				count = PAGE_ALIGN(size) >> page_shift_val;
 		}
@@ -664,7 +678,7 @@ static int nvmap_dma_assign_coherent_memory(struct device *dev,
 
 static int nvmap_dma_init_coherent_memory(
 	phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
-	struct dma_coherent_mem_replica **mem, bool is_compression)
+	struct dma_coherent_mem_replica **mem, bool is_compression, u32 granule_size)
 {
 	struct dma_coherent_mem_replica *dma_mem = NULL;
 	void *mem_base = NULL;
@@ -676,7 +690,7 @@ static int nvmap_dma_init_coherent_memory(
 		return -EINVAL;
 
 	if (is_compression)
-		pages = size >> PAGE_SHIFT_2MB;
+		pages = size >> PAGE_SHIFT_GRANULE(granule_size);
 	else
 		pages = size >> PAGE_SHIFT;
 
@@ -719,13 +733,14 @@ err_memunmap:
 }
 
 int nvmap_dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-	dma_addr_t device_addr, size_t size, int flags, bool is_compression)
+	dma_addr_t device_addr, size_t size, int flags, bool is_compression,
+	u32 granule_size)
 {
 	struct dma_coherent_mem_replica *mem;
 	int ret;
 
 	ret = nvmap_dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem,
-			is_compression);
+			is_compression, granule_size);
 	if (ret)
 		return ret;
 
@@ -757,7 +772,8 @@ static int __init nvmap_co_device_init(struct reserved_mem *rmem,
 #else
 	err = nvmap_dma_declare_coherent_memory(co->dma_dev, 0,
 			co->base, co->size,
-			DMA_MEMORY_NOMAP, co->is_compression_co);
+			DMA_MEMORY_NOMAP, co->is_compression_co,
+			co->granule_size);
 #endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
 	if (!err) {
@@ -871,7 +887,7 @@ finish:
 	return ret;
 }
 #else
-int __init nvmap_co_setup(struct reserved_mem *rmem)
+int __init nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size)
 {
 	struct nvmap_platform_carveout *co;
 	ulong start = sched_clock();
@@ -887,16 +903,18 @@ int __init nvmap_co_setup(struct reserved_mem *rmem)
 	co->base = rmem->base;
 	co->size = rmem->size;
 	co->cma_dev = NULL;
-	if (!strncmp(co->name, "compression", 11))
+	if (!strncmp(co->name, "compression", 11)) {
 		co->is_compression_co = true;
+		co->granule_size = granule_size;
+	}
 
 	nvmap_init_time += sched_clock() - start;
 	return ret;
 }
 #endif /* !NVMAP_LOADABLE_MODULE */
 
-RESERVEDMEM_OF_DECLARE(nvmap_co, "nvidia,generic_carveout", nvmap_co_setup);
 #ifndef NVMAP_LOADABLE_MODULE
+RESERVEDMEM_OF_DECLARE(nvmap_co, "nvidia,generic_carveout", nvmap_co_setup);
 RESERVEDMEM_OF_DECLARE(nvmap_vpr_co, "nvidia,vpr-carveout", nvmap_co_setup);
 RESERVEDMEM_OF_DECLARE(nvmap_fsi_co, "nvidia,fsi-carveout", nvmap_co_setup);
 #endif /* !NVMAP_LOADABLE_MODULE */
@@ -909,7 +927,9 @@ int __init nvmap_init(struct platform_device *pdev)
 {
 	int err;
 	struct reserved_mem rmem;
+
 #ifdef NVMAP_LOADABLE_MODULE
+	u32 granule_size = 0;
 	struct reserved_mem *rmem2;
 	struct device_node *np = pdev->dev.of_node;
 	struct of_phandle_iterator it;
@@ -919,6 +939,12 @@ int __init nvmap_init(struct platform_device *pdev)
 	while (!of_phandle_iterator_next(&it) && it.node) {
 		if (of_device_is_available(it.node) &&
 		    !of_device_is_compatible(it.node, "nvidia,ivm_carveout")) {
+			/* Read granule size in case of compression carveout */
+			if (of_device_is_compatible(it.node, "nvidia,compression_carveout") &&
+			    of_property_read_u32(it.node, "granule-size", &granule_size)) {
+				pr_err("granule-size property is missing\n");
+				return -EINVAL;
+			}
 			rmem2 = of_reserved_mem_lookup(it.node);
 			if (!rmem2) {
 				if (!of_property_read_string(it.node, "compatible", &compp))
@@ -926,7 +952,7 @@ int __init nvmap_init(struct platform_device *pdev)
 					compp);
 				return -EINVAL;
 			}
-			nvmap_co_setup(rmem2);
+			nvmap_co_setup(rmem2, granule_size);
 		}
 	}
 }
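With the coherent-memory bitmap now kept in granule units, __nvmap_dma_alloc_from_coherent() converts byte sizes with PAGE_SHIFT_GRANULE() and expands the returned page array by PAGES_PER_GRANULE(). A worked userspace check of that arithmetic, assuming 4 KiB pages and a 2 MiB granule (the previously hard-coded value), with __builtin_ctzl() standing in for the kernel's order_base_2() on power-of-two sizes:

#include <assert.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12

/* Same shape as the nvmap_priv.h macros, for a power-of-two granule. */
#define PAGE_SHIFT_GRANULE(g)	(__builtin_ctzl(g))
#define PAGES_PER_GRANULE(g)	((g) / PAGE_SIZE)

int main(void)
{
	unsigned long granule_size = 2UL * 1024 * 1024;	/* 2 MiB granule */
	unsigned long size = 8UL * 1024 * 1024;		/* 8 MiB request */

	/* The bitmap allocates granules: 8 MiB / 2 MiB = 4 bits. */
	unsigned long count = size >> PAGE_SHIFT_GRANULE(granule_size);

	assert(count == 4);
	/* The caller-visible pages[] array still holds 4 KiB pages. */
	assert(count * PAGES_PER_GRANULE(granule_size) == size >> PAGE_SHIFT);
	return 0;
}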
diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c
index d3cc422a..4519a46a 100644
--- a/drivers/video/tegra/nvmap/nvmap_ioctl.c
+++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c
@@ -181,7 +181,7 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 	struct nvmap_handle *handle;
 	struct dma_buf *dmabuf = NULL;
 	bool is_ro;
-	int err;
+	int err, i;
 	unsigned int page_sz = PAGE_SIZE;
 
 	if (copy_from_user(&op, arg, sizeof(op)))
@@ -203,11 +203,16 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 		return -EINVAL;
 
 	/*
-	 * In case of Compression carveout, the handle size needs to be aligned to 2MB.
+	 * In case of Compression carveout, the handle size needs to be aligned to the granule size.
 	 */
 	if (op.heap_mask & NVMAP_HEAP_CARVEOUT_COMPRESSION) {
-		handle->size = ALIGN_2MB(handle->size);
-		page_sz = SIZE_2MB;
+		u32 granule_size = 0;
+
+		for (i = 0; i < nvmap_dev->nr_carveouts; i++)
+			if (nvmap_dev->heaps[i].heap_bit & NVMAP_HEAP_CARVEOUT_COMPRESSION)
+				granule_size = nvmap_dev->heaps[i].carveout->granule_size;
+		handle->size = ALIGN_GRANULE_SIZE(handle->size, granule_size);
+		page_sz = granule_size;
 	}
 
 	if (!is_nvmap_memory_available(handle->size, op.heap_mask)) {
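The ioctl path now rounds a compression-carveout handle up to a whole number of granules, where it previously rounded to a fixed 2 MB. For instance, a hypothetical 5 MiB request with a 2 MiB granule becomes a 6 MiB handle:

#include <assert.h>

/* Same rounding as ALIGN_GRANULE_SIZE() in nvmap_priv.h (power-of-two g). */
#define ALIGN_GRANULE_SIZE(size, g) (((size) + (g) - 1) & ~((g) - 1))

int main(void)
{
	unsigned long g = 2UL * 1024 * 1024;

	assert(ALIGN_GRANULE_SIZE(5UL * 1024 * 1024, g) == 6UL * 1024 * 1024);
	/* Already-aligned sizes are unchanged. */
	assert(ALIGN_GRANULE_SIZE(2UL * 1024 * 1024, g) == 2UL * 1024 * 1024);
	return 0;
}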
diff --git a/drivers/video/tegra/nvmap/nvmap_priv.h b/drivers/video/tegra/nvmap/nvmap_priv.h
index 729ea9aa..27de4899 100644
--- a/drivers/video/tegra/nvmap/nvmap_priv.h
+++ b/drivers/video/tegra/nvmap/nvmap_priv.h
@@ -41,10 +41,9 @@
 
 #include
 
-#define SIZE_2MB (2*1024*1024)
-#define ALIGN_2MB(size) ((size + SIZE_2MB - 1) & ~(SIZE_2MB - 1))
-#define PAGE_SHIFT_2MB 21
-#define PAGES_PER_2MB (SIZE_2MB / PAGE_SIZE)
+#define ALIGN_GRANULE_SIZE(size, GRANULE_SIZE) (((size) + (GRANULE_SIZE) - 1) & ~((GRANULE_SIZE) - 1))
+#define PAGE_SHIFT_GRANULE(GRANULE_SIZE) (order_base_2(GRANULE_SIZE))
+#define PAGES_PER_GRANULE(GRANULE_SIZE) ((GRANULE_SIZE) / PAGE_SIZE)
 
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
 
@@ -487,14 +486,20 @@ struct dma_coherent_mem_replica {
 };
 
 int nvmap_dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-	dma_addr_t device_addr, size_t size, int flags, bool is_compression);
+	dma_addr_t device_addr, size_t size, int flags, bool is_compression,
+	u32 granule_size);
 #endif
 
 int nvmap_probe(struct platform_device *pdev);
 int nvmap_remove(struct platform_device *pdev);
 int nvmap_init(struct platform_device *pdev);
 int nvmap_create_carveout(const struct nvmap_platform_carveout *co);
+
+#ifdef NVMAP_LOADABLE_MODULE
+int nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size);
+#else
 int nvmap_co_setup(struct reserved_mem *rmem);
+#endif
 
 struct device *dma_dev_from_handle(unsigned long type);
 struct nvmap_heap_block *nvmap_carveout_alloc(struct nvmap_client *dev,
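A caveat on PAGE_SHIFT_GRANULE() above: order_base_2() rounds up to the next power of two, so the shift only agrees with true division when granule-size is itself a power of two, and the same assumption underlies the mask in ALIGN_GRANULE_SIZE(). The device-tree read in nvmap_init() rejects a missing property but does not validate the value; whether an explicit is_power_of_2() check is needed is worth confirming. A userspace demonstration of the mismatch, using a ceil-log2 stand-in for order_base_2():

#include <assert.h>

/* Userspace stand-in for the kernel's order_base_2(): ceil(log2(n)). */
static unsigned int order_base_2(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	/* Power-of-two granule: shift and division agree. */
	unsigned long g = 2UL * 1024 * 1024;		/* 2 MiB -> order 21 */

	assert(order_base_2(g) == 21);
	assert((8UL * 1024 * 1024) >> order_base_2(g) == 4);

	/*
	 * Non-power-of-two granule: order_base_2() rounds up to 4 MiB
	 * (order 22), so the shift undercounts granules versus division.
	 */
	unsigned long bad = 3UL * 1024 * 1024;

	assert(order_base_2(bad) == 22);
	assert((12UL * 1024 * 1024) >> order_base_2(bad) == 3);	/* div says 4 */
	return 0;
}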
diff --git a/include/linux/nvmap.h b/include/linux/nvmap.h
index d5e0e9c6..ad9ac6a6 100644
--- a/include/linux/nvmap.h
+++ b/include/linux/nvmap.h
@@ -95,6 +95,7 @@ struct nvmap_platform_carveout {
 	bool init_done; /* FIXME: remove once all caveouts use reserved-memory */
 	struct nvmap_pm_ops pm_ops;
 	bool is_compression_co; /* Compression carveout is treated differently */
+	u32 granule_size; /* Granule size for compression carveout */
 };
 
 struct nvmap_platform_data {
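As a sanity check on the refactor: with granule_size set to the old hard-coded 2 MB, the new macros reduce to exactly the constants this series removes (SIZE_2MB, PAGE_SHIFT_2MB == 21, and PAGES_PER_2MB == 512 with 4 KiB pages). A minimal self-contained check, again with __builtin_ctzl() standing in for order_base_2() on a power-of-two size:

#include <assert.h>

#define PAGE_SIZE 4096UL

/* The new granule macros, as in nvmap_priv.h (ctz == log2 here). */
#define ALIGN_GRANULE_SIZE(size, g) (((size) + (g) - 1) & ~((g) - 1))
#define PAGE_SHIFT_GRANULE(g)       (__builtin_ctzl(g))
#define PAGES_PER_GRANULE(g)        ((g) / PAGE_SIZE)

int main(void)
{
	unsigned long g = 2UL * 1024 * 1024;	/* old SIZE_2MB */

	assert(PAGE_SHIFT_GRANULE(g) == 21);	/* old PAGE_SHIFT_2MB */
	assert(PAGES_PER_GRANULE(g) == 512);	/* old PAGES_PER_2MB */
	assert(ALIGN_GRANULE_SIZE(3UL * 1024 * 1024, g)	/* old ALIGN_2MB */
	       == 4UL * 1024 * 1024);
	return 0;
}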