video: tegra: nvmap: Cleanup gpu carveout support

Clean up GPU carveout support, as we are moving to hugetlbfs.

Bug 4510173

Change-Id: I68b88e81d2c4da7e35e241f2f29633ec39a4bcbe
Signed-off-by: Ketan Patil <ketanp@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3174619
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 17:55:05 +03:00 · 2024-07-13 14:29:50 +00:00
parent 38bbe2a3b0
commit 9c92d38302
10 changed files with 102 additions and 347 deletions
--- a/drivers/video/tegra/nvmap/nvmap_alloc.c
+++ b/drivers/video/tegra/nvmap/nvmap_alloc.c
@@ -220,37 +220,13 @@ static void alloc_handle(struct nvmap_client *client,
 			/* Clear the allocated buffer */
 			if (nvmap_cpu_map_is_allowed(h)) {
 				void *cpu_addr;
-				if (h->pgalloc.pages &&
+
-				    h->heap_type == NVMAP_HEAP_CARVEOUT_GPU) {
+				cpu_addr = memremap(b->base, h->size,
 					unsigned long page_count;
 					u32 granule_size = 0;
 					int i;
 					struct list_block *lb;
 					lb = container_of(b, struct list_block, block);
 					granule_size = lb->heap->granule_size;
 					page_count = h->size >> PAGE_SHIFT;
 					/* Iterate over granules */
 					for (i = 0; i < page_count;
 						i += PAGES_PER_GRANULE(granule_size)) {
 						cpu_addr = memremap(page_to_phys(
 								    h->pgalloc.pages[i]),
 								    granule_size,
 								    MEMREMAP_WB);
 						if (cpu_addr != NULL) {
 							memset(cpu_addr, 0, granule_size);
 							arch_invalidate_pmem(cpu_addr,
 									     granule_size);
 							memunmap(cpu_addr);
 						}
 					}
 				} else {
 					cpu_addr = memremap(b->base, h->size,
 							MEMREMAP_WB);
-					if (cpu_addr != NULL) {
+				if (cpu_addr != NULL) {
-						memset(cpu_addr, 0, h->size);
+					memset(cpu_addr, 0, h->size);
-						arch_invalidate_pmem(cpu_addr, h->size);
+					arch_invalidate_pmem(cpu_addr, h->size);
-						memunmap(cpu_addr);
+					memunmap(cpu_addr);
 					}
 				}
 			}
 #endif /* NVMAP_CONFIG_CACHE_FLUSH_AT_ALLOC */
@@ -518,19 +494,14 @@ void _nvmap_handle_free(struct nvmap_handle *h)
 		if (h->vaddr) {
 			void *addr = h->vaddr;
-			if (h->pgalloc.pages) {
+			addr -= (h->carveout->base & ~PAGE_MASK);
-				vunmap(h->vaddr);
+			iounmap((void __iomem *)addr);
 			} else {
 				addr -= (h->carveout->base & ~PAGE_MASK);
 				iounmap((void __iomem *)addr);
 			}
 		}
 		nvmap_heap_free(h->carveout);
 		nvmap_kmaps_dec(h);
 		h->carveout = NULL;
 		h->vaddr = NULL;
 		h->pgalloc.pages = NULL;
 		goto out;
 	}
--- a/drivers/video/tegra/nvmap/nvmap_cache.c
+++ b/drivers/video/tegra/nvmap/nvmap_cache.c
@@ -175,7 +175,7 @@ static int do_cache_maint(struct cache_maint_op *cache_work)
 		goto out;
 	}
-	if (h->pgalloc.pages) {
+	if (h->heap_pgalloc) {
 		heap_page_cache_maint(h, pstart, pend, op, true,
 			(h->flags == NVMAP_HANDLE_INNER_CACHEABLE) ?
 			false : true, cache_work->clean_only_dirty);
--- a/drivers/video/tegra/nvmap/nvmap_core.c
+++ b/drivers/video/tegra/nvmap/nvmap_core.c
@@ -53,7 +53,6 @@ void *__nvmap_mmap(struct nvmap_handle *h)
 	if (h->from_va && h->is_ro)
 		goto put_handle;
 	if (!h->alloc)
 		goto put_handle;
@@ -66,7 +65,7 @@ void *__nvmap_mmap(struct nvmap_handle *h)
 	nvmap_kmaps_inc(h);
 	prot = nvmap_pgprot(h, PG_PROT_KERNEL);
-	if (h->pgalloc.pages) {
+	if (h->heap_pgalloc) {
 		pages = nvmap_pages(h->pgalloc.pages, h->size >> PAGE_SHIFT);
 		if (!pages)
 			goto out;
@@ -223,7 +222,7 @@ struct sg_table *__nvmap_sg_table(struct nvmap_client *client,
 		goto err;
 	}
-	if (!h->pgalloc.pages) {
+	if (!h->heap_pgalloc) {
 		phys_addr_t paddr = handle_phys(h);
 		struct page *page = phys_to_page(paddr);
--- a/drivers/video/tegra/nvmap/nvmap_fault.c
+++ b/drivers/video/tegra/nvmap/nvmap_fault.c
@@ -181,7 +181,7 @@ static vm_fault_t nvmap_vma_fault(struct vm_fault *vmf)
 	if (offs >= priv->handle->size)
 		return VM_FAULT_SIGBUS;
-	if (!priv->handle->pgalloc.pages) {
+	if (!priv->handle->heap_pgalloc) {
 		unsigned long pfn;
 		BUG_ON(priv->handle->carveout->base & ~PAGE_MASK);
@@ -195,54 +195,44 @@ static vm_fault_t nvmap_vma_fault(struct vm_fault *vmf)
 		page = pfn_to_page(pfn);
 	} else {
 		void *kaddr;
 		unsigned long pfn;
-		if (priv->handle->heap_type != NVMAP_HEAP_IOVMM) {
+		offs >>= PAGE_SHIFT;
-			offs >>= PAGE_SHIFT;
+		if (atomic_read(&priv->handle->pgalloc.reserved))
-			page = priv->handle->pgalloc.pages[offs];
+			return VM_FAULT_SIGBUS;
-			pfn = page_to_pfn(page);
+		page = nvmap_to_page(priv->handle->pgalloc.pages[offs]);
 			if (!pfn_is_map_memory(pfn)) {
 				vm_insert_pfn(vma,
 					(unsigned long)vmf_address, pfn);
 				return VM_FAULT_NOPAGE;
 			}
 		} else {
 			offs >>= PAGE_SHIFT;
 			if (atomic_read(&priv->handle->pgalloc.reserved))
 				return VM_FAULT_SIGBUS;
 			page = nvmap_to_page(priv->handle->pgalloc.pages[offs]);
-			if (PageAnon(page)) {
+		if (PageAnon(page)) {
-				if (vma->vm_flags & VM_SHARED)
+			if (vma->vm_flags & VM_SHARED)
-					return VM_FAULT_SIGSEGV;
+				return VM_FAULT_SIGSEGV;
-			}
+		}
-			if (!nvmap_handle_track_dirty(priv->handle))
+		if (!nvmap_handle_track_dirty(priv->handle))
-				goto finish;
+			goto finish;
-			mutex_lock(&priv->handle->lock);
+		mutex_lock(&priv->handle->lock);
-			if (nvmap_page_dirty(priv->handle->pgalloc.pages[offs])) {
+		if (nvmap_page_dirty(priv->handle->pgalloc.pages[offs])) {
-				mutex_unlock(&priv->handle->lock);
+			mutex_unlock(&priv->handle->lock);
-				goto finish;
+			goto finish;
-			}
+		}
-			/* inner cache maint */
+		/* inner cache maint */
-			kaddr  = kmap(page);
+		kaddr  = kmap(page);
-			BUG_ON(!kaddr);
+		BUG_ON(!kaddr);
-			inner_cache_maint(NVMAP_CACHE_OP_WB_INV, kaddr, PAGE_SIZE);
+		inner_cache_maint(NVMAP_CACHE_OP_WB_INV, kaddr, PAGE_SIZE);
-			kunmap(page);
+		kunmap(page);
-			if (priv->handle->flags & NVMAP_HANDLE_INNER_CACHEABLE)
+		if (priv->handle->flags & NVMAP_HANDLE_INNER_CACHEABLE)
-				goto make_dirty;
+			goto make_dirty;
 make_dirty:
-			nvmap_page_mkdirty(&priv->handle->pgalloc.pages[offs]);
+		nvmap_page_mkdirty(&priv->handle->pgalloc.pages[offs]);
-			atomic_inc(&priv->handle->pgalloc.ndirty);
+		atomic_inc(&priv->handle->pgalloc.ndirty);
-			mutex_unlock(&priv->handle->lock);
+		mutex_unlock(&priv->handle->lock);
 		}
 	}
 finish:
 	if (page)
 		get_page(page);
 	vmf->page = page;
 	return (page) ? 0 : VM_FAULT_SIGBUS;
 }
--- a/drivers/video/tegra/nvmap/nvmap_heap.c
+++ b/drivers/video/tegra/nvmap/nvmap_heap.c
@@ -79,11 +79,10 @@ void nvmap_heap_debugfs_init(struct dentry *heap_root, struct nvmap_heap *heap)
 }
 static phys_addr_t nvmap_alloc_mem(struct nvmap_heap *h, size_t len,
-				   phys_addr_t *start, struct nvmap_handle *handle)
+				   phys_addr_t *start)
 {
 	phys_addr_t pa = DMA_MAPPING_ERROR;
 	struct device *dev = h->dma_dev;
 	void *err = NULL;
 #ifdef CONFIG_TEGRA_VIRTUALIZATION
 	if (start && h->is_ivm) {
@@ -98,26 +97,8 @@ static phys_addr_t nvmap_alloc_mem(struct nvmap_heap *h, size_t len,
 	} else
 #endif
 	{
-		err = nvmap_dma_alloc_attrs(dev, len, &pa,
+		(void)nvmap_dma_alloc_attrs(dev, len, &pa,
 				GFP_KERNEL, DMA_ATTR_ALLOC_EXACT_SIZE);
 		/*
 		 * In case of Gpu carveout, try to allocate the entire granule in physically
 		 * contiguous manner. If it returns error, then try to allocate the memory in
 		 * granules of specified granule size.
 		 */
 		if (h->is_gpu_co && IS_ERR(err)) {
 			err = nvmap_dma_alloc_attrs(dev, len, &pa,
 				GFP_KERNEL, DMA_ATTR_ALLOC_EXACT_SIZE |
 				DMA_ATTR_ALLOC_SINGLE_PAGES);
 			if (!IS_ERR_OR_NULL(err)) {
 				/*
 				 * Need to keep track of pages, so that only those pages
 				 * can be freed while freeing the buffer.
 				 */
 				handle->pgalloc.pages = (struct page **)err;
 			}
 		}
 		if (!dma_mapping_error(dev, pa)) {
 			dev_dbg(dev, "Allocated addr (%pa) len(%zu)\n",
 					&pa, len);
@@ -128,7 +109,7 @@ static phys_addr_t nvmap_alloc_mem(struct nvmap_heap *h, size_t len,
 }
 static void nvmap_free_mem(struct nvmap_heap *h, phys_addr_t base,
-			   size_t len, struct nvmap_handle *handle)
+			   size_t len)
 {
 	struct device *dev = h->dma_dev;
@@ -140,18 +121,10 @@ static void nvmap_free_mem(struct nvmap_heap *h, phys_addr_t base,
 	} else
 #endif
 	{
-		if (h->is_gpu_co && handle->pgalloc.pages) {
+		nvmap_dma_free_attrs(dev, len,
 			/* In case of pages, we need to pass pointer to array of pages */
 			nvmap_dma_free_attrs(dev, len,
 				     (void *)handle->pgalloc.pages,
 				     (dma_addr_t)base,
 				     DMA_ATTR_ALLOC_EXACT_SIZE | DMA_ATTR_ALLOC_SINGLE_PAGES);
 		} else {
 			nvmap_dma_free_attrs(dev, len,
 				     (void *)(uintptr_t)base,
 				     (dma_addr_t)base,
 				     DMA_ATTR_ALLOC_EXACT_SIZE);
 		}
 	}
 }
@@ -163,8 +136,7 @@ static struct nvmap_heap_block *do_heap_alloc(struct nvmap_heap *heap,
 					      size_t len, size_t align,
 					      unsigned int mem_prot,
 					      phys_addr_t base_max,
-					      phys_addr_t *start,
+					      phys_addr_t *start)
 					      struct nvmap_handle *handle)
 {
 	struct list_block *heap_block = NULL;
 	dma_addr_t dev_base;
@@ -191,7 +163,7 @@ static struct nvmap_heap_block *do_heap_alloc(struct nvmap_heap *heap,
 		goto fail_heap_block_alloc;
 	}
-	dev_base = nvmap_alloc_mem(heap, len, start, handle);
+	dev_base = nvmap_alloc_mem(heap, len, start);
 	if (dma_mapping_error(dev, dev_base)) {
 		dev_err(dev, "failed to alloc mem of size (%zu)\n",
 			len);
@@ -222,7 +194,7 @@ static void do_heap_free(struct nvmap_heap_block *block)
 	list_del(&b->all_list);
-	nvmap_free_mem(heap, block->base, b->size, block->handle);
+	nvmap_free_mem(heap, block->base, b->size);
 	heap->free_size += b->size;
 	kmem_cache_free(heap_block_cache, b);
 }
@@ -276,7 +248,7 @@ struct nvmap_heap_block *nvmap_heap_alloc(struct nvmap_heap *h,
 	}
 	align = max_t(size_t, align, L1_CACHE_BYTES);
-	b = do_heap_alloc(h, len, align, prot, 0, start, handle);
+	b = do_heap_alloc(h, len, align, prot, 0, start);
 	if (b) {
 		b->handle = handle;
 		handle->carveout = b;
@@ -375,7 +347,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 		/* declare Non-CMA heap */
 		err = nvmap_dma_declare_coherent_memory(h->dma_dev, 0, base, len,
-				DMA_MEMORY_NOMAP, co->is_gpu_co, co->granule_size);
+				DMA_MEMORY_NOMAP);
 		if (!err) {
 			pr_info("%s :dma coherent mem declare %pa,%zu\n",
 				co->name, &base, len);
@@ -394,9 +366,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 	h->base = base;
 	h->can_alloc = !!co->can_alloc;
 	h->is_ivm = co->is_ivm;
 	h->is_gpu_co = co->is_gpu_co;
 	h->numa_node_id = co->numa_node_id;
 	h->granule_size = co->granule_size;
 	h->len = len;
 	h->free_size = len;
 	h->peer = co->peer;
@@ -518,33 +488,11 @@ static int nvmap_flush_heap_block(struct nvmap_client *client,
 	phys_addr_t phys = block->base;
 	phys_addr_t end = block->base + len;
 	int ret = 0;
 	struct nvmap_handle *h;
 	if (prot == NVMAP_HANDLE_UNCACHEABLE || prot == NVMAP_HANDLE_WRITE_COMBINE)
 		goto out;
-	h = block->handle;
+	ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV, phys, end,
 	if (h->pgalloc.pages) {
 		unsigned long page_count, i;
 		u32 granule_size = 0;
 		struct list_block *b = container_of(block, struct list_block, block);
 		/*
 		 * For Gpu carveout with physically discontiguous granules,
 		 * iterate over granules and do cache maint for it.
 		 */
 		page_count = h->size >> PAGE_SHIFT;
 		granule_size = b->heap->granule_size;
 		for (i = 0; i < page_count; i += PAGES_PER_GRANULE(granule_size)) {
 			phys = page_to_phys(h->pgalloc.pages[i]);
 			end = phys + granule_size;
 			ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV, phys, end,
 					true, prot != NVMAP_HANDLE_INNER_CACHEABLE);
 			if (ret)
 				goto out;
 		}
 	} else
 		ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV, phys, end,
 				true, prot != NVMAP_HANDLE_INNER_CACHEABLE);
 out:
--- a/drivers/video/tegra/nvmap/nvmap_heap.h
+++ b/drivers/video/tegra/nvmap/nvmap_heap.h
@@ -40,8 +40,6 @@ struct nvmap_heap {
 	struct device *cma_dev;
 	struct device *dma_dev;
 	bool is_ivm;
 	bool is_gpu_co;
 	u32 granule_size;
 	int numa_node_id;
 	bool can_alloc; /* Used only if is_ivm == true */
 	unsigned int peer; /* Used only if is_ivm == true */
--- a/drivers/video/tegra/nvmap/nvmap_init.c
+++ b/drivers/video/tegra/nvmap/nvmap_init.c
@@ -96,21 +96,17 @@ static struct nvmap_platform_carveout nvmap_carveouts[] = {
 		.size		= 0,
 		.numa_node_id = 0,
 	},
 	/* Need uninitialized entries for IVM carveouts */
 	[5] = {
-		.name		= "gpu0",
+		.name		= NULL,
-		.usage_mask	= NVMAP_HEAP_CARVEOUT_GPU,
+		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
 		.base		= 0,
 		.size		= 0,
 		.numa_node_id = 0,
 	},
 	[6] = {
-		.name		= "gpu1",
+		.name		= NULL,
-		.usage_mask	= NVMAP_HEAP_CARVEOUT_GPU,
+		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
-		.base		= 0,
+		.numa_node_id = 0,
 		.size		= 0,
 		.numa_node_id = 1,
 	},
 	/* Need uninitialized entries for IVM carveouts */
 	[7] = {
 		.name		= NULL,
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
@@ -121,21 +117,11 @@ static struct nvmap_platform_carveout nvmap_carveouts[] = {
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
 		.numa_node_id = 0,
 	},
 	[9] = {
 		.name		= NULL,
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
 		.numa_node_id = 0,
 	},
 	[10] = {
 		.name		= NULL,
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
 		.numa_node_id = 0,
 	},
 };
 static struct nvmap_platform_data nvmap_data = {
 	.carveouts	= nvmap_carveouts,
-	.nr_carveouts	= 7,
+	.nr_carveouts	= 5,
 };
 static struct nvmap_platform_carveout *nvmap_get_carveout_pdata(const char *name)
@@ -313,49 +299,23 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 {
 	int order = get_order(size);
 	unsigned long flags;
-	unsigned int count = 0, i = 0, j = 0, k = 0;
+	unsigned int count = 0, i = 0, j = 0;
 	unsigned int alloc_size;
 	unsigned long align, pageno, page_count, first_pageno;
 	void *addr = NULL;
 	struct page **pages = NULL;
 	int do_memset = 0;
 	int *bitmap_nos = NULL;
 	const char *device_name;
 	bool is_gpu = false;
 	u32 granule_size = 0;
-	device_name = dev_name(dev);
+	if (dma_get_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs)) {
-	if (!device_name) {
+		page_count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-		pr_err("Could not get device_name\n");
+		if (page_count > UINT_MAX) {
-		return NULL;
+			dev_err(dev, "Page count more than max value\n");
 	}
 	if (!strncmp(device_name, "gpu", 3)) {
 		struct nvmap_platform_carveout *co;
 		is_gpu = true;
 		co = nvmap_get_carveout_pdata("gpu");
 		if (!co) {
 			pr_err("Could not get carveout\n");
 			return NULL;
 		}
-		granule_size = co->granule_size;
+		count = (unsigned int)page_count;
-	}
+	} else
-
+		count = 1 << order;
 	if (is_gpu) {
 		/* Calculation for Gpu carveout should consider granule size */
 		count = size >> PAGE_SHIFT_GRANULE(granule_size);
 	} else {
 		if (dma_get_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs)) {
 			page_count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 			if (page_count > UINT_MAX) {
 				dev_err(dev, "Page count more than max value\n");
 				return NULL;
 			}
 			count = (unsigned int)page_count;
 		} else
 			count = 1 << order;
 	}
 	if (!count)
 		return NULL;
@@ -368,11 +328,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	if ((mem->flags & DMA_MEMORY_NOMAP) &&
 	    dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) {
 		alloc_size = 1;
-		/* pages contain the array of pages of kernel PAGE_SIZE */
+		pages = nvmap_kvzalloc_pages(count);
 		if (!is_gpu)
 			pages = nvmap_kvzalloc_pages(count);
 		else
 			pages = nvmap_kvzalloc_pages(count * PAGES_PER_GRANULE(granule_size));
 		if (!pages) {
 			kvfree(bitmap_nos);
@@ -384,15 +340,11 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	spin_lock_irqsave(&mem->spinlock, flags);
-	if (!is_gpu && unlikely(size > ((u64)mem->size << PAGE_SHIFT)))
+	if (unlikely(size > ((u64)mem->size << PAGE_SHIFT)))
 		goto err;
 	else if (is_gpu &&
 		 unlikely(size > ((u64)mem->size << PAGE_SHIFT_GRANULE(granule_size))))
 		goto err;
-	if (((mem->flags & DMA_MEMORY_NOMAP) &&
+	if ((mem->flags & DMA_MEMORY_NOMAP) &&
-	    dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) ||
+	    dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) {
 	    is_gpu) {
 		align = 0;
 	} else  {
 		if (order > DMA_BUF_ALIGNMENT)
@@ -412,16 +364,8 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 			first_pageno = pageno;
 		count -= alloc_size;
-		if (pages) {
+		if (pages)
-			if (!is_gpu)
+			pages[i++] = pfn_to_page(mem->pfn_base + pageno);
 				pages[i++] = pfn_to_page(mem->pfn_base + pageno);
 			else {
 				/* Handle granules */
 				for (k = 0; k < (alloc_size * PAGES_PER_GRANULE(granule_size)); k++)
 					pages[i++] = pfn_to_page(mem->pfn_base + pageno *
 								 PAGES_PER_GRANULE(granule_size) + k);
 			}
 		}
 		bitmap_set(mem->bitmap, pageno, alloc_size);
 		bitmap_nos[j++] = pageno;
@@ -430,11 +374,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	/*
 	 * Memory was found in the coherent area.
 	 */
-	if (!is_gpu)
+	*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT);
 		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT);
 	else
 		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT_GRANULE(granule_size));
 	if (!(mem->flags & DMA_MEMORY_NOMAP)) {
 		addr = mem->virt_base + (first_pageno << PAGE_SHIFT);
 		do_memset = 1;
@@ -456,7 +396,7 @@ err:
 	spin_unlock_irqrestore(&mem->spinlock, flags);
 	kvfree(pages);
 	kvfree(bitmap_nos);
-	return ERR_PTR(-ENOMEM);
+	return NULL;
 }
 struct device *nvmap_get_vpr_dev(void)
@@ -516,33 +456,12 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 {
 	void *mem_addr;
 	unsigned long flags;
-	unsigned int pageno, page_shift_val;
+	unsigned int pageno;
 	struct dma_coherent_mem_replica *mem;
 	bool is_gpu = false;
 	const char *device_name;
 	size_t granule_size = 0;
 	if (!dev || !dev->dma_mem)
 		return;
 	device_name = dev_name(dev);
 	if (!device_name) {
 		pr_err("Could not get device_name\n");
 		return;
 	}
 	if (!strncmp(device_name, "gpu", 3)) {
 		struct nvmap_platform_carveout *co;
 		is_gpu = true;
 		co = nvmap_get_carveout_pdata("gpu");
 		if (!co) {
 			pr_err("Could not get carveout\n");
 			return;
 		}
 		granule_size = co->granule_size;
 	}
 	mem = (struct dma_coherent_mem_replica *)(dev->dma_mem);
 	if ((mem->flags & DMA_MEMORY_NOMAP) &&
 	    dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) {
@@ -550,23 +469,12 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 		int i;
 		spin_lock_irqsave(&mem->spinlock, flags);
-		if (!is_gpu) {
+		for (i = 0; i < (size >> PAGE_SHIFT); i++) {
-			for (i = 0; i < (size >> PAGE_SHIFT); i++) {
+			pageno = page_to_pfn(pages[i]) - mem->pfn_base;
-				pageno = page_to_pfn(pages[i]) - mem->pfn_base;
+			if (WARN_ONCE(pageno > mem->size,
 				if (WARN_ONCE(pageno > mem->size,
 				      "invalid pageno:%d\n", pageno))
-					continue;
+				continue;
-				bitmap_clear(mem->bitmap, pageno, 1);
+			bitmap_clear(mem->bitmap, pageno, 1);
 			}
 		} else {
 			for (i = 0; i < (size >> PAGE_SHIFT); i += PAGES_PER_GRANULE(granule_size)) {
 				pageno = (page_to_pfn(pages[i]) - mem->pfn_base) /
 						PAGES_PER_GRANULE(granule_size);
 				if (WARN_ONCE(pageno > mem->size,
 				      "invalid pageno:%d\n", pageno))
 					continue;
 				bitmap_clear(mem->bitmap, pageno, 1);
 			}
 		}
 		spin_unlock_irqrestore(&mem->spinlock, flags);
 		kvfree(pages);
@@ -578,19 +486,14 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	else
 		mem_addr =  mem->virt_base;
 	page_shift_val = is_gpu ? PAGE_SHIFT_GRANULE(granule_size) : PAGE_SHIFT;
 	if (mem && cpu_addr >= mem_addr &&
-	    cpu_addr - mem_addr < (u64)mem->size << page_shift_val) {
+	    cpu_addr - mem_addr < (u64)mem->size << PAGE_SHIFT) {
-		unsigned int page = (cpu_addr - mem_addr) >> page_shift_val;
+		unsigned int page = (cpu_addr - mem_addr) >> PAGE_SHIFT;
 		unsigned long flags;
 		unsigned int count;
-		if (DMA_ATTR_ALLOC_EXACT_SIZE & attrs) {
+		if (DMA_ATTR_ALLOC_EXACT_SIZE & attrs)
-			if (is_gpu)
+			count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 				count = ALIGN_GRANULE_SIZE(size, granule_size) >> page_shift_val;
 			else
 				count = PAGE_ALIGN(size) >> page_shift_val;
 		}
 		else
 			count = 1 << get_order(size);
@@ -680,24 +583,17 @@ static int nvmap_dma_assign_coherent_memory(struct device *dev,
 static int nvmap_dma_init_coherent_memory(
 	phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
-	struct dma_coherent_mem_replica **mem, bool is_gpu, u32 granule_size)
+	struct dma_coherent_mem_replica **mem)
 {
 	struct dma_coherent_mem_replica *dma_mem = NULL;
 	void *mem_base = NULL;
-	int pages;
+	int pages = size >> PAGE_SHIFT;
-	int bitmap_size;
+	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
 	int ret;
 	if (!size)
 		return -EINVAL;
 	if (is_gpu)
 		pages = size >> PAGE_SHIFT_GRANULE(granule_size);
 	else
 		pages = size >> PAGE_SHIFT;
 	bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
 	if (!(flags & DMA_MEMORY_NOMAP)) {
 		mem_base = memremap(phys_addr, size, MEMREMAP_WC);
 		if (!mem_base)
@@ -735,14 +631,12 @@ err_memunmap:
 }
 int nvmap_dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-			dma_addr_t device_addr, size_t size, int flags, bool is_gpu,
+			dma_addr_t device_addr, size_t size, int flags)
 			u32 granule_size)
 {
 	struct dma_coherent_mem_replica *mem;
 	int ret;
-	ret = nvmap_dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem,
+	ret = nvmap_dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem);
 					     is_gpu, granule_size);
 	if (ret)
 		return ret;
@@ -768,8 +662,7 @@ static int __init nvmap_co_device_init(struct reserved_mem *rmem,
 	if (!co->cma_dev) {
 		err = nvmap_dma_declare_coherent_memory(co->dma_dev, 0,
 				co->base, co->size,
-				DMA_MEMORY_NOMAP, co->is_gpu_co,
+				DMA_MEMORY_NOMAP);
 				co->granule_size);
 		if (!err) {
 			pr_info("%s :dma coherent mem declare %pa,%zu\n",
 				 co->name, &co->base, co->size);
@@ -796,7 +689,7 @@ static const struct reserved_mem_ops nvmap_co_ops = {
 	.device_release	= nvmap_co_device_release,
 };
-int __init nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size)
+int __init nvmap_co_setup(struct reserved_mem *rmem)
 {
 	struct nvmap_platform_carveout *co;
 	ulong start = sched_clock();
@@ -812,10 +705,6 @@ int __init nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size)
 	co->base = rmem->base;
 	co->size = rmem->size;
 	co->cma_dev = NULL;
 	if (!strncmp(co->name, "gpu", 3)) {
 		co->is_gpu_co = true;
 		co->granule_size = granule_size;
 	}
 	nvmap_init_time += sched_clock() - start;
 	return ret;
@@ -829,8 +718,6 @@ int __init nvmap_init(struct platform_device *pdev)
 {
 	int err;
 	struct reserved_mem rmem;
 	u32 granule_size = 0;
 	struct reserved_mem *rmem2;
 	struct device_node *np = pdev->dev.of_node;
 	struct of_phandle_iterator it;
@@ -840,14 +727,6 @@ int __init nvmap_init(struct platform_device *pdev)
 		while (!of_phandle_iterator_next(&it) && it.node) {
 			if (of_device_is_available(it.node) &&
 			    !of_device_is_compatible(it.node, "nvidia,ivm_carveout")) {
 				/* Read granule size in case of gpu carveout */
 				if ((of_device_is_compatible(it.node, "nvidia,gpu0_carveout") ||
 					of_device_is_compatible(it.node, "nvidia,gpu1_carveout")) &&
 					of_property_read_u32(it.node, "granule-size", &granule_size
 						)) {
 					pr_err("granule-size property is missing\n");
 					return -EINVAL;
 				}
 				rmem2 = of_reserved_mem_lookup(it.node);
 				if (!rmem2) {
 					if (!of_property_read_string(it.node, "compatible", &compp))
@@ -855,7 +734,7 @@ int __init nvmap_init(struct platform_device *pdev)
 							compp);
 					return -EINVAL;
 				}
-				nvmap_co_setup(rmem2, granule_size);
+				nvmap_co_setup(rmem2);
 			}
 		}
 	}
--- a/drivers/video/tegra/nvmap/nvmap_ioctl.c
+++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c
@@ -198,10 +198,8 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 	struct nvmap_handle *handle;
 	struct dma_buf *dmabuf = NULL;
 	bool is_ro = false;
-	int err, i;
+	int err;
 	unsigned int page_sz = PAGE_SIZE;
 	long dmabuf_ref = 0;
 	size_t old_size;
 	if (copy_from_user(&op, arg, sizeof(op)))
 		return -EFAULT;
@@ -221,22 +219,7 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 	if (IS_ERR_OR_NULL(handle))
 		return -EINVAL;
 	old_size = handle->size;
 	/*
 	 * In case of Gpu carveout, the handle size needs to be aligned to granule.
 	 */
 	if (op.heap_mask & NVMAP_HEAP_CARVEOUT_GPU) {
 		size_t granule_size = 0;
 		for (i = 0; i < nvmap_dev->nr_carveouts; i++)
 			if (nvmap_dev->heaps[i].heap_bit & NVMAP_HEAP_CARVEOUT_GPU)
 				granule_size = nvmap_dev->heaps[i].carveout->granule_size;
 		handle->size = ALIGN_GRANULE_SIZE(handle->size, granule_size);
 		page_sz = granule_size;
 	}
 	if (!is_nvmap_memory_available(handle->size, op.heap_mask, op.numa_nid)) {
 		handle->size = old_size;
 		nvmap_handle_put(handle);
 		return -ENOMEM;
 	}
@@ -244,7 +227,7 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 	handle->numa_id = op.numa_nid;
 	/* user-space handles are aligned to page boundaries, to prevent
 	 * data leakage. */
-	op.align = max_t(size_t, op.align, page_sz);
+	op.align = max_t(size_t, op.align, PAGE_SIZE);
 	err = nvmap_alloc_handle(client, handle, op.heap_mask, op.align,
 				  0, /* no kind */
@@ -266,8 +249,6 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 				is_ro ? "RO" : "RW");
 	}
 	if (err)
 		handle->size = old_size;
 	nvmap_handle_put(handle);
 	return err;
 }
@@ -1008,15 +989,13 @@ int nvmap_ioctl_get_handle_parameters(struct file *filp, void __user *arg)
 	/*
 	 * Check handle is allocated or not while setting contig.
 	 * If heap type is IOVMM, check if it has flag set for contiguous memory
-	 * allocation request. Otherwise, if handle belongs to any carveout except gpu
+	 * allocation request. Otherwise, if handle belongs to any carveout
-	 * carveout then all allocations are contiguous, hence set contig flag to true.
+	 * then all allocations are contiguous, hence set contig flag to true.
 	 * In case of gpu carveout, if allocation is page based then set contig flag to
 	 * false otherwise true.
 	 */
 	if (handle->alloc &&
-	   ((handle->heap_type == NVMAP_HEAP_IOVMM &&
+		((handle->heap_type == NVMAP_HEAP_IOVMM &&
 		    handle->userflags & NVMAP_HANDLE_PHYS_CONTIG) ||
-	   (handle->heap_type != NVMAP_HEAP_IOVMM && !handle->pgalloc.pages))) {
+		handle->heap_type != NVMAP_HEAP_IOVMM)) {
 		op.contig = 1U;
 	} else {
 		op.contig = 0U;
@@ -1300,10 +1279,6 @@ static int nvmap_query_heap_params(void __user *arg, bool is_numa_aware)
 				heap = nvmap_dev->heaps[i].carveout;
 				op.total = nvmap_query_heap_size(heap);
 				op.free = heap->free_size;
 				if (nvmap_dev->heaps[i].carveout->is_gpu_co) {
 					op.granule_size =
 						nvmap_dev->heaps[i].carveout->granule_size;
 				}
 				break;
 			}
 		}
--- a/drivers/video/tegra/nvmap/nvmap_priv.h
+++ b/drivers/video/tegra/nvmap/nvmap_priv.h
@@ -43,10 +43,6 @@
 #include <linux/fdtable.h>
 #define ALIGN_GRANULE_SIZE(size, GRANULE_SIZE) ((size + GRANULE_SIZE - 1) & ~(GRANULE_SIZE - 1))
 #define PAGE_SHIFT_GRANULE(GRANULE_SIZE) (order_base_2(GRANULE_SIZE))
 #define PAGES_PER_GRANULE(GRANULE_SIZE) (GRANULE_SIZE / PAGE_SIZE)
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
 #define __DMA_ATTR(attrs) attrs
@@ -215,8 +211,10 @@ struct nvmap_handle {
 	struct nvmap_client *owner;
 	struct dma_buf *dmabuf;
 	struct dma_buf *dmabuf_ro;
-	struct nvmap_pgalloc pgalloc;
+	union {
-	struct nvmap_heap_block *carveout;
+		struct nvmap_pgalloc pgalloc;
 		struct nvmap_heap_block *carveout;
 	};
 	bool heap_pgalloc;	/* handle is page allocated (sysmem / iovmm) */
 	bool alloc;		/* handle has memory allocated */
 	bool from_va;		/* handle memory is from VA */
@@ -451,15 +449,14 @@ struct dma_coherent_mem_replica {
 };
 int nvmap_dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-			dma_addr_t device_addr, size_t size, int flags, bool is_gpu,
+			dma_addr_t device_addr, size_t size, int flags);
 			u32 granule_size);
 int nvmap_probe(struct platform_device *pdev);
 int nvmap_remove(struct platform_device *pdev);
 int nvmap_init(struct platform_device *pdev);
 int nvmap_create_carveout(const struct nvmap_platform_carveout *co);
-int nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size);
+int nvmap_co_setup(struct reserved_mem *rmem);
 struct nvmap_heap_block *nvmap_carveout_alloc(struct nvmap_client *dev,
 					      struct nvmap_handle *handle,
--- a/include/linux/nvmap.h
+++ b/include/linux/nvmap.h
@@ -93,8 +93,6 @@ struct nvmap_platform_carveout {
 	bool no_cpu_access; /* carveout can't be accessed from cpu at all */
 	bool init_done;	/* FIXME: remove once all caveouts use reserved-memory */
 	struct nvmap_pm_ops pm_ops;
 	bool is_gpu_co; /* Gpu carveout is treated differently */
 	u32 granule_size; /* Granule size for gpu carveout */
 	int numa_node_id; /* NUMA node id from which the carveout is allocated from */
 };