From 3d11a320cc0a3decee127231ec68503bbcfde0ee Mon Sep 17 00:00:00 2001
From: Ketan Patil <ketanp@nvidia.com>
Date: Wed, 25 Oct 2023 05:00:41 +0000
Subject: [PATCH] video: tegra: nvmap: Add support for numa aware heaps

Add support for numa aware nvmap heaps:
- Add carveout node for gpu1 which is gpu carveout on numa node 1.
- Add numa_node_id property in nvmap_heap and nvmap platform carveout
structures to hold numa id info i.e. numa node on which this heap is
created.
- gpu0 and gpu1 would have same heap bit but different numa node ids.
- Update buffer allocation function: If the user specifies a particular
numa node instance of the heap, then allocate from that particular
instance. By default the numa node id input is NUMA_NO_NODE, so in this
case, iterate over heaps on all numa nodes to satisfy the allocation
request, e.g. if the user asks to allocate from the gpu carveout without
specifying any particular numa node, then iterate over all gpu carveouts
on all numa nodes and allocate from whichever heap instance has
sufficient free memory.
- Update debugfs functions to pass heap type and numa id, so that
debugfs info is fetched from correct heap instance.

Bug 4231517

Change-Id: I77ba4b626546003ea3c40d09351d832100596d9a
Signed-off-by: Ketan Patil <ketanp@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3003219
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/video/tegra/nvmap/nvmap_carveout.c |  45 ++++++--
 drivers/video/tegra/nvmap/nvmap_dev.c      | 128 +++++++++++++++------
 drivers/video/tegra/nvmap/nvmap_handle.c   |   2 +-
 drivers/video/tegra/nvmap/nvmap_heap.c     |   4 +
 drivers/video/tegra/nvmap/nvmap_heap.h     |  11 ++
 drivers/video/tegra/nvmap/nvmap_init.c     |  34 ++++--
 drivers/video/tegra/nvmap/nvmap_ioctl.c    |   2 +-
 drivers/video/tegra/nvmap/nvmap_priv.h     |   2 +-
 include/linux/nvmap.h                      |   1 +
 9 files changed, 169 insertions(+), 60 deletions(-)

diff --git a/drivers/video/tegra/nvmap/nvmap_carveout.c b/drivers/video/tegra/nvmap/nvmap_carveout.c
index f407bd47..52acffc9 100644
--- a/drivers/video/tegra/nvmap/nvmap_carveout.c
+++ b/drivers/video/tegra/nvmap/nvmap_carveout.c
@@ -56,7 +56,8 @@ int nvmap_create_carveout(const struct nvmap_platform_carveout *co)
 	}
 
 	for (i = 0; i < nvmap_dev->nr_heaps; i++)
-		if ((co->usage_mask != NVMAP_HEAP_CARVEOUT_IVM) &&
+		if ((co->usage_mask != NVMAP_HEAP_CARVEOUT_IVM &&
+			co->usage_mask != NVMAP_HEAP_CARVEOUT_GPU) &&
 		    (nvmap_dev->heaps[i].heap_bit & co->usage_mask)) {
 			pr_err("carveout %s already exists\n", co->name);
 			err = -EEXIST;
@@ -87,33 +88,38 @@ int nvmap_create_carveout(const struct nvmap_platform_carveout *co)
 	if (!IS_ERR_OR_NULL(nvmap_dev->debug_root)) {
 		struct dentry *heap_root =
 			debugfs_create_dir(co->name, nvmap_dev->debug_root);
+		struct debugfs_info *carevout_debugfs_info = node->carveout->carevout_debugfs_info;
+
+		carevout_debugfs_info->heap_bit = node->heap_bit;
+		carevout_debugfs_info->numa_id = node->carveout->numa_node_id;
+
 		if (!IS_ERR_OR_NULL(heap_root)) {
 			debugfs_create_file("clients", S_IRUGO,
 				heap_root,
-				(void *)(uintptr_t)node->heap_bit,
+				(void *)carevout_debugfs_info,
 				&debug_clients_fops);
 			debugfs_create_file("allocations", S_IRUGO,
 				heap_root,
-				(void *)(uintptr_t)node->heap_bit,
+				(void *)carevout_debugfs_info,
 				&debug_allocations_fops);
 			debugfs_create_file("all_allocations", S_IRUGO,
 				heap_root,
-				(void *)(uintptr_t)node->heap_bit,
+				(void *)carevout_debugfs_info,
 				&debug_all_allocations_fops);
 			debugfs_create_file("orphan_handles", S_IRUGO,
 				heap_root,
-				(void *)(uintptr_t)node->heap_bit,
+				(void *)carevout_debugfs_info,
 				&debug_orphan_handles_fops);
 			debugfs_create_file("maps", S_IRUGO,
 				heap_root,
-				(void *)(uintptr_t)node->heap_bit,
+				(void *)carevout_debugfs_info,
 				&debug_maps_fops);
 			debugfs_create_bool("no_cpu_access", S_IRUGO,
 				heap_root, (bool *)&co->no_cpu_access);
 #ifdef NVMAP_CONFIG_DEBUG_MAPS
 			debugfs_create_file("device_list", S_IRUGO,
 				heap_root,
-				(void *)(uintptr_t)node->heap_bit,
+				(void *)carevout_debugfs_info,
 				&debug_device_list_fops);
 #endif /* NVMAP_CONFIG_DEBUG_MAPS */
 			nvmap_heap_debugfs_init(heap_root,
@@ -232,10 +238,10 @@ struct nvmap_heap_block *do_nvmap_carveout_alloc(struct nvmap_client *client,
 {
 	struct nvmap_carveout_node *co_heap;
 	struct nvmap_device *dev = nvmap_dev;
+	struct nvmap_heap_block *block = NULL;
 	int i;
 
 	for (i = 0; i < dev->nr_carveouts; i++) {
-		struct nvmap_heap_block *block;
 		co_heap = &dev->heaps[i];
 
 		if (!(co_heap->heap_bit & type))
@@ -244,11 +250,26 @@ struct nvmap_heap_block *do_nvmap_carveout_alloc(struct nvmap_client *client,
 		if (type & NVMAP_HEAP_CARVEOUT_IVM)
 			handle->size = ALIGN(handle->size, NVMAP_IVM_ALIGNMENT);
 
-		block = nvmap_heap_alloc(co_heap->carveout, handle, start);
-		if (block)
-			return block;
+		/*
+		 * When NUMA_NO_NODE is specified, iterate all carveouts with same heap_bit
+		 * and different numa nid. Else, specific numa nid is specified, then allocate
+		 * only from that particular carveout on given numa node.
+		 */
+		if (handle->numa_id == NUMA_NO_NODE) {
+			block = nvmap_heap_alloc(co_heap->carveout, handle, start);
+			if (!block)
+				continue;
+			goto exit;
+		} else {
+			if (handle->numa_id != co_heap->carveout->numa_node_id)
+				continue;
+			block = nvmap_heap_alloc(co_heap->carveout, handle, start);
+			goto exit;
+		}
 	}
-	return NULL;
+
+exit:
+	return block;
 }
 
 struct nvmap_heap_block *nvmap_carveout_alloc(struct nvmap_client *client,
diff --git a/drivers/video/tegra/nvmap/nvmap_dev.c b/drivers/video/tegra/nvmap/nvmap_dev.c
index 57feeda4..45f57ef9 100644
--- a/drivers/video/tegra/nvmap/nvmap_dev.c
+++ b/drivers/video/tegra/nvmap/nvmap_dev.c
@@ -70,6 +70,7 @@ static struct device_dma_parameters nvmap_dma_parameters = {
 	.max_segment_size = UINT_MAX,
 };
 
+static struct debugfs_info iovmm_debugfs_info;
 static int nvmap_open(struct inode *inode, struct file *filp);
 static int nvmap_release(struct inode *inode, struct file *filp);
 static long nvmap_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
@@ -592,13 +593,13 @@ next_page:
 	nvmap_ref_unlock(client);
 }
 
-bool is_nvmap_memory_available(size_t size, uint32_t heap)
+bool is_nvmap_memory_available(size_t size, uint32_t heap, int numa_nid)
 {
 	unsigned long total_num_pages;
 	unsigned int carveout_mask = NVMAP_HEAP_CARVEOUT_MASK;
 	unsigned int iovmm_mask = NVMAP_HEAP_IOVMM;
 	struct nvmap_device *dev = nvmap_dev;
-	bool heap_present = false;
+	bool memory_available = false;
 	int i;
 
 	if (!heap)
@@ -637,17 +638,33 @@ bool is_nvmap_memory_available(size_t size, uint32_t heap)
 		if (!(co_heap->heap_bit & heap))
 			continue;
 
-		heap_present = true;
 		h = co_heap->carveout;
-		if (size > h->free_size) {
-			pr_debug("Requested size is more than available memory");
-			pr_debug("Requested size : %lu B, Available memory : %lu B\n", size,
-					h->free_size);
-                        return false;
-                }
+		/*
+		 * When user does not specify numa node i.e. in default NUMA_NO_NODE case,
+		 * do not consider numa node id. So check for heap instances on all numa
+		 * nodes. When numa node is provided by user, then check heap instance only
+		 * on that numa node.
+		 */
+		if (numa_nid == NUMA_NO_NODE) {
+			if (size > h->free_size)
+				continue;
+			memory_available = true;
+			goto exit;
+		} else {
+			if (h->numa_node_id != numa_nid)
+				continue;
+			else if (size > h->free_size)
+				memory_available = false;
+			else
+				memory_available = true;
+
+			goto exit;
+		}
 		break;
 	}
-	return heap_present;
+
+exit:
+	return memory_available;
 }
 
 /* compute the total amount of handle physical memory that is mapped
@@ -750,7 +767,7 @@ next_page:
 }
 
 static void nvmap_get_client_mss(struct nvmap_client *client,
-				 u64 *total, u32 heap_type)
+				 u64 *total, u32 heap_type, int numa_id)
 {
 	struct rb_node *n;
 
@@ -761,15 +778,21 @@ static void nvmap_get_client_mss(struct nvmap_client *client,
 		struct nvmap_handle_ref *ref =
 			rb_entry(n, struct nvmap_handle_ref, node);
 		struct nvmap_handle *handle = ref->handle;
-		if (handle->alloc && handle->heap_type == heap_type)
+		if (handle->alloc && handle->heap_type == heap_type) {
+			if (heap_type != NVMAP_HEAP_IOVMM &&
+				(nvmap_block_to_heap(handle->carveout)->numa_node_id !=
+				 numa_id))
+				continue;
+
 			*total += handle->size /
 				  atomic_read(&handle->share_count);
+		}
 	}
 	nvmap_ref_unlock(client);
 }
 
 #define PSS_SHIFT 12
-static void nvmap_get_total_mss(u64 *pss, u64 *total, u32 heap_type)
+static void nvmap_get_total_mss(u64 *pss, u64 *total, u32 heap_type, int numa_id)
 {
 	int i;
 	struct rb_node *n;
@@ -789,6 +812,11 @@ static void nvmap_get_total_mss(u64 *pss, u64 *total, u32 heap_type)
 		if (!h || !h->alloc || h->heap_type != heap_type)
 			continue;
 
+		if (heap_type != NVMAP_HEAP_IOVMM &&
+			(nvmap_block_to_heap(h->carveout)->numa_node_id !=
+			numa_id))
+			continue;
+
 		*total += h->size;
 		if (!pss)
 			continue;
@@ -807,7 +835,9 @@ static int nvmap_debug_allocations_show(struct seq_file *s, void *unused)
 {
 	u64 total;
 	struct nvmap_client *client;
-	u32 heap_type = (u32)(uintptr_t)s->private;
+	struct debugfs_info *debugfs_information = (struct debugfs_info *)s->private;
+	u32 heap_type = debugfs_information->heap_bit;
+	int numa_id = debugfs_information->numa_id;
 
 	mutex_lock(&nvmap_dev->clients_lock);
 	seq_printf(s, "%-18s %18s %8s %11s\n",
@@ -818,13 +848,13 @@ static int nvmap_debug_allocations_show(struct seq_file *s, void *unused)
 	list_for_each_entry(client, &nvmap_dev->clients, list) {
 		u64 client_total;
 		client_stringify(client, s);
-		nvmap_get_client_mss(client, &client_total, heap_type);
+		nvmap_get_client_mss(client, &client_total, heap_type, numa_id);
 		seq_printf(s, " %10lluK\n", K(client_total));
 		allocations_stringify(client, s, heap_type);
 		seq_printf(s, "\n");
 	}
 	mutex_unlock(&nvmap_dev->clients_lock);
-	nvmap_get_total_mss(NULL, &total, heap_type);
+	nvmap_get_total_mss(NULL, &total, heap_type, numa_id);
 	seq_printf(s, "%-18s %-18s %8s %10lluK\n", "total", "", "", K(total));
 	return 0;
 }
@@ -846,7 +876,9 @@ DEBUGFS_OPEN_FOPS(free_size);
 #ifdef NVMAP_CONFIG_DEBUG_MAPS
 static int nvmap_debug_device_list_show(struct seq_file *s, void *unused)
 {
-	u32 heap_type = (u32)(uintptr_t)s->private;
+	struct debugfs_info *debugfs_information = (struct debugfs_info *)s->private;
+	u32 heap_type = debugfs_information->heap_bit;
+	int numa_id = debugfs_information->numa_id;
 	struct rb_node *n = NULL;
 	struct nvmap_device_list *dl = NULL;
 	int i;
@@ -857,7 +889,9 @@ static int nvmap_debug_device_list_show(struct seq_file *s, void *unused)
 		/* Iterate over all heaps to find the matching heap */
 		for (i = 0; i < nvmap_dev->nr_carveouts; i++) {
 			if (heap_type & nvmap_dev->heaps[i].heap_bit) {
-				if (nvmap_dev->heaps[i].carveout) {
+				if (nvmap_dev->heaps[i].carveout && (nvmap_block_to_heap
+					(nvmap_dev->heaps[i].carveout)->numa_node_id
+					 != numa_id)) {
 					n = rb_first(&nvmap_dev->heaps[i].carveout->device_names);
 					break;
 				}
@@ -878,10 +912,11 @@ DEBUGFS_OPEN_FOPS(device_list);
 
 static int nvmap_debug_all_allocations_show(struct seq_file *s, void *unused)
 {
-	u32 heap_type = (u32)(uintptr_t)s->private;
+	struct debugfs_info *debugfs_information = (struct debugfs_info *)s->private;
+	u32 heap_type = debugfs_information->heap_bit;
+	int numa_id = debugfs_information->numa_id;
 	struct rb_node *n;
 
-
 	spin_lock(&nvmap_dev->handle_lock);
 	seq_printf(s, "%8s %11s %9s %6s %6s %6s %6s %8s\n",
 			"BASE", "SIZE", "USERFLAGS", "REFS",
@@ -894,12 +929,16 @@ static int nvmap_debug_all_allocations_show(struct seq_file *s, void *unused)
 			rb_entry(n, struct nvmap_handle, node);
 		int i = 0;
 
-		if (handle->alloc && handle->heap_type == heap_type) {
+		if (handle->alloc && handle->heap_type == debugfs_information->heap_bit) {
 			phys_addr_t base = heap_type == NVMAP_HEAP_IOVMM ? 0 :
 					   handle->heap_pgalloc ? 0 :
 					   (handle->carveout->base);
 			size_t size = K(handle->size);
 
+			if (heap_type != NVMAP_HEAP_IOVMM &&
+			    (nvmap_block_to_heap(handle->carveout)->numa_node_id != numa_id))
+				continue;
+
 next_page:
 			if ((heap_type == NVMAP_HEAP_CARVEOUT_VPR) && handle->heap_pgalloc) {
 				base = page_to_phys(handle->pgalloc.pages[i++]);
@@ -933,7 +972,9 @@ DEBUGFS_OPEN_FOPS(all_allocations);
 
 static int nvmap_debug_orphan_handles_show(struct seq_file *s, void *unused)
 {
-	u32 heap_type = (u32)(uintptr_t)s->private;
+	struct debugfs_info *debugfs_information = (struct debugfs_info *)s->private;
+	u32 heap_type = debugfs_information->heap_bit;
+	int numa_id = debugfs_information->numa_id;
 	struct rb_node *n;
 
 
@@ -956,6 +997,11 @@ static int nvmap_debug_orphan_handles_show(struct seq_file *s, void *unused)
 					   (handle->carveout->base);
 			size_t size = K(handle->size);
 
+			if (heap_type != NVMAP_HEAP_IOVMM &&
+				(nvmap_block_to_heap(handle->carveout)->numa_node_id !=
+					numa_id))
+				continue;
+
 next_page:
 			if ((heap_type == NVMAP_HEAP_CARVEOUT_VPR) && handle->heap_pgalloc) {
 				base = page_to_phys(handle->pgalloc.pages[i++]);
@@ -990,7 +1036,9 @@ static int nvmap_debug_maps_show(struct seq_file *s, void *unused)
 {
 	u64 total;
 	struct nvmap_client *client;
-	u32 heap_type = (u32)(uintptr_t)s->private;
+	struct debugfs_info *debugfs_information = (struct debugfs_info *)s->private;
+	u32 heap_type = debugfs_information->heap_bit;
+	int numa_id = debugfs_information->numa_id;
 
 	mutex_lock(&nvmap_dev->clients_lock);
 	seq_printf(s, "%-18s %18s %8s %11s\n",
@@ -1002,14 +1050,14 @@ static int nvmap_debug_maps_show(struct seq_file *s, void *unused)
 	list_for_each_entry(client, &nvmap_dev->clients, list) {
 		u64 client_total;
 		client_stringify(client, s);
-		nvmap_get_client_mss(client, &client_total, heap_type);
+		nvmap_get_client_mss(client, &client_total, heap_type, numa_id);
 		seq_printf(s, " %10lluK\n", K(client_total));
 		maps_stringify(client, s, heap_type);
 		seq_printf(s, "\n");
 	}
 	mutex_unlock(&nvmap_dev->clients_lock);
 
-	nvmap_get_total_mss(NULL, &total, heap_type);
+	nvmap_get_total_mss(NULL, &total, heap_type, numa_id);
 	seq_printf(s, "%-18s %-18s %8s %10lluK\n", "total", "", "", K(total));
 	return 0;
 }
@@ -1020,7 +1068,9 @@ static int nvmap_debug_clients_show(struct seq_file *s, void *unused)
 {
 	u64 total;
 	struct nvmap_client *client;
-	ulong heap_type = (ulong)s->private;
+	struct debugfs_info *debugfs_information = (struct debugfs_info *)s->private;
+	u32 heap_type = debugfs_information->heap_bit;
+	int numa_id = debugfs_information->numa_id;
 
 	mutex_lock(&nvmap_dev->clients_lock);
 	seq_printf(s, "%-18s %18s %8s %11s\n",
@@ -1028,11 +1078,11 @@ static int nvmap_debug_clients_show(struct seq_file *s, void *unused)
 	list_for_each_entry(client, &nvmap_dev->clients, list) {
 		u64 client_total;
 		client_stringify(client, s);
-		nvmap_get_client_mss(client, &client_total, heap_type);
+		nvmap_get_client_mss(client, &client_total, heap_type, numa_id);
 		seq_printf(s, " %10lluK\n", K(client_total));
 	}
 	mutex_unlock(&nvmap_dev->clients_lock);
-	nvmap_get_total_mss(NULL, &total, heap_type);
+	nvmap_get_total_mss(NULL, &total, heap_type, numa_id);
 	seq_printf(s, "%-18s %18s %8s %10lluK\n", "total", "", "", K(total));
 	return 0;
 }
@@ -1291,7 +1341,7 @@ static int nvmap_debug_iovmm_procrank_show(struct seq_file *s, void *unused)
 	}
 	mutex_unlock(&dev->clients_lock);
 
-	nvmap_get_total_mss(&total_pss, &total_memory, NVMAP_HEAP_IOVMM);
+	nvmap_get_total_mss(&total_pss, &total_memory, NVMAP_HEAP_IOVMM, NUMA_NO_NODE);
 	seq_printf(s, "%-18s %18s %8s %10lluK %10lluK\n",
 		"total", "", "", K(total_pss), K(total_memory));
 	return 0;
@@ -1305,7 +1355,7 @@ ulong nvmap_iovmm_get_used_pages(void)
 {
 	u64 total;
 
-	nvmap_get_total_mss(NULL, &total, NVMAP_HEAP_IOVMM);
+	nvmap_get_total_mss(NULL, &total, NVMAP_HEAP_IOVMM, NUMA_NO_NODE);
 	return total >> PAGE_SHIFT;
 }
 #endif
@@ -1315,28 +1365,32 @@ static void nvmap_iovmm_debugfs_init(void)
 	if (!IS_ERR_OR_NULL(nvmap_dev->debug_root)) {
 		struct dentry *iovmm_root =
 			debugfs_create_dir("iovmm", nvmap_dev->debug_root);
+
+		iovmm_debugfs_info.heap_bit = NVMAP_HEAP_IOVMM;
+		iovmm_debugfs_info.numa_id = NUMA_NO_NODE;
+
 		if (!IS_ERR_OR_NULL(iovmm_root)) {
 			debugfs_create_file("clients", S_IRUGO, iovmm_root,
-				(void *)(uintptr_t)NVMAP_HEAP_IOVMM,
+				(void *)&iovmm_debugfs_info,
 				&debug_clients_fops);
 			debugfs_create_file("allocations", S_IRUGO, iovmm_root,
-				(void *)(uintptr_t)NVMAP_HEAP_IOVMM,
+				(void *)&iovmm_debugfs_info,
 				&debug_allocations_fops);
 			debugfs_create_file("all_allocations", S_IRUGO,
-				iovmm_root, (void *)(uintptr_t)NVMAP_HEAP_IOVMM,
+				iovmm_root, (void *)&iovmm_debugfs_info,
 				&debug_all_allocations_fops);
 			debugfs_create_file("orphan_handles", S_IRUGO,
-				iovmm_root, (void *)(uintptr_t)NVMAP_HEAP_IOVMM,
+				iovmm_root, (void *)&iovmm_debugfs_info,
 				&debug_orphan_handles_fops);
 			debugfs_create_file("maps", S_IRUGO, iovmm_root,
-				(void *)(uintptr_t)NVMAP_HEAP_IOVMM,
+				(void *)&iovmm_debugfs_info,
 				&debug_maps_fops);
 			debugfs_create_file("free_size", S_IRUGO, iovmm_root,
-				(void *)(uintptr_t)NVMAP_HEAP_IOVMM,
+				(void *)&iovmm_debugfs_info,
 				&debug_free_size_fops);
 #ifdef NVMAP_CONFIG_DEBUG_MAPS
 			debugfs_create_file("device_list", S_IRUGO, iovmm_root,
-				(void *)(uintptr_t)NVMAP_HEAP_IOVMM,
+				(void *)&iovmm_debugfs_info,
 				&debug_device_list_fops);
 #endif /* NVMAP_CONFIG_DEBUG_MAPS */
 
diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c
index 9953b78c..5f7c080b 100644
--- a/drivers/video/tegra/nvmap/nvmap_handle.c
+++ b/drivers/video/tegra/nvmap/nvmap_handle.c
@@ -185,7 +185,7 @@ struct nvmap_handle_ref *nvmap_create_handle_from_va(struct nvmap_client *client
 		size = vma->vm_end - vaddr;
 
 	/* Don't allow exuberantly large sizes. */
-	if (!is_nvmap_memory_available(size, NVMAP_HEAP_IOVMM)) {
+	if (!is_nvmap_memory_available(size, NVMAP_HEAP_IOVMM, NUMA_NO_NODE)) {
 		pr_debug("Cannot allocate %zu bytes.\n", size);
 		nvmap_release_mmap_read_lock(mm);
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c
index 485cc708..df357ace 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.c
+++ b/drivers/video/tegra/nvmap/nvmap_heap.c
@@ -506,6 +506,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 	h->can_alloc = !!co->can_alloc;
 	h->is_ivm = co->is_ivm;
 	h->is_gpu_co = co->is_gpu_co;
+	h->numa_node_id = co->numa_node_id;
 	h->granule_size = co->granule_size;
 	h->len = len;
 	h->free_size = len;
@@ -517,6 +518,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 	if (co->pm_ops.idle)
 		h->pm_ops.idle = co->pm_ops.idle;
 
+	h->carevout_debugfs_info = kmalloc(sizeof(struct debugfs_info), GFP_KERNEL);
 	INIT_LIST_HEAD(&h->all_list);
 	mutex_init(&h->lock);
 #ifdef NVMAP_CONFIG_DEBUG_MAPS
@@ -546,6 +548,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 		co->name, (void *)(uintptr_t)base, len/1024);
 	return h;
 fail:
+	kfree(h->carevout_debugfs_info);
 	if (h->dma_dev->kobj.name)
 		kfree_const(h->dma_dev->kobj.name);
 	kfree(h);
@@ -562,6 +565,7 @@ void nvmap_heap_destroy(struct nvmap_heap *heap)
 	if (heap->is_ivm)
 		kfree(heap->name);
 
+	kfree(heap->carevout_debugfs_info);
 #ifdef NVMAP_LOADABLE_MODULE
 	nvmap_dma_release_coherent_memory((struct dma_coherent_mem_replica *)
 					  heap->dma_dev->dma_mem);
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.h b/drivers/video/tegra/nvmap/nvmap_heap.h
index e4d2b446..3e60f289 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.h
+++ b/drivers/video/tegra/nvmap/nvmap_heap.h
@@ -18,6 +18,15 @@ struct nvmap_heap_block {
 	struct nvmap_handle *handle;
 };
 
+/*
+ * Private data passed to the nvmap debugfs files so that their show
+ * handlers know the heap type and the numa node id to report on.
+ */
+struct debugfs_info {
+	unsigned int heap_bit; /* Heap type bit, e.g. NVMAP_HEAP_IOVMM for iovmm nodes */
+	int numa_id; /* Numa node id of the heap; NUMA_NO_NODE for iovmm nodes */
+};
+
 struct nvmap_heap {
 	struct list_head all_list;
 	struct mutex lock;
@@ -33,6 +42,7 @@ struct nvmap_heap {
 	bool is_ivm;
 	bool is_gpu_co;
 	u32 granule_size;
+	int numa_node_id;
 	bool can_alloc; /* Used only if is_ivm == true */
 	unsigned int peer; /* Used only if is_ivm == true */
 	unsigned int vm_id; /* Used only if is_ivm == true */
@@ -40,6 +50,7 @@ struct nvmap_heap {
 #ifdef NVMAP_CONFIG_DEBUG_MAPS
 	struct rb_root device_names;
 #endif /* NVMAP_CONFIG_DEBUG_MAPS */
+	struct debugfs_info *carevout_debugfs_info; /* Used for storing debugfs info */
 };
 
 struct list_block {
diff --git a/drivers/video/tegra/nvmap/nvmap_init.c b/drivers/video/tegra/nvmap/nvmap_init.c
index dd36eebd..c2a335e6 100644
--- a/drivers/video/tegra/nvmap/nvmap_init.c
+++ b/drivers/video/tegra/nvmap/nvmap_init.c
@@ -108,6 +108,7 @@ static struct nvmap_platform_carveout nvmap_carveouts[] = {
 #ifdef NVMAP_CONFIG_VPR_RESIZE
 		.dma_info	= &generic_dma_info,
 #endif
+		.numa_node_id = 0,
 	},
 	[1] = {
 		.name		= "vpr",
@@ -120,6 +121,7 @@ static struct nvmap_platform_carveout nvmap_carveouts[] = {
 		.dma_info	= &vpr_dma_info,
 #endif
 		.enable_static_dma_map = true,
+		.numa_node_id = 0,
 	},
 	[2] = {
 		.name		= "vidmem",
@@ -128,41 +130,55 @@ static struct nvmap_platform_carveout nvmap_carveouts[] = {
 		.size		= 0,
 		.disable_dynamic_dma_map = true,
 		.no_cpu_access = true,
+		.numa_node_id = 0,
 	},
 	[3] = {
 		.name		= "fsi",
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_FSI,
 		.base		= 0,
 		.size		= 0,
+		.numa_node_id = 0,
 	},
 	[4] = {
-		.name		= "gpu",
+		.name		= "gpu0",
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_GPU,
 		.base		= 0,
 		.size		= 0,
+		.numa_node_id = 0,
+	},
+	[5] = {
+		.name		= "gpu1",
+		.usage_mask	= NVMAP_HEAP_CARVEOUT_GPU,
+		.base		= 0,
+		.size		= 0,
+		.numa_node_id = 1,
 	},
 	/* Need uninitialized entries for IVM carveouts */
-	[5] = {
-		.name		= NULL,
-		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
-	},
 	[6] = {
 		.name		= NULL,
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
+		.numa_node_id = 0,
 	},
 	[7] = {
 		.name		= NULL,
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
+		.numa_node_id = 0,
 	},
 	[8] = {
 		.name		= NULL,
 		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
+		.numa_node_id = 0,
+	},
+	[9] = {
+		.name		= NULL,
+		.usage_mask	= NVMAP_HEAP_CARVEOUT_IVM,
+		.numa_node_id = 0,
 	},
 };
 
 static struct nvmap_platform_data nvmap_data = {
 	.carveouts	= nvmap_carveouts,
-	.nr_carveouts	= 5,
+	.nr_carveouts	= 6,
 };
 
 static struct nvmap_platform_carveout *nvmap_get_carveout_pdata(const char *name)
@@ -944,8 +960,10 @@ int __init nvmap_init(struct platform_device *pdev)
 			if (of_device_is_available(it.node) &&
 			    !of_device_is_compatible(it.node, "nvidia,ivm_carveout")) {
 				/* Read granule size in case of gpu carveout */
-				if (of_device_is_compatible(it.node, "nvidia,gpu_carveout")
-				    && of_property_read_u32(it.node, "granule-size", &granule_size)) {
+				if ((of_device_is_compatible(it.node, "nvidia,gpu0_carveout") ||
+					of_device_is_compatible(it.node, "nvidia,gpu1_carveout")) &&
+					of_property_read_u32(it.node, "granule-size", &granule_size
+						)) {
 					pr_err("granule-size property is missing\n");
 					return -EINVAL;
 				}
diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c
index 29135405..6d6d0923 100644
--- a/drivers/video/tegra/nvmap/nvmap_ioctl.c
+++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c
@@ -227,7 +227,7 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 		page_sz = granule_size;
 	}
 
-	if (!is_nvmap_memory_available(handle->size, op.heap_mask)) {
+	if (!is_nvmap_memory_available(handle->size, op.heap_mask, op.numa_nid)) {
 		nvmap_handle_put(handle);
 		return -ENOMEM;
 	}
diff --git a/drivers/video/tegra/nvmap/nvmap_priv.h b/drivers/video/tegra/nvmap/nvmap_priv.h
index 232326cb..e5830b64 100644
--- a/drivers/video/tegra/nvmap/nvmap_priv.h
+++ b/drivers/video/tegra/nvmap/nvmap_priv.h
@@ -915,7 +915,7 @@ static inline struct dma_buf *nvmap_id_array_id_release(struct xarray *xarr, u32
 #endif
 void *nvmap_dmabuf_get_drv_data(struct dma_buf *dmabuf,
 		struct device *dev);
-bool is_nvmap_memory_available(size_t size, uint32_t heap);
+bool is_nvmap_memory_available(size_t size, uint32_t heap, int numa_nid);
 int system_heap_free_mem(unsigned long *mem_val);
 
 #ifdef NVMAP_CONFIG_DEBUG_MAPS
diff --git a/include/linux/nvmap.h b/include/linux/nvmap.h
index fa02dcf6..fdff8e60 100644
--- a/include/linux/nvmap.h
+++ b/include/linux/nvmap.h
@@ -96,6 +96,7 @@ struct nvmap_platform_carveout {
 	struct nvmap_pm_ops pm_ops;
 	bool is_gpu_co; /* Gpu carveout is treated differently */
 	u32 granule_size; /* Granule size for gpu carveout */
+	int numa_node_id; /* NUMA node id from which the carveout is allocated from */
 };
 
 struct nvmap_platform_data {