diff --git a/drivers/video/tegra/nvmap/nvmap_alloc.c b/drivers/video/tegra/nvmap/nvmap_alloc.c
index 3a5a31e5..2e07eeba 100644
--- a/drivers/video/tegra/nvmap/nvmap_alloc.c
+++ b/drivers/video/tegra/nvmap/nvmap_alloc.c
@@ -719,22 +719,28 @@ static void alloc_handle(struct nvmap_client *client,
 		if (h->pgalloc.pages &&
 		    h->heap_type == NVMAP_HEAP_CARVEOUT_COMPRESSION) {
 			unsigned long page_count;
+			u32 granule_size = 0;
 			int i;
+			struct list_block *lb;
 
+			lb = container_of(b, struct list_block, block);
+			granule_size = lb->heap->granule_size;
 			page_count = h->size >> PAGE_SHIFT;
-			/* Iterate over 2MB chunks */
-			for (i = 0; i < page_count; i += PAGES_PER_2MB) {
+			/* Iterate over granules */
+			for (i = 0; i < page_count;
+			     i += PAGES_PER_GRANULE(granule_size)) {
 				cpu_addr = memremap(page_to_phys(
 						h->pgalloc.pages[i]),
-						SIZE_2MB, MEMREMAP_WB);
+						granule_size,
+						MEMREMAP_WB);
 				if (cpu_addr != NULL) {
-					memset(cpu_addr, 0, SIZE_2MB);
+					memset(cpu_addr, 0, granule_size);
 #ifdef NVMAP_UPSTREAM_KERNEL
 					arch_invalidate_pmem(cpu_addr,
-							SIZE_2MB);
+							granule_size);
 #else
 					__dma_flush_area(cpu_addr,
-							SIZE_2MB);
+							granule_size);
 #endif
 					memunmap(cpu_addr);
 				}
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c
index e137543b..27f92f03 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.c
+++ b/drivers/video/tegra/nvmap/nvmap_heap.c
@@ -52,17 +52,6 @@
 
 static struct kmem_cache *heap_block_cache;
 
-struct list_block {
-	struct nvmap_heap_block block;
-	struct list_head all_list;
-	unsigned int mem_prot;
-	phys_addr_t orig_addr;
-	size_t size;
-	size_t align;
-	struct nvmap_heap *heap;
-	struct list_head free_list;
-};
-
 struct device *dma_dev_from_handle(unsigned long type)
 {
 	int i;
@@ -161,9 +150,9 @@ static phys_addr_t nvmap_alloc_mem(struct nvmap_heap *h, size_t len,
 	err = nvmap_dma_alloc_attrs(dev, len, &pa, GFP_KERNEL,
 			DMA_ATTR_ALLOC_EXACT_SIZE);
 	/*
-	 * In case of Compression carveout, try to allocate the entire chunk in physically
+	 * In case of Compression carveout, try to allocate the entire granule in a physically
 	 * contiguous manner. If it returns error, then try to allocate the memory in
-	 * 2MB chunks.
+	 * granules of the specified size.
 	 */
 	if (h->is_compression_co && IS_ERR(err)) {
 		err = nvmap_dma_alloc_attrs(dev, len, &pa,
@@ -493,7 +482,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 					    DMA_MEMORY_NOMAP);
 #else
 	err = nvmap_dma_declare_coherent_memory(h->dma_dev, 0, base, len,
-			DMA_MEMORY_NOMAP, co->is_compression_co);
+			DMA_MEMORY_NOMAP, co->is_compression_co, co->granule_size);
 #endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
 	if (!err) {
@@ -517,6 +506,7 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent,
 	h->can_alloc = !!co->can_alloc;
 	h->is_ivm = co->is_ivm;
 	h->is_compression_co = co->is_compression_co;
+	h->granule_size = co->granule_size;
 	h->len = len;
 	h->free_size = len;
 	h->peer = co->peer;
@@ -645,15 +635,18 @@ int nvmap_flush_heap_block(struct nvmap_client *client,
 	h = block->handle;
 	if (h->pgalloc.pages) {
 		unsigned long page_count, i;
+		u32 granule_size = 0;
+		struct list_block *b = container_of(block, struct list_block, block);
 
 		/*
-		 * For Compression carveout with physically discontiguous 2MB chunks,
-		 * iterate over 2MB chunks and do cache maint for it.
+		 * For Compression carveout with physically discontiguous granules,
+		 * iterate over granules and do cache maint for each.
 		 */
 		page_count = h->size >> PAGE_SHIFT;
-		for (i = 0; i < page_count; i += PAGES_PER_2MB) {
+		granule_size = b->heap->granule_size;
+		for (i = 0; i < page_count; i += PAGES_PER_GRANULE(granule_size)) {
			phys = page_to_phys(h->pgalloc.pages[i]);
-			end = phys + SIZE_2MB;
+			end = phys + granule_size;
 			ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV,
 					phys, end, true, prot != NVMAP_HANDLE_INNER_CACHEABLE);
 			if (ret)
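Both hunks above recover the owning heap, and with it the granule size, from a plain struct nvmap_heap_block pointer via container_of(). That only works because struct list_block becomes visible outside nvmap_heap.c in the nvmap_heap.h change below. A minimal userspace sketch of the pattern, with stand-in types in place of the real nvmap structures:

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for the nvmap types; only the fields used here. */
struct nvmap_heap { unsigned int granule_size; };
struct nvmap_heap_block { int dummy; };

struct list_block {
	struct nvmap_heap_block block;	/* embedded member */
	struct nvmap_heap *heap;	/* back-pointer to the owning heap */
};

/* container_of(): recover the enclosing struct from a member pointer. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct nvmap_heap heap = { .granule_size = 2 * 1024 * 1024 };
	struct list_block lb = { .heap = &heap };
	struct nvmap_heap_block *b = &lb.block;	/* what alloc_handle() sees */
	struct list_block *owner = container_of(b, struct list_block, block);

	printf("granule_size = %u\n", owner->heap->granule_size);
	return 0;
}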
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.h b/drivers/video/tegra/nvmap/nvmap_heap.h
index b0f6a4cb..c9d574d7 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.h
+++ b/drivers/video/tegra/nvmap/nvmap_heap.h
@@ -32,6 +32,7 @@ struct nvmap_heap {
 	struct device *dma_dev;
 	bool is_ivm;
 	bool is_compression_co;
+	u32 granule_size;
 	bool can_alloc; /* Used only if is_ivm == true */
 	unsigned int peer; /* Used only if is_ivm == true */
 	unsigned int vm_id; /* Used only if is_ivm == true */
@@ -41,6 +42,17 @@ struct nvmap_heap {
 #endif /* NVMAP_CONFIG_DEBUG_MAPS */
 };
 
+struct list_block {
+	struct nvmap_heap_block block;
+	struct list_head all_list;
+	unsigned int mem_prot;
+	phys_addr_t orig_addr;
+	size_t size;
+	size_t align;
+	struct nvmap_heap *heap;
+	struct list_head free_list;
+};
+
 struct nvmap_heap *nvmap_heap_create(struct device *parent,
 				     const struct nvmap_platform_carveout *co,
 				     phys_addr_t base, size_t len, void *arg);
diff --git a/drivers/video/tegra/nvmap/nvmap_init.c b/drivers/video/tegra/nvmap/nvmap_init.c
index 5030c2b7..a38a239a 100644
--- a/drivers/video/tegra/nvmap/nvmap_init.c
+++ b/drivers/video/tegra/nvmap/nvmap_init.c
@@ -364,6 +364,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	int *bitmap_nos = NULL;
 	const char *device_name;
 	bool is_compression = false;
+	u32 granule_size = 0;
 
 	device_name = dev_name(dev);
 	if (!device_name) {
@@ -371,12 +372,17 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 		return NULL;
 	}
 
-	if (!strncmp(device_name, "compression", 11))
+	if (!strncmp(device_name, "compression", 11)) {
+		struct nvmap_platform_carveout *co;
+
 		is_compression = true;
+		co = nvmap_get_carveout_pdata("compression");
+		granule_size = co->granule_size;
+	}
 
 	if (is_compression) {
-		/* Calculation for Compression carveout should consider 2MB chunks */
-		count = size >> PAGE_SHIFT_2MB;
+		/* Calculation for Compression carveout should consider granule size */
+		count = size >> PAGE_SHIFT_GRANULE(granule_size);
 	} else {
 		if (dma_get_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs)) {
 			page_count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -404,7 +410,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	if (!is_compression)
 		pages = nvmap_kvzalloc_pages(count);
 	else
-		pages = nvmap_kvzalloc_pages(count * PAGES_PER_2MB);
+		pages = nvmap_kvzalloc_pages(count * PAGES_PER_GRANULE(granule_size));
 
 	if (!pages) {
 		kvfree(bitmap_nos);
@@ -418,7 +424,8 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 
 	if (!is_compression && unlikely(size > ((u64)mem->size << PAGE_SHIFT)))
 		goto err;
-	else if (is_compression && unlikely(size > ((u64)mem->size << PAGE_SHIFT_2MB)))
+	else if (is_compression &&
+		 unlikely(size > ((u64)mem->size << PAGE_SHIFT_GRANULE(granule_size))))
 		goto err;
 
 	if (((mem->flags & DMA_MEMORY_NOMAP) &&
@@ -447,10 +454,10 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 		if (!is_compression)
 			pages[i++] = pfn_to_page(mem->pfn_base + pageno);
 		else {
-			/* Handle 2MB chunks */
-			for (k = 0; k < (alloc_size * PAGES_PER_2MB); k++)
-				pages[i++] = pfn_to_page(mem->pfn_base +
-						pageno * PAGES_PER_2MB + k);
+			/* Handle granules */
+			for (k = 0; k < (alloc_size * PAGES_PER_GRANULE(granule_size)); k++)
+				pages[i++] = pfn_to_page(mem->pfn_base + pageno *
+						PAGES_PER_GRANULE(granule_size) + k);
 		}
 	}
 
@@ -464,7 +471,7 @@ static void *__nvmap_dma_alloc_from_coherent(struct device *dev,
 	if (!is_compression)
 		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT);
 	else
-		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT_2MB);
+		*dma_handle = mem->device_base + (first_pageno << PAGE_SHIFT_GRANULE(granule_size));
 
 	if (!(mem->flags & DMA_MEMORY_NOMAP)) {
 		addr = mem->virt_base + (first_pageno << PAGE_SHIFT);
@@ -517,6 +524,7 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	struct dma_coherent_mem_replica *mem;
 	bool is_compression = false;
 	const char *device_name;
+	u32 granule_size = 0;
 
 	if (!dev || !dev->dma_mem)
 		return;
@@ -527,8 +535,13 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 		return;
 	}
 
-	if (!strncmp(device_name, "compression", 11))
+	if (!strncmp(device_name, "compression", 11)) {
+		struct nvmap_platform_carveout *co;
+
 		is_compression = true;
+		co = nvmap_get_carveout_pdata("compression");
+		granule_size = co->granule_size;
+	}
 
 	mem = (struct dma_coherent_mem_replica *)(dev->dma_mem);
 	if ((mem->flags & DMA_MEMORY_NOMAP) &&
@@ -546,8 +559,9 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 			bitmap_clear(mem->bitmap, pageno, 1);
 		}
 	} else {
-		for (i = 0; i < (size >> PAGE_SHIFT); i += PAGES_PER_2MB) {
-			pageno = (page_to_pfn(pages[i]) - mem->pfn_base) / PAGES_PER_2MB;
+		for (i = 0; i < (size >> PAGE_SHIFT); i += PAGES_PER_GRANULE(granule_size)) {
+			pageno = (page_to_pfn(pages[i]) - mem->pfn_base) /
+					PAGES_PER_GRANULE(granule_size);
 			if (WARN_ONCE(pageno > mem->size, "invalid pageno:%d\n",
 					pageno))
 				continue;
@@ -564,7 +578,7 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	else
 		mem_addr = mem->virt_base;
 
-	page_shift_val = is_compression ? PAGE_SHIFT_2MB : PAGE_SHIFT;
+	page_shift_val = is_compression ? PAGE_SHIFT_GRANULE(granule_size) : PAGE_SHIFT;
 	if (mem && cpu_addr >= mem_addr &&
 	    cpu_addr - mem_addr < (u64)mem->size << page_shift_val) {
 		unsigned int page = (cpu_addr - mem_addr) >> page_shift_val;
@@ -573,7 +587,7 @@ void nvmap_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 
 		if (DMA_ATTR_ALLOC_EXACT_SIZE & attrs) {
 			if (is_compression)
-				count = ALIGN_2MB(size) >> page_shift_val;
+				count = ALIGN_GRANULE_SIZE(size, granule_size) >> page_shift_val;
 			else
 				count = PAGE_ALIGN(size) >> page_shift_val;
 		}
@@ -664,7 +678,7 @@ static int nvmap_dma_assign_coherent_memory(struct device *dev,
 
 static int nvmap_dma_init_coherent_memory(
 	phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
-	struct dma_coherent_mem_replica **mem, bool is_compression)
+	struct dma_coherent_mem_replica **mem, bool is_compression, u32 granule_size)
 {
 	struct dma_coherent_mem_replica *dma_mem = NULL;
 	void *mem_base = NULL;
@@ -676,7 +690,7 @@ static int nvmap_dma_init_coherent_memory(
 		return -EINVAL;
 
 	if (is_compression)
-		pages = size >> PAGE_SHIFT_2MB;
+		pages = size >> PAGE_SHIFT_GRANULE(granule_size);
 	else
 		pages = size >> PAGE_SHIFT;
 
@@ -719,13 +733,14 @@ err_memunmap:
 }
 
 int nvmap_dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-	dma_addr_t device_addr, size_t size, int flags, bool is_compression)
+	dma_addr_t device_addr, size_t size, int flags, bool is_compression,
+	u32 granule_size)
 {
 	struct dma_coherent_mem_replica *mem;
 	int ret;
 
 	ret = nvmap_dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem,
-			is_compression);
+			is_compression, granule_size);
 	if (ret)
 		return ret;
 
@@ -757,7 +772,8 @@ static int __init nvmap_co_device_init(struct reserved_mem *rmem,
 #else
 	err = nvmap_dma_declare_coherent_memory(co->dma_dev, 0,
 			co->base, co->size,
-			DMA_MEMORY_NOMAP, co->is_compression_co);
+			DMA_MEMORY_NOMAP, co->is_compression_co,
+			co->granule_size);
 #endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
 	if (!err) {
@@ -871,7 +887,7 @@ finish:
 	return ret;
 }
 #else
-int __init nvmap_co_setup(struct reserved_mem *rmem)
+int __init nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size)
 {
 	struct nvmap_platform_carveout *co;
 	ulong start = sched_clock();
@@ -887,16 +903,18 @@ int __init nvmap_co_setup(struct reserved_mem *rmem)
 	co->base = rmem->base;
 	co->size = rmem->size;
 	co->cma_dev = NULL;
-	if (!strncmp(co->name, "compression", 11))
+	if (!strncmp(co->name, "compression", 11)) {
 		co->is_compression_co = true;
+		co->granule_size = granule_size;
+	}
 
 	nvmap_init_time += sched_clock() - start;
 	return ret;
 }
 #endif /* !NVMAP_LOADABLE_MODULE */
 
-RESERVEDMEM_OF_DECLARE(nvmap_co, "nvidia,generic_carveout", nvmap_co_setup);
 #ifndef NVMAP_LOADABLE_MODULE
+RESERVEDMEM_OF_DECLARE(nvmap_co, "nvidia,generic_carveout", nvmap_co_setup);
 RESERVEDMEM_OF_DECLARE(nvmap_vpr_co, "nvidia,vpr-carveout", nvmap_co_setup);
 RESERVEDMEM_OF_DECLARE(nvmap_fsi_co, "nvidia,fsi-carveout", nvmap_co_setup);
 #endif /* !NVMAP_LOADABLE_MODULE */
@@ -909,7 +927,9 @@ int __init nvmap_init(struct platform_device *pdev)
 {
 	int err;
 	struct reserved_mem rmem;
+
 #ifdef NVMAP_LOADABLE_MODULE
+	u32 granule_size = 0;
 	struct reserved_mem *rmem2;
 	struct device_node *np = pdev->dev.of_node;
 	struct of_phandle_iterator it;
@@ -919,6 +939,12 @@ int __init nvmap_init(struct platform_device *pdev)
 	while (!of_phandle_iterator_next(&it) && it.node) {
 		if (of_device_is_available(it.node) &&
 		    !of_device_is_compatible(it.node, "nvidia,ivm_carveout")) {
+			/* Read granule size in case of compression carveout */
+			if (of_device_is_compatible(it.node, "nvidia,compression_carveout") &&
+			    of_property_read_u32(it.node, "granule-size", &granule_size)) {
+				pr_err("granule-size property is missing\n");
+				return -EINVAL;
+			}
 			rmem2 = of_reserved_mem_lookup(it.node);
 			if (!rmem2) {
 				if (!of_property_read_string(it.node, "compatible", &compp))
@@ -926,7 +952,7 @@ int __init nvmap_init(struct platform_device *pdev)
 					compp);
 				return -EINVAL;
 			}
-			nvmap_co_setup(rmem2);
+			nvmap_co_setup(rmem2, granule_size);
 		}
 	}
 }
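With the coherent-memory bitmap now kept in granule units, __nvmap_dma_alloc_from_coherent() converts byte sizes with PAGE_SHIFT_GRANULE() and expands the returned page array by PAGES_PER_GRANULE(). A worked userspace check of that arithmetic, assuming 4 KiB pages and a 2 MiB granule (the previously hard-coded value), with __builtin_ctzl() standing in for the kernel's order_base_2() on power-of-two sizes:

#include <assert.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12

/* Same shape as the nvmap_priv.h macros, for a power-of-two granule. */
#define PAGE_SHIFT_GRANULE(g)	(__builtin_ctzl(g))
#define PAGES_PER_GRANULE(g)	((g) / PAGE_SIZE)

int main(void)
{
	unsigned long granule_size = 2UL * 1024 * 1024;	/* 2 MiB granule */
	unsigned long size = 8UL * 1024 * 1024;		/* 8 MiB request */

	/* The bitmap allocates granules: 8 MiB / 2 MiB = 4 bits. */
	unsigned long count = size >> PAGE_SHIFT_GRANULE(granule_size);

	assert(count == 4);
	/* The caller-visible pages[] array still holds 4 KiB pages. */
	assert(count * PAGES_PER_GRANULE(granule_size) == size >> PAGE_SHIFT);
	return 0;
}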
diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c
index d3cc422a..4519a46a 100644
--- a/drivers/video/tegra/nvmap/nvmap_ioctl.c
+++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c
@@ -181,7 +181,7 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 	struct nvmap_handle *handle;
 	struct dma_buf *dmabuf = NULL;
 	bool is_ro;
-	int err;
+	int err, i;
 	unsigned int page_sz = PAGE_SIZE;
 
 	if (copy_from_user(&op, arg, sizeof(op)))
@@ -203,11 +203,16 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 		return -EINVAL;
 
 	/*
-	 * In case of Compression carveout, the handle size needs to be aligned to 2MB.
+	 * In case of Compression carveout, the handle size needs to be aligned to the granule size.
 	 */
 	if (op.heap_mask & NVMAP_HEAP_CARVEOUT_COMPRESSION) {
-		handle->size = ALIGN_2MB(handle->size);
-		page_sz = SIZE_2MB;
+		u32 granule_size = 0;
+
+		for (i = 0; i < nvmap_dev->nr_carveouts; i++)
+			if (nvmap_dev->heaps[i].heap_bit & NVMAP_HEAP_CARVEOUT_COMPRESSION)
+				granule_size = nvmap_dev->heaps[i].carveout->granule_size;
+		handle->size = ALIGN_GRANULE_SIZE(handle->size, granule_size);
+		page_sz = granule_size;
 	}
 
 	if (!is_nvmap_memory_available(handle->size, op.heap_mask)) {
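The ioctl path now rounds a compression-carveout handle up to a whole number of granules, where it previously rounded to a fixed 2 MB. For instance, a hypothetical 5 MiB request with a 2 MiB granule becomes a 6 MiB handle:

#include <assert.h>

/* Same rounding as ALIGN_GRANULE_SIZE() in nvmap_priv.h (power-of-two g). */
#define ALIGN_GRANULE_SIZE(size, g) (((size) + (g) - 1) & ~((g) - 1))

int main(void)
{
	unsigned long g = 2UL * 1024 * 1024;

	assert(ALIGN_GRANULE_SIZE(5UL * 1024 * 1024, g) == 6UL * 1024 * 1024);
	/* Already-aligned sizes are unchanged. */
	assert(ALIGN_GRANULE_SIZE(2UL * 1024 * 1024, g) == 2UL * 1024 * 1024);
	return 0;
}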
diff --git a/drivers/video/tegra/nvmap/nvmap_priv.h b/drivers/video/tegra/nvmap/nvmap_priv.h
index 729ea9aa..27de4899 100644
--- a/drivers/video/tegra/nvmap/nvmap_priv.h
+++ b/drivers/video/tegra/nvmap/nvmap_priv.h
@@ -41,10 +41,9 @@
 
 #include
 
-#define SIZE_2MB (2*1024*1024)
-#define ALIGN_2MB(size) ((size + SIZE_2MB - 1) & ~(SIZE_2MB - 1))
-#define PAGE_SHIFT_2MB 21
-#define PAGES_PER_2MB (SIZE_2MB / PAGE_SIZE)
+#define ALIGN_GRANULE_SIZE(size, GRANULE_SIZE) (((size) + (GRANULE_SIZE) - 1) & ~((GRANULE_SIZE) - 1))
+#define PAGE_SHIFT_GRANULE(GRANULE_SIZE) (order_base_2(GRANULE_SIZE))
+#define PAGES_PER_GRANULE(GRANULE_SIZE) ((GRANULE_SIZE) / PAGE_SIZE)
 
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
 
@@ -487,14 +486,20 @@ struct dma_coherent_mem_replica {
 };
 
 int nvmap_dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-	dma_addr_t device_addr, size_t size, int flags, bool is_compression);
+	dma_addr_t device_addr, size_t size, int flags, bool is_compression,
+	u32 granule_size);
 #endif
 
 int nvmap_probe(struct platform_device *pdev);
 int nvmap_remove(struct platform_device *pdev);
 int nvmap_init(struct platform_device *pdev);
 int nvmap_create_carveout(const struct nvmap_platform_carveout *co);
+
+#ifdef NVMAP_LOADABLE_MODULE
+int nvmap_co_setup(struct reserved_mem *rmem, u32 granule_size);
+#else
 int nvmap_co_setup(struct reserved_mem *rmem);
+#endif
 
 struct device *dma_dev_from_handle(unsigned long type);
 struct nvmap_heap_block *nvmap_carveout_alloc(struct nvmap_client *dev,
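A caveat on PAGE_SHIFT_GRANULE() above: order_base_2() rounds up to the next power of two, so the shift only agrees with true division when granule-size is itself a power of two, and the same assumption underlies the mask in ALIGN_GRANULE_SIZE(). The device-tree read in nvmap_init() rejects a missing property but does not validate the value; whether an explicit is_power_of_2() check is needed is worth confirming. A userspace demonstration of the mismatch, using a ceil-log2 stand-in for order_base_2():

#include <assert.h>

/* Userspace stand-in for the kernel's order_base_2(): ceil(log2(n)). */
static unsigned int order_base_2(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	/* Power-of-two granule: shift and division agree. */
	unsigned long g = 2UL * 1024 * 1024;		/* 2 MiB -> order 21 */

	assert(order_base_2(g) == 21);
	assert((8UL * 1024 * 1024) >> order_base_2(g) == 4);

	/*
	 * Non-power-of-two granule: order_base_2() rounds up to 4 MiB
	 * (order 22), so the shift undercounts granules versus division.
	 */
	unsigned long bad = 3UL * 1024 * 1024;

	assert(order_base_2(bad) == 22);
	assert((12UL * 1024 * 1024) >> order_base_2(bad) == 3);	/* div says 4 */
	return 0;
}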
diff --git a/include/linux/nvmap.h b/include/linux/nvmap.h
index d5e0e9c6..ad9ac6a6 100644
--- a/include/linux/nvmap.h
+++ b/include/linux/nvmap.h
@@ -95,6 +95,7 @@ struct nvmap_platform_carveout {
 	bool init_done; /* FIXME: remove once all caveouts use reserved-memory */
 	struct nvmap_pm_ops pm_ops;
 	bool is_compression_co; /* Compression carveout is treated differently */
+	u32 granule_size; /* Granule size for compression carveout */
 };
 
 struct nvmap_platform_data {
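As a sanity check on the refactor: with granule_size set to the old hard-coded 2 MB, the new macros reduce to exactly the constants this series removes (SIZE_2MB, PAGE_SHIFT_2MB == 21, and PAGES_PER_2MB == 512 with 4 KiB pages). A minimal self-contained check, again with __builtin_ctzl() standing in for order_base_2() on a power-of-two size:

#include <assert.h>

#define PAGE_SIZE 4096UL

/* The new granule macros, as in nvmap_priv.h (ctz == log2 here). */
#define ALIGN_GRANULE_SIZE(size, g) (((size) + (g) - 1) & ~((g) - 1))
#define PAGE_SHIFT_GRANULE(g)       (__builtin_ctzl(g))
#define PAGES_PER_GRANULE(g)        ((g) / PAGE_SIZE)

int main(void)
{
	unsigned long g = 2UL * 1024 * 1024;	/* old SIZE_2MB */

	assert(PAGE_SHIFT_GRANULE(g) == 21);	/* old PAGE_SHIFT_2MB */
	assert(PAGES_PER_GRANULE(g) == 512);	/* old PAGES_PER_2MB */
	assert(ALIGN_GRANULE_SIZE(3UL * 1024 * 1024, g)	/* old ALIGN_2MB */
	       == 4UL * 1024 * 1024);
	return 0;
}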