linux-nv-oot/drivers/video/tegra/nvmap/nvmap_heap.c
commit faa6040875 ("video: tegra: nvmap: Switch to configurable granule size")
Author: Ketan Patil <ketanp@nvidia.com>
Date: 2023-05-15 01:18:01 -07:00

In the compression carveout, the granule size should be configurable via the
device tree. The earlier code assumed a fixed granule size of 2 MB; update it
to use the configurable granule size read from the DT.

Bug 3956637

Change-Id: Ib1e966117b2bd9511cbcde37a6011c17f38f22e2
Signed-off-by: Ketan Patil <ketanp@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2899865
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
Reviewed-by: Krishna Reddy <vdumpa@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
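
The granule_size consumed below (copied from co->granule_size into h->granule_size in nvmap_heap_create() and used for the compression-carveout granule loop) is filled in by the carveout setup code from the device tree. The sketch that follows only illustrates the idea: the helper name and the "granule-size" property are hypothetical assumptions, not the actual nvmap binding or parsing code.

#include <linux/of.h>
#include <linux/sizes.h>

/* Hypothetical helper: read a carveout's granule size from its DT node,
 * falling back to the previously hard-coded 2 MB when the (assumed)
 * "granule-size" property is absent. of_property_read_u32() leaves the
 * output untouched on failure, so the default survives.
 */
static u32 example_read_granule_size(struct device_node *np)
{
        u32 granule = SZ_2M;

        of_property_read_u32(np, "granule-size", &granule);
        return granule;
}

The value read this way would end up in co->granule_size, which nvmap_heap_create() copies into h->granule_size further down in this file.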

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2011-2023, NVIDIA Corporation. All rights reserved.
*
* GPU heap allocator.
*/
#define pr_fmt(fmt) "%s: " fmt, __func__
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/bug.h>
#include <linux/stat.h>
#include <linux/sizes.h>
#include <linux/io.h>
#include <linux/version.h>
#include <linux/limits.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
#include <linux/sched/clock.h>
#endif
#include <linux/nvmap.h>
#include <linux/dma-mapping.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
#include <linux/dma-map-ops.h>
#else
#include <linux/dma-contiguous.h>
#endif
#include "nvmap_priv.h"
#include "nvmap_heap.h"
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
#include "include/linux/nvmap_exports.h"
#endif
/*
* "carveouts" are platform-defined regions of physically contiguous memory
* which are not managed by the OS. A platform may specify multiple carveouts,
* for either small special-purpose memory regions or reserved regions of main
* system memory.
*
* The carveout allocator returns allocations which are physically contiguous.
*/
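/* Slab cache for the per-allocation struct list_block bookkeeping objects. */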
static struct kmem_cache *heap_block_cache;
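/*
* Return the DMA device of the first registered carveout whose heap_bit
* matches @type, or ERR_PTR(-ENODEV) if none matches.
*/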
struct device *dma_dev_from_handle(unsigned long type)
{
int i;
struct nvmap_carveout_node *co_heap;
for (i = 0; i < nvmap_dev->nr_carveouts; i++) {
co_heap = &nvmap_dev->heaps[i];
if (!(co_heap->heap_bit & type))
continue;
return co_heap->carveout->dma_dev;
}
return ERR_PTR(-ENODEV);
}
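/* Report the peer VM id of an IVM heap; returns -EINVAL for a NULL or non-IVM heap. */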
int nvmap_query_heap_peer(struct nvmap_heap *heap, unsigned int *peer)
{
if (!heap || !heap->is_ivm)
return -EINVAL;
*peer = heap->peer;
return 0;
}
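/* Return the total heap length in bytes, or 0 if @heap is NULL. */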
size_t nvmap_query_heap_size(struct nvmap_heap *heap)
{
if (!heap)
return 0;
return heap->len;
}
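/*
* Expose the heap's base, size and free_size under @heap_root, picking
* 64-bit or 32-bit debugfs entries to match the width of each field.
*/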
void nvmap_heap_debugfs_init(struct dentry *heap_root, struct nvmap_heap *heap)
{
if (sizeof(heap->base) == sizeof(u64))
debugfs_create_x64("base", S_IRUGO,
heap_root, (u64 *)&heap->base);
else
debugfs_create_x32("base", S_IRUGO,
heap_root, (u32 *)&heap->base);
if (sizeof(heap->len) == sizeof(u64))
debugfs_create_x64("size", S_IRUGO,
heap_root, (u64 *)&heap->len);
else
debugfs_create_x32("size", S_IRUGO,
heap_root, (u32 *)&heap->len);
if (sizeof(heap->free_size) == sizeof(u64))
debugfs_create_x64("free_size", S_IRUGO,
heap_root, (u64 *)&heap->free_size);
else
debugfs_create_x32("free_size", S_IRUGO,
heap_root, (u32 *)&heap->free_size);
}
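/*
* Back a block with physical memory. For an IVM heap given a start offset,
* the range at h->base + *start is only marked as occupied; otherwise the
* memory comes from the heap's DMA device. Returns the physical address,
* or a DMA error value on failure.
*/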
static phys_addr_t nvmap_alloc_mem(struct nvmap_heap *h, size_t len,
phys_addr_t *start, struct nvmap_handle *handle)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)
phys_addr_t pa = DMA_ERROR_CODE;
#else
phys_addr_t pa = DMA_MAPPING_ERROR;
#endif
struct device *dev = h->dma_dev;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
void *err = NULL;
#endif
if (len > UINT_MAX) {
dev_err(dev, "%s: %d alloc size is out of range\n",
__func__, __LINE__);
return DMA_ERROR_CODE;
}
#ifdef CONFIG_TEGRA_VIRTUALIZATION
if (start && h->is_ivm) {
void *ret;
pa = h->base + (*start);
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)
ret = dma_mark_declared_memory_occupied(dev, pa, len,
DMA_ATTR_ALLOC_EXACT_SIZE);
#else
ret = nvmap_dma_mark_declared_memory_occupied(dev, pa, len);
#endif
if (IS_ERR(ret)) {
dev_err(dev, "Failed to reserve (%pa) len(%zu)\n",
&pa, len);
return DMA_ERROR_CODE;
}
} else
#endif
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0)
(void)dma_alloc_attrs(dev, len, &pa,
GFP_KERNEL, DMA_ATTR_ALLOC_EXACT_SIZE);
#else
err = nvmap_dma_alloc_attrs(dev, len, &pa,
GFP_KERNEL, DMA_ATTR_ALLOC_EXACT_SIZE);
/*
* For the compression carveout, first try to allocate the entire buffer in a
* physically contiguous manner. If that fails, fall back to allocating the
* memory in granules of the configured granule size.
*/
if (h->is_compression_co && IS_ERR(err)) {
err = nvmap_dma_alloc_attrs(dev, len, &pa,
GFP_KERNEL, DMA_ATTR_ALLOC_EXACT_SIZE |
DMA_ATTR_ALLOC_SINGLE_PAGES);
if (!IS_ERR_OR_NULL(err)) {
/*
* Keep track of the pages so that exactly those pages
* can be freed when the buffer is released.
*/
handle->pgalloc.pages = (struct page **)err;
}
}
#endif
if (!dma_mapping_error(dev, pa)) {
#ifdef NVMAP_CONFIG_VPR_RESIZE
int ret;
dev_dbg(dev, "Allocated addr (%pa) len(%zu)\n",
&pa, len);
if (!dma_is_coherent_dev(dev) && h->cma_dev) {
ret = nvmap_cache_maint_phys_range(
NVMAP_CACHE_OP_WB, pa, pa + len,
true, true);
if (!ret)
return pa;
dev_err(dev, "cache WB on (%pa, %zu) failed\n",
&pa, len);
}
#endif
dev_dbg(dev, "Allocated addr (%pa) len(%zu)\n",
&pa, len);
}
}
return pa;
}
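/*
* Release memory obtained via nvmap_alloc_mem(): unmark the occupied range
* for IVM heaps that do not allocate themselves, otherwise free the memory
* back to the heap's DMA device (passing the page array for granule-based
* compression-carveout allocations).
*/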
static void nvmap_free_mem(struct nvmap_heap *h, phys_addr_t base,
size_t len, struct nvmap_handle *handle)
{
struct device *dev = h->dma_dev;
dev_dbg(dev, "Free base (%pa) size (%zu)\n", &base, len);
if (len > UINT_MAX) {
dev_err(dev, "%s: %d freeing length out of range\n",
__func__, __LINE__);
return;
}
#ifdef CONFIG_TEGRA_VIRTUALIZATION
if (h->is_ivm && !h->can_alloc) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)
dma_mark_declared_memory_unoccupied(dev, base, len,
DMA_ATTR_ALLOC_EXACT_SIZE);
#else
nvmap_dma_mark_declared_memory_unoccupied(dev, base, len);
#endif
} else
#endif
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0)
dma_free_attrs(dev, len,
(void *)(uintptr_t)base,
(dma_addr_t)base, DMA_ATTR_ALLOC_EXACT_SIZE);
#else
if (h->is_compression_co && handle->pgalloc.pages) {
/* For granule-based allocations, pass the pointer to the array of pages */
nvmap_dma_free_attrs(dev, len,
(void *)handle->pgalloc.pages,
(dma_addr_t)base,
DMA_ATTR_ALLOC_EXACT_SIZE | DMA_ATTR_ALLOC_SINGLE_PAGES);
} else {
nvmap_dma_free_attrs(dev, len,
(void *)(uintptr_t)base,
(dma_addr_t)base,
DMA_ATTR_ALLOC_EXACT_SIZE);
}
#endif
}
}
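/*
* do_heap_alloc: carve a block out of the heap, record it on the heap's
* all_list and update the free_size accounting.
*/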
/*
* base_max limits position of allocated chunk in memory.
* if base_max is 0 then there is no such limitation.
*/
static struct nvmap_heap_block *do_heap_alloc(struct nvmap_heap *heap,
size_t len, size_t align,
unsigned int mem_prot,
phys_addr_t base_max,
phys_addr_t *start,
struct nvmap_handle *handle)
{
struct list_block *heap_block = NULL;
dma_addr_t dev_base;
struct device *dev = heap->dma_dev;
/* since pages are only mappable with one cache attribute,
* and most allocations from carveout heaps are DMA coherent
* (i.e., non-cacheable), round cacheable allocations up to
* a page boundary to ensure that the physical pages will
* only be mapped one way. */
if (mem_prot == NVMAP_HANDLE_CACHEABLE ||
mem_prot == NVMAP_HANDLE_INNER_CACHEABLE) {
align = max_t(size_t, align, PAGE_SIZE);
len = PAGE_ALIGN(len);
}
if (heap->is_ivm)
align = max_t(size_t, align, NVMAP_IVM_ALIGNMENT);
heap_block = kmem_cache_zalloc(heap_block_cache, GFP_KERNEL);
if (!heap_block) {
dev_err(dev, "%s: failed to alloc heap block %s\n",
__func__, dev_name(dev));
goto fail_heap_block_alloc;
}
dev_base = nvmap_alloc_mem(heap, len, start, handle);
if (dma_mapping_error(dev, dev_base)) {
dev_err(dev, "failed to alloc mem of size (%zu)\n",
len);
#ifdef NVMAP_CONFIG_VPR_RESIZE
if (dma_is_coherent_dev(dev)) {
struct dma_coherent_stats stats;
dma_get_coherent_stats(dev, &stats);
dev_err(dev, "used:%zu,curr_size:%zu max:%zu\n",
stats.used, stats.size, stats.max);
}
#endif
goto fail_dma_alloc;
}
heap_block->block.base = dev_base;
heap_block->orig_addr = dev_base;
heap_block->size = len;
list_add_tail(&heap_block->all_list, &heap->all_list);
heap_block->heap = heap;
heap->free_size -= len;
heap_block->mem_prot = mem_prot;
heap_block->align = align;
return &heap_block->block;
fail_dma_alloc:
kmem_cache_free(heap_block_cache, heap_block);
fail_heap_block_alloc:
return NULL;
}
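/*
* Undo do_heap_alloc(): remove the block from the heap's all_list, release
* the backing memory and return the bytes to the free_size accounting.
* The caller holds heap->lock.
*/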
static void do_heap_free(struct nvmap_heap_block *block)
{
struct list_block *b = container_of(block, struct list_block, block);
struct nvmap_heap *heap = b->heap;
list_del(&b->all_list);
nvmap_free_mem(heap, block->base, b->size, block->handle);
heap->free_size += b->size;
kmem_cache_free(heap_block_cache, b);
}
/* nvmap_heap_alloc: allocates a block of memory of len bytes, aligned to
* align bytes. */
struct nvmap_heap_block *nvmap_heap_alloc(struct nvmap_heap *h,
struct nvmap_handle *handle,
phys_addr_t *start)
{
struct nvmap_heap_block *b;
size_t len = handle->size;
size_t align = handle->align;
unsigned int prot = handle->flags;
mutex_lock(&h->lock);
if (h->is_ivm) { /* Is IVM carveout? */
/* Check if this is the correct IVM heap */
if (handle->peer != h->peer) {
mutex_unlock(&h->lock);
return NULL;
} else {
if (h->can_alloc && start) {
/* If this partition does actual allocation, it
* should not specify start_offset.
*/
mutex_unlock(&h->lock);
return NULL;
} else if (!h->can_alloc && !start) {
/* If this partition does not do actual
* allocation, it should specify start_offset.
*/
mutex_unlock(&h->lock);
return NULL;
}
}
}
/*
* If this heap has pm_ops defined and powering on the
* RAM attached to the heap fails, don't allocate
* from the heap and return NULL.
*/
if (h->pm_ops.busy) {
if (h->pm_ops.busy() < 0) {
pr_err("Unable to power on the heap device\n");
mutex_unlock(&h->lock);
return NULL;
}
}
align = max_t(size_t, align, L1_CACHE_BYTES);
b = do_heap_alloc(h, len, align, prot, 0, start, handle);
if (b) {
b->handle = handle;
handle->carveout = b;
/* Generate the IVM id for the partition that does the actual allocation */
if (h->is_ivm && h->can_alloc) {
unsigned int offs = (b->base - h->base);
BUG_ON(offs & (NVMAP_IVM_ALIGNMENT - 1));
BUG_ON((offs >> ffs(NVMAP_IVM_ALIGNMENT)) &
~((1 << NVMAP_IVM_OFFSET_WIDTH) - 1));
BUG_ON(h->vm_id & ~(NVMAP_IVM_IVMID_MASK));
/* The length is encoded in whole pages below, so a page-alignment
* check on len is sufficient.
*/
BUG_ON(len & ~(PAGE_MASK));
/* Store the offset within the IVM mem pool in the nvmap handle.
* The offset will be exported to userspace via ioctl.
*/
handle->offs = offs;
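/*
* ivm_id layout: the VM id occupies the top bits, the offset (in units of
* NVMAP_IVM_ALIGNMENT) the middle bits, and the buffer size in pages the
* low bits.
*/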
handle->ivm_id = ((u64)h->vm_id << NVMAP_IVM_IVMID_SHIFT);
handle->ivm_id |= (((offs >> (ffs(NVMAP_IVM_ALIGNMENT) - 1)) &
((1ULL << NVMAP_IVM_OFFSET_WIDTH) - 1)) <<
NVMAP_IVM_OFFSET_SHIFT);
handle->ivm_id |= (len >> PAGE_SHIFT);
}
}
mutex_unlock(&h->lock);
return b;
}
struct nvmap_heap *nvmap_block_to_heap(struct nvmap_heap_block *b)
{
struct list_block *lb;
lb = container_of(b, struct list_block, block);
return lb->heap;
}
/* nvmap_heap_free: frees block b*/
void nvmap_heap_free(struct nvmap_heap_block *b)
{
struct nvmap_heap *h;
struct list_block *lb;
if (!b)
return;
h = nvmap_block_to_heap(b);
mutex_lock(&h->lock);
lb = container_of(b, struct list_block, block);
if (!nvmap_dev->co_cache_flush_at_alloc) {
/*
* For carveouts, if the cache flush was already done at buffer allocation
* time, there is no need to repeat it at buffer release time.
*/
nvmap_flush_heap_block(NULL, b, lb->size, lb->mem_prot);
}
do_heap_free(b);
/*
* If this heap has pm_ops defined and powering off the
* RAM attached to the heap fails, raise a warning.
*/
if (h->pm_ops.idle) {
if (h->pm_ops.idle() < 0)
WARN_ON(1);
}
mutex_unlock(&h->lock);
}
/* nvmap_heap_create: create a heap object of len bytes, starting from
* address base.
*/
struct nvmap_heap *nvmap_heap_create(struct device *parent,
const struct nvmap_platform_carveout *co,
phys_addr_t base, size_t len, void *arg)
{
struct nvmap_heap *h;
h = kzalloc(sizeof(*h), GFP_KERNEL);
if (!h) {
pr_err("%s: out of memory\n", __func__);
return NULL;
}
h->dma_dev = co->dma_dev;
if (co->cma_dev) {
#ifdef CONFIG_DMA_CMA
#ifdef NVMAP_CONFIG_VPR_RESIZE
struct dma_contiguous_stats stats;
if (dma_get_contiguous_stats(co->cma_dev, &stats))
goto fail;
base = stats.base;
len = stats.size;
h->cma_dev = co->cma_dev;
#endif
#else
pr_err("invalid resize config for carveout %s\n",
co->name);
goto fail;
#endif
} else if (!co->init_done) {
int err;
/* declare Non-CMA heap */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
err = dma_declare_coherent_memory(h->dma_dev, 0, base, len,
DMA_MEMORY_NOMAP);
#else
err = nvmap_dma_declare_coherent_memory(h->dma_dev, 0, base, len,
DMA_MEMORY_NOMAP, co->is_compression_co, co->granule_size);
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
if (!err) {
#else
if (err & DMA_MEMORY_NOMAP) {
#endif
pr_info("%s :dma coherent mem declare %pa,%zu\n",
co->name, &base, len);
} else {
pr_err("%s: dma coherent declare fail %pa,%zu\n",
co->name, &base, len);
goto fail;
}
}
dev_set_name(h->dma_dev, "%s", co->name);
dma_set_coherent_mask(h->dma_dev, DMA_BIT_MASK(64));
h->name = co->name;
h->arg = arg;
h->base = base;
h->can_alloc = !!co->can_alloc;
h->is_ivm = co->is_ivm;
h->is_compression_co = co->is_compression_co;
h->granule_size = co->granule_size;
h->len = len;
h->free_size = len;
h->peer = co->peer;
h->vm_id = co->vmid;
if (co->pm_ops.busy)
h->pm_ops.busy = co->pm_ops.busy;
if (co->pm_ops.idle)
h->pm_ops.idle = co->pm_ops.idle;
INIT_LIST_HEAD(&h->all_list);
mutex_init(&h->lock);
#ifdef NVMAP_CONFIG_DEBUG_MAPS
h->device_names = RB_ROOT;
#endif /* NVMAP_CONFIG_DEBUG_MAPS */
if (!nvmap_dev->co_cache_flush_at_alloc) {
/*
* For carveouts, if the cache flush is done at buffer allocation time,
* there is no need to do it again at carveout creation time.
*/
if (!co->no_cpu_access && co->usage_mask != NVMAP_HEAP_CARVEOUT_VPR
&& nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV,
base, base + len, true, true)) {
pr_err("cache flush failed\n");
goto fail;
}
}
wmb();
if (co->disable_dynamic_dma_map)
nvmap_dev->dynamic_dma_map_mask &= ~co->usage_mask;
if (co->no_cpu_access)
nvmap_dev->cpu_access_mask &= ~co->usage_mask;
pr_info("created heap %s base 0x%px size (%zuKiB)\n",
co->name, (void *)(uintptr_t)base, len/1024);
return h;
fail:
if (h->dma_dev->kobj.name)
kfree_const(h->dma_dev->kobj.name);
kfree(h);
return NULL;
}
/* nvmap_heap_destroy: frees all resources in heap */
void nvmap_heap_destroy(struct nvmap_heap *heap)
{
WARN_ON(!list_empty(&heap->all_list));
if (heap->dma_dev->kobj.name)
kfree_const(heap->dma_dev->kobj.name);
if (heap->is_ivm)
kfree(heap->name);
#ifdef NVMAP_LOADABLE_MODULE
nvmap_dma_release_coherent_memory((struct dma_coherent_mem_replica *)
heap->dma_dev->dma_mem);
#endif /* NVMAP_LOADABLE_MODULE */
while (!list_empty(&heap->all_list)) {
struct list_block *l;
l = list_first_entry(&heap->all_list, struct list_block,
all_list);
list_del(&l->all_list);
kmem_cache_free(heap_block_cache, l);
}
kfree(heap);
}
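/* One-time init: create the slab cache used for carveout block bookkeeping. */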
int nvmap_heap_init(void)
{
ulong start_time = sched_clock();
heap_block_cache = KMEM_CACHE(list_block, 0);
if (!heap_block_cache) {
pr_err("%s: unable to create heap block cache\n", __func__);
return -ENOMEM;
}
pr_info("%s: created heap block cache\n", __func__);
nvmap_init_time += sched_clock() - start_time;
return 0;
}
void nvmap_heap_deinit(void)
{
if (heap_block_cache)
kmem_cache_destroy(heap_block_cache);
heap_block_cache = NULL;
}
/*
* This routine flushes carveout memory from the cache.
* Why is a cache flush needed for a carveout? Consider the case where a piece of
* carveout is allocated as cached and then released. If the same memory is later
* allocated for an uncached request without first being flushed from the cache,
* the client might pass it to a H/W engine, which could start modifying the
* memory. Because the memory was cached earlier, portions of it may still be
* resident in the cache. A later CPU access to other memory could evict those
* cached lines back to main memory and corrupt the buffer, if that happens
* after the H/W has written its data.
*
* However, blindly flushing the memory on every carveout allocation is redundant.
*
* To optimize the carveout buffer cache flushes, the following
* strategy is used.
*
* The whole carveout is flushed from the cache during its initialization.
* During allocation, carveout buffers are not flushed from the cache.
* During deallocation, carveout buffers are flushed only if they were allocated
* as cached; if they were allocated as uncached/write-combined, no cache flush
* is needed and draining the store buffers is enough.
*/
int nvmap_flush_heap_block(struct nvmap_client *client,
struct nvmap_heap_block *block, size_t len, unsigned int prot)
{
phys_addr_t phys = block->base;
phys_addr_t end = block->base + len;
int ret = 0;
struct nvmap_handle *h;
if (prot == NVMAP_HANDLE_UNCACHEABLE || prot == NVMAP_HANDLE_WRITE_COMBINE)
goto out;
h = block->handle;
if (h->pgalloc.pages) {
unsigned long page_count, i;
u32 granule_size = 0;
struct list_block *b = container_of(block, struct list_block, block);
/*
* For a compression carveout with physically discontiguous granules,
* iterate over the granules and do cache maintenance on each.
*/
page_count = h->size >> PAGE_SHIFT;
granule_size = b->heap->granule_size;
for (i = 0; i < page_count; i += PAGES_PER_GRANULE(granule_size)) {
phys = page_to_phys(h->pgalloc.pages[i]);
end = phys + granule_size;
ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV, phys, end,
true, prot != NVMAP_HANDLE_INNER_CACHEABLE);
if (ret)
goto out;
}
} else
ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV, phys, end,
true, prot != NVMAP_HANDLE_INNER_CACHEABLE);
out:
wmb();
return ret;
}