// SPDX-License-Identifier: GPL-2.0
/*
 * This file is part of NVIDIA MODS kernel driver.
 *
 * Copyright (c) 2008-2022, NVIDIA CORPORATION.  All rights reserved.
 *
 * NVIDIA MODS kernel driver is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * NVIDIA MODS kernel driver is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NVIDIA MODS kernel driver.
 * If not, see <http://www.gnu.org/licenses/>.
 */

#include "mods_internal.h"

#include <linux/bitops.h>
#include <linux/pagemap.h>

#if defined(MODS_HAS_SET_DMA_MASK)
#include <linux/dma-mapping.h>
#include <linux/of.h>
#endif

#ifdef CONFIG_ARM64
#include <linux/cache.h>
#endif

static struct MODS_MEM_INFO *get_mem_handle(struct mods_client *client,
					    u64                 handle)
{
	/* For now just check if we hit first or last page, i.e. if
	 * we have a valid pointer.  In the future, add proper handle
	 * accounting.
	 */
	if (unlikely((handle + PAGE_SIZE) < (2 * PAGE_SIZE))) {
		cl_error("invalid memory handle 0x%llx\n",
			 (unsigned long long)handle);
		return NULL;
	}

	return (struct MODS_MEM_INFO *)(size_t)handle;
}
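
/*
 * Illustrative note (added here for clarity, not part of the original
 * driver): with 4 KiB pages the check above rejects any handle that falls
 * in the first or last page of the address space, i.e. values that cannot
 * be valid kernel pointers:
 *
 *   handle = 0x0                -> 0x1000 < 0x2000 -> rejected
 *   handle = 0xfff              -> 0x1fff < 0x2000 -> rejected
 *   handle = 0xffffffffffffffff -> wraps to 0xfff  -> rejected
 *   handle = 0xffff888012340000 -> accepted, cast back to a pointer
 */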

static bool validate_mem_handle(struct mods_client   *client,
				struct MODS_MEM_INFO *p_mem_info)
{
	struct list_head *head = &client->mem_alloc_list;
	struct list_head *iter;

	if (unlikely(!p_mem_info))
		return false;

	list_for_each(iter, head) {
		struct MODS_MEM_INFO *p_mem =
			list_entry(iter, struct MODS_MEM_INFO, list);

		if (p_mem == p_mem_info)
			return true;
	}

	return false;
}

/****************************
 * DMA MAP HELPER FUNCTIONS *
 ****************************/

/*
 * Starting on Power9 systems, DMA addresses for NVLink are no longer
 * the same as used over PCIE.
 *
 * Power9 supports a 56-bit Real Address.  This address range is compressed
 * when accessed over NvLink to allow the GPU to access all of memory using
 * its 47-bit Physical address.
 *
 * If there is an NPU device present on the system, it implies that NvLink
 * sysmem links are present and we need to apply the required address
 * conversion for NvLink within the driver.  This is intended to be temporary
 * to ease the transition to kernel APIs to handle NvLink DMA mappings
 * via the NPU device.
 *
 * Note, a deviation from the documented compression scheme is that the
 * upper address bits (i.e. bit 56-63) instead of being set to zero are
 * preserved during NvLink address compression so the original PCIE DMA
 * address can be reconstructed on expansion.  These bits can be safely
 * ignored on NvLink since they are truncated by the GPU.
 */
#if defined(CONFIG_PPC64) && defined(CONFIG_PCI)
static dma_addr_t compress_nvlink_addr(struct pci_dev *dev, dma_addr_t addr)
{
	dma_addr_t addr47 = addr;

	/* Note, one key difference from the documented compression scheme
	 * is that BIT59 used for TCE bypass mode on PCIe is preserved during
	 * NVLink address compression to allow for the resulting DMA address to
	 * be used transparently on PCIe.
	 */
	if (dev && has_npu_dev(dev, 0)) {
		addr47 = addr & (1LLU << 59);
		addr47 |= ((addr >> 45) & 0x3) << 43;
		addr47 |= ((addr >> 49) & 0x3) << 45;
		addr47 |= addr & ((1LLU << 43) - 1);
	}

	return addr47;
}
#else
#define compress_nvlink_addr(dev, addr) (addr)
#endif
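
/*
 * Worked example (an assumption added for clarity, not in the original
 * source): for an address with only bit 45 and bits 11:0 set,
 *
 *   addr   = (1ULL << 45) | 0x800
 *
 * the code above produces
 *
 *   addr47 = (1ULL << 43) | 0x800
 *
 * i.e. bits 46:45 move down to 44:43, bits 50:49 move down to 46:45,
 * bit 59 and bits 42:0 are preserved, and everything else is dropped.
 */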

static void copy_wc_bitmap(struct MODS_MEM_INFO *p_dest_mem_info,
			   unsigned long         first_dst_chunk,
			   struct MODS_MEM_INFO *p_src_mem_info,
			   unsigned long         num_chunks)
{
	unsigned long src_pos = 0;

	WARN_ON(p_dest_mem_info->cache_type != p_src_mem_info->cache_type);

	if (p_src_mem_info->cache_type == MODS_ALLOC_CACHED)
		return;

	WARN_ON(!p_dest_mem_info->wc_bitmap);
	WARN_ON(!p_src_mem_info->wc_bitmap);

	for (;;) {
		src_pos = find_next_bit(p_src_mem_info->wc_bitmap,
					num_chunks,
					src_pos);

		if (src_pos >= num_chunks)
			break;

		set_bit(src_pos + first_dst_chunk, p_dest_mem_info->wc_bitmap);

		++src_pos;
	}
}

static inline bool is_chunk_wc(struct MODS_MEM_INFO *p_mem_info, u32 ichunk)
{
	return p_mem_info->wc_bitmap && test_bit(ichunk, p_mem_info->wc_bitmap);
}

static void mark_chunk_wc(struct MODS_MEM_INFO *p_mem_info, u32 ichunk)
{
	WARN_ON(p_mem_info->cache_type == MODS_ALLOC_CACHED);
	WARN_ON(!p_mem_info->wc_bitmap);
	set_bit(ichunk, p_mem_info->wc_bitmap);
}

static void print_map_info(struct mods_client *client,
			   const char         *action,
			   struct scatterlist *sg,
			   u32                 nents,
			   struct device      *dev)
{
	u32 i;

	for_each_sg(sg, sg, nents, i) {
		cl_debug(DEBUG_MEM_DETAILED,
			 "dma %s iova=0x%llx dma_len=0x%x phys=0x%llx size=0x%x on dev %s\n",
			 action,
			 (unsigned long long)sg_dma_address(sg),
			 sg_dma_len(sg),
			 (unsigned long long)sg_phys(sg),
			 sg->length,
			 dev_name(dev));
	}
}

static int map_sg(struct mods_client *client,
		  struct device      *dev,
		  struct scatterlist *sg,
		  u32                 num_chunks,
		  u32                 num_pages)
{
	const u32 max_pages = (u32)(0x100000000ULL >> PAGE_SHIFT);

	if (num_pages >= max_pages)
		cl_warn("requested to map %u pages in %u chunks\n",
			num_pages, num_chunks);

	do {
		u32 chunks_to_map = num_chunks;
		u32 pages_to_map  = num_pages;
		int mapped;

		/* Some HW IOMMU drivers can coalesce multiple chunks into
		 * a single contiguous VA mapping, which is exposed via the
		 * first chunk.  However, the dma_length field is an unsigned
		 * int and is not able to represent mappings which exceed 4GB.
		 * To alleviate this, split large allocations into multiple
		 * mappings.
		 */
		if (num_pages >= max_pages) {

			struct scatterlist *cur_sg;

			pages_to_map = 0;

			for_each_sg(sg, cur_sg, num_chunks, chunks_to_map) {

				const unsigned int len = cur_sg->length;
				const u32 cur_pages = len >> PAGE_SHIFT;

				if ((u64)pages_to_map + cur_pages >= max_pages)
					break;

				pages_to_map += cur_pages;
			}
		}

		mapped = dma_map_sg(dev, sg, (int)chunks_to_map,
				    DMA_BIDIRECTIONAL);

		if (mapped == 0) {
			cl_error(
				"failed to dma map %u chunks at 0x%llx to dev %s with dma mask 0x%llx\n",
				num_chunks,
				(unsigned long long)sg_phys(sg),
				dev_name(dev),
				(unsigned long long)dma_get_mask(dev));

			return -EIO;
		}

		sg += chunks_to_map;
		num_chunks -= chunks_to_map;
		num_pages -= pages_to_map;

	} while (num_chunks);

	return OK;
}
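
/*
 * Illustrative note (an assumption, not in the original source): with
 * 4 KiB pages, max_pages is 0x100000, i.e. 4 GiB worth of pages.  A
 * hypothetical 6 GiB allocation is therefore mapped with at least two
 * dma_map_sg() calls: the first covers chunks adding up to just under
 * 4 GiB and the next covers the remainder, so no coalesced mapping ever
 * needs a dma_length larger than an unsigned int can represent.
 */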

static void unmap_sg(struct device      *dev,
		     struct scatterlist *sg,
		     u32                 num_chunks)
{
	do {
		struct scatterlist *cur_sg;
		u32 chunks_to_unmap = 0;

		for_each_sg(sg, cur_sg, num_chunks, chunks_to_unmap)
			if (!sg_dma_len(cur_sg))
				break;

		dma_unmap_sg(dev, sg, (int)chunks_to_unmap, DMA_BIDIRECTIONAL);

		sg += chunks_to_unmap;
		num_chunks -= chunks_to_unmap;

		/* Skip chunks which don't maintain any DMA mappings.
		 * This can happen for large allocations with the workaround
		 * in map_sg().
		 */
		if (num_chunks) {
			for_each_sg(sg, sg, num_chunks, chunks_to_unmap)
				if (sg_dma_len(sg))
					break;
			num_chunks -= chunks_to_unmap;
		}

	} while (num_chunks);
}

/* Unmap and delete the specified DMA mapping */
static void dma_unmap_and_free(struct mods_client   *client,
			       struct MODS_MEM_INFO *p_mem_info,
			       struct MODS_DMA_MAP  *p_del_map)
{
	const u32 nents = get_num_chunks(p_mem_info);

	print_map_info(client, "unmap", p_del_map->sg, nents, p_del_map->dev);

	unmap_sg(p_del_map->dev, p_del_map->sg, nents);

	pci_dev_put(p_del_map->pcidev);

	list_del(&p_del_map->list);

	kfree(p_del_map);
	atomic_dec(&client->num_allocs);
}

/* Unmap and delete all DMA mappings for the specified allocation */
static int dma_unmap_all(struct mods_client   *client,
			 struct MODS_MEM_INFO *p_mem_info,
			 struct device        *dev)
{
	int err = OK;
	struct list_head *head = &p_mem_info->dma_map_list;
	struct list_head *iter;
	struct list_head *tmp;

#ifdef CONFIG_PCI
	if (sg_dma_address(p_mem_info->sg) &&
	    (dev == &p_mem_info->dev->dev || !dev)) {

		unmap_sg(&p_mem_info->dev->dev,
			 p_mem_info->sg,
			 get_num_chunks(p_mem_info));

		sg_dma_address(p_mem_info->sg) = 0;
	}
#endif

	list_for_each_safe(iter, tmp, head) {
		struct MODS_DMA_MAP *p_dma_map;

		p_dma_map = list_entry(iter, struct MODS_DMA_MAP, list);

		if (!dev || (p_dma_map->dev == dev)) {
			dma_unmap_and_free(client, p_mem_info, p_dma_map);
			if (dev)
				break;
		}
	}

	return err;
}

/* Create a DMA map on the specified allocation for the pci device.
 * Lazy-initialize the map list structure if one does not yet exist.
 */
static int create_dma_map(struct mods_client   *client,
			  struct MODS_MEM_INFO *p_mem_info,
			  struct pci_dev       *pcidev,
			  struct device        *dev)
{
	struct MODS_DMA_MAP *p_dma_map;
	struct scatterlist  *sg;
	const u32            num_chunks = get_num_chunks(p_mem_info);
	size_t               alloc_size;
	u32                  i;
	int                  err;

	alloc_size = sizeof(struct MODS_DMA_MAP) +
		     (num_chunks - 1) * sizeof(struct scatterlist);

	p_dma_map = kzalloc(alloc_size, GFP_KERNEL | __GFP_NORETRY);

	if (unlikely(!p_dma_map)) {
		cl_error("failed to allocate device map data\n");
		return -ENOMEM;
	}
	atomic_inc(&client->num_allocs);

#ifdef CONFIG_PCI
	p_dma_map->pcidev = pcidev ? pci_dev_get(pcidev) : NULL;
#endif
	p_dma_map->dev = dev;

	sg_init_table(p_dma_map->sg, num_chunks);

	for_each_sg(p_mem_info->sg, sg, num_chunks, i)
		sg_set_page(&p_dma_map->sg[i], sg_page(sg), sg->length, 0);

	err = map_sg(client, dev, p_dma_map->sg, num_chunks,
		     p_mem_info->num_pages);

	print_map_info(client, "map", p_dma_map->sg, num_chunks, dev);

	if (unlikely(err)) {
		pci_dev_put(pcidev);
		kfree(p_dma_map);
		atomic_dec(&client->num_allocs);
	} else {
		list_add(&p_dma_map->list, &p_mem_info->dma_map_list);
	}

	return err;
}

#ifdef CONFIG_PCI
/* DMA-map memory to the device for which it has been allocated, if it hasn't
 * been mapped already.
 */
static int dma_map_to_default_dev(struct mods_client   *client,
				  struct MODS_MEM_INFO *p_mem_info)
{
	struct device *const dev        = &p_mem_info->dev->dev;
	const u32            num_chunks = get_num_chunks(p_mem_info);
	int                  err;

	if (sg_dma_address(p_mem_info->sg)) {
		cl_debug(DEBUG_MEM_DETAILED,
			 "memory %p already mapped to dev %s\n",
			 p_mem_info,
			 dev_name(dev));
		return OK;
	}

	err = map_sg(client, dev, p_mem_info->sg, num_chunks,
		     p_mem_info->num_pages);

	print_map_info(client, "map default", p_mem_info->sg, num_chunks, dev);

	return err;
}
#endif /* CONFIG_PCI */

#ifdef CONFIG_ARM64
static void clear_contiguous_cache(struct mods_client *client,
				   u64                 virt_start,
				   u64                 phys_start,
				   u32                 size);
#endif

static int setup_cache_attr(struct mods_client   *client,
			    struct MODS_MEM_INFO *p_mem_info,
			    u32                   ichunk)
{
	const bool need_wc = p_mem_info->cache_type != MODS_ALLOC_CACHED;
	int        err     = 0;

	if (need_wc && !is_chunk_wc(p_mem_info, ichunk)) {
		struct scatterlist *sg = &p_mem_info->alloc_sg[ichunk];
		unsigned int        offs;

		for (offs = 0; offs < sg->length; offs += PAGE_SIZE) {
			void *ptr;

			ptr = kmap(sg_page(sg) + (offs >> PAGE_SHIFT));
			if (unlikely(!ptr)) {
				cl_error("kmap failed\n");
				return -ENOMEM;
			}
#ifdef CONFIG_ARM64
			clear_contiguous_cache(client,
					       (u64)(size_t)ptr,
					       sg_phys(sg) + offs,
					       PAGE_SIZE);
#else
			if (p_mem_info->cache_type == MODS_ALLOC_WRITECOMBINE)
				err = MODS_SET_MEMORY_WC((unsigned long)ptr, 1);
			else
				err = MODS_SET_MEMORY_UC((unsigned long)ptr, 1);
#endif
			kunmap(ptr);
			if (unlikely(err)) {
				cl_error("set cache type failed\n");
				return err;
			}

			/* Set this flag early, so that when an error occurs,
			 * release_chunks() will restore cache attributes
			 * for all pages.  It's OK to restore cache attributes
			 * even for chunks where we haven't changed them.
			 */
			mark_chunk_wc(p_mem_info, ichunk);
		}
	}

	return err;
}

/* Find the dma mapping chunk for the specified memory. */
static struct MODS_DMA_MAP *find_dma_map(struct MODS_MEM_INFO  *p_mem_info,
					 struct mods_pci_dev_2 *pcidev)
{
	struct MODS_DMA_MAP *p_dma_map = NULL;
	struct list_head    *head      = &p_mem_info->dma_map_list;
	struct list_head    *iter;

	if (!head)
		return NULL;

	list_for_each(iter, head) {
		p_dma_map = list_entry(iter, struct MODS_DMA_MAP, list);

		if (mods_is_pci_dev(p_dma_map->pcidev, pcidev))
			return p_dma_map;
	}

	return NULL;
}

/* In order to map pages as UC or WC to the CPU, we need to change their
 * attributes by calling set_memory_uc()/set_memory_wc(), respectively.
 * On some CPUs this operation is extremely slow.  In order to incur
 * this penalty only once, we save pages mapped as UC or WC so that
 * we can reuse them later.
 */
static void save_non_wb_chunks(struct mods_client   *client,
			       struct MODS_MEM_INFO *p_mem_info)
{
	struct scatterlist *sg = NULL;
	u32                 ichunk;

	if (p_mem_info->cache_type == MODS_ALLOC_CACHED)
		return;

	if (unlikely(mutex_lock_interruptible(&client->mtx)))
		return;

	/* Steal the chunks from MODS_MEM_INFO and put them on free list. */

	for_each_sg(p_mem_info->alloc_sg, sg, p_mem_info->num_chunks, ichunk) {

		struct MODS_FREE_PHYS_CHUNK *free_chunk;
		u32 order;

		if (!sg)
			break;

		WARN_ON(!sg_page(sg));

		if (!is_chunk_wc(p_mem_info, ichunk))
			continue;

		free_chunk = kzalloc(sizeof(struct MODS_FREE_PHYS_CHUNK),
				     GFP_KERNEL | __GFP_NORETRY);

		if (unlikely(!free_chunk))
			break;
		atomic_inc(&client->num_allocs);

		order = get_order(sg->length);
		WARN_ON((PAGE_SIZE << order) != sg->length);

		free_chunk->numa_node  = p_mem_info->numa_node;
		free_chunk->order      = order;
		free_chunk->cache_type = p_mem_info->cache_type;
		free_chunk->dma32      = p_mem_info->dma32;
		free_chunk->p_page     = sg_page(sg);

		sg_set_page(sg, NULL, 0, 0);

		cl_debug(DEBUG_MEM_DETAILED,
			 "save %p 2^%u pages %s\n",
			 free_chunk->p_page,
			 order,
			 p_mem_info->cache_type == MODS_ALLOC_WRITECOMBINE
				? "WC" : "UC");

		list_add(&free_chunk->list, &client->free_mem_list);
	}

	mutex_unlock(&client->mtx);
}

static int restore_cache_one_chunk(struct page *p_page, u8 order)
{
	int final_err = 0;
	u32 num_pages = 1U << order;
	u32 i;

	for (i = 0; i < num_pages; i++) {
		void *ptr = kmap(p_page + i);
		int   err = -ENOMEM;

		if (likely(ptr))
			err = MODS_SET_MEMORY_WB((unsigned long)ptr, 1);

		kunmap(ptr);

		if (likely(!final_err))
			final_err = err;
	}

	return final_err;
}

static int release_free_chunks(struct mods_client *client)
{
	struct list_head *head;
	struct list_head *iter;
	struct list_head *next;
	unsigned long     num_restored = 0;
	unsigned long     num_failed   = 0;
	unsigned long     pages_freed  = 0;
	int               final_err    = 0;

	mutex_lock(&client->mtx);

	head = &client->free_mem_list;

	list_for_each_prev_safe(iter, next, head) {

		struct MODS_FREE_PHYS_CHUNK *free_chunk;
		int err;

		free_chunk = list_entry(iter,
					struct MODS_FREE_PHYS_CHUNK,
					list);

		list_del(iter);

		err = restore_cache_one_chunk(free_chunk->p_page,
					      free_chunk->order);
		if (likely(!final_err))
			final_err = err;

		if (unlikely(err))
			++num_failed;
		else
			++num_restored;

		pages_freed += 1u << free_chunk->order;

		__free_pages(free_chunk->p_page, free_chunk->order);
		atomic_sub(1 << free_chunk->order, &client->num_pages);

		kfree(free_chunk);
		atomic_dec(&client->num_allocs);
	}

	mutex_unlock(&client->mtx);

	if (pages_freed) {
		cl_debug(DEBUG_MEM, "released %lu free WC/UC pages, restored cache on %lu free chunks\n",
			 pages_freed, num_restored);
		if (unlikely(num_failed))
			cl_error("failed to restore cache on %lu (out of %lu) free chunks\n",
				 num_failed, num_failed + num_restored);
	}

	return final_err;
}

static int restore_cache(struct mods_client   *client,
			 struct MODS_MEM_INFO *p_mem_info)
{
	struct scatterlist *sg;
	unsigned int        i;
	int                 final_err = 0;

	if (p_mem_info->cache_type == MODS_ALLOC_CACHED)
		return 0;

	for_each_sg(p_mem_info->alloc_sg, sg, p_mem_info->num_chunks, i) {

		const u32 order = get_order(sg->length);
		int       err;

		WARN_ON((PAGE_SIZE << order) != sg->length);

		if (!sg_page(sg) || !is_chunk_wc(p_mem_info, i))
			continue;

		err = restore_cache_one_chunk(sg_page(sg), order);
		if (likely(!final_err))
			final_err = err;
	}

	if (unlikely(final_err))
		cl_error("failed to restore cache attributes\n");

	return final_err;
}

static void release_chunks(struct mods_client   *client,
			   struct MODS_MEM_INFO *p_mem_info)
{
	u32 i;

	WARN_ON(sg_dma_address(p_mem_info->sg));
	WARN_ON(!list_empty(&p_mem_info->dma_map_list));

	restore_cache(client, p_mem_info);

	/* release in reverse order */
	for (i = p_mem_info->num_chunks; i > 0; ) {
		struct scatterlist *sg;
		u32                 order;

		--i;
		sg = &p_mem_info->alloc_sg[i];
		if (!sg_page(sg))
			continue;

		order = get_order(sg->length);
		WARN_ON((PAGE_SIZE << order) != sg->length);

		__free_pages(sg_page(sg), order);
		atomic_sub(1u << order, &client->num_pages);

		sg_set_page(sg, NULL, 0, 0);
	}
}

static gfp_t get_alloc_flags(struct MODS_MEM_INFO *p_mem_info, u32 order)
{
	gfp_t flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;

	if (p_mem_info->force_numa)
		flags |= __GFP_THISNODE;

	if (order)
		flags |= __GFP_COMP;

	if (p_mem_info->dma32)
#ifdef CONFIG_ZONE_DMA32
		flags |= __GFP_DMA32;
#else
		flags |= __GFP_DMA;
#endif
	else
		flags |= __GFP_HIGHMEM;

	return flags;
}

static struct page *alloc_chunk(struct mods_client   *client,
				struct MODS_MEM_INFO *p_mem_info,
				u32                   order,
				int                  *need_cup)
{
	struct page *p_page     = NULL;
	u8           cache_type = p_mem_info->cache_type;
	u8           dma32      = p_mem_info->dma32;
	int          numa_node  = p_mem_info->numa_node;

	if ((cache_type != MODS_MEMORY_CACHED) &&
	    likely(!mutex_lock_interruptible(&client->mtx))) {

		struct list_head            *iter;
		struct list_head            *head       = &client->free_mem_list;
		struct MODS_FREE_PHYS_CHUNK *free_chunk = NULL;

		list_for_each(iter, head) {
			free_chunk = list_entry(iter,
						struct MODS_FREE_PHYS_CHUNK,
						list);

			if (free_chunk->cache_type == cache_type &&
			    free_chunk->dma32      == dma32 &&
			    free_chunk->numa_node  == numa_node &&
			    free_chunk->order      == order) {

				list_del(iter);
				break;
			}

			free_chunk = NULL;
		}

		mutex_unlock(&client->mtx);

		if (free_chunk) {
			p_page = free_chunk->p_page;
			kfree(free_chunk);
			atomic_dec(&client->num_allocs);

			cl_debug(DEBUG_MEM_DETAILED, "reuse %p 2^%u pages %s\n",
				 p_page, order,
				 cache_type == MODS_ALLOC_WRITECOMBINE
					? "WC" : "UC");

			*need_cup = 0;
			return p_page;
		}
	}

	p_page = alloc_pages_node(p_mem_info->numa_node,
				  get_alloc_flags(p_mem_info, order),
				  order);

	*need_cup = 1;

	if (likely(p_page))
		atomic_add(1 << order, &client->num_pages);

	return p_page;
}

static int alloc_contig_sys_pages(struct mods_client   *client,
				  struct MODS_MEM_INFO *p_mem_info)
{
	const unsigned long req_bytes = (unsigned long)p_mem_info->num_pages
					<< PAGE_SHIFT;
	struct page *p_page;
	u64          phys_addr;
	u64          end_addr = 0;
	u32          order    = 0;
	int          is_wb    = 1;
	int          err      = -ENOMEM;

	LOG_ENT();

	while ((1U << order) < p_mem_info->num_pages)
		order++;

	p_page = alloc_chunk(client, p_mem_info, order, &is_wb);

	if (unlikely(!p_page))
		goto failed;

	p_mem_info->num_pages = 1U << order;

	sg_set_page(p_mem_info->alloc_sg, p_page, PAGE_SIZE << order, 0);

	if (!is_wb)
		mark_chunk_wc(p_mem_info, 0);

	phys_addr = sg_phys(p_mem_info->alloc_sg);
	if (unlikely(phys_addr == 0)) {
		cl_error("failed to determine physical address\n");
		goto failed;
	}

	cl_debug(DEBUG_MEM,
		 "alloc contig 0x%lx bytes, 2^%u pages, %s, node %d,%s phys 0x%llx\n",
		 req_bytes,
		 order,
		 mods_get_prot_str(p_mem_info->cache_type),
		 p_mem_info->numa_node,
		 p_mem_info->dma32 ? " dma32," : "",
		 (unsigned long long)phys_addr);

	end_addr = phys_addr +
		   ((unsigned long)p_mem_info->num_pages << PAGE_SHIFT);
	if (unlikely(p_mem_info->dma32 && (end_addr > 0x100000000ULL))) {
		cl_error("allocation exceeds 32-bit addressing\n");
		goto failed;
	}

	err = setup_cache_attr(client, p_mem_info, 0);

failed:
	LOG_EXT();
	return err;
}

static u32 get_max_order_needed(u32 num_pages)
{
	const u32 order = min(10, get_order(num_pages << PAGE_SHIFT));

	return ((1u << order) <= num_pages) ? order : (order >> 1u);
}
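
/*
 * Worked examples (assumptions added for clarity, not in the original
 * source), with 4 KiB pages:
 *
 *   num_pages = 1024 -> get_order() gives 10 and 1024 <= 1024, so order 10
 *                       (one 4 MiB chunk) is requested;
 *   num_pages = 1000 -> get_order() gives 10, but 1024 > 1000, so the
 *                       function falls back to order 5 (32 pages);
 *   num_pages = 4096 -> get_order() gives 12, clamped to order 10.
 */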

static int alloc_noncontig_sys_pages(struct mods_client   *client,
				     struct MODS_MEM_INFO *p_mem_info)
{
	const unsigned long req_bytes = (unsigned long)p_mem_info->num_pages
					<< PAGE_SHIFT;
	u32 pages_needed = p_mem_info->num_pages;
	u32 num_chunks   = 0;
	int err;

	LOG_ENT();

	p_mem_info->num_pages = 0;

	for (; pages_needed > 0; ++num_chunks) {
		struct scatterlist *sg = &p_mem_info->alloc_sg[num_chunks];
		u64 phys_addr       = 0;
		u32 order           = get_max_order_needed(pages_needed);
		u32 allocated_pages = 0;
		int is_wb           = 1;

		/* Fail if memory fragmentation is very high */
		if (unlikely(num_chunks >= p_mem_info->num_chunks)) {
			cl_error("detected high memory fragmentation\n");
			err = -ENOMEM;
			goto failed;
		}

		for (;;) {
			struct page *p_page = alloc_chunk(client,
							  p_mem_info,
							  order,
							  &is_wb);
			if (p_page) {
				sg_set_page(sg, p_page, PAGE_SIZE << order, 0);
				allocated_pages = 1u << order;
				break;
			}
			if (order == 0)
				break;
			--order;
		}

		if (unlikely(!allocated_pages)) {
			cl_error("out of memory\n");
			err = -ENOMEM;
			goto failed;
		}

		if (!is_wb)
			mark_chunk_wc(p_mem_info, num_chunks);

		pages_needed -= min(allocated_pages, pages_needed);
		p_mem_info->num_pages += allocated_pages;

		phys_addr = sg_phys(sg);
		if (unlikely(phys_addr == 0)) {
			cl_error("phys addr lookup failed\n");
			err = -ENOMEM;
			goto failed;
		}

		cl_debug(DEBUG_MEM,
			 "alloc 0x%lx bytes [%u], 2^%u pages, %s, node %d,%s phys 0x%llx\n",
			 req_bytes,
			 (unsigned int)num_chunks,
			 order,
			 mods_get_prot_str(p_mem_info->cache_type),
			 p_mem_info->numa_node,
			 p_mem_info->dma32 ? " dma32," : "",
			 (unsigned long long)phys_addr);

		err = setup_cache_attr(client, p_mem_info, num_chunks);
		if (unlikely(err))
			goto failed;
	}

	err = 0;

failed:
	if (num_chunks)
		sg_mark_end(&p_mem_info->alloc_sg[num_chunks - 1]);

	LOG_EXT();
	return err;
}

static int register_alloc(struct mods_client   *client,
			  struct MODS_MEM_INFO *p_mem_info)
{
	const int err = mutex_lock_interruptible(&client->mtx);

	if (likely(!err)) {

		list_add(&p_mem_info->list, &client->mem_alloc_list);

		mutex_unlock(&client->mtx);
	}

	return err;
}

static int unregister_and_free_alloc(struct mods_client   *client,
				     struct MODS_MEM_INFO *p_del_mem)
{
	struct MODS_MEM_INFO *p_mem_info = NULL;
	struct list_head     *head;
	struct list_head     *iter;
	int                   err;

	cl_debug(DEBUG_MEM_DETAILED, "free %p\n", p_del_mem);

	mutex_lock(&client->mtx);

	head = &client->mem_alloc_list;

	list_for_each(iter, head) {
		p_mem_info = list_entry(iter, struct MODS_MEM_INFO, list);

		if (p_del_mem == p_mem_info) {
			list_del(iter);
			break;
		}

		p_mem_info = NULL;
	}

	mutex_unlock(&client->mtx);

	if (likely(p_mem_info)) {
		dma_unmap_all(client, p_mem_info, NULL);
		save_non_wb_chunks(client, p_mem_info);
		release_chunks(client, p_mem_info);

		pci_dev_put(p_mem_info->dev);

		kfree(p_mem_info);
		atomic_dec(&client->num_allocs);

		err = OK;
	} else {
		cl_error("failed to unregister allocation %p\n", p_del_mem);
		err = -EINVAL;
	}

	return err;
}

int mods_unregister_all_alloc(struct mods_client *client)
{
	int final_err = OK;
	int err;
	struct list_head *head = &client->mem_alloc_list;
	struct list_head *iter;
	struct list_head *tmp;

	list_for_each_safe(iter, tmp, head) {

		struct MODS_MEM_INFO *p_mem_info;

		p_mem_info = list_entry(iter, struct MODS_MEM_INFO, list);
		err = unregister_and_free_alloc(client, p_mem_info);
		if (likely(!final_err))
			final_err = err;
	}

	err = release_free_chunks(client);
	if (likely(!final_err))
		final_err = err;

	return final_err;
}

static int get_addr_range(struct mods_client                 *client,
			  struct MODS_GET_PHYSICAL_ADDRESS_3 *p,
			  struct mods_pci_dev_2              *pcidev)
{
	struct scatterlist   *sg;
	struct MODS_MEM_INFO *p_mem_info;
	struct MODS_DMA_MAP  *p_dma_map = NULL;
	u64                   offs;
	u32                   num_chunks;
	u32                   ichunk;
	int                   err = OK;

	LOG_ENT();

	p->physical_address = 0;

	p_mem_info = get_mem_handle(client, p->memory_handle);
	if (unlikely(!p_mem_info)) {
		LOG_EXT();
		return -EINVAL;
	}

	if (unlikely(pcidev && (pcidev->bus > 0xFFU ||
				pcidev->device > 0xFFU))) {
		cl_error("dev %04x:%02x:%02x.%x not found\n",
			 pcidev->domain,
			 pcidev->bus,
			 pcidev->device,
			 pcidev->function);
		LOG_EXT();
		return -EINVAL;
	}

	sg = p_mem_info->sg;
	num_chunks = get_num_chunks(p_mem_info);

	err = mutex_lock_interruptible(&client->mtx);
	if (err) {
		LOG_EXT();
		return err;
	}

	/* If pcidev was specified, retrieve IOVA,
	 * otherwise retrieve physical address.
	 */
	if (pcidev) {
		if (mods_is_pci_dev(p_mem_info->dev, pcidev)) {
			if (!sg_dma_address(sg))
				err = -EINVAL;
		} else {
			p_dma_map = find_dma_map(p_mem_info, pcidev);
			if (!p_dma_map)
				err = -EINVAL;
			else
				sg = p_dma_map->sg;
		}

		if (err) {
			mutex_unlock(&client->mtx);

			cl_error(
				"allocation %p is not mapped to dev %04x:%02x:%02x.%x\n",
				p_mem_info,
				pcidev->domain,
				pcidev->bus,
				pcidev->device,
				pcidev->function);

			LOG_EXT();
			return err;
		}
	}

	offs = p->offset;
	err  = -EINVAL;

	for_each_sg(sg, sg, num_chunks, ichunk) {
		unsigned int size;

		if (!sg)
			break;

		size = pcidev ? sg_dma_len(sg) : sg->length;
		if (size <= offs) {
			offs -= size;
			continue;
		}

		if (pcidev) {
			dma_addr_t addr = sg_dma_address(sg) + offs;

			addr = compress_nvlink_addr(p_mem_info->dev, addr);

			p->physical_address = (u64)addr;
		} else {
			p->physical_address = (u64)sg_phys(sg) + offs;
		}

		err = OK;
		break;
	}

	mutex_unlock(&client->mtx);

	if (err && pcidev) {
		cl_error(
			"invalid offset 0x%llx requested for va on dev %04x:%02x:%02x.%x in allocation %p of size 0x%llx\n",
			(unsigned long long)p->offset,
			pcidev->domain,
			pcidev->bus,
			pcidev->device,
			pcidev->function,
			p_mem_info,
			(unsigned long long)p_mem_info->num_pages << PAGE_SHIFT);
	} else if (err && !pcidev) {
		cl_error(
			"invalid offset 0x%llx requested for pa in allocation %p of size 0x%llx\n",
			(unsigned long long)p->offset,
			p_mem_info,
			(unsigned long long)p_mem_info->num_pages << PAGE_SHIFT);
	}

	LOG_EXT();
	return err;
}

/* Returns an offset within an allocation deduced from physical address.
 * If physical address doesn't belong to the allocation, returns non-zero.
 */
static int get_alloc_offset(struct MODS_MEM_INFO *p_mem_info,
			    u64                   phys_addr,
			    u64                  *ret_offs)
{
	struct scatterlist *sg;
	u64                 offset     = 0;
	const u32           num_chunks = get_num_chunks(p_mem_info);
	u32                 ichunk;

	for_each_sg(p_mem_info->sg, sg, num_chunks, ichunk) {
		dma_addr_t   addr;
		unsigned int size;

		addr = sg_phys(sg);
		size = sg->length;

		if (phys_addr >= addr && phys_addr < addr + size) {
			*ret_offs = phys_addr - addr + offset;
			return 0;
		}

		offset += size;
	}

	/* The physical address doesn't belong to the allocation */
	return -EINVAL;
}
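
/*
 * Illustrative example (an assumption, not in the original source): for an
 * allocation with two chunks, chunk 0 at PA 0x10000000 with length 0x4000
 * and chunk 1 at PA 0x20000000 with length 0x2000, looking up
 * phys_addr == 0x20000800 lands in chunk 1 and yields
 * *ret_offs == 0x4000 + 0x800 == 0x4800.
 */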

struct MODS_MEM_INFO *mods_find_alloc(struct mods_client *client, u64 phys_addr)
{
	struct list_head     *plist_head = &client->mem_alloc_list;
	struct list_head     *plist_iter;
	struct MODS_MEM_INFO *p_mem_info;
	u64                   offset;

	list_for_each(plist_iter, plist_head) {
		p_mem_info = list_entry(plist_iter,
					struct MODS_MEM_INFO,
					list);

		if (!get_alloc_offset(p_mem_info, phys_addr, &offset))
			return p_mem_info;
	}

	/* The physical address doesn't belong to any allocation */
	return NULL;
}

/* Estimate the initial number of chunks supported, assuming medium memory
 * fragmentation.
 */
static u32 estimate_num_chunks(u32 num_pages)
{
	u32 num_chunks = 0;
	u32 bit_scan;

	/* Count each contiguous block <=256KB */
	for (bit_scan = num_pages; bit_scan && num_chunks < 6; bit_scan >>= 1)
		++num_chunks;

	/* Count remaining contiguous blocks >256KB */
	num_chunks += bit_scan;

	/* 4x slack for medium memory fragmentation, except huge allocs */
	if (num_chunks < 32 * 1024)
		num_chunks <<= 2;
	else if (num_chunks < 64 * 1024)
		num_chunks <<= 1;

	/* No sense to allocate more chunks than pages */
	if (num_chunks > num_pages)
		num_chunks = num_pages;

	return num_chunks;
}
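
/*
 * Worked example (an assumption, not in the original source): for
 * num_pages == 1000, the loop counts 6 chunks while halving the page count
 * down to bit_scan == 15, giving 6 + 15 == 21, and the 4x slack for medium
 * fragmentation raises the estimate to 84 chunks.
 */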

static inline size_t calc_mem_info_size_no_bitmap(u32 num_chunks)
{
	return sizeof(struct MODS_MEM_INFO) +
	       (num_chunks - 1) * sizeof(struct scatterlist);
}

static inline u32 calc_mem_info_size(u32 num_chunks, u8 cache_type)
{
	size_t size = calc_mem_info_size_no_bitmap(num_chunks);

	if (cache_type != MODS_ALLOC_CACHED)
		size += sizeof(long) * BITS_TO_LONGS(num_chunks);

	return (u32)size;
}
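
/*
 * Illustrative note (an assumption, not in the original source): the
 * size calculation implies MODS_MEM_INFO already embeds one scatterlist
 * entry, so only num_chunks - 1 extra entries are added.  For a non-cached
 * allocation with 100 chunks on a 64-bit kernel, the WC bitmap adds
 * BITS_TO_LONGS(100) == 2 longs, i.e. 16 bytes, on top of that.
 */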

static void init_mem_info(struct MODS_MEM_INFO *p_mem_info,
			  u32                   num_chunks,
			  u8                    cache_type)
{
	p_mem_info->sg         = p_mem_info->alloc_sg;
	p_mem_info->num_chunks = num_chunks;
	p_mem_info->cache_type = cache_type;

	if (cache_type != MODS_ALLOC_CACHED)
		p_mem_info->wc_bitmap = (unsigned long *)
			&p_mem_info->alloc_sg[num_chunks];

	INIT_LIST_HEAD(&p_mem_info->dma_map_list);
}

static struct MODS_MEM_INFO *alloc_mem_info(struct mods_client *client,
					    u32                 num_chunks,
					    u8                  cache_type,
					    u32                *alloc_size)
{
	struct MODS_MEM_INFO *p_mem_info = NULL;

	const u32 calc_size = calc_mem_info_size(num_chunks, cache_type);

	*alloc_size = calc_size;

	p_mem_info = kzalloc(calc_size, GFP_KERNEL | __GFP_NORETRY);

	if (likely(p_mem_info)) {
		atomic_inc(&client->num_allocs);

		sg_init_table(&p_mem_info->contig_sg, 1);
		sg_init_table(p_mem_info->alloc_sg, num_chunks);
	}

	return p_mem_info;
}

/* For large non-contiguous allocations, we typically use significantly fewer
 * chunks than originally estimated.  This function reallocates the
 * MODS_MEM_INFO struct so that it uses only as much memory as it needs.
 */
static struct MODS_MEM_INFO *optimize_chunks(struct mods_client   *client,
					     struct MODS_MEM_INFO *p_mem_info)
{
	struct scatterlist   *sg;
	struct MODS_MEM_INFO *p_new_mem_info = NULL;
	u32                   num_chunks     = 0;
	u32                   alloc_size     = 0;

	for_each_sg(p_mem_info->alloc_sg, sg,
		    p_mem_info->num_chunks, num_chunks) {
		if (!sg || !sg_page(sg))
			break;
	}

	if (num_chunks < p_mem_info->num_chunks)
		p_new_mem_info = alloc_mem_info(client, num_chunks,
						p_mem_info->cache_type,
						&alloc_size);

	if (p_new_mem_info) {
		const size_t copy_size =
			calc_mem_info_size_no_bitmap(num_chunks);

		memcpy(p_new_mem_info, p_mem_info, copy_size);
		init_mem_info(p_new_mem_info, num_chunks,
			      p_mem_info->cache_type);
		copy_wc_bitmap(p_new_mem_info, 0, p_mem_info, num_chunks);

		kfree(p_mem_info);
		atomic_dec(&client->num_allocs);

		p_mem_info = p_new_mem_info;
	}

	return p_mem_info;
}

/*************************
 * ESCAPE CALL FUNCTIONS *
 *************************/

int esc_mods_alloc_pages_2(struct mods_client *client,
			   struct MODS_ALLOC_PAGES_2 *p)
{
	struct MODS_MEM_INFO *p_mem_info = NULL;
	u32                   num_pages;
	u32                   alloc_size;
	u32                   num_chunks;
	int                   err = -EINVAL;
	u8                    cache_type;

	LOG_ENT();

	p->memory_handle = 0;

	cl_debug(DEBUG_MEM_DETAILED,
		 "alloc 0x%llx bytes flags=0x%x (%s %s%s%s%s%s) node=%d on dev %04x:%02x:%02x.%x\n",
		 (unsigned long long)p->num_bytes,
		 p->flags,
		 mods_get_prot_str(p->flags & MODS_ALLOC_CACHE_MASK),
		 (p->flags & MODS_ALLOC_CONTIGUOUS) ? "contiguous" :
						      "noncontiguous",
		 (p->flags & MODS_ALLOC_DMA32) ? " dma32" : "",
		 (p->flags & MODS_ALLOC_USE_NUMA) ? " usenuma" : "",
		 (p->flags & MODS_ALLOC_FORCE_NUMA) ? " forcenuma" : "",
		 (p->flags & MODS_ALLOC_MAP_DEV) ? " dmamap" : "",
		 p->numa_node,
		 p->pci_device.domain,
		 p->pci_device.bus,
		 p->pci_device.device,
		 p->pci_device.function);

	if (unlikely(!p->num_bytes)) {
		cl_error("zero bytes requested\n");
		goto failed;
	}

	num_pages = (u32)((p->num_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT);
	if (p->flags & MODS_ALLOC_CONTIGUOUS)
		num_chunks = 1;
	else
		num_chunks = estimate_num_chunks(num_pages);

	if (unlikely(((u64)num_pages << PAGE_SHIFT) < p->num_bytes)) {
		cl_error("invalid allocation size requested: 0x%llx\n",
			 (unsigned long long)p->num_bytes);
		goto failed;
	}

	if (unlikely((p->flags & MODS_ALLOC_USE_NUMA) &&
		     (p->numa_node != MODS_ANY_NUMA_NODE) &&
		     ((unsigned int)p->numa_node >=
		      (unsigned int)num_possible_nodes()))) {

		cl_error("invalid NUMA node: %d\n", p->numa_node);
		goto failed;
	}

#ifdef CONFIG_PPC64
	if (unlikely((p->flags & MODS_ALLOC_CACHE_MASK) != MODS_ALLOC_CACHED)) {
		cl_error("unsupported cache attr %u (%s)\n",
			 p->flags & MODS_ALLOC_CACHE_MASK,
			 mods_get_prot_str(p->flags & MODS_ALLOC_CACHE_MASK));
		err = -ENOMEM;
		goto failed;
	}
#endif

	cache_type = (u8)(p->flags & MODS_ALLOC_CACHE_MASK);

	p_mem_info = alloc_mem_info(client, num_chunks, cache_type,
				    &alloc_size);

	if (unlikely(!p_mem_info)) {
		cl_error("failed to allocate auxiliary 0x%x bytes for %u chunks to hold %u pages\n",
			 alloc_size, num_chunks, num_pages);
		err = -ENOMEM;
		goto failed;
	}

	init_mem_info(p_mem_info, num_chunks, cache_type);

	p_mem_info->num_pages  = num_pages;
	p_mem_info->dma32      = (p->flags & MODS_ALLOC_DMA32) ? true : false;
	p_mem_info->force_numa = (p->flags & MODS_ALLOC_FORCE_NUMA)
				 ? true : false;
#ifdef MODS_HASNT_NUMA_NO_NODE
	p_mem_info->numa_node  = numa_node_id();
#else
	p_mem_info->numa_node  = NUMA_NO_NODE;
#endif
	p_mem_info->dev        = NULL;

	if ((p->flags & MODS_ALLOC_USE_NUMA) &&
	    p->numa_node != MODS_ANY_NUMA_NODE)
		p_mem_info->numa_node = p->numa_node;

#ifdef CONFIG_PCI
	if (!(p->flags & MODS_ALLOC_USE_NUMA) ||
	    (p->flags & MODS_ALLOC_MAP_DEV)) {

		struct pci_dev *dev = NULL;

		err = mods_find_pci_dev(client, &p->pci_device, &dev);
		if (unlikely(err)) {
			cl_error("dev %04x:%02x:%02x.%x not found\n",
				 p->pci_device.domain,
				 p->pci_device.bus,
				 p->pci_device.device,
				 p->pci_device.function);
			goto failed;
		}

		p_mem_info->dev = dev;
		if (!(p->flags & MODS_ALLOC_USE_NUMA))
			p_mem_info->numa_node = dev_to_node(&dev->dev);

#ifdef CONFIG_PPC64
		if (!mods_is_nvlink_sysmem_trained(client, dev)) {
			/* Until NvLink is trained, we must use memory
			 * on node 0.
			 */
			if (has_npu_dev(dev, 0))
				p_mem_info->numa_node = 0;
		}
#endif
		cl_debug(DEBUG_MEM_DETAILED,
			 "affinity dev %04x:%02x:%02x.%x node %d\n",
			 p->pci_device.domain,
			 p->pci_device.bus,
			 p->pci_device.device,
			 p->pci_device.function,
			 p_mem_info->numa_node);
	}
#endif

	if (p->flags & MODS_ALLOC_CONTIGUOUS)
		err = alloc_contig_sys_pages(client, p_mem_info);
	else {
		err = alloc_noncontig_sys_pages(client, p_mem_info);

		if (likely(!err))
			p_mem_info = optimize_chunks(client, p_mem_info);
	}

	if (unlikely(err)) {
		cl_error("failed to alloc 0x%lx %s bytes, %s, node %d%s\n",
			 ((unsigned long)num_pages) << PAGE_SHIFT,
			 (p->flags & MODS_ALLOC_CONTIGUOUS) ? "contiguous" :
							      "non-contiguous",
			 mods_get_prot_str(p_mem_info->cache_type),
			 p_mem_info->numa_node,
			 p_mem_info->dma32 ? ", dma32" : "");
		goto failed;
	}

	err = register_alloc(client, p_mem_info);
	if (unlikely(err))
		goto failed;

	p->memory_handle = (u64)(size_t)p_mem_info;

	cl_debug(DEBUG_MEM_DETAILED, "alloc %p: %u chunks, %u pages\n",
		 p_mem_info, p_mem_info->num_chunks, p_mem_info->num_pages);

failed:
	if (unlikely(err && p_mem_info)) {
		dma_unmap_all(client, p_mem_info, NULL);
		release_chunks(client, p_mem_info);
		pci_dev_put(p_mem_info->dev);

		kfree(p_mem_info);
		atomic_dec(&client->num_allocs);
	}

	LOG_EXT();
	return err;
}

int esc_mods_device_alloc_pages_2(struct mods_client *client,
				  struct MODS_DEVICE_ALLOC_PAGES_2 *p)
{
	int err;
	u32 flags = 0;
	struct MODS_ALLOC_PAGES_2 dev_alloc_pages = {0};

	LOG_ENT();

	if (p->contiguous)
		flags |= MODS_ALLOC_CONTIGUOUS;

	if (p->address_bits == 32)
		flags |= MODS_ALLOC_DMA32;

	if (p->attrib == MODS_MEMORY_UNCACHED)
		flags |= MODS_ALLOC_UNCACHED;
	else if (p->attrib == MODS_MEMORY_WRITECOMBINE)
		flags |= MODS_ALLOC_WRITECOMBINE;
	else if (unlikely(p->attrib != MODS_MEMORY_CACHED)) {
		cl_error("invalid cache attrib: %u\n", p->attrib);
		LOG_EXT();
		return -ENOMEM;
	}

	if (p->pci_device.bus > 0xFFU || p->pci_device.device > 0xFFU)
		flags |= MODS_ALLOC_USE_NUMA;
	else
		flags |= MODS_ALLOC_MAP_DEV | MODS_ALLOC_FORCE_NUMA;

	dev_alloc_pages.num_bytes  = p->num_bytes;
	dev_alloc_pages.flags      = flags;
	dev_alloc_pages.numa_node  = MODS_ANY_NUMA_NODE;
	dev_alloc_pages.pci_device = p->pci_device;

	err = esc_mods_alloc_pages_2(client, &dev_alloc_pages);
	if (likely(!err))
		p->memory_handle = dev_alloc_pages.memory_handle;

	LOG_EXT();
	return err;
}

int esc_mods_device_alloc_pages(struct mods_client *client,
				struct MODS_DEVICE_ALLOC_PAGES *p)
{
	int err;
	u32 flags = 0;
	struct MODS_ALLOC_PAGES_2 dev_alloc_pages = {0};

	LOG_ENT();

	if (p->contiguous)
		flags |= MODS_ALLOC_CONTIGUOUS;

	if (p->address_bits == 32)
		flags |= MODS_ALLOC_DMA32;

	if (p->attrib == MODS_MEMORY_UNCACHED)
		flags |= MODS_ALLOC_UNCACHED;
	else if (p->attrib == MODS_MEMORY_WRITECOMBINE)
		flags |= MODS_ALLOC_WRITECOMBINE;
	else if (unlikely(p->attrib != MODS_MEMORY_CACHED)) {
		cl_error("invalid cache attrib: %u\n", p->attrib);
		LOG_EXT();
		return -ENOMEM;
	}

	if (p->pci_device.bus > 0xFFU || p->pci_device.device > 0xFFU)
		flags |= MODS_ALLOC_USE_NUMA;
	else
		flags |= MODS_ALLOC_MAP_DEV | MODS_ALLOC_FORCE_NUMA;

	dev_alloc_pages.num_bytes           = p->num_bytes;
	dev_alloc_pages.flags               = flags;
	dev_alloc_pages.numa_node           = MODS_ANY_NUMA_NODE;
	dev_alloc_pages.pci_device.domain   = 0;
	dev_alloc_pages.pci_device.bus      = p->pci_device.bus;
	dev_alloc_pages.pci_device.device   = p->pci_device.device;
	dev_alloc_pages.pci_device.function = p->pci_device.function;

	err = esc_mods_alloc_pages_2(client, &dev_alloc_pages);
	if (likely(!err))
		p->memory_handle = dev_alloc_pages.memory_handle;

	LOG_EXT();
	return err;
}

int esc_mods_alloc_pages(struct mods_client *client, struct MODS_ALLOC_PAGES *p)
{
	int err;
	u32 flags = MODS_ALLOC_USE_NUMA;
	struct MODS_ALLOC_PAGES_2 dev_alloc_pages = {0};

	LOG_ENT();

	if (p->contiguous)
		flags |= MODS_ALLOC_CONTIGUOUS;

	if (p->address_bits == 32)
		flags |= MODS_ALLOC_DMA32;

	if (p->attrib == MODS_MEMORY_UNCACHED)
		flags |= MODS_ALLOC_UNCACHED;
	else if (p->attrib == MODS_MEMORY_WRITECOMBINE)
		flags |= MODS_ALLOC_WRITECOMBINE;
	else if (unlikely(p->attrib != MODS_MEMORY_CACHED)) {
		cl_error("invalid cache attrib: %u\n", p->attrib);
		LOG_EXT();
		return -ENOMEM;
	}

	dev_alloc_pages.num_bytes           = p->num_bytes;
	dev_alloc_pages.flags               = flags;
	dev_alloc_pages.numa_node           = MODS_ANY_NUMA_NODE;
	dev_alloc_pages.pci_device.domain   = 0xFFFFU;
	dev_alloc_pages.pci_device.bus      = 0xFFFFU;
	dev_alloc_pages.pci_device.device   = 0xFFFFU;
	dev_alloc_pages.pci_device.function = 0xFFFFU;

	err = esc_mods_alloc_pages_2(client, &dev_alloc_pages);
	if (likely(!err))
		p->memory_handle = dev_alloc_pages.memory_handle;

	LOG_EXT();
	return err;
}

int esc_mods_free_pages(struct mods_client *client, struct MODS_FREE_PAGES *p)
{
	struct MODS_MEM_INFO *p_mem_info;
	int                   err = -EINVAL;

	LOG_ENT();

	p_mem_info = get_mem_handle(client, p->memory_handle);

	if (likely(p_mem_info))
		err = unregister_and_free_alloc(client, p_mem_info);

	LOG_EXT();

	return err;
}

static phys_addr_t get_contig_pa(struct mods_client   *client,
				 struct MODS_MEM_INFO *p_mem_info)
{
	struct scatterlist *sg;
	struct scatterlist *prev_sg = NULL;
	u32                 i;
	bool                contig  = true;

	for_each_sg(p_mem_info->alloc_sg, sg, p_mem_info->num_chunks, i) {
		if ((i > 0) &&
		    (sg_phys(prev_sg) + prev_sg->length != sg_phys(sg))) {

			cl_debug(DEBUG_MEM_DETAILED,
				 "merge is non-contiguous because alloc %p chunk %u pa 0x%llx size 0x%x and chunk %u pa 0x%llx\n",
				 p_mem_info,
				 i - 1,
				 (unsigned long long)sg_phys(prev_sg),
				 prev_sg->length,
				 i,
				 (unsigned long long)sg_phys(sg));
			contig = false;
			break;
		}

		prev_sg = sg;
	}

	return contig ? sg_phys(p_mem_info->alloc_sg) : 0;
}

int esc_mods_merge_pages(struct mods_client *client,
			 struct MODS_MERGE_PAGES *p)
{
	struct MODS_MEM_INFO *p_mem_info;
	int                   err        = OK;
	u32                   num_chunks = 0;
	u32                   alloc_size = 0;
	unsigned int          i;
	bool                  contig     = true;
	u32                   cache_type;

	LOG_ENT();

	if (unlikely(p->num_in_handles < 2 ||
		     p->num_in_handles > MODS_MAX_MERGE_HANDLES)) {
		cl_error("invalid number of input handles: %u\n",
			 p->num_in_handles);
		LOG_EXT();
		return -EINVAL;
	}

	err = mutex_lock_interruptible(&client->mtx);
	if (unlikely(err)) {
		LOG_EXT();
		return err;
	}

	{
		const char   *err_msg = NULL;
		phys_addr_t   prev_pa;
		unsigned long prev_size;

		p_mem_info = get_mem_handle(client, p->in_memory_handles[0]);

		if (unlikely(!validate_mem_handle(client, p_mem_info))) {
			cl_error("handle 0: invalid handle %p\n", p_mem_info);
			err = -EINVAL;
			goto failed;
		}

		WARN_ON(p_mem_info->num_pages == 0);
		if (unlikely(!list_empty(&p_mem_info->dma_map_list) ||
			     sg_dma_address(p_mem_info->sg))) {
			cl_error("handle 0: found dma mappings\n");
			err = -EINVAL;
			goto failed;
		}

		cache_type = p_mem_info->cache_type;
		num_chunks = p_mem_info->num_chunks;
		prev_pa    = get_contig_pa(client, p_mem_info);
		prev_size  = p_mem_info->num_pages << PAGE_SHIFT;

		for (i = 1; i < p->num_in_handles; i++) {
			struct MODS_MEM_INFO *const p_other =
				get_mem_handle(client, p->in_memory_handles[i]);
			phys_addr_t  next_pa;
			unsigned int j;

			if (!validate_mem_handle(client, p_other)) {
				cl_error("handle %u: invalid handle %p\n",
					 i, p);
				err = -EINVAL;
				goto failed;
			}

			for (j = 0; j < i; j++) {
				if (unlikely(p->in_memory_handles[i] ==
					     p->in_memory_handles[j])) {
					err_msg = "duplicate handle";
					break;
				}
			}
			if (err_msg)
				break;

			if (unlikely(p_mem_info->cache_type !=
				     p_other->cache_type)) {
				err_msg = "cache attr mismatch";
				break;
			}

			if (unlikely(p_mem_info->force_numa &&
				     p_mem_info->numa_node != p_other->numa_node)) {
				err_msg = "numa node mismatch";
				break;
			}

			if (unlikely(p_mem_info->dma32 != p_other->dma32)) {
				err_msg = "dma32 mismatch";
				break;
			}

			if (p_mem_info->dev) {
				if (unlikely(p_mem_info->dev !=
					     p_other->dev)) {
					err_msg = "device mismatch";
					break;
				}
			}

			WARN_ON(p_other->num_pages == 0);
			if (unlikely(!list_empty(&p_other->dma_map_list) ||
				     sg_dma_address(p_other->sg))) {
				err_msg = "found dma mappings";
				break;
			}

			num_chunks += p_other->num_chunks;
			next_pa     = get_contig_pa(client, p_other);

			if (contig && ((prev_pa + prev_size) != next_pa)) {
				contig = false;
				cl_debug(DEBUG_MEM_DETAILED,
					 "merge is non-contiguous because alloc %u %p pa 0x%llx size 0x%lx and alloc %u %p pa 0x%llx\n",
					 i - 1,
					 get_mem_handle(client,
						p->in_memory_handles[i - 1]),
					 (unsigned long long)prev_pa,
					 prev_size,
					 i,
					 p_other,
					 (unsigned long long)next_pa);
			}

			prev_pa   = next_pa;
			prev_size = p_other->num_pages << PAGE_SHIFT;
		}

		if (unlikely(err_msg)) {
			cl_error("merging handle %u: %s\n", i, err_msg);
			err = -EINVAL;
			goto failed;
		}
	}

	p_mem_info = alloc_mem_info(client, num_chunks, cache_type,
				    &alloc_size);

	if (unlikely(!p_mem_info)) {
		err = -ENOMEM;
		goto failed;
	}

	for (i = 0; i < p->num_in_handles; i++) {
		struct MODS_MEM_INFO *p_other =
			get_mem_handle(client, p->in_memory_handles[i]);
		const u32 other_chunks = p_other->num_chunks;

		cl_debug(DEBUG_MEM_DETAILED, "merge %p (%u) into %p[%u..%u], phys 0x%llx\n",
			 p_other, i, p_mem_info, p_mem_info->num_chunks,
			 p_mem_info->num_chunks + other_chunks - 1,
			 (unsigned long long)sg_phys(p_other->sg));

		list_del(&p_other->list);

		if (i == 0) {
			const size_t copy_size =
				calc_mem_info_size_no_bitmap(other_chunks);

			memcpy(p_mem_info, p_other, copy_size);
			init_mem_info(p_mem_info, num_chunks,
				      p_other->cache_type);
			p_mem_info->num_chunks = other_chunks;
			copy_wc_bitmap(p_mem_info, 0, p_other, other_chunks);

			list_add(&p_mem_info->list, &client->mem_alloc_list);
		} else {
			const u32 num_chunks = p_mem_info->num_chunks;

			memcpy(&p_mem_info->alloc_sg[num_chunks],
			       p_other->alloc_sg,
			       other_chunks * sizeof(struct scatterlist));
			copy_wc_bitmap(p_mem_info, num_chunks,
				       p_other, other_chunks);

			MODS_SG_UNMARK_END(&p_mem_info->alloc_sg[num_chunks - 1]);

			p_mem_info->num_chunks += other_chunks;
			p_mem_info->num_pages  += p_other->num_pages;
		}

		kfree(p_other);
		atomic_dec(&client->num_allocs);
	}

	cl_debug(DEBUG_MEM, "merge alloc %p: %u chunks, %u pages\n",
		 p_mem_info, p_mem_info->num_chunks, p_mem_info->num_pages);

	WARN_ON(num_chunks != p_mem_info->num_chunks);

	if (contig) {
		p_mem_info->sg = &p_mem_info->contig_sg;

		sg_set_page(&p_mem_info->contig_sg,
			    sg_page(p_mem_info->alloc_sg),
			    p_mem_info->num_pages << PAGE_SHIFT,
			    0);
	}

	p->memory_handle = (u64)(size_t)p_mem_info;

failed:
	mutex_unlock(&client->mtx);

	LOG_EXT();

	return err;
}

int esc_mods_set_mem_type(struct mods_client *client,
			  struct MODS_MEMORY_TYPE *p)
{
	struct MODS_MEM_INFO *p_mem_info;
	u8                    type = MODS_ALLOC_CACHED;
	int                   err;

	LOG_ENT();

	switch (p->type) {
	case MODS_MEMORY_CACHED:
		break;

	case MODS_MEMORY_UNCACHED:
		type = MODS_ALLOC_UNCACHED;
		break;

	case MODS_MEMORY_WRITECOMBINE:
		type = MODS_ALLOC_WRITECOMBINE;
		break;

	default:
		cl_error("unsupported memory type: %u\n", p->type);
		LOG_EXT();
		return -EINVAL;
	}

	err = mutex_lock_interruptible(&client->mtx);
	if (unlikely(err)) {
		LOG_EXT();
		return err;
	}

	p_mem_info = mods_find_alloc(client, p->physical_address);
	if (unlikely(p_mem_info)) {
		cl_error("cannot set mem type on phys addr 0x%llx\n",
			 p->physical_address);
		err = -EINVAL;
	} else {
		client->mem_type.phys_addr = p->physical_address;
		client->mem_type.size      = p->size;
		client->mem_type.type      = type;
	}

	mutex_unlock(&client->mtx);

	LOG_EXT();
	return err;
}

int esc_mods_get_phys_addr(struct mods_client *client,
			   struct MODS_GET_PHYSICAL_ADDRESS *p)
{
	struct MODS_GET_PHYSICAL_ADDRESS_3 range;
	int err;

	LOG_ENT();

	range.memory_handle = p->memory_handle;
	range.offset        = p->offset;
	memset(&range.pci_device, 0, sizeof(range.pci_device));

	err = get_addr_range(client, &range, NULL);

	if (!err)
		p->physical_address = range.physical_address;

	LOG_EXT();
	return err;
}

int esc_mods_get_phys_addr_2(struct mods_client *client,
			     struct MODS_GET_PHYSICAL_ADDRESS_3 *p)
{
	struct MODS_GET_PHYSICAL_ADDRESS_3 range;
	int err;

	LOG_ENT();

	range.memory_handle = p->memory_handle;
	range.offset        = p->offset;
	memset(&range.pci_device, 0, sizeof(range.pci_device));

	err = get_addr_range(client, &range, NULL);

	if (!err)
		p->physical_address = range.physical_address;

	LOG_EXT();
	return err;
}

int esc_mods_get_mapped_phys_addr(struct mods_client *client,
				  struct MODS_GET_PHYSICAL_ADDRESS *p)
{
	struct MODS_GET_PHYSICAL_ADDRESS_3 range;
	struct MODS_MEM_INFO *p_mem_info;
	int err;

	LOG_ENT();

	p_mem_info = get_mem_handle(client, p->memory_handle);
	if (unlikely(!p_mem_info)) {
		LOG_EXT();
		return -EINVAL;
	}

	range.memory_handle = p->memory_handle;
	range.offset        = p->offset;

	if (p_mem_info->dev) {
		range.pci_device.domain =
			pci_domain_nr(p_mem_info->dev->bus);
		range.pci_device.bus =
			p_mem_info->dev->bus->number;
		range.pci_device.device =
			PCI_SLOT(p_mem_info->dev->devfn);
		range.pci_device.function =
			PCI_FUNC(p_mem_info->dev->devfn);

		err = get_addr_range(client, &range, &range.pci_device);
	} else {
		memset(&range.pci_device, 0, sizeof(range.pci_device));
		err = get_addr_range(client, &range, NULL);
	}

	if (!err)
		p->physical_address = range.physical_address;

	LOG_EXT();
	return err;
}

int esc_mods_get_mapped_phys_addr_2(struct mods_client *client,
				    struct MODS_GET_PHYSICAL_ADDRESS_2 *p)
{
	struct MODS_GET_PHYSICAL_ADDRESS_3 range;
	int err;

	LOG_ENT();

	range.memory_handle = p->memory_handle;
	range.offset        = p->offset;
	range.pci_device    = p->pci_device;

	err = get_addr_range(client, &range, &range.pci_device);

	if (!err)
		p->physical_address = range.physical_address;

	LOG_EXT();
	return err;
}

int esc_mods_get_mapped_phys_addr_3(struct mods_client *client,
				    struct MODS_GET_PHYSICAL_ADDRESS_3 *p)
{
	struct MODS_GET_PHYSICAL_ADDRESS_3 range;
	int err;

	LOG_ENT();

	range.memory_handle = p->memory_handle;
	range.offset        = p->offset;
	range.pci_device    = p->pci_device;

	err = get_addr_range(client, &range, &range.pci_device);

	if (!err)
		p->physical_address = range.physical_address;

	LOG_EXT();
	return err;
}
|
|
|
|
int esc_mods_virtual_to_phys(struct mods_client *client,
|
|
struct MODS_VIRTUAL_TO_PHYSICAL *p)
|
|
{
|
|
struct MODS_GET_PHYSICAL_ADDRESS_3 range;
|
|
struct list_head *head;
|
|
struct list_head *iter;
|
|
int err;
|
|
|
|
LOG_ENT();
|
|
|
|
memset(&range, 0, sizeof(range));
|
|
|
|
err = mutex_lock_interruptible(&client->mtx);
|
|
if (unlikely(err)) {
|
|
LOG_EXT();
|
|
return err;
|
|
}
|
|
|
|
head = &client->mem_map_list;
|
|
|
|
list_for_each(iter, head) {
|
|
struct SYS_MAP_MEMORY *p_map_mem;
|
|
u64 begin, end;
|
|
u64 phys_offs;
|
|
|
|
p_map_mem = list_entry(iter, struct SYS_MAP_MEMORY, list);
|
|
|
|
begin = p_map_mem->virtual_addr;
|
|
end = p_map_mem->virtual_addr + p_map_mem->mapping_length;
|
|
|
|
if (p->virtual_address >= begin && p->virtual_address < end) {
|
|
|
|
u64 virt_offs = p->virtual_address - begin;
|
|
|
|
/* device memory mapping */
|
|
if (!p_map_mem->p_mem_info) {
|
|
p->physical_address = p_map_mem->phys_addr
|
|
+ virt_offs;
|
|
mutex_unlock(&client->mtx);
|
|
|
|
cl_debug(DEBUG_MEM_DETAILED,
|
|
"get phys: map %p virt 0x%llx -> 0x%llx\n",
|
|
p_map_mem,
|
|
p->virtual_address,
|
|
p->physical_address);
|
|
|
|
LOG_EXT();
|
|
return OK;
|
|
}
|
|
|
|
if (get_alloc_offset(p_map_mem->p_mem_info,
|
|
p_map_mem->phys_addr,
|
|
&phys_offs) != OK)
|
|
break;
|
|
|
|
range.memory_handle =
|
|
(u64)(size_t)p_map_mem->p_mem_info;
|
|
range.offset = virt_offs + phys_offs;
|
|
|
|
mutex_unlock(&client->mtx);
|
|
|
|
err = get_addr_range(client, &range, NULL);
|
|
if (err) {
|
|
LOG_EXT();
|
|
return err;
|
|
}
|
|
|
|
p->physical_address = range.physical_address;
|
|
|
|
cl_debug(DEBUG_MEM_DETAILED,
|
|
"get phys: map %p virt 0x%llx -> 0x%llx\n",
|
|
p_map_mem,
|
|
p->virtual_address,
|
|
p->physical_address);
|
|
|
|
LOG_EXT();
|
|
return OK;
|
|
}
|
|
}
|
|
|
|
mutex_unlock(&client->mtx);
|
|
|
|
cl_error("invalid virtual address 0x%llx\n", p->virtual_address);
|
|
LOG_EXT();
|
|
return -EINVAL;
|
|
}
|
|
|
|
int esc_mods_phys_to_virtual(struct mods_client *client,
			     struct MODS_PHYSICAL_TO_VIRTUAL *p)
{
	struct SYS_MAP_MEMORY *p_map_mem;
	struct list_head *head;
	struct list_head *iter;
	u64 offset;
	u64 map_offset;
	int err;

	LOG_ENT();

	err = mutex_lock_interruptible(&client->mtx);
	if (unlikely(err)) {
		LOG_EXT();
		return err;
	}

	head = &client->mem_map_list;

	list_for_each(iter, head) {
		p_map_mem = list_entry(iter, struct SYS_MAP_MEMORY, list);

		/* device memory mapping */
		if (!p_map_mem->p_mem_info) {
			u64 end = p_map_mem->phys_addr
				  + p_map_mem->mapping_length;
			if (p->physical_address < p_map_mem->phys_addr ||
			    p->physical_address >= end)
				continue;

			offset = p->physical_address - p_map_mem->phys_addr;
			p->virtual_address = p_map_mem->virtual_addr
					     + offset;
			mutex_unlock(&client->mtx);

			cl_debug(DEBUG_MEM_DETAILED,
				 "get virt: map %p phys 0x%llx -> 0x%llx\n",
				 p_map_mem,
				 p->physical_address,
				 p->virtual_address);

			LOG_EXT();
			return OK;
		}

		/* offset from the beginning of the allocation */
		if (get_alloc_offset(p_map_mem->p_mem_info,
				     p->physical_address,
				     &offset))
			continue;

		/* offset from the beginning of the mapping */
		if (get_alloc_offset(p_map_mem->p_mem_info,
				     p_map_mem->phys_addr,
				     &map_offset))
			continue;

		if ((offset >= map_offset) &&
		    (offset < map_offset + p_map_mem->mapping_length)) {
			p->virtual_address = p_map_mem->virtual_addr
					     + offset - map_offset;

			mutex_unlock(&client->mtx);
			cl_debug(DEBUG_MEM_DETAILED,
				 "get virt: map %p phys 0x%llx -> 0x%llx\n",
				 p_map_mem,
				 p->physical_address,
				 p->virtual_address);

			LOG_EXT();
			return OK;
		}
	}

	mutex_unlock(&client->mtx);

	cl_error("phys addr 0x%llx is not mapped\n", p->physical_address);
	LOG_EXT();
	return -EINVAL;
}

#if defined(CONFIG_ARM)
int esc_mods_memory_barrier(struct mods_client *client)
{
	/* Full memory barrier on ARMv7 */
	wmb();
	return OK;
}
#endif

#ifdef CONFIG_PCI
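/* Create a DMA mapping of an allocation for the PCI device specified
 * in the request.  If the allocation's own device matches the request,
 * the allocation is handed to dma_map_to_default_dev() instead.
 * Requesting a mapping which already exists is not treated as an error
 * and leaves the existing mapping untouched.
 */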
int esc_mods_dma_map_memory(struct mods_client *client,
			    struct MODS_DMA_MAP_MEMORY *p)
{
	struct MODS_MEM_INFO *p_mem_info;
	struct MODS_DMA_MAP *p_dma_map;
	struct pci_dev *dev = NULL;
	int err = -EINVAL;
	bool locked = false;

	LOG_ENT();

	p_mem_info = get_mem_handle(client, p->memory_handle);
	if (unlikely(!p_mem_info))
		goto failed;

	err = mutex_lock_interruptible(&client->mtx);
	if (unlikely(err))
		goto failed;
	locked = true;

	if (mods_is_pci_dev(p_mem_info->dev, &p->pci_device)) {
		err = dma_map_to_default_dev(client, p_mem_info);
		goto failed;
	}

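	/* Reuse the PCI device cached in the client if it matches the
	 * request; otherwise drop the mutex around the device lookup
	 * and re-acquire it afterwards.
	 */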
	if (mods_is_pci_dev(client->cached_dev, &p->pci_device))
		dev = pci_dev_get(client->cached_dev);
	else {
		mutex_unlock(&client->mtx);
		locked = false;

		err = mods_find_pci_dev(client, &p->pci_device, &dev);
		if (unlikely(err)) {
			if (err == -ENODEV)
				cl_error("dev %04x:%02x:%02x.%x not found\n",
					 p->pci_device.domain,
					 p->pci_device.bus,
					 p->pci_device.device,
					 p->pci_device.function);
			goto failed;
		}

		err = mutex_lock_interruptible(&client->mtx);
		if (unlikely(err))
			goto failed;
		locked = true;
	}

	p_dma_map = find_dma_map(p_mem_info, &p->pci_device);
	if (unlikely(p_dma_map)) {
		cl_debug(DEBUG_MEM_DETAILED,
			 "memory %p already mapped to dev %04x:%02x:%02x.%x\n",
			 p_mem_info,
			 p->pci_device.domain,
			 p->pci_device.bus,
			 p->pci_device.device,
			 p->pci_device.function);
		goto failed;
	}

	err = create_dma_map(client, p_mem_info, dev, &dev->dev);

failed:
	if (locked)
		mutex_unlock(&client->mtx);

	pci_dev_put(dev);

	LOG_EXT();
	return err;
}

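/* Release the DMA mappings of an allocation for the PCI device
 * specified in the request via dma_unmap_all().
 */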
int esc_mods_dma_unmap_memory(struct mods_client *client,
			      struct MODS_DMA_MAP_MEMORY *p)
{
	struct MODS_MEM_INFO *p_mem_info;
	struct pci_dev *dev = NULL;
	int err = -EINVAL;

	LOG_ENT();

	p_mem_info = get_mem_handle(client, p->memory_handle);
	if (unlikely(!p_mem_info))
		goto failed;

	err = mods_find_pci_dev(client, &p->pci_device, &dev);
	if (unlikely(err)) {
		if (err == -ENODEV)
			cl_error("dev %04x:%02x:%02x.%x not found\n",
				 p->pci_device.domain,
				 p->pci_device.bus,
				 p->pci_device.device,
				 p->pci_device.function);
		goto failed;
	}

	err = mutex_lock_interruptible(&client->mtx);
	if (unlikely(err))
		goto failed;

	err = dma_unmap_all(client, p_mem_info, &dev->dev);

	mutex_unlock(&client->mtx);

failed:
	pci_dev_put(dev);

	LOG_EXT();
	return err;
}
#endif /* CONFIG_PCI */

#ifdef MODS_HAS_TEGRA
/* map dma buffer by iommu */
int esc_mods_iommu_dma_map_memory(struct mods_client *client,
				  struct MODS_IOMMU_DMA_MAP_MEMORY *p)
{
	struct scatterlist *sg;
	struct MODS_MEM_INFO *p_mem_info;
	char *dev_name = p->dev_name;
	struct mods_smmu_dev *smmu_pdev = NULL;
	struct MODS_DMA_MAP *p_dma_map;
	dma_addr_t next_iova = 0;
	u32 num_chunks;
	u32 i;
	int smmudev_idx;
	int err = -EINVAL;
	bool locked = false;

	LOG_ENT();

	if (!(p->flags & MODS_IOMMU_MAP_CONTIGUOUS))
		cl_error("contiguous flag not set\n");

	p_mem_info = get_mem_handle(client, p->memory_handle);
	if (unlikely(!p_mem_info))
		goto failed;

	if (!list_empty(&p_mem_info->dma_map_list)) {
		cl_error("smmu is already mapped\n");
		goto failed;
	}

	smmudev_idx = get_mods_smmu_device_index(dev_name);
	if (smmudev_idx >= 0)
		smmu_pdev = get_mods_smmu_device(smmudev_idx);
	if (!smmu_pdev || smmudev_idx < 0) {
		cl_error("smmu device %s not found\n", dev_name);
		err = -ENODEV;
		goto failed;
	}

	err = mutex_lock_interruptible(&client->mtx);
	if (unlikely(err))
		goto failed;
	locked = true;

	/* do smmu mapping */
	err = create_dma_map(client, p_mem_info, NULL, smmu_pdev->dev);
	if (err)
		goto failed;

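	/* Only the IOVA of the first chunk is reported back to the
	 * caller, so the mapping is only usable if the IOMMU placed
	 * all chunks in a single contiguous IOVA range.
	 */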
	/* Check if IOVAs are contiguous */
	p_dma_map = list_first_entry(&p_mem_info->dma_map_list,
				     struct MODS_DMA_MAP, list);
	num_chunks = get_num_chunks(p_mem_info);
	for_each_sg(p_dma_map->sg, sg, num_chunks, i) {

		const dma_addr_t iova = sg_dma_address(sg);
		const dma_addr_t iova_end = iova + sg_dma_len(sg);

		/* A zero DMA length means the IOMMU driver merged this
		 * entry into a preceding contiguous chunk, so there is
		 * nothing to check for it.
		 */
		if (iova_end == iova)
			continue;

		if ((i > 0) && (iova != next_iova)) {
			cl_error("sg not contiguous: dma 0x%llx, expected 0x%llx\n",
				 (unsigned long long)sg_dma_address(sg),
				 (unsigned long long)next_iova);

			dma_unmap_and_free(client, p_mem_info, p_dma_map);
			err = -EINVAL;
			goto failed;
		}

		next_iova = iova_end;
	}

	p->physical_address = sg_dma_address(p_dma_map->sg);

failed:
	if (locked)
		mutex_unlock(&client->mtx);

	LOG_EXT();
	return err;
}

/* unmap dma buffer by iommu */
int esc_mods_iommu_dma_unmap_memory(struct mods_client *client,
				    struct MODS_IOMMU_DMA_MAP_MEMORY *p)
{
	struct MODS_MEM_INFO *p_mem_info;
	struct MODS_DMA_MAP *p_dma_map;
	int err = -EINVAL;

	LOG_ENT();

	p_mem_info = get_mem_handle(client, p->memory_handle);
	if (unlikely(!p_mem_info))
		goto failed;

	if (!list_is_singular(&p_mem_info->dma_map_list)) {
		cl_error("smmu buffer is not mapped, handle=0x%llx\n",
			 (unsigned long long)p_mem_info);
		goto failed;
	}

	err = mutex_lock_interruptible(&client->mtx);
	if (unlikely(err))
		goto failed;

	p_dma_map = list_first_entry(&p_mem_info->dma_map_list,
				     struct MODS_DMA_MAP,
				     list);

	dma_unmap_and_free(client, p_mem_info, p_dma_map);

	mutex_unlock(&client->mtx);

failed:
	LOG_EXT();
	return err;
}
#endif /* MODS_HAS_TEGRA */

#ifdef CONFIG_ARM64
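/* Clean and invalidate the CPU D-cache for a contiguous kernel virtual
 * range by issuing "dc civac" on every cache line in the range.  On
 * kernel 5.10 and newer the cache line size is read from CTR_EL0;
 * older kernels fall back to an assumed 64-byte line.
 */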
static void clear_contiguous_cache(struct mods_client *client,
				   u64 virt_start,
				   u64 phys_start,
				   u32 size)
{
	u64 end = virt_start + size;
	u64 cur;
	u64 d_size;
	static u32 d_line_shift;

	if (!d_line_shift) {
#if KERNEL_VERSION(5, 10, 0) <= MODS_KERNEL_VERSION
		const u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
#if KERNEL_VERSION(6, 0, 0) <= MODS_KERNEL_VERSION
		const int field = CTR_EL0_DminLine_SHIFT;
#else
		const int field = CTR_DMINLINE_SHIFT;
#endif

		d_line_shift =
			cpuid_feature_extract_unsigned_field(ctr_el0, field);
#else
		d_line_shift = 4; /* Fallback for kernel 5.9 or older */
#endif
	}

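	/* CTR_EL0.DminLine holds log2 of the smallest D-cache line size
	 * in 4-byte words, so the line size in bytes is 4 << DminLine
	 * (64 bytes with the fallback shift of 4 above).
	 */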
	d_size = (u64)4 << d_line_shift;
	cur = virt_start & ~(d_size - 1);
	do {
		asm volatile("dc civac, %0" : : "r" (cur) : "memory");
	} while (cur += d_size, cur < end);

	cl_debug(DEBUG_MEM_DETAILED,
		 "clear cache virt 0x%llx phys 0x%llx size 0x%x\n",
		 virt_start, phys_start, size);
}

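/* Flush the CPU D-cache for the part of a user mapping that falls
 * within the virtual address range [virt_offs, virt_offs_end).  Only
 * cached system memory allocations are affected; the allocation's
 * scatterlist is walked and every overlapping page is temporarily
 * kmap()ed so it can be flushed through a kernel virtual address.
 */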
static void clear_entry_cache_mappings(struct mods_client *client,
				       struct SYS_MAP_MEMORY *p_map_mem,
				       u64 virt_offs,
				       u64 virt_offs_end)
{
	struct MODS_MEM_INFO *p_mem_info = p_map_mem->p_mem_info;
	struct scatterlist *sg;
	u64 cur_vo = p_map_mem->virtual_addr;
	u32 num_chunks;
	u32 i;

	if (!p_mem_info)
		return;

	if (p_mem_info->cache_type != MODS_ALLOC_CACHED)
		return;

	num_chunks = get_num_chunks(p_mem_info);

	for_each_sg(p_mem_info->sg, sg, num_chunks, i) {
		u32 chunk_offs = 0;
		u32 chunk_offs_end = sg->length;
		u64 cur_vo_end = cur_vo + chunk_offs_end;

		if (virt_offs_end <= cur_vo)
			break;

		if (virt_offs >= cur_vo_end) {
			cur_vo = cur_vo_end;
			continue;
		}

		if (cur_vo < virt_offs)
			chunk_offs = (u32)(virt_offs - cur_vo);

		if (virt_offs_end < cur_vo_end)
			chunk_offs_end -= (u32)(cur_vo_end - virt_offs_end);

		cl_debug(DEBUG_MEM_DETAILED,
			 "clear cache %p [%u]\n",
			 p_mem_info,
			 i);

		while (chunk_offs < chunk_offs_end) {
			u32 i_page = chunk_offs >> PAGE_SHIFT;
			u32 page_offs = chunk_offs - (i_page << PAGE_SHIFT);
			u64 page_va =
				(u64)(size_t)kmap(sg_page(sg) + i_page);
			u64 clear_va = page_va + page_offs;
			u64 clear_pa = sg_phys(sg) + chunk_offs;
			u32 clear_size = PAGE_SIZE - page_offs;
			u64 remaining = chunk_offs_end - chunk_offs;

			if (likely(page_va)) {
				if ((u64)clear_size > remaining)
					clear_size = (u32)remaining;

				cl_debug(DEBUG_MEM_DETAILED,
					 "clear page %u, chunk offs 0x%x, page va 0x%llx\n",
					 i_page,
					 chunk_offs,
					 page_va);

				clear_contiguous_cache(client,
						       clear_va,
						       clear_pa,
						       clear_size);

				kunmap((void *)(size_t)page_va);
			} else {
				cl_error("kmap failed\n");
			}

			chunk_offs += clear_size;
		}

		cur_vo = cur_vo_end;
	}
}

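/* Flush the CPU D-cache for a user virtual address range.  The range
 * is clipped against every client mapping that overlaps it and flushed
 * mapping by mapping.  Invalidate-only requests, reversed ranges and
 * calls made from atomic context are rejected with -EINVAL.
 */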
int esc_mods_flush_cpu_cache_range(struct mods_client *client,
				   struct MODS_FLUSH_CPU_CACHE_RANGE *p)
{
	struct list_head *head;
	struct list_head *iter;
	int err;

	LOG_ENT();

	if (irqs_disabled() || in_interrupt() ||
	    p->virt_addr_start > p->virt_addr_end) {

		cl_debug(DEBUG_MEM_DETAILED, "cannot flush cache\n");
		LOG_EXT();
		return -EINVAL;
	}

	if (p->flags == MODS_INVALIDATE_CPU_CACHE) {
		cl_debug(DEBUG_MEM_DETAILED, "cannot invalidate cache\n");
		LOG_EXT();
		return -EINVAL;
	}

	err = mutex_lock_interruptible(&client->mtx);
	if (unlikely(err)) {
		LOG_EXT();
		return err;
	}

	head = &client->mem_map_list;

	list_for_each(iter, head) {
		struct SYS_MAP_MEMORY *p_map_mem
			= list_entry(iter, struct SYS_MAP_MEMORY, list);

		u64 mapped_va = p_map_mem->virtual_addr;

		/* Note: mapping end points to the first address of next range */
		u64 mapping_end = mapped_va + p_map_mem->mapping_length;

		int start_on_page = p->virt_addr_start >= mapped_va
				    && p->virt_addr_start < mapping_end;
		int start_before_page = p->virt_addr_start < mapped_va;
		int end_on_page = p->virt_addr_end >= mapped_va
				  && p->virt_addr_end < mapping_end;
		int end_after_page = p->virt_addr_end >= mapping_end;
		u64 virt_start = p->virt_addr_start;

		/* Kernel expects end to point to the first address of next
		 * range
		 */
		u64 virt_end = p->virt_addr_end + 1;

		if ((start_on_page || start_before_page)
		    && (end_on_page || end_after_page)) {

			if (!start_on_page)
				virt_start = p_map_mem->virtual_addr;
			if (!end_on_page)
				virt_end = mapping_end;
			clear_entry_cache_mappings(client,
						   p_map_mem,
						   virt_start,
						   virt_end);
		}
	}
	mutex_unlock(&client->mtx);

	LOG_EXT();
	return OK;
}
#endif /* CONFIG_ARM64 */