linux-nv-oot/drivers/misc/mods/mods_mem.c
Lael Jones 2ce330f11a misc: mods: Replace CONFIG_ARCH_TEGRA
The Perforce version of the MODS kernel driver
does not pull in any of the Tegra-specific driver
functions. This prevents the Perforce driver
from being compiled on any Tegra system or
non-Tegra system with CONFIG_ARCH_TEGRA=y.

To allow the Perforce driver to be compiled,
replace CONFIG_ARCH_TEGRA with MODS_HAS_TEGRA,
which is set based on CONFIG_ARCH_TEGRA in git
but left unset in Perforce.

Bug 3397113

Signed-off-by: Lael Jones <lajones@nvidia.com>
Change-Id: Ie113d632c4dcc372058b9a1e3a549a70b8f7c03f
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2607859
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Chris Dragan <kdragan@nvidia.com>
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2023-04-03 13:27:52 +00:00


// SPDX-License-Identifier: GPL-2.0
/*
* mods_mem.c - This file is part of NVIDIA MODS kernel driver.
*
* Copyright (c) 2008-2021, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA MODS kernel driver is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* NVIDIA MODS kernel driver is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NVIDIA MODS kernel driver.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "mods_internal.h"
#include <linux/pagemap.h>
#if defined(MODS_HAS_SET_DMA_MASK)
#include <linux/dma-mapping.h>
#include <linux/of.h>
#endif
#ifdef CONFIG_ARM64
#include <linux/cache.h>
#endif
static int mods_post_alloc(struct mods_client *client,
struct MODS_PHYS_CHUNK *chunk,
u64 phys_addr,
struct MODS_MEM_INFO *p_mem_info);
#if defined(MODS_HAS_TEGRA)
static int mods_smmu_unmap_memory(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info);
#endif
/****************************
* DMA MAP HELPER FUNCTIONS *
****************************/
/*
* Starting with Power9 systems, DMA addresses used for NVLink are no longer
* the same as those used over PCIe.
*
* Power9 supports a 56-bit Real Address. This address range is compressed
* when accessed over NvLink to allow the GPU to access all of memory using
* its 47-bit Physical address.
*
* If there is an NPU device present on the system, it implies that NvLink
* sysmem links are present and we need to apply the required address
* conversion for NvLink within the driver. This is intended to be temporary
* to ease the transition to kernel APIs to handle NvLink DMA mappings
* via the NPU device.
*
* Note, a deviation from the documented compression scheme is that the
* upper address bits (i.e. bits 56-63), instead of being set to zero, are
* preserved during NvLink address compression so the original PCIe DMA
* address can be reconstructed on expansion. These bits can be safely
* ignored on NvLink since they are truncated by the GPU.
*/
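/* Summary of the bit movement implemented by the helpers below:
*   PCIe bits  0..42 -> NvLink bits  0..42 (unchanged)
*   PCIe bits 45..46 -> NvLink bits 43..44
*   PCIe bits 49..50 -> NvLink bits 45..46
*   PCIe bit  59     -> preserved as-is (TCE bypass, see note below)
* mods_expand_nvlink_addr() reverses this mapping and additionally keeps
* bits 56..63 intact.
*/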
#if defined(CONFIG_PPC64) && defined(CONFIG_PCI)
static u64 mods_compress_nvlink_addr(struct pci_dev *dev, u64 addr)
{
u64 addr47 = addr;
/* Note, one key difference from the documented compression scheme
* is that BIT59 used for TCE bypass mode on PCIe is preserved during
* NVLink address compression to allow for the resulting DMA address to
* be used transparently on PCIe.
*/
if (has_npu_dev(dev, 0)) {
addr47 = addr & (1LLU << 59);
addr47 |= ((addr >> 45) & 0x3) << 43;
addr47 |= ((addr >> 49) & 0x3) << 45;
addr47 |= addr & ((1LLU << 43) - 1);
}
return addr47;
}
#else
#define mods_compress_nvlink_addr(dev, addr) (addr)
#endif
#if defined(CONFIG_PPC64) && defined(CONFIG_PCI)
static u64 mods_expand_nvlink_addr(struct pci_dev *dev, u64 addr47)
{
u64 addr = addr47;
if (has_npu_dev(dev, 0)) {
addr = addr47 & ((1LLU << 43) - 1);
addr |= (addr47 & (3ULL << 43)) << 2;
addr |= (addr47 & (3ULL << 45)) << 4;
addr |= addr47 & ~((1ULL << 56) - 1);
}
return addr;
}
#else
#define mods_expand_nvlink_addr(dev, addr) (addr)
#endif
#ifdef CONFIG_PCI
/* Unmap a page if it was mapped */
static void mods_dma_unmap_page(struct mods_client *client,
struct pci_dev *dev,
u64 dev_addr,
u32 order)
{
dev_addr = mods_expand_nvlink_addr(dev, dev_addr);
pci_unmap_page(dev,
dev_addr,
PAGE_SIZE << order,
DMA_BIDIRECTIONAL);
cl_debug(DEBUG_MEM_DETAILED,
"dma unmap dev_addr=0x%llx on dev %04x:%02x:%02x.%x\n",
(unsigned long long)dev_addr,
pci_domain_nr(dev->bus),
dev->bus->number,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
}
/* Unmap and delete the specified DMA mapping */
static void dma_unmap_and_free(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info,
struct MODS_DMA_MAP *p_del_map)
{
u32 i;
for (i = 0; i < p_mem_info->num_chunks; i++)
mods_dma_unmap_page(client,
p_del_map->dev,
p_del_map->dev_addr[i],
p_mem_info->pages[i].order);
pci_dev_put(p_del_map->dev);
kfree(p_del_map);
atomic_dec(&client->num_allocs);
}
#endif
/* Unmap and delete all DMA mappings on the specified allocation */
static int dma_unmap_all(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info,
struct pci_dev *dev)
{
#ifdef CONFIG_PCI
int err = OK;
struct list_head *head = &p_mem_info->dma_map_list;
struct list_head *iter;
struct list_head *tmp;
list_for_each_safe(iter, tmp, head) {
struct MODS_DMA_MAP *p_dma_map;
p_dma_map = list_entry(iter, struct MODS_DMA_MAP, list);
if (!dev || (p_dma_map->dev == dev)) {
list_del(iter);
dma_unmap_and_free(client, p_mem_info, p_dma_map);
if (dev)
break;
}
}
return err;
#else
return OK;
#endif
}
#ifdef CONFIG_PCI
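/* DMA-map a single physically contiguous chunk (2^order pages) to the given
 * PCI device and return the resulting bus address, compressed for NvLink
 * where applicable.
 */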
static int pci_map_chunk(struct mods_client *client,
struct pci_dev *dev,
struct MODS_PHYS_CHUNK *chunk,
u64 *out_dev_addr)
{
u64 dev_addr = pci_map_page(dev,
chunk->p_page,
0,
PAGE_SIZE << chunk->order,
DMA_BIDIRECTIONAL);
int err = pci_dma_mapping_error(dev, dev_addr);
if (err) {
cl_error(
"failed to map 2^%u pages at 0x%llx to dev %04x:%02x:%02x.%x with dma mask 0x%llx\n",
chunk->order,
(unsigned long long)chunk->dma_addr,
pci_domain_nr(dev->bus),
dev->bus->number,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn),
(unsigned long long)dma_get_mask(&dev->dev));
return err;
}
*out_dev_addr = mods_compress_nvlink_addr(dev, dev_addr);
return OK;
}
/* DMA map all pages in an allocation */
static int mods_dma_map_pages(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info,
struct MODS_DMA_MAP *p_dma_map)
{
int i;
struct pci_dev *dev = p_dma_map->dev;
for (i = 0; i < p_mem_info->num_chunks; i++) {
struct MODS_PHYS_CHUNK *chunk = &p_mem_info->pages[i];
u64 dev_addr;
int err = pci_map_chunk(client, dev, chunk, &dev_addr);
if (err) {
while (--i >= 0)
mods_dma_unmap_page(client,
dev,
p_dma_map->dev_addr[i],
chunk->order);
return err;
}
p_dma_map->dev_addr[i] = dev_addr;
cl_debug(DEBUG_MEM_DETAILED,
"dma map dev_addr=0x%llx, phys_addr=0x%llx on dev %04x:%02x:%02x.%x\n",
(unsigned long long)dev_addr,
(unsigned long long)chunk->dma_addr,
pci_domain_nr(dev->bus),
dev->bus->number,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
}
return OK;
}
/* Create a DMA map on the specified allocation for the pci device.
* Lazy-initialize the map list structure if one does not yet exist.
*/
static int mods_create_dma_map(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info,
struct pci_dev *dev)
{
struct MODS_DMA_MAP *p_dma_map;
u32 alloc_size;
int err;
alloc_size = sizeof(*p_dma_map) +
(p_mem_info->num_chunks - 1) * sizeof(u64);
p_dma_map = kzalloc(alloc_size, GFP_KERNEL | __GFP_NORETRY);
if (unlikely(!p_dma_map)) {
cl_error("failed to allocate device map data\n");
return -ENOMEM;
}
atomic_inc(&client->num_allocs);
p_dma_map->dev = pci_dev_get(dev);
err = mods_dma_map_pages(client, p_mem_info, p_dma_map);
if (unlikely(err)) {
pci_dev_put(dev);
kfree(p_dma_map);
atomic_dec(&client->num_allocs);
} else
list_add(&p_dma_map->list, &p_mem_info->dma_map_list);
return err;
}
static int mods_dma_map_default_page(struct mods_client *client,
struct MODS_PHYS_CHUNK *chunk,
struct pci_dev *dev)
{
u64 dev_addr;
int err = pci_map_chunk(client, dev, chunk, &dev_addr);
if (unlikely(err))
return err;
chunk->dev_addr = dev_addr;
chunk->mapped = 1;
cl_debug(DEBUG_MEM_DETAILED,
"auto dma map dev_addr=0x%llx, phys_addr=0x%llx on dev %04x:%02x:%02x.%x\n",
(unsigned long long)dev_addr,
(unsigned long long)chunk->dma_addr,
pci_domain_nr(dev->bus),
dev->bus->number,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
return OK;
}
/* DMA-map memory to the device for which it has been allocated, if it hasn't
* been mapped already.
*/
static int mods_create_default_dma_map(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
int err = OK;
unsigned int i;
struct pci_dev *dev = p_mem_info->dev;
for (i = 0; i < p_mem_info->num_chunks; i++) {
struct MODS_PHYS_CHUNK *chunk = &p_mem_info->pages[i];
if (chunk->mapped) {
cl_debug(DEBUG_MEM_DETAILED,
"memory %p already mapped to dev %04x:%02x:%02x.%x\n",
p_mem_info,
pci_domain_nr(dev->bus),
dev->bus->number,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
return OK;
}
err = mods_dma_map_default_page(client, chunk, dev);
if (unlikely(err))
break;
}
return err;
}
#endif
/* Find the dma mapping chunk for the specified memory. */
static struct MODS_DMA_MAP *find_dma_map(struct MODS_MEM_INFO *p_mem_info,
struct mods_pci_dev_2 *pcidev)
{
struct MODS_DMA_MAP *p_dma_map = NULL;
struct list_head *head = &p_mem_info->dma_map_list;
struct list_head *iter;
if (!head)
return NULL;
list_for_each(iter, head) {
p_dma_map = list_entry(iter, struct MODS_DMA_MAP, list);
if (mods_is_pci_dev(p_dma_map->dev, pcidev))
return p_dma_map;
}
return NULL;
}
/* In order to map pages as UC or WC to the CPU, we need to change their
* attributes by calling set_memory_uc()/set_memory_wc(), respectively.
* On some CPUs this operation is extremely slow. In order to incur
* this penalty only once, we save pages mapped as UC or WC so that
* we can reuse them later.
*/
static int save_non_wb_chunks(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
u32 ichunk;
int err = 0;
if (p_mem_info->cache_type == MODS_ALLOC_CACHED)
return 0;
err = mutex_lock_interruptible(&client->mtx);
if (unlikely(err))
return err;
/* Steal the chunks from MODS_MEM_INFO and put them on free list. */
for (ichunk = 0; ichunk < p_mem_info->num_chunks; ichunk++) {
struct MODS_PHYS_CHUNK *chunk = &p_mem_info->pages[ichunk];
struct MODS_FREE_PHYS_CHUNK *free_chunk;
if (!chunk->wc)
continue;
free_chunk = kzalloc(sizeof(struct MODS_FREE_PHYS_CHUNK),
GFP_KERNEL | __GFP_NORETRY);
if (unlikely(!free_chunk)) {
err = -ENOMEM;
break;
}
atomic_inc(&client->num_allocs);
free_chunk->numa_node = p_mem_info->numa_node;
free_chunk->order = chunk->order;
free_chunk->cache_type = p_mem_info->cache_type;
free_chunk->dma32 = p_mem_info->dma32;
free_chunk->p_page = chunk->p_page;
chunk->p_page = NULL;
cl_debug(DEBUG_MEM_DETAILED,
"save 0x%llx 2^%u pages %s\n",
(unsigned long long)(size_t)free_chunk->p_page,
chunk->order,
p_mem_info->cache_type == MODS_ALLOC_WRITECOMBINE
? "WC" : "UC");
#ifdef CONFIG_PCI
if (chunk->mapped) {
mods_dma_unmap_page(client,
p_mem_info->dev,
chunk->dev_addr,
chunk->order);
chunk->mapped = 0;
}
#endif
list_add(&free_chunk->list, &client->free_mem_list);
}
mutex_unlock(&client->mtx);
return err;
}
static int mods_restore_cache_one_chunk(struct page *p_page, u8 order)
{
int final_err = 0;
u32 num_pages = 1U << order;
u32 i;
for (i = 0; i < num_pages; i++) {
void *ptr = kmap(p_page + i);
int err = -ENOMEM;
if (likely(ptr))
err = MODS_SET_MEMORY_WB((unsigned long)ptr, 1);
kunmap(ptr);
if (likely(!final_err))
final_err = err;
}
return final_err;
}
static int release_free_chunks(struct mods_client *client)
{
struct list_head *head;
struct list_head *iter;
struct list_head *next;
int final_err = 0;
mutex_lock(&client->mtx);
head = &client->free_mem_list;
list_for_each_prev_safe(iter, next, head) {
struct MODS_FREE_PHYS_CHUNK *free_chunk;
int err;
free_chunk = list_entry(iter,
struct MODS_FREE_PHYS_CHUNK,
list);
list_del(iter);
err = mods_restore_cache_one_chunk(free_chunk->p_page,
free_chunk->order);
if (likely(!final_err))
final_err = err;
__free_pages(free_chunk->p_page, free_chunk->order);
atomic_sub(1u << free_chunk->order, &client->num_pages);
kfree(free_chunk);
atomic_dec(&client->num_allocs);
}
mutex_unlock(&client->mtx);
if (unlikely(final_err))
cl_error("failed to restore cache attributes\n");
return final_err;
}
static int mods_restore_cache(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
unsigned int i;
int final_err = 0;
if (p_mem_info->cache_type == MODS_ALLOC_CACHED)
return 0;
for (i = 0; i < p_mem_info->num_chunks; i++) {
struct MODS_PHYS_CHUNK *chunk = &p_mem_info->pages[i];
int err;
if (!chunk->p_page || !chunk->wc)
continue;
err = mods_restore_cache_one_chunk(chunk->p_page, chunk->order);
if (likely(!final_err))
final_err = err;
}
if (unlikely(final_err))
cl_error("failed to restore cache attributes\n");
return final_err;
}
static void mods_free_pages(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
unsigned int i;
mods_restore_cache(client, p_mem_info);
#if defined(MODS_HAS_TEGRA)
if (p_mem_info->iommu_mapped)
mods_smmu_unmap_memory(client, p_mem_info);
#endif
/* release in reverse order */
for (i = p_mem_info->num_chunks; i > 0; ) {
struct MODS_PHYS_CHUNK *chunk;
--i;
chunk = &p_mem_info->pages[i];
if (!chunk->p_page)
continue;
#ifdef CONFIG_PCI
if (chunk->mapped) {
mods_dma_unmap_page(client,
p_mem_info->dev,
chunk->dev_addr,
chunk->order);
chunk->mapped = 0;
}
#endif
__free_pages(chunk->p_page, chunk->order);
atomic_sub(1u << chunk->order, &client->num_pages);
chunk->p_page = 0;
}
}
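/* Select GFP flags for a chunk allocation: honor forced NUMA placement,
 * request compound pages for multi-page chunks, and pick the DMA32 (or DMA)
 * zone when the caller asked for 32-bit addressable memory.
 */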
static gfp_t mods_alloc_flags(struct MODS_MEM_INFO *p_mem_info, u32 order)
{
gfp_t flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
if (p_mem_info->force_numa)
flags |= __GFP_THISNODE;
if (order)
flags |= __GFP_COMP;
if (p_mem_info->dma32)
#ifdef CONFIG_ZONE_DMA32
flags |= __GFP_DMA32;
#else
flags |= __GFP_DMA;
#endif
else
flags |= __GFP_HIGHMEM;
return flags;
}
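/* Allocate one chunk of 2^order pages. For UC/WC allocations, first try to
 * reuse a chunk of the same order/node/zone from the per-client free list
 * (saved by save_non_wb_chunks) to avoid paying the set_memory_uc/wc cost
 * again; *need_cup is cleared in that case since the cache attributes are
 * already set.
 */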
static struct page *mods_alloc_pages(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info,
u32 order,
int *need_cup)
{
struct page *p_page = NULL;
u8 cache_type = p_mem_info->cache_type;
u8 dma32 = p_mem_info->dma32;
int numa_node = p_mem_info->numa_node;
if ((cache_type != MODS_MEMORY_CACHED) &&
likely(!mutex_lock_interruptible(&client->mtx))) {
struct list_head *iter;
struct list_head *head = &client->free_mem_list;
struct MODS_FREE_PHYS_CHUNK *free_chunk = NULL;
list_for_each(iter, head) {
free_chunk = list_entry(iter,
struct MODS_FREE_PHYS_CHUNK,
list);
if (free_chunk->cache_type == cache_type &&
free_chunk->dma32 == dma32 &&
free_chunk->numa_node == numa_node &&
free_chunk->order == order) {
list_del(iter);
break;
}
free_chunk = NULL;
}
mutex_unlock(&client->mtx);
if (free_chunk) {
p_page = free_chunk->p_page;
kfree(free_chunk);
atomic_dec(&client->num_allocs);
cl_debug(DEBUG_MEM_DETAILED,
"reuse 0x%llx 2^%u pages %s\n",
(unsigned long long)(size_t)p_page,
order,
cache_type == MODS_ALLOC_WRITECOMBINE
? "WC" : "UC");
*need_cup = 0;
return p_page;
}
}
p_page = alloc_pages_node(p_mem_info->numa_node,
mods_alloc_flags(p_mem_info, order),
order);
*need_cup = 1;
if (likely(p_page))
atomic_add(1u << order, &client->num_pages);
return p_page;
}
static int mods_alloc_contig_sys_pages(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
int err = -ENOMEM;
u64 phys_addr;
u64 dma_addr;
u64 end_addr = 0;
u32 order = 0;
int is_wb = 1;
struct page *p_page;
LOG_ENT();
while ((1U << order) < p_mem_info->num_pages)
order++;
p_mem_info->pages[0].order = order;
p_page = mods_alloc_pages(client, p_mem_info, order, &is_wb);
if (unlikely(!p_page))
goto failed;
p_mem_info->pages[0].p_page = p_page;
if (!is_wb)
p_mem_info->pages[0].wc = 1;
phys_addr = page_to_phys(p_page);
if (unlikely(phys_addr == 0)) {
cl_error("failed to determine physical address\n");
goto failed;
}
dma_addr = MODS_PHYS_TO_DMA(phys_addr);
if (unlikely(dma_addr >= (1ULL << DMA_BITS))) {
cl_error("dma_addr 0x%llx exceeds supported range\n",
dma_addr);
goto failed;
}
p_mem_info->pages[0].dma_addr = dma_addr;
cl_debug(DEBUG_MEM,
"alloc contig 0x%lx bytes, 2^%u pages, %s, node %d,%s phys 0x%llx\n",
(unsigned long)p_mem_info->num_pages << PAGE_SHIFT,
p_mem_info->pages[0].order,
mods_get_prot_str(p_mem_info->cache_type),
p_mem_info->numa_node,
p_mem_info->dma32 ? " dma32," : "",
(unsigned long long)dma_addr);
end_addr = dma_addr +
((unsigned long)p_mem_info->num_pages << PAGE_SHIFT);
if (unlikely(p_mem_info->dma32 && (end_addr > 0x100000000ULL))) {
cl_error("allocation exceeds 32-bit addressing\n");
goto failed;
}
err = mods_post_alloc(client, p_mem_info->pages, phys_addr, p_mem_info);
failed:
LOG_EXT();
return err;
}
static u32 mods_get_max_order_needed(u32 num_pages)
{
u32 order = 0;
while (order < 10 && (1U<<(order+1)) <= num_pages)
++order;
return order;
}
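/* Allocate a non-contiguous set of chunks covering num_pages. Each iteration
 * greedily tries the largest order that still fits the remaining page count
 * and falls back to smaller orders until the allocation succeeds, failing
 * only when even order-0 pages are unavailable or the chunk table overflows.
 */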
static int mods_alloc_noncontig_sys_pages(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
int err;
u32 pages_left = p_mem_info->num_pages;
u32 num_chunks = 0;
LOG_ENT();
memset(p_mem_info->pages, 0,
p_mem_info->num_chunks * sizeof(p_mem_info->pages[0]));
while (pages_left > 0) {
u64 phys_addr = 0;
u64 dma_addr = 0;
u32 order = mods_get_max_order_needed(pages_left);
int is_wb = 1;
struct MODS_PHYS_CHUNK *chunk = &p_mem_info->pages[num_chunks];
/* Fail if memory fragmentation is very high */
if (unlikely(num_chunks >= p_mem_info->num_chunks)) {
cl_error("detected high memory fragmentation\n");
err = -ENOMEM;
goto failed;
}
for (;;) {
chunk->p_page = mods_alloc_pages(client,
p_mem_info,
order,
&is_wb);
if (chunk->p_page)
break;
if (order == 0)
break;
--order;
}
if (unlikely(!chunk->p_page)) {
cl_error("out of memory\n");
err = -ENOMEM;
goto failed;
}
if (!is_wb)
chunk->wc = 1;
pages_left -= 1U << order;
chunk->order = order;
phys_addr = page_to_phys(chunk->p_page);
if (unlikely(phys_addr == 0)) {
cl_error("phys addr lookup failed\n");
err = -ENOMEM;
goto failed;
}
dma_addr = MODS_PHYS_TO_DMA(phys_addr);
if (unlikely(dma_addr >= (1ULL << DMA_BITS))) {
cl_error("dma_addr 0x%llx exceeds supported range\n",
dma_addr);
err = -ENOMEM;
goto failed;
}
chunk->dma_addr = dma_addr;
cl_debug(DEBUG_MEM,
"alloc 0x%lx bytes [%u], 2^%u pages, %s, node %d,%s phys 0x%llx\n",
(unsigned long)p_mem_info->num_pages << PAGE_SHIFT,
(unsigned int)num_chunks,
chunk->order,
mods_get_prot_str(p_mem_info->cache_type),
p_mem_info->numa_node,
p_mem_info->dma32 ? " dma32," : "",
(unsigned long long)chunk->dma_addr);
++num_chunks;
err = mods_post_alloc(client, chunk, phys_addr, p_mem_info);
if (unlikely(err))
goto failed;
}
err = 0;
failed:
LOG_EXT();
return err;
}
static int mods_register_alloc(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
int err = mutex_lock_interruptible(&client->mtx);
if (unlikely(err))
return err;
list_add(&p_mem_info->list, &client->mem_alloc_list);
mutex_unlock(&client->mtx);
return OK;
}
static int validate_mem_handle(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
struct list_head *head = &client->mem_alloc_list;
struct list_head *iter;
list_for_each(iter, head) {
struct MODS_MEM_INFO *p_mem =
list_entry(iter, struct MODS_MEM_INFO, list);
if (p_mem == p_mem_info)
return true;
}
return false;
}
static int mods_unregister_and_free(struct mods_client *client,
struct MODS_MEM_INFO *p_del_mem)
{
struct MODS_MEM_INFO *p_mem_info;
struct list_head *head;
struct list_head *iter;
cl_debug(DEBUG_MEM_DETAILED, "free %p\n", p_del_mem);
mutex_lock(&client->mtx);
head = &client->mem_alloc_list;
list_for_each(iter, head) {
p_mem_info = list_entry(iter, struct MODS_MEM_INFO, list);
if (p_del_mem == p_mem_info) {
list_del(iter);
mutex_unlock(&client->mtx);
dma_unmap_all(client, p_mem_info, NULL);
save_non_wb_chunks(client, p_mem_info);
mods_free_pages(client, p_mem_info);
pci_dev_put(p_mem_info->dev);
kfree(p_mem_info);
atomic_dec(&client->num_allocs);
return OK;
}
}
mutex_unlock(&client->mtx);
cl_error("failed to unregister allocation %p\n", p_del_mem);
return -EINVAL;
}
int mods_unregister_all_alloc(struct mods_client *client)
{
int final_err = OK;
int err;
struct list_head *head = &client->mem_alloc_list;
struct list_head *iter;
struct list_head *tmp;
list_for_each_safe(iter, tmp, head) {
struct MODS_MEM_INFO *p_mem_info;
p_mem_info = list_entry(iter, struct MODS_MEM_INFO, list);
err = mods_unregister_and_free(client, p_mem_info);
if (likely(!final_err))
final_err = err;
}
err = release_free_chunks(client);
if (likely(!final_err))
final_err = err;
return final_err;
}
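/* Fill in the physical (or DMA) address for the requested offset within an
 * allocation. When pcidev is given, the address returned is the one mapped
 * to that device, either via the default mapping or via an explicit DMA map.
 */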
static int get_addr_range(struct mods_client *client,
struct MODS_GET_PHYSICAL_ADDRESS_3 *p,
struct mods_pci_dev_2 *pcidev)
{
struct MODS_MEM_INFO *p_mem_info;
struct MODS_DMA_MAP *p_dma_map = NULL;
u64 *out;
u32 num_out = 1;
u32 skip_pages;
u32 i;
int err = OK;
u32 page_offs;
LOG_ENT();
p_mem_info = (struct MODS_MEM_INFO *)(size_t)p->memory_handle;
if (unlikely(!p_mem_info)) {
cl_error("no allocation given\n");
LOG_EXT();
return -EINVAL;
}
if (unlikely(pcidev && (pcidev->bus > 0xFFU ||
pcidev->device > 0xFFU))) {
cl_error("dev %04x:%02x:%02x.%x not found\n",
pcidev->domain,
pcidev->bus,
pcidev->device,
pcidev->function);
LOG_EXT();
return -EINVAL;
}
out = &p->physical_address;
if (pcidev) {
if (mods_is_pci_dev(p_mem_info->dev, pcidev)) {
if (!p_mem_info->pages[0].mapped)
err = -EINVAL;
} else {
p_dma_map = find_dma_map(p_mem_info, pcidev);
if (!p_dma_map)
err = -EINVAL;
}
if (err) {
cl_error(
"allocation %p is not mapped to dev %04x:%02x:%02x.%x\n",
p_mem_info,
pcidev->domain,
pcidev->bus,
pcidev->device,
pcidev->function);
LOG_EXT();
return err;
}
}
page_offs = p->offset & (~PAGE_MASK);
skip_pages = p->offset >> PAGE_SHIFT;
for (i = 0; i < p_mem_info->num_chunks && num_out; i++) {
u32 num_pages;
u64 addr;
struct MODS_PHYS_CHUNK *chunk = &p_mem_info->pages[i];
num_pages = 1U << chunk->order;
if (num_pages <= skip_pages) {
skip_pages -= num_pages;
continue;
}
addr = pcidev ?
(p_dma_map ? p_dma_map->dev_addr[i] : chunk->dev_addr)
: chunk->dma_addr;
if (skip_pages) {
num_pages -= skip_pages;
addr += skip_pages << PAGE_SHIFT;
skip_pages = 0;
}
if (num_pages > num_out)
num_pages = num_out;
while (num_pages) {
*out = addr + page_offs;
++out;
--num_out;
addr += PAGE_SIZE;
--num_pages;
}
}
if (unlikely(num_out)) {
cl_error("invalid offset 0x%llx requested for allocation %p\n",
p->offset, p_mem_info);
err = -EINVAL;
}
LOG_EXT();
return err;
}
/* Returns the offset within an allocation deduced from a physical address.
* If the DMA address doesn't belong to the allocation, returns non-zero.
*/
int mods_get_alloc_offset(struct MODS_MEM_INFO *p_mem_info,
u64 dma_addr,
u64 *ret_offs)
{
u32 i;
u64 offset = 0;
for (i = 0; i < p_mem_info->num_chunks; i++) {
struct MODS_PHYS_CHUNK *chunk = &p_mem_info->pages[i];
u64 addr = chunk->dma_addr;
u32 size = PAGE_SIZE << chunk->order;
if (dma_addr >= addr &&
dma_addr < addr + size) {
*ret_offs = dma_addr - addr + offset;
return 0;
}
offset += size;
}
/* The physical address doesn't belong to the allocation */
return -EINVAL;
}
struct MODS_MEM_INFO *mods_find_alloc(struct mods_client *client, u64 phys_addr)
{
struct list_head *plist_head = &client->mem_alloc_list;
struct list_head *plist_iter;
struct MODS_MEM_INFO *p_mem_info;
u64 offset;
list_for_each(plist_iter, plist_head) {
p_mem_info = list_entry(plist_iter,
struct MODS_MEM_INFO,
list);
if (!mods_get_alloc_offset(p_mem_info, phys_addr, &offset))
return p_mem_info;
}
/* The physical address doesn't belong to any allocation */
return NULL;
}
/* Estimate the initial number of chunks supported, assuming medium memory
* fragmentation.
*/
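/* Illustrative example: for num_pages == 1000, the loop below runs six times
 * leaving bit_scan == 15, giving (6 + 15) * 4 = 84 estimated chunks, which is
 * below the num_pages cap.
 */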
static u32 estimate_num_chunks(u32 num_pages)
{
u32 num_chunks = 0;
u32 bit_scan;
/* Count each contiguous block <=256KB */
for (bit_scan = num_pages; bit_scan && num_chunks < 6; bit_scan >>= 1)
++num_chunks;
/* Count remaining contiguous blocks >256KB */
num_chunks += bit_scan;
/* 4x slack for medium memory fragmentation */
num_chunks <<= 2;
/* No sense to allocate more chunks than pages */
if (num_chunks > num_pages)
num_chunks = num_pages;
return num_chunks;
}
/* For large non-contiguous allocations, we typically use significantly less
* chunks than originally estimated. This function reallocates the
* MODS_MEM_INFO struct so that it uses only as much memory as it needs.
*/
static struct MODS_MEM_INFO *optimize_chunks(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
u32 i;
u32 num_chunks;
u32 alloc_size = 0;
struct MODS_MEM_INFO *p_new_mem_info = NULL;
for (i = 0; i < p_mem_info->num_chunks; i++)
if (!p_mem_info->pages[i].p_page)
break;
num_chunks = i;
if (num_chunks < p_mem_info->num_chunks) {
alloc_size = sizeof(*p_mem_info) +
(num_chunks - 1) * sizeof(struct MODS_PHYS_CHUNK);
p_new_mem_info = kzalloc(alloc_size,
GFP_KERNEL | __GFP_NORETRY);
}
if (p_new_mem_info) {
memcpy(p_new_mem_info, p_mem_info, alloc_size);
p_new_mem_info->num_chunks = num_chunks;
INIT_LIST_HEAD(&p_new_mem_info->dma_map_list);
kfree(p_mem_info);
p_mem_info = p_new_mem_info;
}
return p_mem_info;
}
/*************************
 * ESCAPE CALL FUNCTIONS *
 *************************/
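/* Main allocation entry point: validates the request, allocates the chunk
 * table and backing pages (contiguous or non-contiguous), optionally binds
 * the allocation to a PCI device for NUMA affinity and default DMA mapping,
 * and registers the result with the client.
 */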
int esc_mods_alloc_pages_2(struct mods_client *client,
struct MODS_ALLOC_PAGES_2 *p)
{
int err = -EINVAL;
struct MODS_MEM_INFO *p_mem_info = NULL;
u32 num_pages;
u32 alloc_size;
u32 num_chunks;
LOG_ENT();
p->memory_handle = 0;
cl_debug(DEBUG_MEM_DETAILED,
"alloc 0x%llx bytes flags=0x%x (%s %s%s%s%s%s) node=%d on dev %04x:%02x:%02x.%x\n",
(unsigned long long)p->num_bytes,
p->flags,
mods_get_prot_str(p->flags & MODS_ALLOC_CACHE_MASK),
(p->flags & MODS_ALLOC_CONTIGUOUS) ? "contiguous" :
"noncontiguous",
(p->flags & MODS_ALLOC_DMA32) ? " dma32" : "",
(p->flags & MODS_ALLOC_USE_NUMA) ? " usenuma" : "",
(p->flags & MODS_ALLOC_FORCE_NUMA) ? " forcenuma" : "",
(p->flags & MODS_ALLOC_MAP_DEV) ? " dmamap" : "",
p->numa_node,
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
if (unlikely(!p->num_bytes)) {
cl_error("zero bytes requested\n");
goto failed;
}
num_pages = (u32)((p->num_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT);
if (p->flags & MODS_ALLOC_CONTIGUOUS)
num_chunks = 1;
else
num_chunks = estimate_num_chunks(num_pages);
alloc_size = sizeof(*p_mem_info) +
(num_chunks - 1) * sizeof(struct MODS_PHYS_CHUNK);
if (unlikely(((u64)num_pages << PAGE_SHIFT) < p->num_bytes)) {
cl_error("invalid allocation size requested: 0x%llx\n",
(unsigned long long)p->num_bytes);
goto failed;
}
if (unlikely((p->flags & MODS_ALLOC_USE_NUMA) &&
(p->numa_node != MODS_ANY_NUMA_NODE) &&
((unsigned int)p->numa_node >=
(unsigned int)num_possible_nodes()))) {
cl_error("invalid NUMA node: %d\n", p->numa_node);
goto failed;
}
#ifdef CONFIG_PPC64
if (unlikely((p->flags & MODS_ALLOC_CACHE_MASK) != MODS_ALLOC_CACHED)) {
cl_error("unsupported cache attr %u (%s)\n",
p->flags & MODS_ALLOC_CACHE_MASK,
mods_get_prot_str(p->flags & MODS_ALLOC_CACHE_MASK));
err = -ENOMEM;
goto failed;
}
#endif
p_mem_info = kzalloc(alloc_size, GFP_KERNEL | __GFP_NORETRY);
if (unlikely(!p_mem_info)) {
cl_error("failed to allocate auxiliary 0x%x bytes\n",
alloc_size);
err = -ENOMEM;
goto failed;
}
atomic_inc(&client->num_allocs);
p_mem_info->num_chunks = num_chunks;
p_mem_info->num_pages = num_pages;
p_mem_info->cache_type = p->flags & MODS_ALLOC_CACHE_MASK;
p_mem_info->dma32 = (p->flags & MODS_ALLOC_DMA32) ? true : false;
p_mem_info->contig = (p->flags & MODS_ALLOC_CONTIGUOUS)
? true : false;
p_mem_info->force_numa = (p->flags & MODS_ALLOC_FORCE_NUMA)
? true : false;
#ifdef MODS_HASNT_NUMA_NO_NODE
p_mem_info->numa_node = numa_node_id();
#else
p_mem_info->numa_node = NUMA_NO_NODE;
#endif
p_mem_info->dev = NULL;
if ((p->flags & MODS_ALLOC_USE_NUMA) &&
p->numa_node != MODS_ANY_NUMA_NODE)
p_mem_info->numa_node = p->numa_node;
INIT_LIST_HEAD(&p_mem_info->dma_map_list);
#ifdef CONFIG_PCI
if (!(p->flags & MODS_ALLOC_USE_NUMA) ||
(p->flags & MODS_ALLOC_MAP_DEV)) {
struct pci_dev *dev = NULL;
err = mods_find_pci_dev(client, &p->pci_device, &dev);
if (unlikely(err)) {
cl_error("dev %04x:%02x:%02x.%x not found\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
goto failed;
}
p_mem_info->dev = dev;
if (!(p->flags & MODS_ALLOC_USE_NUMA))
p_mem_info->numa_node = dev_to_node(&dev->dev);
#ifdef CONFIG_PPC64
if (!mods_is_nvlink_sysmem_trained(client, dev)) {
/* Until NvLink is trained, we must use memory
* on node 0.
*/
if (has_npu_dev(dev, 0))
p_mem_info->numa_node = 0;
}
#endif
cl_debug(DEBUG_MEM_DETAILED,
"affinity dev %04x:%02x:%02x.%x node %d\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function,
p_mem_info->numa_node);
if (!(p->flags & MODS_ALLOC_MAP_DEV)) {
pci_dev_put(p_mem_info->dev);
p_mem_info->dev = NULL;
}
}
#endif
if (p->flags & MODS_ALLOC_CONTIGUOUS)
err = mods_alloc_contig_sys_pages(client, p_mem_info);
else {
err = mods_alloc_noncontig_sys_pages(client, p_mem_info);
if (likely(!err))
p_mem_info = optimize_chunks(client, p_mem_info);
}
if (unlikely(err)) {
cl_error("failed to alloc 0x%lx %s bytes, %s, node %d%s\n",
(unsigned long)p_mem_info->num_pages << PAGE_SHIFT,
(p->flags & MODS_ALLOC_CONTIGUOUS) ? "contiguous" :
"non-contiguous",
mods_get_prot_str(p_mem_info->cache_type),
p_mem_info->numa_node,
p_mem_info->dma32 ? ", dma32" : "");
goto failed;
}
err = mods_register_alloc(client, p_mem_info);
if (unlikely(err))
goto failed;
p->memory_handle = (u64)(size_t)p_mem_info;
cl_debug(DEBUG_MEM_DETAILED, "alloc %p\n", p_mem_info);
failed:
if (unlikely(err && p_mem_info)) {
mods_free_pages(client, p_mem_info);
pci_dev_put(p_mem_info->dev);
kfree(p_mem_info);
atomic_dec(&client->num_allocs);
}
LOG_EXT();
return err;
}
int esc_mods_device_alloc_pages_2(struct mods_client *client,
struct MODS_DEVICE_ALLOC_PAGES_2 *p)
{
int err;
u32 flags = 0;
struct MODS_ALLOC_PAGES_2 dev_alloc_pages = {0};
LOG_ENT();
if (p->contiguous)
flags |= MODS_ALLOC_CONTIGUOUS;
if (p->address_bits == 32)
flags |= MODS_ALLOC_DMA32;
if (p->attrib == MODS_MEMORY_UNCACHED)
flags |= MODS_ALLOC_UNCACHED;
else if (p->attrib == MODS_MEMORY_WRITECOMBINE)
flags |= MODS_ALLOC_WRITECOMBINE;
else if (unlikely(p->attrib != MODS_MEMORY_CACHED)) {
cl_error("invalid cache attrib: %u\n", p->attrib);
LOG_EXT();
return -ENOMEM;
}
if (p->pci_device.bus > 0xFFU || p->pci_device.device > 0xFFU)
flags |= MODS_ALLOC_USE_NUMA;
else
flags |= MODS_ALLOC_MAP_DEV | MODS_ALLOC_FORCE_NUMA;
dev_alloc_pages.num_bytes = p->num_bytes;
dev_alloc_pages.flags = flags;
dev_alloc_pages.numa_node = MODS_ANY_NUMA_NODE;
dev_alloc_pages.pci_device = p->pci_device;
err = esc_mods_alloc_pages_2(client, &dev_alloc_pages);
if (likely(!err))
p->memory_handle = dev_alloc_pages.memory_handle;
LOG_EXT();
return err;
}
int esc_mods_device_alloc_pages(struct mods_client *client,
struct MODS_DEVICE_ALLOC_PAGES *p)
{
int err;
u32 flags = 0;
struct MODS_ALLOC_PAGES_2 dev_alloc_pages = {0};
LOG_ENT();
if (p->contiguous)
flags |= MODS_ALLOC_CONTIGUOUS;
if (p->address_bits == 32)
flags |= MODS_ALLOC_DMA32;
if (p->attrib == MODS_MEMORY_UNCACHED)
flags |= MODS_ALLOC_UNCACHED;
else if (p->attrib == MODS_MEMORY_WRITECOMBINE)
flags |= MODS_ALLOC_WRITECOMBINE;
else if (unlikely(p->attrib != MODS_MEMORY_CACHED)) {
cl_error("invalid cache attrib: %u\n", p->attrib);
LOG_EXT();
return -ENOMEM;
}
if (p->pci_device.bus > 0xFFU || p->pci_device.device > 0xFFU)
flags |= MODS_ALLOC_USE_NUMA;
else
flags |= MODS_ALLOC_MAP_DEV | MODS_ALLOC_FORCE_NUMA;
dev_alloc_pages.num_bytes = p->num_bytes;
dev_alloc_pages.flags = flags;
dev_alloc_pages.numa_node = MODS_ANY_NUMA_NODE;
dev_alloc_pages.pci_device.domain = 0;
dev_alloc_pages.pci_device.bus = p->pci_device.bus;
dev_alloc_pages.pci_device.device = p->pci_device.device;
dev_alloc_pages.pci_device.function = p->pci_device.function;
err = esc_mods_alloc_pages_2(client, &dev_alloc_pages);
if (likely(!err))
p->memory_handle = dev_alloc_pages.memory_handle;
LOG_EXT();
return err;
}
int esc_mods_alloc_pages(struct mods_client *client, struct MODS_ALLOC_PAGES *p)
{
int err;
u32 flags = MODS_ALLOC_USE_NUMA;
struct MODS_ALLOC_PAGES_2 dev_alloc_pages = {0};
LOG_ENT();
if (p->contiguous)
flags |= MODS_ALLOC_CONTIGUOUS;
if (p->address_bits == 32)
flags |= MODS_ALLOC_DMA32;
if (p->attrib == MODS_MEMORY_UNCACHED)
flags |= MODS_ALLOC_UNCACHED;
else if (p->attrib == MODS_MEMORY_WRITECOMBINE)
flags |= MODS_ALLOC_WRITECOMBINE;
else if (unlikely(p->attrib != MODS_MEMORY_CACHED)) {
cl_error("invalid cache attrib: %u\n", p->attrib);
LOG_EXT();
return -ENOMEM;
}
dev_alloc_pages.num_bytes = p->num_bytes;
dev_alloc_pages.flags = flags;
dev_alloc_pages.numa_node = MODS_ANY_NUMA_NODE;
dev_alloc_pages.pci_device.domain = 0xFFFFU;
dev_alloc_pages.pci_device.bus = 0xFFFFU;
dev_alloc_pages.pci_device.device = 0xFFFFU;
dev_alloc_pages.pci_device.function = 0xFFFFU;
err = esc_mods_alloc_pages_2(client, &dev_alloc_pages);
if (likely(!err))
p->memory_handle = dev_alloc_pages.memory_handle;
LOG_EXT();
return err;
}
int esc_mods_free_pages(struct mods_client *client, struct MODS_FREE_PAGES *p)
{
int err;
LOG_ENT();
err = mods_unregister_and_free(client,
(struct MODS_MEM_INFO *)(size_t)p->memory_handle);
LOG_EXT();
return err;
}
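/* Merge multiple existing allocations into a single handle. All inputs must
 * be compatible (same cache type, dma32 setting, device and, when forced,
 * NUMA node) and must not have any DMA mappings; their chunk tables are
 * concatenated into a new MODS_MEM_INFO and the old handles are released.
 */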
int esc_mods_merge_pages(struct mods_client *client,
struct MODS_MERGE_PAGES *p)
{
int err = OK;
u32 num_chunks = 0;
u32 alloc_size = 0;
unsigned int i;
struct MODS_MEM_INFO *p_mem_info;
LOG_ENT();
if (unlikely(p->num_in_handles < 2 ||
p->num_in_handles > MODS_MAX_MERGE_HANDLES)) {
cl_error("invalid number of input handles: %u\n",
p->num_in_handles);
LOG_EXT();
return -EINVAL;
}
err = mutex_lock_interruptible(&client->mtx);
if (unlikely(err)) {
LOG_EXT();
return err;
}
{
const char *err_msg = NULL;
p_mem_info = (struct MODS_MEM_INFO *)(size_t)
p->in_memory_handles[0];
if (!validate_mem_handle(client, p_mem_info)) {
cl_error("handle 0: invalid handle %p\n", p_mem_info);
err = -EINVAL;
goto failed;
}
if (unlikely(!list_empty(&p_mem_info->dma_map_list))) {
cl_error("handle 0: found dma mappings\n");
err = -EINVAL;
goto failed;
}
num_chunks = p_mem_info->num_chunks;
for (i = 1; i < p->num_in_handles; i++) {
unsigned int j;
struct MODS_MEM_INFO *p_other =
(struct MODS_MEM_INFO *)(size_t)
p->in_memory_handles[i];
if (!validate_mem_handle(client, p_other)) {
cl_error("handle %u: invalid handle %p\n",
i, p);
err = -EINVAL;
goto failed;
}
for (j = 0; j < i; j++) {
if (unlikely(p->in_memory_handles[i] ==
p->in_memory_handles[j])) {
err_msg = "duplicate handle";
break;
}
}
if (err_msg)
break;
if (unlikely(p_mem_info->cache_type !=
p_other->cache_type)) {
err_msg = "cache attr mismatch";
break;
}
if (unlikely(p_mem_info->force_numa &&
p_mem_info->numa_node != p_other->numa_node)) {
err_msg = "numa node mismatch";
break;
}
if (unlikely(p_mem_info->dma32 != p_other->dma32)) {
err_msg = "dma32 mismatch";
break;
}
if (p_mem_info->dev) {
if (unlikely(p_mem_info->dev !=
p_other->dev)) {
err_msg = "device mismatch";
break;
}
if (unlikely(p_mem_info->pages[0].mapped !=
p_other->pages[0].mapped)) {
err_msg = "dma mapping mismatch";
break;
}
}
if (unlikely(!list_empty(&p_other->dma_map_list))) {
err_msg = "found dma mappings";
break;
}
num_chunks += p_other->num_chunks;
}
if (unlikely(err_msg)) {
cl_error("merging handle %u: %s\n", i, err_msg);
err = -EINVAL;
goto failed;
}
}
alloc_size = sizeof(*p_mem_info) +
(num_chunks - 1) * sizeof(struct MODS_PHYS_CHUNK);
p_mem_info = kzalloc(alloc_size, GFP_KERNEL | __GFP_NORETRY);
if (unlikely(!p_mem_info)) {
err = -ENOMEM;
goto failed;
}
atomic_inc(&client->num_allocs);
for (i = 0; i < p->num_in_handles; i++) {
struct MODS_MEM_INFO *p_other = (struct MODS_MEM_INFO *)(size_t)
p->in_memory_handles[i];
u32 other_chunks = p_other->num_chunks;
u32 other_size = sizeof(*p_other) +
(other_chunks - 1) * sizeof(struct MODS_PHYS_CHUNK);
list_del(&p_other->list);
if (i == 0) {
memcpy(p_mem_info, p_other, other_size);
p_mem_info->contig = false;
INIT_LIST_HEAD(&p_mem_info->dma_map_list);
list_add(&p_mem_info->list, &client->mem_alloc_list);
} else {
memcpy(&p_mem_info->pages[p_mem_info->num_chunks],
&p_other->pages[0],
other_chunks * sizeof(struct MODS_PHYS_CHUNK));
p_mem_info->num_chunks += other_chunks;
p_mem_info->num_pages += p_other->num_pages;
}
kfree(p_other);
atomic_dec(&client->num_allocs);
}
WARN_ON(num_chunks != p_mem_info->num_chunks);
p->memory_handle = (u64)(size_t)p_mem_info;
failed:
mutex_unlock(&client->mtx);
LOG_EXT();
return err;
}
int esc_mods_set_mem_type(struct mods_client *client,
struct MODS_MEMORY_TYPE *p)
{
struct MODS_MEM_INFO *p_mem_info;
u8 type = MODS_ALLOC_CACHED;
int err;
LOG_ENT();
switch (p->type) {
case MODS_MEMORY_CACHED:
break;
case MODS_MEMORY_UNCACHED:
type = MODS_ALLOC_UNCACHED;
break;
case MODS_MEMORY_WRITECOMBINE:
type = MODS_ALLOC_WRITECOMBINE;
break;
default:
cl_error("unsupported memory type: %u\n", p->type);
LOG_EXT();
return -EINVAL;
}
err = mutex_lock_interruptible(&client->mtx);
if (unlikely(err)) {
LOG_EXT();
return err;
}
p_mem_info = mods_find_alloc(client, p->physical_address);
if (p_mem_info) {
mutex_unlock(&client->mtx);
cl_error("cannot set mem type on phys addr 0x%llx\n",
p->physical_address);
LOG_EXT();
return -EINVAL;
}
client->mem_type.dma_addr = p->physical_address;
client->mem_type.size = p->size;
client->mem_type.type = type;
mutex_unlock(&client->mtx);
LOG_EXT();
return OK;
}
int esc_mods_get_phys_addr(struct mods_client *client,
struct MODS_GET_PHYSICAL_ADDRESS *p)
{
struct MODS_GET_PHYSICAL_ADDRESS_3 range;
int err;
LOG_ENT();
range.memory_handle = p->memory_handle;
range.offset = p->offset;
memset(&range.pci_device, 0, sizeof(range.pci_device));
err = get_addr_range(client, &range, NULL);
if (!err)
p->physical_address = range.physical_address;
LOG_EXT();
return err;
}
int esc_mods_get_phys_addr_2(struct mods_client *client,
struct MODS_GET_PHYSICAL_ADDRESS_3 *p)
{
struct MODS_GET_PHYSICAL_ADDRESS_3 range;
int err;
LOG_ENT();
range.memory_handle = p->memory_handle;
range.offset = p->offset;
memset(&range.pci_device, 0, sizeof(range.pci_device));
err = get_addr_range(client, &range, NULL);
if (!err)
p->physical_address = range.physical_address;
LOG_EXT();
return err;
}
int esc_mods_get_mapped_phys_addr(struct mods_client *client,
struct MODS_GET_PHYSICAL_ADDRESS *p)
{
struct MODS_GET_PHYSICAL_ADDRESS_3 range;
struct MODS_MEM_INFO *p_mem_info;
int err;
LOG_ENT();
range.memory_handle = p->memory_handle;
range.offset = p->offset;
p_mem_info = (struct MODS_MEM_INFO *)(size_t)p->memory_handle;
if (p_mem_info->dev) {
range.pci_device.domain =
pci_domain_nr(p_mem_info->dev->bus);
range.pci_device.bus =
p_mem_info->dev->bus->number;
range.pci_device.device =
PCI_SLOT(p_mem_info->dev->devfn);
range.pci_device.function =
PCI_FUNC(p_mem_info->dev->devfn);
err = get_addr_range(client, &range, &range.pci_device);
} else {
memset(&range.pci_device, 0, sizeof(range.pci_device));
err = get_addr_range(client, &range, NULL);
}
if (!err)
p->physical_address = range.physical_address;
LOG_EXT();
return err;
}
int esc_mods_get_mapped_phys_addr_2(struct mods_client *client,
struct MODS_GET_PHYSICAL_ADDRESS_2 *p)
{
struct MODS_GET_PHYSICAL_ADDRESS_3 range;
int err;
LOG_ENT();
range.memory_handle = p->memory_handle;
range.offset = p->offset;
range.pci_device = p->pci_device;
err = get_addr_range(client, &range, &range.pci_device);
if (!err)
p->physical_address = range.physical_address;
LOG_EXT();
return err;
}
int esc_mods_get_mapped_phys_addr_3(struct mods_client *client,
struct MODS_GET_PHYSICAL_ADDRESS_3 *p)
{
struct MODS_GET_PHYSICAL_ADDRESS_3 range;
int err;
LOG_ENT();
range.memory_handle = p->memory_handle;
range.offset = p->offset;
range.pci_device = p->pci_device;
err = get_addr_range(client, &range, &range.pci_device);
if (!err)
p->physical_address = range.physical_address;
LOG_EXT();
return err;
}
int esc_mods_virtual_to_phys(struct mods_client *client,
struct MODS_VIRTUAL_TO_PHYSICAL *p)
{
struct MODS_GET_PHYSICAL_ADDRESS get_phys_addr;
struct list_head *head;
struct list_head *iter;
int err;
LOG_ENT();
err = mutex_lock_interruptible(&client->mtx);
if (unlikely(err)) {
LOG_EXT();
return err;
}
head = &client->mem_map_list;
list_for_each(iter, head) {
struct SYS_MAP_MEMORY *p_map_mem;
u64 begin, end;
u64 phys_offs;
p_map_mem = list_entry(iter, struct SYS_MAP_MEMORY, list);
begin = p_map_mem->virtual_addr;
end = p_map_mem->virtual_addr + p_map_mem->mapping_length;
if (p->virtual_address >= begin && p->virtual_address < end) {
u64 virt_offs = p->virtual_address - begin;
/* device memory mapping */
if (!p_map_mem->p_mem_info) {
p->physical_address = p_map_mem->dma_addr
+ virt_offs;
mutex_unlock(&client->mtx);
cl_debug(DEBUG_MEM_DETAILED,
"get phys: map %p virt 0x%llx -> 0x%llx\n",
p_map_mem,
p->virtual_address,
p->physical_address);
LOG_EXT();
return OK;
}
if (mods_get_alloc_offset(p_map_mem->p_mem_info,
p_map_mem->dma_addr,
&phys_offs) != OK)
break;
get_phys_addr.memory_handle =
(u64)(size_t)p_map_mem->p_mem_info;
get_phys_addr.offset = virt_offs + phys_offs;
mutex_unlock(&client->mtx);
err = esc_mods_get_phys_addr(client, &get_phys_addr);
if (err) {
LOG_EXT();
return err;
}
p->physical_address = get_phys_addr.physical_address;
cl_debug(DEBUG_MEM_DETAILED,
"get phys: map %p virt 0x%llx -> 0x%llx\n",
p_map_mem,
p->virtual_address,
p->physical_address);
LOG_EXT();
return OK;
}
}
mutex_unlock(&client->mtx);
cl_error("invalid virtual address 0x%llx\n", p->virtual_address);
LOG_EXT();
return -EINVAL;
}
int esc_mods_phys_to_virtual(struct mods_client *client,
struct MODS_PHYSICAL_TO_VIRTUAL *p)
{
struct SYS_MAP_MEMORY *p_map_mem;
struct list_head *head;
struct list_head *iter;
u64 offset;
u64 map_offset;
int err;
LOG_ENT();
err = mutex_lock_interruptible(&client->mtx);
if (unlikely(err)) {
LOG_EXT();
return err;
}
head = &client->mem_map_list;
list_for_each(iter, head) {
p_map_mem = list_entry(iter, struct SYS_MAP_MEMORY, list);
/* device memory mapping */
if (!p_map_mem->p_mem_info) {
u64 end = p_map_mem->dma_addr
+ p_map_mem->mapping_length;
if (p->physical_address < p_map_mem->dma_addr ||
p->physical_address >= end)
continue;
offset = p->physical_address
- p_map_mem->dma_addr;
p->virtual_address = p_map_mem->virtual_addr
+ offset;
mutex_unlock(&client->mtx);
cl_debug(DEBUG_MEM_DETAILED,
"get virt: map %p phys 0x%llx -> 0x%llx\n",
p_map_mem,
p->physical_address,
p->virtual_address);
LOG_EXT();
return OK;
}
/* offset from the beginning of the allocation */
if (mods_get_alloc_offset(p_map_mem->p_mem_info,
p->physical_address,
&offset))
continue;
/* offset from the beginning of the mapping */
if (mods_get_alloc_offset(p_map_mem->p_mem_info,
p_map_mem->dma_addr,
&map_offset))
continue;
if ((offset >= map_offset) &&
(offset < map_offset + p_map_mem->mapping_length)) {
p->virtual_address = p_map_mem->virtual_addr
+ offset - map_offset;
mutex_unlock(&client->mtx);
cl_debug(DEBUG_MEM_DETAILED,
"get virt: map %p phys 0x%llx -> 0x%llx\n",
p_map_mem,
p->physical_address,
p->virtual_address);
LOG_EXT();
return OK;
}
}
mutex_unlock(&client->mtx);
cl_error("phys addr 0x%llx is not mapped\n", p->physical_address);
LOG_EXT();
return -EINVAL;
}
#if defined(CONFIG_ARM)
int esc_mods_memory_barrier(struct mods_client *client)
{
/* Full memory barrier on ARMv7 */
wmb();
return OK;
}
#endif
#ifdef CONFIG_PCI
int esc_mods_dma_map_memory(struct mods_client *client,
struct MODS_DMA_MAP_MEMORY *p)
{
struct MODS_MEM_INFO *p_mem_info;
struct MODS_DMA_MAP *p_dma_map;
struct pci_dev *dev = NULL;
int err = -EINVAL;
LOG_ENT();
p_mem_info = (struct MODS_MEM_INFO *)(size_t)p->memory_handle;
if (unlikely(!p_mem_info)) {
cl_error("no allocation given\n");
LOG_EXT();
return -EINVAL;
}
if (mods_is_pci_dev(p_mem_info->dev, &p->pci_device)) {
err = mods_create_default_dma_map(client, p_mem_info);
LOG_EXT();
return err;
}
p_dma_map = find_dma_map(p_mem_info, &p->pci_device);
if (p_dma_map) {
cl_debug(DEBUG_MEM_DETAILED,
"memory %p already mapped to dev %04x:%02x:%02x.%x\n",
p_mem_info,
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
LOG_EXT();
return 0;
}
err = mods_find_pci_dev(client, &p->pci_device, &dev);
if (unlikely(err)) {
if (err == -ENODEV)
cl_error("dev %04x:%02x:%02x.%x not found\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
LOG_EXT();
return err;
}
err = mods_create_dma_map(client, p_mem_info, dev);
pci_dev_put(dev);
LOG_EXT();
return err;
}
int esc_mods_dma_unmap_memory(struct mods_client *client,
struct MODS_DMA_MAP_MEMORY *p)
{
struct MODS_MEM_INFO *p_mem_info;
struct pci_dev *dev = NULL;
int err = -EINVAL;
LOG_ENT();
p_mem_info = (struct MODS_MEM_INFO *)(size_t)p->memory_handle;
if (unlikely(!p_mem_info)) {
cl_error("no allocation given\n");
LOG_EXT();
return -EINVAL;
}
err = mods_find_pci_dev(client, &p->pci_device, &dev);
if (unlikely(err)) {
if (err == -ENODEV)
cl_error("dev %04x:%02x:%02x.%x not found\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
} else
err = dma_unmap_all(client, p_mem_info, dev);
pci_dev_put(dev);
LOG_EXT();
return err;
}
#endif
#ifdef MODS_HAS_TEGRA
/* map dma buffer by iommu */
int esc_mods_iommu_dma_map_memory(struct mods_client *client,
struct MODS_IOMMU_DMA_MAP_MEMORY *p)
{
struct sg_table *sgt;
struct scatterlist *sg;
u64 iova;
int smmudev_idx;
struct MODS_MEM_INFO *p_mem_info;
char *dev_name = p->dev_name;
struct mods_smmu_dev *smmu_pdev = NULL;
u32 num_chunks = 0;
int ents, i, err = 0;
size_t iova_offset = 0;
LOG_ENT();
if (!(p->flags & MODS_IOMMU_MAP_CONTIGUOUS)) {
cl_error("contiguous flag not set\n");
err = -EINVAL;
goto failed;
}
p_mem_info = (struct MODS_MEM_INFO *)(size_t)p->memory_handle;
if (p_mem_info->iommu_mapped) {
cl_error("smmu is already mapped\n");
err = -EINVAL;
goto failed;
}
smmudev_idx = get_mods_smmu_device_index(dev_name);
if (smmudev_idx >= 0)
smmu_pdev = get_mods_smmu_device(smmudev_idx);
if (!smmu_pdev || smmudev_idx < 0) {
cl_error("smmu device %s is not found\n", dev_name);
err = -ENODEV;
goto failed;
}
/* do smmu mapping */
num_chunks = p_mem_info->num_chunks;
cl_debug(DEBUG_MEM_DETAILED,
"smmu_map_sg: dev_name=%s, pages=%u, chunks=%u\n",
dev_name,
p_mem_info->num_pages,
num_chunks);
sgt = vzalloc(sizeof(*sgt));
if (!sgt) {
err = -ENOMEM;
goto failed;
}
err = sg_alloc_table(sgt, num_chunks, GFP_KERNEL);
if (err) {
cl_error("failed to allocate sg table, err=%d\n",
err);
kvfree(sgt);
goto failed;
}
sg = sgt->sgl;
for (i = 0; i < num_chunks; i++) {
u32 size = PAGE_SIZE << p_mem_info->pages[i].order;
sg_set_page(sg,
p_mem_info->pages[i].p_page,
size,
0);
sg = sg_next(sg);
}
ents = dma_map_sg_attrs(smmu_pdev->dev, sgt->sgl, sgt->nents,
DMA_BIDIRECTIONAL, 0);
if (ents <= 0) {
cl_error("failed to map sg attrs. err=%d\n", ents);
sg_free_table(sgt);
kvfree(sgt);
err = -ENOMEM;
goto failed;
}
p_mem_info->smmudev_idx = smmudev_idx;
p_mem_info->iommu_mapped = 1;
iova = sg_dma_address(sgt->sgl);
p_mem_info->pages[0].dev_addr = iova;
p_mem_info->sgt = sgt;
/* Check if IOVAs are contiguous */
iova_offset = 0;
for_each_sg(sgt->sgl, sg, sgt->nents, i) {
if (sg_dma_address(sg) != (~(dma_addr_t)0) &&
sg_dma_address(sg) != (iova + iova_offset)) {
cl_error("sg not contiguous:dma 0x%llx, iova 0x%llx\n",
sg_dma_address(sg),
(u64)(iova + iova_offset));
err = -EINVAL;
break;
}
iova_offset += sg->length;
}
if (err) {
dma_unmap_sg_attrs(smmu_pdev->dev, sgt->sgl, sgt->nents,
DMA_BIDIRECTIONAL, 0);
sg_free_table(sgt);
kvfree(sgt);
p_mem_info->sgt = NULL;
p_mem_info->smmudev_idx = 0;
p_mem_info->iommu_mapped = 0;
goto failed;
}
p->physical_address = iova;
cl_debug(DEBUG_MEM_DETAILED,
"phyaddr = 0x%llx, smmu iova = 0x%llx, ents=%d\n",
(u64)p_mem_info->pages[0].dma_addr, iova, ents);
failed:
LOG_EXT();
return err;
}
/* unmap dma buffer by iommu */
int esc_mods_iommu_dma_unmap_memory(struct mods_client *client,
struct MODS_IOMMU_DMA_MAP_MEMORY *p)
{
struct MODS_MEM_INFO *p_mem_info;
int err;
LOG_ENT();
p_mem_info = (struct MODS_MEM_INFO *)(size_t)p->memory_handle;
err = mods_smmu_unmap_memory(client, p_mem_info);
LOG_EXT();
return err;
}
static int mods_smmu_unmap_memory(struct mods_client *client,
struct MODS_MEM_INFO *p_mem_info)
{
struct sg_table *sgt;
u32 smmudev_idx;
int err = 0;
struct mods_smmu_dev *smmu_pdev = NULL;
LOG_ENT();
if (unlikely(!p_mem_info)) {
cl_error("%s nullptr\n", __func__);
err = -EINVAL;
goto failed;
}
if (p_mem_info->sgt == NULL || !p_mem_info->iommu_mapped) {
cl_error("smmu buffer is not mapped, handle=0x%llx\n",
(u64)p_mem_info);
err = -EINVAL;
goto failed;
}
smmudev_idx = p_mem_info->smmudev_idx;
smmu_pdev = get_mods_smmu_device(smmudev_idx);
if (!smmu_pdev) {
cl_error("smmu device on index %u is not found\n",
smmudev_idx);
err = -ENODEV;
goto failed;
}
sgt = p_mem_info->sgt;
dma_unmap_sg_attrs(smmu_pdev->dev, sgt->sgl, sgt->nents,
DMA_BIDIRECTIONAL, 0);
cl_debug(DEBUG_MEM_DETAILED,
"smmu: dma_unmap_sg_attrs: %s, iova=0x%llx, pages=%d\n",
smmu_pdev->dev_name,
p_mem_info->pages[0].dev_addr,
p_mem_info->num_pages);
sg_free_table(sgt);
kvfree(sgt);
p_mem_info->sgt = NULL;
p_mem_info->smmudev_idx = 0;
p_mem_info->iommu_mapped = 0;
failed:
LOG_EXT();
return err;
}
#endif
#ifdef CONFIG_ARM64
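/* Clean and invalidate the CPU data cache for a virtually contiguous range.
 * On Tegra builds this uses __flush_dcache_area(); otherwise it issues
 * "dc civac" per cache line, deriving the line size from CTR_EL0 when the
 * kernel exposes read_sanitised_ftr_reg().
 */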
static void clear_contiguous_cache(struct mods_client *client,
u64 virt_start,
u64 phys_start,
u32 size)
{
#ifdef MODS_HAS_TEGRA
__flush_dcache_area((void *)(size_t)(virt_start), size);
#else
/* __flush_dcache_area is not exported in upstream kernels */
u64 end = virt_start + size;
u64 cur;
u32 d_line_shift = 4; /* Fallback for kernel 5.9 or older */
u64 d_size;
#ifdef MODS_HAS_ARM64_READ_FTR_REG
{
const u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
d_line_shift = cpuid_feature_extract_unsigned_field(ctr_el0,
CTR_DMINLINE_SHIFT);
}
#endif
d_size = 4 << d_line_shift;
cur = virt_start & ~(d_size - 1);
do {
asm volatile("dc civac, %0" : : "r" (cur) : "memory");
} while (cur += d_size, cur < end);
#endif
cl_debug(DEBUG_MEM_DETAILED,
"clear cache virt 0x%llx phys 0x%llx size 0x%x\n",
virt_start, phys_start, size);
}
static void clear_entry_cache_mappings(struct mods_client *client,
struct SYS_MAP_MEMORY *p_map_mem,
u64 virt_offs,
u64 virt_offs_end)
{
struct MODS_MEM_INFO *p_mem_info = p_map_mem->p_mem_info;
u64 cur_vo = p_map_mem->virtual_addr;
unsigned int i;
if (!p_mem_info)
return;
if (p_mem_info->cache_type != MODS_ALLOC_CACHED)
return;
for (i = 0; i < p_mem_info->num_chunks; i++) {
struct MODS_PHYS_CHUNK *chunk = &p_mem_info->pages[i];
u32 chunk_offs = 0;
u32 chunk_offs_end = PAGE_SIZE << chunk->order;
u64 cur_vo_end = cur_vo + chunk_offs_end;
if (virt_offs_end <= cur_vo)
break;
if (virt_offs >= cur_vo_end) {
cur_vo = cur_vo_end;
continue;
}
if (cur_vo < virt_offs)
chunk_offs = (u32)(virt_offs - cur_vo);
if (virt_offs_end < cur_vo_end)
chunk_offs_end -= (u32)(cur_vo_end - virt_offs_end);
cl_debug(DEBUG_MEM_DETAILED,
"clear cache %p [%u]\n",
p_mem_info,
i);
while (chunk_offs < chunk_offs_end) {
u32 i_page = chunk_offs >> PAGE_SHIFT;
u32 page_offs = chunk_offs - (i_page << PAGE_SHIFT);
u64 page_va =
(u64)(size_t)kmap(chunk->p_page + i_page);
u64 clear_va = page_va + page_offs;
u64 clear_pa = MODS_DMA_TO_PHYS(chunk->dma_addr)
+ chunk_offs;
u32 clear_size = PAGE_SIZE - page_offs;
u64 remaining = chunk_offs_end - chunk_offs;
if (likely(page_va)) {
if ((u64)clear_size > remaining)
clear_size = (u32)remaining;
cl_debug(DEBUG_MEM_DETAILED,
"clear page %u, chunk offs 0x%x, page va 0x%llx\n",
i_page,
chunk_offs,
page_va);
clear_contiguous_cache(client,
clear_va,
clear_pa,
clear_size);
kunmap((void *)(size_t)page_va);
} else
cl_error("kmap failed\n");
chunk_offs += clear_size;
}
cur_vo = cur_vo_end;
}
}
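/* Flush the CPU cache for a user-specified virtual address range by walking
 * all of the client's mappings and flushing the portion of each mapping that
 * intersects the requested [virt_addr_start, virt_addr_end] range.
 */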
int esc_mods_flush_cpu_cache_range(struct mods_client *client,
struct MODS_FLUSH_CPU_CACHE_RANGE *p)
{
struct list_head *head;
struct list_head *iter;
int err;
LOG_ENT();
if (irqs_disabled() || in_interrupt() ||
p->virt_addr_start > p->virt_addr_end) {
cl_debug(DEBUG_MEM_DETAILED, "cannot flush cache\n");
LOG_EXT();
return -EINVAL;
}
if (p->flags == MODS_INVALIDATE_CPU_CACHE) {
cl_debug(DEBUG_MEM_DETAILED, "cannot invalidate cache\n");
LOG_EXT();
return -EINVAL;
}
err = mutex_lock_interruptible(&client->mtx);
if (unlikely(err)) {
LOG_EXT();
return err;
}
head = &client->mem_map_list;
list_for_each(iter, head) {
struct SYS_MAP_MEMORY *p_map_mem
= list_entry(iter, struct SYS_MAP_MEMORY, list);
u64 mapped_va = p_map_mem->virtual_addr;
/* Note: mapping end points to the first address of the next range */
u64 mapping_end = mapped_va + p_map_mem->mapping_length;
int start_on_page = p->virt_addr_start >= mapped_va
&& p->virt_addr_start < mapping_end;
int start_before_page = p->virt_addr_start < mapped_va;
int end_on_page = p->virt_addr_end >= mapped_va
&& p->virt_addr_end < mapping_end;
int end_after_page = p->virt_addr_end >= mapping_end;
u64 virt_start = p->virt_addr_start;
/* Kernel expects end to point to the first address of next
* range
*/
u64 virt_end = p->virt_addr_end + 1;
if ((start_on_page || start_before_page)
&& (end_on_page || end_after_page)) {
if (!start_on_page)
virt_start = p_map_mem->virtual_addr;
if (!end_on_page)
virt_end = mapping_end;
clear_entry_cache_mappings(client,
p_map_mem,
virt_start,
virt_end);
}
}
mutex_unlock(&client->mtx);
LOG_EXT();
return OK;
}
#endif
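/* Post-process a freshly allocated chunk: for UC/WC allocations change the
 * CPU cache attributes (or flush the cache on Tegra builds without CPA),
 * then DMA-map the chunk to the allocation's default PCI device, skipping
 * the default mapping when SWIOTLB is active.
 */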
static int mods_post_alloc(struct mods_client *client,
struct MODS_PHYS_CHUNK *chunk,
u64 phys_addr,
struct MODS_MEM_INFO *p_mem_info)
{
int err = 0;
if ((p_mem_info->cache_type != MODS_ALLOC_CACHED) && !chunk->wc) {
u32 num_pages = 1U << chunk->order;
u32 i;
for (i = 0; i < num_pages; i++) {
void *ptr;
ptr = kmap(chunk->p_page + i);
if (unlikely(!ptr)) {
cl_error("kmap failed\n");
return -ENOMEM;
}
#if defined(MODS_HAS_TEGRA) && !defined(CONFIG_CPA)
clear_contiguous_cache(client,
(u64)(size_t)ptr,
phys_addr + (i << PAGE_SHIFT),
PAGE_SIZE);
#else
if (p_mem_info->cache_type == MODS_ALLOC_WRITECOMBINE)
err = MODS_SET_MEMORY_WC((unsigned long)ptr, 1);
else
err = MODS_SET_MEMORY_UC((unsigned long)ptr, 1);
#endif
kunmap(ptr);
if (unlikely(err)) {
cl_error("set cache type failed\n");
return err;
}
/* Set this flag early, so that when an error occurs,
* mods_free_pages() will restore cache attributes
* for all pages. It's OK to restore cache attributes
* even for chunks where we haven't changed them.
*/
chunk->wc = 1;
}
}
#ifdef CONFIG_PCI
if (p_mem_info->dev) {
struct pci_dev *dev = p_mem_info->dev;
/* On systems with SWIOTLB active, disable default DMA mapping
* because we don't support scatter-gather lists.
*/
#if defined(CONFIG_SWIOTLB) && defined(MODS_HAS_DMA_OPS)
const struct dma_map_ops *ops = get_dma_ops(&dev->dev);
if (ops->map_sg == swiotlb_map_sg_attrs)
return 0;
#endif
err = mods_dma_map_default_page(client, chunk, dev);
}
#endif
return err;
}