Open source GPL/LGPL release

svcmobrel-release
2022-07-21 16:03:29 -07:00
commit f338182221
2260 changed files with 576813 additions and 0 deletions


@@ -0,0 +1,514 @@
/*
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/bitops.h>
#include <nvgpu/allocator.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/barrier.h>
#include "bitmap_allocator_priv.h"
static u64 nvgpu_bitmap_alloc_length(struct nvgpu_allocator *a)
{
struct nvgpu_bitmap_allocator *ba = a->priv;
return ba->length;
}
static u64 nvgpu_bitmap_alloc_base(struct nvgpu_allocator *a)
{
struct nvgpu_bitmap_allocator *ba = a->priv;
return ba->base;
}
static bool nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a)
{
struct nvgpu_bitmap_allocator *ba = a->priv;
bool inited = ba->inited;
nvgpu_smp_rmb();
return inited;
}
static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a)
{
struct nvgpu_bitmap_allocator *ba = a->priv;
return nvgpu_safe_add_u64(ba->base, ba->length);
}
/*
* @page_size is ignored.
*/
static u64 nvgpu_bitmap_balloc_fixed(struct nvgpu_allocator *na,
u64 base, u64 len, u32 page_size)
{
struct nvgpu_bitmap_allocator *a = bitmap_allocator(na);
u64 blks, offs, ret;
/* Compute the bit offset and make sure it's aligned to a block. */
offs = base >> a->blk_shift;
if (nvgpu_safe_mult_u64(offs, a->blk_size) != base) {
return 0;
}
offs = nvgpu_safe_sub_u64(offs, a->bit_offs);
blks = len >> a->blk_shift;
if (nvgpu_safe_mult_u64(blks, a->blk_size) != len) {
blks++;
}
nvgpu_assert(blks <= U32_MAX);
alloc_lock(na);
/* Check if the space requested is already occupied. */
ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs,
(u32)blks, 0UL);
if (ret != offs) {
goto fail;
}
nvgpu_assert(blks <= U32_MAX);
nvgpu_bitmap_set(a->bitmap, (u32)offs, U32(blks));
a->bytes_alloced = nvgpu_safe_add_u64(a->bytes_alloced,
nvgpu_safe_mult_u64(blks, a->blk_size));
NVGPU_COV_WHITELIST(false_positive, NVGPU_MISRA(Rule, 14_3), "Bug 2615925")
nvgpu_assert(a->nr_fixed_allocs < U64_MAX);
a->nr_fixed_allocs++;
alloc_unlock(na);
alloc_dbg(na, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]",
base, len, blks, blks);
return base;
fail:
alloc_unlock(na);
alloc_dbg(na, "Alloc-fixed failed! (0x%llx)", base);
return 0;
}
/*
* Two possibilities for this function: either we are freeing a fixed allocation
* or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined.
*
* Note: this function does very little error checking, so misusing it can
* easily corrupt the allocator's state.
*/
static void nvgpu_bitmap_free_fixed(struct nvgpu_allocator *na,
u64 base, u64 len)
{
struct nvgpu_bitmap_allocator *a = bitmap_allocator(na);
u64 blks, offs;
offs = base >> a->blk_shift;
if (nvgpu_safe_mult_u64(offs, a->blk_size) != base) {
nvgpu_do_assert();
return;
}
offs = nvgpu_safe_sub_u64(offs, a->bit_offs);
blks = len >> a->blk_shift;
if (nvgpu_safe_mult_u64(blks, a->blk_size) != len) {
blks++;
}
alloc_lock(na);
nvgpu_assert(offs <= U32_MAX);
nvgpu_assert(blks <= (u32)INT_MAX);
nvgpu_bitmap_clear(a->bitmap, (u32)offs, (u32)blks);
a->bytes_freed = nvgpu_safe_add_u64(a->bytes_freed,
nvgpu_safe_mult_u64(blks, a->blk_size));
alloc_unlock(na);
alloc_dbg(na, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]",
base, len, blks, blks);
}
/*
* Add the passed alloc to the tree of stored allocations.
*/
static void insert_alloc_metadata(struct nvgpu_bitmap_allocator *a,
struct nvgpu_bitmap_alloc *alloc)
{
alloc->alloc_entry.key_start = alloc->base;
alloc->alloc_entry.key_end = nvgpu_safe_add_u64(alloc->base,
alloc->length);
nvgpu_rbtree_insert(&alloc->alloc_entry, &a->allocs);
}
/*
* Find and remove meta-data from the outstanding allocations.
*/
static struct nvgpu_bitmap_alloc *find_alloc_metadata(
struct nvgpu_bitmap_allocator *a, u64 addr)
{
struct nvgpu_bitmap_alloc *alloc;
struct nvgpu_rbtree_node *node = NULL;
nvgpu_rbtree_search(addr, &node, a->allocs);
if (node == NULL) {
return NULL;
}
alloc = nvgpu_bitmap_alloc_from_rbtree_node(node);
nvgpu_rbtree_unlink(node, &a->allocs);
return alloc;
}
/*
* The tree of alloc meta-data stores the address of the alloc, not the
* bit offset.
*/
static int nvgpu_bitmap_store_alloc(struct nvgpu_bitmap_allocator *a,
u64 addr, u64 len)
{
struct nvgpu_bitmap_alloc *alloc =
nvgpu_kmem_cache_alloc(a->meta_data_cache);
if (alloc == NULL) {
return -ENOMEM;
}
alloc->base = addr;
alloc->length = len;
insert_alloc_metadata(a, alloc);
return 0;
}
/*
* @len is in bytes. This routine will figure out the right number of bits to
* actually allocate. The return is the address in bytes as well.
*
* This is a first-fit allocator. It checks the input parameters for
* validity, acquires the alloc_lock, searches the bitmap for the first
* free region large enough to satisfy the requested number of bits using
* bitmap_find_next_zero_area(), and then releases the alloc_lock.
*/
static u64 nvgpu_bitmap_balloc(struct nvgpu_allocator *na, u64 len)
{
u64 tmp_u64, addr;
u32 blks;
unsigned long offs, adjusted_offs, limit;
struct nvgpu_bitmap_allocator *a = bitmap_allocator(na);
if (len == 0ULL) {
alloc_dbg(na, "len = 0, Alloc failed!");
return 0;
}
tmp_u64 = len >> a->blk_shift;
nvgpu_assert(tmp_u64 <= U32_MAX);
blks = (u32)tmp_u64;
if (nvgpu_safe_mult_u64(blks, a->blk_size) != len) {
blks++;
}
alloc_lock(na);
/*
* First look from next_blk and onwards...
*/
offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits,
a->next_blk, blks, 0);
if (offs >= a->num_bits) {
/*
* If that didn't work try the remaining area. Since there can
* be available space that spans across a->next_blk we need to
* search up to the first set bit after that.
*/
limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk);
offs = bitmap_find_next_zero_area(a->bitmap, limit,
0, blks, 0);
if (offs >= a->next_blk) {
goto fail;
}
}
nvgpu_assert(offs <= U32_MAX);
nvgpu_bitmap_set(a->bitmap, (u32)offs, blks);
a->next_blk = offs + blks;
adjusted_offs = nvgpu_safe_add_u64(offs, a->bit_offs);
addr = nvgpu_safe_mult_u64(((u64)adjusted_offs), a->blk_size);
/*
* Only do meta-data storage if we are allowed to allocate storage for
* that meta-data. The issue with using malloc and friends is that in
* latency- and success-critical paths an alloc_page() call can either
* sleep for a potentially long time or fail. Since neither of those may
* be acceptable, assume that the caller will keep whatever data it needs
* around to successfully free this allocation.
*/
if ((a->flags & GPU_ALLOC_NO_ALLOC_PAGE) == 0ULL) {
if (nvgpu_bitmap_store_alloc(a, addr,
blks * a->blk_size) != 0) {
goto fail_reset_bitmap;
}
}
alloc_dbg(na, "Alloc 0x%-10llx 0x%-5llx [bits=0x%x (%u)]",
addr, len, blks, blks);
NVGPU_COV_WHITELIST(false_positive, NVGPU_MISRA(Rule, 14_3), "Bug 2615925")
nvgpu_assert(a->nr_allocs < U64_MAX);
a->nr_allocs++;
a->bytes_alloced = nvgpu_safe_add_u64(a->bytes_alloced,
nvgpu_safe_mult_u64(blks, a->blk_size));
alloc_unlock(na);
return addr;
fail_reset_bitmap:
nvgpu_assert(blks <= (u32)INT_MAX);
nvgpu_assert(offs <= U32_MAX);
nvgpu_bitmap_clear(a->bitmap, (u32)offs, blks);
fail:
a->next_blk = 0;
alloc_unlock(na);
alloc_dbg(na, "Alloc failed!");
return 0;
}
static void nvgpu_bitmap_free(struct nvgpu_allocator *na, u64 addr)
{
struct nvgpu_bitmap_allocator *a = bitmap_allocator(na);
struct nvgpu_bitmap_alloc *alloc = NULL;
u64 offs, adjusted_offs, blks;
alloc_lock(na);
if ((a->flags & GPU_ALLOC_NO_ALLOC_PAGE) != 0ULL) {
(void) WARN(true,
"Using wrong free for NO_ALLOC_PAGE bitmap allocator");
goto done;
}
alloc = find_alloc_metadata(a, addr);
if (alloc == NULL) {
goto done;
}
/*
* The address comes from the adjusted offset (i.e. the bit offset with
* a->bit_offs added), so start with that and then work out the real
* offset into the bitmap.
*/
adjusted_offs = addr >> a->blk_shift;
offs = adjusted_offs - a->bit_offs;
blks = alloc->length >> a->blk_shift;
nvgpu_assert(blks <= (u32)INT_MAX);
nvgpu_assert(offs <= U32_MAX);
nvgpu_bitmap_clear(a->bitmap, (u32)offs, (u32)blks);
alloc_dbg(na, "Free 0x%-10llx", addr);
a->bytes_freed = nvgpu_safe_add_u64(a->bytes_freed, alloc->length);
done:
if ((a->meta_data_cache != NULL) && (alloc != NULL)) {
nvgpu_kmem_cache_free(a->meta_data_cache, alloc);
}
alloc_unlock(na);
}
static void nvgpu_bitmap_alloc_destroy(struct nvgpu_allocator *na)
{
struct nvgpu_bitmap_allocator *a = bitmap_allocator(na);
struct nvgpu_bitmap_alloc *alloc;
struct nvgpu_rbtree_node *node = NULL;
/*
* Kill any outstanding allocations.
*/
nvgpu_rbtree_enum_start(0, &node, a->allocs);
while (node != NULL) {
alloc = nvgpu_bitmap_alloc_from_rbtree_node(node);
nvgpu_rbtree_unlink(node, &a->allocs);
nvgpu_kmem_cache_free(a->meta_data_cache, alloc);
nvgpu_rbtree_enum_start(0, &node, a->allocs);
}
nvgpu_kmem_cache_destroy(a->meta_data_cache);
nvgpu_kfree(nvgpu_alloc_to_gpu(na), a->bitmap);
nvgpu_kfree(nvgpu_alloc_to_gpu(na), a);
}
#ifdef __KERNEL__
static void nvgpu_bitmap_print_stats(struct nvgpu_allocator *na,
struct seq_file *s, int lock)
{
struct nvgpu_bitmap_allocator *a = bitmap_allocator(na);
alloc_pstat(s, na, "Bitmap allocator params:");
alloc_pstat(s, na, " start = 0x%llx", a->base);
alloc_pstat(s, na, " end = 0x%llx", a->base + a->length);
alloc_pstat(s, na, " blks = 0x%llx", a->num_bits);
/* Actual stats. */
alloc_pstat(s, na, "Stats:");
alloc_pstat(s, na, " Number allocs = 0x%llx", a->nr_allocs);
alloc_pstat(s, na, " Number fixed = 0x%llx", a->nr_fixed_allocs);
alloc_pstat(s, na, " Bytes alloced = 0x%llx", a->bytes_alloced);
alloc_pstat(s, na, " Bytes freed = 0x%llx", a->bytes_freed);
alloc_pstat(s, na, " Outstanding = 0x%llx",
a->bytes_alloced - a->bytes_freed);
}
#endif
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 8_7), "Bug 2823817")
static const struct nvgpu_allocator_ops bitmap_ops = {
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7))
.alloc = nvgpu_bitmap_balloc,
.free_alloc = nvgpu_bitmap_free,
.alloc_fixed = nvgpu_bitmap_balloc_fixed,
.free_fixed = nvgpu_bitmap_free_fixed,
.base = nvgpu_bitmap_alloc_base,
.length = nvgpu_bitmap_alloc_length,
.end = nvgpu_bitmap_alloc_end,
.inited = nvgpu_bitmap_alloc_inited,
.fini = nvgpu_bitmap_alloc_destroy,
#ifdef __KERNEL__
.print_stats = nvgpu_bitmap_print_stats,
#endif
};
static int nvgpu_bitmap_check_argument_limits(u64 base, u64 length,
u64 blk_size)
{
bool is_blk_size_pwr_2;
bool is_base_aligned;
bool is_length_aligned;
nvgpu_assert(blk_size > 0ULL);
is_blk_size_pwr_2 = (blk_size & (blk_size - 1ULL)) == 0ULL;
is_base_aligned = (base & (blk_size - 1ULL)) == 0ULL;
is_length_aligned = (length & (blk_size - 1ULL)) == 0ULL;
if (!is_blk_size_pwr_2) {
nvgpu_do_assert();
return -EINVAL;
}
if (!is_base_aligned || !is_length_aligned) {
return -EINVAL;
}
if (length == 0ULL) {
return -EINVAL;
}
return 0;
}
int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
const char *name, u64 base, u64 length,
u64 blk_size, u64 flags)
{
int err;
struct nvgpu_bitmap_allocator *a;
err = nvgpu_bitmap_check_argument_limits(base, length, blk_size);
if (err != 0) {
return err;
}
if (base == 0ULL) {
base = blk_size;
length = nvgpu_safe_sub_u64(length, blk_size);
}
a = nvgpu_kzalloc(g, sizeof(struct nvgpu_bitmap_allocator));
if (a == NULL) {
return -ENOMEM;
}
err = nvgpu_alloc_common_init(na, g, name, a, false, &bitmap_ops);
if (err != 0) {
goto fail;
}
if ((flags & GPU_ALLOC_NO_ALLOC_PAGE) == 0ULL) {
a->meta_data_cache = nvgpu_kmem_cache_create(g,
sizeof(struct nvgpu_bitmap_alloc));
if (a->meta_data_cache == NULL) {
err = -ENOMEM;
goto fail;
}
}
a->base = base;
a->length = length;
a->blk_size = blk_size;
a->blk_shift = nvgpu_safe_sub_u64(nvgpu_ffs(a->blk_size), 1UL);
a->num_bits = length >> a->blk_shift;
a->bit_offs = a->base >> a->blk_shift;
a->flags = flags;
a->allocs = NULL;
a->bitmap = nvgpu_kcalloc(g, BITS_TO_LONGS(a->num_bits),
sizeof(*a->bitmap));
if (a->bitmap == NULL) {
err = -ENOMEM;
goto fail;
}
nvgpu_smp_wmb();
a->inited = true;
#ifdef CONFIG_DEBUG_FS
nvgpu_init_alloc_debug(g, na);
#endif
alloc_dbg(na, "New allocator: type bitmap");
alloc_dbg(na, " base 0x%llx", a->base);
alloc_dbg(na, " bit_offs 0x%llx", a->bit_offs);
alloc_dbg(na, " size 0x%llx", a->length);
alloc_dbg(na, " blk_size 0x%llx", a->blk_size);
alloc_dbg(na, " flags 0x%llx", a->flags);
return 0;
fail:
if (a->meta_data_cache != NULL) {
nvgpu_kmem_cache_destroy(a->meta_data_cache);
}
nvgpu_kfree(g, a);
return err;
}
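For illustration only, a minimal sketch of how the bitmap allocator above might be exercised through the generic nvgpu_allocator interface (nvgpu_alloc()/nvgpu_free()/nvgpu_alloc_destroy() from nvgpu_allocator.c further below); the gk20a pointer, allocator name, and address range are placeholder assumptions, not values taken from the driver.

#include <nvgpu/allocator.h>
#include <nvgpu/gk20a.h>

/* Illustrative sketch, not part of this file. */
static int nvgpu_bitmap_allocator_example(struct gk20a *g)
{
	struct nvgpu_allocator na;
	u64 addr;
	int err;

	/* 1 MB space at base 0x10000, managed in 4 KB blocks, no flags. */
	err = nvgpu_bitmap_allocator_init(g, &na, "example", 0x10000ULL,
					  0x100000ULL, 0x1000ULL, 0ULL);
	if (err != 0) {
		return err;
	}

	/* First-fit allocation of two blocks; a return of 0 means failure. */
	addr = nvgpu_alloc(&na, 0x2000ULL);
	if (addr != 0ULL) {
		nvgpu_free(&na, addr);
	}

	nvgpu_alloc_destroy(&na);
	return 0;
}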


@@ -0,0 +1,183 @@
/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef BITMAP_ALLOCATOR_PRIV_H
#define BITMAP_ALLOCATOR_PRIV_H
/**
* @file
*
* Implementation of a bitmap allocator.
*/
#include <nvgpu/rbtree.h>
#include <nvgpu/kmem.h>
struct nvgpu_allocator;
/**
* Structure to hold the implementation details of the bitmap allocator.
*/
struct nvgpu_bitmap_allocator {
/**
* Pointer to the common allocator structure.
*/
struct nvgpu_allocator *owner;
/**
* Base address of the space.
*/
u64 base;
/**
* Length of the space.
*/
u64 length;
/**
* Size that corresponds to 1 bit.
*/
u64 blk_size;
/**
* Bit shift to divide by blk_size.
*/
u64 blk_shift;
/**
* Number of allocatable bits.
*/
u64 num_bits;
/**
* Bit offset of the allocator's base address (i.e. base >> blk_shift).
*/
u64 bit_offs;
/**
* Optimization for making repeated allocations faster. Keep track of
* the next bit after the most recent allocation. This is where the next
* search will start from. This should make allocation faster in cases
* where lots of allocations get made one after another. It shouldn't
* have a negative impact on the case where the allocator is fragmented.
*/
u64 next_blk;
/**
* The actual bitmap used for allocations.
*/
unsigned long *bitmap;
/**
* Tree of outstanding allocations.
*/
struct nvgpu_rbtree_node *allocs;
/**
* Metadata cache of allocations (contains address and size of
* allocations).
*/
struct nvgpu_kmem_cache *meta_data_cache;
/**
* Configuration flags of the allocator. See \a GPU_ALLOC_* flags.
*/
u64 flags;
/**
* Boolean to indicate if the allocator has been fully initialized.
*/
bool inited;
/**
* Statistics: track the number of non-fixed allocations.
*/
u64 nr_allocs;
/**
* Statistics: track the number of fixed allocations.
*/
u64 nr_fixed_allocs;
/**
* Statistics: total number of bytes allocated for both fixed and non-
* fixed allocations.
*/
u64 bytes_alloced;
/**
* Statistics: total number of bytes freed for both fixed and non-fixed
* allocations.
*/
u64 bytes_freed;
};
/**
* Structure to hold the allocation metadata.
*/
struct nvgpu_bitmap_alloc {
/**
* Base address of the allocation.
*/
u64 base;
/**
* Size of the allocation.
*/
u64 length;
/**
* RB tree of allocations.
*/
struct nvgpu_rbtree_node alloc_entry;
};
/**
* @brief Given a tree node, retrieve the metadata of the allocation.
*
* @param[in] node Pointer to the tree node.
*
* @return pointer to the struct nvgpu_bitmap_alloc of the node.
*/
static inline struct nvgpu_bitmap_alloc *
nvgpu_bitmap_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
{
return (struct nvgpu_bitmap_alloc *)
((uintptr_t)node - offsetof(struct nvgpu_bitmap_alloc, alloc_entry));
};
/**
* @brief Given a generic allocator context, retrieve a pointer to the bitmap
* allocator context structure.
*
* @param[in] a Pointer to nvgpu allocator.
*
* @return pointer to the struct nvgpu_bitmap_allocator.
*/
static inline struct nvgpu_bitmap_allocator *bitmap_allocator(
struct nvgpu_allocator *a)
{
return (struct nvgpu_bitmap_allocator *)(a)->priv;
}
#endif
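As an illustration of how the fields above relate (not part of the header): with blk_size = 4 KB, blk_shift is 12, num_bits = length >> blk_shift, and bit_offs = base >> blk_shift. The hypothetical helpers below show the address-to-bit mapping that the allocator uses internally.

/* Illustrative helpers only; names are hypothetical. */
static inline u64 example_bitmap_addr_to_bit(struct nvgpu_bitmap_allocator *ba,
					     u64 addr)
{
	/* Matches the offs computation in nvgpu_bitmap_balloc_fixed(). */
	return (addr >> ba->blk_shift) - ba->bit_offs;
}

static inline u64 example_bitmap_bit_to_addr(struct nvgpu_bitmap_allocator *ba,
					     u64 bit)
{
	/* Inverse mapping: blk_size is a power of two, so shift == multiply. */
	return (bit + ba->bit_offs) << ba->blk_shift;
}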


File diff suppressed because it is too large.


@@ -0,0 +1,451 @@
/*
* Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_MM_BUDDY_ALLOCATOR_PRIV_H
#define NVGPU_MM_BUDDY_ALLOCATOR_PRIV_H
/**
* @file
*
* Implementation of the buddy allocator.
*/
#include <nvgpu/rbtree.h>
#include <nvgpu/list.h>
#include <nvgpu/static_analysis.h>
struct nvgpu_kmem_cache;
struct nvgpu_allocator;
struct vm_gk20a;
/**
* Structure that defines each buddy as an element in a binary tree.
*/
struct nvgpu_buddy {
/**
* Parent node.
*/
struct nvgpu_buddy *parent;
/**
* This node's buddy.
*/
struct nvgpu_buddy *buddy;
/**
* Lower address sub-node.
*/
struct nvgpu_buddy *left;
/**
* Higher address sub-node.
*/
struct nvgpu_buddy *right;
/**
* List entry for various lists.
*/
struct nvgpu_list_node buddy_entry;
/**
* RB tree of allocations.
*/
struct nvgpu_rbtree_node alloced_entry;
/**
* Start address of this buddy.
*/
u64 start;
/**
* End address of this buddy.
*/
u64 end;
/**
* Buddy order.
*/
u64 order;
/**
* Possible flags to use in the buddy allocator. Set in the #flags
* member.
* @addtogroup BALLOC_BUDDY_FLAGS
* @{
*/
#define BALLOC_BUDDY_ALLOCED 0x1U
#define BALLOC_BUDDY_SPLIT 0x2U
#define BALLOC_BUDDY_IN_LIST 0x4U
/**@}*/
/**
* Buddy flags among the @ref BALLOC_BUDDY_FLAGS
*/
u32 flags;
/**
* Possible PDE sizes. This allows for grouping like sized allocations
* into the same PDE. Set in the #pte_size member.
* @addtogroup BALLOC_PTE_SIZE
* @{
*/
#define BALLOC_PTE_SIZE_ANY (~0U)
#define BALLOC_PTE_SIZE_INVALID 0U
#define BALLOC_PTE_SIZE_SMALL 1U
#define BALLOC_PTE_SIZE_BIG 2U
/**@}*/
/**
* Size of the PDE this buddy is using. Possible values in
* @ref BALLOC_PTE_SIZE
*/
u32 pte_size;
};
/**
* @brief Given a list node, retrieve the buddy.
*
* @param[in] node Pointer to the list node.
*
* @return pointer to the struct nvgpu_buddy of the node.
*/
static inline struct nvgpu_buddy *
nvgpu_buddy_from_buddy_entry(struct nvgpu_list_node *node)
{
return (struct nvgpu_buddy *)
((uintptr_t)node - offsetof(struct nvgpu_buddy, buddy_entry));
};
/**
* @brief Given a tree node, retrieve the buddy.
*
* @param[in] node Pointer to the tree node.
*
* @return pointer to the struct nvgpu_buddy of the node.
*/
static inline struct nvgpu_buddy *
nvgpu_buddy_from_rbtree_node(struct nvgpu_rbtree_node *node)
{
return (struct nvgpu_buddy *)
((uintptr_t)node - offsetof(struct nvgpu_buddy, alloced_entry));
};
/**
* @brief Macro generator to create is/set/clr operations for each of the
* flags in @ref BALLOC_BUDDY_FLAGS.
*
* The created functions are:
*
* bool buddy_is_alloced(struct nvgpu_buddy *b);
* void buddy_set_alloced(struct nvgpu_buddy *b);
* void buddy_clr_alloced(struct nvgpu_buddy *b);
*
* bool buddy_is_split(struct nvgpu_buddy *b);
* void buddy_set_split(struct nvgpu_buddy *b);
* void buddy_clr_split(struct nvgpu_buddy *b);
*
* bool buddy_is_in_list(struct nvgpu_buddy *b);
* void buddy_set_in_list(struct nvgpu_buddy *b);
* void buddy_clr_in_list(struct nvgpu_buddy *b);
*
* @param[in] flag Lower-case flag name: alloced, split or in_list
* @param[in] flag_up Upper-case flag suffix used to form the
* @ref BALLOC_BUDDY_FLAGS name: ALLOCED, SPLIT or IN_LIST
*
* @{
*/
#define nvgpu_buddy_allocator_flag_ops(flag, flag_up) \
static inline bool buddy_is_ ## flag(struct nvgpu_buddy *b) \
{ \
return (b->flags & BALLOC_BUDDY_ ## flag_up) != 0U; \
} \
static inline void buddy_set_ ## flag(struct nvgpu_buddy *b) \
{ \
b->flags |= BALLOC_BUDDY_ ## flag_up; \
} \
static inline void buddy_clr_ ## flag(struct nvgpu_buddy *b) \
{ \
b->flags &= ~BALLOC_BUDDY_ ## flag_up; \
}
nvgpu_buddy_allocator_flag_ops(alloced, ALLOCED);
nvgpu_buddy_allocator_flag_ops(split, SPLIT);
nvgpu_buddy_allocator_flag_ops(in_list, IN_LIST);
/**@} */
/**
* Structure to keep information for a fixed allocation.
*/
struct nvgpu_fixed_alloc {
/**
* List of buddies.
*/
struct nvgpu_list_node buddies;
/**
* RB tree of fixed allocations.
*/
struct nvgpu_rbtree_node alloced_entry;
/**
* Start of fixed block.
*/
u64 start;
/**
* End address.
*/
u64 end;
};
/**
* @brief Given a tree node, retrieve the fixed allocation.
*
* @param[in] node Pointer to the tree node.
*
* @return pointer to the struct nvgpu_fixed_alloc of the node.
*/
static inline struct nvgpu_fixed_alloc *
nvgpu_fixed_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
{
return (struct nvgpu_fixed_alloc *)
((uintptr_t)node - offsetof(struct nvgpu_fixed_alloc, alloced_entry));
};
/**
* GPU buddy allocator for the various GPU address spaces. Each addressable unit
* doesn't have to correspond to a byte. In some cases each unit is a more
* complex object such as a comp_tag line or the like.
*
* The max order is computed based on the size of the minimum order and the size
* of the address space.
*
* #blk_size is the size of an order 0 buddy.
*/
struct nvgpu_buddy_allocator {
/**
* Pointer to the common allocator structure.
*/
struct nvgpu_allocator *owner;
/**
* Parent VM - can be NULL.
*/
struct vm_gk20a *vm;
/**
* Base address of the space.
*/
u64 base;
/**
* Length of the space.
*/
u64 length;
/**
* Size of order 0 allocation.
*/
u64 blk_size;
/**
* Shift to divide by blk_size.
*/
u64 blk_shift;
/**
* Internal: real start (aligned to #blk_size).
*/
u64 start;
/**
* Internal: real end, trimmed if needed.
*/
u64 end;
/**
* Internal: count of objects in space.
*/
u64 count;
/**
* Internal: count of blks in the space.
*/
u64 blks;
/**
* Internal: specific maximum order.
*/
u64 max_order;
/**
* Outstanding allocations.
*/
struct nvgpu_rbtree_node *alloced_buddies;
/**
* Outstanding fixed allocations.
*/
struct nvgpu_rbtree_node *fixed_allocs;
/**
* List of carveouts.
*/
struct nvgpu_list_node co_list;
/**
* Cache of allocations (contains address and size of allocations).
*/
struct nvgpu_kmem_cache *buddy_cache;
/**
* Impose an upper bound on the maximum order.
*/
#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1U)
/**
* List of buddies.
*/
struct nvgpu_list_node buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
/**
* Length of the buddy list.
*/
u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
/**
* Number of split nodes.
*/
u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
/**
* Number of allocated nodes.
*/
u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
/**
* This is for when the allocator is managing a GVA space (the
* #GPU_ALLOC_GVA_SPACE bit is set in #flags). This requires
* that we group like sized allocations into PDE blocks.
*/
u64 pte_blk_order;
/**
* Boolean to indicate if the allocator has been fully initialized.
*/
bool initialized;
/**
* Boolean set to true after the first allocation is made.
*/
bool alloc_made;
/**
* Flags used by the allocator, as defined by @ref GPU_ALLOC_FLAGS
*/
u64 flags;
/**
* Statistics: total number of bytes allocated.
*/
u64 bytes_alloced;
/**
* Statistics: total number of bytes allocated taking into account the
* buddy order.
*/
u64 bytes_alloced_real;
/**
* Statistics: total number of bytes freed.
*/
u64 bytes_freed;
};
/**
* @brief Given a generic allocator context, retrieve a pointer to the buddy
* allocator context structure.
*
* @param[in] a Pointer to nvgpu allocator.
*
* @return pointer to the struct nvgpu_buddy_allocator.
*/
static inline struct nvgpu_buddy_allocator *buddy_allocator(
struct nvgpu_allocator *a)
{
return (struct nvgpu_buddy_allocator *)(a)->priv;
}
/**
* @brief Given a buddy allocator, retrieve the list of buddies of the chosen
* order.
*
* @param[in] a Pointer to the buddy allocator.
* @param[in] order Buddy order.
*
* @return list of buddies whose order is \a order.
*/
static inline struct nvgpu_list_node *balloc_get_order_list(
struct nvgpu_buddy_allocator *a, u64 order)
{
return &a->buddy_list[order];
}
/**
* @brief Convert a buddy order to a length in bytes, based on the block size.
*
* @param[in] a Pointer to the buddy allocator.
* @param[in] order Buddy order.
*
* @return length in bytes.
*/
static inline u64 balloc_order_to_len(struct nvgpu_buddy_allocator *a,
u64 order)
{
return nvgpu_safe_mult_u64(BIT64(order), a->blk_size);
}
/**
* @brief Given a base address, shift it down by the allocator's start address.
*
* @param[in] a Pointer to the buddy allocator.
* @param[in] base Base address.
*
* @return shifted address.
*/
static inline u64 balloc_base_shift(struct nvgpu_buddy_allocator *a,
u64 base)
{
return nvgpu_safe_sub_u64(base, a->start);
}
/**
* @brief Given a shifted address, add back the allocator's start address.
*
* @param[in] a Pointer to the buddy allocator.
* @param[in] base Shifted address.
*
* @return unshifted address.
*/
static inline u64 balloc_base_unshift(struct nvgpu_buddy_allocator *a,
u64 base)
{
return nvgpu_safe_add_u64(base, a->start);
}
/**
* @brief Given a buddy allocator context, retrieve a pointer to the generic
* allocator context structure.
*
* @param[in] a Pointer to nvgpu buddy allocator.
*
* @return pointer to the struct nvgpu_allocator.
*/
static inline struct nvgpu_allocator *balloc_owner(
struct nvgpu_buddy_allocator *a)
{
return a->owner;
}
#endif /* NVGPU_MM_BUDDY_ALLOCATOR_PRIV_H */
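For a concrete feel of the helpers above (illustration only, not part of the header): with blk_size = 4 KB, balloc_order_to_len() for order 3 yields BIT64(3) * 4 KB = 32 KB, and the shift/unshift pair converts between absolute addresses and allocator-relative addresses.

/* Illustrative sketch; the function name is hypothetical. */
static inline bool example_balloc_round_trip(struct nvgpu_buddy_allocator *ba,
					     u64 addr)
{
	/* Length covered by an order-3 buddy: BIT64(3) * blk_size. */
	u64 len = balloc_order_to_len(ba, 3ULL);

	/* Convert to allocator-relative space and back again. */
	u64 rel = balloc_base_shift(ba, addr);
	u64 back = balloc_base_unshift(ba, rel);

	return (back == addr) && (len == (ba->blk_size << 3));
}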


@@ -0,0 +1,216 @@
/*
* gk20a allocator
*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/allocator.h>
#include <nvgpu/gk20a.h>
u64 nvgpu_alloc_length(struct nvgpu_allocator *a)
{
if (a->ops->length != NULL) {
return a->ops->length(a);
}
return 0;
}
u64 nvgpu_alloc_base(struct nvgpu_allocator *a)
{
if (a->ops->base != NULL) {
return a->ops->base(a);
}
return 0;
}
bool nvgpu_alloc_initialized(struct nvgpu_allocator *a)
{
if ((a->ops == NULL) || (a->ops->inited == NULL)) {
return false;
}
return a->ops->inited(a);
}
u64 nvgpu_alloc_end(struct nvgpu_allocator *a)
{
if (a->ops->end != NULL) {
return a->ops->end(a);
}
return 0;
}
u64 nvgpu_alloc_space(struct nvgpu_allocator *a)
{
if (a->ops->space != NULL) {
return a->ops->space(a);
}
return 0;
}
u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len)
{
return a->ops->alloc(a, len);
}
u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size)
{
return a->ops->alloc_pte(a, len, page_size);
}
void nvgpu_free(struct nvgpu_allocator *a, u64 addr)
{
a->ops->free_alloc(a, addr);
}
u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
u32 page_size)
{
if ((U64_MAX - base) < len) {
return 0ULL;
}
if (a->ops->alloc_fixed != NULL) {
return a->ops->alloc_fixed(a, base, len, page_size);
}
return 0;
}
void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len)
{
/*
* If this operation is not defined for the allocator then just do
* nothing. The alternative would be to fall back on the regular
* free but that may be harmful in unexpected ways.
*/
if (a->ops->free_fixed != NULL) {
a->ops->free_fixed(a, base, len);
}
}
int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
struct nvgpu_alloc_carveout *co)
{
if (a->ops->reserve_carveout != NULL) {
return a->ops->reserve_carveout(a, co);
}
return -ENODEV;
}
void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a,
struct nvgpu_alloc_carveout *co)
{
if (a->ops->release_carveout != NULL) {
a->ops->release_carveout(a, co);
}
}
void nvgpu_alloc_destroy(struct nvgpu_allocator *a)
{
a->ops->fini(a);
nvgpu_mutex_destroy(&a->lock);
(void) memset(a, 0, sizeof(*a));
}
#ifdef __KERNEL__
void nvgpu_alloc_print_stats(struct nvgpu_allocator *na,
struct seq_file *s, int lock)
{
na->ops->print_stats(na, s, lock);
}
#endif
/*
* Handle the common init stuff for an nvgpu_allocator.
*/
int nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
const char *name, void *priv, bool dbg,
const struct nvgpu_allocator_ops *ops)
{
if (ops == NULL) {
return -EINVAL;
}
/*
* These are the bare minimum operations required for a sensible
* allocator.
*/
if ((ops->alloc == NULL) || (ops->free_alloc == NULL) ||
(ops->fini == NULL)) {
return -EINVAL;
}
nvgpu_mutex_init(&a->lock);
a->g = g;
a->ops = ops;
a->priv = priv;
a->debug = dbg;
(void) strncpy(a->name, name, sizeof(a->name));
a->name[sizeof(a->name) - 1U] = '\0';
return 0;
}
/*
* Initialize requested type of allocator
*/
int nvgpu_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
struct vm_gk20a *vm, const char *name,
u64 base, u64 length, u64 blk_size, u64 max_order,
u64 flags, enum nvgpu_allocator_type alloc_type)
{
int err = -EINVAL;
switch (alloc_type) {
case BUDDY_ALLOCATOR:
err = nvgpu_buddy_allocator_init(g, na, vm, name, base, length,
blk_size, max_order, flags);
break;
#ifdef CONFIG_NVGPU_DGPU
case PAGE_ALLOCATOR:
err = nvgpu_page_allocator_init(g, na, name, base, length,
blk_size, flags);
break;
#endif
case BITMAP_ALLOCATOR:
err = nvgpu_bitmap_allocator_init(g, na, name, base, length,
blk_size, flags);
break;
default:
nvgpu_err(g, "Incorrect allocator type, couldn't initialize");
break;
}
if (err < 0) {
nvgpu_err(g, "Failed!");
}
return err;
}
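For illustration, a minimal sketch of selecting an allocator type through nvgpu_allocator_init(); the VA range, block size, and max_order value below are placeholder assumptions rather than values used by the driver.

#include <nvgpu/allocator.h>
#include <nvgpu/gk20a.h>

/* Illustrative sketch: set up a buddy allocator over a 256 MB range. */
static int example_init_buddy(struct gk20a *g, struct vm_gk20a *vm,
			      struct nvgpu_allocator *na)
{
	return nvgpu_allocator_init(g, na, vm, "example-va",
				    0x10000000ULL,	/* base */
				    0x10000000ULL,	/* length: 256 MB */
				    0x1000ULL,		/* blk_size: 4 KB */
				    0ULL,		/* max_order (assumed: allocator default) */
				    0ULL,		/* flags */
				    BUDDY_ALLOCATOR);
}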


File diff suppressed because it is too large.


@@ -0,0 +1,245 @@
/*
* GK20A Address Spaces
*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/trace.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vm.h>
#include <nvgpu/log2.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/string.h>
#include <nvgpu/nvgpu_init.h>
#define VM_NAME_PREFIX "as_"
/* dumb allocator... */
static int generate_as_share_id(struct gk20a_as *as)
{
struct gk20a *g = gk20a_from_as(as);
nvgpu_log_fn(g, " ");
as->last_share_id = nvgpu_safe_add_s32(as->last_share_id, 1);
return as->last_share_id;
}
/* still dumb */
static void release_as_share_id(struct gk20a_as_share *as_share)
{
struct gk20a *g = gk20a_from_as(as_share->as);
nvgpu_log_fn(g, " ");
return;
}
/* address space interfaces for the gk20a module */
static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
u32 big_page_size, u32 flags,
u64 va_range_start, u64 va_range_end,
u64 va_range_split)
{
struct gk20a_as *as = as_share->as;
struct gk20a *g = gk20a_from_as(as);
struct mm_gk20a *mm = &g->mm;
struct vm_gk20a *vm;
char name[NVGPU_VM_NAME_LEN] = VM_NAME_PREFIX;
char *p;
u64 user_size;
u64 kernel_size = mm->channel.kernel_size;
u64 pde_size, pde_size_mask;
bool big_pages;
const bool userspace_managed =
(flags & NVGPU_AS_ALLOC_USERSPACE_MANAGED) != 0U;
const bool unified_va =
nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES) ||
((flags & NVGPU_AS_ALLOC_UNIFIED_VA) != 0U);
nvgpu_log_fn(g, " ");
if (big_page_size == 0U) {
big_pages = false;
big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
} else {
if (!is_power_of_2(big_page_size)) {
return -EINVAL;
}
if ((big_page_size &
nvgpu_mm_get_available_big_page_sizes(g)) == 0U) {
return -EINVAL;
}
big_pages = true;
}
pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(g, big_page_size));
pde_size_mask = nvgpu_safe_sub_u64(pde_size, U64(1));
if ((va_range_start == 0ULL) ||
((va_range_start & pde_size_mask) != 0ULL)) {
return -EINVAL;
}
if ((va_range_end == 0ULL) ||
((va_range_end & pde_size_mask) != 0ULL)) {
return -EINVAL;
}
if (va_range_start >= va_range_end) {
return -EINVAL;
}
user_size = nvgpu_safe_sub_u64(va_range_end, va_range_start);
if (unified_va || !big_pages) {
if (va_range_split != 0ULL) {
return -EINVAL;
}
} else {
/* non-unified VA: split required */
if ((va_range_split == 0ULL) ||
((va_range_split & pde_size_mask) != 0ULL)) {
return -EINVAL;
}
/* non-unified VA: split range checks */
if ((va_range_split <= va_range_start) ||
(va_range_split >= va_range_end)) {
return -EINVAL;
}
}
nvgpu_log_info(g,
"vm: low_hole=0x%llx, user_size=0x%llx, kernel_size=0x%llx",
va_range_start, user_size, kernel_size);
p = name + strlen(name);
(void) nvgpu_strnadd_u32(p, nvgpu_safe_cast_s32_to_u32(as_share->id),
sizeof(name) - sizeof(VM_NAME_PREFIX), 10U);
vm = nvgpu_vm_init(g, big_page_size,
va_range_start,
user_size,
kernel_size,
va_range_split,
big_pages, userspace_managed, unified_va, name);
if (vm == NULL) {
return -ENOMEM;
}
as_share->vm = vm;
vm->as_share = as_share;
vm->enable_ctag = true;
return 0;
}
int gk20a_as_alloc_share(struct gk20a *g,
u32 big_page_size, u32 flags, u64 va_range_start,
u64 va_range_end, u64 va_range_split,
struct gk20a_as_share **out)
{
struct gk20a_as_share *as_share;
int err = 0;
nvgpu_log_fn(g, " ");
g = nvgpu_get(g);
if (g == NULL) {
return -ENODEV;
}
*out = NULL;
as_share = nvgpu_kzalloc(g, sizeof(*as_share));
if (as_share == NULL) {
return -ENOMEM;
}
as_share->as = &g->as;
as_share->id = generate_as_share_id(as_share->as);
/* this will set as_share->vm. */
err = gk20a_busy(g);
if (err != 0) {
goto failed;
}
err = gk20a_vm_alloc_share(as_share, big_page_size, flags,
va_range_start, va_range_end, va_range_split);
gk20a_idle(g);
if (err != 0) {
goto failed;
}
*out = as_share;
return 0;
failed:
nvgpu_kfree(g, as_share);
return err;
}
int gk20a_vm_release_share(struct gk20a_as_share *as_share)
{
struct vm_gk20a *vm = as_share->vm;
struct gk20a *g = gk20a_from_vm(vm);
nvgpu_log_fn(g, " ");
vm->as_share = NULL;
as_share->vm = NULL;
nvgpu_vm_put(vm);
return 0;
}
/*
* Channels and the device nodes call this to release.
* Once the ref_cnt hits zero the share is deleted.
*/
int gk20a_as_release_share(struct gk20a_as_share *as_share)
{
struct gk20a *g = as_share->vm->mm->g;
int err;
nvgpu_log_fn(g, " ");
err = gk20a_busy(g);
if (err != 0) {
goto release_fail;
}
err = gk20a_vm_release_share(as_share);
gk20a_idle(g);
release_fail:
release_as_share_id(as_share);
nvgpu_put(g);
nvgpu_kfree(g, as_share);
return err;
}
struct gk20a *gk20a_from_as(struct gk20a_as *as)
{
return (struct gk20a *)((uintptr_t)as - offsetof(struct gk20a, as));
}
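A hedged sketch of driving the address-space share API above; the flag choice and the VA range are assumptions and must satisfy the PDE-alignment checks performed in gk20a_vm_alloc_share().

/* Illustrative sketch: allocate and release an address space share. */
static int example_as_share(struct gk20a *g)
{
	struct gk20a_as_share *share = NULL;
	int err;

	/*
	 * big_page_size = 0 selects the default big page size, and with a
	 * unified VA no va_range_split is needed. The range below is an
	 * assumption; it must be non-zero and PDE aligned.
	 */
	err = gk20a_as_alloc_share(g, 0U, NVGPU_AS_ALLOC_UNIFIED_VA,
				   0x10000000ULL,	/* va_range_start */
				   0x4000000000ULL,	/* va_range_end */
				   0ULL,		/* va_range_split */
				   &share);
	if (err != 0) {
		return err;
	}

	return gk20a_as_release_share(share);
}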


@@ -0,0 +1,105 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/bug.h>
#include <nvgpu/bitops.h>
#include <nvgpu/comptags.h>
#include <nvgpu/gk20a.h>
int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
u32 *offset, u32 len)
{
unsigned long addr;
int err = 0;
if (allocator->size == 0UL) {
return -EINVAL;
}
nvgpu_mutex_acquire(&allocator->lock);
addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size,
0, len, 0);
if (addr < allocator->size) {
/* number zero is reserved; bitmap base is 1 */
nvgpu_assert(addr < U64(U32_MAX));
*offset = 1U + U32(addr);
nvgpu_bitmap_set(allocator->bitmap, U32(addr), len);
} else {
err = -ENOMEM;
}
nvgpu_mutex_release(&allocator->lock);
return err;
}
void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
u32 offset, u32 len)
{
/* number zero is reserved; bitmap base is 1 */
u32 addr = offset - 1U;
if (allocator->size == 0UL) {
return;
}
WARN_ON(offset == 0U);
WARN_ON(addr > allocator->size);
WARN_ON((unsigned long)addr + (unsigned long)len > allocator->size);
nvgpu_mutex_acquire(&allocator->lock);
nvgpu_bitmap_clear(allocator->bitmap, addr, len);
nvgpu_mutex_release(&allocator->lock);
}
int gk20a_comptag_allocator_init(struct gk20a *g,
struct gk20a_comptag_allocator *allocator,
unsigned long size)
{
nvgpu_mutex_init(&allocator->lock);
/*
* 0th comptag is special and is never used. The base for this bitmap
* is 1, and its size is one less than the size of the comptag store.
*/
size--;
allocator->bitmap = nvgpu_vzalloc(g,
BITS_TO_LONGS(size) * sizeof(long));
if (allocator->bitmap == NULL) {
return -ENOMEM;
}
allocator->size = size;
return 0;
}
void gk20a_comptag_allocator_destroy(struct gk20a *g,
struct gk20a_comptag_allocator *allocator)
{
/*
* called only when exiting the driver (gk20a_remove, or unwinding the
* init stage); no users should be active, so taking the mutex is
* unnecessary here.
*/
allocator->size = 0;
nvgpu_vfree(g, allocator->bitmap);
}
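For illustration, a short round trip through the comptag-line allocator above; note the 1-based offset convention, since comptag line 0 is reserved. The sizes used here are placeholders.

/* Illustrative sketch: allocate and free a run of comptag lines. */
static int example_comptaglines(struct gk20a *g,
				struct gk20a_comptag_allocator *allocator)
{
	u32 offset = 0U;
	int err;

	/* Manage 1024 comptag lines; line 0 stays reserved internally. */
	err = gk20a_comptag_allocator_init(g, allocator, 1024UL);
	if (err != 0) {
		return err;
	}

	/* On success, offset >= 1 (bitmap index plus the reserved line). */
	err = gk20a_comptaglines_alloc(allocator, &offset, 16U);
	if (err == 0) {
		gk20a_comptaglines_free(allocator, offset, 16U);
	}

	gk20a_comptag_allocator_destroy(g, allocator);
	return err;
}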


@@ -0,0 +1,234 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/enabled.h>
#include <nvgpu/nvgpu_mem.h>
int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_flags(g, 0, size, mem);
}
int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
struct nvgpu_mem *mem)
{
#ifdef CONFIG_NVGPU_DGPU
if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
/*
* Force the no-kernel-mapping flag on because we don't support
* the lack of it for vidmem - the user should not care whether the
* memory is vidmem when using nvgpu_gmmu_alloc_map, and if there is a
* difference, the user should use the flag explicitly anyway.
*
* Incoming flags are ignored here, since bits other than the
* no-kernel-mapping flag are ignored by the vidmem mapping
* functions anyway.
*/
int err = nvgpu_dma_alloc_flags_vid(g,
NVGPU_DMA_NO_KERNEL_MAPPING,
size, mem);
if (err == 0) {
return 0;
}
/*
* Fall back to sysmem (which may then also fail) in case
* vidmem is exhausted.
*/
}
#endif
return nvgpu_dma_alloc_flags_sys(g, flags, size, mem);
}
int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_flags_sys(g, 0, size, mem);
}
#ifdef CONFIG_NVGPU_DGPU
int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_flags_vid(g,
NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}
int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0);
}
int nvgpu_dma_alloc_vid_at(struct gk20a *g,
size_t size, struct nvgpu_mem *mem, u64 at)
{
return nvgpu_dma_alloc_flags_vid_at(g,
NVGPU_DMA_NO_KERNEL_MAPPING, size, mem, at);
}
#endif
int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
struct nvgpu_mem *mem)
{
int err = nvgpu_dma_alloc_map_flags(vm, 0, size, mem);
if (err < 0) {
nvgpu_err(vm->mm->g, "Failed!");
}
return err;
}
int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
int err = 0;
#ifdef CONFIG_NVGPU_DGPU
if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) {
/*
* Force the no-kernel-mapping flag on because we don't support
* the lack of it for vidmem - the user should not care whether the
* memory is vidmem when using nvgpu_dma_alloc_map, and if there is a
* difference, the user should use the flag explicitly anyway.
*/
err = nvgpu_dma_alloc_map_flags_vid(vm,
flags | NVGPU_DMA_NO_KERNEL_MAPPING,
size, mem);
if (err == 0) {
return 0;
}
/*
* Fall back to sysmem (which may then also fail) in case
* vidmem is exhausted.
*/
}
#endif
err = nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem);
if (err < 0) {
nvgpu_err(vm->mm->g, "Failed!");
}
return err;
}
int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
struct nvgpu_mem *mem)
{
int err = 0;
err = nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem);
if (err < 0) {
nvgpu_err(vm->mm->g, "Failed!");
}
return err;
}
int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem);
if (err != 0) {
return err;
}
mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
gk20a_mem_flag_none, false,
mem->aperture);
if (mem->gpu_va == 0ULL) {
err = -ENOMEM;
goto fail_free;
}
return 0;
fail_free:
nvgpu_dma_free(vm->mm->g, mem);
return err;
}
#ifdef CONFIG_NVGPU_DGPU
int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size,
struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_map_flags_vid(vm,
NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}
int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem);
if (err != 0) {
return err;
}
mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
gk20a_mem_flag_none, false,
mem->aperture);
if (mem->gpu_va == 0ULL) {
err = -ENOMEM;
goto fail_free;
}
return 0;
fail_free:
nvgpu_dma_free(vm->mm->g, mem);
return err;
}
#endif
void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem)
{
switch (mem->aperture) {
case APERTURE_SYSMEM:
nvgpu_dma_free_sys(g, mem);
break;
#ifdef CONFIG_NVGPU_DGPU
case APERTURE_VIDMEM:
nvgpu_dma_free_vid(g, mem);
break;
#endif
default:
/* like free() on "null" memory */
break;
}
}
void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
{
if (mem->gpu_va != 0ULL) {
nvgpu_gmmu_unmap(vm, mem, mem->gpu_va);
}
mem->gpu_va = 0;
nvgpu_dma_free(vm->mm->g, mem);
}
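For illustration, a minimal sysmem round trip through the helpers above: allocate, map into the GMMU, then unmap and free. The buffer size is a placeholder.

#include <nvgpu/dma.h>
#include <nvgpu/nvgpu_mem.h>

/* Illustrative sketch: sysmem alloc + GMMU map, then tear down. */
static int example_dma_round_trip(struct vm_gk20a *vm)
{
	struct nvgpu_mem mem = { };
	int err;

	/* 64 KB sysmem buffer; mem.gpu_va is valid on success. */
	err = nvgpu_dma_alloc_map_sys(vm, 0x10000, &mem);
	if (err != 0) {
		return err;
	}

	/* ... use mem.gpu_va (GPU) and mem.cpu_va (CPU) here ... */

	nvgpu_dma_unmap_free(vm, &mem);
	return 0;
}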


File diff suppressed because it is too large.


@@ -0,0 +1,514 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/bug.h>
#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/log2.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/enabled.h>
#include <nvgpu/static_analysis.h>
#include "pd_cache_priv.h"
static inline struct nvgpu_pd_mem_entry *
nvgpu_pd_mem_entry_from_list_entry(struct nvgpu_list_node *node)
{
return (struct nvgpu_pd_mem_entry *)
((uintptr_t)node -
offsetof(struct nvgpu_pd_mem_entry, list_entry));
};
static inline struct nvgpu_pd_mem_entry *
nvgpu_pd_mem_entry_from_tree_entry(struct nvgpu_rbtree_node *node)
{
return (struct nvgpu_pd_mem_entry *)
((uintptr_t)node -
offsetof(struct nvgpu_pd_mem_entry, tree_entry));
};
static u32 nvgpu_pd_cache_nr(u32 bytes)
{
unsigned long tmp = ilog2((unsigned long)bytes >>
((unsigned long)NVGPU_PD_CACHE_MIN_SHIFT - 1UL));
nvgpu_assert(tmp <= U32_MAX);
return (u32)tmp;
}
static u32 nvgpu_pd_cache_get_nr_entries(struct nvgpu_pd_mem_entry *pentry)
{
BUG_ON(pentry->pd_size == 0);
return (nvgpu_safe_cast_u64_to_u32(NVGPU_PD_CACHE_SIZE)) /
pentry->pd_size;
}
/*
* Return the _physical_ address of a page directory.
*/
u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
{
u64 page_addr;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
page_addr = nvgpu_mem_get_phys_addr(g, pd->mem);
} else {
page_addr = nvgpu_mem_get_addr(g, pd->mem);
}
return nvgpu_safe_add_u64(page_addr, U64(pd->mem_offs));
}
u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx)
{
return nvgpu_safe_mult_u32(pd_idx, l->entry_size) / U32(sizeof(u32));
}
void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
size_t w, u32 data)
{
u64 tmp_offset = nvgpu_safe_add_u64((pd->mem_offs / sizeof(u32)), w);
nvgpu_mem_wr32(g, pd->mem,
nvgpu_safe_cast_u64_to_u32(tmp_offset),
data);
}
int nvgpu_pd_cache_init(struct gk20a *g)
{
struct nvgpu_pd_cache *cache;
u32 i;
/*
* This gets called from finalize_poweron() so we need to make sure we
* don't reinit the pd_cache over and over.
*/
if (g->mm.pd_cache != NULL) {
return 0;
}
cache = nvgpu_kzalloc(g, sizeof(*cache));
if (cache == NULL) {
nvgpu_err(g, "Failed to alloc pd_cache!");
return -ENOMEM;
}
for (i = 0U; i < NVGPU_PD_CACHE_COUNT; i++) {
nvgpu_init_list_node(&cache->full[i]);
nvgpu_init_list_node(&cache->partial[i]);
}
cache->mem_tree = NULL;
nvgpu_mutex_init(&cache->lock);
g->mm.pd_cache = cache;
pd_dbg(g, "PD cache initialized!");
return 0;
}
void nvgpu_pd_cache_fini(struct gk20a *g)
{
u32 i;
struct nvgpu_pd_cache *cache = g->mm.pd_cache;
if (cache == NULL) {
return;
}
for (i = 0U; i < NVGPU_PD_CACHE_COUNT; i++) {
nvgpu_assert(nvgpu_list_empty(&cache->full[i]));
nvgpu_assert(nvgpu_list_empty(&cache->partial[i]));
}
nvgpu_kfree(g, g->mm.pd_cache);
g->mm.pd_cache = NULL;
}
/*
* This is the simple pass-through for PDs that are page sized or larger.
*
* Note: this does not need the cache lock since it does not modify any of the
* PD cache data structures.
*/
int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
struct nvgpu_gmmu_pd *pd, u32 bytes)
{
int err;
unsigned long flags = 0;
pd_dbg(g, "PD-Alloc [D] %u bytes", bytes);
pd->mem = nvgpu_kzalloc(g, sizeof(*pd->mem));
if (pd->mem == NULL) {
nvgpu_err(g, "OOM allocating nvgpu_mem struct!");
return -ENOMEM;
}
/*
* If bytes == NVGPU_CPU_PAGE_SIZE then it's impossible to get a discontiguous DMA
* allocation. Some DMA implementations may, despite this fact, still
* use the contiguous pool for page sized allocations. As such only
* request explicitly contiguous allocs if the page directory is larger
* than the page size. Also, of course, this is all only relevant for
* GPUs not using an IOMMU. If there is an IOMMU, DMA allocs are always
* going to be virtually contiguous and we don't have to force the
* underlying allocations to be physically contiguous as well.
*/
if (!nvgpu_iommuable(g) && (bytes > NVGPU_CPU_PAGE_SIZE)) {
flags = NVGPU_DMA_PHYSICALLY_ADDRESSED;
}
err = nvgpu_dma_alloc_flags(g, flags, bytes, pd->mem);
if (err != 0) {
nvgpu_err(g, "OOM allocating page directory!");
nvgpu_kfree(g, pd->mem);
return -ENOMEM;
}
pd->cached = false;
pd->mem_offs = 0;
return 0;
}
/*
* Make a new nvgpu_pd_mem_entry and allocate a PD from it. Update the passed
* pd to reflect this allocation.
*/
static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
struct nvgpu_pd_cache *cache,
struct nvgpu_gmmu_pd *pd,
u32 bytes)
{
struct nvgpu_pd_mem_entry *pentry;
u64 flags = 0UL;
int32_t err;
pd_dbg(g, "PD-Alloc [C] New: offs=0");
pentry = nvgpu_kzalloc(g, sizeof(*pentry));
if (pentry == NULL) {
nvgpu_err(g, "OOM allocating pentry!");
return -ENOMEM;
}
if (!nvgpu_iommuable(g) && (NVGPU_PD_CACHE_SIZE > NVGPU_CPU_PAGE_SIZE)) {
flags = NVGPU_DMA_PHYSICALLY_ADDRESSED;
}
err = nvgpu_dma_alloc_flags(g, flags,
NVGPU_PD_CACHE_SIZE, &pentry->mem);
if (err != 0) {
nvgpu_kfree(g, pentry);
/* Not enough contiguous space, but a direct
* allocation may work
*/
if (err == -ENOMEM) {
return nvgpu_pd_cache_alloc_direct(g, pd, bytes);
}
nvgpu_err(g, "Unable to DMA alloc!");
return -ENOMEM;
}
pentry->pd_size = bytes;
nvgpu_list_add(&pentry->list_entry,
&cache->partial[nvgpu_pd_cache_nr(bytes)]);
/*
* This allocates the very first PD table in the set of tables in this
* nvgpu_pd_mem_entry.
*/
nvgpu_set_bit(0U, pentry->alloc_map);
pentry->allocs = 1;
/*
* Now update the nvgpu_gmmu_pd to reflect this allocation.
*/
pd->mem = &pentry->mem;
pd->mem_offs = 0;
pd->cached = true;
pentry->tree_entry.key_start = (u64)(uintptr_t)&pentry->mem;
nvgpu_rbtree_insert(&pentry->tree_entry, &cache->mem_tree);
return 0;
}
static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g,
struct nvgpu_pd_cache *cache,
struct nvgpu_pd_mem_entry *pentry,
struct nvgpu_gmmu_pd *pd)
{
u32 bit_offs;
u32 mem_offs;
u32 nr_bits = nvgpu_pd_cache_get_nr_entries(pentry);
/*
* Find and allocate an open PD.
*/
bit_offs = nvgpu_safe_cast_u64_to_u32(
find_first_zero_bit(pentry->alloc_map, nr_bits));
mem_offs = nvgpu_safe_mult_u32(bit_offs, pentry->pd_size);
pd_dbg(g, "PD-Alloc [C] Partial: offs=%u nr_bits=%d src=0x%p",
bit_offs, nr_bits, pentry);
/* Bitmap full. Something's wrong. */
nvgpu_assert(bit_offs < nr_bits);
nvgpu_set_bit(bit_offs, pentry->alloc_map);
pentry->allocs = nvgpu_safe_add_u32(pentry->allocs, 1U);
/*
* First update the pd.
*/
pd->mem = &pentry->mem;
pd->mem_offs = mem_offs;
pd->cached = true;
/*
* Now make sure the pentry is in the correct list (full vs partial).
*/
if (pentry->allocs >= nr_bits) {
pd_dbg(g, "Adding pentry to full list!");
nvgpu_list_del(&pentry->list_entry);
nvgpu_list_add(&pentry->list_entry,
&cache->full[nvgpu_pd_cache_nr(pentry->pd_size)]);
}
return 0;
}
/*
* Get a partially full nvgpu_pd_mem_entry. Returns NULL if there is no
* partial nvgpu_pd_mem_entry available.
*/
static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_get_partial(
struct nvgpu_pd_cache *cache, u32 bytes)
{
struct nvgpu_list_node *list =
&cache->partial[nvgpu_pd_cache_nr(bytes)];
if (nvgpu_list_empty(list)) {
return NULL;
}
return nvgpu_list_first_entry(list,
nvgpu_pd_mem_entry,
list_entry);
}
/*
* Allocate memory from an nvgpu_mem for the page directory.
*/
static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
struct nvgpu_gmmu_pd *pd, u32 bytes)
{
struct nvgpu_pd_mem_entry *pentry;
int err;
bool bytes_valid;
pd_dbg(g, "PD-Alloc [C] %u bytes", bytes);
bytes_valid = bytes >= NVGPU_PD_CACHE_MIN;
if (bytes_valid) {
bytes_valid = (bytes & nvgpu_safe_sub_u32(bytes, 1U)) == 0U;
}
if (!bytes_valid) {
pd_dbg(g, "PD-Alloc [C] Invalid (bytes=%u)!", bytes);
return -EINVAL;
}
nvgpu_assert(bytes < NVGPU_PD_CACHE_SIZE);
pentry = nvgpu_pd_cache_get_partial(cache, bytes);
if (pentry == NULL) {
err = nvgpu_pd_cache_alloc_new(g, cache, pd, bytes);
} else {
err = nvgpu_pd_cache_alloc_from_partial(g, cache, pentry, pd);
}
if (err != 0) {
nvgpu_err(g, "PD-Alloc [C] Failed!");
}
return err;
}
/*
* Allocate the DMA memory for a page directory. This handles the necessary PD
* cache logistics. Since on Parker and later GPUs some of the page directories
* are smaller than a page, packing these PDs together saves a lot of memory.
*/
int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
{
struct gk20a *g = gk20a_from_vm(vm);
int err;
/*
* Simple case: PD is bigger than a page so just do a regular DMA
* alloc.
*/
if (bytes >= NVGPU_PD_CACHE_SIZE) {
err = nvgpu_pd_cache_alloc_direct(g, pd, bytes);
if (err != 0) {
return err;
}
pd->pd_size = bytes;
return 0;
}
if (g->mm.pd_cache == NULL) {
nvgpu_do_assert();
return -ENOMEM;
}
nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
err = nvgpu_pd_cache_alloc(g, g->mm.pd_cache, pd, bytes);
if (err == 0) {
pd->pd_size = bytes;
}
nvgpu_mutex_release(&g->mm.pd_cache->lock);
return err;
}
static void nvgpu_pd_cache_free_direct(struct gk20a *g,
struct nvgpu_gmmu_pd *pd)
{
pd_dbg(g, "PD-Free [D] 0x%p", pd->mem);
if (pd->mem == NULL) {
return;
}
nvgpu_dma_free(g, pd->mem);
nvgpu_kfree(g, pd->mem);
pd->mem = NULL;
}
static void nvgpu_pd_cache_free_mem_entry(struct gk20a *g,
struct nvgpu_pd_cache *cache,
struct nvgpu_pd_mem_entry *pentry)
{
nvgpu_dma_free(g, &pentry->mem);
nvgpu_list_del(&pentry->list_entry);
nvgpu_rbtree_unlink(&pentry->tree_entry, &cache->mem_tree);
nvgpu_kfree(g, pentry);
}
static void nvgpu_pd_cache_do_free(struct gk20a *g,
struct nvgpu_pd_cache *cache,
struct nvgpu_pd_mem_entry *pentry,
struct nvgpu_gmmu_pd *pd)
{
u32 bit = pd->mem_offs / pentry->pd_size;
/* Mark entry as free. */
nvgpu_clear_bit(bit, pentry->alloc_map);
pentry->allocs = nvgpu_safe_sub_u32(pentry->allocs, 1U);
if (pentry->allocs > 0U) {
/*
* Partially full still. If it was already on the partial list
* this just re-adds it.
*
		 * Since the memory used for the entries is still mapped, on
		 * iGPU make sure the entries are invalidated so that the hw
		 * doesn't accidentally try to prefetch non-existent fb memory.
		 *
		 * Because IOMMU prefetching of invalid pd entries causes an
		 * IOMMU fault, fill them with zero.
*/
if ((nvgpu_iommuable(g)) &&
(NVGPU_PD_CACHE_SIZE > NVGPU_CPU_SMALL_PAGE_SIZE) &&
(pd->mem->cpu_va != NULL)) {
(void)memset(((u8 *)pd->mem->cpu_va + pd->mem_offs), 0,
pd->pd_size);
}
nvgpu_list_del(&pentry->list_entry);
nvgpu_list_add(&pentry->list_entry,
&cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]);
} else {
/* Empty now so free it. */
nvgpu_pd_cache_free_mem_entry(g, cache, pentry);
}
pd->mem = NULL;
}
static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_look_up(
struct nvgpu_pd_cache *cache,
struct nvgpu_gmmu_pd *pd)
{
struct nvgpu_rbtree_node *node = NULL;
nvgpu_rbtree_search((u64)(uintptr_t)pd->mem, &node,
cache->mem_tree);
if (node == NULL) {
return NULL;
}
return nvgpu_pd_mem_entry_from_tree_entry(node);
}
static void nvgpu_pd_cache_free(struct gk20a *g, struct nvgpu_pd_cache *cache,
struct nvgpu_gmmu_pd *pd)
{
struct nvgpu_pd_mem_entry *pentry;
pd_dbg(g, "PD-Free [C] 0x%p", pd->mem);
pentry = nvgpu_pd_cache_look_up(cache, pd);
if (pentry == NULL) {
nvgpu_do_assert_print(g, "Attempting to free non-existent pd");
return;
}
nvgpu_pd_cache_do_free(g, cache, pentry, pd);
}
void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd)
{
struct gk20a *g = gk20a_from_vm(vm);
/*
* Simple case: just DMA free.
*/
if (!pd->cached) {
return nvgpu_pd_cache_free_direct(g, pd);
}
nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
nvgpu_pd_cache_free(g, g->mm.pd_cache, pd);
nvgpu_mutex_release(&g->mm.pd_cache->lock);
}

View File

@@ -0,0 +1,177 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GMMU_PD_CACHE_PRIV_H
#define NVGPU_GMMU_PD_CACHE_PRIV_H
/**
* @file
*
* Page directory cache private interface
* --------------------------------------
*
 * To save memory when using sub-page sized PD levels in Pascal and beyond, a way
 * of packing PD tables together is necessary. If a PD table only requires 1024
* bytes, then it is possible to have 4 of these PDs in one page. This is even
* more pronounced for 256 byte PD tables.
*
* This also matters for page directories on any chip when using a 64K page
* granule. Having 4K PDs packed into a 64K page saves a bunch of memory. Even
* more so for the 256B PDs on Pascal+.
*
 * The pd cache is basically just a slab allocator. Each instance of the nvgpu
* driver makes one of these structs:
*
* struct nvgpu_pd_cache {
* struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
* struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];
*
* struct nvgpu_rbtree_node *mem_tree;
* };
*
* There are two sets of lists, the full and the partial. The full lists contain
* pages of memory for which all the memory in that page is in use. The partial
* lists contain partially full pages of memory which can be used for more PD
 * allocations. There are a couple of assumptions here:
*
* 1. PDs greater than or equal to the page size bypass the pd cache.
* 2. PDs are always power of 2 and greater than %NVGPU_PD_CACHE_MIN bytes.
*
 * There are NVGPU_PD_CACHE_COUNT full lists and the same number of partial
 * lists. For a 4KB page NVGPU_PD_CACHE_COUNT is 4, which is enough space for
 * 256, 512, 1024, and 2048 byte PDs. An illustrative size-to-index sketch
 * follows the size macros below.
*
* nvgpu_pd_alloc() will allocate a PD for the GMMU. It will check if the PD
* size is page size or larger and choose the correct allocation scheme - either
* from the PD cache or directly. Similarly nvgpu_pd_free() will free a PD
* allocated by nvgpu_pd_alloc().
*/
#include <nvgpu/bug.h>
#include <nvgpu/log.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/lock.h>
#define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args)
/**
 * Minimum size of a cache. The number of different caches in the nvgpu_pd_cache
 * structure of course depends on this.
*/
#define NVGPU_PD_CACHE_MIN 256UL
/**
* MIN_SHIFT is the right number of bits to shift to determine
* which list to use in the array of lists.
*/
#define NVGPU_PD_CACHE_MIN_SHIFT 9UL
/**
* Maximum PD cache count. This specifies the number of slabs; since each
 * slab represents a PO2 increase in size, a count of 8 leads to:
*
* NVGPU_PD_CACHE_SIZE = 256B * 2^8 = 64KB
*
* For Linux with 4K pages, if the cache size is larger than 4KB then we
* need to allocate from CMA. This puts a lot of pressure on the CMA space.
 * For a kernel with a PAGE_SIZE of 64K this isn't the case, so allow the
 * PD cache size to be 64K if PAGE_SIZE > 4K (i.e. PAGE_SIZE == 64K).
*/
#ifdef __KERNEL__
# if NVGPU_CPU_PAGE_SIZE > 4096
# define NVGPU_PD_CACHE_COUNT 8UL
# else
# define NVGPU_PD_CACHE_COUNT 4UL
# endif
#else
#define NVGPU_PD_CACHE_COUNT 8UL
#endif
#define NVGPU_PD_CACHE_SIZE (NVGPU_PD_CACHE_MIN * \
(1UL << NVGPU_PD_CACHE_COUNT))
/**
* This structure describes a slab within the slab allocator.
*/
struct nvgpu_pd_mem_entry {
/**
* Structure for storing the PD memory information.
*/
struct nvgpu_mem mem;
/**
* Size of the page directories (not the mem).
*/
u32 pd_size;
/**
	 * alloc_map is a bitmap showing which PDs have been allocated.
	 * The size of mem will always be one page. pd_size will always be
	 * a power of 2.
*/
DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN);
/**
* Total number of allocations in this PD.
*/
u32 allocs;
/**
	 * List node used to link this entry into either a full or partial
	 * list in #nvgpu_pd_cache.
*/
struct nvgpu_list_node list_entry;
/**
	 * Tree node used to link this entry into the mem_tree of
	 * #nvgpu_pd_cache.
*/
struct nvgpu_rbtree_node tree_entry;
};
/**
* A cache for allocating PD memory. This enables smaller PDs to be packed
* into single pages.
*/
struct nvgpu_pd_cache {
/**
	 * Array of lists of completely full nvgpu_pd_mem_entries, one list
	 * per PD size.
*/
struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
/**
	 * Array of lists of partially full nvgpu_pd_mem_entries, one list
	 * per PD size.
*/
struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];
/**
* Tree of all allocated struct nvgpu_mem's for fast look up.
*/
struct nvgpu_rbtree_node *mem_tree;
/**
	 * All access to the cache must be locked. This protects the lists and
* the rb tree.
*/
struct nvgpu_mutex lock;
};
#endif /* NVGPU_GMMU_PD_CACHE_PRIV_H */

View File

@@ -0,0 +1,119 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/string.h>
#include <nvgpu/gmmu.h>
u32 nvgpu_gmmu_default_big_page_size(void)
{
return U32(SZ_64K);
}
/*
* MSS NVLINK HW settings are in force_snoop mode.
* This will force all the GPU mappings to be coherent.
* By default the mem aperture is set to sysmem_non_coherent and will use L2
* atomics.
 * Change the target PTE aperture to sysmem_coherent if the mem attribute
 * requests platform atomics, so that the RMW atomic capability is used.
 * A usage sketch follows the function below.
 */
u32 nvgpu_gmmu_aperture_mask(struct gk20a *g,
enum nvgpu_aperture mem_ap,
bool platform_atomic_attr,
u32 sysmem_mask,
u32 sysmem_coh_mask,
u32 vidmem_mask)
{
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC) &&
platform_atomic_attr) {
mem_ap = APERTURE_SYSMEM_COH;
}
return nvgpu_aperture_mask_raw(g, mem_ap,
sysmem_mask,
sysmem_coh_mask,
vidmem_mask);
}
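/*
 * Editorial sketch (not part of the original file): one way a chip HAL might
 * call nvgpu_gmmu_aperture_mask() above when assembling the aperture field of
 * a PTE. The three mask values are hypothetical placeholders, not real
 * register fields.
 */
#define SKETCH_APERTURE_SYSMEM_NCOH	0x1U
#define SKETCH_APERTURE_SYSMEM_COH	0x2U
#define SKETCH_APERTURE_VIDMEM		0x3U

static u32 nvgpu_gmmu_aperture_field_sketch(struct gk20a *g,
					enum nvgpu_aperture ap,
					bool platform_atomic)
{
	/*
	 * When platform atomics are supported and requested, the coherent
	 * sysmem mask is selected for sysmem buffers.
	 */
	return nvgpu_gmmu_aperture_mask(g, ap, platform_atomic,
					SKETCH_APERTURE_SYSMEM_NCOH,
					SKETCH_APERTURE_SYSMEM_COH,
					SKETCH_APERTURE_VIDMEM);
}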
static char *map_attrs_to_str(char *dest, struct nvgpu_gmmu_attrs *attrs)
{
dest[0] = attrs->cacheable ? 'C' : '-';
dest[1] = attrs->sparse ? 'S' : '-';
dest[2] = attrs->priv ? 'P' : '-';
dest[3] = attrs->valid ? 'V' : '-';
dest[4] = attrs->platform_atomic ? 'A' : '-';
dest[5] = '\0';
return dest;
}
void nvgpu_pte_dbg_print(struct gk20a *g,
struct nvgpu_gmmu_attrs *attrs,
const char *vm_name, u32 pd_idx, u32 mmu_level_entry_size,
u64 virt_addr, u64 phys_addr, u32 page_size, u32 *pte_w)
{
char attrs_str[6];
char ctag_str[32] = "\0";
const char *aperture_str = nvgpu_aperture_str(attrs->aperture);
const char *perm_str = nvgpu_gmmu_perm_str(attrs->rw_flag);
#ifdef CONFIG_NVGPU_COMPRESSION
u64 ctag_tmp = attrs->ctag;
u32 str_len = 0U;
u32 ctag_num = 0U;
/*
* attrs->ctag is incremented to count current page size as well.
* Subtract to get this page's ctag line number.
*/
if (ctag_tmp != 0ULL) {
ctag_tmp = nvgpu_safe_sub_u64(ctag_tmp, page_size);
}
ctag_num = nvgpu_safe_cast_u64_to_u32(ctag_tmp /
g->ops.fb.compression_page_size(g));
(void)strcpy(ctag_str, "ctag=0x\0");
str_len = (u32)strlen(ctag_str);
(void)nvgpu_strnadd_u32(ctag_str + str_len, ctag_num,
nvgpu_safe_sub_u32(31U, str_len), 16U);
#endif
(void)map_attrs_to_str(attrs_str, attrs);
pte_dbg(g, attrs,
"vm=%s "
"PTE: i=%-4u size=%-2u | "
"GPU %#-12llx phys %#-12llx "
"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %-5s "
"%s "
"[0x%08x, 0x%08x]",
vm_name,
pd_idx, mmu_level_entry_size,
virt_addr, phys_addr,
page_size >> 10,
perm_str,
attrs->kind_v,
aperture_str,
attrs_str,
ctag_str,
pte_w[1], pte_w[0]);
}

View File

@@ -0,0 +1,710 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include <nvgpu/dma.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/acr.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/pramin.h>
#include <nvgpu/enabled.h>
#include <nvgpu/errata.h>
#include <nvgpu/ce_app.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/engines.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/power_features/cg.h>
int nvgpu_mm_suspend(struct gk20a *g)
{
int err;
nvgpu_log_info(g, "MM suspend running...");
#ifdef CONFIG_NVGPU_DGPU
nvgpu_vidmem_thread_pause_sync(&g->mm);
#endif
#ifdef CONFIG_NVGPU_COMPRESSION
g->ops.mm.cache.cbc_clean(g);
#endif
err = g->ops.mm.cache.l2_flush(g, false);
if (err != 0) {
nvgpu_err(g, "l2_flush failed");
return err;
}
if (g->ops.fb.intr.disable != NULL) {
g->ops.fb.intr.disable(g);
}
if (g->ops.mm.mmu_fault.disable_hw != NULL) {
g->ops.mm.mmu_fault.disable_hw(g);
}
nvgpu_log_info(g, "MM suspend done!");
return err;
}
u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
{
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
return nvgpu_mem_get_phys_addr(g, inst_block);
} else {
return nvgpu_mem_get_addr(g, inst_block);
}
}
u32 nvgpu_inst_block_ptr(struct gk20a *g, struct nvgpu_mem *inst_block)
{
u64 addr = nvgpu_inst_block_addr(g, inst_block) >>
g->ops.ramin.base_shift();
nvgpu_assert(u64_hi32(addr) == 0U);
return u64_lo32(addr);
}
void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
if (nvgpu_mem_is_valid(inst_block)) {
nvgpu_dma_free(g, inst_block);
}
}
int nvgpu_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
int err;
nvgpu_log_fn(g, " ");
err = nvgpu_dma_alloc(g, g->ops.ramin.alloc_size(), inst_block);
if (err != 0) {
nvgpu_err(g, "%s: memory allocation failed", __func__);
return err;
}
nvgpu_log_fn(g, "done");
return 0;
}
static int nvgpu_alloc_sysmem_flush(struct gk20a *g)
{
return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
}
#ifdef CONFIG_NVGPU_DGPU
static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm)
{
struct gk20a *g = gk20a_from_mm(mm);
if (mm->vidmem.ce_ctx_id != NVGPU_CE_INVAL_CTX_ID) {
nvgpu_ce_app_delete_context(g, mm->vidmem.ce_ctx_id);
}
mm->vidmem.ce_ctx_id = NVGPU_CE_INVAL_CTX_ID;
nvgpu_vm_put(mm->ce.vm);
}
#endif
static void nvgpu_remove_mm_support(struct mm_gk20a *mm)
{
struct gk20a *g = gk20a_from_mm(mm);
nvgpu_dma_free(g, &mm->mmu_wr_mem);
nvgpu_dma_free(g, &mm->mmu_rd_mem);
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
if (nvgpu_fb_vab_teardown_hal(g) != 0) {
nvgpu_err(g, "failed to teardown VAB");
}
#endif
if (g->ops.mm.mmu_fault.info_mem_destroy != NULL) {
g->ops.mm.mmu_fault.info_mem_destroy(g);
}
if (g->ops.mm.remove_bar2_vm != NULL) {
g->ops.mm.remove_bar2_vm(g);
}
nvgpu_free_inst_block(g, &mm->bar1.inst_block);
nvgpu_vm_put(mm->bar1.vm);
nvgpu_free_inst_block(g, &mm->pmu.inst_block);
nvgpu_free_inst_block(g, &mm->hwpm.inst_block);
nvgpu_vm_put(mm->pmu.vm);
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) {
nvgpu_free_inst_block(g, &mm->sec2.inst_block);
nvgpu_vm_put(mm->sec2.vm);
}
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) {
nvgpu_free_inst_block(g, &mm->gsp.inst_block);
nvgpu_vm_put(mm->gsp.vm);
}
if (g->has_cde) {
nvgpu_vm_put(mm->cde.vm);
}
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
nvgpu_semaphore_sea_destroy(g);
#endif
#ifdef CONFIG_NVGPU_DGPU
nvgpu_vidmem_destroy(g);
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_INIT_PDB_CACHE)) {
g->ops.ramin.deinit_pdb_cache_errata(g);
}
#endif
nvgpu_pd_cache_fini(g);
}
/* pmu vm, share channel_vm interfaces */
static int nvgpu_init_system_vm(struct mm_gk20a *mm)
{
int err;
struct gk20a *g = gk20a_from_mm(mm);
struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
u64 low_hole, aperture_size;
/*
* For some reason the maxwell PMU code is dependent on the large page
* size. No reason AFAICT for this. Probably a bug somewhere.
*/
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM)) {
big_page_size = nvgpu_safe_cast_u64_to_u32(SZ_128K);
}
/*
* No user region - so we will pass that as zero sized.
*/
low_hole = SZ_4K * 16UL;
aperture_size = GK20A_PMU_VA_SIZE;
mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
nvgpu_log_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size);
mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
low_hole,
0ULL,
nvgpu_safe_sub_u64(aperture_size, low_hole),
0ULL,
true,
false,
false,
"system");
if (mm->pmu.vm == NULL) {
return -ENOMEM;
}
err = nvgpu_alloc_inst_block(g, inst_block);
if (err != 0) {
goto clean_up_vm;
}
g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
return 0;
clean_up_vm:
nvgpu_vm_put(mm->pmu.vm);
return err;
}
static int nvgpu_init_hwpm(struct mm_gk20a *mm)
{
int err;
struct gk20a *g = gk20a_from_mm(mm);
struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
err = nvgpu_alloc_inst_block(g, inst_block);
if (err != 0) {
return err;
}
g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
return 0;
}
static int nvgpu_init_cde_vm(struct mm_gk20a *mm)
{
struct gk20a *g = gk20a_from_mm(mm);
u64 user_size, kernel_size;
u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
g->ops.mm.get_default_va_sizes(NULL, &user_size, &kernel_size);
mm->cde.vm = nvgpu_vm_init(g, big_page_size,
U64(big_page_size) << U64(10),
nvgpu_safe_sub_u64(user_size,
U64(big_page_size) << U64(10)),
kernel_size,
0ULL,
false, false, false, "cde");
if (mm->cde.vm == NULL) {
return -ENOMEM;
}
return 0;
}
static int nvgpu_init_ce_vm(struct mm_gk20a *mm)
{
struct gk20a *g = gk20a_from_mm(mm);
u64 user_size, kernel_size;
u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
g->ops.mm.get_default_va_sizes(NULL, &user_size, &kernel_size);
mm->ce.vm = nvgpu_vm_init(g, big_page_size,
U64(big_page_size) << U64(10),
nvgpu_safe_sub_u64(user_size,
U64(big_page_size) << U64(10)),
kernel_size,
0ULL,
false, false, false, "ce");
if (mm->ce.vm == NULL) {
return -ENOMEM;
}
return 0;
}
static int nvgpu_init_mmu_debug(struct mm_gk20a *mm)
{
struct gk20a *g = gk20a_from_mm(mm);
int err;
if (!nvgpu_mem_is_valid(&mm->mmu_wr_mem)) {
err = nvgpu_dma_alloc_sys(g, SZ_4K, &mm->mmu_wr_mem);
if (err != 0) {
goto err;
}
}
if (!nvgpu_mem_is_valid(&mm->mmu_rd_mem)) {
err = nvgpu_dma_alloc_sys(g, SZ_4K, &mm->mmu_rd_mem);
if (err != 0) {
goto err_free_wr_mem;
}
}
return 0;
err_free_wr_mem:
nvgpu_dma_free(g, &mm->mmu_wr_mem);
err:
return -ENOMEM;
}
#if defined(CONFIG_NVGPU_DGPU)
void nvgpu_init_mm_ce_context(struct gk20a *g)
{
if (g->mm.vidmem.size > 0U &&
(g->mm.vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID)) {
g->mm.vidmem.ce_ctx_id =
nvgpu_ce_app_create_context(g,
nvgpu_engine_get_fast_ce_runlist_id(g),
-1,
-1);
if (g->mm.vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID) {
nvgpu_err(g,
"Failed to allocate CE context for vidmem page clearing support");
}
}
}
#endif
static int nvgpu_init_bar1_vm(struct mm_gk20a *mm)
{
int err;
struct gk20a *g = gk20a_from_mm(mm);
struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
nvgpu_log_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size);
mm->bar1.vm = nvgpu_vm_init(g,
big_page_size,
SZ_64K,
0ULL,
nvgpu_safe_sub_u64(mm->bar1.aperture_size, SZ_64K),
0ULL,
true, false, false,
"bar1");
if (mm->bar1.vm == NULL) {
return -ENOMEM;
}
err = nvgpu_alloc_inst_block(g, inst_block);
if (err != 0) {
goto clean_up_vm;
}
g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
return 0;
clean_up_vm:
nvgpu_vm_put(mm->bar1.vm);
return err;
}
static int nvgpu_init_engine_ucode_vm(struct gk20a *g,
struct engine_ucode *ucode, const char *address_space_name)
{
int err;
struct nvgpu_mem *inst_block = &ucode->inst_block;
u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
/* ucode aperture size is 32MB */
ucode->aperture_size = U32(32) << 20U;
nvgpu_log_info(g, "%s vm size = 0x%x", address_space_name,
ucode->aperture_size);
ucode->vm = nvgpu_vm_init(g, big_page_size, SZ_4K,
0ULL, nvgpu_safe_sub_u64(ucode->aperture_size, SZ_4K), 0ULL,
false, false, false,
address_space_name);
if (ucode->vm == NULL) {
return -ENOMEM;
}
/* allocate instance mem for engine ucode */
err = nvgpu_alloc_inst_block(g, inst_block);
if (err != 0) {
goto clean_up_va;
}
g->ops.mm.init_inst_block(inst_block, ucode->vm, big_page_size);
return 0;
clean_up_va:
nvgpu_vm_put(ucode->vm);
return err;
}
static int nvgpu_init_mm_setup_bar(struct gk20a *g)
{
struct mm_gk20a *mm = &g->mm;
int err;
err = nvgpu_init_bar1_vm(mm);
if (err != 0) {
return err;
}
if (g->ops.mm.init_bar2_vm != NULL) {
err = g->ops.mm.init_bar2_vm(g);
if (err != 0) {
return err;
}
}
err = nvgpu_init_system_vm(mm);
if (err != 0) {
return err;
}
err = nvgpu_init_hwpm(mm);
if (err != 0) {
return err;
}
return err;
}
static int nvgpu_init_mm_setup_vm(struct gk20a *g)
{
struct mm_gk20a *mm = &g->mm;
int err;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) {
err = nvgpu_init_engine_ucode_vm(g, &mm->sec2, "sec2");
if (err != 0) {
return err;
}
}
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) {
err = nvgpu_init_engine_ucode_vm(g, &mm->gsp, "gsp");
if (err != 0) {
return err;
}
}
if (g->has_cde) {
err = nvgpu_init_cde_vm(mm);
if (err != 0) {
return err;
}
}
err = nvgpu_init_ce_vm(mm);
if (err != 0) {
return err;
}
return err;
}
static int nvgpu_init_mm_components(struct gk20a *g)
{
int err = 0;
struct mm_gk20a *mm = &g->mm;
err = nvgpu_alloc_sysmem_flush(g);
if (err != 0) {
return err;
}
err = nvgpu_init_mm_setup_bar(g);
if (err != 0) {
return err;
}
err = nvgpu_init_mm_setup_vm(g);
if (err != 0) {
return err;
}
err = nvgpu_init_mmu_debug(mm);
if (err != 0) {
return err;
}
/*
* Some chips support replayable MMU faults. For such chips make sure
* SW is initialized.
*/
if (g->ops.mm.mmu_fault.setup_sw != NULL) {
err = g->ops.mm.mmu_fault.setup_sw(g);
if (err != 0) {
return err;
}
}
return 0;
}
static int nvgpu_init_mm_setup_sw(struct gk20a *g)
{
struct mm_gk20a *mm = &g->mm;
int err = 0;
if (mm->sw_ready) {
nvgpu_log_info(g, "skip init");
return 0;
}
mm->g = g;
nvgpu_mutex_init(&mm->l2_op_lock);
	/* TBD: make channel vm size configurable */
g->ops.mm.get_default_va_sizes(NULL, &mm->channel.user_size,
&mm->channel.kernel_size);
nvgpu_log_info(g, "channel vm size: user %uMB kernel %uMB",
nvgpu_safe_cast_u64_to_u32(mm->channel.user_size >> U64(20)),
nvgpu_safe_cast_u64_to_u32(mm->channel.kernel_size >> U64(20)));
#ifdef CONFIG_NVGPU_DGPU
mm->vidmem.ce_ctx_id = NVGPU_CE_INVAL_CTX_ID;
nvgpu_init_pramin(mm);
err = nvgpu_vidmem_init(mm);
if (err != 0) {
return err;
}
/*
 * This requires fixed allocations in vidmem which must be
 * allocated before all other buffers.
*/
if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY) &&
nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
err = nvgpu_acr_alloc_blob_prerequisite(g, g->acr, 0);
if (err != 0) {
return err;
}
}
#endif
err = nvgpu_init_mm_components(g);
if (err != 0) {
return err;
}
if ((g->ops.fb.ecc.init != NULL) && !g->ecc.initialized) {
err = g->ops.fb.ecc.init(g);
if (err != 0) {
return err;
}
}
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
if (nvgpu_fb_vab_init_hal(g) != 0) {
nvgpu_err(g, "failed to init VAB");
}
#endif
mm->remove_support = nvgpu_remove_mm_support;
#ifdef CONFIG_NVGPU_DGPU
mm->remove_ce_support = nvgpu_remove_mm_ce_support;
#endif
mm->sw_ready = true;
return 0;
}
#ifdef CONFIG_NVGPU_DGPU
static int nvgpu_init_mm_pdb_cache_errata(struct gk20a *g)
{
int err;
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_INIT_PDB_CACHE)) {
err = g->ops.ramin.init_pdb_cache_errata(g);
if (err != 0) {
return err;
}
}
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_FB_PDB_CACHE)) {
err = g->ops.fb.apply_pdb_cache_errata(g);
if (err != 0) {
return err;
}
}
return 0;
}
#endif
/*
* Called through the HAL to handle vGPU: the vGPU doesn't have HW to initialize
* here.
*/
int nvgpu_mm_setup_hw(struct gk20a *g)
{
struct mm_gk20a *mm = &g->mm;
int err;
nvgpu_log_fn(g, " ");
if (g->ops.fb.set_mmu_page_size != NULL) {
g->ops.fb.set_mmu_page_size(g);
}
#ifdef CONFIG_NVGPU_COMPRESSION
if (g->ops.fb.set_use_full_comp_tag_line != NULL) {
mm->use_full_comp_tag_line =
g->ops.fb.set_use_full_comp_tag_line(g);
}
#endif
g->ops.fb.init_hw(g);
if (g->ops.bus.bar1_bind != NULL) {
err = g->ops.bus.bar1_bind(g, &mm->bar1.inst_block);
if (err != 0) {
return err;
}
}
if (g->ops.bus.bar2_bind != NULL) {
err = g->ops.bus.bar2_bind(g, &mm->bar2.inst_block);
if (err != 0) {
return err;
}
}
if ((g->ops.mm.cache.fb_flush(g) != 0) ||
(g->ops.mm.cache.fb_flush(g) != 0)) {
return -EBUSY;
}
if (g->ops.mm.mmu_fault.setup_hw != NULL) {
g->ops.mm.mmu_fault.setup_hw(g);
}
nvgpu_log_fn(g, "done");
return 0;
}
int nvgpu_init_mm_support(struct gk20a *g)
{
int err;
#ifdef CONFIG_NVGPU_DGPU
err = nvgpu_init_mm_pdb_cache_errata(g);
if (err != 0) {
return err;
}
#endif
err = nvgpu_init_mm_setup_sw(g);
if (err != 0) {
return err;
}
if (g->ops.mm.setup_hw != NULL) {
err = g->ops.mm.setup_hw(g);
}
return err;
}
u32 nvgpu_mm_get_default_big_page_size(struct gk20a *g)
{
u32 big_page_size;
big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
if (g->mm.disable_bigpage) {
big_page_size = 0;
}
return big_page_size;
}
u32 nvgpu_mm_get_available_big_page_sizes(struct gk20a *g)
{
u32 available_big_page_sizes = 0;
if (g->mm.disable_bigpage) {
return available_big_page_sizes;
}
available_big_page_sizes = g->ops.mm.gmmu.get_default_big_page_size();
if (g->ops.mm.gmmu.get_big_page_sizes != NULL) {
available_big_page_sizes |= g->ops.mm.gmmu.get_big_page_sizes();
}
return available_big_page_sizes;
}

View File

@@ -0,0 +1,418 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/bug.h>
#include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/nvgpu_sgt.h>
#include <nvgpu/dma.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/pramin.h>
#include <nvgpu/string.h>
/*
* Make sure to use the right coherency aperture if you use this function! This
* will not add any checks. If you want to simply use the default coherency then
* use nvgpu_aperture_mask().
*/
u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture,
u32 sysmem_mask, u32 sysmem_coh_mask,
u32 vidmem_mask)
{
u32 ret_mask = 0;
if ((aperture == APERTURE_INVALID) || (aperture >= APERTURE_MAX_ENUM)) {
nvgpu_do_assert_print(g, "Bad aperture");
return 0;
}
/*
	 * Some iGPUs treat sysmem (i.e. SoC DRAM) as vidmem. In these cases the
* "sysmem" aperture should really be translated to VIDMEM.
*/
if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)) {
aperture = APERTURE_VIDMEM;
}
switch (aperture) {
case APERTURE_SYSMEM_COH:
ret_mask = sysmem_coh_mask;
break;
case APERTURE_SYSMEM:
ret_mask = sysmem_mask;
break;
case APERTURE_VIDMEM:
ret_mask = vidmem_mask;
break;
default:
nvgpu_do_assert_print(g, "Bad aperture");
ret_mask = 0;
break;
}
return ret_mask;
}
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
{
enum nvgpu_aperture ap = mem->aperture;
return nvgpu_aperture_mask_raw(g, ap,
sysmem_mask,
sysmem_coh_mask,
vidmem_mask);
}
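/*
 * Editorial sketch (not part of the original file): the difference between
 * nvgpu_aperture_mask_raw() and nvgpu_aperture_mask() above. The mask values
 * are hypothetical placeholders.
 */
static u32 nvgpu_aperture_mask_usage_sketch(struct gk20a *g,
					struct nvgpu_mem *mem)
{
	/*
	 * Default coherency: the aperture is taken from the nvgpu_mem itself
	 * instead of being passed in explicitly.
	 */
	return nvgpu_aperture_mask(g, mem, 0x1U, 0x2U, 0x3U);
}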
bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap)
{
return (ap == APERTURE_SYSMEM_COH) || (ap == APERTURE_SYSMEM);
}
bool nvgpu_mem_is_sysmem(struct nvgpu_mem *mem)
{
return nvgpu_aperture_is_sysmem(mem->aperture);
}
u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys)
{
	/* Ensure this is not a vidmem allocation. */
#ifdef CONFIG_NVGPU_DGPU
WARN_ON(nvgpu_addr_is_vidmem_page_alloc(phys));
#endif
if (nvgpu_iommuable(g) && (g->ops.mm.gmmu.get_iommu_bit != NULL)) {
return phys | (1ULL << g->ops.mm.gmmu.get_iommu_bit(g));
}
return phys;
}
u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u64 w)
{
u32 data = 0;
if (mem->aperture == APERTURE_SYSMEM) {
u32 *ptr = mem->cpu_va;
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON(ptr == NULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
data = ptr[w];
}
#ifdef CONFIG_NVGPU_DGPU
else if (mem->aperture == APERTURE_VIDMEM) {
nvgpu_pramin_rd_n(g, mem, w * (u64)sizeof(u32),
(u64)sizeof(u32), &data);
}
#endif
else {
nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem");
}
return data;
}
u64 nvgpu_mem_rd32_pair(struct gk20a *g, struct nvgpu_mem *mem, u32 lo, u32 hi)
{
u64 lo_data = U64(nvgpu_mem_rd32(g, mem, lo));
u64 hi_data = U64(nvgpu_mem_rd32(g, mem, hi));
return lo_data | (hi_data << 32ULL);
}
u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u64 offset)
{
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON((offset & 3ULL) != 0ULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
return nvgpu_mem_rd32(g, mem, offset / (u64)sizeof(u32));
}
void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
u64 offset, void *dest, u64 size)
{
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON((offset & 3ULL) != 0ULL);
WARN_ON((size & 3ULL) != 0ULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
if (mem->aperture == APERTURE_SYSMEM) {
u8 *src = (u8 *)mem->cpu_va + offset;
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON(mem->cpu_va == NULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
nvgpu_memcpy((u8 *)dest, src, size);
}
#ifdef CONFIG_NVGPU_DGPU
else if (mem->aperture == APERTURE_VIDMEM) {
nvgpu_pramin_rd_n(g, mem, offset, size, dest);
}
#endif
else {
nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem");
}
}
void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u64 w, u32 data)
{
if (mem->aperture == APERTURE_SYSMEM) {
u32 *ptr = mem->cpu_va;
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON(ptr == NULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
ptr[w] = data;
}
#ifdef CONFIG_NVGPU_DGPU
else if (mem->aperture == APERTURE_VIDMEM) {
nvgpu_pramin_wr_n(g, mem, w * (u64)sizeof(u32),
(u64)sizeof(u32), &data);
if (!mem->skip_wmb) {
nvgpu_wmb();
}
}
#endif
else {
nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem");
}
}
void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, u32 data)
{
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON((offset & 3ULL) != 0ULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
nvgpu_mem_wr32(g, mem, offset / (u64)sizeof(u32), data);
}
void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u64 offset,
void *src, u64 size)
{
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON((offset & 3ULL) != 0ULL);
WARN_ON((size & 3ULL) != 0ULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
if (mem->aperture == APERTURE_SYSMEM) {
u8 *dest = (u8 *)mem->cpu_va + offset;
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON(mem->cpu_va == NULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
nvgpu_memcpy(dest, (u8 *)src, size);
}
#ifdef CONFIG_NVGPU_DGPU
else if (mem->aperture == APERTURE_VIDMEM) {
nvgpu_pramin_wr_n(g, mem, offset, size, src);
if (!mem->skip_wmb) {
nvgpu_wmb();
}
}
#endif
else {
nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem");
}
}
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u64 offset,
u32 c, u64 size)
{
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 3, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 3, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 3, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON((offset & 3ULL) != 0ULL);
WARN_ON((size & 3ULL) != 0ULL);
WARN_ON((c & ~0xffU) != 0U);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
c &= 0xffU;
if (mem->aperture == APERTURE_SYSMEM) {
u8 *dest = (u8 *)mem->cpu_va + offset;
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON(mem->cpu_va == NULL);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
(void) memset(dest, (int)c, size);
}
#ifdef CONFIG_NVGPU_DGPU
else if (mem->aperture == APERTURE_VIDMEM) {
u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
nvgpu_pramin_memset(g, mem, offset, size, repeat_value);
if (!mem->skip_wmb) {
nvgpu_wmb();
}
}
#endif
else {
nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem");
}
}
static void *nvgpu_mem_phys_sgl_next(void *sgl)
{
struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
	return (void *)sgl_impl->next;
}
/*
* Provided for compatibility - the DMA address is the same as the phys address
* for these nvgpu_mem's.
*/
static u64 nvgpu_mem_phys_sgl_dma(void *sgl)
{
struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
return sgl_impl->phys;
}
static u64 nvgpu_mem_phys_sgl_phys(struct gk20a *g, void *sgl)
{
struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
return sgl_impl->phys;
}
static u64 nvgpu_mem_phys_sgl_ipa_to_pa(struct gk20a *g,
void *sgl, u64 ipa, u64 *pa_len)
{
return ipa;
}
static u64 nvgpu_mem_phys_sgl_length(void *sgl)
{
struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
return sgl_impl->length;
}
static u64 nvgpu_mem_phys_sgl_gpu_addr(struct gk20a *g, void *sgl,
struct nvgpu_gmmu_attrs *attrs)
{
struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
return sgl_impl->phys;
}
static void nvgpu_mem_phys_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
/*
* No-op here. The free is handled by freeing the nvgpu_mem itself.
*/
}
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 8_7), "Bug 2823817")
static const struct nvgpu_sgt_ops nvgpu_mem_phys_ops = {
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7))
.sgl_next = nvgpu_mem_phys_sgl_next,
.sgl_dma = nvgpu_mem_phys_sgl_dma,
.sgl_phys = nvgpu_mem_phys_sgl_phys,
.sgl_ipa = nvgpu_mem_phys_sgl_phys,
.sgl_ipa_to_pa = nvgpu_mem_phys_sgl_ipa_to_pa,
.sgl_length = nvgpu_mem_phys_sgl_length,
.sgl_gpu_addr = nvgpu_mem_phys_sgl_gpu_addr,
.sgt_free = nvgpu_mem_phys_sgt_free,
/*
* The physical nvgpu_mems are never IOMMU'able by definition.
*/
.sgt_iommuable = NULL
};
int nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
u64 src_phys, u64 nr_pages)
{
int ret = 0;
struct nvgpu_sgt *sgt;
struct nvgpu_mem_sgl *sgl;
/*
* Do the two operations that can fail before touching *dest.
*/
sgt = nvgpu_kzalloc(g, sizeof(*sgt));
sgl = nvgpu_kzalloc(g, sizeof(*sgl));
if ((sgt == NULL) || (sgl == NULL)) {
nvgpu_kfree(g, sgt);
nvgpu_kfree(g, sgl);
return -ENOMEM;
}
(void) memset(dest, 0, sizeof(*dest));
dest->aperture = APERTURE_SYSMEM;
dest->size = nvgpu_safe_mult_u64(nr_pages,
(u64)NVGPU_CPU_PAGE_SIZE);
dest->aligned_size = dest->size;
dest->mem_flags = NVGPU_MEM_FLAG_NO_DMA;
dest->phys_sgt = sgt;
sgl->next = NULL;
sgl->phys = src_phys;
sgl->length = dest->size;
sgt->sgl = (void *)sgl;
sgt->ops = &nvgpu_mem_phys_ops;
return ret;
}
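/*
 * Editorial sketch (not part of the original file): wrapping a physically
 * contiguous region in an nvgpu_mem via nvgpu_mem_create_from_phys() above so
 * it can be handed to code expecting an SGT-backed buffer. The base address
 * and page count are made-up example values.
 */
static int nvgpu_mem_from_phys_usage_sketch(struct gk20a *g,
					struct nvgpu_mem *mem)
{
	/* 16 CPU pages starting at a hypothetical physical address. */
	return nvgpu_mem_create_from_phys(g, mem, 0x80000000ULL, 16ULL);
}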

View File

@@ -0,0 +1,135 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/dma.h>
#include <nvgpu/bitops.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/nvgpu_sgt.h>
#include <nvgpu/nvgpu_sgt_os.h>
#include <nvgpu/log.h>
void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
{
return sgt->ops->sgl_next(sgl);
}
u64 nvgpu_sgt_get_phys(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl)
{
return sgt->ops->sgl_phys(g, sgl);
}
u64 nvgpu_sgt_get_ipa(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl)
{
return sgt->ops->sgl_ipa(g, sgl);
}
u64 nvgpu_sgt_ipa_to_pa(struct gk20a *g, struct nvgpu_sgt *sgt,
void *sgl, u64 ipa, u64 *pa_len)
{
return sgt->ops->sgl_ipa_to_pa(g, sgl, ipa, pa_len);
}
u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl)
{
return sgt->ops->sgl_dma(sgl);
}
u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
{
return sgt->ops->sgl_length(sgl);
}
u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl,
struct nvgpu_gmmu_attrs *attrs)
{
return sgt->ops->sgl_gpu_addr(g, sgl, attrs);
}
bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt)
{
if (sgt->ops->sgt_iommuable != NULL) {
return sgt->ops->sgt_iommuable(g, sgt);
}
return false;
}
void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
if ((sgt != NULL) && (sgt->ops->sgt_free != NULL)) {
sgt->ops->sgt_free(g, sgt);
}
}
/*
* Determine alignment for a passed buffer. Necessary since the buffer may
* appear big enough to map with large pages but the SGL may have chunks that
* are not aligned on a 64/128kB large page boundary. There's also the
 * possibility that chunks are odd sizes, which will necessitate small page
 * mappings to correctly glue them together into a contiguous virtual mapping.
 * A worked example follows the function.
 */
u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt)
{
u64 align = 0, chunk_align = 0;
void *sgl;
/*
* If this SGT is iommuable and we want to use the IOMMU address then
* the SGT's first entry has the IOMMU address. We will align on this
* and double check length of buffer later. Also, since there's an
* IOMMU we know that this DMA address is contiguous.
*/
if (nvgpu_iommuable(g) &&
nvgpu_sgt_iommuable(g, sgt) &&
(nvgpu_sgt_get_dma(sgt, sgt->sgl) != 0ULL)) {
return 1ULL << (nvgpu_ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl))
- 1UL);
}
/*
* Otherwise the buffer is not iommuable (VIDMEM, for example) or we are
* bypassing the IOMMU and need to use the underlying physical entries
* of the SGT.
*/
nvgpu_sgt_for_each_sgl(sgl, sgt) {
chunk_align = 1ULL << nvgpu_safe_sub_u64(nvgpu_ffs(
nvgpu_sgt_get_phys(g, sgt, sgl) |
nvgpu_sgt_get_length(sgt, sgl)), 1UL);
if (align != 0ULL) {
align = min(align, chunk_align);
} else {
align = chunk_align;
}
}
return align;
}
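/*
 * Editorial worked example (not part of the original file): for a hypothetical
 * two-chunk SGL, nvgpu_sgt_alignment() computes each chunk's alignment as the
 * lowest set bit of (phys | length) and returns the minimum:
 *
 *   chunk 0: phys = 0x0030000, length = 0x10000 -> alignment 0x10000
 *   chunk 1: phys = 0x1000000, length = 0x04000 -> alignment 0x04000
 *
 * The result is 0x4000 (16K), so even though the total size would allow a 64K
 * big-page mapping, small pages must be used to glue the chunks together.
 * When the buffer is IOMMU-mapped and the IOMMU address is used, the alignment
 * is instead taken from the lowest set bit of the single DMA address.
 */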
struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
struct nvgpu_mem *mem)
{
if ((mem->mem_flags & NVGPU_MEM_FLAG_NO_DMA) != 0U) {
return mem->phys_sgt;
}
return nvgpu_sgt_os_create_from_mem(g, mem);
}

View File

@@ -0,0 +1,621 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/bug.h>
#include <nvgpu/ce_app.h>
#include <nvgpu/timers.h>
#include <nvgpu/dma.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/enabled.h>
#include <nvgpu/sizes.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_sgt.h>
#include <nvgpu/fence.h>
/*
* This is expected to be called from the shutdown path (or the error path in
* the vidmem init code). As such we do not expect new vidmem frees to be
* enqueued.
*/
void nvgpu_vidmem_destroy(struct gk20a *g)
{
struct nvgpu_timeout timeout;
int err;
if (g->ops.fb.get_vidmem_size == NULL) {
return;
}
err = nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
if (err != 0) {
nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err);
}
/*
* Ensure that the thread runs one last time to flush anything in the
* queue.
*/
nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
/*
* Wait for at most 1 second before just continuing on. It doesn't make
* sense to hang the system over some potential memory leaks.
*/
do {
bool empty;
nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
if (empty) {
break;
}
nvgpu_msleep(10);
} while (nvgpu_timeout_expired(&timeout) == 0);
/*
* Kill the vidmem clearing thread now. This will wake the thread up
	 * automatically and cause the wait_interruptible condition to trigger.
*/
nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) {
nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
}
if (nvgpu_alloc_initialized(&g->mm.vidmem.bootstrap_allocator)) {
nvgpu_alloc_destroy(&g->mm.vidmem.bootstrap_allocator);
}
}
static int nvgpu_vidmem_clear_fence_wait(struct gk20a *g,
struct nvgpu_fence_type *fence_out)
{
struct nvgpu_timeout timeout;
bool done;
int err;
err = nvgpu_timeout_init(g, &timeout,
nvgpu_get_poll_timeout(g),
NVGPU_TIMER_CPU_TIMER);
if (err != 0) {
nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err);
return err;
}
do {
err = nvgpu_fence_wait(g, fence_out,
nvgpu_get_poll_timeout(g));
if (err != -ERESTARTSYS) {
done = true;
} else if (nvgpu_timeout_expired(&timeout) != 0) {
done = true;
} else {
done = false;
}
} while (!done);
nvgpu_fence_put(fence_out);
if (err != 0) {
nvgpu_err(g,
"fence wait failed for CE execute ops");
return err;
}
return 0;
}
static int nvgpu_vidmem_do_clear_all(struct gk20a *g)
{
struct mm_gk20a *mm = &g->mm;
struct nvgpu_fence_type *fence_out = NULL;
int err = 0;
if (mm->vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID) {
return -EINVAL;
}
vidmem_dbg(g, "Clearing all VIDMEM:");
#ifdef CONFIG_NVGPU_DGPU
err = nvgpu_ce_execute_ops(g,
mm->vidmem.ce_ctx_id,
0,
mm->vidmem.base,
mm->vidmem.bootstrap_base - mm->vidmem.base,
0x00000000,
NVGPU_CE_DST_LOCATION_LOCAL_FB,
NVGPU_CE_MEMSET,
0,
&fence_out);
if (err != 0) {
nvgpu_err(g,
"Failed to clear vidmem : %d", err);
return err;
}
#else
/* fail due to lack of ce app support */
return -ENOSYS;
#endif
if (fence_out != NULL) {
err = nvgpu_vidmem_clear_fence_wait(g, fence_out);
if (err != 0) {
return err;
}
}
mm->vidmem.cleared = true;
vidmem_dbg(g, "Done!");
return 0;
}
void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
{
/*
* On the first increment of the pause_count (0 -> 1) take the pause
* lock and prevent the vidmem clearing thread from processing work
* items.
*
* Otherwise the increment is all that's needed - it's essentially a
* ref-count for the number of pause() calls.
*
* The sync component is implemented by waiting for the lock to be
* released by the clearing thread in case the thread is currently
* processing work items.
*/
if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1) {
nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
}
vidmem_dbg(mm->g, "Clearing thread paused; new count=%d",
nvgpu_atomic_read(&mm->vidmem.pause_count));
}
void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
{
vidmem_dbg(mm->g, "Unpausing clearing thread; current count=%d",
nvgpu_atomic_read(&mm->vidmem.pause_count));
/*
* And on the last decrement (1 -> 0) release the pause lock and let
* the vidmem clearing thread continue.
*/
if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0) {
nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
vidmem_dbg(mm->g, " > Clearing thread really unpaused!");
}
}
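/*
 * Editorial sketch (not part of the original file): the pause/unpause pair
 * above is reference counted, so nested callers are safe. A caller that needs
 * the clearing thread quiescent brackets its critical section as below; the
 * function name is hypothetical.
 */
static void nvgpu_vidmem_quiesced_work_sketch(struct mm_gk20a *mm)
{
	nvgpu_vidmem_thread_pause_sync(mm);

	/* ... do work that must not race with vidmem clears ... */

	nvgpu_vidmem_thread_unpause(mm);
}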
int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
{
struct mm_gk20a *mm = &g->mm;
/*
* Crap. Can't enqueue new vidmem bufs! CE may be gone!
*
* However, an errant app can hold a vidmem dma_buf FD open past when
* the nvgpu driver has exited. Thus when the FD does get closed
* eventually the dma_buf release function will try to call the vidmem
* free function which will attempt to enqueue the vidmem into the
* vidmem clearing thread.
*/
if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
return -ENOSYS;
}
nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
nvgpu_list_add_tail(&mem->clear_list_entry,
&mm->vidmem.clear_list_head);
nvgpu_atomic64_add((long)mem->aligned_size, &mm->vidmem.bytes_pending);
nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond);
return 0;
}
static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
{
struct nvgpu_mem *mem = NULL;
nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
nvgpu_mem, clear_list_entry);
nvgpu_list_del(&mem->clear_list_entry);
}
nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
return mem;
}
static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
{
struct gk20a *g = mm->g;
struct nvgpu_mem *mem;
int err;
vidmem_dbg(g, "Running VIDMEM clearing thread:");
while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
err = nvgpu_vidmem_clear(g, mem);
if (err != 0) {
nvgpu_err(g, "nvgpu_vidmem_clear() failed err=%d", err);
}
WARN_ON(nvgpu_atomic64_sub_return((long)mem->aligned_size,
&g->mm.vidmem.bytes_pending) < 0);
mem->size = 0;
mem->aperture = APERTURE_INVALID;
nvgpu_mem_free_vidmem_alloc(g, mem);
nvgpu_kfree(g, mem);
}
vidmem_dbg(g, "Done!");
}
static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
{
struct mm_gk20a *mm = mm_ptr;
/*
	 * Simple thread whose sole job is to periodically clear userspace
* vidmem allocations that have been recently freed.
*
* Since it doesn't make sense to run unless there's pending work a
* condition field is used to wait for work. When the DMA API frees a
* userspace vidmem buf it enqueues it into the clear list and alerts us
* that we have some work to do.
*/
while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
int ret;
/*
* Wait for work but also make sure we should not be paused.
*/
ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
&mm->vidmem.clearing_thread_cond,
nvgpu_thread_should_stop(
&mm->vidmem.clearing_thread) ||
!nvgpu_list_empty(&mm->vidmem.clear_list_head),
0U);
if (ret == -ERESTARTSYS) {
continue;
}
/*
* Use this lock to implement a pause mechanism. By taking this
* lock some other code can prevent this thread from processing
* work items.
*/
if (nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock)
== 0) {
continue;
}
nvgpu_vidmem_clear_pending_allocs(mm);
nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
}
return 0;
}
int nvgpu_vidmem_init(struct mm_gk20a *mm)
{
struct gk20a *g = mm->g;
u64 bootstrap_base, base;
u64 bootstrap_size = SZ_512M;
u64 default_page_size = SZ_64K;
size_t size;
int err;
static struct nvgpu_alloc_carveout bootstrap_co =
NVGPU_CARVEOUT("bootstrap-region", 0, 0);
if (g->ops.fb.get_vidmem_size == NULL) {
/*
		 * As this is a common function, the return value
		 * needs to be handled for iGPU.
*/
return 0;
} else {
size = g->ops.fb.get_vidmem_size(g);
if (size == 0UL) {
nvgpu_err(g, "Found zero vidmem");
return -ENOMEM;
}
}
vidmem_dbg(g, "init begin");
#ifdef CONFIG_NVGPU_SIM
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
bootstrap_size = SZ_32M;
}
#endif
bootstrap_co.base = size - bootstrap_size;
bootstrap_co.length = bootstrap_size;
bootstrap_base = bootstrap_co.base;
base = default_page_size;
/*
* Bootstrap allocator for use before the CE is initialized (CE
* initialization requires vidmem but we want to use the CE to zero
	 * out vidmem before allocating it...).
*/
err = nvgpu_allocator_init(g, &g->mm.vidmem.bootstrap_allocator,
NULL, "vidmem-bootstrap", bootstrap_base,
bootstrap_size, SZ_4K, 0ULL,
GPU_ALLOC_FORCE_CONTIG, PAGE_ALLOCATOR);
err = nvgpu_allocator_init(g, &g->mm.vidmem.allocator, NULL,
"vidmem", base, size - base, default_page_size, 0ULL,
GPU_ALLOC_4K_VIDMEM_PAGES, PAGE_ALLOCATOR);
if (err != 0) {
nvgpu_err(g, "Failed to register vidmem for size %zu: %d",
size, err);
return err;
}
/* Reserve bootstrap region in vidmem allocator */
err = nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator,
&bootstrap_co);
if (err != 0) {
nvgpu_err(g, "nvgpu_alloc_reserve_carveout() failed err=%d",
err);
goto fail;
}
mm->vidmem.base = base;
mm->vidmem.size = size - base;
mm->vidmem.bootstrap_base = bootstrap_base;
mm->vidmem.bootstrap_size = bootstrap_size;
err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
if (err != 0) {
goto fail;
}
nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
nvgpu_init_list_node(&mm->vidmem.clear_list_head);
nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
/*
* Start the thread off in the paused state. The thread doesn't have to
* be running for this to work. It will be woken up later on in
* finalize_poweron(). We won't necessarily have a CE context yet
* either, so hypothetically one could cause a race where we try to
* clear a vidmem struct before we have a CE context to do so.
*/
nvgpu_vidmem_thread_pause_sync(mm);
err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
nvgpu_vidmem_clear_pending_allocs_thr,
"vidmem-clear");
if (err != 0) {
goto fail;
}
vidmem_dbg(g, "VIDMEM Total: %zu MB", size >> 20);
vidmem_dbg(g, "VIDMEM Ranges:");
vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx Primary",
mm->vidmem.base, mm->vidmem.base + mm->vidmem.size);
vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx Bootstrap",
mm->vidmem.bootstrap_base,
mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size);
vidmem_dbg(g, "VIDMEM carveouts:");
vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx %s",
bootstrap_co.base, bootstrap_co.base + bootstrap_co.length,
bootstrap_co.name);
return 0;
fail:
nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
nvgpu_vidmem_destroy(g);
return err;
}
int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
{
struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator;
nvgpu_log_fn(g, " ");
if (!nvgpu_alloc_initialized(allocator)) {
return -ENOSYS;
}
*space = nvgpu_alloc_space(allocator) +
U64(nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending));
return 0;
}
int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
{
struct nvgpu_fence_type *fence_out = NULL;
struct nvgpu_fence_type *last_fence = NULL;
struct nvgpu_page_alloc *alloc = NULL;
void *sgl = NULL;
int err = 0;
if (g->mm.vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID) {
return -EINVAL;
}
alloc = mem->vidmem_alloc;
nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) {
if (last_fence != NULL) {
nvgpu_fence_put(last_fence);
}
#ifdef CONFIG_NVGPU_DGPU
err = nvgpu_ce_execute_ops(g,
g->mm.vidmem.ce_ctx_id,
0,
nvgpu_sgt_get_phys(g, &alloc->sgt, sgl),
nvgpu_sgt_get_length(&alloc->sgt, sgl),
0x00000000,
NVGPU_CE_DST_LOCATION_LOCAL_FB,
NVGPU_CE_MEMSET,
0,
&fence_out);
#else
/* fail due to lack of ce app support */
err = -ENOSYS;
#endif
if (err != 0) {
#ifdef CONFIG_NVGPU_DGPU
nvgpu_err(g,
"Failed nvgpu_ce_execute_ops[%d]", err);
#endif
return err;
}
vidmem_dbg(g, " > [0x%llx +0x%llx]",
nvgpu_sgt_get_phys(g, &alloc->sgt, sgl),
nvgpu_sgt_get_length(&alloc->sgt, sgl));
last_fence = fence_out;
}
if (last_fence != NULL) {
err = nvgpu_vidmem_clear_fence_wait(g, last_fence);
if (err != 0) {
return err;
}
}
vidmem_dbg(g, " Done");
return err;
}
static int nvgpu_vidmem_clear_all(struct gk20a *g)
{
	int err;

	/* Fast path: the whole of vidmem has already been cleared once. */
	if (g->mm.vidmem.cleared) {
		return 0;
	}

	nvgpu_mutex_acquire(&g->mm.vidmem.first_clear_mutex);
	/* Re-check under the mutex so only the first caller does the clear. */
	if (!g->mm.vidmem.cleared) {
		err = nvgpu_vidmem_do_clear_all(g);
		if (err != 0) {
			nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex);
			nvgpu_err(g, "failed to clear whole vidmem");
			return err;
		}
	}
	nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex);

	return 0;
}
int nvgpu_vidmem_user_alloc(struct gk20a *g, size_t bytes,
struct nvgpu_vidmem_buf **vidmem_buf)
{
struct nvgpu_vidmem_buf *buf;
int err;
if (vidmem_buf == NULL) {
return -EINVAL;
}
err = nvgpu_vidmem_clear_all(g);
if (err != 0) {
return -ENOMEM;
}
buf = nvgpu_kzalloc(g, sizeof(*buf));
if (buf == NULL) {
return -ENOMEM;
}
buf->g = g;
buf->mem = nvgpu_kzalloc(g, sizeof(*buf->mem));
if (buf->mem == NULL) {
err = -ENOMEM;
goto fail;
}
err = nvgpu_dma_alloc_vid(g, bytes, buf->mem);
if (err != 0) {
goto fail;
}
/*
* Alerts the DMA API that when we free this vidmem buf we have to
* clear it to avoid leaking data to userspace.
*/
buf->mem->mem_flags |= NVGPU_MEM_FLAG_USER_MEM;
*vidmem_buf = buf;
return 0;
fail:
/* buf will never be NULL here. */
nvgpu_kfree(g, buf->mem);
nvgpu_kfree(g, buf);
return err;
}
void nvgpu_vidmem_buf_free(struct gk20a *g, struct nvgpu_vidmem_buf *buf)
{
/*
* In some error paths it's convenient to be able to "free" a NULL buf.
*/
if (buf == NULL) {
return;
}
nvgpu_dma_free(g, buf->mem);
	/*
	 * buf->mem itself is not freed here; that is handled by
	 * nvgpu_dma_free(). Since these buffers are cleared in the
	 * background, the nvgpu_mem struct must outlive this call, so
	 * ownership is transferred to the DMA API, which frees the struct.
	 */
nvgpu_kfree(g, buf);
}
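
/*
 * Illustrative sketch only, not part of the original file: the expected
 * pairing of the user-facing alloc/free helpers above. The 1 MB size is an
 * arbitrary example value.
 */
static int nvgpu_vidmem_user_alloc_sketch(struct gk20a *g)
{
	struct nvgpu_vidmem_buf *buf = NULL;
	int err = nvgpu_vidmem_user_alloc(g, (size_t)(1UL << 20), &buf);

	if (err != 0) {
		return err;
	}

	/* buf->mem would normally be handed to the export/mapping path. */

	/* NULL-safe; the backing memory is cleared again on free. */
	nvgpu_vidmem_buf_free(g, buf);
	return 0;
}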

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,311 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/vm.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/barrier.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/static_analysis.h>
struct nvgpu_vm_area *nvgpu_vm_area_find(struct vm_gk20a *vm, u64 addr)
{
struct nvgpu_vm_area *vm_area;
nvgpu_list_for_each_entry(vm_area, &vm->vm_area_list,
nvgpu_vm_area, vm_area_list) {
if (addr >= vm_area->addr) {
if (addr < nvgpu_safe_add_u64(vm_area->addr,
vm_area->size)) {
return vm_area;
}
}
}
return NULL;
}
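
/*
 * Illustrative sketch only, not part of the original file: checking whether a
 * GPU VA lies inside a reserved area. The list walk in nvgpu_vm_area_find()
 * is not locked internally, so the sketch takes update_gmmu_lock around it.
 */
static bool nvgpu_vm_area_contains_sketch(struct vm_gk20a *vm, u64 addr)
{
	struct nvgpu_vm_area *vm_area;
	bool found;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	vm_area = nvgpu_vm_area_find(vm, addr);
	found = (vm_area != NULL);
	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return found;
}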
int nvgpu_vm_area_validate_buffer(struct vm_gk20a *vm,
u64 map_addr, u64 map_size, u32 pgsz_idx,
struct nvgpu_vm_area **pvm_area)
{
struct gk20a *g = vm->mm->g;
struct nvgpu_vm_area *vm_area;
struct nvgpu_mapped_buf *buffer;
u64 map_end;
/* can wrap around with insane map_size; zero is disallowed too */
if (((U64_MAX - map_size) < map_addr) || (map_size == 0ULL)) {
nvgpu_warn(g, "fixed offset mapping with invalid map_size");
return -EINVAL;
}
map_end = map_addr + map_size;
if ((map_addr &
nvgpu_safe_sub_u64(U64(vm->gmmu_page_sizes[pgsz_idx]), U64(1)))
!= 0ULL) {
nvgpu_err(g, "map offset must be buffer page size aligned 0x%llx",
map_addr);
return -EINVAL;
}
/* Find the space reservation, but it's ok to have none for
* userspace-managed address spaces */
vm_area = nvgpu_vm_area_find(vm, map_addr);
if ((vm_area == NULL) && !vm->userspace_managed) {
nvgpu_warn(g, "fixed offset mapping without space allocation");
return -EINVAL;
}
/* Mapped area should fit inside va, if there's one */
if (vm_area != NULL) {
if (map_end > nvgpu_safe_add_u64(vm_area->addr,
vm_area->size)) {
nvgpu_warn(g,
"fixed offset mapping size overflows va node");
return -EINVAL;
}
}
/* check that this mapping does not collide with existing
* mappings by checking the buffer with the highest GPU VA
* that is less than our buffer end */
buffer = nvgpu_vm_find_mapped_buf_less_than(
vm, map_end);
if (buffer != NULL) {
if (nvgpu_safe_add_u64(buffer->addr, buffer->size) > map_addr) {
nvgpu_warn(g, "overlapping buffer map requested");
return -EINVAL;
}
}
*pvm_area = vm_area;
return 0;
}
static int nvgpu_vm_area_alloc_get_pagesize_index(struct vm_gk20a *vm,
u32 *pgsz_idx_ptr, u32 page_size)
{
u32 pgsz_idx = *pgsz_idx_ptr;
for (; pgsz_idx < GMMU_NR_PAGE_SIZES; pgsz_idx++) {
if (vm->gmmu_page_sizes[pgsz_idx] == page_size) {
break;
}
}
*pgsz_idx_ptr = pgsz_idx;
if (pgsz_idx > GMMU_PAGE_SIZE_BIG) {
return -EINVAL;
}
	/*
	 * pgsz_idx is bounded by the loop and the check above, but add a
	 * speculation barrier so a speculatively out-of-range index cannot
	 * be consumed as a page size index before the bound check resolves.
	 */
nvgpu_speculation_barrier();
if (!vm->big_pages && (pgsz_idx == GMMU_PAGE_SIZE_BIG)) {
return -EINVAL;
}
return 0;
}
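
/*
 * Illustrative sketch only, not part of the original file: resolving a page
 * size index with the helper above. The search resumes from the index passed
 * in, so it must be seeded, here with GMMU_PAGE_SIZE_SMALL as the caller in
 * nvgpu_vm_area_alloc() does.
 */
static int nvgpu_vm_area_pgsz_idx_sketch(struct vm_gk20a *vm, u32 page_size,
					u32 *pgsz_idx)
{
	u32 idx = GMMU_PAGE_SIZE_SMALL;
	int err = nvgpu_vm_area_alloc_get_pagesize_index(vm, &idx, page_size);

	if (err != 0) {
		return err;
	}

	*pgsz_idx = idx;
	return 0;
}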
static int nvgpu_vm_area_alloc_memory(struct nvgpu_allocator *vma, u64 our_addr,
		u64 pages, u32 page_size, u32 flags,
		u64 *vaddr_start_ptr)
{
	u64 vaddr_start = 0;

	if ((flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) != 0U) {
		vaddr_start = nvgpu_alloc_fixed(vma, our_addr,
				pages * (u64)page_size, page_size);
	} else {
		vaddr_start = nvgpu_alloc_pte(vma,
				pages * (u64)page_size, page_size);
	}

	if (vaddr_start == 0ULL) {
		return -ENOMEM;
	}

	*vaddr_start_ptr = vaddr_start;
	return 0;
}
static int nvgpu_vm_area_alloc_gmmu_map(struct vm_gk20a *vm,
struct nvgpu_vm_area *vm_area, u64 vaddr_start,
u32 pgsz_idx, u32 flags)
{
struct gk20a *g = vm->mm->g;
if ((flags & NVGPU_VM_AREA_ALLOC_SPARSE) != 0U) {
u64 map_addr = g->ops.mm.gmmu.map(vm, vaddr_start,
NULL,
0,
vm_area->size,
pgsz_idx,
0,
0,
flags,
gk20a_mem_flag_none,
false,
true,
false,
NULL,
APERTURE_INVALID);
if (map_addr == 0ULL) {
return -ENOMEM;
}
vm_area->sparse = true;
}
nvgpu_list_add_tail(&vm_area->vm_area_list, &vm->vm_area_list);
return 0;
}
int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u64 pages, u32 page_size,
u64 *addr, u32 flags)
{
struct gk20a *g = vm->mm->g;
struct nvgpu_allocator *vma;
struct nvgpu_vm_area *vm_area;
u64 vaddr_start = 0;
u64 our_addr = *addr;
u32 pgsz_idx = GMMU_PAGE_SIZE_SMALL;
/*
* If we have a fixed address then use the passed address in *addr. This
* corresponds to the o_a field in the IOCTL. But since we do not
* support specific alignments in the buddy allocator we ignore the
* field if it isn't a fixed offset.
*/
if ((flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) != 0U) {
our_addr = *addr;
}
nvgpu_log(g, gpu_dbg_map,
"ADD vm_area: pgsz=%#-8x pages=%-9llu a/o=%#-14llx flags=0x%x",
page_size, pages, our_addr, flags);
if (nvgpu_vm_area_alloc_get_pagesize_index(vm, &pgsz_idx,
page_size) != 0) {
return -EINVAL;
}
vm_area = nvgpu_kzalloc(g, sizeof(*vm_area));
if (vm_area == NULL) {
return -ENOMEM;
}
vma = vm->vma[pgsz_idx];
if (nvgpu_vm_area_alloc_memory(vma, our_addr, pages,
page_size, flags, &vaddr_start) != 0) {
goto free_vm_area;
}
vm_area->flags = flags;
vm_area->addr = vaddr_start;
vm_area->size = (u64)page_size * pages;
vm_area->pgsz_idx = pgsz_idx;
nvgpu_init_list_node(&vm_area->buffer_list_head);
nvgpu_init_list_node(&vm_area->vm_area_list);
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
if (nvgpu_vm_area_alloc_gmmu_map(vm, vm_area, vaddr_start,
pgsz_idx, flags) != 0) {
nvgpu_mutex_release(&vm->update_gmmu_lock);
goto free_vaddr;
}
nvgpu_mutex_release(&vm->update_gmmu_lock);
*addr = vaddr_start;
return 0;
free_vaddr:
nvgpu_free(vma, vaddr_start);
free_vm_area:
nvgpu_kfree(g, vm_area);
return -ENOMEM;
}
int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
{
struct gk20a *g = gk20a_from_vm(vm);
struct nvgpu_mapped_buf *buffer;
struct nvgpu_vm_area *vm_area;
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
vm_area = nvgpu_vm_area_find(vm, addr);
if (vm_area == NULL) {
nvgpu_mutex_release(&vm->update_gmmu_lock);
return 0;
}
nvgpu_list_del(&vm_area->vm_area_list);
nvgpu_log(g, gpu_dbg_map,
"DEL vm_area: pgsz=%#-8x pages=%-9llu "
"addr=%#-14llx flags=0x%x",
vm->gmmu_page_sizes[vm_area->pgsz_idx],
vm_area->size / vm->gmmu_page_sizes[vm_area->pgsz_idx],
vm_area->addr,
vm_area->flags);
/* Decrement the ref count on all buffers in this vm_area. This
* allows userspace to let the kernel free mappings that are
* only used by this vm_area. */
while (!nvgpu_list_empty(&vm_area->buffer_list_head)) {
buffer = nvgpu_list_first_entry(&vm_area->buffer_list_head,
nvgpu_mapped_buf, buffer_list);
nvgpu_list_del(&buffer->buffer_list);
nvgpu_ref_put(&buffer->ref, nvgpu_vm_unmap_ref_internal);
}
/* if this was a sparse mapping, free the va */
if (vm_area->sparse) {
g->ops.mm.gmmu.unmap(vm,
vm_area->addr,
vm_area->size,
vm_area->pgsz_idx,
false,
gk20a_mem_flag_none,
true,
NULL);
}
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_free(vm->vma[vm_area->pgsz_idx], vm_area->addr);
nvgpu_kfree(g, vm_area);
return 0;
}
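
/*
 * Illustrative sketch only, not part of the original file: reserving a small,
 * sparse VM area and releasing it again with the two entry points above. The
 * page count and flags are arbitrary example values.
 */
static int nvgpu_vm_area_alloc_free_sketch(struct vm_gk20a *vm)
{
	u64 addr = 0ULL;
	u32 page_size = vm->gmmu_page_sizes[GMMU_PAGE_SIZE_SMALL];
	int err;

	err = nvgpu_vm_area_alloc(vm, 16ULL, page_size, &addr,
				NVGPU_VM_AREA_ALLOC_SPARSE);
	if (err != 0) {
		return err;
	}

	/* Fixed-offset maps could now go inside [addr, addr + 16*page_size). */

	return nvgpu_vm_area_free(vm, addr);
}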