mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
Revert "gpu: nvgpu: New allocator for VA space"
This reverts commit 2e235ac150fa4af8632c9abf0f109a10973a0bf5.

Change-Id: I3aa745152124c2bc09c6c6dc5aeb1084ae7e08a4
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/741469
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Hiroshi Doyu <hdoyu@nvidia.com>
Tested-by: Hiroshi Doyu <hdoyu@nvidia.com>
committed by Hiroshi Doyu
parent a2e8523645
commit aa25a952ea
@@ -199,14 +199,21 @@ static int gk20a_as_ioctl_get_va_regions(
 	for (i = 0; i < write_entries; ++i) {
 		struct nvgpu_as_va_region region;
+		u32 base, limit;
 
 		memset(&region, 0, sizeof(struct nvgpu_as_va_region));
 
+		if (!vm->vma[i].constraint.enable) {
+			base = vm->vma[i].base;
+			limit = vm->vma[i].limit;
+		} else {
+			base = vm->vma[i].constraint.base;
+			limit = vm->vma[i].constraint.limit;
+		}
+
 		region.page_size = vm->gmmu_page_sizes[i];
-		region.offset = vm->vma[i].base;
-		/* No __aeabi_uldivmod() on some platforms... */
-		region.pages = (vm->vma[i].end - vm->vma[i].start) >>
-				ilog2(region.page_size);
+		region.offset = (u64)base * region.page_size;
+		region.pages = limit - base; /* NOTE: limit is exclusive */
 
 		if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
 			return -EFAULT;
@@ -59,7 +59,6 @@
 #include "hw_fb_gk20a.h"
 #include "gk20a_scale.h"
 #include "dbg_gpu_gk20a.h"
-#include "gk20a_allocator.h"
 #include "hal.h"
 #include "vgpu/vgpu.h"
 
@@ -1511,7 +1510,6 @@ static int gk20a_probe(struct platform_device *dev)
 				  &gk20a->mm.disable_bigpage);
 	gk20a_pmu_debugfs_init(dev);
 	gk20a_cde_debugfs_init(dev);
-	gk20a_alloc_debugfs_init(dev);
 #endif
 
 	gk20a_init_gr(gk20a);
File diff suppressed because it is too large
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -17,190 +17,75 @@
 #ifndef GK20A_ALLOCATOR_H
 #define GK20A_ALLOCATOR_H
 
-#include <linux/list.h>
 #include <linux/rbtree.h>
-#include <linux/debugfs.h>
-#include <linux/platform_device.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
 
 /* #define ALLOCATOR_DEBUG */
 
-/*
- * Each buddy is an element in a binary tree.
- */
-struct gk20a_buddy {
-	struct gk20a_buddy *parent;	/* Parent node. */
-	struct gk20a_buddy *buddy;	/* This node's buddy. */
-	struct gk20a_buddy *left;	/* Lower address sub-node. */
-	struct gk20a_buddy *right;	/* Higher address sub-node. */
-
-	struct list_head buddy_entry;	/* List entry for various lists. */
-	struct rb_node alloced_entry;	/* RB tree of allocations. */
-
-	u64 start;			/* Start address of this buddy. */
-	u64 end;			/* End address of this buddy. */
-	u64 order;			/* Buddy order. */
-
-#define BALLOC_BUDDY_ALLOCED	0x1
-#define BALLOC_BUDDY_SPLIT	0x2
-#define BALLOC_BUDDY_IN_LIST	0x4
-	int flags;			/* List of associated flags. */
-
-	/*
-	 * Size of the PDE this buddy is using. This allows for grouping like
-	 * sized allocations into the same PDE.
-	 */
-#define BALLOC_PTE_SIZE_ANY	0x0
-#define BALLOC_PTE_SIZE_SMALL	0x1
-#define BALLOC_PTE_SIZE_BIG	0x2
-	int pte_size;
-};
-
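
The buddy tree removed above is easiest to picture through the split operation its fields imply. A minimal sketch under the same field names (illustrative only; the real logic lives in gk20a_allocator.c, whose diff the viewer suppressed earlier on this page):

	/*
	 * Sketch: splitting an order-n buddy into two order-(n-1) children
	 * covering its lower and upper halves. Error handling and the
	 * list/rb-tree bookkeeping are omitted.
	 */
	static void buddy_split_sketch(struct gk20a_buddy *b,
				       struct gk20a_buddy *lo,
				       struct gk20a_buddy *hi)
	{
		u64 half = (b->end - b->start) / 2;

		lo->start = b->start;
		lo->end = b->start + half;
		hi->start = b->start + half;
		hi->end = b->end;
		lo->order = hi->order = b->order - 1;

		lo->parent = hi->parent = b;
		lo->buddy = hi;		/* each child's buddy is its sibling */
		hi->buddy = lo;

		b->left = lo;
		b->right = hi;
		buddy_set_split(b);	/* flag helper generated just below */
	}
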
-#define __buddy_flag_ops(flag, flag_up)					\
-	static inline int buddy_is_ ## flag(struct gk20a_buddy *b)	\
-	{								\
-		return b->flags & BALLOC_BUDDY_ ## flag_up;		\
-	}								\
-	static inline void buddy_set_ ## flag(struct gk20a_buddy *b)	\
-	{								\
-		b->flags |= BALLOC_BUDDY_ ## flag_up;			\
-	}								\
-	static inline void buddy_clr_ ## flag(struct gk20a_buddy *b)	\
-	{								\
-		b->flags &= ~BALLOC_BUDDY_ ## flag_up;			\
-	}
-
-/*
- * int  buddy_is_alloced(struct gk20a_buddy *b);
- * void buddy_set_alloced(struct gk20a_buddy *b);
- * void buddy_clr_alloced(struct gk20a_buddy *b);
- *
- * int  buddy_is_split(struct gk20a_buddy *b);
- * void buddy_set_split(struct gk20a_buddy *b);
- * void buddy_clr_split(struct gk20a_buddy *b);
- *
- * int  buddy_is_in_list(struct gk20a_buddy *b);
- * void buddy_set_in_list(struct gk20a_buddy *b);
- * void buddy_clr_in_list(struct gk20a_buddy *b);
- */
-__buddy_flag_ops(alloced, ALLOCED);
-__buddy_flag_ops(split, SPLIT);
-__buddy_flag_ops(in_list, IN_LIST);
-
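
Expanded by hand, one instantiation, __buddy_flag_ops(split, SPLIT), yields exactly the helpers the removed comment above enumerates:

	static inline int buddy_is_split(struct gk20a_buddy *b)
	{
		return b->flags & BALLOC_BUDDY_SPLIT;
	}
	static inline void buddy_set_split(struct gk20a_buddy *b)
	{
		b->flags |= BALLOC_BUDDY_SPLIT;
	}
	static inline void buddy_clr_split(struct gk20a_buddy *b)
	{
		b->flags &= ~BALLOC_BUDDY_SPLIT;
	}
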
-/*
- * Keeps info for a fixed allocation.
- */
-struct gk20a_fixed_alloc {
-	struct list_head buddies;	/* List of buddies. */
-	struct rb_node alloced_entry;	/* RB tree of fixed allocations. */
-
-	u64 start;			/* Start of fixed block. */
-	u64 end;			/* End address. */
-};
-
-struct vm_gk20a;
-
-/*
- * GPU buddy allocator for the various GPU address spaces. Each addressable unit
- * doesn't have to correspond to a byte. In some cases each unit is a more
- * complex object such as a comp_tag line or the like.
- *
- * The max order is computed based on the size of the minimum order and the size
- * of the address space.
- *
- * order_size is the size of an order 0 buddy.
- */
+/* main struct */
 struct gk20a_allocator {
-
-	struct vm_gk20a *vm;		/* Parent VM - can be NULL. */
-
-	char name[32];			/* Name of allocator. */
-
-	u64 base;			/* Base address of the space. */
-	u64 length;			/* Length of the space. */
-	u64 blk_size;			/* Size of order 0 allocation. */
-	u64 blk_shift;			/* Shift to divide by blk_size. */
-
-	int init;			/* Non-zero if initialized. */
-
-	/* Internal stuff. */
-	u64 start;			/* Real start (aligned to blk_size). */
-	u64 end;			/* Real end, trimmed if needed. */
-	u64 count;			/* Count of objects in space. */
-	u64 blks;			/* Count of blks in the space. */
-	u64 max_order;			/* Specific maximum order. */
-
-	struct rb_root alloced_buddies;	/* Outstanding allocations. */
-	struct rb_root fixed_allocs;	/* Outstanding fixed allocations. */
-
-	struct mutex lock;		/* Protects buddy access. */
-
-#define GPU_BALLOC_GVA_SPACE	0x1
-	u64 flags;
-
-	/*
-	 * Impose an upper bound on the maximum order.
-	 */
-#define GPU_BALLOC_MAX_ORDER	31
-#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1)
-
-	struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
-	u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
-	u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
-	u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
-
-	/*
-	 * This is for when the allocator is managing a GVA space (the
-	 * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires
-	 * that we group like sized allocations into PDE blocks.
-	 */
-	u64 pte_blk_order;
-
-	struct dentry *debugfs_entry;
-
-	u64 bytes_alloced;
-	u64 bytes_alloced_real;
-	u64 bytes_freed;
+	char name[32];			/* name for allocator */
+	struct rb_root rb_root;		/* rb tree root for blocks */
+
+	u32 base;			/* min value of this linear space */
+	u32 limit;			/* max value = limit - 1 */
+
+	unsigned long *bitmap;		/* bitmap */
+
+	struct gk20a_alloc_block *block_first;	/* first block in list */
+	struct gk20a_alloc_block *block_recent;	/* last visited block */
+
+	u32 first_free_addr;		/* first free addr, non-contigous
+					   allocation preferred start,
+					   in order to pick up small holes */
+	u32 last_free_addr;		/* last free addr, contiguous
+					   allocation preferred start */
+	u32 cached_hole_size;		/* max free hole size up to
+					   last_free_addr */
+	u32 block_count;		/* number of blocks */
+
+	struct rw_semaphore rw_sema;	/* lock */
+	struct kmem_cache *block_cache;	/* slab cache */
+
+	/* if enabled, constrain to [base, limit) */
+	struct {
+		bool enable;
+		u32 base;
+		u32 limit;
+	} constraint;
+
+	int (*alloc)(struct gk20a_allocator *allocator,
+		u32 *addr, u32 len, u32 align);
+	int (*free)(struct gk20a_allocator *allocator,
+		u32 addr, u32 len, u32 align);
+
 };
 
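
The removed struct's comment says the maximum order follows from the order-0 block size and the length of the space. A hedged sketch of that derivation (the helper name is invented; the real computation sits in the suppressed gk20a_allocator.c):

	/*
	 * Invented illustration: a space of `length` units with order-0
	 * blocks of blk_size units holds length >> ilog2(blk_size) blocks,
	 * so the largest useful order is ilog2 of that count, clamped to
	 * GPU_BALLOC_MAX_ORDER. E.g. length = 1MB with blk_size = 4KB gives
	 * 256 blocks -> max order 8.
	 */
	static inline u64 balloc_max_order_sketch(u64 length, u64 blk_size)
	{
		u64 blks = length >> ilog2(blk_size);

		return min_t(u64, ilog2(blks), 31 /* GPU_BALLOC_MAX_ORDER */);
	}
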
-#define balloc_lock(a)   mutex_lock(&(a)->lock)
-#define balloc_unlock(a) mutex_unlock(&(a)->lock)
-
-#define balloc_get_order_list(a, order)	(&(a)->buddy_list[(order)])
-#define balloc_order_to_len(a, order)	((1 << order) * (a)->blk_size)
-#define balloc_base_shift(a, base)	((base) - (a)->start)
-#define balloc_base_unshift(a, base)	((base) + (a)->start)
-
-int gk20a_allocator_init(struct gk20a_allocator *allocator,
-			 const char *name, u64 base, u64 size, u64 order0);
-int __gk20a_allocator_init(struct gk20a_allocator *allocator,
-			   struct vm_gk20a *vm, const char *name,
-			   u64 base, u64 size, u64 order0,
-			   u64 max_order, u64 flags);
+int gk20a_allocator_init(struct gk20a_allocator *allocator,
+			const char *name, u32 base, u32 size);
 void gk20a_allocator_destroy(struct gk20a_allocator *allocator);
 
-/*
- * Normal alloc/free operations for the buddy allocator.
- */
-u64 gk20a_balloc(struct gk20a_allocator *allocator, u64 len);
-void gk20a_bfree(struct gk20a_allocator *allocator, u64 addr);
+int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
+			u32 *addr, u32 len, u32 align);
 
-/*
- * Special interface to allocate a memory regions with a specific starting
- * address. Yikes.
- */
-u64 gk20a_balloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
-
-/*
- * Debugfs init.
- */
-void gk20a_alloc_debugfs_init(struct platform_device *pdev);
+int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
+			u32 addr, u32 len, u32 align);
 
 #if defined(ALLOCATOR_DEBUG)
-#define balloc_dbg(alloctor, format, arg...)		\
-	pr_info("%-25s %25s() " format,			\
-		alloctor->name, __func__, ##arg)
-#else
-#define balloc_dbg(allocator, format, arg...)
-#endif
+
+#define allocator_dbg(alloctor, format, arg...)             \
+do {                                                        \
+	if (1)                                              \
+		pr_debug("gk20a_allocator (%s) %s: " format "\n",\
+			alloctor->name, __func__, ##arg);\
+} while (0)
+
+#else /* ALLOCATOR_DEBUG */
+
+#define allocator_dbg(format, arg...)
+
+#endif /* ALLOCATOR_DEBUG */
 
 #endif /* GK20A_ALLOCATOR_H */
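
Taken together, the two declaration sets above show the interface change that drives every caller-side hunk below: gk20a_balloc() takes a byte length and returns an address, with the allocator remembering the size until gk20a_bfree(); the restored alloc/free hooks traffic in caller-defined units (pages, comptag lines, DMEM bytes) and must be handed the length and alignment again at free time. A hedged sketch of the contrast (vma and page_shift are assumptions, and the two halves target the pre- and post-revert trees respectively, so they cannot compile together):

	/* Buddy API (removed): length in, address out, size tracked inside. */
	u64 addr = gk20a_balloc(vma, SZ_64K);
	if (!addr)
		return -ENOMEM;
	gk20a_bfree(vma, addr);

	/* Restored API: unit counts in and out; free repeats len and align. */
	u32 start = 0;
	int err = vma->alloc(vma, &start, SZ_64K >> page_shift, 1);
	if (err)
		return err;
	vma->free(vma, start, SZ_64K >> page_shift, 1);
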
@@ -89,8 +89,9 @@ static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (err)
 		return err;
 
-	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
-			       1, max_comptag_lines - 1, 1, 10, 0);
+	gk20a_allocator_init(&gr->comp_tags, "comptag",
+			     1, /* start */
+			     max_comptag_lines - 1); /* length*/
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_fbp / g->ltc_count;
 
@@ -132,8 +132,10 @@ static void gk20a_mm_delete_priv(void *_priv)
 
 	if (priv->comptags.lines) {
 		BUG_ON(!priv->comptag_allocator);
-		gk20a_bfree(priv->comptag_allocator,
-			    priv->comptags.real_offset);
+		priv->comptag_allocator->free(priv->comptag_allocator,
+					      priv->comptags.offset,
+					      priv->comptags.allocated_lines,
+					      1);
 	}
 
 	/* Free buffer states */
@@ -222,9 +224,10 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 				u32 lines, bool user_mappable)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
+	u32 offset = 0;
+	int err;
 	u32 ctaglines_to_allocate;
-	u32 ctagline_align = 1;
-	u32 offset;
+	u32 ctagline_align;
 	const u32 aggregate_cacheline_sz =
 		g->gr.cacheline_size * g->gr.slices_per_ltc *
 		g->ltc_count;
@@ -238,6 +241,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 	if (!user_mappable) {
 		ctaglines_to_allocate = lines;
+		ctagline_align = 1;
 	} else {
 		/* Unfortunately, we cannot use allocation alignment
 		 * here, since compbits per cacheline is not always a
@@ -269,26 +273,72 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 		if (ctaglines_to_allocate < lines)
 			return -EINVAL; /* integer overflow */
-		pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
 	}
 
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
-	offset = gk20a_balloc(allocator, ctaglines_to_allocate);
-	if (!offset)
-		return -ENOMEM;
-
-	priv->comptags.lines = lines;
-	priv->comptags.real_offset = offset;
-
-	if (user_mappable)
-		offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
-
-	priv->comptags.offset = offset;
+	err = allocator->alloc(allocator, &offset,
+			       ctaglines_to_allocate, 1);
+	if (!err) {
+		const u32 alignment_lines =
+			DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
+			offset;
 
-	return 0;
+		/* prune the preceding ctaglines that were allocated
+		   for alignment */
+		if (alignment_lines) {
+			/* free alignment lines */
+			int tmp=
+				allocator->free(allocator, offset,
+					alignment_lines,
+					1);
+			WARN_ON(tmp);
+
+			offset += alignment_lines;
+			ctaglines_to_allocate -= alignment_lines;
+		}
+
+		/* check if we can prune the trailing, too */
+		if (user_mappable)
+		{
+			u32 needed_cachelines =
+				DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
+
+			u32 first_unneeded_cacheline =
+				DIV_ROUND_UP(round_up(needed_cachelines *
+						      aggregate_cacheline_sz,
+						      small_pgsz),
+					     aggregate_cacheline_sz);
+			u32 needed_ctaglines =
+				first_unneeded_cacheline *
+				g->gr.comptags_per_cacheline;
+
+			if (needed_ctaglines < ctaglines_to_allocate) {
+				/* free alignment lines */
+				int tmp=
+					allocator->free(
+						allocator,
+						offset + needed_ctaglines,
+						(ctaglines_to_allocate -
+						 needed_ctaglines),
+						1);
+				WARN_ON(tmp);
+
+				ctaglines_to_allocate = needed_ctaglines;
+			}
+		}
+
+		priv->comptags.offset = offset;
+		priv->comptags.lines = lines;
+		priv->comptags.allocated_lines = ctaglines_to_allocate;
+		priv->comptags.user_mappable = user_mappable;
+	}
+	return err;
 }
 
 
 
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
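
Because the restored allocator is asked for align = 1 here (compbits per cacheline need not be a power of two, per the comment above), it over-allocates and then prunes; the arithmetic is clearer with numbers. A worked example with invented values:

	/*
	 * Illustration (invented numbers): the allocator returned
	 * offset = 1000 and ctagline_align = 128. The first aligned line
	 * inside the block is DIV_ROUND_UP(1000, 128) * 128 = 1024, so
	 * alignment_lines = 1024 - 1000 = 24: those 24 leading lines are
	 * freed back, offset advances to 1024, and ctaglines_to_allocate
	 * shrinks by 24.
	 */
	u32 offset = 1000, ctagline_align = 128;
	u32 alignment_lines =
		DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - offset;
	/* alignment_lines == 24 */
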
@@ -839,12 +889,14 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 }
 
 u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
-		      u64 size,
-		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
+		u64 size,
+		enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
+
 {
 	struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+	int err;
 	u64 offset;
+	u32 start_page_nr = 0, num_pages;
 	u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
 	if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -860,19 +912,28 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 
 	}
 
-	/* Be certain we round up to gmmu_page_size if needed */
+	/* be certain we round up to gmmu_page_size if needed */
+	/* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
 	size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
 
 	gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
 			vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
-	offset = gk20a_balloc(vma, size);
-	if (!offset) {
+	/* The vma allocator represents page accounting. */
+	num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
+
+	err = vma->alloc(vma, &start_page_nr, num_pages, 1);
+
+	if (err) {
 		gk20a_err(dev_from_vm(vm),
-			  "%s oom: sz=0x%llx", vma->name, size);
+			"%s oom: sz=0x%llx", vma->name, size);
 		return 0;
 	}
 
+	offset = (u64)start_page_nr <<
+		 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
 	gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
 
 	return offset;
 }
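
Both versions of gk20a_vm_alloc_va round `size` up with a mask rather than DIV_ROUND_UP, since a 64-bit division would pull in __aeabi_uldivmod on 32-bit ARM. A small illustration of the trick, which requires a power-of-two page size:

	/*
	 * Example: size = 0x2100, gmmu_page_size = 0x2000 (8KB):
	 * (0x2100 + 0x1FFF) & ~0x1FFF = 0x4000, i.e. rounded up to two pages.
	 */
	u64 size = 0x2100, pgsz = 0x2000;

	size = (size + (pgsz - 1)) & ~(pgsz - 1);	/* 0x4000 */
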
@@ -881,12 +942,25 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 		     enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
+	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+	u32 page_shift = ilog2(page_size);
+	u32 start_page_nr, num_pages;
+	int err;
 
 	gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
 			vma->name, offset, size);
-	gk20a_bfree(vma, offset);
 
-	return 0;
+	start_page_nr = (u32)(offset >> page_shift);
+	num_pages = (u32)((size + page_size - 1) >> page_shift);
+
+	err = vma->free(vma, start_page_nr, num_pages, 1);
+	if (err) {
+		gk20a_err(dev_from_vm(vm),
+			"not found: offset=0x%llx, sz=0x%llx",
+			offset, size);
+	}
+
+	return err;
 }
 
 static int insert_mapped_buffer(struct rb_root *root,
@@ -1062,7 +1136,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 
 	if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
 		gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
-			  map_offset);
+			map_offset);
 		return -EINVAL;
 	}
 
@@ -2359,6 +2433,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		char *name)
 {
 	int err, i;
+	u32 num_small_pages, num_large_pages, low_hole_pages;
 	char alloc_name[32];
 	u64 small_vma_size, large_vma_size;
 	u32 pde_lo, pde_hi;
@@ -2419,31 +2494,34 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		large_vma_size = vm->va_limit - small_vma_size;
 	}
 
+	num_small_pages = (u32)(small_vma_size >>
+		    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
+	/* num_pages above is without regard to the low-side hole. */
+	low_hole_pages = (vm->va_start >>
+			  ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
 	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
 		 vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-				     vm, alloc_name,
-				     vm->va_start,
-				     small_vma_size - vm->va_start,
-				     SZ_4K,
-				     GPU_BALLOC_MAX_ORDER,
-				     GPU_BALLOC_GVA_SPACE);
+	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+			alloc_name,
+			low_hole_pages,		 /*start*/
+			num_small_pages - low_hole_pages);/* length*/
 	if (err)
 		goto clean_up_ptes;
 
 	if (big_pages) {
+		u32 start = (u32)(small_vma_size >>
+			    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+		num_large_pages = (u32)(large_vma_size >>
+			    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
 			 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
-		/*
-		 * Big page VMA starts at the end of the small page VMA.
-		 */
-		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-					     vm, alloc_name,
-					     small_vma_size,
-					     large_vma_size,
-					     big_page_size,
-					     GPU_BALLOC_MAX_ORDER,
-					     GPU_BALLOC_GVA_SPACE);
+		err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+			      alloc_name,
+			      start,			/* start */
+			      num_large_pages);		/* length */
 		if (err)
 			goto clean_up_small_allocator;
 	}
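
Note the unit conversion the restored calls perform: byte sizes become page counts before each VMA's allocator is initialized, and the low hole is skipped by starting the space above it. A numeric sketch with invented sizes:

	/* Invented example: 4KB small pages, 16MB low hole, 128MB small VMA. */
	u64 small_vma_size = 128 << 20;
	u64 va_start = 16 << 20;

	u32 num_small_pages = (u32)(small_vma_size >> 12);	/* 32768 */
	u32 low_hole_pages = (u32)(va_start >> 12);		/* 4096  */

	/* the small-page allocator then manages page numbers [4096, 32768) */
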
@@ -2524,9 +2602,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 			 struct nvgpu_as_alloc_space_args *args)
 
-{
-	int err = -ENOMEM;
+{ int err = -ENOMEM;
 	int pgsz_idx = gmmu_page_size_small;
+	u32 start_page_nr;
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct gk20a *g = vm->mm->g;
@@ -2557,19 +2635,21 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 		goto clean_up;
 	}
 
-	vma = &vm->vma[pgsz_idx];
+	start_page_nr = 0;
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
-		vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
-						 (u64)args->pages *
-						 (u64)args->page_size);
-	else
-		vaddr_start = gk20a_balloc(vma, args->pages * args->page_size);
+		start_page_nr = (u32)(args->o_a.offset >>
+				ilog2(vm->gmmu_page_sizes[pgsz_idx]));
 
-	if (!vaddr_start) {
+	vma = &vm->vma[pgsz_idx];
+	err = vma->alloc(vma, &start_page_nr, args->pages, 1);
+	if (err) {
 		kfree(va_node);
 		goto clean_up;
 	}
 
+	vaddr_start = (u64)start_page_nr <<
+		      ilog2(vm->gmmu_page_sizes[pgsz_idx]);
+
 	va_node->vaddr_start = vaddr_start;
 	va_node->size = (u64)args->page_size * (u64)args->pages;
 	va_node->pgsz_idx = pgsz_idx;
@@ -2593,7 +2673,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 					 true);
 	if (!map_offset) {
 		mutex_unlock(&vm->update_gmmu_lock);
-		gk20a_bfree(vma, vaddr_start);
+		vma->free(vma, start_page_nr, args->pages, 1);
 		kfree(va_node);
 		goto clean_up;
 	}
@@ -2605,7 +2685,6 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	mutex_unlock(&vm->update_gmmu_lock);
 
 	args->o_a.offset = vaddr_start;
-	err = 0;
 
 clean_up:
 	return err;
@@ -2616,6 +2695,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 {
 	int err = -ENOMEM;
 	int pgsz_idx;
+	u32 start_page_nr;
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct vm_reserved_va_node *va_node;
|
||||
pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
|
||||
gmmu_page_size_big : gmmu_page_size_small;
|
||||
|
||||
start_page_nr = (u32)(args->offset >>
|
||||
ilog2(vm->gmmu_page_sizes[pgsz_idx]));
|
||||
|
||||
vma = &vm->vma[pgsz_idx];
|
||||
gk20a_bfree(vma, args->offset);
|
||||
err = vma->free(vma, start_page_nr, args->pages, 1);
|
||||
|
||||
if (err)
|
||||
goto clean_up;
|
||||
|
||||
mutex_lock(&vm->update_gmmu_lock);
|
||||
va_node = addr_to_reservation(vm, args->offset);
|
||||
@@ -2659,8 +2745,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
-	err = 0;
 
 clean_up:
 	return err;
 }
@@ -131,7 +131,6 @@ enum gmmu_pgsz_gk20a {
 };
 
 struct gk20a_comptags {
-	u32 real_offset;
 	u32 offset;
 	u32 lines;
 	u32 allocated_lines;
@@ -2438,6 +2438,7 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
 	struct pmu_payload payload;
 	u32 seq;
 	u32 data;
+	int err = 0;
 
 	gk20a_dbg_fn("");
 
@@ -2488,11 +2489,12 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
 	gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
 
 	if (!pmu->sample_buffer)
-		pmu->sample_buffer = gk20a_balloc(&pmu->dmem,
-						  2 * sizeof(u16));
-	if (!pmu->sample_buffer) {
+		err = pmu->dmem.alloc(&pmu->dmem,
+				      &pmu->sample_buffer, 2 * sizeof(u16),
+				      PMU_DMEM_ALLOC_ALIGNMENT);
+	if (err) {
 		gk20a_err(dev_from_gk20a(g),
-			  "failed to allocate perfmon sample buffer");
+			"failed to allocate perfmon sample buffer");
 		return -ENOMEM;
 	}
 
@@ -2590,17 +2592,15 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu,
 	for (i = 0; i < PMU_QUEUE_COUNT; i++)
 		pmu_queue_init(pmu, i, init);
 
-	if (!pmu->dmem.init) {
-		/* Align start and end addresses */
+	if (!pmu->dmem.alloc) {
+		/*Align start and end addresses*/
 		u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init),
-				  PMU_DMEM_ALLOC_ALIGNMENT);
+			PMU_DMEM_ALLOC_ALIGNMENT);
 		u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) +
-			   pv->get_pmu_init_msg_pmu_sw_mg_size(init)) &
+			pv->get_pmu_init_msg_pmu_sw_mg_size(init)) &
 			~(PMU_DMEM_ALLOC_ALIGNMENT - 1);
 		u32 size = end - start;
-		__gk20a_allocator_init(&pmu->dmem, NULL, "gk20a_pmu_dmem",
-				       start, size,
-				       PMU_DMEM_ALLOC_ALIGNMENT, 4, 0);
+		gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", start, size);
 	}
 
 	pmu->pmu_ready = true;
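
The restored DMEM setup trims the PMU-reported region to PMU_DMEM_ALLOC_ALIGNMENT (32 after this revert) by rounding the start up and the end down. A worked example with invented numbers:

	u32 off = 0x5C21, sz = 0x1000;	/* region the PMU reported */

	u32 start = ALIGN(off, 32);		/* 0x5C40, rounded up   */
	u32 end = (off + sz) & ~(32 - 1);	/* 0x6C20, rounded down */
	u32 size = end - start;			/* usable heap: 0xFE0   */
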
@@ -2737,14 +2737,20 @@ static int pmu_response_handle(struct pmu_gk20a *pmu,
 		seq->callback = NULL;
 	if (pv->pmu_allocation_get_dmem_size(pmu,
 			pv->get_pmu_seq_in_a_ptr(seq)) != 0)
-		gk20a_bfree(&pmu->dmem,
+		pmu->dmem.free(&pmu->dmem,
 			pv->pmu_allocation_get_dmem_offset(pmu,
-				pv->get_pmu_seq_in_a_ptr(seq)));
+				pv->get_pmu_seq_in_a_ptr(seq)),
+			pv->pmu_allocation_get_dmem_size(pmu,
+				pv->get_pmu_seq_in_a_ptr(seq)),
+			PMU_DMEM_ALLOC_ALIGNMENT);
 	if (pv->pmu_allocation_get_dmem_size(pmu,
 			pv->get_pmu_seq_out_a_ptr(seq)) != 0)
-		gk20a_bfree(&pmu->dmem,
+		pmu->dmem.free(&pmu->dmem,
 			pv->pmu_allocation_get_dmem_offset(pmu,
-				pv->get_pmu_seq_out_a_ptr(seq)));
+				pv->get_pmu_seq_out_a_ptr(seq)),
+			pv->pmu_allocation_get_dmem_size(pmu,
+				pv->get_pmu_seq_out_a_ptr(seq)),
+			PMU_DMEM_ALLOC_ALIGNMENT);
 
 	if (seq->callback)
 		seq->callback(g, msg, seq->cb_params, seq->desc, ret);
@@ -3381,10 +3387,11 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
 		pv->pmu_allocation_set_dmem_size(pmu, in,
 			(u16)max(payload->in.size, payload->out.size));
 
-		*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) =
-			gk20a_balloc(&pmu->dmem,
-				pv->pmu_allocation_get_dmem_size(pmu, in));
-		if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)))
+		err = pmu->dmem.alloc(&pmu->dmem,
+			pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
+			pv->pmu_allocation_get_dmem_size(pmu, in),
+			PMU_DMEM_ALLOC_ALIGNMENT);
+		if (err)
 			goto clean_up;
 
 		pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
@@ -3405,12 +3412,11 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
 			(u16)payload->out.size);
 
 		if (payload->out.buf != payload->in.buf) {
-
-			*(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) =
-				gk20a_balloc(&pmu->dmem,
-					pv->pmu_allocation_get_dmem_size(pmu, out));
-			if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu,
-					out)))
+			err = pmu->dmem.alloc(&pmu->dmem,
+				pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
+				pv->pmu_allocation_get_dmem_size(pmu, out),
+				PMU_DMEM_ALLOC_ALIGNMENT);
+			if (err)
 				goto clean_up;
 		} else {
 			BUG_ON(in == NULL);
@@ -3438,11 +3444,15 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
 clean_up:
 	gk20a_dbg_fn("fail");
 	if (in)
-		gk20a_bfree(&pmu->dmem,
-			pv->pmu_allocation_get_dmem_offset(pmu, in));
+		pmu->dmem.free(&pmu->dmem,
+			pv->pmu_allocation_get_dmem_offset(pmu, in),
+			pv->pmu_allocation_get_dmem_size(pmu, in),
+			PMU_DMEM_ALLOC_ALIGNMENT);
 	if (out)
-		gk20a_bfree(&pmu->dmem,
-			pv->pmu_allocation_get_dmem_offset(pmu, out));
+		pmu->dmem.free(&pmu->dmem,
+			pv->pmu_allocation_get_dmem_offset(pmu, out),
+			pv->pmu_allocation_get_dmem_size(pmu, out),
+			PMU_DMEM_ALLOC_ALIGNMENT);
 
 	pmu_seq_release(pmu, seq);
 	return err;
@@ -3,7 +3,7 @@
 *
 * GK20A PMU (aka. gPMU outside gk20a context)
 *
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -437,7 +437,7 @@ struct pmu_ucode_desc {
 #define PMU_UNIT_ID_IS_VALID(id)		\
 		(((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
 
-#define PMU_DMEM_ALLOC_ALIGNMENT	(4)
+#define PMU_DMEM_ALLOC_ALIGNMENT	(32)
 #define PMU_DMEM_ALIGNMENT		(4)
 
 #define PMU_CMD_FLAGS_PMU_MASK		(0xF0)
@@ -3,7 +3,7 @@
 *
 * GK20A Semaphores
 *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -44,10 +44,8 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d,
 	if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size))
 		goto clean_up;
 
-	/* Sacrifice one semaphore in the name of returning error codes. */
-	if (gk20a_allocator_init(&p->alloc, unique_name,
-				 SEMAPHORE_SIZE, p->size - SEMAPHORE_SIZE,
-				 SEMAPHORE_SIZE))
+	if (gk20a_allocator_init(&p->alloc, unique_name, 0,
+				 p->size))
 		goto clean_up;
 
 	gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va,
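
The removed "sacrifice one semaphore" comment encodes a convention worth spelling out: gk20a_balloc() reports failure by returning 0, so the buddy-backed pool starts its space at SEMAPHORE_SIZE to keep offset 0 permanently invalid. A hedged sketch of that convention:

	/* Space is [SEMAPHORE_SIZE, p->size), so 0 can only mean failure. */
	u64 off = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE);
	if (!off)
		return NULL;	/* out of semaphores */

The restored allocator returns a status code instead, so its space can start at 0, as the next hunk shows.
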
@@ -165,8 +163,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
 	if (!s)
 		return NULL;
 
-	s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE);
-	if (!s->offset) {
+	if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE,
+			      SEMAPHORE_SIZE)) {
 		gk20a_err(pool->dev, "failed to allocate semaphore");
 		kfree(s);
 		return NULL;
@@ -188,7 +186,8 @@ static void gk20a_semaphore_free(struct kref *ref)
 	struct gk20a_semaphore *s =
 		container_of(ref, struct gk20a_semaphore, ref);
 
-	gk20a_bfree(&s->pool->alloc, s->offset);
+	s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE,
+			    SEMAPHORE_SIZE);
 	gk20a_semaphore_pool_put(s->pool);
 	kfree(s);
 }
@@ -90,8 +90,9 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (err)
 		return err;
 
-	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
-			       1, max_comptag_lines - 1, 1, 10, 0);
+	gk20a_allocator_init(&gr->comp_tags, "comptag",
+			     1, /* start */
+			     max_comptag_lines - 1); /* length*/
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;
 
@@ -41,8 +41,9 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (max_comptag_lines < 2)
 		return -ENXIO;
 
-	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
-			       1, max_comptag_lines - 1, 1, 10, 0);
+	gk20a_allocator_init(&gr->comp_tags, "comptag",
+			     1, /* start */
+			     max_comptag_lines - 1); /* length*/
 	return 0;
 }
 
@@ -243,9 +243,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
+	u32 num_small_pages, num_large_pages, low_hole_pages;
 	u64 small_vma_size, large_vma_size;
 	char name[32];
 	int err, i;
+	u32 start;
 
 	/* note: keep the page sizes sorted lowest to highest here */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = {
@@ -292,27 +294,33 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	small_vma_size = (u64)16 << 30;
 	large_vma_size = vm->va_limit - small_vma_size;
 
+	num_small_pages = (u32)(small_vma_size >>
+		    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
+	/* num_pages above is without regard to the low-side hole. */
+	low_hole_pages = (vm->va_start >>
+			  ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
 	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 		 gmmu_page_sizes[gmmu_page_size_small]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-				     vm, name,
-				     vm->va_start,
-				     small_vma_size - vm->va_start,
-				     SZ_4K,
-				     GPU_BALLOC_MAX_ORDER,
-				     GPU_BALLOC_GVA_SPACE);
+	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+			name,
+			low_hole_pages,		 /*start*/
+			num_small_pages - low_hole_pages);/* length*/
 	if (err)
 		goto clean_up_share;
 
+	start = (u32)(small_vma_size >>
+		      ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+	num_large_pages = (u32)(large_vma_size >>
+		    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+
 	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 		 gmmu_page_sizes[gmmu_page_size_big]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-				     vm, name,
-				     small_vma_size,
-				     large_vma_size,
-				     big_page_size,
-				     GPU_BALLOC_MAX_ORDER,
-				     GPU_BALLOC_GVA_SPACE);
+	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+			name,
+			start,			/* start */
+			num_large_pages);	/* length */
 	if (err)
 		goto clean_up_small_allocator;