mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
Don't use enum dma_attr in the gk20a_gmmu_alloc_attr* functions, but define
nvgpu-internal flags for no kernel mapping, force contiguous, and read only
modes. Store the flags in the allocated struct mem_desc and only use
gk20a_gmmu_free, remove gk20a_gmmu_free_attr. This helps in OS abstraction.

Rename the notion of attr to flags.

Add implicit NVGPU_DMA_NO_KERNEL_MAPPING to all vidmem buffers allocated via
gk20a_gmmu_alloc_vid for consistency.

Fix a bug in gk20a_gmmu_alloc_map_attr that dropped the attr parameter
accidentally.

Bug 1853519

Change-Id: I1ff67dff9fc425457ae445ce4976a780eb4dcc9f
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1321101
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
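
For illustration only (not part of the change itself), the resulting call pattern looks roughly like the hypothetical helper below; the struct gk20a pointer and the size are assumed to come from the caller's context:

	/* Hypothetical usage sketch of the flags-based API after this change. */
	static int example_sysmem_alloc(struct gk20a *g, size_t size)
	{
		struct mem_desc mem;
		int err;

		/* Flags are nvgpu-internal and get recorded in mem.flags. */
		err = gk20a_gmmu_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING,
						 size, &mem);
		if (err)
			return err;

		/* Single free path; no gk20a_gmmu_free_attr() anymore. */
		gk20a_gmmu_free(g, &mem);
		return 0;
	}
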
807 lines
22 KiB
C
/*
 * GK20A memory management
 *
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef MM_GK20A_H
#define MM_GK20A_H

#include <linux/scatterlist.h>
#include <linux/iommu.h>
#include <soc/tegra/chip-id.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/version.h>
#include <asm/dma-iommu.h>
#include <asm/cacheflush.h>

#include <nvgpu/allocator.h>

#ifdef CONFIG_ARM64
#define outer_flush_range(a, b)
#define __cpuc_flush_dcache_area __flush_dcache_area
#endif

#define FLUSH_CPU_DCACHE(va, pa, size)	\
	do {	\
		__cpuc_flush_dcache_area((void *)(va), (size_t)(size));	\
		outer_flush_range(pa, pa + (size_t)(size));	\
	} while (0)
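
/*
 * Illustrative sketch only (the names below are assumed caller context, not
 * declarations from this header): after the CPU writes a buffer that the GPU
 * will read without cache coherency, flush the written range:
 *
 *	memset(desc_cpu_va, 0, desc_size);
 *	FLUSH_CPU_DCACHE(desc_cpu_va, desc_phys_addr, desc_size);
 */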

/*
 * Real location of a buffer - gk20a_aperture_mask() will deduce what will be
 * told to the gpu about the aperture, but this flag designates where the
 * memory actually was allocated from.
 */
enum gk20a_aperture {
	APERTURE_INVALID, /* unallocated or N/A */
	APERTURE_SYSMEM,
	APERTURE_VIDMEM
};

enum gk20a_mem_rw_flag {
	gk20a_mem_flag_none = 0,
	gk20a_mem_flag_read_only = 1,
	gk20a_mem_flag_write_only = 2,
};

static inline const char *gk20a_aperture_str(enum gk20a_aperture aperture)
{
	switch (aperture) {
	case APERTURE_INVALID: return "invalid";
	case APERTURE_SYSMEM:  return "sysmem";
	case APERTURE_VIDMEM:  return "vidmem";
	};
	return "UNKNOWN";
}

struct mem_desc {
	void *cpu_va; /* sysmem only */
	struct page **pages; /* sysmem only */
	struct sg_table *sgt;
	enum gk20a_aperture aperture;
	size_t size;
	u64 gpu_va;
	bool fixed; /* vidmem only */
	bool user_mem; /* vidmem only */
	struct nvgpu_allocator *allocator; /* vidmem only */
	struct list_head clear_list_entry; /* vidmem only */
	bool skip_wmb;
	unsigned long flags;
};

struct mem_desc_sub {
	u32 offset;
	u32 size;
};

struct gpfifo_desc {
	struct mem_desc mem;
	u32 entry_num;

	u32 get;
	u32 put;

	bool wrap;

	/* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy
	 * from userspace to pipe and then from pipe to gpu buffer */
	void *pipe;
};

struct patch_desc {
	struct mem_desc mem;
	u32 data_count;
};

struct zcull_ctx_desc {
	u64 gpu_va;
	u32 ctx_attr;
	u32 ctx_sw_mode;
};

struct pm_ctx_desc {
	struct mem_desc mem;
	u32 pm_mode;
};

struct gk20a;

struct compbit_store_desc {
	struct mem_desc mem;

	/* The value that is written to the hardware. This depends on
	 * the number of ltcs and is not an address. */
	u64 base_hw;
};

struct gk20a_buffer_state {
	struct list_head list;

	/* The valid compbits and the fence must be changed atomically. */
	struct nvgpu_mutex lock;

	/* Offset of the surface within the dma-buf whose state is
	 * described by this struct (one dma-buf can contain multiple
	 * surfaces with different states). */
	size_t offset;

	/* A bitmask of valid sets of compbits (0 = uncompressed). */
	u32 valid_compbits;

	/* The ZBC color used on this buffer. */
	u32 zbc_color;

	/* This struct reflects the state of the buffer when this
	 * fence signals. */
	struct gk20a_fence *fence;
};

enum gmmu_pgsz_gk20a {
	gmmu_page_size_small = 0,
	gmmu_page_size_big = 1,
	gmmu_page_size_kernel = 2,
	gmmu_nr_page_sizes = 3,
};

struct gk20a_comptags {
	u32 offset;
	u32 lines;
	u32 allocated_lines;
	bool user_mappable;
};

struct gk20a_mm_entry {
	/* backing for */
	struct mem_desc mem;
	u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */
	int pgsz;
	struct gk20a_mm_entry *entries;
	int num_entries;
};

struct priv_cmd_queue {
	struct mem_desc mem;
	u32 size; /* num of entries in words */
	u32 put; /* put for priv cmd queue */
	u32 get; /* get for priv cmd queue */
};

struct priv_cmd_entry {
	bool valid;
	struct mem_desc *mem;
	u32 off; /* offset in mem, in u32 entries */
	u64 gva;
	u32 get; /* start of entry in queue */
	u32 size; /* in words */
	struct list_head list; /* node for lists */
};

struct mapped_buffer_node {
	struct vm_gk20a *vm;
	struct rb_node node;
	struct list_head unmap_list;
	struct list_head va_buffers_list;
	struct vm_reserved_va_node *va_node;
	u64 addr;
	u64 size;
	struct dma_buf *dmabuf;
	struct sg_table *sgt;
	struct kref ref;
	u32 user_mapped;
	bool own_mem_ref;
	u32 pgsz_idx;
	u32 ctag_offset;
	u32 ctag_lines;
	u32 ctag_allocated_lines;

	/* For comptag mapping, these are the mapping window parameters */
	bool ctags_mappable;
	u64 ctag_map_win_addr; /* non-zero if mapped */
	u64 ctag_map_win_size; /* non-zero if ctags_mappable */
	u32 ctag_map_win_ctagline; /* ctagline at win start, set if
				    * ctags_mappable */

	u32 flags;
	u32 kind;
	bool va_allocated;
};

struct vm_reserved_va_node {
	struct list_head reserved_va_list;
	struct list_head va_buffers_list;
	u32 pgsz_idx;
	u64 vaddr_start;
	u64 size;
	bool sparse;
};

struct gk20a_mmu_level {
	int hi_bit[2];
	int lo_bit[2];
	int (*update_entry)(struct vm_gk20a *vm,
			    struct gk20a_mm_entry *pte,
			    u32 i, u32 gmmu_pgsz_idx,
			    struct scatterlist **sgl,
			    u64 *offset,
			    u64 *iova,
			    u32 kind_v, u64 *ctag,
			    bool cacheable, bool unmapped_pte,
			    int rw_flag, bool sparse, bool priv,
			    enum gk20a_aperture aperture);
	size_t entry_size;
};
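
/*
 * Illustrative sketch (an assumption about typical use, not a helper declared
 * here): hi_bit/lo_bit bound the GPU VA bits that select an entry at this
 * level, indexed by small/big page size, so an entry index would be derived
 * roughly as:
 *
 *	u32 i = (u32)((gpu_va & ((1ULL << (l->hi_bit[pgsz_idx] + 1)) - 1))
 *		      >> l->lo_bit[pgsz_idx]);
 */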

/* map/unmap batch state */
struct vm_gk20a_mapping_batch
{
	bool gpu_l2_flushed;
	bool need_tlb_invalidate;
};

struct vm_gk20a {
	struct mm_gk20a *mm;
	struct gk20a_as_share *as_share; /* as_share this represents */

	u64 va_start;
	u64 va_limit;

	int num_user_mapped_buffers;

	bool big_pages; /* enable large page support */
	bool enable_ctag;
	bool mapped;

	u32 big_page_size;

	bool userspace_managed;

	const struct gk20a_mmu_level *mmu_levels;

	struct kref ref;

	struct nvgpu_mutex update_gmmu_lock;

	struct gk20a_mm_entry pdb;

	/*
	 * These structs define the address spaces. In some cases it's possible
	 * to merge address spaces (user and user_lp) and in other cases it's
	 * not. vma[] allows the code to be agnostic to this by always using
	 * address spaces through this pointer array.
	 */
	struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
	struct nvgpu_allocator kernel;
	struct nvgpu_allocator user;
	struct nvgpu_allocator user_lp;

	struct rb_root mapped_buffers;

	struct list_head reserved_va_list;

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	u64 handle;
#endif
	u32 gmmu_page_sizes[gmmu_nr_page_sizes];

	/* if non-NULL, kref_put will use this batch when
	 * unmapping. Must hold vm->update_gmmu_lock. */
	struct vm_gk20a_mapping_batch *kref_put_batch;

	/*
	 * Each address space needs to have a semaphore pool.
	 */
	struct nvgpu_semaphore_pool *sema_pool;
};

struct gk20a;
struct channel_gk20a;

int gk20a_init_mm_support(struct gk20a *g);
int gk20a_init_mm_setup_sw(struct gk20a *g);
int gk20a_init_mm_setup_hw(struct gk20a *g);
void gk20a_mm_debugfs_init(struct device *dev);
void gk20a_init_mm_ce_context(struct gk20a *g);

int gk20a_mm_fb_flush(struct gk20a *g);
void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
void gk20a_mm_cbc_clean(struct gk20a *g);
void gk20a_mm_l2_invalidate(struct gk20a *g);

struct mm_gk20a {
	struct gk20a *g;

	/* default GPU VA sizes for the channel address spaces */
	struct {
		u64 user_size;   /* userspace-visible GPU VA region */
		u64 kernel_size; /* kernel-only GPU VA region */
	} channel;

	struct {
		u32 aperture_size;
		struct vm_gk20a vm;
		struct mem_desc inst_block;
	} bar1;

	struct {
		u32 aperture_size;
		struct vm_gk20a vm;
		struct mem_desc inst_block;
	} bar2;

	struct {
		u32 aperture_size;
		struct vm_gk20a vm;
		struct mem_desc inst_block;
	} pmu;

	struct {
		/* using pmu vm currently */
		struct mem_desc inst_block;
	} hwpm;

	struct {
		struct vm_gk20a vm;
	} cde;

	struct {
		struct vm_gk20a vm;
	} ce;

	struct nvgpu_mutex l2_op_lock;
	struct nvgpu_mutex tlb_lock;
	struct nvgpu_mutex priv_lock;
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	struct mem_desc bar2_desc;
#endif
	/*
	 * Separate function to cleanup the CE since it requires a channel to
	 * be closed which must happen before fifo cleanup.
	 */
	void (*remove_ce_support)(struct mm_gk20a *mm);
	void (*remove_support)(struct mm_gk20a *mm);
	bool sw_ready;
	int physical_bits;
	bool use_full_comp_tag_line;
#ifdef CONFIG_DEBUG_FS
	u32 ltc_enabled;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
	u32 ltc_enabled_debug;
#else
	bool ltc_enabled_debug;
#endif
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
	u32 bypass_smmu;
	u32 disable_bigpage;
#else
	bool bypass_smmu;
	bool disable_bigpage;
#endif
	bool has_physical_mode;
	/* false if vidmem aperture actually points to sysmem */
	bool vidmem_is_vidmem;

	struct mem_desc sysmem_flush;

	u32 pramin_window;
	struct nvgpu_spinlock pramin_window_lock;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
	u32 force_pramin; /* via debugfs */
#else
	bool force_pramin; /* via debugfs */
#endif

	struct {
		size_t size;
		u64 base;
		size_t bootstrap_size;
		u64 bootstrap_base;

		struct nvgpu_allocator allocator;
		struct nvgpu_allocator bootstrap_allocator;

		u32 ce_ctx_id;
		volatile bool cleared;
		struct nvgpu_mutex first_clear_mutex;

		struct list_head clear_list_head;
		struct nvgpu_mutex clear_list_mutex;

		struct work_struct clear_mem_worker;
		atomic64_t bytes_pending;
	} vidmem;
};

int gk20a_mm_init(struct mm_gk20a *mm);

#define gk20a_from_mm(mm) ((mm)->g)
#define gk20a_from_vm(vm) ((vm)->mm->g)

#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)

#define DEFAULT_ALLOC_ALIGNMENT (4*1024)

static inline int bar1_aperture_size_mb_gk20a(void)
{
	return 16; /* 16MB is more than enough atm. */
}

/* The maximum GPU VA range supported */
#define NV_GMMU_VA_RANGE 38

/* The default userspace-visible GPU VA size */
#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37)

/* The default kernel-reserved GPU VA size */
#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)

/*
 * When not using unified address spaces, the bottom 16GB of the space is used
 * for small pages and the remaining high memory is used for large pages.
 */
static inline u64 __nv_gmmu_va_small_page_limit(void)
{
	return ((u64)SZ_1G * 16);
}

enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
					      u64 base, u64 size);
enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size);
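
/*
 * Illustrative expectation based on the comment above, not a contract: with a
 * split (non-unified) address space, a mapping placed below
 * __nv_gmmu_va_small_page_limit() would resolve to gmmu_page_size_small,
 * while a suitably sized and aligned mapping above it could resolve to
 * gmmu_page_size_big:
 *
 *	enum gmmu_pgsz_gk20a pgsz = __get_pte_size(vm, base, size);
 */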

/*
 * Buffer accessors - wrap between begin() and end() if there is no permanent
 * kernel mapping for this buffer.
 */

int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem);
/* nop for null mem, like with free() or vunmap() */
void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem);

/* word-indexed offset */
u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w);
/* byte offset (32b-aligned) */
u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset);
/* memcpy to cpu, offset and size in bytes (32b-aligned) */
void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
		    void *dest, u32 size);

/* word-indexed offset */
void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data);
/* byte offset (32b-aligned) */
void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);
/* memcpy from cpu, offset and size in bytes (32b-aligned) */
void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
		    void *src, u32 size);
/* size and offset in bytes (32b-aligned), filled with the constant byte c */
void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
		  u32 c, u32 size);
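
/*
 * Typical usage sketch (illustrative only; the error code is an assumption):
 * wrap accesses in begin()/end() so buffers without a permanent kernel
 * mapping (e.g. vidmem or NVGPU_DMA_NO_KERNEL_MAPPING allocations) get a
 * temporary mapping for the duration of the access:
 *
 *	if (gk20a_mem_begin(g, &mem))
 *		return -ENOMEM;
 *	gk20a_mem_wr32(g, &mem, 0, 0x12345678);
 *	val = gk20a_mem_rd32(g, &mem, 0);
 *	gk20a_mem_end(g, &mem);
 */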

#if 0 /* related to addr bits above, concern below TBD on which is accurate */
#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
					   bus_bar1_block_ptr_s())
#else
#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
#endif

int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block);
void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block);
void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
			   u32 big_page_size);
u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct mem_desc *mem);

void gk20a_mm_dump_vm(struct vm_gk20a *vm,
		      u64 va_begin, u64 va_end, char *label);

int gk20a_mm_suspend(struct gk20a *g);

int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
		      void *cpuva, u64 iova,
		      size_t size);

int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
				 struct page **pages, u64 iova,
				 size_t size);

void gk20a_free_sgtable(struct sg_table **sgt);

u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
		       u32 flags);
u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova);
u64 gk20a_mem_get_base_addr(struct gk20a *g, struct mem_desc *mem,
			    u32 flags);

void gk20a_mm_ltc_isr(struct gk20a *g);

bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);

int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);

u64 gk20a_gmmu_map(struct vm_gk20a *vm,
		   struct sg_table **sgt,
		   u64 size,
		   u32 flags,
		   int rw_flag,
		   bool priv,
		   enum gk20a_aperture aperture);
u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
			 struct sg_table **sgt,
			 u64 addr,
			 u64 size,
			 u32 flags,
			 int rw_flag,
			 bool priv,
			 enum gk20a_aperture aperture);

/* Flags for the below gk20a_gmmu_{alloc,alloc_map}_flags* */

/*
 * Don't create a virtual kernel mapping for the buffer but only allocate it;
 * this may save some resources. The buffer can be mapped later explicitly.
 */
#define NVGPU_DMA_NO_KERNEL_MAPPING (1 << 0)
/*
 * Don't allow building the buffer from individual pages but require a
 * physically contiguous block.
 */
#define NVGPU_DMA_FORCE_CONTIGUOUS (1 << 1)
/*
 * Make the mapping read-only.
 */
#define NVGPU_DMA_READ_ONLY (1 << 2)

int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
			 struct mem_desc *mem);
int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
			       size_t size, struct mem_desc *mem);

int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size,
			     struct mem_desc *mem);
int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
				   size_t size, struct mem_desc *mem);

int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size,
			     struct mem_desc *mem);
int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
				   size_t size, struct mem_desc *mem);

void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem);

int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem);
int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
			   struct mem_desc *mem);

int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct mem_desc *mem);
int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags,
			       size_t size, struct mem_desc *mem);

int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct mem_desc *mem);
int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags,
			       size_t size, struct mem_desc *mem);
int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
				  size_t size, struct mem_desc *mem,
				  dma_addr_t at);

void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem);
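
/*
 * Illustrative sketch (vm is assumed to come from the caller): allocate and
 * map into a VM, then release with the single unmap+free helper. On success,
 * mem.gpu_va holds the GPU virtual address; the allocation flags are stored
 * in the mem_desc, so no separate "attr" value is passed back at free time:
 *
 *	struct mem_desc mem;
 *	int err = gk20a_gmmu_alloc_map_flags(vm, NVGPU_DMA_READ_ONLY,
 *					     PAGE_SIZE, &mem);
 *
 *	if (!err)
 *		gk20a_gmmu_unmap_free(vm, &mem);
 */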

static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem)
{
	/* FIXME: the sgt/sgl may get null if this is accessed e.g. in an isr
	 * during channel deletion - attempt to fix at least null derefs */
	struct sg_table *sgt = mem->sgt;

	if (sgt) {
		struct scatterlist *sgl = sgt->sgl;
		if (sgl)
			return sg_phys(sgl);
	}

	return 0;
}

u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
			  u32 sysmem_mask, u32 vidmem_mask);
u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem,
			u32 sysmem_mask, u32 vidmem_mask);
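
/*
 * Illustrative sketch (the two field values below are hypothetical
 * placeholders, not real register accessors): callers pass the hardware
 * field value to use for each possible backing aperture and the helper
 * returns the one matching where the buffer was actually allocated:
 *
 *	u32 target = gk20a_aperture_mask(g, &inst_block,
 *					 sysmem_target_field,
 *					 vidmem_target_field);
 */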

void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry,
		    size_t w, size_t data);
u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry);

u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
			  u64 map_offset,
			  struct sg_table *sgt,
			  u64 buffer_offset,
			  u64 size,
			  int pgsz_idx,
			  u8 kind_v,
			  u32 ctag_offset,
			  u32 flags,
			  int rw_flag,
			  bool clear_ctags,
			  bool sparse,
			  bool priv,
			  struct vm_gk20a_mapping_batch *batch,
			  enum gk20a_aperture aperture);

void gk20a_gmmu_unmap(struct vm_gk20a *vm,
		      u64 vaddr,
		      u64 size,
		      int rw_flag);

void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
			     u64 vaddr,
			     u64 size,
			     int pgsz_idx,
			     bool va_allocated,
			     int rw_flag,
			     bool sparse,
			     struct vm_gk20a_mapping_batch *batch);

struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
		    struct sg_table *sgt);

u64 gk20a_vm_map(struct vm_gk20a *vm,
		 struct dma_buf *dmabuf,
		 u64 offset_align,
		 u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/,
		 int kind,
		 struct sg_table **sgt,
		 bool user_mapped,
		 int rw_flag,
		 u64 buffer_offset,
		 u64 mapping_size,
		 struct vm_gk20a_mapping_batch *mapping_batch);

int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
			       u64 mapping_gva,
			       u64 *compbits_win_size,
			       u32 *compbits_win_ctagline,
			       u32 *mapping_ctagline,
			       u32 *flags);

int gk20a_vm_map_compbits(struct vm_gk20a *vm,
			  u64 mapping_gva,
			  u64 *compbits_win_gva,
			  u64 *mapping_iova,
			  u32 flags);

/* unmap handle from kernel */
void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);

void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
			   struct vm_gk20a_mapping_batch *batch);

/* get reference to all currently mapped buffers */
int gk20a_vm_get_buffers(struct vm_gk20a *vm,
			 struct mapped_buffer_node ***mapped_buffers,
			 int *num_buffers);

/* put references on the given buffers */
void gk20a_vm_put_buffers(struct vm_gk20a *vm,
			  struct mapped_buffer_node **mapped_buffers,
			  int num_buffers);

/* find buffer corresponding to va */
int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
			 struct dma_buf **dmabuf,
			 u64 *offset);

void gk20a_vm_get(struct vm_gk20a *vm);
void gk20a_vm_put(struct vm_gk20a *vm);

void gk20a_vm_remove_support(struct vm_gk20a *vm);

u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
		      u64 size,
		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx);

int gk20a_vm_free_va(struct vm_gk20a *vm,
		     u64 offset, u64 size,
		     enum gmmu_pgsz_gk20a pgsz_idx);

/* vm-as interface */
struct nvgpu_as_alloc_space_args;
struct nvgpu_as_free_space_args;
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
			 u32 flags);
int gk20a_vm_release_share(struct gk20a_as_share *as_share);
int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
			 struct nvgpu_as_alloc_space_args *args);
int gk20a_vm_free_space(struct gk20a_as_share *as_share,
			struct nvgpu_as_free_space_args *args);
int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
			  struct channel_gk20a *ch);
int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch);

/* batching eliminates redundant cache flushes and invalidates */
void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
void gk20a_vm_mapping_batch_finish(
	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
/* called when holding vm->update_gmmu_lock */
void gk20a_vm_mapping_batch_finish_locked(
	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
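
/*
 * Usage sketch (illustrative only; arguments other than vm and the batch are
 * assumed caller context, and error handling is omitted): batch several map
 * or unmap operations so the flushes/invalidates are issued once at finish
 * time rather than per operation:
 *
 *	struct vm_gk20a_mapping_batch batch;
 *
 *	gk20a_vm_mapping_batch_start(&batch);
 *	gk20a_vm_map_buffer(vm, fd1, &offset1, flags, kind, 0, 0, &batch);
 *	gk20a_vm_map_buffer(vm, fd2, &offset2, flags, kind, 0, 0, &batch);
 *	gk20a_vm_mapping_batch_finish(vm, &batch);
 */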

int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes);
int gk20a_vidmem_get_space(struct gk20a *g, u64 *space);
int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
			       void *buffer, u64 offset, u64 size, u32 cmd);

/* Note: batch may be NULL if map op is not part of a batch */
int gk20a_vm_map_buffer(struct vm_gk20a *vm,
			int dmabuf_fd,
			u64 *offset_align,
			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
			int kind,
			u64 buffer_offset,
			u64 mapping_size,
			struct vm_gk20a_mapping_batch *batch);

int gk20a_init_vm(struct mm_gk20a *mm,
		  struct vm_gk20a *vm,
		  u32 big_page_size,
		  u64 low_hole,
		  u64 kernel_reserved,
		  u64 aperture_size,
		  bool big_pages,
		  bool userspace_managed,
		  char *name);
void gk20a_deinit_vm(struct vm_gk20a *vm);

/* Note: batch may be NULL if unmap op is not part of a batch */
int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
			  struct vm_gk20a_mapping_batch *batch);
void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
			struct gk20a_comptags *comptags);
dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);

int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);

int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev,
			   u64 offset, struct gk20a_buffer_state **state);

int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry);
void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry);
void pde_range_from_vaddr_range(struct vm_gk20a *vm,
				u64 addr_lo, u64 addr_hi,
				u32 *pde_lo, u32 *pde_hi);
int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm);
u32 pte_index_from_vaddr(struct vm_gk20a *vm,
			 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx);
void free_gmmu_pages(struct vm_gk20a *vm,
		     struct gk20a_mm_entry *entry);

u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g);

struct gpu_ops;
void gk20a_init_mm(struct gpu_ops *gops);
const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
						      u32 big_page_size);
void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
		       struct vm_gk20a *vm);

void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);

int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);

extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];

int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd,
			     u64 *buffer_id, u64 *buffer_len);

#endif /* MM_GK20A_H */