TLB invalidate can have a race if several contexts use the same address
space: one thread starting an invalidate allows another thread to submit
before the invalidate is completed.

Bug 1502332

Change-Id: I074ec493eac3b153c5f23d796a1dee1d8db24855
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/407578
Reviewed-by: Riham Haidar <rhaidar@nvidia.com>
Tested-by: Riham Haidar <rhaidar@nvidia.com>

/*
 * drivers/video/tegra/host/gk20a/mm_gk20a.h
 *
 * GK20A memory management
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
#ifndef __MM_GK20A_H__
#define __MM_GK20A_H__

#include <linux/scatterlist.h>
#include <linux/dma-attrs.h>
#include <linux/iommu.h>
#include <asm/dma-iommu.h>
#include "gk20a_allocator.h"

/* This "address bit" in the gmmu ptes (and other gk20a accesses)
 * signals the address as presented should be translated by the SMMU.
 * Without this bit present gk20a accesses are *not* translated.
 */
/* Hack, get this from manuals somehow... */
#define NV_MC_SMMU_VADDR_TRANSLATION_BIT	34
#define NV_MC_SMMU_VADDR_TRANSLATE(x)	((x) | \
		(1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT))
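
/*
 * Worked example (editor's illustration, not part of the original header):
 * with the translation bit at position 34, tagging an address routes the
 * resulting gk20a access through the SMMU, e.g.
 *
 *	NV_MC_SMMU_VADDR_TRANSLATE(0x1000ULL) == 0x400001000ULL
 *
 * because 1ULL << 34 == 0x400000000. An address without bit 34 set is
 * presented to memory untranslated, as the comment above notes.
 */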

/* For now keep the size relatively small-ish compared to the full
 * 40b va. 32GB for now. It consists of two 16GB spaces. */
#define NV_GMMU_VA_RANGE	35ULL
#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1)))
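
/*
 * Worked example (editor's illustration, not part of the original header):
 * NV_GMMU_VA_RANGE of 35 bits gives a 2^35 = 32GB GMMU VA space, split at
 * 2^34 = 16GB into the two halves described above:
 *
 *	NV_GMMU_VA_IS_UPPER(0x3ffffffffULL) == false  (last byte of lower 16GB)
 *	NV_GMMU_VA_IS_UPPER(0x400000000ULL) == true   (first byte of upper 16GB)
 */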

struct mem_desc {
	struct dma_buf *ref;
	struct sg_table *sgt;
	u32 size;
};

struct mem_desc_sub {
	u32 offset;
	u32 size;
};

struct gpfifo_desc {
	size_t size;
	u32 entry_num;

	u32 get;
	u32 put;

	bool wrap;

	u64 iova;
	struct gpfifo *cpu_va;
	u64 gpu_va;
};

struct mmu_desc {
	void *cpuva;
	u64 iova;
	size_t size;
};

struct inst_desc {
	u64 iova;
	void *cpuva;
	phys_addr_t cpu_pa;
	size_t size;
};

struct surface_mem_desc {
	u64 iova;
	void *cpuva;
	struct sg_table *sgt;
	size_t size;
};

struct userd_desc {
	struct sg_table *sgt;
	u64 iova;
	void *cpuva;
	size_t size;
	u64 gpu_va;
};

struct runlist_mem_desc {
	u64 iova;
	void *cpuva;
	size_t size;
};

struct patch_desc {
	struct page **pages;
	u64 iova;
	size_t size;
	void *cpu_va;
	u64 gpu_va;
	u32 data_count;
};

struct pmu_mem_desc {
	void *cpuva;
	u64 iova;
	u64 pmu_va;
	size_t size;
};

struct priv_cmd_queue_mem_desc {
	u64 base_iova;
	u32 *base_cpuva;
	size_t size;
};

struct zcull_ctx_desc {
	struct mem_desc mem;
	u64 gpu_va;
	u32 ctx_attr;
	u32 ctx_sw_mode;
};

struct pm_ctx_desc {
	struct mem_desc mem;
	u64 gpu_va;
	u32 ctx_attr;
	u32 ctx_sw_mode;
};

struct gr_ctx_buffer_desc;
struct platform_device;
struct gr_ctx_buffer_desc {
	void (*destroy)(struct platform_device *, struct gr_ctx_buffer_desc *);
	struct sg_table *sgt;
	struct page **pages;
	size_t size;
	u64 iova;
	struct dma_attrs attrs;
	void *priv;
};

struct gr_ctx_desc {
	struct page **pages;
	u64 iova;
	size_t size;
	u64 gpu_va;
};

struct compbit_store_desc {
	struct page **pages;
	struct sg_table *sgt;
	size_t size;
	u64 base_iova;
};

struct page_table_gk20a {
	/* backing for */
	/* Either a *page or a *mem_handle */
	void *ref;
	/* track mapping cnt on this page table */
	u32 ref_cnt;
	struct sg_table *sgt;
	size_t size;
};

#ifndef _NVHOST_MEM_MGR_H
enum gk20a_mem_rw_flag {
	gk20a_mem_flag_none = 0,
	gk20a_mem_flag_read_only = 1,
	gk20a_mem_flag_write_only = 2,
};
#endif

enum gmmu_pgsz_gk20a {
	gmmu_page_size_small = 0,
	gmmu_page_size_big = 1,
	gmmu_nr_page_sizes = 2
};

struct page_directory_gk20a {
	/* backing for */
	u32 num_pdes;
	void *kv;
	/* Either a *page or a *mem_handle */
	void *ref;
	struct sg_table *sgt;
	size_t size;
	struct page_table_gk20a *ptes[gmmu_nr_page_sizes];
};

struct priv_cmd_queue {
	struct priv_cmd_queue_mem_desc mem;
	u64 base_gpuva;	/* gpu_va base */
	u16 size;	/* num of entries in words */
	u16 put;	/* put for priv cmd queue */
	u16 get;	/* get for priv cmd queue */
	struct list_head free;	/* list of pre-allocated free entries */
	struct list_head head;	/* list of used entries */
};

struct priv_cmd_entry {
	u32 *ptr;
	u64 gva;
	u16 get;	/* start of entry in queue */
	u16 size;	/* in words */
	u32 gp_get;	/* gp_get when submitting last priv cmd */
	u32 gp_put;	/* gp_put when submitting last priv cmd */
	u32 gp_wrap;	/* wrap when submitting last priv cmd */
	bool pre_alloc;	/* prealloc entry, free to free list */
	struct list_head list;	/* node for lists */
};
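
/*
 * Editor's note (illustration only): the put/get/size fields of
 * priv_cmd_queue above describe a circular buffer of command words.
 * Assuming the usual ring convention (put = next word to write, get =
 * oldest word still in flight, both counted in words), the space available
 * for a new priv_cmd_entry would be computed roughly as:
 *
 *	u16 free_words = (q->get > q->put) ?
 *		(q->get - q->put) :
 *		(q->size - q->put + q->get);
 *
 * The actual wrap handling lives in the .c file, not in this header; the
 * snippet is a sketch of the convention, not the driver's implementation.
 */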

struct mapped_buffer_node {
	struct vm_gk20a *vm;
	struct rb_node node;
	struct list_head unmap_list;
	struct list_head va_buffers_list;
	struct vm_reserved_va_node *va_node;
	u64 addr;
	u64 size;
	struct dma_buf *dmabuf;
	struct sg_table *sgt;
	struct kref ref;
	u32 user_mapped;
	bool own_mem_ref;
	u32 pgsz_idx;
	u32 ctag_offset;
	u32 ctag_lines;
	u32 flags;
	u32 kind;
	bool va_allocated;
};

struct vm_reserved_va_node {
	struct list_head reserved_va_list;
	struct list_head va_buffers_list;
	u32 pgsz_idx;
	u64 vaddr_start;
	u64 size;
	bool sparse;
};

struct vm_gk20a {
	struct mm_gk20a *mm;
	struct gk20a_as_share *as_share; /* as_share this represents */

	u64 va_start;
	u64 va_limit;

	int num_user_mapped_buffers;

	bool big_pages;	/* enable large page support */
	bool enable_ctag;
	bool tlb_dirty;
	bool mapped;

	struct kref ref;

	struct mutex update_gmmu_lock;

	struct page_directory_gk20a pdes;

	struct gk20a_allocator vma[gmmu_nr_page_sizes];
	struct rb_root mapped_buffers;

	struct list_head reserved_va_list;

	dma_addr_t zero_page_iova;
	void *zero_page_cpuva;
	struct sg_table *zero_page_sgt;
};

struct gk20a;
struct channel_gk20a;

int gk20a_init_mm_support(struct gk20a *g);
int gk20a_init_mm_setup_sw(struct gk20a *g);
int gk20a_init_bar1_vm(struct mm_gk20a *mm);
int gk20a_init_pmu_vm(struct mm_gk20a *mm);

int gk20a_mm_fb_flush(struct gk20a *g);
void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
void gk20a_mm_l2_invalidate(struct gk20a *g);

struct mm_gk20a {
	struct gk20a *g;

	u32 compression_page_size;
	u32 big_page_size;
	u32 pde_stride;
	u32 pde_stride_shift;

	struct {
		u32 order;
		u32 num_ptes;
	} page_table_sizing[gmmu_nr_page_sizes];

	struct {
		u64 size;
	} channel;

	struct {
		u32 aperture_size;
		struct vm_gk20a vm;
		struct inst_desc inst_block;
	} bar1;

	struct {
		u32 aperture_size;
		struct vm_gk20a vm;
		struct inst_desc inst_block;
	} pmu;

	struct mutex l2_op_lock;

	void (*remove_support)(struct mm_gk20a *mm);
	bool sw_ready;
#ifdef CONFIG_DEBUG_FS
	u32 ltc_enabled;
	u32 ltc_enabled_debug;
#endif
};
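
/*
 * Editor's note (illustration only): pde_stride/pde_stride_shift above
 * describe how much GPU VA one page-directory entry covers. Assuming
 * pde_stride is a power of two and pde_stride_shift == log2(pde_stride),
 * a hypothetical helper (not part of this header) for locating the PDE of
 * a given VA would be:
 *
 *	static inline u32 pde_index_example(struct mm_gk20a *mm, u64 gpu_va)
 *	{
 *		return (u32)(gpu_va >> mm->pde_stride_shift);
 *	}
 *
 * The real index and stride handling is in mm_gk20a.c; this is only a
 * sketch of the relationship the two fields imply.
 */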

int gk20a_mm_init(struct mm_gk20a *mm);

#define gk20a_from_mm(mm) ((mm)->g)
#define gk20a_from_vm(vm) ((vm)->mm->g)

#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)

#define DEFAULT_ALLOC_ALIGNMENT (4*1024)

static inline int bar1_aperture_size_mb_gk20a(void)
{
	return 128; /*TBD read this from fuses?*/
}
/* max address bits */
static inline int max_physaddr_bits_gk20a(void)
{
	return 40; /*"old" sys physaddr, meaningful? */
}
static inline int max_vid_physaddr_bits_gk20a(void)
{
	/* "vid phys" is asid/smmu phys?,
	 * i.e. is this the real sys physaddr? */
	return 37;
}
static inline int max_vaddr_bits_gk20a(void)
{
	return 40; /* chopped for area? */
}

#if 0 /*related to addr bits above, concern below TBD on which is accurate */
#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
					bus_bar1_block_ptr_s())
#else
#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
#endif

void gk20a_mm_dump_vm(struct vm_gk20a *vm,
		u64 va_begin, u64 va_end, char *label);

int gk20a_mm_suspend(struct gk20a *g);

phys_addr_t gk20a_get_phys_from_iova(struct device *d,
		u64 dma_addr);

int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
		void *cpuva, u64 iova,
		size_t size);

int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
		struct page **pages, u64 iova,
		size_t size);

void gk20a_free_sgtable(struct sg_table **sgt);

u64 gk20a_mm_iova_addr(struct scatterlist *sgl);

void gk20a_mm_ltc_isr(struct gk20a *g);

bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);

int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);

u64 gk20a_gmmu_map(struct vm_gk20a *vm,
		struct sg_table **sgt,
		u64 size,
		u32 flags,
		int rw_flag);

void gk20a_gmmu_unmap(struct vm_gk20a *vm,
		u64 vaddr,
		u64 size,
		int rw_flag);

struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
		struct sg_table *sgt);

u64 gk20a_vm_map(struct vm_gk20a *vm,
		struct dma_buf *dmabuf,
		u64 offset_align,
		u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
		int kind,
		struct sg_table **sgt,
		bool user_mapped,
		int rw_flag);

/* unmap handle from kernel */
void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
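
/*
 * Editor's sketch (hypothetical usage, inferred only from the prototypes
 * above, not taken from the driver): a kernel-internal mapping of a dma_buf
 * into a vm might look like the following, passing zero for offset_align,
 * flags and kind and assuming a zero return value means failure:
 *
 *	struct sg_table *sgt = NULL;
 *	u64 gpu_va;
 *
 *	gpu_va = gk20a_vm_map(vm, dmabuf, 0, 0, 0, &sgt,
 *			false, gk20a_mem_flag_none);
 *	if (!gpu_va)
 *		return -ENOMEM;
 *	...
 *	gk20a_vm_unmap(vm, gpu_va);
 *
 * Error conventions, valid kind values and flag bits are defined elsewhere;
 * treat this purely as an orientation aid.
 */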

/* get reference to all currently mapped buffers */
int gk20a_vm_get_buffers(struct vm_gk20a *vm,
		struct mapped_buffer_node ***mapped_buffers,
		int *num_buffers);

/* put references on the given buffers */
void gk20a_vm_put_buffers(struct vm_gk20a *vm,
		struct mapped_buffer_node **mapped_buffers,
		int num_buffers);

/* invalidate tlbs for the vm area */
void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm);
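
/*
 * Editor's note on gk20a_mm_tlb_invalidate(): the commit message at the top
 * of this page (bug 1502332) describes a race where one thread starting an
 * invalidate lets another thread submit work before the invalidate has
 * completed. A hedged sketch of the kind of serialization that addresses
 * this, using the per-VM lock declared in struct vm_gk20a above
 * (illustration only, not the actual fix):
 *
 *	mutex_lock(&vm->update_gmmu_lock);
 *	... update page tables, mark vm->tlb_dirty ...
 *	gk20a_mm_tlb_invalidate(vm);
 *	vm->tlb_dirty = false;
 *	mutex_unlock(&vm->update_gmmu_lock);
 *
 * i.e. the page-table update, the invalidate and the dirty flag are handled
 * under one lock so a submit cannot slip in between them.
 */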

/* find buffer corresponding to va */
int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
		struct dma_buf **dmabuf,
		u64 *offset);

void gk20a_vm_get(struct vm_gk20a *vm);
void gk20a_vm_put(struct vm_gk20a *vm);

/* vm-as interface */
struct nvhost_as_alloc_space_args;
struct nvhost_as_free_space_args;
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share);
int gk20a_vm_release_share(struct gk20a_as_share *as_share);
int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
		struct nvhost_as_alloc_space_args *args);
int gk20a_vm_free_space(struct gk20a_as_share *as_share,
		struct nvhost_as_free_space_args *args);
int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
		struct channel_gk20a *ch);
int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
		int dmabuf_fd,
		u64 *offset_align,
		u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
		int kind);
int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset);

int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
#endif /* __MM_GK20A_H__ */