Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-23 01:50:07 +03:00)

Add functions to get per-chip cache maintenance timeout overrides.

JIRA: NVGPUGV100-GV100
Change-Id: Ie14efc616e7af52ede60031c789bd2ae70857a6e
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1582768
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
405 lines · 10 KiB · C
/*
 * GK20A memory management
 *
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef MM_GK20A_H
#define MM_GK20A_H

#include <linux/scatterlist.h>
#include <linux/iommu.h>
#include <asm/dma-iommu.h>
#include <asm/cacheflush.h>

#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>
#include <nvgpu/vm.h>
#include <nvgpu/list.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/kref.h>
#include <nvgpu/atomic.h>
#include <nvgpu/cond.h>
#include <nvgpu/thread.h>

struct nvgpu_pd_cache;

#ifdef CONFIG_ARM64
#define outer_flush_range(a, b)
#define __cpuc_flush_dcache_area __flush_dcache_area
#endif

#define FLUSH_CPU_DCACHE(va, pa, size) \
        do { \
                __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
                outer_flush_range(pa, pa + (size_t)(size)); \
        } while (0)
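
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * header; the variable names below are assumptions for the example): after
 * the CPU writes through a kernel mapping of GPU-visible memory, the written
 * range can be flushed so the GPU observes the data, e.g.
 *
 *	FLUSH_CPU_DCACHE(cpu_va, phys_addr, size);
 *
 * On ARM64, outer_flush_range() is defined away above, so only the
 * __flush_dcache_area() call remains.
 */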

struct gpfifo_desc {
        struct nvgpu_mem mem;
        u32 entry_num;

        u32 get;
        u32 put;

        bool wrap;

        /* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy
         * from userspace to pipe and then from pipe to gpu buffer */
        void *pipe;
};

struct patch_desc {
        struct nvgpu_mem mem;
        u32 data_count;
};

struct zcull_ctx_desc {
        u64 gpu_va;
        u32 ctx_attr;
        u32 ctx_sw_mode;
};

struct pm_ctx_desc {
        struct nvgpu_mem mem;
        u32 pm_mode;
};

struct compbit_store_desc {
        struct nvgpu_mem mem;

        /* The value that is written to the hardware. This depends on
         * the number of ltcs and is not an address. */
        u64 base_hw;
};

struct gk20a_buffer_state {
        struct nvgpu_list_node list;

        /* The valid compbits and the fence must be changed atomically. */
        struct nvgpu_mutex lock;

        /* Offset of the surface within the dma-buf whose state is
         * described by this struct (one dma-buf can contain multiple
         * surfaces with different states). */
        size_t offset;

        /* A bitmask of valid sets of compbits (0 = uncompressed). */
        u32 valid_compbits;

        /* The ZBC color used on this buffer. */
        u32 zbc_color;

        /* This struct reflects the state of the buffer when this
         * fence signals. */
        struct gk20a_fence *fence;
};

static inline struct gk20a_buffer_state *
gk20a_buffer_state_from_list(struct nvgpu_list_node *node)
{
        return (struct gk20a_buffer_state *)
                ((uintptr_t)node - offsetof(struct gk20a_buffer_state, list));
};

struct priv_cmd_queue {
        struct nvgpu_mem mem;
        u32 size; /* num of entries in words */
        u32 put;  /* put for priv cmd queue */
        u32 get;  /* get for priv cmd queue */
};

struct priv_cmd_entry {
        bool valid;
        struct nvgpu_mem *mem;
        u32 off;  /* offset in mem, in u32 entries */
        u64 gva;
        u32 get;  /* start of entry in queue */
        u32 size; /* in words */
};

struct gk20a;
struct channel_gk20a;

int gk20a_init_mm_support(struct gk20a *g);
int gk20a_init_mm_setup_sw(struct gk20a *g);
int gk20a_init_mm_setup_hw(struct gk20a *g);
void gk20a_init_mm_ce_context(struct gk20a *g);

int gk20a_mm_fb_flush(struct gk20a *g);
void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
void gk20a_mm_cbc_clean(struct gk20a *g);
void gk20a_mm_l2_invalidate(struct gk20a *g);

#define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */

struct mmu_fault_info {
        u64 inst_ptr;
        u32 inst_aperture;
        u64 fault_addr;
        u32 fault_addr_aperture;
        u32 timestamp_lo;
        u32 timestamp_hi;
        u32 mmu_engine_id;
        u32 gpc_id;
        u32 client_type;
        u32 client_id;
        u32 fault_type;
        u32 access_type;
        u32 protected_mode;
        u32 replayable_fault;
        u32 replay_fault_en;
        u32 valid;
        u32 faulted_pbdma;
        u32 faulted_engine;
        u32 faulted_subid;
        u32 chid;
        struct channel_gk20a *refch;
        const char *client_type_desc;
        const char *fault_type_desc;
        const char *client_id_desc;
};

struct mm_gk20a {
        struct gk20a *g;

        /* Default GPU VA sizes for the channel address spaces */
        struct {
                u64 user_size;   /* userspace-visible GPU VA region */
                u64 kernel_size; /* kernel-only GPU VA region */
        } channel;

        struct {
                u32 aperture_size;
                struct vm_gk20a *vm;
                struct nvgpu_mem inst_block;
        } bar1;

        struct {
                u32 aperture_size;
                struct vm_gk20a *vm;
                struct nvgpu_mem inst_block;
        } bar2;

        struct {
                u32 aperture_size;
                struct vm_gk20a *vm;
                struct nvgpu_mem inst_block;
        } pmu;

        struct {
                /* using pmu vm currently */
                struct nvgpu_mem inst_block;
        } hwpm;

        struct {
                struct vm_gk20a *vm;
                struct nvgpu_mem inst_block;
        } perfbuf;

        struct {
                struct vm_gk20a *vm;
        } cde;

        struct {
                struct vm_gk20a *vm;
        } ce;

        struct nvgpu_pd_cache *pd_cache;

        struct nvgpu_mutex l2_op_lock;
        struct nvgpu_mutex tlb_lock;
        struct nvgpu_mutex priv_lock;

        struct nvgpu_mem bar2_desc;

#ifdef CONFIG_TEGRA_19x_GPU
        struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM];
        unsigned int hw_fault_buf_status[FAULT_TYPE_NUM];
        struct mmu_fault_info *fault_info[FAULT_TYPE_NUM];
        struct nvgpu_mutex hub_isr_mutex;
        u32 hub_intr_types;
#endif
        /*
         * Separate function to clean up the CE since it requires a channel to
         * be closed, which must happen before fifo cleanup.
         */
        void (*remove_ce_support)(struct mm_gk20a *mm);
        void (*remove_support)(struct mm_gk20a *mm);
        bool sw_ready;
        int physical_bits;
        bool use_full_comp_tag_line;
        bool ltc_enabled_current;
        bool ltc_enabled_target;
        bool bypass_smmu;
        bool disable_bigpage;
        bool has_physical_mode;

        struct nvgpu_mem sysmem_flush;

        u32 pramin_window;
        struct nvgpu_spinlock pramin_window_lock;
        bool force_pramin; /* via debugfs */

        struct {
                size_t size;
                u64 base;
                size_t bootstrap_size;
                u64 bootstrap_base;

                struct nvgpu_allocator allocator;
                struct nvgpu_allocator bootstrap_allocator;

                u32 ce_ctx_id;
                volatile bool cleared;
                struct nvgpu_mutex first_clear_mutex;

                struct nvgpu_list_node clear_list_head;
                struct nvgpu_mutex clear_list_mutex;

                struct nvgpu_cond clearing_thread_cond;
                struct nvgpu_thread clearing_thread;
                struct nvgpu_mutex clearing_thread_lock;
                nvgpu_atomic_t pause_count;

                nvgpu_atomic64_t bytes_pending;
        } vidmem;
};

int gk20a_mm_init(struct mm_gk20a *mm);

#define gk20a_from_mm(mm) ((mm)->g)
#define gk20a_from_vm(vm) ((vm)->mm->g)

#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)

#define DEFAULT_ALLOC_ALIGNMENT (4*1024)

static inline int bar1_aperture_size_mb_gk20a(void)
{
        return 16; /* 16MB is more than enough atm. */
}

/* The maximum GPU VA range supported */
#define NV_GMMU_VA_RANGE 38

/* The default userspace-visible GPU VA size */
#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37)

/* The default kernel-reserved GPU VA size */
#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)

/*
 * When not using unified address spaces, the bottom 56GB of the space are used
 * for small pages, and the remaining high memory is used for large pages.
 */
static inline u64 __nv_gmmu_va_small_page_limit(void)
{
        return ((u64)SZ_1G * 56);
}
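
/*
 * Illustrative sketch (editor's addition; an assumption for exposition, not
 * the driver's actual policy): with the split address space described above,
 * a caller could prefer big pages purely from the base address, e.g.
 *
 *	static inline bool addr_prefers_big_pages(u64 base)
 *	{
 *		return base >= __nv_gmmu_va_small_page_limit();
 *	}
 *
 * The real selection is done by __get_pte_size()/__get_pte_size_fixed_map()
 * declared below, which also take the mapping size and the VM into account.
 */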

enum nvgpu_flush_op {
        NVGPU_FLUSH_DEFAULT,
        NVGPU_FLUSH_FB,
        NVGPU_FLUSH_L2_INV,
        NVGPU_FLUSH_L2_FLUSH,
        NVGPU_FLUSH_CBC_CLEAN,
};
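
/*
 * Illustrative dispatch sketch (editor's addition, not taken from this
 * driver; shown only to relate the ops above to the cache maintenance
 * helpers declared earlier in this header):
 *
 *	switch (op) {
 *	case NVGPU_FLUSH_FB:        gk20a_mm_fb_flush(g); break;
 *	case NVGPU_FLUSH_L2_INV:    gk20a_mm_l2_invalidate(g); break;
 *	case NVGPU_FLUSH_L2_FLUSH:  gk20a_mm_l2_flush(g, false); break;
 *	case NVGPU_FLUSH_CBC_CLEAN: gk20a_mm_cbc_clean(g); break;
 *	default:                    break;
 *	}
 */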

enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
                                              u64 base, u64 size);
enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size);

#if 0 /*related to addr bits above, concern below TBD on which is accurate */
#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
                                           bus_bar1_block_ptr_s())
#else
#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
#endif

int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
                           u32 big_page_size);
u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem);

void gk20a_mm_dump_vm(struct vm_gk20a *vm,
                      u64 va_begin, u64 va_end, char *label);

int gk20a_mm_suspend(struct gk20a *g);

void gk20a_mm_ltc_isr(struct gk20a *g);

bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);

int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);

u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
                          u64 map_offset,
                          struct nvgpu_sgt *sgt,
                          u64 buffer_offset,
                          u64 size,
                          int pgsz_idx,
                          u8 kind_v,
                          u32 ctag_offset,
                          u32 flags,
                          int rw_flag,
                          bool clear_ctags,
                          bool sparse,
                          bool priv,
                          struct vm_gk20a_mapping_batch *batch,
                          enum nvgpu_aperture aperture);

void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
                             u64 vaddr,
                             u64 size,
                             int pgsz_idx,
                             bool va_allocated,
                             int rw_flag,
                             bool sparse,
                             struct vm_gk20a_mapping_batch *batch);

/* vm-as interface */
struct nvgpu_as_alloc_space_args;
struct nvgpu_as_free_space_args;
int gk20a_vm_release_share(struct gk20a_as_share *as_share);
int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
                          struct channel_gk20a *ch);
int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch);

void pde_range_from_vaddr_range(struct vm_gk20a *vm,
                                u64 addr_lo, u64 addr_hi,
                                u32 *pde_lo, u32 *pde_hi);
int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm);
u32 gk20a_mm_get_iommu_bit(struct gk20a *g);

const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
                                                      u32 big_page_size);
void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
                       struct vm_gk20a *vm);

extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];

int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd,
                             u64 *buffer_id, u64 *buffer_len);

#endif /* MM_GK20A_H */