/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/log.h>
#include <nvgpu/list.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/enabled.h>
#include <nvgpu/page_allocator.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

#define gmmu_dbg(g, fmt, args...) \
	nvgpu_log(g, gpu_dbg_map, fmt, ##args)
#define gmmu_dbg_v(g, fmt, args...) \
	nvgpu_log(g, gpu_dbg_map_v, fmt, ##args)

static int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
{
	return nvgpu_mem_begin(g, &entry->mem);
}

static void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
{
	nvgpu_mem_end(g, &entry->mem);
}

static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
				  struct gk20a_mm_entry *entry)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 num_pages = 1 << order;
	u32 len = num_pages * PAGE_SIZE;
	int err;

	err = nvgpu_dma_alloc(g, len, &entry->mem);

	if (err) {
		nvgpu_err(g, "memory allocation failed");
		return -ENOMEM;
	}

	return 0;
}

void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
			   struct gk20a_mm_entry *entry)
{
	struct gk20a *g = gk20a_from_vm(vm);

	if (!entry->mem.size)
		return;

	if (entry->woffset) /* fake shadow mem */
		return;

	nvgpu_dma_free(g, &entry->mem);
}

/*
 * Allocate a physically contiguous region big enough for a full-sized GMMU
 * page table for the given gmmu_page_size. The whole range is zeroed so it's
 * "invalid"/will fault.
 *
 * If a previous entry is supplied, its memory will be used for suballocation
 * of this next entry too, if there is space.
 */
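/*
 * A worked example of the sizing math below, for a hypothetical level with
 * hi_bit = 37, lo_bit = 29 and entry_size = 8 (illustrative numbers only,
 * not taken from any particular chip's mmu_levels table):
 *
 *   order  = 37 - 29 + 1   =  9    (512 entries in this level)
 *   order += ilog2(8)      = 12
 *   bytes  = 1 << 12       = 4096  (table size in bytes)
 *   order -= PAGE_SHIFT    =  0    (one 4K page, assuming 4K CPU pages)
 *
 * When the adjusted order goes negative the table is smaller than a page, so
 * the code first tries to carve it out of prev_entry's memory; woffset is the
 * word (u32) offset of that suballocation within the shared nvgpu_mem.
 */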
int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
				 enum gmmu_pgsz_gk20a pgsz_idx,
				 const struct gk20a_mmu_level *l,
				 struct gk20a_mm_entry *entry,
				 struct gk20a_mm_entry *prev_entry)
{
	int err = -ENOMEM;
	int order;
	struct gk20a *g = gk20a_from_vm(vm);
	u32 bytes;

	/* allocate enough pages for the table */
	order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
	order += ilog2(l->entry_size);
	bytes = 1 << order;
	order -= PAGE_SHIFT;
	if (order < 0 && prev_entry) {
		/* try to suballocate from previous chunk */
		u32 capacity = prev_entry->mem.size / bytes;
		u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
		u32 free = capacity - prev - 1;

		nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
			  capacity, prev, free, bytes);

		if (free) {
			memcpy(&entry->mem, &prev_entry->mem,
			       sizeof(entry->mem));
			entry->woffset = prev_entry->woffset
				+ bytes / sizeof(u32);
			err = 0;
		}
	}

	if (err) {
		/* no suballoc space */
		order = max(0, order);
		err = nvgpu_alloc_gmmu_pages(vm, order, entry);
		entry->woffset = 0;
	}

	nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
		  entry,
		  (entry->mem.priv.sgt &&
		   entry->mem.aperture == APERTURE_SYSMEM) ?
		  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
		  order, entry->woffset);
	if (err)
		return err;
	entry->pgsz = pgsz_idx;
	entry->mem.skip_wmb = true;

	return err;
}

/*
 * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
 * VA will be allocated for you. If @addr is non-zero then the buffer will be
 * mapped at @addr.
 */
static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
			    struct nvgpu_mem *mem,
			    u64 addr,
			    u64 size,
			    u32 flags,
			    int rw_flag,
			    bool priv,
			    enum nvgpu_aperture aperture)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u64 vaddr;

	struct sg_table *sgt = mem->priv.sgt;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	vaddr = g->ops.mm.gmmu_map(vm, addr,
				   sgt,    /* sg table */
				   0,      /* sg offset */
				   size,
				   gmmu_page_size_kernel,
				   0,      /* kind */
				   0,      /* ctag_offset */
				   flags, rw_flag,
				   false,  /* clear_ctags */
				   false,  /* sparse */
				   priv,   /* priv */
				   NULL,   /* mapping_batch handle */
				   aperture);
	nvgpu_mutex_release(&vm->update_gmmu_lock);
	if (!vaddr) {
		nvgpu_err(g, "failed to allocate va space");
		return 0;
	}

	return vaddr;
}

/*
 * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings.
 */
u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
		   struct nvgpu_mem *mem,
		   u64 size,
		   u32 flags,
		   int rw_flag,
		   bool priv,
		   enum nvgpu_aperture aperture)
{
	return __nvgpu_gmmu_map(vm, mem, 0, size, flags, rw_flag, priv,
				aperture);
}

/*
 * Like nvgpu_gmmu_map() except it maps the buffer at a fixed address instead.
 */
u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
			 struct nvgpu_mem *mem,
			 u64 addr,
			 u64 size,
			 u32 flags,
			 int rw_flag,
			 bool priv,
			 enum nvgpu_aperture aperture)
{
	return __nvgpu_gmmu_map(vm, mem, addr, size, flags, rw_flag, priv,
				aperture);
}

void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
{
	struct gk20a *g = gk20a_from_vm(vm);

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	g->ops.mm.gmmu_unmap(vm,
			     gpu_va,
			     mem->size,
			     gmmu_page_size_kernel,
			     true, /* va_allocated */
			     gk20a_mem_flag_none,
			     false,
			     NULL);

	nvgpu_mutex_release(&vm->update_gmmu_lock);
}

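/*
 * A minimal usage sketch for the kernel-internal mapping API above. It
 * assumes a struct gk20a *g and a struct vm_gk20a *vm are already in hand;
 * the buffer ("pool" below) and its size are purely illustrative and error
 * handling is elided:
 *
 *	struct nvgpu_mem pool;
 *	u64 gpu_va;
 *
 *	err = nvgpu_dma_alloc(g, PAGE_SIZE, &pool);
 *	gpu_va = nvgpu_gmmu_map(vm, &pool, pool.size,
 *				0, gk20a_mem_flag_none,
 *				false, pool.aperture);
 *	...
 *	nvgpu_gmmu_unmap(vm, &pool, gpu_va);
 *	nvgpu_dma_free(g, &pool);
 */
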
static int update_gmmu_level_locked(struct vm_gk20a *vm,
				    struct gk20a_mm_entry *pte,
				    enum gmmu_pgsz_gk20a pgsz_idx,
				    struct scatterlist **sgl,
				    u64 *offset,
				    u64 *iova,
				    u64 gpu_va, u64 gpu_end,
				    u8 kind_v, u64 *ctag,
				    bool cacheable, bool unmapped_pte,
				    int rw_flag,
				    bool sparse,
				    int lvl,
				    bool priv,
				    enum nvgpu_aperture aperture)
{
	struct gk20a *g = gk20a_from_vm(vm);
	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
	int err = 0;
	u32 pde_i;
	u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
	struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL;

	gk20a_dbg_fn("");

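	/*
	 * pde_i is the index of the entry within this level that covers
	 * gpu_va: mask off the address bits above hi_bit, then shift out the
	 * bits below lo_bit. pde_size is the VA span covered by one entry at
	 * this level, so the loop below advances one entry per iteration.
	 */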
	pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL))
		>> (u64)l->lo_bit[pgsz_idx];

	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
		  pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);

	while (gpu_va < gpu_end) {
		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);

		/* Allocate next level */
		if (next_l->update_entry) {
			if (!pte->entries) {
				int num_entries =
					1 <<
					(l->hi_bit[pgsz_idx]
					 - l->lo_bit[pgsz_idx] + 1);
				pte->entries =
					nvgpu_vzalloc(g,
						sizeof(struct gk20a_mm_entry) *
						num_entries);
				if (!pte->entries)
					return -ENOMEM;
				pte->pgsz = pgsz_idx;
				pte->num_entries = num_entries;
			}
			prev_pte = next_pte;
			next_pte = pte->entries + pde_i;

			if (!next_pte->mem.size) {
				err = nvgpu_zalloc_gmmu_page_table(vm,
					pgsz_idx, next_l, next_pte, prev_pte);
				if (err)
					return err;
			}
		}

		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
				sgl, offset, iova,
				kind_v, ctag, cacheable, unmapped_pte,
				rw_flag, sparse, priv, aperture);
		if (err)
			return err;

		if (next_l->update_entry) {
			/* get cpu access to the ptes */
			err = map_gmmu_pages(g, next_pte);
			if (err) {
				nvgpu_err(g,
					  "couldn't map ptes for update as=%d",
					  vm_aspace_id(vm));
				return err;
			}
			err = update_gmmu_level_locked(vm, next_pte,
				pgsz_idx,
				sgl,
				offset,
				iova,
				gpu_va,
				next,
				kind_v, ctag, cacheable, unmapped_pte,
				rw_flag, sparse, lvl+1, priv, aperture);
			unmap_gmmu_pages(g, next_pte);

			if (err)
				return err;
		}

		pde_i++;
		gpu_va = next;
	}

	gk20a_dbg_fn("done");

	return 0;
}

/*
 * This is the true top level GMMU mapping logic. It breaks down the incoming
 * scatter gather table and does the actual programming of GPU virtual address
 * to physical* address.
 *
 * The update of each level of the page tables is farmed out to chip specific
 * implementations, but the logic around that is generic to all chips. Every
 * chip has some number of PDE levels and then a PTE level.
 *
 * Each chunk of the incoming SGT is sent to the chip specific implementation
 * of page table update.
 *
 * [*] Note: the "physical" address may actually be an IO virtual address in
 * the case of SMMU usage.
 */
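/*
 * Rough call structure, as implemented below: gk20a_locked_gmmu_map() and
 * gk20a_locked_gmmu_unmap() call update_gmmu_ptes_locked(), which resolves
 * the IOVA/chunk to start from and then calls update_gmmu_level_locked() on
 * the PDB (level 0). That function recurses one level at a time, invoking
 * the per-chip l->update_entry() hook for each PDE/PTE it touches.
 */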
static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
				   enum gmmu_pgsz_gk20a pgsz_idx,
				   struct sg_table *sgt,
				   u64 buffer_offset,
				   u64 gpu_va, u64 gpu_end,
				   u8 kind_v, u32 ctag_offset,
				   bool cacheable, bool unmapped_pte,
				   int rw_flag,
				   bool sparse,
				   bool priv,
				   enum nvgpu_aperture aperture)
{
	struct gk20a *g = gk20a_from_vm(vm);
	int ctag_granularity = g->ops.fb.compression_page_size(g);
	u64 ctag = (u64)ctag_offset * (u64)ctag_granularity;
	u64 iova = 0;
	u64 space_to_skip = buffer_offset;
	u64 map_size = gpu_end - gpu_va;
	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
	int err;
	struct scatterlist *sgl = NULL;
	struct nvgpu_page_alloc *alloc = NULL;
	struct page_alloc_chunk *chunk = NULL;
	u64 length;

	/* note: here we need to map kernel to small, since the
	 * low-level mmu code assumes 0 is small and 1 is big pages */
	if (pgsz_idx == gmmu_page_size_kernel)
		pgsz_idx = gmmu_page_size_small;

	if (space_to_skip & (page_size - 1))
		return -EINVAL;

	err = map_gmmu_pages(g, &vm->pdb);
	if (err) {
		nvgpu_err(g,
			  "couldn't map ptes for update as=%d",
			  vm_aspace_id(vm));
		return err;
	}

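	/*
	 * Two cases from here: for vidmem the buffer is a list of
	 * page_alloc_chunks which is walked directly; for sysmem the IOVA
	 * comes from the SGT: a single SMMU IOVA when the SMMU is in use,
	 * otherwise the scatterlist is walked chunk by chunk.
	 */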
	if (aperture == APERTURE_VIDMEM) {
		gmmu_dbg_v(g, "vidmem map size_idx=%d, gpu_va=[%llx,%llx]",
			   pgsz_idx, gpu_va, gpu_end-1);

		if (sgt) {
			alloc = get_vidmem_page_alloc(sgt->sgl);

			nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
					page_alloc_chunk, list_entry) {
				if (space_to_skip &&
				    space_to_skip > chunk->length) {
					space_to_skip -= chunk->length;
				} else {
					iova = chunk->base + space_to_skip;
					length = chunk->length - space_to_skip;
					length = min(length, map_size);
					space_to_skip = 0;

					err = update_gmmu_level_locked(vm,
						&vm->pdb, pgsz_idx,
						&sgl,
						&space_to_skip,
						&iova,
						gpu_va, gpu_va + length,
						kind_v, &ctag,
						cacheable, unmapped_pte,
						rw_flag, sparse, 0, priv,
						aperture);
					if (err)
						break;

					/* need to set explicit zero here */
					space_to_skip = 0;
					gpu_va += length;
					map_size -= length;

					if (!map_size)
						break;
				}
			}
		} else {
			err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
					&sgl,
					&space_to_skip,
					&iova,
					gpu_va, gpu_end,
					kind_v, &ctag,
					cacheable, unmapped_pte, rw_flag,
					sparse, 0, priv,
					aperture);
		}
	} else {
		gmmu_dbg_v(g,
			"pgsz=%-6d, gpu_va: %#-12llx +%#-6llx phys: %#-12llx "
			"buffer offset: %-4lld, nents: %d",
			page_size,
			gpu_va, gpu_end - gpu_va,
			sgt ? g->ops.mm.get_iova_addr(g, sgt->sgl, 0) : 0ULL,
			buffer_offset,
			sgt ? sgt->nents : 0);

		if (sgt) {
			iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
			if (!vm->mm->bypass_smmu && iova) {
				iova += space_to_skip;
			} else {
				sgl = sgt->sgl;

				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
					  (u64)sg_phys(sgl),
					  sgl->length);

				while (space_to_skip && sgl &&
				       space_to_skip + page_size > sgl->length) {
					space_to_skip -= sgl->length;
					sgl = sg_next(sgl);
					gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
						  (u64)sg_phys(sgl),
						  sgl->length);
				}

				iova = sg_phys(sgl) + space_to_skip;
			}
		}

		err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
				&sgl,
				&space_to_skip,
				&iova,
				gpu_va, gpu_end,
				kind_v, &ctag,
				cacheable, unmapped_pte, rw_flag,
				sparse, 0, priv,
				aperture);
	}

	unmap_gmmu_pages(g, &vm->pdb);

	mb();

	gk20a_dbg_fn("done");

	return err;
}

/**
 * gk20a_locked_gmmu_map - Map a buffer into the GMMU
 *
 * This is for non-vGPU chips. It's part of the HAL at the moment but really
 * should not be. Chip specific stuff is handled at the PTE/PDE programming
 * layer. The rest of the logic is essentially generic for all chips.
 *
 * To call this function you must have locked the VM lock: vm->update_gmmu_lock.
 * However, note that this function is not called directly; it's used through
 * the mm.gmmu_map() HAL. So before calling the mm.gmmu_map() HAL make sure
 * you have the update_gmmu_lock acquired.
 */
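/*
 * The expected calling pattern, mirroring __nvgpu_gmmu_map() above (the
 * argument list is abbreviated here; see that function for the full set):
 *
 *	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 *	va = g->ops.mm.gmmu_map(vm, addr, sgt, ..., aperture);
 *	nvgpu_mutex_release(&vm->update_gmmu_lock);
 */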
u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
			  u64 map_offset,
			  struct sg_table *sgt,
			  u64 buffer_offset,
			  u64 size,
			  int pgsz_idx,
			  u8 kind_v,
			  u32 ctag_offset,
			  u32 flags,
			  int rw_flag,
			  bool clear_ctags,
			  bool sparse,
			  bool priv,
			  struct vm_gk20a_mapping_batch *batch,
			  enum nvgpu_aperture aperture)
{
	int err = 0;
	bool allocated = false;
	struct gk20a *g = gk20a_from_vm(vm);
	int ctag_granularity = g->ops.fb.compression_page_size(g);
	u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);

	/* Allocate (or validate when map_offset != 0) the virtual address. */
	if (!map_offset) {
		map_offset = __nvgpu_vm_alloc_va(vm, size,
						 pgsz_idx);
		if (!map_offset) {
			nvgpu_err(g, "failed to allocate va space");
			err = -ENOMEM;
			goto fail_alloc;
		}
		allocated = true;
	}

	gmmu_dbg(g,
		 "gv: 0x%04x_%08x + 0x%-7llx "
		 "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
		 "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
		 "kind=0x%x flags=0x%x apt=%s",
		 u64_hi32(map_offset), u64_lo32(map_offset), size,
		 sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
		 sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
		 sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
		 sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
		 vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
		 ctag_lines, ctag_offset,
		 kind_v, flags, nvgpu_aperture_str(aperture));

	err = update_gmmu_ptes_locked(vm, pgsz_idx,
				      sgt,
				      buffer_offset,
				      map_offset, map_offset + size,
				      kind_v,
				      ctag_offset,
				      flags &
				      NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
				      flags &
				      NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE,
				      rw_flag,
				      sparse,
				      priv,
				      aperture);
	if (err) {
		nvgpu_err(g, "failed to update ptes on map");
		goto fail_validate;
	}

	if (!batch)
		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
	else
		batch->need_tlb_invalidate = true;

	return map_offset;
fail_validate:
	if (allocated)
		__nvgpu_vm_free_va(vm, map_offset, pgsz_idx);
fail_alloc:
	nvgpu_err(g, "%s: failed with err=%d", __func__, err);
	return 0;
}

void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
			     u64 vaddr,
			     u64 size,
			     int pgsz_idx,
			     bool va_allocated,
			     int rw_flag,
			     bool sparse,
			     struct vm_gk20a_mapping_batch *batch)
{
	int err = 0;
	struct gk20a *g = gk20a_from_vm(vm);

	if (va_allocated) {
		err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
		if (err) {
			nvgpu_err(g, "failed to free va");
			return;
		}
	}

	/* unmap here needs to know the page size we assigned at mapping */
	err = update_gmmu_ptes_locked(vm,
				      pgsz_idx,
				      NULL, /* n/a for unmap */
				      0,
				      vaddr,
				      vaddr + size,
				      0, 0, false /* n/a for unmap */,
				      false, rw_flag,
				      sparse, 0,
				      APERTURE_INVALID); /* don't care for unmap */
	if (err)
		nvgpu_err(g, "failed to update gmmu ptes on unmap");

	/* Flush L2 so any dirty lines are written out *now*. Also, as we
	 * could potentially be switching this buffer from nonvolatile
	 * (L2 cacheable) to volatile (L2 non-cacheable) at some point in the
	 * future, we need to invalidate L2, e.g. when switching from a render
	 * buffer unmap (here) to later using the same memory for gmmu ptes.
	 * Note the positioning of this relative to any smmu unmapping
	 * (below). */

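	/*
	 * When a mapping batch is in use, the L2 flush is done at most once
	 * per batch and the TLB invalidate is deferred: this function only
	 * sets need_tlb_invalidate, and the caller that owns the batch is
	 * expected to perform the invalidate when the batch is finished.
	 */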
	if (!batch) {
		gk20a_mm_l2_flush(g, true);
		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
	} else {
		if (!batch->gpu_l2_flushed) {
			gk20a_mm_l2_flush(g, true);
			batch->gpu_l2_flushed = true;
		}
		batch->need_tlb_invalidate = true;
	}
}