mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
Move vm_priv.h to <nvgpu/linux/vm.h> and rename nvgpu_vm_map() to
nvgpu_vm_map_linux(). Also remove a redundant unmap function from the
unmap path. These changes are the beginning of reworking the nvgpu
Linux mapping and unmapping code. The rest of this patch is just the
necessary changes to use the new map function naming and the new path
to the Linux vm header.

Patch Series Goal
-----------------

There are two major goals for this patch series. Note that these goals
are not achieved in this patch; there will be subsequent patches.

1. Remove all last vestiges of Linux code from common/mm/vm.c
2. Implement map caching in the common/mm/vm.c code

To accomplish this, the struct nvgpu_mapped_buf data structure used by
the VM mapping code first needs to be made completely Linux-free. That
means implementing an abstraction to hold the Linux-specific state that
mapped buffers carry around (SGT, dma_buf). This is why the vm_priv.h
code has been moved: it will need to be included by the <nvgpu/vm.h>
header so that the OS-specific struct can be pulled into struct
nvgpu_mapped_buf.

Next, renaming nvgpu_vm_map() to nvgpu_vm_map_linux() is in preparation
for adding a new nvgpu_vm_map() that handles map caching with
nvgpu_mapped_buf. The mapping code is fairly straightforward:
nvgpu_vm_map() does the OS-generic work; each OS then calls this
function from an nvgpu_vm_map_<OS>() (or the like) that does any
OS-specific adjustments/management.

Freeing buffers is much trickier, however. The maps are all reference
counted, since userspace does not track buffers and expects us to
handle this instead. Ugh! Because of the ref-counts, the free code will
require a callback into the OS-specific code, since the OS-specific
code cannot free a buffer directly. This makes the path for freeing a
buffer quite convoluted.

JIRA NVGPU-30
JIRA NVGPU-71

Change-Id: I5e0975f60663a0d6cf0a6bd90e099f51e02c2395
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1578896
GVS: Gerrit_Virtual_Submit
Reviewed-by: David Martinez Nieto <dmartineznie@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
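To make the direction concrete, the split described above could look roughly like the sketch below. This is only an illustration, not code from this patch: the simplified struct layout and the names nvgpu_mapped_buf_priv, os_priv, nvgpu_vm_unmap_system() and nvgpu_vm_unmap_ref() are assumptions made for the example.

/*
 * Illustrative sketch only. The common struct keeps no Linux types;
 * the Linux bits live in an embedded OS-private struct defined by the
 * <nvgpu/linux/vm.h> header.
 */
struct nvgpu_mapped_buf_priv {
	struct dma_buf *dmabuf;		/* Linux buffer handle */
	struct sg_table *sgt;		/* Linux scatter-gather table */
};

struct nvgpu_mapped_buf {
	struct vm_gk20a *vm;
	u64 addr;
	u64 size;
	struct nvgpu_ref ref;		/* map-cache reference count */

	/* OS-specific state, opaque to common/mm/vm.c. */
	struct nvgpu_mapped_buf_priv os_priv;
};

/*
 * Release callback invoked when the last reference is dropped. The
 * common code cannot release the dma_buf itself, so it calls back into
 * the OS layer (hypothetical nvgpu_vm_unmap_system()) to unpin the
 * buffer and drop the dma_buf reference before freeing the tracking
 * struct.
 */
static void nvgpu_vm_unmap_ref(struct nvgpu_ref *ref)
{
	struct nvgpu_mapped_buf *buf =
		container_of(ref, struct nvgpu_mapped_buf, ref);

	nvgpu_vm_unmap_system(buf);
	nvgpu_kfree(buf->vm->mm->g, buf);
}

With a layout along these lines, common/mm/vm.c can cache and ref-count mappings purely in terms of struct nvgpu_mapped_buf, while only the Linux code ever touches the dma_buf and SGT held in os_priv.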
729 lines
19 KiB
C
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-buf.h>
#include <linux/scatterlist.h>

#include <nvgpu/log.h>
#include <nvgpu/lock.h>
#include <nvgpu/rbtree.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/vm.h>
#include <nvgpu/linux/vidmem.h>
#include <nvgpu/linux/nvgpu_mem.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "gk20a/kind_gk20a.h"
#include "gk20a/platform_gk20a.h"

#include "os_linux.h"
#include "dmabuf.h"

static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
	struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind)
{
	struct nvgpu_rbtree_node *node = NULL;
	struct nvgpu_rbtree_node *root = vm->mapped_buffers;

	nvgpu_rbtree_enum_start(0, &node, root);

	while (node) {
		struct nvgpu_mapped_buf *mapped_buffer =
			mapped_buffer_from_rbtree_node(node);

		if (mapped_buffer->dmabuf == dmabuf &&
		    kind == mapped_buffer->kind)
			return mapped_buffer;

		nvgpu_rbtree_enum_next(&node, node);
	}

	return NULL;
}

/*
 * Determine alignment for a passed buffer. Necessary since the buffer may
 * appear big to map with large pages but the SGL may have chunks that are not
 * aligned on a 64/128kB large page boundary.
 */
static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
				      enum nvgpu_aperture aperture)
{
	u64 align = 0, chunk_align = 0;
	u64 buf_addr;

	if (aperture == APERTURE_VIDMEM) {
		struct nvgpu_page_alloc *alloc =
			nvgpu_vidmem_get_page_alloc(sgl);
		struct nvgpu_sgt *sgt = &alloc->sgt;
		void *sgl_vid = sgt->sgl;

		while (sgl_vid) {
			chunk_align = 1ULL <<
				__ffs(nvgpu_sgt_get_phys(sgt, sgl_vid) |
				      nvgpu_sgt_get_length(sgt, sgl_vid));

			if (align)
				align = min(align, chunk_align);
			else
				align = chunk_align;

			sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid);
		}

		return align;
	}

	buf_addr = (u64)sg_dma_address(sgl);

	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
		while (sgl) {
			buf_addr = (u64)sg_phys(sgl);
			chunk_align = 1ULL << __ffs(buf_addr |
						    (u64)sgl->length);

			if (align)
				align = min(align, chunk_align);
			else
				align = chunk_align;
			sgl = sg_next(sgl);
		}

		return align;
	}

	align = 1ULL << __ffs(buf_addr);

	return align;
}

static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr,
			     bool *pkind_compressible)
{
	struct gk20a *g = gk20a_from_vm(vm);
	bool kind_compressible;

	if (unlikely(bfr->kind_v == g->ops.mm.get_kind_invalid()))
		bfr->kind_v = g->ops.mm.get_kind_pitch();

	if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
		nvgpu_err(g, "kind 0x%x not supported", bfr->kind_v);
		return -EINVAL;
	}

	bfr->uc_kind_v = g->ops.mm.get_kind_invalid();
	/* find a suitable incompressible kind if it becomes necessary later */
	kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
	if (kind_compressible) {
		bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
		if (unlikely(bfr->uc_kind_v == g->ops.mm.get_kind_invalid())) {
			/* shouldn't happen, but it is worth cross-checking */
			nvgpu_err(g, "comptag kind 0x%x can't be"
				  " downgraded to uncompressed kind",
				  bfr->kind_v);
			return -EINVAL;
		}
	}

	*pkind_compressible = kind_compressible;
	return 0;
}

static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
					     u32 flags,
					     struct buffer_attrs *bfr,
					     enum gmmu_pgsz_gk20a pgsz_idx)
{
	bool kind_compressible;
	struct gk20a *g = gk20a_from_vm(vm);
	int ctag_granularity = g->ops.fb.compression_page_size(g);

	if (!bfr->use_kind_v)
		bfr->kind_v = g->ops.mm.get_kind_invalid();
	if (!bfr->use_uc_kind_v)
		bfr->uc_kind_v = g->ops.mm.get_kind_invalid();

	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		kind_compressible = (bfr->kind_v !=
				     g->ops.mm.get_kind_invalid());
		if (!kind_compressible)
			bfr->kind_v = bfr->uc_kind_v;
	} else {
		int err = setup_kind_legacy(vm, bfr, &kind_compressible);

		if (err)
			return err;
	}

	/* comptags only supported for suitable kinds, 128KB pagesize */
	if (kind_compressible &&
	    vm->gmmu_page_sizes[pgsz_idx] <
	    g->ops.fb.compressible_page_size(g)) {
		/* it is safe to fall back to uncompressed as
		   functionality is not harmed */
		bfr->kind_v = bfr->uc_kind_v;
		kind_compressible = false;
	}
	if (kind_compressible)
		bfr->ctag_lines = DIV_ROUND_UP_ULL(bfr->size, ctag_granularity);
	else
		bfr->ctag_lines = 0;

	bfr->use_kind_v = (bfr->kind_v != g->ops.mm.get_kind_invalid());
	bfr->use_uc_kind_v = (bfr->uc_kind_v != g->ops.mm.get_kind_invalid());

	return 0;
}

int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
		      struct dma_buf **dmabuf,
		      u64 *offset)
{
	struct nvgpu_mapped_buf *mapped_buffer;

	gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
	if (!mapped_buffer) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		return -EINVAL;
	}

	*dmabuf = mapped_buffer->dmabuf;
	*offset = gpu_va - mapped_buffer->addr;

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return 0;
}

/*
 * vm->update_gmmu_lock must be held. This checks to see if we already have
 * mapped the passed buffer into this VM. If so, just return the existing
 * mapping address.
 */
static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
				   struct dma_buf *dmabuf,
				   u64 offset_align,
				   u32 flags,
				   int kind,
				   bool user_mapped,
				   int rw_flag)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct nvgpu_mapped_buf *mapped_buffer = NULL;

	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
		if (!mapped_buffer)
			return 0;

		if (mapped_buffer->dmabuf != dmabuf ||
		    mapped_buffer->kind != (u32)kind)
			return 0;
	} else {
		mapped_buffer =
			__nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
		if (!mapped_buffer)
			return 0;
	}

	if (mapped_buffer->flags != flags)
		return 0;

	/* mark the buffer as used */
	if (user_mapped) {
		if (mapped_buffer->user_mapped == 0)
			vm->num_user_mapped_buffers++;
		mapped_buffer->user_mapped++;

		/* If the mapping comes from user space, we own
		 * the handle ref. Since we reuse an
		 * existing mapping here, we need to give back those
		 * refs once in order not to leak.
		 */
		if (mapped_buffer->own_mem_ref)
			dma_buf_put(mapped_buffer->dmabuf);
		else
			mapped_buffer->own_mem_ref = true;
	}
	nvgpu_ref_get(&mapped_buffer->ref);

	nvgpu_log(g, gpu_dbg_map,
		  "gv: 0x%04x_%08x + 0x%-7zu "
		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
		  "flags=0x%x apt=%s (reused)",
		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
		  dmabuf->size,
		  u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
		  u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
		  u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
		  u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
		  vm_aspace_id(vm),
		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
		  mapped_buffer->flags,
		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));

	return mapped_buffer->addr;
}

static int setup_bfr_kind_fields(struct buffer_attrs *bfr, s16 compr_kind,
				 s16 incompr_kind, u32 flags)
{
	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		/* were we supplied with a kind in either parameter? */
		if ((compr_kind < 0 || compr_kind >= NV_KIND_ATTR_SIZE) &&
		    (incompr_kind < 0 || incompr_kind >= NV_KIND_ATTR_SIZE))
			return -EINVAL;

		if (compr_kind != NV_KIND_INVALID) {
			bfr->use_kind_v = true;
			bfr->kind_v = (u8)compr_kind;
		}

		if (incompr_kind != NV_KIND_INVALID) {
			bfr->use_uc_kind_v = true;
			bfr->uc_kind_v = (u8)incompr_kind;
		}
	} else {
		if (compr_kind < 0 || compr_kind >= NV_KIND_ATTR_SIZE)
			return -EINVAL;

		bfr->use_kind_v = true;
		bfr->kind_v = (u8)compr_kind;

		/*
		 * Note: setup_buffer_kind_and_compression() will
		 * figure out uc_kind_v or return an error
		 */
	}

	return 0;
}

u64 nvgpu_vm_map_linux(struct vm_gk20a *vm,
		       struct dma_buf *dmabuf,
		       u64 offset_align,
		       u32 flags,
		       s16 compr_kind,
		       s16 incompr_kind,
		       bool user_mapped,
		       int rw_flag,
		       u64 buffer_offset,
		       u64 mapping_size,
		       struct vm_gk20a_mapping_batch *batch)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
	struct nvgpu_mapped_buf *mapped_buffer = NULL;
	bool va_allocated = false;
	u64 map_offset = 0;
	int err = 0;
	struct buffer_attrs bfr = {NULL};
	struct gk20a_comptags comptags;
	bool clear_ctags = false;
	struct scatterlist *sgl;
	struct nvgpu_vm_area *vm_area = NULL;
	u32 ctag_offset;
	enum nvgpu_aperture aperture;
	struct nvgpu_sgt *nvgpu_sgt;

	/*
	 * The kind used as part of the key for map caching. HW may
	 * actually be programmed with the fallback kind in case the
	 * key kind is compressible but we're out of comptags.
	 */
	s16 map_key_kind;

	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		if (compr_kind != NV_KIND_INVALID)
			map_key_kind = compr_kind;
		else
			map_key_kind = incompr_kind;
	} else {
		map_key_kind = compr_kind;
	}

	if (user_mapped && vm->userspace_managed &&
	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
		nvgpu_err(g, "non-fixed-offset mapping not available on "
			  "userspace managed address spaces");
		return -EFAULT;
	}

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	/* check if this buffer is already mapped */
	if (!vm->userspace_managed) {
		map_offset = __nvgpu_vm_find_mapping(
			vm, dmabuf, offset_align,
			flags, map_key_kind,
			user_mapped, rw_flag);
		if (map_offset) {
			nvgpu_mutex_release(&vm->update_gmmu_lock);
			return map_offset;
		}
	}

	/* pin buffer to get phys/iovmm addr */
	bfr.sgt = gk20a_mm_pin(dev, dmabuf);
	if (IS_ERR(bfr.sgt)) {
		/* Falling back to physical is actually possible
		 * here in many cases if we use 4K phys pages in the
		 * gmmu. However we have some regions which require
		 * contig regions to work properly (either phys-contig
		 * or contig through smmu io_vaspace). Until we can
		 * track the difference between those two cases we have
		 * to fail the mapping when we run out of SMMU space.
		 */
		nvgpu_warn(g, "oom allocating tracking buffer");
		goto clean_up;
	}

	err = setup_bfr_kind_fields(&bfr, compr_kind, incompr_kind, flags);
	if (err)
		goto clean_up;

	bfr.size = dmabuf->size;
	sgl = bfr.sgt->sgl;

	aperture = gk20a_dmabuf_aperture(g, dmabuf);
	if (aperture == APERTURE_INVALID) {
		err = -EINVAL;
		goto clean_up;
	}

	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
		map_offset = offset_align;

	bfr.align = nvgpu_get_buffer_alignment(g, sgl, aperture);
	if (g->mm.disable_bigpage)
		bfr.pgsz_idx = gmmu_page_size_small;
	else
		bfr.pgsz_idx = __get_pte_size(vm, map_offset,
					      min_t(u64, bfr.size, bfr.align));
	mapping_size = mapping_size ? mapping_size : bfr.size;
	mapping_size = ALIGN(mapping_size, SZ_4K);

	if ((mapping_size > bfr.size) ||
	    (buffer_offset > (bfr.size - mapping_size))) {
		err = -EINVAL;
		goto clean_up;
	}

	/* Check if we should use a fixed offset for mapping this buffer */
	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		err = nvgpu_vm_area_validate_buffer(vm, offset_align, mapping_size,
						    bfr.pgsz_idx, &vm_area);
		if (err)
			goto clean_up;

		map_offset = offset_align;
		va_allocated = false;
	} else
		va_allocated = true;

	err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
	if (unlikely(err)) {
		nvgpu_err(g, "failure setting up kind and compression");
		goto clean_up;
	}

	/* bar1 and pmu vm don't need ctag */
	if (!vm->enable_ctag)
		bfr.ctag_lines = 0;

	gk20a_get_comptags(dev, dmabuf, &comptags);

	/* ensure alignment to compression page size if compression enabled */
	if (bfr.ctag_offset)
		mapping_size = ALIGN(mapping_size,
				     g->ops.fb.compression_page_size(g));

	if (bfr.ctag_lines && !comptags.lines) {
		/* allocate compression resources if needed */
		err = gk20a_alloc_comptags(g, dev, dmabuf,
					   ctag_allocator,
					   bfr.ctag_lines);
		if (unlikely(err)) {
			/* TBD: we can partially alloc ctags as well... */
			if (bfr.use_uc_kind_v) {
				/* no comptags, but fallback kind available */
				bfr.kind_v = bfr.uc_kind_v;
			} else {
				nvgpu_err(g, "comptag alloc failed and no fallback kind specified");
				goto clean_up;
			}
		} else {
			gk20a_get_comptags(dev,
					   dmabuf, &comptags);

			if (g->ops.ltc.cbc_ctrl)
				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
						    comptags.offset,
						    comptags.offset +
						    comptags.allocated_lines - 1);
			else
				clear_ctags = true;
		}
	}

	/* store the comptag info */
	bfr.ctag_offset = comptags.offset;
	bfr.ctag_lines = comptags.lines;
	bfr.ctag_allocated_lines = comptags.allocated_lines;
	bfr.ctag_user_mappable = comptags.user_mappable;

	/*
	 * Calculate comptag index for this mapping. Differs in
	 * case of partial mapping.
	 */
	ctag_offset = comptags.offset;
	if (ctag_offset)
		ctag_offset += buffer_offset >>
			       ilog2(g->ops.fb.compression_page_size(g));

	nvgpu_sgt = nvgpu_linux_sgt_create(g, bfr.sgt);

	/* update gmmu ptes */
	map_offset = g->ops.mm.gmmu_map(vm,
					map_offset,
					nvgpu_sgt,
					buffer_offset, /* sg offset */
					mapping_size,
					bfr.pgsz_idx,
					bfr.kind_v,
					ctag_offset,
					flags, rw_flag,
					clear_ctags,
					false,
					false,
					batch,
					aperture);
	if (!map_offset)
		goto clean_up;

	nvgpu_sgt_free(nvgpu_sgt, g);

	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
	if (!mapped_buffer) {
		nvgpu_warn(g, "oom allocating tracking buffer");
		goto clean_up;
	}
	mapped_buffer->dmabuf = dmabuf;
	mapped_buffer->sgt = bfr.sgt;
	mapped_buffer->addr = map_offset;
	mapped_buffer->size = mapping_size;
	mapped_buffer->pgsz_idx = bfr.pgsz_idx;
	mapped_buffer->ctag_offset = bfr.ctag_offset;
	mapped_buffer->ctag_lines = bfr.ctag_lines;
	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
	mapped_buffer->vm = vm;
	mapped_buffer->flags = flags;
	mapped_buffer->kind = map_key_kind;
	mapped_buffer->va_allocated = va_allocated;
	mapped_buffer->user_mapped = user_mapped ? 1 : 0;
	mapped_buffer->own_mem_ref = user_mapped;
	nvgpu_init_list_node(&mapped_buffer->buffer_list);
	nvgpu_ref_init(&mapped_buffer->ref);

	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
	if (err) {
		nvgpu_err(g, "failed to insert into mapped buffer tree");
		goto clean_up;
	}
	if (user_mapped)
		vm->num_user_mapped_buffers++;

	if (vm_area) {
		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
				    &vm_area->buffer_list_head);
		mapped_buffer->vm_area = vm_area;
	}

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return map_offset;

clean_up:
	nvgpu_kfree(g, mapped_buffer);
	if (va_allocated)
		__nvgpu_vm_free_va(vm, map_offset, bfr.pgsz_idx);
	if (!IS_ERR(bfr.sgt))
		gk20a_mm_unpin(dev, dmabuf, bfr.sgt);

	nvgpu_mutex_release(&vm->update_gmmu_lock);
	nvgpu_log_info(g, "err=%d", err);
	return 0;
}

int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
			int dmabuf_fd,
			u64 *offset_align,
			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
			s16 compr_kind,
			s16 incompr_kind,
			u64 buffer_offset,
			u64 mapping_size,
			struct vm_gk20a_mapping_batch *batch)
{
	int err = 0;
	struct dma_buf *dmabuf;
	u64 ret_va;

	gk20a_dbg_fn("");

	/* get ref to the mem handle (released on unmap_locked) */
	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf)) {
		nvgpu_warn(gk20a_from_vm(vm), "%s: fd %d is not a dmabuf",
			   __func__, dmabuf_fd);
		return PTR_ERR(dmabuf);
	}

	/* verify that we're not overflowing the buffer, i.e.
	 * (buffer_offset + mapping_size) > dmabuf->size.
	 *
	 * Since buffer_offset + mapping_size could overflow, first check
	 * that mapping size < dmabuf_size, at which point we can subtract
	 * mapping_size from both sides for the final comparison.
	 */
	if ((mapping_size > dmabuf->size) ||
	    (buffer_offset > (dmabuf->size - mapping_size))) {
		nvgpu_err(gk20a_from_vm(vm),
			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
			  (u64)dmabuf->size, buffer_offset, mapping_size);
		return -EINVAL;
	}

	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
	if (err) {
		dma_buf_put(dmabuf);
		return err;
	}

	ret_va = nvgpu_vm_map_linux(vm, dmabuf, *offset_align,
				    flags, compr_kind, incompr_kind, true,
				    gk20a_mem_flag_none,
				    buffer_offset,
				    mapping_size,
				    batch);

	*offset_align = ret_va;
	if (!ret_va) {
		dma_buf_put(dmabuf);
		err = -EINVAL;
	}

	return err;
}

int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
			  struct vm_gk20a_mapping_batch *batch)
{
	struct gk20a *g = vm->mm->g;
	struct nvgpu_mapped_buf *mapped_buffer;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
	if (!mapped_buffer) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
		return 0;
	}

	if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		struct nvgpu_timeout timeout;

		nvgpu_mutex_release(&vm->update_gmmu_lock);

		nvgpu_timeout_init(vm->mm->g, &timeout, 10000,
				   NVGPU_TIMER_RETRY_TIMER);
		do {
			if (nvgpu_atomic_read(
				&mapped_buffer->ref.refcount) == 1)
				break;
			nvgpu_udelay(5);
		} while (!nvgpu_timeout_expired_msg(&timeout,
					"sync-unmap failed on 0x%llx"));

		nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	}

	if (mapped_buffer->user_mapped == 0) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		nvgpu_err(g, "addr already unmapped from user 0x%llx", offset);
		return 0;
	}

	mapped_buffer->user_mapped--;
	if (mapped_buffer->user_mapped == 0)
		vm->num_user_mapped_buffers--;

	vm->kref_put_batch = batch;
	nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
	vm->kref_put_batch = NULL;

	nvgpu_mutex_release(&vm->update_gmmu_lock);
	return 0;
}

/* NOTE! mapped_buffers lock must be held */
void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
			   struct vm_gk20a_mapping_batch *batch)
{
	struct vm_gk20a *vm = mapped_buffer->vm;
	struct gk20a *g = vm->mm->g;

	g->ops.mm.gmmu_unmap(vm,
			     mapped_buffer->addr,
			     mapped_buffer->size,
			     mapped_buffer->pgsz_idx,
			     mapped_buffer->va_allocated,
			     gk20a_mem_flag_none,
			     mapped_buffer->vm_area ?
				mapped_buffer->vm_area->sparse : false,
			     batch);

	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
		       mapped_buffer->sgt);

	/* remove from mapped buffer tree and remove list, free */
	nvgpu_remove_mapped_buf(vm, mapped_buffer);
	if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
		nvgpu_list_del(&mapped_buffer->buffer_list);

	/* keep track of mapped buffers */
	if (mapped_buffer->user_mapped)
		vm->num_user_mapped_buffers--;

	if (mapped_buffer->own_mem_ref)
		dma_buf_put(mapped_buffer->dmabuf);

	nvgpu_kfree(g, mapped_buffer);
}