Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: VM map path refactoring
Final VM mapping refactoring. Move most of the logic in the VM map path
to the common/mm/vm.c code and use the generic APIs previously
implemented to deal with comptags and map caching.

This also updates the mapped_buffer struct to finally be free of the
Linux dma_buf and scatter-gather table pointers. These are replaced by
the nvgpu_os_buffer struct.

JIRA NVGPU-30
JIRA NVGPU-71
JIRA NVGPU-224

Change-Id: If5b32886221c3e5af2f3d7ddd4fa51dd487bb981
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1583987
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Parent: 8428c82c81
Commit: 01c98eb680
Committed by: mobile promotions
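For orientation, the OS abstraction at the center of this change is a pair of
small Linux-side structs, both visible in the header hunks below:
nvgpu_os_buffer describes a buffer handed in by the OS, and
nvgpu_mapped_buf_priv holds the OS-private state of an established mapping.
A minimal sketch (struct contents taken from this diff; the comments are
annotations added here, not part of the commit):

    struct nvgpu_os_buffer {
        struct dma_buf *dmabuf;    /* the buffer being mapped */
        struct device *dev;        /* device used for pinning */
    };

    struct nvgpu_mapped_buf_priv {
        struct dma_buf *dmabuf;    /* ref held for the mapping's lifetime */
        struct sg_table *sgt;      /* pin result, released on unmap */
    };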
@@ -64,7 +64,7 @@ static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
 	if (buffer)
-		addr = nvgpu_mem_get_addr_sgl(g, buffer->sgt->sgl);
+		addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);

 	return addr;
@@ -49,8 +49,8 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
 		struct nvgpu_mapped_buf *mapped_buffer =
 			mapped_buffer_from_rbtree_node(node);

-		if (mapped_buffer->dmabuf == dmabuf &&
-		    kind == mapped_buffer->kind)
+		if (mapped_buffer->os_priv.dmabuf == dmabuf &&
+		    mapped_buffer->kind == kind)
 			return mapped_buffer;

 		nvgpu_rbtree_enum_next(&node, node);
@@ -75,7 +75,7 @@ int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 		return -EINVAL;
 	}

-	*dmabuf = mapped_buffer->dmabuf;
+	*dmabuf = mapped_buffer->os_priv.dmabuf;
 	*offset = gpu_va - mapped_buffer->addr;

 	nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -83,66 +83,68 @@ int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 	return 0;
 }

+u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf)
+{
+	return os_buf->dmabuf->size;
+}
+
 /*
  * vm->update_gmmu_lock must be held. This checks to see if we already have
  * mapped the passed buffer into this VM. If so, just return the existing
  * mapping address.
  */
-static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
-				   struct dma_buf *dmabuf,
-				   u64 offset_align,
-				   u32 flags,
-				   int kind,
-				   int rw_flag)
+struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
+					       struct nvgpu_os_buffer *os_buf,
+					       u64 map_addr,
+					       u32 flags,
+					       int kind)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;

 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
+		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr);
 		if (!mapped_buffer)
-			return 0;
+			return NULL;

-		if (mapped_buffer->dmabuf != dmabuf ||
+		if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf ||
 		    mapped_buffer->kind != (u32)kind)
-			return 0;
+			return NULL;
 	} else {
 		mapped_buffer =
-			__nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
+			__nvgpu_vm_find_mapped_buf_reverse(vm,
+							   os_buf->dmabuf,
+							   kind);
 		if (!mapped_buffer)
-			return 0;
+			return NULL;
 	}

 	if (mapped_buffer->flags != flags)
-		return 0;
+		return NULL;

 	/*
 	 * If we find the mapping here then that means we have mapped it already
-	 * and already have a dma_buf ref to the underlying buffer. As such
-	 * release the ref taken earlier in the map path.
+	 * and the prior pin and get must be undone.
 	 */
-	dma_buf_put(mapped_buffer->dmabuf);
-
-	nvgpu_ref_get(&mapped_buffer->ref);
+	gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, mapped_buffer->os_priv.sgt);
+	dma_buf_put(os_buf->dmabuf);

 	nvgpu_log(g, gpu_dbg_map,
 		  "gv: 0x%04x_%08x + 0x%-7zu "
-		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
+		  "[dma: 0x%010llx, pa: 0x%010llx] "
 		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
 		  "flags=0x%x apt=%s (reused)",
 		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
-		  dmabuf->size,
-		  u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
-		  u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
-		  u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
-		  u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+		  os_buf->dmabuf->size,
+		  (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl),
+		  (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
 		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
 		  vm_aspace_id(vm),
 		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
 		  mapped_buffer->flags,
-		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
+		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));

-	return mapped_buffer->addr;
+	return mapped_buffer;
 }

 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
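The reuse path above changes the reference bookkeeping: on a cache hit the
lookup itself undoes the pin and dma_buf reference taken earlier in this map
attempt, and the caller then takes an nvgpu_ref on the existing mapping. A
hedged caller-side sketch of that contract, mirroring how the common code
later in this diff uses the lookup (the helper name is hypothetical):

    /* Hypothetical helper; mirrors nvgpu_vm_map()'s use of the lookup. */
    static struct nvgpu_mapped_buf *try_reuse_mapping(struct vm_gk20a *vm,
                                                      struct nvgpu_os_buffer *os_buf,
                                                      u64 map_addr, u32 flags,
                                                      int kind)
    {
        struct nvgpu_mapped_buf *mapped_buffer;

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
        /* On a hit this undoes the caller's fresh pin + dma_buf ref. */
        mapped_buffer = nvgpu_vm_find_mapping(vm, os_buf, map_addr,
                                              flags, kind);
        nvgpu_mutex_release(&vm->update_gmmu_lock);

        if (mapped_buffer)
            nvgpu_ref_get(&mapped_buffer->ref); /* one ref per mapping user */

        return mapped_buffer;
    }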
@@ -159,237 +161,62 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct device *dev = dev_from_gk20a(g);
-	struct nvgpu_ctag_buffer_info binfo = { 0 };
-	struct gk20a_comptags comptags;
-	struct nvgpu_vm_area *vm_area = NULL;
-	struct nvgpu_sgt *nvgpu_sgt = NULL;
-	struct sg_table *sgt;
-	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct nvgpu_os_buffer os_buf = { dmabuf, dev };
-	enum nvgpu_aperture aperture;
-	bool va_allocated = false;
-	bool clear_ctags = false;
-	u64 map_offset = 0;
-	u64 align;
-	u32 ctag_offset;
+	struct sg_table *sgt;
+	struct nvgpu_sgt *nvgpu_sgt = NULL;
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+	u64 map_addr = 0ULL;
 	int err = 0;

-	/*
-	 * The kind used as part of the key for map caching. HW may
-	 * actually be programmed with the fallback kind in case the
-	 * key kind is compressible but we're out of comptags.
-	 */
-	s16 map_key_kind;
-
-	binfo.flags = flags;
-	binfo.size = dmabuf->size;
-	binfo.compr_kind = compr_kind;
-	binfo.incompr_kind = incompr_kind;
-
-	if (compr_kind != NV_KIND_INVALID)
-		map_key_kind = compr_kind;
-	else
-		map_key_kind = incompr_kind;
-
-	if (map_key_kind == NV_KIND_INVALID) {
-		nvgpu_err(g, "Valid kind must be supplied");
-		return -EINVAL;
-	}
-
-	if (vm->userspace_managed &&
-	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
-		nvgpu_err(g, "non-fixed-offset mapping not available on "
-			  "userspace managed address spaces");
-		return -EFAULT;
-	}
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	/* check if this buffer is already mapped */
-	if (!vm->userspace_managed) {
-		map_offset = __nvgpu_vm_find_mapping(
-			vm, dmabuf, offset_align,
-			flags, map_key_kind, rw_flag);
-		if (map_offset) {
-			nvgpu_mutex_release(&vm->update_gmmu_lock);
-			*gpu_va = map_offset;
-			return 0;
-		}
-	}
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
+		map_addr = offset_align;

 	sgt = gk20a_mm_pin(dev, dmabuf);
 	if (IS_ERR(sgt)) {
-		err = PTR_ERR(sgt);
-		nvgpu_warn(g, "oom allocating tracking buffer");
+		nvgpu_warn(g, "Failed to pin dma_buf!");
+		return PTR_ERR(sgt);
 	}

+	if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
+		err = -EINVAL;
+		goto clean_up;
+	}
+
 	nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
-	if (!nvgpu_sgt)
-		goto clean_up;
-
-	aperture = gk20a_dmabuf_aperture(g, dmabuf);
-	if (aperture == APERTURE_INVALID) {
-		err = -EINVAL;
+	if (!nvgpu_sgt) {
+		err = -ENOMEM;
 		goto clean_up;
 	}

-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
-		map_offset = offset_align;
-
-	align = nvgpu_sgt_alignment(g, nvgpu_sgt);
-	if (g->mm.disable_bigpage)
-		binfo.pgsz_idx = gmmu_page_size_small;
-	else
-		binfo.pgsz_idx = __get_pte_size(vm, map_offset,
-						min_t(u64, binfo.size, align));
-	mapping_size = mapping_size ? mapping_size : binfo.size;
-	mapping_size = ALIGN(mapping_size, SZ_4K);
-
-	if ((mapping_size > binfo.size) ||
-	    (buffer_offset > (binfo.size - mapping_size))) {
-		err = -EINVAL;
-		goto clean_up;
-	}
-
-	/* Check if we should use a fixed offset for mapping this buffer */
-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		err = nvgpu_vm_area_validate_buffer(vm,
-						    offset_align,
-						    mapping_size,
-						    binfo.pgsz_idx,
-						    &vm_area);
-		if (err)
-			goto clean_up;
-
-		map_offset = offset_align;
-		va_allocated = false;
-	} else {
-		va_allocated = true;
-	}
-
-	err = nvgpu_vm_compute_compression(vm, &binfo);
-	if (err) {
-		nvgpu_err(g, "failure setting up compression");
-		goto clean_up;
-	}
-
-	/* bar1 and pmu vm don't need ctag */
-	if (!vm->enable_ctag)
-		binfo.ctag_lines = 0;
-
-	gk20a_get_comptags(&os_buf, &comptags);
-
-	if (binfo.ctag_lines && !comptags.lines) {
-		/* allocate compression resources if needed */
-		err = gk20a_alloc_comptags(g, &os_buf,
-					   &g->gr.comp_tags,
-					   binfo.ctag_lines);
-		if (err) {
-			/* TBD: we can partially alloc ctags as well... */
-
-			/* prevent compression ... */
-			binfo.compr_kind = NV_KIND_INVALID;
-
-			/* ... and make sure we have the fallback */
-			if (binfo.incompr_kind == NV_KIND_INVALID) {
-				nvgpu_err(g, "comptag alloc failed and no fallback kind specified");
-				goto clean_up;
-			}
-		} else {
-			gk20a_get_comptags(&os_buf, &comptags);
-
-			if (g->ops.ltc.cbc_ctrl)
-				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-						    comptags.offset,
-						    comptags.offset +
-						    comptags.allocated_lines - 1);
-			else
-				clear_ctags = true;
-		}
-	}
-
-	/*
-	 * Calculate comptag index for this mapping. Differs in
-	 * case of partial mapping.
-	 */
-	ctag_offset = comptags.offset;
-	if (ctag_offset)
-		ctag_offset += buffer_offset >>
-			       ilog2(g->ops.fb.compression_page_size(g));
-
-	/* update gmmu ptes */
-	map_offset = g->ops.mm.gmmu_map(vm,
-					map_offset,
-					nvgpu_sgt,
-					buffer_offset, /* sg offset */
-					mapping_size,
-					binfo.pgsz_idx,
-					(binfo.compr_kind != NV_KIND_INVALID ?
-					 binfo.compr_kind : binfo.incompr_kind),
-					ctag_offset,
-					flags, rw_flag,
-					clear_ctags,
-					false,
-					false,
-					batch,
-					aperture);
-	if (!map_offset)
-		goto clean_up;
+	mapped_buffer = nvgpu_vm_map(vm,
+				     &os_buf,
+				     nvgpu_sgt,
+				     map_addr,
+				     mapping_size,
+				     buffer_offset,
+				     rw_flag,
+				     flags,
+				     compr_kind,
+				     incompr_kind,
+				     batch,
+				     gk20a_dmabuf_aperture(g, dmabuf));

 	nvgpu_sgt_free(g, nvgpu_sgt);

-	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
-	if (!mapped_buffer) {
-		nvgpu_warn(g, "oom allocating tracking buffer");
-		goto clean_up;
-	}
-	mapped_buffer->dmabuf = dmabuf;
-	mapped_buffer->sgt = sgt;
-	mapped_buffer->addr = map_offset;
-	mapped_buffer->size = mapping_size;
-	mapped_buffer->pgsz_idx = binfo.pgsz_idx;
-	mapped_buffer->ctag_offset = ctag_offset;
-	mapped_buffer->ctag_lines = binfo.ctag_lines;
-	mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
-	mapped_buffer->vm = vm;
-	mapped_buffer->flags = flags;
-	mapped_buffer->kind = map_key_kind;
-	mapped_buffer->va_allocated = va_allocated;
-	nvgpu_init_list_node(&mapped_buffer->buffer_list);
-	nvgpu_ref_init(&mapped_buffer->ref);
-
-	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
-	if (err) {
-		nvgpu_err(g, "failed to insert into mapped buffer tree");
+	if (IS_ERR(mapped_buffer)) {
+		err = PTR_ERR(mapped_buffer);
 		goto clean_up;
 	}

-	vm->num_user_mapped_buffers++;
+	mapped_buffer->os_priv.dmabuf = dmabuf;
+	mapped_buffer->os_priv.sgt = sgt;

-	if (vm_area) {
-		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
-				    &vm_area->buffer_list_head);
-		mapped_buffer->vm_area = vm_area;
-	}
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	*gpu_va = map_offset;
+	*gpu_va = mapped_buffer->addr;
 	return 0;

 clean_up:
-	nvgpu_kfree(g, mapped_buffer);
-
-	if (nvgpu_sgt)
-		nvgpu_sgt_free(g, nvgpu_sgt);
-	if (va_allocated)
-		__nvgpu_vm_free_va(vm, map_offset, binfo.pgsz_idx);
-	if (!IS_ERR(sgt))
-		gk20a_mm_unpin(dev, dmabuf, sgt);
+	gk20a_mm_unpin(dev, dmabuf, sgt);

-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-	nvgpu_log_info(g, "err=%d", err);
 	return err;
 }
@@ -407,8 +234,6 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 	struct dma_buf *dmabuf;
 	u64 ret_va;

-	gk20a_dbg_fn("");
-
 	/* get ref to the mem handle (released on unmap_locked) */
 	dmabuf = dma_buf_get(dmabuf_fd);
 	if (IS_ERR(dmabuf)) {
@@ -465,8 +290,8 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
 {
 	struct vm_gk20a *vm = mapped_buffer->vm;

-	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
-		       mapped_buffer->sgt);
+	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
+		       mapped_buffer->os_priv.sgt);

-	dma_buf_put(mapped_buffer->dmabuf);
+	dma_buf_put(mapped_buffer->os_priv.dmabuf);
 }
@@ -20,6 +20,7 @@
  * DEALINGS IN THE SOFTWARE.
  */

+#include <nvgpu/bug.h>
 #include <nvgpu/log.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/vm.h>
@@ -712,6 +713,249 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 	nvgpu_big_free(vm->mm->g, mapped_buffers);
 }

+struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
+				      struct nvgpu_os_buffer *os_buf,
+				      struct nvgpu_sgt *sgt,
+				      u64 map_addr,
+				      u64 map_size,
+				      u64 phys_offset,
+				      int rw,
+				      u32 flags,
+				      s16 compr_kind,
+				      s16 incompr_kind,
+				      struct vm_gk20a_mapping_batch *batch,
+				      enum nvgpu_aperture aperture)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+	struct nvgpu_ctag_buffer_info binfo = { 0 };
+	struct gk20a_comptags comptags;
+	struct nvgpu_vm_area *vm_area = NULL;
+	int err = 0;
+	u64 align;
+	u32 ctag_offset;
+	bool clear_ctags = false;
+	bool va_allocated = true;
+
+	/*
+	 * The kind used as part of the key for map caching. HW may
+	 * actually be programmed with the fallback kind in case the
+	 * key kind is compressible but we're out of comptags.
+	 */
+	s16 map_key_kind;
+
+	if (vm->userspace_managed &&
+	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		nvgpu_err(g,
+			  "non-fixed-offset mapping not available on "
+			  "userspace managed address spaces");
+		return ERR_PTR(-EINVAL);
+	}
+
+	binfo.flags = flags;
+	binfo.size = nvgpu_os_buf_get_size(os_buf);
+	binfo.compr_kind = compr_kind;
+	binfo.incompr_kind = incompr_kind;
+
+	if (compr_kind != NV_KIND_INVALID)
+		map_key_kind = compr_kind;
+	else
+		map_key_kind = incompr_kind;
+
+	/*
+	 * Check if this buffer is already mapped.
+	 */
+	if (!vm->userspace_managed) {
+		nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+		mapped_buffer = nvgpu_vm_find_mapping(vm,
+						      os_buf,
+						      map_addr,
+						      flags,
+						      map_key_kind);
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+		if (mapped_buffer) {
+			nvgpu_ref_get(&mapped_buffer->ref);
+			return mapped_buffer;
+		}
+	}
+
+	/*
+	 * Generate a new mapping!
+	 */
+	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
+	if (!mapped_buffer) {
+		nvgpu_warn(g, "oom allocating tracking buffer");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	align = nvgpu_sgt_alignment(g, sgt);
+	if (g->mm.disable_bigpage)
+		binfo.pgsz_idx = gmmu_page_size_small;
+	else
+		binfo.pgsz_idx = __get_pte_size(vm, map_addr,
+						min_t(u64, binfo.size, align));
+	map_size = map_size ? map_size : binfo.size;
+	map_size = ALIGN(map_size, SZ_4K);
+
+	if ((map_size > binfo.size) ||
+	    (phys_offset > (binfo.size - map_size))) {
+		err = -EINVAL;
+		goto clean_up_nolock;
+	}
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	/*
+	 * Check if we should use a fixed offset for mapping this buffer.
+	 */
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		err = nvgpu_vm_area_validate_buffer(vm,
+						    map_addr,
+						    map_size,
+						    binfo.pgsz_idx,
+						    &vm_area);
+		if (err)
+			goto clean_up;
+
+		va_allocated = false;
+	}
+
+	err = nvgpu_vm_compute_compression(vm, &binfo);
+	if (err) {
+		nvgpu_err(g, "failure setting up compression");
+		goto clean_up;
+	}
+
+	/*
+	 * bar1 and pmu VMs don't need ctags.
+	 */
+	if (!vm->enable_ctag)
+		binfo.ctag_lines = 0;
+
+	gk20a_get_comptags(os_buf, &comptags);
+
+	if (binfo.ctag_lines && !comptags.lines) {
+		/*
+		 * Allocate compression resources if needed.
+		 */
+		if (gk20a_alloc_comptags(g,
+					 os_buf,
+					 &g->gr.comp_tags,
+					 binfo.ctag_lines)) {
+
+			/*
+			 * Prevent compression...
+			 */
+			binfo.compr_kind = NV_KIND_INVALID;
+
+			/*
+			 * ... And make sure we have a fallback.
+			 */
+			if (binfo.incompr_kind == NV_KIND_INVALID) {
+				nvgpu_err(g, "comptag alloc failed and no "
+					  "fallback kind specified");
+				err = -ENOMEM;
+
+				/*
+				 * Any alloced comptags are cleaned up when the
+				 * dmabuf is freed.
+				 */
+				goto clean_up;
+			}
+		} else {
+			gk20a_get_comptags(os_buf, &comptags);
+
+			if (g->ops.ltc.cbc_ctrl)
+				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+						    comptags.offset,
+						    comptags.offset +
+						    comptags.allocated_lines - 1);
+			else
+				clear_ctags = true;
+		}
+	}
+
+	/*
+	 * Calculate comptag index for this mapping. Differs in case of partial
+	 * mapping.
+	 */
+	ctag_offset = comptags.offset;
+	if (ctag_offset)
+		ctag_offset += phys_offset >>
+			       ilog2(g->ops.fb.compression_page_size(g));
+
+	map_addr = g->ops.mm.gmmu_map(vm,
+				      map_addr,
+				      sgt,
+				      phys_offset,
+				      map_size,
+				      binfo.pgsz_idx,
+				      binfo.compr_kind != NV_KIND_INVALID ?
+				      binfo.compr_kind : binfo.incompr_kind,
+				      ctag_offset,
+				      flags,
+				      rw,
+				      clear_ctags,
+				      false,
+				      false,
+				      batch,
+				      aperture);
+	if (!map_addr) {
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	nvgpu_init_list_node(&mapped_buffer->buffer_list);
+	nvgpu_ref_init(&mapped_buffer->ref);
+	mapped_buffer->addr = map_addr;
+	mapped_buffer->size = map_size;
+	mapped_buffer->pgsz_idx = binfo.pgsz_idx;
+	mapped_buffer->ctag_offset = ctag_offset;
+	mapped_buffer->ctag_lines = binfo.ctag_lines;
+	mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
+	mapped_buffer->vm = vm;
+	mapped_buffer->flags = flags;
+	mapped_buffer->kind = map_key_kind;
+	mapped_buffer->va_allocated = va_allocated;
+	mapped_buffer->vm_area = vm_area;
+
+	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
+	if (err) {
+		nvgpu_err(g, "failed to insert into mapped buffer tree");
+		goto clean_up;
+	}
+
+	vm->num_user_mapped_buffers++;
+
+	if (vm_area) {
+		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
+				    &vm_area->buffer_list_head);
+		mapped_buffer->vm_area = vm_area;
+	}
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	return mapped_buffer;
+
+clean_up:
+	if (mapped_buffer->addr)
+		g->ops.mm.gmmu_unmap(vm,
+				     mapped_buffer->addr,
+				     mapped_buffer->size,
+				     mapped_buffer->pgsz_idx,
+				     mapped_buffer->va_allocated,
+				     gk20a_mem_flag_none,
+				     mapped_buffer->vm_area ?
+				     mapped_buffer->vm_area->sparse : false,
+				     NULL);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+clean_up_nolock:
+	nvgpu_kfree(g, mapped_buffer);
+
+	return ERR_PTR(err);
+}
+
 /*
  * Really unmap. This does the real GMMU unmap and removes the mapping from the
  * VM map tracking tree (and vm_area list if necessary).
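One detail worth noting in the new common path above is the comptag index
calculation for partial mappings: the buffer's base comptag offset advances by
one line per compression page skipped at the start of the mapping. A worked
sketch, assuming a 128 KiB compression page size (a hypothetical value; the
real one is whatever g->ops.fb.compression_page_size() reports for the chip):

    /* Illustration only; the 128 KiB compression page is an assumption. */
    u32 example_ctag_offset(u32 base_offset, u64 phys_offset)
    {
        const u64 comp_page_size = 128ULL << 10;

        /* A zero base means the buffer has no comptags at all. */
        if (!base_offset)
            return 0;

        /*
         * E.g. phys_offset = 256 KiB gives 256K >> ilog2(128K) = 2, so
         * the mapping starts two comptag lines into the allocation.
         */
        return base_offset + (u32)(phys_offset >> ilog2(comp_page_size));
    }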
@@ -44,6 +44,11 @@ struct nvgpu_os_buffer {
 	struct device *dev;
 };

+struct nvgpu_mapped_buf_priv {
+	struct dma_buf *dmabuf;
+	struct sg_table *sgt;
+};
+
 /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		       struct dma_buf *dmabuf,
@@ -37,6 +37,18 @@ struct vm_gk20a;
 struct nvgpu_vm_area;
 struct gk20a_comptag_allocator;

+/*
+ * Defined by each OS. Allows the common VM code do things to the OS specific
+ * buffer structures.
+ */
+struct nvgpu_os_buffer;
+
+#ifdef __KERNEL__
+#include <nvgpu/linux/vm.h>
+#else
+/* QNX include goes here. */
+#endif
+
 /**
  * This header contains the OS agnostic APIs for dealing with VMs. Most of the
  * VM implementation is system specific - it must translate from a platform's
@@ -89,13 +101,12 @@ struct nvgpu_mapped_buf {
 	struct vm_gk20a *vm;
 	struct nvgpu_vm_area *vm_area;

+	struct nvgpu_ref ref;
+
 	struct nvgpu_rbtree_node node;
 	struct nvgpu_list_node buffer_list;
 	u64 addr;
 	u64 size;
-	struct dma_buf *dmabuf;
-	struct sg_table *sgt;
-	struct nvgpu_ref ref;

 	u32 pgsz_idx;
 	u32 ctag_offset;
@@ -105,13 +116,16 @@ struct nvgpu_mapped_buf {
 	u32 flags;
 	u32 kind;
 	bool va_allocated;
-};

-/*
- * Defined by each OS. Allows the common VM code do things to the OS specific
- * buffer structures.
- */
-struct nvgpu_os_buffer;
+	/*
+	 * Separate from the nvgpu_os_buffer struct to clearly distinguish
+	 * lifetime. A nvgpu_mapped_buf_priv will _always_ be wrapped by a
+	 * struct nvgpu_mapped_buf; however, there are times when a struct
+	 * nvgpu_os_buffer would be separate. This aims to prevent dangerous
+	 * usage of container_of() or the like in OS code.
+	 */
+	struct nvgpu_mapped_buf_priv os_priv;
+};

 static inline struct nvgpu_mapped_buf *
 nvgpu_mapped_buf_from_buffer_list(struct nvgpu_list_node *node)
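The lifetime comment above is the reason os_priv is a distinct type rather
than an embedded nvgpu_os_buffer: only the always-embedded member may be used
to recover the wrapper. A sketch of the one container_of() use the layout
does permit (the helper name is hypothetical, not part of the commit):

    static inline struct nvgpu_mapped_buf *
    mapped_buf_from_os_priv(struct nvgpu_mapped_buf_priv *priv)
    {
        /* Safe: os_priv is always embedded in a struct nvgpu_mapped_buf. */
        return container_of(priv, struct nvgpu_mapped_buf, os_priv);
    }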
@@ -226,6 +240,25 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 			  struct nvgpu_mapped_buf **mapped_buffers,
 			  int num_buffers);

+struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
+					       struct nvgpu_os_buffer *os_buf,
+					       u64 map_addr,
+					       u32 flags,
+					       int kind);
+
+struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
+				      struct nvgpu_os_buffer *os_buf,
+				      struct nvgpu_sgt *sgt,
+				      u64 map_addr,
+				      u64 map_size,
+				      u64 phys_offset,
+				      int rw,
+				      u32 flags,
+				      s16 compr_kind,
+				      s16 incompr_kind,
+				      struct vm_gk20a_mapping_batch *batch,
+				      enum nvgpu_aperture aperture);
+
 void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
 		    struct vm_gk20a_mapping_batch *batch);
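Note that nvgpu_vm_map() reports failure through ERR_PTR() rather than NULL,
as the Linux path earlier in this diff already shows with IS_ERR()/PTR_ERR().
A hedged sketch of a minimal caller; the argument values here are illustrative
assumptions, not defaults defined by this API:

    static int map_whole_buffer(struct vm_gk20a *vm,
                                struct nvgpu_os_buffer *os_buf,
                                struct nvgpu_sgt *sgt, u64 *gpu_va)
    {
        struct nvgpu_mapped_buf *mapped_buffer;

        mapped_buffer = nvgpu_vm_map(vm, os_buf, sgt,
                                     0ULL,             /* map_addr: let the VM pick */
                                     0ULL,             /* map_size: whole buffer */
                                     0ULL,             /* phys_offset */
                                     gk20a_mem_flag_none,   /* rw */
                                     0,                /* flags */
                                     NV_KIND_INVALID,  /* no compressible kind */
                                     0,                /* assumed valid incompressible kind */
                                     NULL,             /* no batch */
                                     APERTURE_SYSMEM); /* assumed aperture */
        if (IS_ERR(mapped_buffer))
            return PTR_ERR(mapped_buffer);

        *gpu_va = mapped_buffer->addr;
        return 0;
    }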
@@ -240,6 +273,8 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer);
  */
 void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref);

+u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf);
+
 /*
  * These all require the VM update lock to be held.
  */