Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Refactoring nvgpu_vm functions
Refactor the last of the nvgpu_vm functions out of the mm_gk20a.c code. This also removes some usages of dma_buf from mm_gk20a.c, which helps make mm_gk20a.c less Linux specific, and deletes some Linux-specific header includes that are no longer necessary in gk20a/mm_gk20a.c. The mm_gk20a.c code is now quite close to being Linux free.

JIRA NVGPU-30
JIRA NVGPU-138

Change-Id: I72b370bd85a7b029768b0fb4827d6abba42007c3
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1566629
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Committed by: mobile promotions
Parent: b3446bc0b6
Commit: 0853109c99
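For readers skimming the patch: the kind-handling changes follow the usual nvgpu HAL pattern. Common code stops calling the gm20b hardware accessors gmmu_pte_kind_invalid_v() and gmmu_pte_kind_pitch_v() directly and instead goes through the per-chip function pointers g->ops.mm.get_kind_invalid() and g->ops.mm.get_kind_pitch(), which each chip's gpu_ops table fills in (gm20b_get_kind_invalid/gm20b_get_kind_pitch in this patch). The standalone sketch below is not part of the patch; it only illustrates that indirection. The trimmed-down struct layouts and the numeric kind values are placeholder assumptions, not the real nvgpu definitions.

#include <stdint.h>
#include <stdio.h>

/* Placeholder stand-ins for the per-chip PTE kind accessors; the real
 * values come from the generated gm20b hardware headers, not these. */
static uint32_t gmmu_pte_kind_invalid_v(void) { return 0xff; }
static uint32_t gmmu_pte_kind_pitch_v(void)   { return 0x00; }

/* Chip-specific implementations, analogous to the gm20b ones added here. */
static uint32_t gm20b_get_kind_invalid(void) { return gmmu_pte_kind_invalid_v(); }
static uint32_t gm20b_get_kind_pitch(void)   { return gmmu_pte_kind_pitch_v(); }

/* Heavily trimmed sketch of the gpu_ops.mm table from gk20a.h. */
struct gpu_ops {
	struct {
		uint32_t (*get_kind_invalid)(void);
		uint32_t (*get_kind_pitch)(void);
	} mm;
};

struct gk20a {
	struct gpu_ops ops;
};

/* Common code only sees the function pointers, so it no longer needs the
 * chip hardware headers -- that decoupling is the point of the refactor. */
static uint32_t pick_kind(struct gk20a *g, uint32_t requested_kind)
{
	if (requested_kind == g->ops.mm.get_kind_invalid())
		return g->ops.mm.get_kind_pitch();
	return requested_kind;
}

int main(void)
{
	struct gk20a g = {
		.ops.mm = {
			.get_kind_invalid = gm20b_get_kind_invalid,
			.get_kind_pitch   = gm20b_get_kind_pitch,
		},
	};

	printf("kind for 'invalid' request: 0x%x\n", (unsigned)pick_kind(&g, 0xff));
	printf("kind for explicit request:  0x%x\n", (unsigned)pick_kind(&g, 0x12));
	return 0;
}

Compare with setup_kind_legacy() in the first hunk below, where the old direct gmmu_pte_kind_invalid_v()/gmmu_pte_kind_pitch_v() calls (still visible in the removed mm_gk20a.c copy) are replaced by the g->ops.mm callbacks.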
@@ -115,6 +115,108 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
	return align;
}

static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr,
			     bool *pkind_compressible)
{
	struct gk20a *g = gk20a_from_vm(vm);
	bool kind_compressible;

	if (unlikely(bfr->kind_v == g->ops.mm.get_kind_invalid()))
		bfr->kind_v = g->ops.mm.get_kind_pitch();

	if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
		nvgpu_err(g, "kind 0x%x not supported", bfr->kind_v);
		return -EINVAL;
	}

	bfr->uc_kind_v = g->ops.mm.get_kind_invalid();
	/* find a suitable incompressible kind if it becomes necessary later */
	kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
	if (kind_compressible) {
		bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
		if (unlikely(bfr->uc_kind_v == g->ops.mm.get_kind_invalid())) {
			/* shouldn't happen, but it is worth cross-checking */
			nvgpu_err(g, "comptag kind 0x%x can't be"
				  " downgraded to uncompressed kind",
				  bfr->kind_v);
			return -EINVAL;
		}
	}

	*pkind_compressible = kind_compressible;
	return 0;
}

static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
					     u32 flags,
					     struct buffer_attrs *bfr,
					     enum gmmu_pgsz_gk20a pgsz_idx)
{
	bool kind_compressible;
	struct gk20a *g = gk20a_from_vm(vm);
	int ctag_granularity = g->ops.fb.compression_page_size(g);

	if (!bfr->use_kind_v)
		bfr->kind_v = g->ops.mm.get_kind_invalid();
	if (!bfr->use_uc_kind_v)
		bfr->uc_kind_v = g->ops.mm.get_kind_invalid();

	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		kind_compressible = (bfr->kind_v !=
				     g->ops.mm.get_kind_invalid());
		if (!kind_compressible)
			bfr->kind_v = bfr->uc_kind_v;
	} else {
		int err = setup_kind_legacy(vm, bfr, &kind_compressible);

		if (err)
			return err;
	}

	/* comptags only supported for suitable kinds, 128KB pagesize */
	if (kind_compressible &&
	    vm->gmmu_page_sizes[pgsz_idx] <
	    g->ops.fb.compressible_page_size(g)) {
		/* it is safe to fall back to uncompressed as
		   functionality is not harmed */
		bfr->kind_v = bfr->uc_kind_v;
		kind_compressible = false;
	}
	if (kind_compressible)
		bfr->ctag_lines = DIV_ROUND_UP_ULL(bfr->size, ctag_granularity);
	else
		bfr->ctag_lines = 0;

	bfr->use_kind_v = (bfr->kind_v != g->ops.mm.get_kind_invalid());
	bfr->use_uc_kind_v = (bfr->uc_kind_v != g->ops.mm.get_kind_invalid());

	return 0;
}

int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
		      struct dma_buf **dmabuf,
		      u64 *offset)
{
	struct nvgpu_mapped_buf *mapped_buffer;

	gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
	if (!mapped_buffer) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		return -EINVAL;
	}

	*dmabuf = mapped_buffer->dmabuf;
	*offset = gpu_va - mapped_buffer->addr;

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return 0;
}

/*
 * vm->update_gmmu_lock must be held. This checks to see if we already have
 * mapped the passed buffer into this VM. If so, just return the existing
@@ -478,6 +580,67 @@ clean_up:
	return 0;
}

int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
			int dmabuf_fd,
			u64 *offset_align,
			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
			s16 compr_kind,
			s16 incompr_kind,
			u64 buffer_offset,
			u64 mapping_size,
			struct vm_gk20a_mapping_batch *batch)
{
	int err = 0;
	struct dma_buf *dmabuf;
	u64 ret_va;

	gk20a_dbg_fn("");

	/* get ref to the mem handle (released on unmap_locked) */
	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf)) {
		nvgpu_warn(gk20a_from_vm(vm), "%s: fd %d is not a dmabuf",
			   __func__, dmabuf_fd);
		return PTR_ERR(dmabuf);
	}

	/* verify that we're not overflowing the buffer, i.e.
	 * (buffer_offset + mapping_size)> dmabuf->size.
	 *
	 * Since buffer_offset + mapping_size could overflow, first check
	 * that mapping size < dmabuf_size, at which point we can subtract
	 * mapping_size from both sides for the final comparison.
	 */
	if ((mapping_size > dmabuf->size) ||
	    (buffer_offset > (dmabuf->size - mapping_size))) {
		nvgpu_err(gk20a_from_vm(vm),
			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
			  (u64)dmabuf->size, buffer_offset, mapping_size);
		return -EINVAL;
	}

	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
	if (err) {
		dma_buf_put(dmabuf);
		return err;
	}

	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
			      flags, compr_kind, incompr_kind, true,
			      gk20a_mem_flag_none,
			      buffer_offset,
			      mapping_size,
			      batch);

	*offset_align = ret_va;
	if (!ret_va) {
		dma_buf_put(dmabuf);
		err = -EINVAL;
	}

	return err;
}

void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
{
	struct gk20a *g = vm->mm->g;
@@ -491,6 +654,43 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
		return;
	}

	nvgpu_ref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_ref);
	nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
	nvgpu_mutex_release(&vm->update_gmmu_lock);
}

/* NOTE! mapped_buffers lock must be held */
void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
			   struct vm_gk20a_mapping_batch *batch)
{
	struct vm_gk20a *vm = mapped_buffer->vm;
	struct gk20a *g = vm->mm->g;

	g->ops.mm.gmmu_unmap(vm,
			     mapped_buffer->addr,
			     mapped_buffer->size,
			     mapped_buffer->pgsz_idx,
			     mapped_buffer->va_allocated,
			     gk20a_mem_flag_none,
			     mapped_buffer->vm_area ?
			     mapped_buffer->vm_area->sparse : false,
			     batch);

	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
		       mapped_buffer->sgt);

	/* remove from mapped buffer tree and remove list, free */
	nvgpu_remove_mapped_buf(vm, mapped_buffer);
	if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
		nvgpu_list_del(&mapped_buffer->buffer_list);

	/* keep track of mapped buffers */
	if (mapped_buffer->user_mapped)
		vm->num_user_mapped_buffers--;

	if (mapped_buffer->own_mem_ref)
		dma_buf_put(mapped_buffer->dmabuf);

	nvgpu_kfree(g, mapped_buffer);

	return;
}

@@ -88,9 +88,9 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset);

/* find buffer corresponding to va */
int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
			 struct dma_buf **dmabuf,
			 u64 *offset);
int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
		      struct dma_buf **dmabuf,
		      u64 *offset);

enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
					  struct dma_buf *dmabuf);

@@ -98,9 +98,5 @@ int validate_fixed_buffer(struct vm_gk20a *vm,
			  struct buffer_attrs *bfr,
			  u64 map_offset, u64 map_size,
			  struct nvgpu_vm_area **pva_node);
int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
				      u32 flags,
				      struct buffer_attrs *bfr,
				      enum gmmu_pgsz_gk20a pgsz_idx);

#endif

@@ -641,3 +641,137 @@ struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(

	return mapped_buffer_from_rbtree_node(node);
}

int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
			 struct nvgpu_mapped_buf ***mapped_buffers,
			 int *num_buffers)
{
	struct nvgpu_mapped_buf *mapped_buffer;
	struct nvgpu_mapped_buf **buffer_list;
	struct nvgpu_rbtree_node *node = NULL;
	int i = 0;

	if (vm->userspace_managed) {
		*mapped_buffers = NULL;
		*num_buffers = 0;
		return 0;
	}

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
				       vm->num_user_mapped_buffers);
	if (!buffer_list) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		return -ENOMEM;
	}

	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
	while (node) {
		mapped_buffer = mapped_buffer_from_rbtree_node(node);
		if (mapped_buffer->user_mapped) {
			buffer_list[i] = mapped_buffer;
			nvgpu_ref_get(&mapped_buffer->ref);
			i++;
		}
		nvgpu_rbtree_enum_next(&node, node);
	}

	BUG_ON(i != vm->num_user_mapped_buffers);

	*num_buffers = vm->num_user_mapped_buffers;
	*mapped_buffers = buffer_list;

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return 0;
}

void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref)
{
	struct nvgpu_mapped_buf *mapped_buffer =
		container_of(ref, struct nvgpu_mapped_buf, ref);
	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
}

void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
			  struct nvgpu_mapped_buf **mapped_buffers,
			  int num_buffers)
{
	int i;
	struct vm_gk20a_mapping_batch batch;

	if (num_buffers == 0)
		return;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	nvgpu_vm_mapping_batch_start(&batch);
	vm->kref_put_batch = &batch;

	for (i = 0; i < num_buffers; ++i)
		nvgpu_ref_put(&mapped_buffers[i]->ref,
			      nvgpu_vm_unmap_locked_ref);

	vm->kref_put_batch = NULL;
	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
	nvgpu_mutex_release(&vm->update_gmmu_lock);

	nvgpu_big_free(vm->mm->g, mapped_buffers);
}

static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
				struct vm_gk20a_mapping_batch *batch)
{
	struct gk20a *g = vm->mm->g;
	struct nvgpu_mapped_buf *mapped_buffer;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
	if (!mapped_buffer) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
		return;
	}

	if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		struct nvgpu_timeout timeout;

		nvgpu_mutex_release(&vm->update_gmmu_lock);

		nvgpu_timeout_init(vm->mm->g, &timeout, 10000,
				   NVGPU_TIMER_RETRY_TIMER);
		do {
			if (nvgpu_atomic_read(
				&mapped_buffer->ref.refcount) == 1)
				break;
			nvgpu_udelay(5);
		} while (!nvgpu_timeout_expired_msg(&timeout,
					    "sync-unmap failed on 0x%llx"));

		nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	}

	if (mapped_buffer->user_mapped == 0) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		nvgpu_err(g, "addr already unmapped from user 0x%llx", offset);
		return;
	}

	mapped_buffer->user_mapped--;
	if (mapped_buffer->user_mapped == 0)
		vm->num_user_mapped_buffers--;

	vm->kref_put_batch = batch;
	nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
	vm->kref_put_batch = NULL;

	nvgpu_mutex_release(&vm->update_gmmu_lock);
}

int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
			  struct vm_gk20a_mapping_batch *batch)
{
	nvgpu_vm_unmap_user(vm, offset, batch);
	return 0;
}

@@ -208,7 +208,7 @@ int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
			&vm_area->buffer_list_head,
			nvgpu_mapped_buf, buffer_list) {
		nvgpu_list_del(&buffer->buffer_list);
		nvgpu_ref_put(&buffer->ref, gk20a_vm_unmap_locked_ref);
		nvgpu_ref_put(&buffer->ref, nvgpu_vm_unmap_locked_ref);
	}

	/* if this was a sparse mapping, free the va */

@@ -759,6 +759,8 @@ struct gpu_ops {
				struct vm_gk20a *vm, u32 big_page_size);
		bool (*mmu_fault_pending)(struct gk20a *g);
		void (*fault_info_mem_destroy)(struct gk20a *g);
		u32 (*get_kind_invalid)(void);
		u32 (*get_kind_pitch)(void);
	} mm;
	/*
	 * This function is called to allocate secure memory (memory

@@ -22,11 +22,6 @@
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/dma-attrs.h>
#include <linux/lcm.h>
#include <uapi/linux/nvgpu.h>
#include <trace/events/gk20a.h>

#include <nvgpu/vm.h>

@@ -46,8 +41,6 @@
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>

#include "gk20a.h"
#include "platform_gk20a.h"
#include "mm_gk20a.h"

@@ -64,13 +57,6 @@
#include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>

/*
 * Necessary while transitioning to less coupled code. Will be removed once
 * all the common APIs no longers have Linux stuff in them.
 */
#include "common/linux/vm_priv.h"
#include "common/linux/dmabuf.h"

/*
 * GPU mapping life cycle
 * ======================

@@ -330,209 +316,6 @@ int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
	return vm->mmu_levels[0].lo_bit[0];
}

int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
			 struct nvgpu_mapped_buf ***mapped_buffers,
			 int *num_buffers)
{
	struct nvgpu_mapped_buf *mapped_buffer;
	struct nvgpu_mapped_buf **buffer_list;
	struct nvgpu_rbtree_node *node = NULL;
	int i = 0;

	if (vm->userspace_managed) {
		*mapped_buffers = NULL;
		*num_buffers = 0;
		return 0;
	}

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
				       vm->num_user_mapped_buffers);
	if (!buffer_list) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		return -ENOMEM;
	}

	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
	while (node) {
		mapped_buffer = mapped_buffer_from_rbtree_node(node);
		if (mapped_buffer->user_mapped) {
			buffer_list[i] = mapped_buffer;
			nvgpu_ref_get(&mapped_buffer->ref);
			i++;
		}
		nvgpu_rbtree_enum_next(&node, node);
	}

	BUG_ON(i != vm->num_user_mapped_buffers);

	*num_buffers = vm->num_user_mapped_buffers;
	*mapped_buffers = buffer_list;

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return 0;
}

void gk20a_vm_unmap_locked_ref(struct nvgpu_ref *ref)
{
	struct nvgpu_mapped_buf *mapped_buffer =
		container_of(ref, struct nvgpu_mapped_buf, ref);
	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
}

void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
			  struct nvgpu_mapped_buf **mapped_buffers,
			  int num_buffers)
{
	int i;
	struct vm_gk20a_mapping_batch batch;

	if (num_buffers == 0)
		return;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	nvgpu_vm_mapping_batch_start(&batch);
	vm->kref_put_batch = &batch;

	for (i = 0; i < num_buffers; ++i)
		nvgpu_ref_put(&mapped_buffers[i]->ref,
			      gk20a_vm_unmap_locked_ref);

	vm->kref_put_batch = NULL;
	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
	nvgpu_mutex_release(&vm->update_gmmu_lock);

	nvgpu_big_free(vm->mm->g, mapped_buffers);
}

static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
				struct vm_gk20a_mapping_batch *batch)
{
	struct gk20a *g = vm->mm->g;
	struct nvgpu_mapped_buf *mapped_buffer;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
	if (!mapped_buffer) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
		return;
	}

	if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
		struct nvgpu_timeout timeout;

		nvgpu_mutex_release(&vm->update_gmmu_lock);

		nvgpu_timeout_init(vm->mm->g, &timeout, 10000,
				   NVGPU_TIMER_RETRY_TIMER);
		do {
			if (nvgpu_atomic_read(
				&mapped_buffer->ref.refcount) == 1)
				break;
			nvgpu_udelay(5);
		} while (!nvgpu_timeout_expired_msg(&timeout,
					    "sync-unmap failed on 0x%llx"));

		nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	}

	if (mapped_buffer->user_mapped == 0) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		nvgpu_err(g, "addr already unmapped from user 0x%llx", offset);
		return;
	}

	mapped_buffer->user_mapped--;
	if (mapped_buffer->user_mapped == 0)
		vm->num_user_mapped_buffers--;

	vm->kref_put_batch = batch;
	nvgpu_ref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_ref);
	vm->kref_put_batch = NULL;

	nvgpu_mutex_release(&vm->update_gmmu_lock);
}

static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr,
			     bool *pkind_compressible)
{
	struct gk20a *g = gk20a_from_vm(vm);
	bool kind_compressible;

	if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
		bfr->kind_v = gmmu_pte_kind_pitch_v();

	if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
		nvgpu_err(g, "kind 0x%x not supported", bfr->kind_v);
		return -EINVAL;
	}

	bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
	/* find a suitable incompressible kind if it becomes necessary later */
	kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
	if (kind_compressible) {
		bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
		if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
			/* shouldn't happen, but it is worth cross-checking */
			nvgpu_err(g, "comptag kind 0x%x can't be"
				  " downgraded to uncompressed kind",
				  bfr->kind_v);
			return -EINVAL;
		}
	}

	*pkind_compressible = kind_compressible;
	return 0;
}

int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
				      u32 flags,
				      struct buffer_attrs *bfr,
				      enum gmmu_pgsz_gk20a pgsz_idx)
{
	bool kind_compressible;
	struct gk20a *g = gk20a_from_vm(vm);
	int ctag_granularity = g->ops.fb.compression_page_size(g);

	if (!bfr->use_kind_v)
		bfr->kind_v = gmmu_pte_kind_invalid_v();
	if (!bfr->use_uc_kind_v)
		bfr->uc_kind_v = gmmu_pte_kind_invalid_v();

	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
		kind_compressible = (bfr->kind_v != gmmu_pte_kind_invalid_v());
		if (!kind_compressible)
			bfr->kind_v = bfr->uc_kind_v;
	} else {
		int err = setup_kind_legacy(vm, bfr, &kind_compressible);

		if (err)
			return err;
	}

	/* comptags only supported for suitable kinds, 128KB pagesize */
	if (kind_compressible &&
	    vm->gmmu_page_sizes[pgsz_idx] < g->ops.fb.compressible_page_size(g)) {
		/* it is safe to fall back to uncompressed as
		   functionality is not harmed */
		bfr->kind_v = bfr->uc_kind_v;
		kind_compressible = false;
	}
	if (kind_compressible)
		bfr->ctag_lines = DIV_ROUND_UP_ULL(bfr->size, ctag_granularity);
	else
		bfr->ctag_lines = 0;

	bfr->use_kind_v = (bfr->kind_v != gmmu_pte_kind_invalid_v());
	bfr->use_uc_kind_v = (bfr->uc_kind_v != gmmu_pte_kind_invalid_v());

	return 0;
}

/* for gk20a the "video memory" apertures here are misnomers. */
static inline u32 big_valid_pde0_bits(struct gk20a *g,
				      struct nvgpu_gmmu_pd *pd, u64 addr)

@@ -698,43 +481,6 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
	pd_write(g, pd, pd_offset + 1, pte_w[1]);
}

/* NOTE! mapped_buffers lock must be held */
void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
			   struct vm_gk20a_mapping_batch *batch)
{
	struct vm_gk20a *vm = mapped_buffer->vm;
	struct gk20a *g = vm->mm->g;

	g->ops.mm.gmmu_unmap(vm,
			     mapped_buffer->addr,
			     mapped_buffer->size,
			     mapped_buffer->pgsz_idx,
			     mapped_buffer->va_allocated,
			     gk20a_mem_flag_none,
			     mapped_buffer->vm_area ?
			     mapped_buffer->vm_area->sparse : false,
			     batch);

	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
		       mapped_buffer->sgt);

	/* remove from mapped buffer tree and remove list, free */
	nvgpu_remove_mapped_buf(vm, mapped_buffer);
	if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
		nvgpu_list_del(&mapped_buffer->buffer_list);

	/* keep track of mapped buffers */
	if (mapped_buffer->user_mapped)
		vm->num_user_mapped_buffers--;

	if (mapped_buffer->own_mem_ref)
		dma_buf_put(mapped_buffer->dmabuf);

	nvgpu_kfree(g, mapped_buffer);

	return;
}

const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
	 .lo_bit = {26, 26},

@@ -853,76 +599,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
	return __gk20a_vm_bind_channel(as_share->vm, ch);
}

int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
			int dmabuf_fd,
			u64 *offset_align,
			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
			s16 compr_kind,
			s16 incompr_kind,
			u64 buffer_offset,
			u64 mapping_size,
			struct vm_gk20a_mapping_batch *batch)
{
	int err = 0;
	struct dma_buf *dmabuf;
	u64 ret_va;

	gk20a_dbg_fn("");

	/* get ref to the mem handle (released on unmap_locked) */
	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf)) {
		nvgpu_warn(gk20a_from_vm(vm), "%s: fd %d is not a dmabuf",
			   __func__, dmabuf_fd);
		return PTR_ERR(dmabuf);
	}

	/* verify that we're not overflowing the buffer, i.e.
	 * (buffer_offset + mapping_size)> dmabuf->size.
	 *
	 * Since buffer_offset + mapping_size could overflow, first check
	 * that mapping size < dmabuf_size, at which point we can subtract
	 * mapping_size from both sides for the final comparison.
	 */
	if ((mapping_size > dmabuf->size) ||
	    (buffer_offset > (dmabuf->size - mapping_size))) {
		nvgpu_err(gk20a_from_vm(vm),
			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
			  (u64)dmabuf->size, buffer_offset, mapping_size);
		return -EINVAL;
	}

	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
	if (err) {
		dma_buf_put(dmabuf);
		return err;
	}

	ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align,
			      flags, compr_kind, incompr_kind, true,
			      gk20a_mem_flag_none,
			      buffer_offset,
			      mapping_size,
			      batch);

	*offset_align = ret_va;
	if (!ret_va) {
		dma_buf_put(dmabuf);
		err = -EINVAL;
	}

	return err;
}

int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
			  struct vm_gk20a_mapping_batch *batch)
{
	gk20a_dbg_fn("");

	nvgpu_vm_unmap_user(vm, offset, batch);
	return 0;
}

int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	int err;

@@ -1298,30 +974,6 @@ hw_was_off:
	gk20a_idle_nosuspend(g);
}

int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
		      struct dma_buf **dmabuf,
		      u64 *offset)
{
	struct nvgpu_mapped_buf *mapped_buffer;

	gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
	if (!mapped_buffer) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		return -EINVAL;
	}

	*dmabuf = mapped_buffer->dmabuf;
	*offset = gpu_va - mapped_buffer->addr;

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return 0;
}

int gk20a_mm_suspend(struct gk20a *g)
{
	gk20a_dbg_fn("");

@@ -386,6 +386,5 @@ extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];

int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd,
			     u64 *buffer_id, u64 *buffer_len);
void gk20a_vm_unmap_locked_ref(struct nvgpu_ref *ref);

#endif /* MM_GK20A_H */

@@ -443,6 +443,8 @@ static const struct gpu_ops gm20b_ops = {
		.is_bar1_supported = gm20b_mm_is_bar1_supported,
		.init_inst_block = gk20a_init_inst_block,
		.mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
		.get_kind_invalid = gm20b_get_kind_invalid,
		.get_kind_pitch = gm20b_get_kind_pitch,
	},
	.therm = {
		.init_therm_setup_hw = gm20b_init_therm_setup_hw,

@@ -74,3 +74,13 @@ u64 gm20b_gpu_phys_addr(struct gk20a *g,
{
	return phys;
}

u32 gm20b_get_kind_invalid(void)
{
	return gmmu_pte_kind_invalid_v();
}

u32 gm20b_get_kind_pitch(void)
{
	return gmmu_pte_kind_pitch_v();
}

@@ -38,4 +38,6 @@ bool gm20b_mm_is_bar1_supported(struct gk20a *g);
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
u64 gm20b_gpu_phys_addr(struct gk20a *g,
			struct nvgpu_gmmu_attrs *attrs, u64 phys);
u32 gm20b_get_kind_invalid(void);
u32 gm20b_get_kind_pitch(void);
#endif

@@ -529,6 +529,8 @@ static const struct gpu_ops gp106_ops = {
		.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup,
		.remove_bar2_vm = gp10b_remove_bar2_vm,
		.get_vidmem_size = gp106_mm_get_vidmem_size,
		.get_kind_invalid = gm20b_get_kind_invalid,
		.get_kind_pitch = gm20b_get_kind_pitch,
	},
	.pramin = {
		.enter = gk20a_pramin_enter,

@@ -488,6 +488,8 @@ static const struct gpu_ops gp10b_ops = {
		.init_bar2_vm = gb10b_init_bar2_vm,
		.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup,
		.remove_bar2_vm = gp10b_remove_bar2_vm,
		.get_kind_invalid = gm20b_get_kind_invalid,
		.get_kind_pitch = gm20b_get_kind_pitch,
	},
	.pramin = {
		.enter = gk20a_pramin_enter,

@@ -31,9 +31,10 @@
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>

struct dma_buf;

struct vm_gk20a;
struct nvgpu_vm_area;
struct buffer_attrs;
struct gk20a_comptag_allocator;

/**

@@ -212,6 +213,7 @@ int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,

void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
			   struct vm_gk20a_mapping_batch *batch);
void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref);

/*
 * These all require the VM update lock to be held.

@@ -223,9 +225,6 @@ struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range(
struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
	struct vm_gk20a *vm, u64 addr);

int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
		      struct dma_buf **dmabuf,
		      u64 *offset);
int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
			    struct nvgpu_mapped_buf *mapped_buffer);
void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,

@@ -342,6 +342,8 @@ static const struct gpu_ops vgpu_gm20b_ops = {
		.is_bar1_supported = gm20b_mm_is_bar1_supported,
		.init_inst_block = gk20a_init_inst_block,
		.mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
		.get_kind_invalid = gm20b_get_kind_invalid,
		.get_kind_pitch = gm20b_get_kind_pitch,
	},
	.therm = {
		.init_therm_setup_hw = gm20b_init_therm_setup_hw,

@@ -392,6 +392,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
		.init_bar2_vm = gb10b_init_bar2_vm,
		.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup,
		.remove_bar2_vm = gp10b_remove_bar2_vm,
		.get_kind_invalid = gm20b_get_kind_invalid,
		.get_kind_pitch = gm20b_get_kind_pitch,
	},
	.pramin = {
		.enter = gk20a_pramin_enter,
