Files
linux-nvgpu/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
Alex Waterman 0090ee5aca gpu: nvgpu: nvgpu SGL implementation
The last major item preventing the core MM code in the nvgpu
driver from being platform agnostic is the usage of Linux
scattergather tables and scattergather lists. These data
structures are used throughout the mapping code to handle
discontiguous DMA allocations and also overloaded to represent
VIDMEM allocs.

The notion of a scatter gather table is crucial to a HW device
that can handle discontiguous DMA. The GPU has a MMU which
allows the GPU to do page gathering and present a virtually
contiguous buffer to the GPU HW. As a result it makes sense
for the GPU driver to use some sort of scatter gather concept
so maximize memory usage efficiency.

To that end this patch keeps the notion of a scatter gather
list but implements it in the nvgpu common code. It is based
heavily on the Linux SGL concept. It is a singly linked list
of blocks - each representing a chunk of memory. To map or
use a DMA allocation SW must iterate over each block in the
SGL.

This patch implements the most basic level of support for this
data structure. There are certainly easy optimizations that
could be done to speed up the current implementation. However,
this patches' goal is to simply divest the core MM code from
any last Linux'isms. Speed and efficiency come next.

Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1530867
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2017-09-22 12:52:48 -07:00

377 lines
8.8 KiB
C

/*
* Virtualized GPU Memory Management
*
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/dma-mapping.h>
#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/bug.h>
#include <nvgpu/vm.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/vgpu/vm.h>
#include <nvgpu/linux/nvgpu_mem.h>
#include "vgpu/vgpu.h"
#include "gk20a/mm_gk20a.h"
#include "gm20b/mm_gm20b.h"
#include "common/linux/vm_priv.h"
static int vgpu_init_mm_setup_sw(struct gk20a *g)
{
struct mm_gk20a *mm = &g->mm;
gk20a_dbg_fn("");
if (mm->sw_ready) {
gk20a_dbg_fn("skip init");
return 0;
}
nvgpu_mutex_init(&mm->tlb_lock);
nvgpu_mutex_init(&mm->priv_lock);
mm->g = g;
/*TBD: make channel vm size configurable */
mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE;
mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
(int)(mm->channel.user_size >> 20),
(int)(mm->channel.kernel_size >> 20));
mm->sw_ready = true;
return 0;
}
int vgpu_init_mm_support(struct gk20a *g)
{
int err;
gk20a_dbg_fn("");
err = vgpu_init_mm_setup_sw(g);
if (err)
return err;
if (g->ops.mm.init_mm_setup_hw)
err = g->ops.mm.init_mm_setup_hw(g);
return err;
}
static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
u64 map_offset,
struct nvgpu_mem_sgl *sgl,
u64 buffer_offset,
u64 size,
int pgsz_idx,
u8 kind_v,
u32 ctag_offset,
u32 flags,
int rw_flag,
bool clear_ctags,
bool sparse,
bool priv,
struct vm_gk20a_mapping_batch *batch,
enum nvgpu_aperture aperture)
{
int err = 0;
struct device *d = dev_from_vm(vm);
struct gk20a *g = gk20a_from_vm(vm);
struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL);
u8 prot;
gk20a_dbg_fn("");
/* Allocate (or validate when map_offset != 0) the virtual address. */
if (!map_offset) {
map_offset = __nvgpu_vm_alloc_va(vm, size,
pgsz_idx);
if (!map_offset) {
nvgpu_err(g, "failed to allocate va space");
err = -ENOMEM;
goto fail;
}
}
if (rw_flag == gk20a_mem_flag_read_only)
prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
else if (rw_flag == gk20a_mem_flag_write_only)
prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
else
prot = TEGRA_VGPU_MAP_PROT_NONE;
msg.cmd = TEGRA_VGPU_CMD_AS_MAP;
msg.handle = vgpu_get_handle(g);
p->handle = vm->handle;
p->addr = addr;
p->gpu_va = map_offset;
p->size = size;
if (pgsz_idx == gmmu_page_size_kernel) {
u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small]) {
pgsz_idx = gmmu_page_size_small;
} else if (page_size ==
vm->gmmu_page_sizes[gmmu_page_size_big]) {
pgsz_idx = gmmu_page_size_big;
} else {
nvgpu_err(g, "invalid kernel page size %d",
page_size);
goto fail;
}
}
p->pgsz_idx = pgsz_idx;
p->iova = mapping ? 1 : 0;
p->kind = kind_v;
p->cacheable =
(flags & NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
p->prot = prot;
p->ctag_offset = ctag_offset;
p->clear_ctags = clear_ctags;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
err = err ? err : msg.ret;
if (err)
goto fail;
/* TLB invalidate handled on server side */
return map_offset;
fail:
nvgpu_err(g, "%s: failed with err=%d", __func__, err);
return 0;
}
static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
u64 vaddr,
u64 size,
int pgsz_idx,
bool va_allocated,
int rw_flag,
bool sparse,
struct vm_gk20a_mapping_batch *batch)
{
struct gk20a *g = gk20a_from_vm(vm);
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
int err;
gk20a_dbg_fn("");
if (va_allocated) {
err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
if (err) {
dev_err(dev_from_vm(vm),
"failed to free va");
return;
}
}
msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
msg.handle = vgpu_get_handle(g);
p->handle = vm->handle;
p->gpu_va = vaddr;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
if (err || msg.ret)
dev_err(dev_from_vm(vm),
"failed to update gmmu ptes on unmap");
/* TLB invalidate handled on server side */
}
/*
* This is called by the common VM init routine to handle vGPU specifics of
* intializing a VM on a vGPU. This alone is not enough to init a VM. See
* nvgpu_vm_init().
*/
int vgpu_vm_init(struct gk20a *g, struct vm_gk20a *vm)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
int err;
msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
msg.handle = vgpu_get_handle(g);
p->size = vm->va_limit;
p->big_page_size = vm->big_page_size;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
if (err || msg.ret)
return -ENOMEM;
vm->handle = p->handle;
return 0;
}
/*
* Similar to vgpu_vm_init() this is called as part of the cleanup path for
* VMs. This alone is not enough to remove a VM - see nvgpu_vm_remove().
*/
void vgpu_vm_remove(struct vm_gk20a *vm)
{
struct gk20a *g = gk20a_from_vm(vm);
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
int err;
msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
msg.handle = vgpu_get_handle(g);
p->handle = vm->handle;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
WARN_ON(err || msg.ret);
}
u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
{
struct dma_iommu_mapping *mapping =
to_dma_iommu_mapping(dev_from_gk20a(g));
u64 addr = nvgpu_mem_get_addr_sgl(g, (*sgt)->sgl);
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
int err;
msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
msg.handle = vgpu_get_handle(g);
p->addr = addr;
p->size = size;
p->iova = mapping ? 1 : 0;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
if (err || msg.ret)
addr = 0;
else
addr = p->gpu_va;
return addr;
}
static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
struct channel_gk20a *ch)
{
struct vm_gk20a *vm = as_share->vm;
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
int err;
gk20a_dbg_fn("");
ch->vm = vm;
msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
msg.handle = vgpu_get_handle(ch->g);
p->as_handle = vm->handle;
p->chan_handle = ch->virt_ctx;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
if (err || msg.ret) {
ch->vm = NULL;
err = -ENOMEM;
}
if (ch->vm)
nvgpu_vm_get(ch->vm);
return err;
}
static void vgpu_cache_maint(u64 handle, u8 op)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
int err;
msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
msg.handle = handle;
p->op = op;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
WARN_ON(err || msg.ret);
}
static int vgpu_mm_fb_flush(struct gk20a *g)
{
gk20a_dbg_fn("");
vgpu_cache_maint(vgpu_get_handle(g), TEGRA_VGPU_FB_FLUSH);
return 0;
}
static void vgpu_mm_l2_invalidate(struct gk20a *g)
{
gk20a_dbg_fn("");
vgpu_cache_maint(vgpu_get_handle(g), TEGRA_VGPU_L2_MAINT_INV);
}
static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
{
u8 op;
gk20a_dbg_fn("");
if (invalidate)
op = TEGRA_VGPU_L2_MAINT_FLUSH_INV;
else
op = TEGRA_VGPU_L2_MAINT_FLUSH;
vgpu_cache_maint(vgpu_get_handle(g), op);
}
static void vgpu_mm_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
{
gk20a_dbg_fn("");
nvgpu_err(g, "call to RM server not supported");
}
static void vgpu_mm_mmu_set_debug_mode(struct gk20a *g, bool enable)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_mmu_debug_mode *p = &msg.params.mmu_debug_mode;
int err;
gk20a_dbg_fn("");
msg.cmd = TEGRA_VGPU_CMD_SET_MMU_DEBUG_MODE;
msg.handle = vgpu_get_handle(g);
p->enable = (u32)enable;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
WARN_ON(err || msg.ret);
}
void vgpu_init_mm_ops(struct gpu_ops *gops)
{
gops->fb.is_debug_mode_enabled = NULL;
gops->fb.set_debug_mode = vgpu_mm_mmu_set_debug_mode;
gops->mm.gmmu_map = vgpu_locked_gmmu_map;
gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
gops->mm.fb_flush = vgpu_mm_fb_flush;
gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
gops->mm.l2_flush = vgpu_mm_l2_flush;
gops->fb.tlb_invalidate = vgpu_mm_tlb_invalidate;
gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
gops->mm.gpu_phys_addr = gm20b_gpu_phys_addr;
gops->mm.init_mm_setup_hw = NULL;
}