gpu: nvgpu: User-space managed address space support

Implement NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED, which
enables creating userspace-managed GPU address spaces.

When an address space is marked as userspace-managed, the following
changes are in effect (a usage sketch follows the list):

- Only fixed-address mappings are allowed.
- VA space allocation for fixed-address mappings is not required,
  except to mark space as sparse.
- Maps and unmaps are always immediate. In particular, the mapping
  ref increments at kickoffs and decrements at job completion are
  skipped.
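
For illustration, a minimal userspace sketch of allocating such an
address space with the new flag. It assumes the usual
/dev/nvhost-ctrl-gpu control node and that NVGPU_GPU_IOCTL_ALLOC_AS and
struct nvgpu_alloc_as_args come from the installed nvgpu UAPI header;
support should first be confirmed via the new
NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS bit in the GPU
characteristics. Error handling is trimmed.

/* Sketch only: device node path and header location are assumptions. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Returns an as_fd for a userspace-managed address space, or -1. */
static int alloc_userspace_managed_as(void)
{
	struct nvgpu_alloc_as_args args;
	int ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (ctrl_fd < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.big_page_size = 0; /* 0 selects the default big page size */
	args.flags = NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_AS, &args) != 0) {
		perror("NVGPU_GPU_IOCTL_ALLOC_AS");
		close(ctrl_fd);
		return -1;
	}

	close(ctrl_fd);
	/* Only fixed-offset mappings will be accepted on args.as_fd. */
	return args.as_fd;
}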

Bug 1614735
Bug 1623949
Bug 1660392

Change-Id: I834fe19b3f65e9b02c268952383eddee0e465759
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/738558
Reviewed-on: http://git-master/r/833253
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Author:    Sami Kiminki
Date:      2015-05-04 18:41:23 +03:00
Committer: Terje Bergstrom
Parent:    503d3a0b10
Commit:    9d2c9072c8

9 changed files with 148 additions and 38 deletions

View File

@@ -38,7 +38,8 @@ static void release_as_share_id(struct gk20a_as *as, int id)
}
int gk20a_as_alloc_share(struct gk20a_as *as,
u32 flags, struct gk20a_as_share **out)
u32 big_page_size, u32 flags,
struct gk20a_as_share **out)
{
struct gk20a *g = gk20a_from_as(as);
struct gk20a_as_share *as_share;
@@ -59,7 +60,7 @@ int gk20a_as_alloc_share(struct gk20a_as *as,
err = gk20a_busy(g->dev);
if (err)
goto failed;
err = g->ops.mm.vm_alloc_share(as_share, flags);
err = g->ops.mm.vm_alloc_share(as_share, big_page_size, flags);
gk20a_idle(g->dev);
if (err)
@@ -332,7 +333,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
g = container_of(inode->i_cdev, struct gk20a, as.cdev);
err = gk20a_as_alloc_share(&g->as, 0, &as_share);
err = gk20a_as_alloc_share(&g->as, 0, 0, &as_share);
if (err) {
gk20a_dbg_fn("failed to alloc share");
return err;

View File

@@ -1,7 +1,7 @@
/*
* GK20A Address Spaces
*
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -42,7 +42,9 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share);
int gk20a_as_dev_open(struct inode *inode, struct file *filp);
int gk20a_as_dev_release(struct inode *inode, struct file *filp);
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
int gk20a_as_alloc_share(struct gk20a_as *as,
/* if big_page_size == 0, the default big page size is used */
int gk20a_as_alloc_share(struct gk20a_as *as, u32 big_page_size,
u32 flags, struct gk20a_as_share **out);
#endif

View File

@@ -166,7 +166,8 @@ static int gk20a_ctrl_alloc_as(
goto clean_up;
}
err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share);
err = gk20a_as_alloc_share(&g->as, args->big_page_size, args->flags,
&as_share);
if (err)
goto clean_up_file;

View File

@@ -1993,6 +1993,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
gk20a_platform_has_syncpoints(g->dev))
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
gpu->gpc_mask = 1;
g->ops.gr.detect_sm_arch(g);

View File

@@ -370,7 +370,7 @@ struct gpu_ops {
struct vm_gk20a_mapping_batch *batch);
void (*vm_remove)(struct vm_gk20a *vm);
int (*vm_alloc_share)(struct gk20a_as_share *as_share,
u32 flags);
u32 big_page_size, u32 flags);
int (*vm_bind_channel)(struct gk20a_as_share *as_share,
struct channel_gk20a *ch);
int (*fb_flush)(struct gk20a *g);

View File

@@ -767,6 +767,12 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
struct rb_node *node;
int i = 0;
if (vm->userspace_managed) {
*mapped_buffers = NULL;
*num_buffers = 0;
return 0;
}
mutex_lock(&vm->update_gmmu_lock);
buffer_list = nvgpu_alloc(sizeof(*buffer_list) *
@@ -1135,7 +1141,8 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
static int validate_fixed_buffer(struct vm_gk20a *vm,
struct buffer_attrs *bfr,
u64 map_offset, u64 map_size)
u64 map_offset, u64 map_size,
struct vm_reserved_va_node **pva_node)
{
struct device *dev = dev_from_vm(vm);
struct vm_reserved_va_node *va_node;
@@ -1154,15 +1161,16 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
return -EINVAL;
}
/* find the space reservation */
/* Find the space reservation, but it's ok to have none for
* userspace-managed address spaces */
va_node = addr_to_reservation(vm, map_offset);
if (!va_node) {
if (!va_node && !vm->userspace_managed) {
gk20a_warn(dev, "fixed offset mapping without space allocation");
return -EINVAL;
}
/* mapped area should fit inside va */
if (map_end > va_node->vaddr_start + va_node->size) {
/* Mapped area should fit inside va, if there's one */
if (va_node && map_end > va_node->vaddr_start + va_node->size) {
gk20a_warn(dev, "fixed offset mapping size overflows va node");
return -EINVAL;
}
@@ -1177,6 +1185,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
return -EINVAL;
}
*pva_node = va_node;
return 0;
}
@@ -1411,17 +1421,29 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
u64 buf_addr;
u64 ctag_map_win_size = 0;
u32 ctag_map_win_ctagline = 0;
struct vm_reserved_va_node *va_node = NULL;
if (user_mapped && vm->userspace_managed &&
!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
gk20a_err(d,
"%s: non-fixed-offset mapping not available on userspace managed address spaces",
__func__);
return -EFAULT;
}
mutex_lock(&vm->update_gmmu_lock);
/* check if this buffer is already mapped */
map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
if (!vm->userspace_managed) {
map_offset = gk20a_vm_map_duplicate_locked(
vm, dmabuf, offset_align,
flags, kind, sgt,
user_mapped, rw_flag);
if (map_offset) {
mutex_unlock(&vm->update_gmmu_lock);
return map_offset;
}
}
/* pin buffer to get phys/iovmm addr */
bfr.sgt = gk20a_mm_pin(d, dmabuf);
@@ -1504,7 +1526,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
err = validate_fixed_buffer(vm, &bfr,
offset_align, mapping_size);
offset_align, mapping_size,
&va_node);
if (err)
goto clean_up;
@@ -1671,11 +1694,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
if (!va_allocated) {
struct vm_reserved_va_node *va_node;
/* find the space reservation */
va_node = addr_to_reservation(vm, map_offset);
if (va_node) {
list_add_tail(&mapped_buffer->va_buffers_list,
&va_node->va_buffers_list);
mapped_buffer->va_node = va_node;
@@ -1753,18 +1772,27 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
struct mapped_buffer_node *mapped_buffer;
struct gk20a *g = gk20a_from_vm(vm);
struct device *d = dev_from_vm(vm);
const bool fixed_mapping =
(flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
/* This will be implemented later */
if (vm->userspace_managed && !fixed_mapping) {
gk20a_err(d,
"%s: fixed-offset compbits mapping not yet supported",
"%s: non-fixed-offset mapping is not available on userspace managed address spaces",
__func__);
return -EFAULT;
}
if (fixed_mapping && !vm->userspace_managed) {
gk20a_err(d,
"%s: fixed-offset mapping is available only on userspace managed address spaces",
__func__);
return -EFAULT;
}
mutex_lock(&vm->update_gmmu_lock);
mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
mapped_buffer =
find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
if (!mapped_buffer || !mapped_buffer->user_mapped) {
mutex_unlock(&vm->update_gmmu_lock);
@@ -1774,7 +1802,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
if (!mapped_buffer->ctags_mappable) {
mutex_unlock(&vm->update_gmmu_lock);
gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
gk20a_err(d, "%s: comptags not mappable, offset 0x%llx",
__func__, mapping_gva);
return -EFAULT;
}
@@ -1804,10 +1833,41 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
cacheline_offset_start =
cacheline_start * aggregate_cacheline_sz;
if (fixed_mapping) {
struct buffer_attrs bfr;
int err;
struct vm_reserved_va_node *va_node = NULL;
memset(&bfr, 0, sizeof(bfr));
bfr.pgsz_idx = small_pgsz_index;
err = validate_fixed_buffer(
vm, &bfr, *compbits_win_gva,
mapped_buffer->ctag_map_win_size, &va_node);
if (err) {
mutex_unlock(&vm->update_gmmu_lock);
return err;
}
if (va_node) {
/* this would create a dangling GPU VA
* pointer if the space is freed
* before the buffer is
* unmapped */
mutex_unlock(&vm->update_gmmu_lock);
gk20a_err(d,
"%s: comptags cannot be mapped into allocated space",
__func__);
return -EINVAL;
}
}
mapped_buffer->ctag_map_win_addr =
g->ops.mm.gmmu_map(
vm,
0,
!fixed_mapping ? 0 : *compbits_win_gva, /* va */
g->gr.compbit_store.mem.sgt,
cacheline_offset_start, /* sg offset */
mapped_buffer->ctag_map_win_size, /* size */
@@ -1828,6 +1888,15 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
__func__, mapping_gva);
return -ENOMEM;
}
} else if (fixed_mapping && *compbits_win_gva &&
mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
mutex_unlock(&vm->update_gmmu_lock);
gk20a_err(d,
"%s: re-requesting comptags map into mismatching address. buffer offset 0x"
"%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
__func__, mapping_gva,
mapped_buffer->ctag_map_win_addr, *compbits_win_gva);
return -EINVAL;
}
*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
@@ -2662,6 +2731,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
u64 kernel_reserved,
u64 aperture_size,
bool big_pages,
bool userspace_managed,
char *name)
{
int err, i;
@@ -2685,6 +2755,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
vm->userspace_managed = userspace_managed;
vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,
vm->big_page_size);
@@ -2821,7 +2893,8 @@ clean_up_pdes:
}
/* address space interfaces for the gk20a module */
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
u32 flags)
{
struct gk20a_as *as = as_share->as;
struct gk20a *g = gk20a_from_as(as);
@@ -2829,6 +2902,8 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
struct vm_gk20a *vm;
char name[32];
int err;
const bool userspace_managed =
(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
gk20a_dbg_fn("");
@@ -2856,7 +2931,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
mm->channel.kernel_size,
mm->channel.user_size + mm->channel.kernel_size,
!mm->disable_bigpage, name);
!mm->disable_bigpage, userspace_managed, name);
return err;
}
@@ -3235,7 +3310,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
mm->bar1.aperture_size - SZ_4K,
mm->bar1.aperture_size, false, "bar1");
mm->bar1.aperture_size, false, false, "bar1");
err = gk20a_alloc_inst_block(g, inst_block);
if (err)
@@ -3263,7 +3338,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
gk20a_init_vm(mm, vm, big_page_size,
SZ_4K * 16, GK20A_PMU_VA_SIZE,
GK20A_PMU_VA_SIZE * 2, false,
GK20A_PMU_VA_SIZE * 2, false, false,
"system");
err = gk20a_alloc_inst_block(g, inst_block);
@@ -3303,7 +3378,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
SZ_4K * 16,
NV_MM_DEFAULT_KERNEL_SIZE,
NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
false, "cde");
false, false, "cde");
}
void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)

View File

@@ -251,6 +251,8 @@ struct vm_gk20a {
u32 big_page_size;
bool userspace_managed;
const struct gk20a_mmu_level *mmu_levels;
struct kref ref;
@@ -586,7 +588,8 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
/* vm-as interface */
struct nvgpu_as_alloc_space_args;
struct nvgpu_as_free_space_args;
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags);
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
u32 flags);
int gk20a_vm_release_share(struct gk20a_as_share *as_share);
int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
struct nvgpu_as_alloc_space_args *args);
@@ -621,6 +624,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
u64 kernel_reserved,
u64 aperture_size,
bool big_pages,
bool userspace_managed,
char *name);
void gk20a_deinit_vm(struct vm_gk20a *vm);

View File

@@ -267,7 +267,7 @@ u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
/* address space interfaces for the gk20a module */
static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
u32 big_page_size)
u32 big_page_size, u32 flags)
{
struct gk20a_as *as = as_share->as;
struct gk20a *g = gk20a_from_as(as);
@@ -280,6 +280,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
kernel_vma_start, kernel_vma_limit;
char name[32];
int err, i;
const bool userspace_managed =
(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
/* note: keep the page sizes sorted lowest to highest here */
u32 gmmu_page_sizes[gmmu_nr_page_sizes] = {
@@ -290,6 +292,12 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
gk20a_dbg_fn("");
if (userspace_managed) {
gk20a_err(dev_from_gk20a(g),
"userspace-managed address spaces not yet supported");
return -ENOSYS;
}
big_page_size = gmmu_page_sizes[gmmu_page_size_big];
vm = kzalloc(sizeof(*vm), GFP_KERNEL);

View File

@@ -106,6 +106,8 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS (1 << 4)
/* NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT is available */
#define NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT (1 << 6)
/* User-space managed address spaces support */
#define NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS (1 << 7)
struct nvgpu_gpu_characteristics {
__u32 arch;
@@ -239,7 +241,22 @@ struct nvgpu_gpu_mark_compressible_write_args {
struct nvgpu_alloc_as_args {
__u32 big_page_size;
__s32 as_fd;
__u64 reserved; /* must be zero */
/*
* The GPU address space will be managed by the userspace. This has
* the following changes in functionality:
* 1. All non-fixed-offset user mappings are rejected (i.e.,
* fixed-offset only)
* 2. Address space does not need to be allocated for fixed-offset
* mappings, except to mark sparse address space areas.
* 3. Maps and unmaps are immediate. In particular, mapping ref
* increments at kickoffs and decrements at job completion are
* bypassed.
*/
#define NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED (1 << 0)
__u32 flags;
__u32 reserved; /* must be zero */
};
struct nvgpu_gpu_open_tsg_args {
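
As a complement, a sketch of the only mapping shape a userspace-managed
address space accepts: a fixed-offset map through the returned as_fd.
NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET is taken from the diff above; the
NVGPU_AS_IOCTL_MAP_BUFFER_EX call and the nvgpu_as_map_buffer_ex_args
field names are assumptions based on the contemporary nvgpu UAPI and
should be checked against the installed header.

/*
 * Sketch only: the as-fd ioctl name and struct layout below are
 * assumptions; consult the installed nvgpu UAPI header.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Map dmabuf_fd at the caller-chosen GPU VA fixed_gpu_va. */
static int map_fixed(int as_fd, int dmabuf_fd, __u64 fixed_gpu_va,
		     __u64 size)
{
	struct nvgpu_as_map_buffer_ex_args args;

	memset(&args, 0, sizeof(args));
	args.flags = NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET;
	args.dmabuf_fd = dmabuf_fd;
	args.offset = fixed_gpu_va;	/* requested (and resulting) GPU VA */
	args.mapping_size = size;

	/*
	 * On a userspace-managed AS no prior space allocation is needed
	 * for this range (allocation is only needed to mark areas
	 * sparse), and the map takes effect immediately.
	 */
	return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_EX, &args);
}

A non-fixed-offset map request on the same fd would be rejected with
-EFAULT, as the gk20a_vm_map() hunk above shows.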