diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
index 97b8334bf..6f3beaa96 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
@@ -54,8 +54,9 @@ struct nvgpu_mapped_buf_priv {
 /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		       struct dma_buf *dmabuf,
-		       u64 offset_align,
+		       u64 map_addr,
 		       u32 flags,
+		       u32 page_size,
 		       s16 compr_kind,
 		       s16 incompr_kind,
 		       int rw_flag,
@@ -71,8 +72,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
  */
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
-			u64 *offset_align,
+			u64 *map_addr,
 			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
+			u32 page_size,
 			s16 compr_kind,
 			s16 incompr_kind,
 			u64 buffer_offset,
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index 66a804036..052a1d21d 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -975,6 +975,30 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l
 	return cde_ctx;
 }
 
+static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm,
+				       u32 map_offset, u32 map_size)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	/*
+	 * To be simple we will just make the map size depend on the
+	 * iommu'ability of the driver. If there's an IOMMU we can rely on
+	 * buffers being contiguous. If not, then we'll use 4k pages since we
+	 * know that will work for any buffer.
+	 */
+	if (!nvgpu_iommuable(g))
+		return SZ_4K;
+
+	/*
+	 * If map size or offset is not 64K aligned then use small pages.
+	 */
+	if (map_size & (vm->big_page_size - 1) ||
+	    map_offset & (vm->big_page_size - 1))
+		return SZ_4K;
+
+	return vm->big_page_size;
+}
+
 int gk20a_cde_convert(struct nvgpu_os_linux *l,
 		      struct dma_buf *compbits_scatter_buf,
 		      u64 compbits_byte_offset,
@@ -1071,7 +1095,10 @@ __releases(&l->cde_app->mutex)
 	err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
 				 NVGPU_VM_MAP_CACHEABLE |
 				 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
-				 NVGPU_KIND_INVALID,
+				 gk20a_cde_mapping_page_size(cde_ctx->vm,
+							     map_offset,
+							     map_size),
+				 NV_KIND_INVALID,
 				 compbits_kind, /* incompressible kind */
 				 gk20a_mem_flag_none,
 				 map_offset, map_size,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 47f612cca..5eb9802fc 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -111,6 +111,7 @@ static int gk20a_as_ioctl_map_buffer_ex(
 
 	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
 				   &args->offset, args->flags,
+				   args->page_size,
 				   args->compr_kind,
 				   args->incompr_kind,
 				   args->buffer_offset,
@@ -201,7 +202,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 
 		err = nvgpu_vm_map_buffer(
 			as_share->vm, map_args.dmabuf_fd,
-			&map_args.offset, map_args.flags,
+			&map_args.offset, map_args.flags, map_args.page_size,
 			compressible_kind, incompressible_kind,
 			map_args.buffer_offset,
 			map_args.mapping_size,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 76ff25c0c..938e0abd2 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1372,10 +1372,11 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 			args->dmabuf_fd,
 			&args->offset, 0,
+			SZ_4K,
+			0,
 			0,
 			0,
 			0,
-			args->mapping_size,
 			NULL);
 	if (err)
 		goto err_remove_vm;
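The gk20a_cde_mapping_page_size() helper above picks the page size purely from IOMMU availability and 64K alignment of the map offset and size. Below is a minimal standalone sketch of that same selection rule, for reference only; the pick_page_size() name and the hard-coded 64K big-page value are illustrative and not part of the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K          0x1000u
#define BIG_PAGE_SIZE  0x10000u  /* illustrative: assumes vm->big_page_size is 64K */

/*
 * Same rule as gk20a_cde_mapping_page_size(): fall back to 4K pages when
 * there is no IOMMU (no contiguity guarantee) or when either the map
 * offset or the map size is not big-page aligned.
 */
static uint32_t pick_page_size(bool iommuable, uint32_t map_offset,
			       uint32_t map_size)
{
	if (!iommuable)
		return SZ_4K;

	if ((map_size & (BIG_PAGE_SIZE - 1)) ||
	    (map_offset & (BIG_PAGE_SIZE - 1)))
		return SZ_4K;

	return BIG_PAGE_SIZE;
}

int main(void)
{
	/* 64K-aligned offset and size with an IOMMU -> big pages */
	printf("%#x\n", (unsigned)pick_page_size(true, 0x20000, 0x40000));  /* 0x10000 */
	/* unaligned size -> small pages */
	printf("%#x\n", (unsigned)pick_page_size(true, 0x20000, 0x41000));  /* 0x1000 */
	/* no IOMMU -> always small pages */
	printf("%#x\n", (unsigned)pick_page_size(false, 0x20000, 0x40000)); /* 0x1000 */
	return 0;
}

Only the fully 64K-aligned, IOMMU-backed case selects big pages; everything else falls back to 4K, matching the conservative default the patch takes for CDE mappings.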
diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c
index baa775154..eb9ca8fda 100644
--- a/drivers/gpu/nvgpu/os/linux/vm.c
+++ b/drivers/gpu/nvgpu/os/linux/vm.c
@@ -175,8 +175,9 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		       struct dma_buf *dmabuf,
-		       u64 offset_align,
+		       u64 map_addr,
 		       u32 flags,
+		       u32 page_size,
 		       s16 compr_kind,
 		       s16 incompr_kind,
 		       int rw_flag,
@@ -192,12 +193,8 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	struct nvgpu_sgt *nvgpu_sgt = NULL;
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct dma_buf_attachment *attachment;
-	u64 map_addr = 0ULL;
 	int err = 0;
 
-	if (flags & NVGPU_VM_MAP_FIXED_OFFSET)
-		map_addr = offset_align;
-
 	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
 	if (IS_ERR(sgt)) {
 		nvgpu_warn(g, "Failed to pin dma_buf!");
@@ -253,8 +250,9 @@ clean_up:
 
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
-			u64 *offset_align,
+			u64 *map_addr,
 			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
+			u32 page_size,
 			s16 compr_kind,
 			s16 incompr_kind,
 			u64 buffer_offset,
@@ -274,8 +272,28 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 		return PTR_ERR(dmabuf);
 	}
 
+	/*
+	 * For regular maps we do not accept either an input address or a
+	 * buffer_offset.
+	 */
+	if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) &&
+	    (buffer_offset || *map_addr)) {
+		nvgpu_err(g,
+			  "Regular map with addr/buf offset is not supported!");
+		return -EINVAL;
+	}
+
+	/*
+	 * Map size is always buffer size for non fixed mappings. As such map
+	 * size should be left as zero by userspace for non-fixed maps.
+	 */
+	if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		nvgpu_err(g, "map_size && non-fixed-mapping!");
+		return -EINVAL;
+	}
+
 	/* verify that we're not overflowing the buffer, i.e.
-	 * (buffer_offset + mapping_size)> dmabuf->size.
+	 * (buffer_offset + mapping_size) > dmabuf->size.
 	 *
 	 * Since buffer_offset + mapping_size could overflow, first check
 	 * that mapping size < dmabuf_size, at which point we can subtract
@@ -284,7 +302,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 	if ((mapping_size > dmabuf->size) ||
 	    (buffer_offset > (dmabuf->size - mapping_size))) {
 		nvgpu_err(g,
-			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
+			  "buf size %llx < (offset(%llx) + map_size(%llx))",
 			  (u64)dmabuf->size, buffer_offset, mapping_size);
 		dma_buf_put(dmabuf);
 		return -EINVAL;
 	}
@@ -296,8 +314,9 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 		return err;
 	}
 
-	err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align,
+	err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr,
 				 nvgpu_vm_translate_linux_flags(g, flags),
+				 page_size,
 				 compr_kind, incompr_kind,
 				 gk20a_mem_flag_none,
 				 buffer_offset,
@@ -306,7 +325,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 				 &ret_va);
 
 	if (!err)
-		*offset_align = ret_va;
+		*map_addr = ret_va;
 	else
 		dma_buf_put(dmabuf);
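The two checks added to nvgpu_vm_map_buffer() above reject an input GPU VA, a buffer offset, or an explicit mapping size on non-fixed maps, since such maps always cover the whole dma-buf at a driver-chosen address. The following is a minimal sketch of that validation in isolation; the validate_map_args() helper and the flag value are illustrative stand-ins for the real NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET from the nvgpu uapi header.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative flag value; the real one lives in the nvgpu uapi header. */
#define MAP_FLAGS_FIXED_OFFSET  (1u << 2)

/*
 * Mirrors the new up-front checks in nvgpu_vm_map_buffer(): a non-fixed
 * ("regular") map must not supply a GPU VA, a buffer offset, or an
 * explicit mapping size.
 */
static int validate_map_args(uint32_t flags, uint64_t map_addr,
			     uint64_t buffer_offset, uint64_t mapping_size)
{
	if (!(flags & MAP_FLAGS_FIXED_OFFSET) && (buffer_offset || map_addr))
		return -EINVAL;

	if (mapping_size && !(flags & MAP_FLAGS_FIXED_OFFSET))
		return -EINVAL;

	return 0;
}

int main(void)
{
	/* regular map: address, offset and size all zero -> accepted */
	printf("%d\n", validate_map_args(0, 0, 0, 0));                /* 0 */
	/* regular map with an explicit size -> rejected */
	printf("%d\n", validate_map_args(0, 0, 0, 0x10000));          /* -22 */
	/* fixed map may pass an address, offset and size */
	printf("%d\n", validate_map_args(MAP_FLAGS_FIXED_OFFSET,
					 0x100000, 0x1000, 0x10000)); /* 0 */
	return 0;
}

This matches the perfbuf change in ioctl_dbg.c, which now passes 0 for mapping_size because the perfbuf map is a non-fixed mapping.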