diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 038fa4c80..63569008a 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -225,6 +225,31 @@ static int gk20a_as_ioctl_get_va_regions(
 	return 0;
 }
 
+static int gk20a_as_ioctl_get_buffer_compbits_info(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_get_buffer_compbits_info_args *args)
+{
+	gk20a_dbg_fn("");
+	return gk20a_vm_get_compbits_info(as_share->vm,
+					  args->mapping_gva,
+					  &args->compbits_win_size,
+					  &args->compbits_win_ctagline,
+					  &args->mapping_ctagline,
+					  &args->flags);
+}
+
+static int gk20a_as_ioctl_map_buffer_compbits(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_map_buffer_compbits_args *args)
+{
+	gk20a_dbg_fn("");
+	return gk20a_vm_map_compbits(as_share->vm,
+				     args->mapping_gva,
+				     &args->compbits_win_gva,
+				     &args->mapping_iova,
+				     args->flags);
+}
+
 int gk20a_as_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a_as_share *as_share;
@@ -334,6 +359,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = gk20a_as_ioctl_get_va_regions(as_share,
 			(struct nvgpu_as_get_va_regions_args *)buf);
 		break;
+	case NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO:
+		err = gk20a_as_ioctl_get_buffer_compbits_info(as_share,
+			(struct nvgpu_as_get_buffer_compbits_info_args *)buf);
+		break;
+	case NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS:
+		err = gk20a_as_ioctl_map_buffer_compbits(as_share,
+			(struct nvgpu_as_map_buffer_compbits_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 470729b72..d3114ecd2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2016,8 +2016,13 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
 	gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
 	g->ops.gr.get_rop_l2_en_mask(g);
-
-	gpu->reserved = 0;
+	gpu->gr_compbit_store_base_hw = g->gr.compbit_store.base_hw;
+	gpu->gr_gobs_per_comptagline_per_slice =
+		g->gr.gobs_per_comptagline_per_slice;
+	gpu->num_ltc = g->ltc_count;
+	gpu->lts_per_ltc = g->gr.slices_per_ltc;
+	gpu->cbc_cache_line_size = g->gr.cacheline_size;
+	gpu->cbc_comptags_per_line = g->gr.comptags_per_cacheline;
 
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5d1ff5630..d896d7835 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -221,7 +221,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 				struct device *dev,
 				struct dma_buf *dmabuf,
 				struct gk20a_allocator *allocator,
-				u32 lines, bool user_mappable)
+				u32 lines, bool user_mappable,
+				u64 *ctag_map_win_size,
+				u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 offset = 0;
@@ -313,6 +315,8 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 				first_unneeded_cacheline *
 				g->gr.comptags_per_cacheline;
 
+			u64 win_size;
+
 			if (needed_ctaglines < ctaglines_to_allocate) {
 				/* free alignment lines */
 				int tmp=
@@ -326,6 +330,14 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 				ctaglines_to_allocate = needed_ctaglines;
 			}
+
+			*ctag_map_win_ctagline = offset;
+			win_size =
+				DIV_ROUND_UP(lines,
+					     g->gr.comptags_per_cacheline) *
+				aggregate_cacheline_sz;
+
+			*ctag_map_win_size = round_up(win_size, small_pgsz);
 		}
 
 		priv->comptags.offset = offset;
@@ -1374,6 +1386,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	bool clear_ctags = false;
 	struct scatterlist *sgl;
 	u64 buf_addr;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1501,7 +1515,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 		/* allocate compression resources if needed */
 		err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
-					   bfr.ctag_lines, user_mappable);
+					   bfr.ctag_lines, user_mappable,
+					   &ctag_map_win_size,
+					   &ctag_map_win_ctagline);
 		if (err) {
 			/* ok to fall back here if we ran out */
 			/* TBD: we can partially alloc ctags as well... */
@@ -1588,6 +1604,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->ctag_lines = bfr.ctag_lines;
 	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
 	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
 	mapped_buffer->vm = vm;
 	mapped_buffer->flags = flags;
 	mapped_buffer->kind = kind;
@@ -1640,6 +1658,140 @@ clean_up:
 	return 0;
 }
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct device *d = dev_from_vm(vm);
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers,
+						  mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	*compbits_win_size = 0;
+	*compbits_win_ctagline = 0;
+	*mapping_ctagline = 0;
+	*flags = 0;
+
+	if (mapped_buffer->ctag_offset)
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS;
+
+	if (mapped_buffer->ctags_mappable) {
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE;
+		*compbits_win_size = mapped_buffer->ctag_map_win_size;
+		*compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline;
+		*mapping_ctagline = mapped_buffer->ctag_offset;
+	}
+
+	mutex_unlock(&vm->update_gmmu_lock);
+	return 0;
+}
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct device *d = dev_from_vm(vm);
+
+	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
+		/* This will be implemented later */
+		gk20a_err(d,
+			  "%s: fixed-offset compbits mapping not yet supported",
+			  __func__);
+		return -EFAULT;
+	}
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers,
+						  mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctags_mappable) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx",
+			  __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctag_map_win_addr) {
+		const u32 small_pgsz_index = 0; /* small pages, 4K */
+		const u32 aggregate_cacheline_sz =
+			g->gr.cacheline_size * g->gr.slices_per_ltc *
+			g->ltc_count;
+
+		/* first aggregate cacheline to map */
+		u32 cacheline_start; /* inclusive */
+
+		/* offset of the start cacheline (will be page aligned) */
+		u64 cacheline_offset_start;
+
+		if (!mapped_buffer->ctag_map_win_size) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: mapping 0x%llx does not have "
+				  "mappable comptags",
+				  __func__, mapping_gva);
+			return -EFAULT;
+		}
+
+		cacheline_start = mapped_buffer->ctag_offset /
+			g->gr.comptags_per_cacheline;
+		cacheline_offset_start =
+			cacheline_start * aggregate_cacheline_sz;
+
+		mapped_buffer->ctag_map_win_addr =
+			g->ops.mm.gmmu_map(
+				vm,
+				0,
+				g->gr.compbit_store.mem.sgt,
+				cacheline_offset_start, /* sg offset */
+				mapped_buffer->ctag_map_win_size, /* size */
+				small_pgsz_index,
+				0, /* kind */
+				0, /* ctag_offset */
+				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				gk20a_mem_flag_read_only,
+				false,
+				false);
+
+		if (!mapped_buffer->ctag_map_win_addr) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: failed to map comptags for mapping 0x%llx",
+				  __func__, mapping_gva);
+			return -ENOMEM;
+		}
+	}
+
+	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
+	*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
+
+	mutex_unlock(&vm->update_gmmu_lock);
+
+	return 0;
+}
+
 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		   struct sg_table **sgt,
 		   u64 size,
@@ -2276,6 +2428,18 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
+	if (mapped_buffer->ctag_map_win_addr) {
+		/* unmap compbits */
+
+		g->ops.mm.gmmu_unmap(vm,
+				     mapped_buffer->ctag_map_win_addr,
+				     mapped_buffer->ctag_map_win_size,
+				     0,       /* page size 4k */
+				     true,    /* va allocated */
+				     gk20a_mem_flag_none,
+				     false);  /* not sparse */
+	}
+
 	g->ops.mm.gmmu_unmap(vm,
 		mapped_buffer->addr,
 		mapped_buffer->size,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 0ff11d094..e07b95fe7 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -186,7 +186,13 @@ struct mapped_buffer_node {
 	u32 ctag_offset;
 	u32 ctag_lines;
 	u32 ctag_allocated_lines;
+
+	/* For comptag mapping, these are the mapping window parameters */
 	bool ctags_mappable;
+	u64 ctag_map_win_addr; /* non-zero if mapped */
+	u64 ctag_map_win_size; /* non-zero if ctags_mappable */
+	u32 ctag_map_win_ctagline; /* ctagline at win start, set if
+				    * ctags_mappable */
 
 	u32 flags;
 	u32 kind;
@@ -504,6 +510,19 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 		 u64 buffer_offset,
 		 u64 mapping_size);
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags);
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags);
+
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 9527ab824..e5bb0d07c 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -166,7 +166,13 @@ struct nvgpu_gpu_characteristics {
 
 	__u8 chipname[8];
 
-
+	__u64 gr_compbit_store_base_hw;
+	__u32 gr_gobs_per_comptagline_per_slice;
+	__u32 num_ltc;
+	__u32 lts_per_ltc;
+	__u32 cbc_cache_line_size;
+	__u32 cbc_comptags_per_line;
+	__u32 reserved2;
 
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
@@ -895,6 +901,91 @@ struct nvgpu_as_map_buffer_ex_args {
 				 * the buffer is returned in this field. */
 };
 
+/*
+ * Get info about buffer compbits. Requires that the buffer is mapped
+ * with NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS.
+ *
+ * The compbits for a mappable buffer are organized in a mappable
+ * window to the compbits store. In case the window contains comptags
+ * for more than one buffer, the buffer comptag line index may differ
+ * from the window comptag line index.
+ */
+struct nvgpu_as_get_buffer_compbits_info_args {
+
+	/* in: address of an existing buffer mapping */
+	__u64 mapping_gva;
+
+	/* out: size of compbits mapping window (bytes) */
+	__u64 compbits_win_size;
+
+	/* out: comptag line index of the window start */
+	__u32 compbits_win_ctagline;
+
+	/* out: comptag line index of the buffer mapping */
+	__u32 mapping_ctagline;
+
+/* Buffer uses compbits */
+#define NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS    (1 << 0)
+
+/* Buffer compbits are mappable */
+#define NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE        (1 << 1)
+
+/* Buffer IOVA addresses are discontiguous */
+#define NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_DISCONTIG_IOVA  (1 << 2)
+
+	/* out */
+	__u32 flags;
+
+	__u32 reserved1;
+};
+
+/*
+ * Map compbits of a mapped buffer to the GPU address space. The
+ * compbits mapping is automatically unmapped when the buffer is
+ * unmapped.
+ *
+ * The compbits mapping always uses small pages; it is read-only and
+ * GPU cacheable. The mapping is a window to the compbits store. The
+ * window may not be exactly the size of the cache lines for the
+ * buffer mapping.
+ */
+struct nvgpu_as_map_buffer_compbits_args {
+
+	/* in: address of an existing buffer mapping */
+	__u64 mapping_gva;
+
+	/* in: gva to the mapped compbits store window when
+	 * FIXED_OFFSET is set. Otherwise, ignored and should be 0.
+	 *
+	 * For FIXED_OFFSET mapping:
+	 * - If compbits are already mapped, compbits_win_gva
+	 *   must match the previously mapped gva.
+	 * - The user must have allocated enough GVA space for the
+	 *   mapping window (see compbits_win_size in
+	 *   nvgpu_as_get_buffer_compbits_info_args)
+	 *
+	 * out: gva to the mapped compbits store window */
+	__u64 compbits_win_gva;
+
+	/* in: reserved, must be 0
+	   out: physical or IOMMU address for mapping */
+	union {
+		/* contiguous iova addresses */
+		__u64 mapping_iova;
+
+		/* buffer to receive discontiguous iova addresses (reserved) */
+		__u64 mapping_iova_buf_addr;
+	};
+
+	/* in: Buffer size (in bytes) for discontiguous iova
+	 * addresses. Reserved, must be 0. */
+	__u64 mapping_iova_buf_size;
+
+#define NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET	(1 << 0)
+	__u32 flags;
+	__u32 reserved1;
+};
+
 /*
  * Unmapping a buffer:
  *
@@ -938,9 +1029,13 @@ struct nvgpu_as_get_va_regions_args {
 	_IOWR(NVGPU_AS_IOCTL_MAGIC, 7, struct nvgpu_as_map_buffer_ex_args)
 #define NVGPU_AS_IOCTL_GET_VA_REGIONS \
 	_IOWR(NVGPU_AS_IOCTL_MAGIC, 8, struct nvgpu_as_get_va_regions_args)
+#define NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO \
+	_IOWR(NVGPU_AS_IOCTL_MAGIC, 9, struct nvgpu_as_get_buffer_compbits_info_args)
+#define NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS \
+	_IOWR(NVGPU_AS_IOCTL_MAGIC, 10, struct nvgpu_as_map_buffer_compbits_args)
 
 #define NVGPU_AS_IOCTL_LAST		\
-	_IOC_NR(NVGPU_AS_IOCTL_GET_VA_REGIONS)
+	_IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS)
 
 #define NVGPU_AS_IOCTL_MAX_ARG_SIZE	\
 	sizeof(struct nvgpu_as_map_buffer_ex_args)
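
Reviewer note: a minimal userspace sketch of the new ioctl flow, not part
of the patch. It assumes "as_fd" is an open nvgpu address-space fd and
"mapping_gva" refers to a buffer previously mapped with
NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS; the helper name
map_compbits_window is hypothetical, while the structs, flags, and ioctl
numbers are the ones added above.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Query compbits info for an existing mapping, then map the window. */
static int map_compbits_window(int as_fd, __u64 mapping_gva)
{
	struct nvgpu_as_get_buffer_compbits_info_args info;
	struct nvgpu_as_map_buffer_compbits_args map;

	memset(&info, 0, sizeof(info));
	info.mapping_gva = mapping_gva;
	if (ioctl(as_fd, NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO, &info))
		return -1;

	/* Nothing to do unless the buffer has mappable compbits. */
	if (!(info.flags & NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE))
		return 0;

	memset(&map, 0, sizeof(map));
	map.mapping_gva = mapping_gva;
	/* flags == 0: the kernel picks the window gva; FIXED_OFFSET is
	 * rejected by gk20a_vm_map_compbits() until it is implemented. */
	if (ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS, &map))
		return -1;

	printf("compbits win gva 0x%llx, size %llu, win ctagline %u, "
	       "buffer ctagline %u, iova 0x%llx\n",
	       (unsigned long long)map.compbits_win_gva,
	       (unsigned long long)info.compbits_win_size,
	       info.compbits_win_ctagline,
	       info.mapping_ctagline,
	       (unsigned long long)map.mapping_iova);
	return 0;
}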
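
The window geometry computed in gk20a_alloc_comptags() can be restated in
userspace from the new nvgpu_gpu_characteristics fields. A sketch of the
same arithmetic, assuming a 4K small page size (the kernel rounds with
round_up(win_size, small_pgsz)); the helper is illustrative only, and the
size returned by NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO remains
authoritative:

#include <linux/nvgpu.h>

#define SMALL_PAGE_SIZE 4096ULL /* assumed small page size */

static __u64 compbits_win_size(const struct nvgpu_gpu_characteristics *gpu,
			       __u32 ctag_lines)
{
	/* one comptag cacheline, aggregated over all L2 slices */
	__u64 aggregate_cacheline_sz = (__u64)gpu->cbc_cache_line_size *
		gpu->lts_per_ltc * gpu->num_ltc;
	__u64 cachelines;

	if (!gpu->cbc_comptags_per_line)
		return 0;

	/* cachelines covering the buffer's comptag lines (DIV_ROUND_UP) */
	cachelines = (ctag_lines + gpu->cbc_comptags_per_line - 1) /
		gpu->cbc_comptags_per_line;

	/* the window is mapped with small pages, so round up to a page */
	return (cachelines * aggregate_cacheline_sz + SMALL_PAGE_SIZE - 1) &
		~(SMALL_PAGE_SIZE - 1);
}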