diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c index 5b0fb9102..0c52271a8 100644 --- a/drivers/gpu/nvgpu/common/linux/cde.c +++ b/drivers/gpu/nvgpu/common/linux/cde.c @@ -975,7 +975,7 @@ __releases(&l->cde_app->mutex) u64 big_page_mask = 0; u32 flags; int err, i; - const s32 compbits_kind = 0; + const s16 compbits_kind = 0; gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", compbits_byte_offset, scatterbuffer_byte_offset); @@ -1038,8 +1038,11 @@ __releases(&l->cde_app->mutex) /* map the destination buffer */ get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, - NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, - compbits_kind, true, + NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE | + NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL, + NV_KIND_INVALID, + compbits_kind, /* incompressible kind */ + true, gk20a_mem_flag_none, map_offset, map_size, NULL); diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c index d4242955e..cfc4e7ef5 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c @@ -79,11 +79,22 @@ static int gk20a_as_ioctl_map_buffer_ex( struct gk20a_as_share *as_share, struct nvgpu_as_map_buffer_ex_args *args) { + s16 compressible_kind; + s16 incompressible_kind; + gk20a_dbg_fn(""); + if (args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { + compressible_kind = args->compr_kind; + incompressible_kind = args->incompr_kind; + } else { + compressible_kind = args->kind; + incompressible_kind = NV_KIND_INVALID; + } + return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, &args->offset, args->flags, - args->kind, + compressible_kind, incompressible_kind, args->buffer_offset, args->mapping_size, NULL); @@ -97,6 +108,7 @@ static int gk20a_as_ioctl_map_buffer( return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, &args->o_a.offset, 
args->flags, NV_KIND_DEFAULT, + NV_KIND_DEFAULT, 0, 0, NULL); /* args->o_a.offset will be set if !err */ } @@ -158,6 +170,9 @@ static int gk20a_as_ioctl_map_buffer_batch( } for (i = 0; i < args->num_maps; ++i) { + s16 compressible_kind; + s16 incompressible_kind; + struct nvgpu_as_map_buffer_ex_args map_args; memset(&map_args, 0, sizeof(map_args)); @@ -167,10 +182,19 @@ static int gk20a_as_ioctl_map_buffer_batch( break; } + if (map_args.flags & + NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { + compressible_kind = map_args.compr_kind; + incompressible_kind = map_args.incompr_kind; + } else { + compressible_kind = map_args.kind; + incompressible_kind = NV_KIND_INVALID; + } + err = nvgpu_vm_map_buffer( as_share->vm, map_args.dmabuf_fd, &map_args.offset, map_args.flags, - map_args.kind, + compressible_kind, incompressible_kind, map_args.buffer_offset, map_args.mapping_size, &batch); diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 58e2da130..86d8bec9c 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c @@ -177,11 +177,46 @@ static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm, return mapped_buffer->addr; } +static int setup_bfr_kind_fields(struct buffer_attrs *bfr, s16 compr_kind, + s16 incompr_kind, u32 flags) +{ + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { + /* were we supplied with a kind in either parameter? 
*/ + if ((compr_kind < 0 || compr_kind >= NV_KIND_ATTR_SIZE) && + (incompr_kind < 0 || incompr_kind >= NV_KIND_ATTR_SIZE)) + return -EINVAL; + + if (compr_kind != NV_KIND_INVALID) { + bfr->use_kind_v = true; + bfr->kind_v = (u8)compr_kind; + } + + if (incompr_kind != NV_KIND_INVALID) { + bfr->use_uc_kind_v = true; + bfr->uc_kind_v = (u8)incompr_kind; + } + } else { + if (compr_kind < 0 || compr_kind >= NV_KIND_ATTR_SIZE) + return -EINVAL; + + bfr->use_kind_v = true; + bfr->kind_v = (u8)compr_kind; + + /* + * Note: setup_buffer_kind_and_compression() will + * figure out uc_kind_v or return an error + */ + } + + return 0; +} + u64 nvgpu_vm_map(struct vm_gk20a *vm, struct dma_buf *dmabuf, u64 offset_align, u32 flags, - int kind, + s16 compr_kind, + s16 incompr_kind, bool user_mapped, int rw_flag, u64 buffer_offset, @@ -203,6 +238,22 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, u32 ctag_offset; enum nvgpu_aperture aperture; + /* + * The kind used as part of the key for map caching. HW may + * actually be programmed with the fallback kind in case the + * key kind is compressible but we're out of comptags. 
+ */ + s16 map_key_kind; + + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { + if (compr_kind != NV_KIND_INVALID) + map_key_kind = compr_kind; + else + map_key_kind = incompr_kind; + } else { + map_key_kind = compr_kind; + } + if (user_mapped && vm->userspace_managed && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { nvgpu_err(g, "non-fixed-offset mapping not available on " @@ -216,7 +267,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, if (!vm->userspace_managed) { map_offset = __nvgpu_vm_find_mapping( vm, dmabuf, offset_align, - flags, kind, + flags, map_key_kind, user_mapped, rw_flag); if (map_offset) { nvgpu_mutex_release(&vm->update_gmmu_lock); @@ -239,12 +290,10 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, goto clean_up; } - if (kind >= NV_KIND_ATTR_SIZE) { - err = -EINVAL; + err = setup_bfr_kind_fields(&bfr, compr_kind, incompr_kind, flags); + if (err) goto clean_up; - } else { - bfr.kind_v = (u8)kind; - } + bfr.size = dmabuf->size; sgl = bfr.sgt->sgl; @@ -306,10 +355,15 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, err = gk20a_alloc_comptags(g, dev, dmabuf, ctag_allocator, bfr.ctag_lines); - if (err) { - /* ok to fall back here if we ran out */ + if (unlikely(err)) { /* TBD: we can partially alloc ctags as well... */ - bfr.kind_v = bfr.uc_kind_v; + if (bfr.use_uc_kind_v) { + /* no comptags, but fallback kind available */ + bfr.kind_v = bfr.uc_kind_v; + } else { + nvgpu_err(g, "comptag alloc failed and no fallback kind specified"); + goto clean_up; + } } else { gk20a_get_comptags(dev, dmabuf, &comptags); @@ -371,7 +425,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; mapped_buffer->vm = vm; mapped_buffer->flags = flags; - mapped_buffer->kind = kind; + mapped_buffer->kind = map_key_kind; mapped_buffer->va_allocated = va_allocated; mapped_buffer->user_mapped = user_mapped ? 
1 : 0; mapped_buffer->own_mem_ref = user_mapped; diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h index 4f6b10bbd..1eadf1d0e 100644 --- a/drivers/gpu/nvgpu/common/linux/vm_priv.h +++ b/drivers/gpu/nvgpu/common/linux/vm_priv.h @@ -34,7 +34,9 @@ struct buffer_attrs { u32 ctag_allocated_lines; int pgsz_idx; u8 kind_v; + bool use_kind_v; u8 uc_kind_v; + bool use_uc_kind_v; bool ctag_user_mappable; }; @@ -42,19 +44,43 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, struct dma_buf *dmabuf, u64 offset_align, u32 flags, - int kind, + + /* + * compressible kind if + * NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is + * specified, otherwise just the kind + */ + s16 compr_kind, + + /* + * incompressible kind if + * NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is + * specified, otherwise ignored + */ + s16 incompr_kind, + bool user_mapped, int rw_flag, u64 buffer_offset, u64 mapping_size, struct vm_gk20a_mapping_batch *mapping_batch); -/* Note: batch may be NULL if map op is not part of a batch */ +/* + * Notes: + * - Batch may be NULL if map op is not part of a batch. + * - If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is set, + * compr_kind and incompr_kind work as explained in nvgpu.h. + * - If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is NOT set, + * compr_kind holds the kind and kernel will figure out whether + * it is a compressible or incompressible kind. If compressible, kernel will + * also figure out the incompressible counterpart or return an error. 
+ */ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, int dmabuf_fd, u64 *offset_align, u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ - int kind, + s16 compr_kind, + s16 incompr_kind, u64 buffer_offset, u64 mapping_size, struct vm_gk20a_mapping_batch *batch); diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 19433df9a..2b37a62af 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -1929,6 +1929,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, 0, 0, 0, + 0, args->mapping_size, NULL); if (err) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 455fa2382..e9948c161 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -411,7 +411,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes(); } - gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS; + gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS | + NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL; if (IS_ENABLED(CONFIG_SYNC)) gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index a6507d2dc..97b7aa800 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -822,14 +822,11 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset, nvgpu_mutex_release(&vm->update_gmmu_lock); } -int setup_buffer_kind_and_compression(struct vm_gk20a *vm, - u32 flags, - struct buffer_attrs *bfr, - enum gmmu_pgsz_gk20a pgsz_idx) +static int setup_kind_legacy(struct vm_gk20a *vm, struct buffer_attrs *bfr, + bool *pkind_compressible) { - bool kind_compressible; struct gk20a *g = gk20a_from_vm(vm); - int ctag_granularity = g->ops.fb.compression_page_size(g); + bool kind_compressible; if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v())) bfr->kind_v = 
gmmu_pte_kind_pitch_v(); @@ -840,7 +837,7 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm, } bfr->uc_kind_v = gmmu_pte_kind_invalid_v(); - /* find a suitable uncompressed kind if it becomes necessary later */ + /* find a suitable incompressible kind if it becomes necessary later */ kind_compressible = gk20a_kind_is_compressible(bfr->kind_v); if (kind_compressible) { bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v); @@ -852,6 +849,36 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm, return -EINVAL; } } + + *pkind_compressible = kind_compressible; + return 0; +} + +int setup_buffer_kind_and_compression(struct vm_gk20a *vm, + u32 flags, + struct buffer_attrs *bfr, + enum gmmu_pgsz_gk20a pgsz_idx) +{ + bool kind_compressible; + struct gk20a *g = gk20a_from_vm(vm); + int ctag_granularity = g->ops.fb.compression_page_size(g); + + if (!bfr->use_kind_v) + bfr->kind_v = gmmu_pte_kind_invalid_v(); + if (!bfr->use_uc_kind_v) + bfr->uc_kind_v = gmmu_pte_kind_invalid_v(); + + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { + kind_compressible = (bfr->kind_v != gmmu_pte_kind_invalid_v()); + if (!kind_compressible) + bfr->kind_v = bfr->uc_kind_v; + } else { + int err = setup_kind_legacy(vm, bfr, &kind_compressible); + + if (err) + return err; + } + /* comptags only supported for suitable kinds, 128KB pagesize */ if (kind_compressible && vm->gmmu_page_sizes[pgsz_idx] < g->ops.fb.compressible_page_size(g)) { @@ -865,6 +892,9 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm, else bfr->ctag_lines = 0; + bfr->use_kind_v = (bfr->kind_v != gmmu_pte_kind_invalid_v()); + bfr->use_uc_kind_v = (bfr->uc_kind_v != gmmu_pte_kind_invalid_v()); + return 0; } @@ -1649,7 +1679,8 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, int dmabuf_fd, u64 *offset_align, u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ - int kind, + s16 compr_kind, + s16 incompr_kind, u64 buffer_offset, u64 mapping_size, struct vm_gk20a_mapping_batch *batch) @@ -1690,7 +1721,7 
@@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, } ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, - flags, kind, true, + flags, compr_kind, incompr_kind, true, gk20a_mem_flag_none, buffer_offset, mapping_size, diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 5b1d606a5..9c883a93b 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -146,6 +146,9 @@ struct nvgpu_gpu_zbc_query_table_args { #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20) /* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */ #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) +/* Direct PTE kind control is supported (map_buffer_ex) */ +#define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL (1ULL << 23) + struct nvgpu_gpu_characteristics { __u32 arch; @@ -1751,6 +1754,7 @@ struct nvgpu_as_map_buffer_args { #define NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT (1 << 4) #define NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE (1 << 5) #define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6) +#define NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL (1 << 8) __u32 reserved; /* in */ __u32 dmabuf_fd; /* in */ __u32 page_size; /* inout, 0:= best fit to buffer */ @@ -1760,7 +1764,7 @@ struct nvgpu_as_map_buffer_args { } o_a; }; - /* +/* * Mapping dmabuf fds into an address space: * * The caller requests a mapping to a particular page 'kind'. @@ -1772,7 +1776,37 @@ struct nvgpu_as_map_buffer_args { struct nvgpu_as_map_buffer_ex_args { __u32 flags; /* in/out */ #define NV_KIND_DEFAULT -1 - __s32 kind; /* in (-1 represents default) */ + union { + /* + * Used if NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL + * is not set. + */ + __s32 kind; /* in (-1 represents default) */ + + /* + * If NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is + * set, this is used, instead. The rules are: + * + * - If both compr_kind and incompr_kind are set + * (i.e., value is other than NV_KIND_INVALID), + * kernel attempts to use compr_kind first. 
+ * + * - If compr_kind is set, kernel attempts to allocate + * comptags for the buffer. If successful, + * compr_kind is used as the PTE kind. + * + * - If incompr_kind is set, kernel uses incompr_kind + * as the PTE kind. Comptags are not allocated. + * + * - If neither compr_kind nor incompr_kind is set, the + * map call will fail. + */ +#define NV_KIND_INVALID -1 + struct { + __s16 compr_kind; + __s16 incompr_kind; + }; + }; __u32 dmabuf_fd; /* in */ __u32 page_size; /* inout, 0:= best fit to buffer */