diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 5bc677c02..4884305d9 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -1218,21 +1218,20 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 		struct gk20a_comptags comptags = { 0 };
 
 		/*
-		 * Get the comptags state, alloc if necessary
+		 * Get the comptags state
 		 */
-		err = gk20a_alloc_or_get_comptags(g, os_buf,
-						  &g->cbc->comp_tags,
-						  &comptags);
-		if (err != 0) {
+		gk20a_get_comptags(os_buf, &comptags);
+
+		if (!comptags.allocated) {
+			nvgpu_log_info(g, "compr kind %d map requested without comptags allocated, allocating...",
+				binfo_ptr->compr_kind);
+
 			/*
-			 * This is an irrecoverable failure and we need to
-			 * abort. In particular, it is not safe to proceed with
-			 * the incompressible fallback, since we cannot not mark
-			 * our alloc failure anywere. Later we would retry
-			 * allocation and break compressible map aliasing.
+			 * best effort only, we don't really care if
+			 * this fails
 			 */
-			nvgpu_err(g, "Error %d setting up comptags", err);
-			goto ret_err;
+			gk20a_alloc_or_get_comptags(
+				g, os_buf, &g->cbc->comp_tags, &comptags);
 		}
 
 		/*
@@ -1261,9 +1260,9 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 		}
 
 		/*
-		 * Store the ctag offset for later use if we got the comptags
+		 * Store the ctag offset for later use if we have the comptags
 		 */
-		if (comptags.lines != 0U) {
+		if (comptags.enabled) {
 			ctag_offset = comptags.offset;
 		}
 	}
diff --git a/drivers/gpu/nvgpu/common/mm/vm_remap.c b/drivers/gpu/nvgpu/common/mm/vm_remap.c
index ccd729ea8..92fffb368 100644
--- a/drivers/gpu/nvgpu/common/mm/vm_remap.c
+++ b/drivers/gpu/nvgpu/common/mm/vm_remap.c
@@ -174,42 +174,6 @@ nvgpu_vm_remap_mpool_find(struct nvgpu_rbtree_node *root,
 	return nvgpu_vm_remap_mpool_from_tree_entry(node);
 }
 
-#ifdef CONFIG_NVGPU_COMPRESSION
-/*
- * Ensure that compression resources are allocated to the specified
- * physical memory buffer.
- */
-static inline int nvgpu_vm_remap_ensure_comptags(struct vm_gk20a *vm,
-				struct nvgpu_vm_remap_os_buffer *remap_os_buf)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_comptags comptags = { 0 };
-	struct nvgpu_os_buffer *os_buf = &remap_os_buf->os_buf;
-	int err = 0;
-
-	err = gk20a_alloc_or_get_comptags(g, os_buf,
-					  &g->cbc->comp_tags,
-					  &comptags);
-	if (err != 0) {
-		nvgpu_err(g, "cannot alloc comptags: %d", err);
-		return err;
-	}
-
-	if (comptags.needs_clear) {
-		nvgpu_assert(g->ops.cbc.ctrl != NULL);
-		if (gk20a_comptags_start_clear(os_buf)) {
-			err = g->ops.cbc.ctrl(g, nvgpu_cbc_op_clear,
-					      comptags.offset,
-					      (comptags.offset +
-					       comptags.lines - 1U));
-			gk20a_comptags_finish_clear(os_buf, err == 0);
-		}
-	}
-
-	return err;
-}
-#endif
-
 /*
  * Validate that the specified remap operation resides within the target
  * virtual memory pool.
@@ -263,10 +227,41 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
 
 #ifdef CONFIG_NVGPU_COMPRESSION
 	if (op->compr_kind != NVGPU_KIND_INVALID) {
-		if (nvgpu_vm_remap_ensure_comptags(vm, remap_os_buf)) {
-			/* inform caller there are no more compbits */
+
+		struct gk20a *g = gk20a_from_vm(vm);
+		struct gk20a_comptags comptags = { 0 };
+
+		/*
+		 * Note: this is best-effort only
+		 */
+		gk20a_alloc_or_get_comptags(g, &remap_os_buf->os_buf,
+					    &g->cbc->comp_tags, &comptags);
+
+		if (!comptags.enabled) {
+			/* inform the caller that the buffer does not
+			 * have compbits */
 			op->compr_kind = NVGPU_KIND_INVALID;
 		}
+
+		if (comptags.needs_clear) {
+			nvgpu_assert(g->ops.cbc.ctrl != NULL);
+			if (gk20a_comptags_start_clear(&remap_os_buf->os_buf)) {
+				int err = g->ops.cbc.ctrl(
+					g, nvgpu_cbc_op_clear,
+					comptags.offset,
+					(comptags.offset +
+					 comptags.lines - 1U));
+				gk20a_comptags_finish_clear(
+					&remap_os_buf->os_buf, err == 0);
+
+				if (err) {
+					nvgpu_err(
+						g, "Comptags clear failed: %d",
+						err);
+					op->compr_kind = NVGPU_KIND_INVALID;
+				}
+			}
+		}
 	}
 #endif
diff --git a/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
index 4694cf907..044e6bd52 100644
--- a/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
@@ -47,7 +47,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 	}
 
 	nvgpu_mutex_init(&mm->tlb_lock);
-	nvgpu_mutex_init(&mm->priv_lock);
 
 	mm->g = g;
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/comptags.h b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
index bb5eb7d10..4593a81cf 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/comptags.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,17 @@ struct gk20a_comptags {
 	 */
 	bool allocated;
 
+	/*
+	 * "enabled" indicates if the comptags are in use for mapping the buffer
+	 * as compressible. Buffer comptags usage may be changed at runtime by
+	 * buffer metadata re-registration. However, comptags once allocated
+	 * are freed only on freeing the buffer.
+	 *
+	 * "enabled" implies that comptags have been successfully allocated
+	 * (offset > 0 and lines > 0)
+	 */
+	bool enabled;
+
 	/*
 	 * Do comptags need to be cleared before mapping?
 	 */
@@ -77,13 +88,16 @@ void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
  * Defined by OS specific code since comptags are stored in a highly OS specific
  * way.
  */
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags);
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator);
 
 void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 			struct gk20a_comptags *comptags);
 
+/* legacy support */
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags);
+
 /*
  * These functions must be used to synchronize comptags clear. The usage:
  *
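For reference, a minimal sketch of how a consumer of the new "allocated"/"enabled" split is expected to behave (not part of the patch; the helper name is hypothetical, the called functions and fields are the ones declared above). Mapping decisions must come from "enabled" alone; "allocated" only records that an allocation attempt was made for the buffer's lifetime and must not be retried:

    static bool buf_use_compressible_kind(struct nvgpu_os_buffer *os_buf)
    {
    	struct gk20a_comptags comptags = { 0 };

    	/* Read the comptags state stored in the buffer privdata. */
    	gk20a_get_comptags(os_buf, &comptags);

    	/*
    	 * "enabled" implies a successful allocation that is currently in
    	 * use; "allocated" alone may describe a failed attempt, so it is
    	 * not sufficient for this decision.
    	 */
    	return comptags.enabled;
    }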
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index ac7bc3b8d..c4ea23e9f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -215,6 +215,7 @@ struct gk20a;
 	DEFINE_FLAG(NVGPU_CLK_ARB_ENABLED, "CLK_ARB support"),	\
 	DEFINE_FLAG(NVGPU_SUPPORT_VAB_ENABLED, "VAB feature supported"),	\
 	DEFINE_FLAG(NVGPU_SUPPORT_ROP_IN_GPC, "ROP is part of GPC"),	\
+	DEFINE_FLAG(NVGPU_SUPPORT_BUFFER_METADATA, "Buffer metadata support"),	\
 	DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
 
 /**
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h
index 7e8872a63..8fbcea65e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/mm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h
@@ -327,8 +327,6 @@ struct mm_gk20a {
 	struct nvgpu_mutex l2_op_lock;
 	/** Lock to serialize TLB operations. */
 	struct nvgpu_mutex tlb_lock;
-	/** Lock to serialize mm internal operations. */
-	struct nvgpu_mutex priv_lock;
 
 	struct nvgpu_mem bar2_desc;
 
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index c1a4961d1..289ca2c79 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -1020,6 +1020,7 @@ __acquires(&l->cde_app->mutex)
 __releases(&l->cde_app->mutex)
 {
 	struct gk20a *g = &l->g;
+	struct gk20a_dmabuf_priv *priv = NULL;
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct nvgpu_cbc *cbc = g->cbc;
 	struct gk20a_comptags comptags;
@@ -1069,10 +1070,13 @@ __releases(&l->cde_app->mutex)
 	/* First, map the buffer to local va */
 
 	/* ensure that the compbits buffer has drvdata */
-	err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
+	priv = gk20a_dma_buf_get_drvdata(compbits_scatter_buf,
 			dev_from_gk20a(g));
-	if (err)
+	if (!priv) {
+		err = -EINVAL;
+		nvgpu_err(g, "Compbits buffer has no metadata");
 		goto exit_idle;
+	}
 
 	/* compbits don't start at page aligned offset, so we need to align the
	   region to be mapped */
@@ -1749,11 +1753,22 @@ int gk20a_prepare_compressible_read(
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
 	u32 missing_bits;
+	struct gk20a_dmabuf_priv *priv = NULL;
 
 	dmabuf = dma_buf_get(buffer_fd);
 	if (IS_ERR(dmabuf))
 		return -EINVAL;
 
+	/* this function is a nop for incompressible buffers */
+	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev_from_gk20a(g));
+	if (!priv || !priv->comptags.enabled) {
+		nvgpu_log_info(g, "comptags not enabled for the buffer");
+		*valid_compbits = NVGPU_GPU_COMPBITS_NONE;
+		*zbc_color = 0;
+		dma_buf_put(dmabuf);
+		return 0;
+	}
+
 	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		dma_buf_put(dmabuf);
@@ -1811,6 +1826,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 	int err;
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
+	struct gk20a_dmabuf_priv *priv = NULL;
 
 	dmabuf = dma_buf_get(buffer_fd);
 	if (IS_ERR(dmabuf)) {
@@ -1818,6 +1834,14 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 		return -EINVAL;
 	}
 
+	/* this function is a nop for incompressible buffers */
+	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev_from_gk20a(g));
+	if (!priv || !priv->comptags.enabled) {
+		nvgpu_log_info(g, "comptags not allocated for the buffer");
+		dma_buf_put(dmabuf);
+		return 0;
+	}
+
 	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		nvgpu_err(g, "could not get state from dmabuf");
diff --git a/drivers/gpu/nvgpu/os/linux/comptags.c b/drivers/gpu/nvgpu/os/linux/comptags.c
index 0b99015ce..6f9c172f3 100644
--- a/drivers/gpu/nvgpu/os/linux/comptags.c
+++ b/drivers/gpu/nvgpu/os/linux/comptags.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -26,8 +26,8 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 			struct gk20a_comptags *comptags)
 {
-	struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
-							buf->dev);
+	struct gk20a_dmabuf_priv *priv =
+		gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
 
 	if (!comptags)
 		return;
@@ -42,80 +42,85 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 	nvgpu_mutex_release(&priv->lock);
 }
 
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags)
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator)
 {
-	int ret = 0;
-	struct gk20a_dmabuf_priv *priv = NULL;
-	u32 offset;
-	int err;
 	u64 ctag_granularity;
-	u32 lines;
-
-	ret = gk20a_dmabuf_alloc_drvdata(buf->dmabuf, buf->dev);
-	if (ret) {
-		nvgpu_err(g, "error allocating comptags priv data");
-		return ret;
-	}
-
-	priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
-	if (!priv)
-		return -ENOSYS;
-
-	nvgpu_mutex_acquire(&priv->lock);
-
-	if (priv->comptags.allocated) {
-		/*
-		 * already allocated
-		 */
-		*comptags = priv->comptags;
-
-		err = 0;
-		goto exit_locked;
-	}
+	struct gk20a_dmabuf_priv *priv = NULL;
+	u32 offset = 0;
+	u32 lines = 0;
+	int err;
 
 	ctag_granularity = g->ops.fb.compression_page_size(g);
 	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
 
 	/* 0-sized buffer? Shouldn't occur, but let's check anyways. */
 	if (lines < 1) {
-		err = -EINVAL;
-		goto exit_locked;
+		nvgpu_err(g, "zero sized buffer. comptags not allocated.");
+		return -EINVAL;
 	}
 
+	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
+	if (err != 0) {
+		/*
+		 * Note: we must prevent a reallocation attempt if the
+		 * allocation failed. Otherwise a later successful allocation
+		 * could cause corruption, because interop endpoints would
+		 * have compression states that conflict with the maps.
+		 */
+		nvgpu_err(g, "Comptags allocation failed %d", err);
+		lines = 0;
+	}
+
+	/* Note: privdata is not validated here since the callers
+	 * guarantee that it exists. */
+	priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
+
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
 
-	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
-	if (!err) {
-		priv->comptags.offset = offset;
-		priv->comptags.lines = lines;
-		priv->comptags.needs_clear = true;
-	} else {
-		priv->comptags.offset = 0;
-		priv->comptags.lines = 0;
-		priv->comptags.needs_clear = false;
-	}
-
 	/*
-	 * We don't report an error here if comptag alloc failed. The
-	 * caller will simply fallback to incompressible kinds. It
-	 * would not be safe to re-allocate comptags anyways on
-	 * successive calls, as that would break map aliasing.
-	 */
-	err = 0;
+	priv->comptags.offset = offset;
+	priv->comptags.lines = lines;
+	priv->comptags.needs_clear = (lines != 0);
 	priv->comptags.allocated = true;
-
-	*comptags = priv->comptags;
-
-exit_locked:
-	nvgpu_mutex_release(&priv->lock);
+	priv->comptags.enabled = (lines != 0);
 
 	return err;
 }
 
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags)
+{
+	struct gk20a_dmabuf_priv *priv = NULL;
+	int err;
+
+	if (!comptags)
+		return;
+
+	err = gk20a_dmabuf_alloc_or_get_drvdata(buf->dmabuf, buf->dev, &priv);
+	if (err != 0) {
+		(void) memset(comptags, 0, sizeof(*comptags));
+		return;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	/*
+	 * Try to allocate only if metadata is not locked. However, we
+	 * don't re-enable explicitly disabled comptags.
+	 */
+	if (!priv->registered || priv->mutable_metadata) {
+		if (!priv->comptags.allocated) {
+			gk20a_alloc_comptags(g, buf, allocator);
+		}
+	}
+
+	*comptags = priv->comptags;
+	nvgpu_mutex_release(&priv->lock);
+}
+
 bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
 {
 	struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf_priv.c b/drivers/gpu/nvgpu/os/linux/dmabuf_priv.c
index 1c42c919e..b3d7febe7 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf_priv.c
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf_priv.c
@@ -91,11 +91,10 @@ static void nvgpu_dma_buf_release(struct dma_buf *dmabuf)
 	dmabuf->ops->release(dmabuf);
 }
 
+/* This function must be called with priv->lock held */
 static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *device,
 				     struct gk20a_dmabuf_priv *priv)
 {
-	nvgpu_mutex_acquire(&priv->lock);
-
 	priv->dmabuf = dmabuf;
 
 	mutex_lock(&dmabuf->lock);
@@ -109,8 +108,6 @@ static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *devi
 	dmabuf->ops = &priv->local_ops;
 
 	mutex_unlock(&dmabuf->lock);
-	nvgpu_mutex_release(&priv->lock);
-
 	return 0;
 }
 
@@ -210,6 +207,10 @@ void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv)
 	/* Remove this entry from the global tracking list */
 	nvgpu_list_del(&priv->list);
 
+	if (priv->metadata_blob) {
+		nvgpu_kfree(g, priv->metadata_blob);
+	}
+
 	nvgpu_kfree(g, priv);
 }
 
@@ -225,46 +226,50 @@ void gk20a_dma_buf_priv_list_clear(struct nvgpu_os_linux *l)
 	nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
 }
 
-int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
+int gk20a_dmabuf_alloc_or_get_drvdata(struct dma_buf *dmabuf, struct device *dev,
+				      struct gk20a_dmabuf_priv **priv_ptr)
 {
 	struct gk20a *g = gk20a_get_platform(dev)->g;
-	struct gk20a_dmabuf_priv *priv;
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct gk20a_dmabuf_priv *priv;
+	int err = 0;
 
 	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
-
-	if (likely(priv))
-		return 0;
-
-	nvgpu_mutex_acquire(&g->mm.priv_lock);
-	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
-	if (priv)
-		goto priv_exist_or_err;
+	if (priv) {
+		nvgpu_log_info(g, "Buffer metadata already allocated");
+		*priv_ptr = priv;
+		goto out;
+	}
 
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
 	if (!priv) {
-		priv = ERR_PTR(-ENOMEM);
-		goto priv_exist_or_err;
+		err = -ENOMEM;
+		nvgpu_err(g, "Buffer metadata allocation failed");
+		goto out;
	}
 
 	nvgpu_mutex_init(&priv->lock);
-	nvgpu_init_list_node(&priv->states);
+
+	nvgpu_mutex_acquire(&priv->lock);
+
 	priv->g = g;
-	gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
 	nvgpu_init_list_node(&priv->list);
+	nvgpu_init_list_node(&priv->states);
+
+	gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
+
+	nvgpu_mutex_release(&priv->lock);
 
 	/* Append this priv to the global tracker */
 	nvgpu_mutex_acquire(&l->dmabuf_priv_list_lock);
 	nvgpu_list_add_tail(&l->dmabuf_priv_list, &priv->list);
 	nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
 
-priv_exist_or_err:
-	nvgpu_mutex_release(&g->mm.priv_lock);
-	if (IS_ERR(priv))
-		return -ENOMEM;
+	*priv_ptr = priv;
 
-	return 0;
+out:
+	return err;
 }
 
 int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
@@ -280,10 +285,6 @@ int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 		return -EINVAL;
 	}
 
-	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
-	if (err)
-		return err;
-
 	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
 	if (!priv) {
 		nvgpu_do_assert();
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf_priv.h b/drivers/gpu/nvgpu/os/linux/dmabuf_priv.h
index 3df5b5d44..eb9a32c09 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf_priv.h
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf_priv.h
@@ -74,9 +74,13 @@ struct gk20a_dmabuf_priv {
 	struct gk20a_comptag_allocator *comptag_allocator;
 	struct gk20a_comptags comptags;
 
-	struct nvgpu_list_node states;
+	u8 *metadata_blob;
+	u32 metadata_blob_size;
 
-	u64 buffer_id;
+	bool registered;
+	bool mutable_metadata;
+
+	struct nvgpu_list_node states;
 
 	/* Used for retrieving the associated dmabuf from the priv */
 	struct dma_buf *dmabuf;
@@ -109,7 +113,8 @@ void nvgpu_mm_unpin(struct device *dev,
 
 void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv);
 
-int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
+int gk20a_dmabuf_alloc_or_get_drvdata(struct dma_buf *dmabuf, struct device *dev,
+				      struct gk20a_dmabuf_priv **priv_ptr);
 
 int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 			   u64 offset, struct gk20a_buffer_state **state);
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index f55caf833..5861e3969 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -266,7 +266,6 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
 			platform->force_128K_pmu_vm);
 
 	nvgpu_mutex_init(&g->mm.tlb_lock);
-	nvgpu_mutex_init(&g->mm.priv_lock);
 }
 
 int nvgpu_probe(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 4d9001b8e..e5f0a1aee 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -23,6 +23,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -57,6 +58,7 @@
 #include
 #include
 #include
+#include
 
 #include "ioctl_ctrl.h"
 #include "ioctl_dbg.h"
@@ -65,6 +67,7 @@
 #include "ioctl_channel.h"
 #include "ioctl.h"
 
+#include "dmabuf_priv.h"
 #include "platform_gk20a.h"
 #include "os_linux.h"
 #include "channel.h"
@@ -300,6 +303,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED},
 	{NVGPU_GPU_FLAGS_SUPPORT_VAB,
 		NVGPU_SUPPORT_VAB_ENABLED},
+	{NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA,
+		NVGPU_SUPPORT_BUFFER_METADATA},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
@@ -1911,6 +1916,190 @@ out:
 }
 #endif
 
+#ifdef CONFIG_NVGPU_COMPRESSION
+static int nvgpu_handle_comptags_control(struct gk20a *g,
+		struct dma_buf *dmabuf,
+		struct gk20a_dmabuf_priv *priv,
+		u8 comptags_alloc_control)
+{
+	struct nvgpu_os_buffer os_buf = {0};
+	int err = 0;
+
+	if (comptags_alloc_control == NVGPU_GPU_COMPTAGS_ALLOC_NONE) {
+		if (priv->comptags.allocated) {
+			/*
+			 * Just mark the comptags as disabled. Comptags will be
+			 * freed on freeing the buffer.
+			 */
+			priv->comptags.enabled = false;
+			nvgpu_log_info(g, "Comptags disabled.");
+		}
+
+		return 0;
+	}
+
+	/* Allocate the comptags if requested/required. */
+	if (priv->comptags.allocated) {
+		priv->comptags.enabled = priv->comptags.lines > 0;
+		if (priv->comptags.enabled) {
+			nvgpu_log_info(g, "Comptags enabled.");
+			return 0;
+		} else {
+			if (comptags_alloc_control ==
+					NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+				nvgpu_err(g,
+					"Previous allocation has failed, could not enable comptags (required)");
+				return -ENOMEM;
+			} else {
+				nvgpu_log_info(g,
+					"Previous allocation has failed, could not enable comptags (requested)");
+				return 0;
+			}
+		}
+	}
+
+	os_buf.dmabuf = dmabuf;
+	os_buf.dev = dev_from_gk20a(g);
+
+	err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags);
+	if (err != 0) {
+		if (comptags_alloc_control ==
+				NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+			nvgpu_err(g, "Comptags allocation (required) failed (%d)",
+				err);
+		} else {
+			nvgpu_err(g, "Comptags allocation (requested) failed (%d)",
+				err);
+			err = 0;
+		}
+	}
+
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_register_buffer(struct gk20a *g,
+		struct nvgpu_gpu_register_buffer_args *args)
+{
+	struct gk20a_dmabuf_priv *priv = NULL;
+	bool mutable_metadata = false;
+	bool modify_metadata = false;
+	struct dma_buf *dmabuf;
+	u8 *blob_copy = NULL;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_BUFFER_METADATA)) {
+		nvgpu_err(g, "Buffer metadata not supported");
+		return -EINVAL;
+	}
+
+	if (args->metadata_size > NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE) {
+		nvgpu_err(g, "Invalid metadata blob size");
+		return -EINVAL;
+	}
+
+	if (args->comptags_alloc_control > NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+		nvgpu_err(g, "Invalid comptags_alloc_control");
+		return -EINVAL;
+	}
+
+	nvgpu_log_info(g, "dmabuf_fd: %d, comptags control: %u, metadata size: %u, flags: %u",
+		args->dmabuf_fd, args->comptags_alloc_control,
+		args->metadata_size, args->flags);
+
+	mutable_metadata = (args->flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE) != 0;
+	modify_metadata = (args->flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY) != 0;
+
+	dmabuf = dma_buf_get(args->dmabuf_fd);
+	if (IS_ERR(dmabuf)) {
+		nvgpu_warn(g, "%s: fd %d is not a dmabuf",
+			__func__, args->dmabuf_fd);
+		return PTR_ERR(dmabuf);
+	}
+
+	/*
+	 * Allocate or get the buffer metadata state.
+	 */
+	err = gk20a_dmabuf_alloc_or_get_drvdata(
+		dmabuf, dev_from_gk20a(g), &priv);
+	if (err != 0) {
+		nvgpu_err(g, "Error allocating buffer metadata %d", err);
+		goto out;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	/* Check for valid buffer metadata re-registration */
+	if (priv->registered) {
+		if (!modify_metadata) {
+			nvgpu_err(g, "attempt to modify buffer metadata without NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY");
+			err = -EINVAL;
+			goto out_priv_unlock;
+		} else if (!priv->mutable_metadata) {
+			nvgpu_err(g, "attempt to redefine immutable metadata");
+			err = -EINVAL;
+			goto out_priv_unlock;
+		}
+	}
+
+	/* Allocate memory for the metadata blob */
+	blob_copy = nvgpu_kzalloc(g, args->metadata_size);
+	if (!blob_copy) {
+		nvgpu_err(g, "Error allocating memory for blob");
+		err = -ENOMEM;
+		goto out_priv_unlock;
+	}
+
+	/* Copy the metadata blob */
+	if (copy_from_user(blob_copy,
+			(void __user *) args->metadata_addr,
+			args->metadata_size)) {
+		err = -EFAULT;
+		nvgpu_err(g, "Error copying buffer metadata blob");
+		goto out_priv_unlock;
+	}
+
+	/* Comptags allocation */
+	err = nvgpu_handle_comptags_control(g, dmabuf, priv,
+			args->comptags_alloc_control);
+	if (err != 0) {
+		nvgpu_err(g, "Comptags alloc control failed %d", err);
+		goto out_priv_unlock;
+	}
+
+	/* All done, update metadata blob */
+	nvgpu_kfree(g, priv->metadata_blob);
+
+	priv->metadata_blob = blob_copy;
+	priv->metadata_blob_size = args->metadata_size;
+	blob_copy = NULL;
+
+	/* Mark registered and update mutability */
+	priv->registered = true;
+	priv->mutable_metadata = mutable_metadata;
+
+	/* Output variables */
+	args->flags = 0;
+	if (priv->comptags.enabled) {
+		args->flags |=
+			NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED;
+	}
+
+	nvgpu_log_info(g, "buffer registered: mutable: %s, metadata size: %u, flags: 0x%8x",
+		priv->mutable_metadata ?
"yes" : "no", priv->metadata_blob_size, + args->flags); + +out_priv_unlock: + nvgpu_mutex_release(&priv->lock); +out: + dma_buf_put(dmabuf); + nvgpu_kfree(g, blob_copy); + + return err; +} +#endif + long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct gk20a_ctrl_priv *priv = filp->private_data; @@ -2268,6 +2457,13 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg (struct nvgpu_gpu_set_deterministic_opts_args *)buf); break; +#ifdef CONFIG_NVGPU_COMPRESSION + case NVGPU_GPU_IOCTL_REGISTER_BUFFER: + err = nvgpu_gpu_ioctl_register_buffer(g, + (struct nvgpu_gpu_register_buffer_args *)buf); + break; +#endif + default: nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); err = -ENOTTY; diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index 9372bcb79..7b5d7d56f 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -326,6 +326,9 @@ void gk20a_init_linux_characteristics(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_REMAP, true); +#ifdef CONFIG_NVGPU_COMPRESSION + nvgpu_set_enabled(g, NVGPU_SUPPORT_BUFFER_METADATA, true); +#endif if (!IS_ENABLED(CONFIG_NVGPU_SYNCFD_NONE)) { nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true); diff --git a/drivers/gpu/nvgpu/os/linux/vm_remap.c b/drivers/gpu/nvgpu/os/linux/vm_remap.c index dded7e673..5cabc22e5 100644 --- a/drivers/gpu/nvgpu/os/linux/vm_remap.c +++ b/drivers/gpu/nvgpu/os/linux/vm_remap.c @@ -68,12 +68,6 @@ int nvgpu_vm_remap_os_buf_get(struct vm_gk20a *vm, goto clean_up; } - err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); - if (err) { - nvgpu_warn(g, "failed to alloc drvdata"); - goto clean_up; - } - if ((op->flags & NVGPU_VM_REMAP_OP_FLAGS_ACCESS_NO_WRITE) != 0) { dmabuf_direction = DMA_TO_DEVICE; } else { diff --git a/drivers/gpu/nvgpu/os/posix/posix-comptags.c b/drivers/gpu/nvgpu/os/posix/posix-comptags.c index a00246ddc..710e5cf1e 100644 --- a/drivers/gpu/nvgpu/os/posix/posix-comptags.c +++ b/drivers/gpu/nvgpu/os/posix/posix-comptags.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -30,14 +30,19 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 {
 }
 
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags)
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator)
 {
 	return -ENODEV;
 }
 
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags)
+{
+}
+
 bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
 {
 	return false;
diff --git a/include/uapi/linux/nvgpu-ctrl.h b/include/uapi/linux/nvgpu-ctrl.h
index 9d43b6908..912ada3ab 100644
--- a/include/uapi/linux/nvgpu-ctrl.h
+++ b/include/uapi/linux/nvgpu-ctrl.h
@@ -186,7 +186,11 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_SMPC_GLOBAL_MODE	(1ULL << 48)
 /* Retrieving contents of graphics context is supported */
 #define NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT		(1ULL << 49)
-/* Additional buffer metadata association supported */
+/*
+ * Note: Additional buffer metadata association support. This feature is only
+ * for supporting legacy userspace APIs and for compatibility with desktop
+ * RM behavior. Usage of this feature should be avoided.
+ */
 #define NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA		(1ULL << 50)
 /* Flag to indicate whether configuring L2_MAXEVICTLAST_WAYS is supported */
 #define NVGPU_GPU_FLAGS_L2_MAX_WAYS_EVICT_LAST_ENABLED	(1ULL << 51)
@@ -940,6 +944,25 @@ struct nvgpu_gpu_set_deterministic_opts_args {
 	__u64 channels; /* in */
 };
 
+/*
+ * register buffer information ioctl.
+ *
+ * Note: Additional metadata is associated with the buffer only for supporting
+ * legacy userspace APIs and for compatibility with desktop RM. Usage of this
+ * API should be avoided.
+ *
+ * This ioctl allocates comptags for the buffer if requested/required
+ * by libnvrm_gpu and associates the metadata blob sent by libnvrm_gpu
+ * with the buffer in the buffer privdata.
+ *
+ * return 0 on success, < 0 in case of failure.
+ * retval -EINVAL if the enabled flag NVGPU_SUPPORT_BUFFER_METADATA
+ * isn't set or the parameters are invalid.
+ * retval -ENOMEM if sufficient memory is not available for
+ * privdata or comptags.
+ * retval -EFAULT if the metadata blob copy fails.
+ */
+
 /*
  * NVGPU_GPU_COMPTAGS_ALLOC_NONE: Specified to not allocate comptags
  * for the buffer.
@@ -967,20 +990,46 @@
  */
 #define NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED	(1U << 0)
 
+/*
+ * Specify buffer registration as mutable. This allows modifying the buffer
+ * attributes by calling this IOCTL again with NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY.
+ *
+ * Mutable registration is intended for private buffers where the physical
+ * memory allocation may be recycled. Buffers intended for interoperability
+ * should be specified without this flag.
+ */
+#define NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE		(1U << 1)
+
+/*
+ * Re-register the buffer. When this flag is set, the buffer comptags state,
+ * metadata binary blob, and other attributes are re-defined.
+ *
+ * This flag may be set only when the buffer was previously registered as
+ * mutable. This flag is ignored when the buffer is registered for the
+ * first time.
+ *
+ * If the buffer previously had comptags and the re-registration also specifies
+ * comptags, the associated comptags are not cleared.
+ *
+ */
+#define NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY		(1U << 2)
+
 /* Maximum size of the user supplied buffer metadata */
 #define NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE	256U
 
 /*
- * REGISTER_BUFFER ioctl is supported when the enabled flag
- * NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA is set. It will
- * return -EINVAL if that enabled flag isn't enabled.
+ * register buffer ioctl arguments struct.
+ *
+ * Note: Additional metadata is associated with the buffer only for supporting
+ * legacy userspace APIs and for compatibility with desktop RM. Usage of this
+ * API should be avoided.
 */
 struct nvgpu_gpu_register_buffer_args {
 	/* [in] dmabuf fd */
 	__s32 dmabuf_fd;
 
 	/*
-	 * [in] Compression tags allocation control.
+	 * [in] Compression tags allocation control.
 	 *
 	 * Set to one of the NVGPU_GPU_COMPTAGS_ALLOC_* values. See the
 	 * description of the values for semantics of this field.
@@ -990,7 +1039,7 @@ struct nvgpu_gpu_register_buffer_args {
 	__u16 reserved1;
 
 	/*
-	 * [in] Pointer to buffer metadata.
+	 * [in] Pointer to buffer metadata.
 	 *
 	 * This is a binary blob populated by nvrm_gpu that will be associated
 	 * with the dmabuf.
@@ -1000,9 +1049,8 @@ struct nvgpu_gpu_register_buffer_args {
 	/* [in] buffer metadata size */
 	__u32 metadata_size;
 
-
 	/*
-	 * [out] flags.
+	 * [in/out] flags.
 	 *
 	 * See description of NVGPU_GPU_REGISTER_BUFFER_FLAGS_* for semantics
 	 * of this field.
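For reference, a minimal userspace sketch of the registration path added above (not part of the patch; the helper name and the assumption that ctrl_fd is an already-open nvgpu ctrl node are illustrative, while the ioctl, struct fields, and flag names are the ones defined in this header):

    /*
     * Sketch only: register a dmabuf with a metadata blob and require
     * comptags. Assumes ctrl_fd refers to an open nvgpu ctrl device node.
     */
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu-ctrl.h>

    static int register_compressible_buffer(int ctrl_fd, int dmabuf_fd,
    					const void *blob, uint32_t blob_size)
    {
    	struct nvgpu_gpu_register_buffer_args args;

    	memset(&args, 0, sizeof(args));
    	args.dmabuf_fd = dmabuf_fd;
    	args.comptags_alloc_control = NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED;
    	args.metadata_addr = (uintptr_t)blob;
    	args.metadata_size = blob_size;
    	args.flags = NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE;

    	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_REGISTER_BUFFER, &args) != 0)
    		return -1;

    	/* On return, flags reports whether comptags were actually allocated. */
    	return (args.flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED) ? 1 : 0;
    }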