gpu: nvgpu: allocate comptags and store metadata in REGISTER_BUFFER ioctl

To let userspace query the comptags allocation status of a buffer,
comptags are now allocated only during buffer registration performed
by nvrm_gpu. Earlier, they were allocated during map.

nvrm_gpu sends a metadata blob to be associated with the buffer. This
blob has to be stored in the dmabuf privdata for all buffers
registered by nvrm_gpu.

This patch moves the privdata allocation to the buffer registration
ioctl.

Remove g->mm.priv_lock as it is no longer needed. This lock was added
to protect dmabuf private data setup. That private data is now
handled through dmabuf->ops, and the setup of dmabuf->ops is done
under dmabuf->lock.

To support legacy userspace, this patch still allocates comptags on
demand during map calls for unregistered buffers.
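
For illustration, a minimal userspace sketch of the new registration
flow (hypothetical helper; it assumes only the uapi added by this
patch and a ctrl_fd opened on the nvgpu control node; error handling
is abbreviated):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/*
 * Hypothetical helper: register a dmabuf and require comptags.
 * Returns 1 if comptags were allocated, 0 if not, -1 on error.
 */
static int register_compressible_buffer(int ctrl_fd, int dmabuf_fd,
					const void *blob, __u32 blob_size)
{
	struct nvgpu_gpu_register_buffer_args args;

	memset(&args, 0, sizeof(args));
	args.dmabuf_fd = dmabuf_fd;
	args.comptags_alloc_control = NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED;
	args.metadata_addr = (__u64)(uintptr_t)blob;
	/* must not exceed NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE */
	args.metadata_size = blob_size;
	args.flags = 0;		/* immutable registration */

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_REGISTER_BUFFER, &args) != 0)
		return -1;

	/* flags is in/out: the kernel reports comptags allocation status */
	return (args.flags &
		NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED) ? 1 : 0;
}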

Bug 200586313

Change-Id: I88b2ca04c733dd02a84bcbf05060bddc00147790
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2480761
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Sagar Kamble
Date:      2020-12-10 18:35:32 +05:30
Committer: mobile promotions
Parent:    8a4b72a4aa
Commit:    ed16377983

16 changed files with 456 additions and 170 deletions


@@ -1218,21 +1218,20 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 	struct gk20a_comptags comptags = { 0 };
 
 	/*
-	 * Get the comptags state, alloc if necessary
+	 * Get the comptags state
 	 */
-	err = gk20a_alloc_or_get_comptags(g, os_buf,
-					  &g->cbc->comp_tags,
-					  &comptags);
-	if (err != 0) {
+	gk20a_get_comptags(os_buf, &comptags);
+
+	if (!comptags.allocated) {
+		nvgpu_log_info(g, "compr kind %d map requested without comptags allocated, allocating...",
+			       binfo_ptr->compr_kind);
+
 		/*
-		 * This is an irrecoverable failure and we need to
-		 * abort. In particular, it is not safe to proceed with
-		 * the incompressible fallback, since we cannot not mark
-		 * our alloc failure anywere. Later we would retry
-		 * allocation and break compressible map aliasing.
+		 * best effort only, we don't really care if
+		 * this fails
 		 */
-		nvgpu_err(g, "Error %d setting up comptags", err);
-		goto ret_err;
+		gk20a_alloc_or_get_comptags(
+			g, os_buf, &g->cbc->comp_tags, &comptags);
 	}
 
 	/*
@@ -1261,9 +1260,9 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 	}
 
 	/*
-	 * Store the ctag offset for later use if we got the comptags
+	 * Store the ctag offset for later use if we have the comptags
 	 */
-	if (comptags.lines != 0U) {
+	if (comptags.enabled) {
 		ctag_offset = comptags.offset;
 	}


@@ -174,42 +174,6 @@ nvgpu_vm_remap_mpool_find(struct nvgpu_rbtree_node *root,
 	return nvgpu_vm_remap_mpool_from_tree_entry(node);
 }
 
-#ifdef CONFIG_NVGPU_COMPRESSION
-/*
- * Ensure that compression resources are allocated to the specified
- * physical memory buffer.
- */
-static inline int nvgpu_vm_remap_ensure_comptags(struct vm_gk20a *vm,
-				struct nvgpu_vm_remap_os_buffer *remap_os_buf)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_comptags comptags = { 0 };
-	struct nvgpu_os_buffer *os_buf = &remap_os_buf->os_buf;
-	int err = 0;
-
-	err = gk20a_alloc_or_get_comptags(g, os_buf,
-					  &g->cbc->comp_tags,
-					  &comptags);
-	if (err != 0) {
-		nvgpu_err(g, "cannot alloc comptags: %d", err);
-		return err;
-	}
-
-	if (comptags.needs_clear) {
-		nvgpu_assert(g->ops.cbc.ctrl != NULL);
-		if (gk20a_comptags_start_clear(os_buf)) {
-			err = g->ops.cbc.ctrl(g, nvgpu_cbc_op_clear,
-					      comptags.offset,
-					      (comptags.offset +
-					       comptags.lines - 1U));
-			gk20a_comptags_finish_clear(os_buf, err == 0);
-		}
-	}
-
-	return err;
-}
-#endif
-
 /*
  * Validate that the specified remap operation resides within the target
  * virtual memory pool.
@@ -263,10 +227,41 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
 
 #ifdef CONFIG_NVGPU_COMPRESSION
 	if (op->compr_kind != NVGPU_KIND_INVALID) {
-		if (nvgpu_vm_remap_ensure_comptags(vm, remap_os_buf)) {
-			/* inform caller there are no more compbits */
+		struct gk20a *g = gk20a_from_vm(vm);
+		struct gk20a_comptags comptags = { 0 };
+
+		/*
+		 * Note: this is best-effort only
+		 */
+		gk20a_alloc_or_get_comptags(g, &remap_os_buf->os_buf,
+				&g->cbc->comp_tags, &comptags);
+
+		if (!comptags.enabled) {
+			/* inform the caller that the buffer does not
+			 * have compbits */
 			op->compr_kind = NVGPU_KIND_INVALID;
 		}
+
+		if (comptags.needs_clear) {
+			nvgpu_assert(g->ops.cbc.ctrl != NULL);
+			if (gk20a_comptags_start_clear(&remap_os_buf->os_buf)) {
+				int err = g->ops.cbc.ctrl(
+					g, nvgpu_cbc_op_clear,
+					comptags.offset,
+					(comptags.offset +
+					 comptags.lines - 1U));
+
+				gk20a_comptags_finish_clear(
+					&remap_os_buf->os_buf, err == 0);
+				if (err) {
+					nvgpu_err(
+						g, "Comptags clear failed: %d",
+						err);
+					op->compr_kind = NVGPU_KIND_INVALID;
+				}
+			}
+		}
 	}
 #endif


@@ -47,7 +47,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 	}
 
 	nvgpu_mutex_init(&mm->tlb_lock);
-	nvgpu_mutex_init(&mm->priv_lock);
 
 	mm->g = g;


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,17 @@ struct gk20a_comptags {
 	 */
 	bool allocated;
 
+	/*
+	 * "enabled" indicates if the comptags are in use for mapping the buffer
+	 * as compressible. Buffer comptags usage may be changed at runtime by
+	 * buffer metadata re-registration. However, comptags once allocated
+	 * are freed only on freeing the buffer.
+	 *
+	 * "enabled" implies that comptags have been successfully allocated
+	 * (offset > 0 and lines > 0)
+	 */
+	bool enabled;
+
 	/*
 	 * Do comptags need to be cleared before mapping?
 	 */
@@ -77,13 +88,16 @@ void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
  * Defined by OS specific code since comptags are stored in a highly OS specific
  * way.
  */
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags);
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator);
 void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 			struct gk20a_comptags *comptags);
 
+/* legacy support */
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags);
+
 /*
  * These functions must be used to synchronize comptags clear. The usage:
  *

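To illustrate how the split API above composes on the legacy map path,
a minimal kernel-side sketch (hypothetical caller; only the nvgpu
symbols declared above are taken from this patch):

/*
 * Read the current comptags state; fall back to best-effort on-demand
 * allocation for unregistered buffers; use the offset only when the
 * comptags are enabled.
 */
static u32 map_time_ctag_offset(struct gk20a *g,
				struct nvgpu_os_buffer *os_buf)
{
	struct gk20a_comptags comptags = { 0 };

	gk20a_get_comptags(os_buf, &comptags);
	if (!comptags.allocated) {
		gk20a_alloc_or_get_comptags(g, os_buf,
					    &g->cbc->comp_tags, &comptags);
	}

	return comptags.enabled ? comptags.offset : 0U;
}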

@@ -215,6 +215,7 @@ struct gk20a;
 	DEFINE_FLAG(NVGPU_CLK_ARB_ENABLED, "CLK_ARB support"),	\
 	DEFINE_FLAG(NVGPU_SUPPORT_VAB_ENABLED, "VAB feature supported"),	\
 	DEFINE_FLAG(NVGPU_SUPPORT_ROP_IN_GPC, "ROP is part of GPC"),	\
+	DEFINE_FLAG(NVGPU_SUPPORT_BUFFER_METADATA, "Buffer metadata support"),	\
 	DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
 
 /**


@@ -327,8 +327,6 @@ struct mm_gk20a {
 	struct nvgpu_mutex l2_op_lock;
 	/** Lock to serialize TLB operations. */
 	struct nvgpu_mutex tlb_lock;
-	/** Lock to serialize mm internal operations. */
-	struct nvgpu_mutex priv_lock;
 
 	struct nvgpu_mem bar2_desc;


@@ -1020,6 +1020,7 @@ __acquires(&l->cde_app->mutex)
 __releases(&l->cde_app->mutex)
 {
 	struct gk20a *g = &l->g;
+	struct gk20a_dmabuf_priv *priv = NULL;
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct nvgpu_cbc *cbc = g->cbc;
 	struct gk20a_comptags comptags;
@@ -1069,10 +1070,13 @@ __releases(&l->cde_app->mutex)
 	/* First, map the buffer to local va */
 
 	/* ensure that the compbits buffer has drvdata */
-	err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
-					 dev_from_gk20a(g));
-	if (err)
+	priv = gk20a_dma_buf_get_drvdata(compbits_scatter_buf,
+					 dev_from_gk20a(g));
+	if (!priv) {
+		err = -EINVAL;
+		nvgpu_err(g, "Compbits buffer has no metadata");
 		goto exit_idle;
+	}
 
 	/* compbits don't start at page aligned offset, so we need to align
 	   the region to be mapped */
@@ -1749,11 +1753,22 @@ int gk20a_prepare_compressible_read(
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
 	u32 missing_bits;
+	struct gk20a_dmabuf_priv *priv = NULL;
 
 	dmabuf = dma_buf_get(buffer_fd);
 	if (IS_ERR(dmabuf))
 		return -EINVAL;
 
+	/* this function is nop for incompressible buffers */
+	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev_from_gk20a(g));
+	if (!priv || !priv->comptags.enabled) {
+		nvgpu_log_info(g, "comptags not enabled for the buffer");
+		*valid_compbits = NVGPU_GPU_COMPBITS_NONE;
+		*zbc_color = 0;
+		dma_buf_put(dmabuf);
+		return 0;
+	}
+
 	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		dma_buf_put(dmabuf);
@@ -1811,6 +1826,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 	int err;
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
+	struct gk20a_dmabuf_priv *priv = NULL;
 
 	dmabuf = dma_buf_get(buffer_fd);
 	if (IS_ERR(dmabuf)) {
@@ -1818,6 +1834,14 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 		return -EINVAL;
 	}
 
+	/* this function is nop for incompressible buffers */
+	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev_from_gk20a(g));
+	if (!priv || !priv->comptags.enabled) {
+		nvgpu_log_info(g, "comptags not allocated for the buffer");
+		dma_buf_put(dmabuf);
+		return 0;
+	}
+
 	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		nvgpu_err(g, "could not get state from dmabuf");


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -26,8 +26,8 @@
 void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 			struct gk20a_comptags *comptags)
 {
-	struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
-							buf->dev);
+	struct gk20a_dmabuf_priv *priv =
+		gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
 
 	if (!comptags)
 		return;
@@ -42,80 +42,85 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 	nvgpu_mutex_release(&priv->lock);
 }
 
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags)
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator)
 {
-	int ret = 0;
 	struct gk20a_dmabuf_priv *priv = NULL;
-	u32 offset;
-	int err;
 	u64 ctag_granularity;
-	u32 lines;
-
-	ret = gk20a_dmabuf_alloc_drvdata(buf->dmabuf, buf->dev);
-	if (ret) {
-		nvgpu_err(g, "error allocating comptags priv data");
-		return ret;
-	}
-
-	priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
-	if (!priv)
-		return -ENOSYS;
-
-	nvgpu_mutex_acquire(&priv->lock);
-
-	if (priv->comptags.allocated) {
-		/*
-		 * already allocated
-		 */
-		*comptags = priv->comptags;
-		err = 0;
-		goto exit_locked;
-	}
+	u32 offset = 0;
+	u32 lines = 0;
+	int err;
 
 	ctag_granularity = g->ops.fb.compression_page_size(g);
 	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
 
 	/* 0-sized buffer? Shouldn't occur, but let's check anyways. */
 	if (lines < 1) {
-		err = -EINVAL;
-		goto exit_locked;
+		nvgpu_err(g, "zero sized buffer. comptags not allocated.");
+		return -EINVAL;
 	}
 
+	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
+	if (err != 0) {
+		/*
+		 * Note: we must prevent reallocation attempt in case the
+		 * allocation failed. Otherwise a later successful allocation
+		 * could cause corruption because interop endpoints have
+		 * conflicting compression states with the maps
+		 */
+		nvgpu_err(g, "Comptags allocation failed %d", err);
+		lines = 0;
	}
+
+	/* Note that privdata is not validated here as it is available here. */
+	priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
+
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
 
-	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
-	if (!err) {
-		priv->comptags.offset = offset;
-		priv->comptags.lines = lines;
-		priv->comptags.needs_clear = true;
-	} else {
-		priv->comptags.offset = 0;
-		priv->comptags.lines = 0;
-		priv->comptags.needs_clear = false;
-	}
-
-	/*
-	 * We don't report an error here if comptag alloc failed. The
-	 * caller will simply fallback to incompressible kinds. It
-	 * would not be safe to re-allocate comptags anyways on
-	 * successive calls, as that would break map aliasing.
-	 */
-	err = 0;
+	priv->comptags.offset = offset;
+	priv->comptags.lines = lines;
+	priv->comptags.needs_clear = (lines != 0);
 	priv->comptags.allocated = true;
+	priv->comptags.enabled = (lines != 0);
 
-	*comptags = priv->comptags;
-
-exit_locked:
-	nvgpu_mutex_release(&priv->lock);
-
 	return err;
 }
 
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags)
+{
+	struct gk20a_dmabuf_priv *priv = NULL;
+	int err;
+
+	if (!comptags)
+		return;
+
+	err = gk20a_dmabuf_alloc_or_get_drvdata(buf->dmabuf, buf->dev, &priv);
+	if (err != 0) {
+		(void) memset(comptags, 0, sizeof(*comptags));
+		return;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	/*
+	 * Try to allocate only if metadata is not locked. However, we
+	 * don't re-enable explicitly disabled comptags.
+	 */
+	if (!priv->registered || priv->mutable_metadata) {
+		if (!priv->comptags.allocated) {
+			gk20a_alloc_comptags(g, buf, allocator);
+		}
+	}
+
+	*comptags = priv->comptags;
+
+	nvgpu_mutex_release(&priv->lock);
+}
+
 bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
 {
 	struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,

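One locking detail worth spelling out (an assumption read off the two
call sites in this patch, not a documented contract): the new
gk20a_alloc_comptags() touches priv->comptags without taking
priv->lock itself, so callers are expected to hold it, as in this
hypothetical sketch:

static void alloc_comptags_under_lock(struct gk20a *g,
				      struct nvgpu_os_buffer *buf,
				      struct gk20a_dmabuf_priv *priv)
{
	nvgpu_mutex_acquire(&priv->lock);
	if (!priv->comptags.allocated) {
		/* the allocator is stored in priv for the later free */
		(void) gk20a_alloc_comptags(g, buf, &g->cbc->comp_tags);
	}
	nvgpu_mutex_release(&priv->lock);
}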

@@ -91,11 +91,10 @@ static void nvgpu_dma_buf_release(struct dma_buf *dmabuf)
 	dmabuf->ops->release(dmabuf);
 }
 
+/* This function must be called with priv->lock held */
 static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *device,
 			struct gk20a_dmabuf_priv *priv)
 {
-	nvgpu_mutex_acquire(&priv->lock);
-
 	priv->dmabuf = dmabuf;
 
 	mutex_lock(&dmabuf->lock);
@@ -109,8 +108,6 @@ static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *devi
 	dmabuf->ops = &priv->local_ops;
 	mutex_unlock(&dmabuf->lock);
 
-	nvgpu_mutex_release(&priv->lock);
-
 	return 0;
 }
 
@@ -210,6 +207,10 @@ void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv)
 	/* Remove this entry from the global tracking list */
 	nvgpu_list_del(&priv->list);
 
+	if (priv->metadata_blob) {
+		nvgpu_kfree(g, priv->metadata_blob);
+	}
+
 	nvgpu_kfree(g, priv);
 }
 
@@ -225,46 +226,50 @@ void gk20a_dma_buf_priv_list_clear(struct nvgpu_os_linux *l)
 	nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
 }
 
-int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
+int gk20a_dmabuf_alloc_or_get_drvdata(struct dma_buf *dmabuf, struct device *dev,
+		struct gk20a_dmabuf_priv **priv_ptr)
 {
 	struct gk20a *g = gk20a_get_platform(dev)->g;
-	struct gk20a_dmabuf_priv *priv;
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct gk20a_dmabuf_priv *priv;
+	int err = 0;
 
 	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
-
-	if (likely(priv))
-		return 0;
-
-	nvgpu_mutex_acquire(&g->mm.priv_lock);
-	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
-	if (priv)
-		goto priv_exist_or_err;
+	if (priv) {
+		nvgpu_log_info(g, "Buffer metadata already allocated");
+		*priv_ptr = priv;
+		goto out;
+	}
 
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
 	if (!priv) {
-		priv = ERR_PTR(-ENOMEM);
-		goto priv_exist_or_err;
+		err = -ENOMEM;
+		nvgpu_err(g, "Buffer metadata allocation failed");
+		goto out;
 	}
 
 	nvgpu_mutex_init(&priv->lock);
-	nvgpu_init_list_node(&priv->states);
+
+	nvgpu_mutex_acquire(&priv->lock);
 	priv->g = g;
-	gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
 	nvgpu_init_list_node(&priv->list);
+	nvgpu_init_list_node(&priv->states);
+	gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
+	nvgpu_mutex_release(&priv->lock);
 
 	/* Append this priv to the global tracker */
 	nvgpu_mutex_acquire(&l->dmabuf_priv_list_lock);
 	nvgpu_list_add_tail(&l->dmabuf_priv_list, &priv->list);
 	nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
 
-priv_exist_or_err:
-	nvgpu_mutex_release(&g->mm.priv_lock);
-	if (IS_ERR(priv))
-		return -ENOMEM;
+	*priv_ptr = priv;
 
-	return 0;
+out:
+	return err;
 }
 
@@ -280,10 +285,6 @@ int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 		return -EINVAL;
 	}
 
-	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
-	if (err)
-		return err;
-
 	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
 	if (!priv) {
 		nvgpu_do_assert();


@@ -74,9 +74,13 @@ struct gk20a_dmabuf_priv {
 	struct gk20a_comptag_allocator *comptag_allocator;
 	struct gk20a_comptags comptags;
 
-	struct nvgpu_list_node states;
+	u8 *metadata_blob;
+	u32 metadata_blob_size;
 
-	u64 buffer_id;
+	bool registered;
+	bool mutable_metadata;
+
+	struct nvgpu_list_node states;
 
 	/* Used for retrieving the associated dmabuf from the priv */
 	struct dma_buf *dmabuf;
@@ -109,7 +113,8 @@ void nvgpu_mm_unpin(struct device *dev,
 void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv);
 
-int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
+int gk20a_dmabuf_alloc_or_get_drvdata(struct dma_buf *dmabuf, struct device *dev,
+		struct gk20a_dmabuf_priv **priv_ptr);
 
 int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 			   u64 offset, struct gk20a_buffer_state **state);


@@ -266,7 +266,6 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
 			platform->force_128K_pmu_vm);
 
 	nvgpu_mutex_init(&g->mm.tlb_lock);
-	nvgpu_mutex_init(&g->mm.priv_lock);
 }
 
 int nvgpu_probe(struct gk20a *g,


@@ -23,6 +23,7 @@
 #include <nvgpu/pmu/clk/clk.h>
 #include <nvgpu/bitops.h>
+#include <nvgpu/comptags.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/nvhost.h>
 #include <nvgpu/bug.h>
@@ -57,6 +58,7 @@
 #include <nvgpu/user_fence.h>
 #include <nvgpu/nvgpu_init.h>
 #include <nvgpu/grmgr.h>
+#include <nvgpu/string.h>
 
 #include "ioctl_ctrl.h"
 #include "ioctl_dbg.h"
@@ -65,6 +67,7 @@
 #include "ioctl_channel.h"
 #include "ioctl.h"
 
+#include "dmabuf_priv.h"
 #include "platform_gk20a.h"
 #include "os_linux.h"
 #include "channel.h"
@@ -300,6 +303,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED},
 	{NVGPU_GPU_FLAGS_SUPPORT_VAB,
 		NVGPU_SUPPORT_VAB_ENABLED},
+	{NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA,
+		NVGPU_SUPPORT_BUFFER_METADATA},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
@@ -1911,6 +1916,190 @@ out:
 }
 #endif
 
+#ifdef CONFIG_NVGPU_COMPRESSION
+static int nvgpu_handle_comptags_control(struct gk20a *g,
+		struct dma_buf *dmabuf,
+		struct gk20a_dmabuf_priv *priv,
+		u8 comptags_alloc_control)
+{
+	struct nvgpu_os_buffer os_buf = {0};
+	int err = 0;
+
+	if (comptags_alloc_control == NVGPU_GPU_COMPTAGS_ALLOC_NONE) {
+		if (priv->comptags.allocated) {
+			/*
+			 * Just mark the comptags as disabled. Comptags will be
+			 * freed on freeing the buffer.
+			 */
+			priv->comptags.enabled = false;
+			nvgpu_log_info(g, "Comptags disabled.");
+		}
+		return 0;
+	}
+
+	/* Allocate the comptags if requested/required. */
+	if (priv->comptags.allocated) {
+		priv->comptags.enabled = priv->comptags.lines > 0;
+		if (priv->comptags.enabled) {
+			nvgpu_log_info(g, "Comptags enabled.");
+			return 0;
+		} else {
+			if (comptags_alloc_control ==
+					NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+				nvgpu_err(g,
+					"Previous allocation has failed, could not enable comptags (required)");
+				return -ENOMEM;
+			} else {
+				nvgpu_log_info(g,
+					"Previous allocation has failed, could not enable comptags (requested)");
+				return 0;
+			}
+		}
+	}
+
+	os_buf.dmabuf = dmabuf;
+	os_buf.dev = dev_from_gk20a(g);
+
+	err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags);
+	if (err != 0) {
+		if (comptags_alloc_control ==
+				NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+			nvgpu_err(g, "Comptags allocation (required) failed (%d)",
+				err);
+		} else {
+			nvgpu_err(g, "Comptags allocation (requested) failed (%d)",
+				err);
+			err = 0;
+		}
+	}
+
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_register_buffer(struct gk20a *g,
+		struct nvgpu_gpu_register_buffer_args *args)
+{
+	struct gk20a_dmabuf_priv *priv = NULL;
+	bool mutable_metadata = false;
+	bool modify_metadata = false;
+	struct dma_buf *dmabuf;
+	u8 *blob_copy = NULL;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_BUFFER_METADATA)) {
+		nvgpu_err(g, "Buffer metadata not supported");
+		return -EINVAL;
+	}
+
+	if (args->metadata_size > NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE) {
+		nvgpu_err(g, "Invalid metadata blob size");
+		return -EINVAL;
+	}
+
+	if (args->comptags_alloc_control > NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+		nvgpu_err(g, "Invalid comptags_alloc_control");
+		return -EINVAL;
+	}
+
+	nvgpu_log_info(g, "dmabuf_fd: %d, comptags control: %u, metadata size: %u, flags: %u",
+		args->dmabuf_fd, args->comptags_alloc_control,
+		args->metadata_size, args->flags);
+
+	mutable_metadata = (args->flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE) != 0;
+	modify_metadata = (args->flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY) != 0;
+
+	dmabuf = dma_buf_get(args->dmabuf_fd);
+	if (IS_ERR(dmabuf)) {
+		nvgpu_warn(g, "%s: fd %d is not a dmabuf",
+			__func__, args->dmabuf_fd);
+		return PTR_ERR(dmabuf);
+	}
+
+	/*
+	 * Allocate or get the buffer metadata state.
+	 */
+	err = gk20a_dmabuf_alloc_or_get_drvdata(
+		dmabuf, dev_from_gk20a(g), &priv);
+	if (err != 0) {
+		nvgpu_err(g, "Error allocating buffer metadata %d", err);
+		goto out;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	/* Check for valid buffer metadata re-registration */
+	if (priv->registered) {
+		if (!modify_metadata) {
+			nvgpu_err(g, "attempt to modify buffer metadata without NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY");
+			err = -EINVAL;
+			goto out_priv_unlock;
+		} else if (!priv->mutable_metadata) {
+			nvgpu_err(g, "attempt to redefine immutable metadata");
+			err = -EINVAL;
+			goto out_priv_unlock;
+		}
+	}
+
+	/* Allocate memory for the metadata blob */
+	blob_copy = nvgpu_kzalloc(g, args->metadata_size);
+	if (!blob_copy) {
+		nvgpu_err(g, "Error allocating memory for blob");
+		err = -ENOMEM;
+		goto out_priv_unlock;
+	}
+
+	/* Copy the metadata blob */
+	if (copy_from_user(blob_copy,
+			(void __user *) args->metadata_addr,
+			args->metadata_size)) {
+		err = -EFAULT;
+		nvgpu_err(g, "Error copying buffer metadata blob");
+		goto out_priv_unlock;
+	}
+
+	/* Comptags allocation */
+	err = nvgpu_handle_comptags_control(g, dmabuf, priv,
+			args->comptags_alloc_control);
+	if (err != 0) {
+		nvgpu_err(g, "Comptags alloc control failed %d", err);
+		goto out_priv_unlock;
+	}
+
+	/* All done, update metadata blob */
+	nvgpu_kfree(g, priv->metadata_blob);
+	priv->metadata_blob = blob_copy;
+	priv->metadata_blob_size = args->metadata_size;
+	blob_copy = NULL;
+
+	/* Mark registered and update mutability */
+	priv->registered = true;
+	priv->mutable_metadata = mutable_metadata;
+
+	/* Output variables */
+	args->flags = 0;
+	if (priv->comptags.enabled) {
+		args->flags |=
+			NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED;
+	}
+
+	nvgpu_log_info(g, "buffer registered: mutable: %s, metadata size: %u, flags: 0x%8x",
+		priv->mutable_metadata ? "yes" : "no", priv->metadata_blob_size,
+		args->flags);
+
+out_priv_unlock:
+	nvgpu_mutex_release(&priv->lock);
+out:
+	dma_buf_put(dmabuf);
+	nvgpu_kfree(g, blob_copy);
+
+	return err;
+}
+#endif
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -2268,6 +2457,13 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
 		break;
 
+#ifdef CONFIG_NVGPU_COMPRESSION
+	case NVGPU_GPU_IOCTL_REGISTER_BUFFER:
+		err = nvgpu_gpu_ioctl_register_buffer(g,
+			(struct nvgpu_gpu_register_buffer_args *)buf);
+		break;
+#endif
+
 	default:
 		nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;


@@ -326,6 +326,9 @@ void gk20a_init_linux_characteristics(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_REMAP, true);
+#ifdef CONFIG_NVGPU_COMPRESSION
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_BUFFER_METADATA, true);
+#endif
 
 	if (!IS_ENABLED(CONFIG_NVGPU_SYNCFD_NONE)) {
 		nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);


@@ -68,12 +68,6 @@ int nvgpu_vm_remap_os_buf_get(struct vm_gk20a *vm,
 		goto clean_up;
 	}
 
-	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
-	if (err) {
-		nvgpu_warn(g, "failed to alloc drvdata");
-		goto clean_up;
-	}
-
 	if ((op->flags & NVGPU_VM_REMAP_OP_FLAGS_ACCESS_NO_WRITE) != 0) {
 		dmabuf_direction = DMA_TO_DEVICE;
 	} else {


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -30,14 +30,19 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 {
 }
 
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags)
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator)
 {
 	return -ENODEV;
 }
 
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags)
+{
+}
+
 bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
 {
 	return false;


@@ -186,7 +186,11 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_SMPC_GLOBAL_MODE	(1ULL << 48)
 /* Retrieving contents of graphics context is supported */
 #define NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT	(1ULL << 49)
-/* Additional buffer metadata association supported */
+/*
+ * Note: Additional buffer metadata association support. This feature is only
+ * for supporting legacy userspace APIs and for compatibility with desktop
+ * RM behavior. Usage of this feature should be avoided.
+ */
 #define NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA	(1ULL << 50)
 /* Flag to indicate whether configuring L2_MAXEVICTLAST_WAYS is supported */
 #define NVGPU_GPU_FLAGS_L2_MAX_WAYS_EVICT_LAST_ENABLED	(1ULL << 51)
@@ -940,6 +944,25 @@ struct nvgpu_gpu_set_deterministic_opts_args {
 	__u64 channels; /* in */
 };
 
+/*
+ * register buffer information ioctl.
+ *
+ * Note: Additional metadata is associated with the buffer only for supporting
+ * legacy userspace APIs and for compatibility with desktop RM. Usage of this
+ * API should be avoided.
+ *
+ * This ioctl allocates comptags for the buffer if requested/required
+ * by libnvrm_gpu and associates the metadata blob sent by libnvrm_gpu
+ * with the buffer in the buffer privdata.
+ *
+ * return 0 on success, < 0 in case of failure.
+ * retval -EINVAL if the enabled flag NVGPU_SUPPORT_BUFFER_METADATA
+ * isn't set or the params are invalid.
+ * retval -ENOMEM if sufficient memory is not available for
+ * privdata or comptags.
+ * retval -EFAULT if the metadata blob copy fails.
+ */
+
 /*
  * NVGPU_GPU_COMPTAGS_ALLOC_NONE: Specified to not allocate comptags
  * for the buffer.
@@ -967,20 +990,46 @@ struct nvgpu_gpu_set_deterministic_opts_args {
 */
 #define NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED	(1U << 0)
 
+/*
+ * Specify buffer registration as mutable. This allows modifying the buffer
+ * attributes by calling this IOCTL again with
+ * NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY.
+ *
+ * Mutable registration is intended for private buffers where the physical
+ * memory allocation may be recycled. Buffers intended for interoperability
+ * should be specified without this flag.
+ */
+#define NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE	(1U << 1)
+
+/*
+ * Re-register the buffer. When this flag is set, the buffer comptags state,
+ * metadata binary blob, and other attributes are re-defined.
+ *
+ * This flag may be set only when the buffer was previously registered as
+ * mutable. This flag is ignored when the buffer is registered for the
+ * first time.
+ *
+ * If the buffer previously had comptags and the re-registration also
+ * specifies comptags, the associated comptags are not cleared.
+ */
+#define NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY	(1U << 2)
+
 /* Maximum size of the user supplied buffer metadata */
 #define NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE	256U
 
 /*
- * REGISTER_BUFFER ioctl is supported when the enabled flag
- * NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA is set. It will
- * return -EINVAL if that enabled flag isn't enabled.
+ * register buffer ioctl arguments struct.
+ *
+ * Note: Additional metadata is associated with the buffer only for supporting
+ * legacy userspace APIs and for compatibility with desktop RM. Usage of this
+ * API should be avoided.
  */
 struct nvgpu_gpu_register_buffer_args {
 	/* [in] dmabuf fd */
 	__s32 dmabuf_fd;
 
 	/*
 	 * [in] Compression tags allocation control.
 	 *
 	 * Set to one of the NVGPU_GPU_COMPTAGS_ALLOC_* values. See the
 	 * description of the values for semantics of this field.
@@ -990,7 +1039,7 @@ struct nvgpu_gpu_register_buffer_args {
 	__u16 reserved1;
 
 	/*
-	 * [in] Pointer to buffer metadata. 
+	 * [in] Pointer to buffer metadata.
 	 *
 	 * This is a binary blob populated by nvrm_gpu that will be associated
 	 * with the dmabuf.
@@ -1000,9 +1049,8 @@ struct nvgpu_gpu_register_buffer_args {
 	/* [in] buffer metadata size */
 	__u32 metadata_size;
 
 	/*
-	 * [out] flags.
+	 * [in/out] flags.
 	 *
 	 * See description of NVGPU_GPU_REGISTER_BUFFER_FLAGS_* for semantics
 	 * of this field.
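
As a usage note for the MUTABLE/MODIFY flags above, a hedged userspace
sketch of re-registration (hypothetical helper; same headers as the
sketch in the commit message):

static int reregister_buffer_metadata(int ctrl_fd, int dmabuf_fd,
				      const void *blob, __u32 blob_size)
{
	struct nvgpu_gpu_register_buffer_args args;

	/* First registration: keep the metadata mutable. */
	memset(&args, 0, sizeof(args));
	args.dmabuf_fd = dmabuf_fd;
	args.comptags_alloc_control = NVGPU_GPU_COMPTAGS_ALLOC_NONE;
	args.metadata_addr = (__u64)(uintptr_t)blob;
	args.metadata_size = blob_size;
	args.flags = NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE;
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_REGISTER_BUFFER, &args) != 0)
		return -1;

	/*
	 * Re-registration: MODIFY is required, and mutability must be
	 * requested again or the metadata becomes immutable from here on.
	 */
	memset(&args, 0, sizeof(args));
	args.dmabuf_fd = dmabuf_fd;
	args.comptags_alloc_control = NVGPU_GPU_COMPTAGS_ALLOC_NONE;
	args.metadata_addr = (__u64)(uintptr_t)blob;
	args.metadata_size = blob_size;
	args.flags = NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE |
		     NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY;
	return ioctl(ctrl_fd, NVGPU_GPU_IOCTL_REGISTER_BUFFER, &args);
}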