gpu: nvgpu: allocate comptags and store metadata in REGISTER_BUFFER ioctl

To let userspace query the comptags allocation status of a buffer,
comptags are now allocated only during buffer registration performed
by nvrm_gpu. Earlier, they were allocated during map.

nvrm_gpu sends a metadata blob to be associated with the buffer. This
blob has to be stored in the dmabuf privdata for all buffers
registered by nvrm_gpu.

This patch moves the privdata allocation to the buffer registration
ioctl.

Remove g->mm.priv_lock as it is no longer needed. This lock was added
to protect dmabuf private data setup. That private data is now
handled through dmabuf->ops, and the setup of dmabuf->ops is done
under dmabuf->lock.

To support legacy userspace, this patch still allocates comptags on
demand during map calls for unregistered buffers.
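
For illustration, a minimal userspace sketch of the new registration
flow (hypothetical helper; it assumes only the uapi added by this
patch and a ctrl_fd opened on the nvgpu control node; error handling
is abbreviated):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/*
 * Hypothetical helper: register a dmabuf and require comptags.
 * Returns 1 if comptags were allocated, 0 if not, -1 on error.
 */
static int register_compressible_buffer(int ctrl_fd, int dmabuf_fd,
					const void *blob, __u32 blob_size)
{
	struct nvgpu_gpu_register_buffer_args args;

	memset(&args, 0, sizeof(args));
	args.dmabuf_fd = dmabuf_fd;
	args.comptags_alloc_control = NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED;
	args.metadata_addr = (__u64)(uintptr_t)blob;
	/* must not exceed NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE */
	args.metadata_size = blob_size;
	args.flags = 0;		/* immutable registration */

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_REGISTER_BUFFER, &args) != 0)
		return -1;

	/* flags is in/out: the kernel reports comptags allocation status */
	return (args.flags &
		NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED) ? 1 : 0;
}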

Bug 200586313

Change-Id: I88b2ca04c733dd02a84bcbf05060bddc00147790
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2480761
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Sagar Kamble
Date:      2020-12-10 18:35:32 +05:30
Committer: mobile promotions
Parent:    8a4b72a4aa
Commit:    ed16377983

16 changed files with 456 additions and 170 deletions


@@ -1218,21 +1218,20 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 	struct gk20a_comptags comptags = { 0 };
 
 	/*
-	 * Get the comptags state, alloc if necessary
+	 * Get the comptags state
 	 */
-	err = gk20a_alloc_or_get_comptags(g, os_buf,
-					  &g->cbc->comp_tags,
-					  &comptags);
-	if (err != 0) {
+	gk20a_get_comptags(os_buf, &comptags);
+
+	if (!comptags.allocated) {
+		nvgpu_log_info(g, "compr kind %d map requested without comptags allocated, allocating...",
+			       binfo_ptr->compr_kind);
+
 		/*
-		 * This is an irrecoverable failure and we need to
-		 * abort. In particular, it is not safe to proceed with
-		 * the incompressible fallback, since we cannot not mark
-		 * our alloc failure anywere. Later we would retry
-		 * allocation and break compressible map aliasing.
+		 * best effort only, we don't really care if
+		 * this fails
 		 */
-		nvgpu_err(g, "Error %d setting up comptags", err);
-		goto ret_err;
+		gk20a_alloc_or_get_comptags(
+			g, os_buf, &g->cbc->comp_tags, &comptags);
 	}
 
 	/*
@@ -1261,9 +1260,9 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 	}
 
 	/*
-	 * Store the ctag offset for later use if we got the comptags
+	 * Store the ctag offset for later use if we have the comptags
 	 */
-	if (comptags.lines != 0U) {
+	if (comptags.enabled) {
 		ctag_offset = comptags.offset;
 	}


@@ -174,42 +174,6 @@ nvgpu_vm_remap_mpool_find(struct nvgpu_rbtree_node *root,
 	return nvgpu_vm_remap_mpool_from_tree_entry(node);
 }
 
-#ifdef CONFIG_NVGPU_COMPRESSION
-/*
- * Ensure that compression resources are allocated to the specified
- * physical memory buffer.
- */
-static inline int nvgpu_vm_remap_ensure_comptags(struct vm_gk20a *vm,
-				struct nvgpu_vm_remap_os_buffer *remap_os_buf)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_comptags comptags = { 0 };
-	struct nvgpu_os_buffer *os_buf = &remap_os_buf->os_buf;
-	int err = 0;
-
-	err = gk20a_alloc_or_get_comptags(g, os_buf,
-					  &g->cbc->comp_tags,
-					  &comptags);
-	if (err != 0) {
-		nvgpu_err(g, "cannot alloc comptags: %d", err);
-		return err;
-	}
-
-	if (comptags.needs_clear) {
-		nvgpu_assert(g->ops.cbc.ctrl != NULL);
-		if (gk20a_comptags_start_clear(os_buf)) {
-			err = g->ops.cbc.ctrl(g, nvgpu_cbc_op_clear,
-					      comptags.offset,
-					      (comptags.offset +
-					       comptags.lines - 1U));
-			gk20a_comptags_finish_clear(os_buf, err == 0);
-		}
-	}
-
-	return err;
-}
-#endif
-
 /*
  * Validate that the specified remap operation resides within the target
  * virtual memory pool.
@@ -263,10 +227,41 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
 
 #ifdef CONFIG_NVGPU_COMPRESSION
 	if (op->compr_kind != NVGPU_KIND_INVALID) {
-		if (nvgpu_vm_remap_ensure_comptags(vm, remap_os_buf)) {
-			/* inform caller there are no more compbits */
+		struct gk20a *g = gk20a_from_vm(vm);
+		struct gk20a_comptags comptags = { 0 };
+
+		/*
+		 * Note: this is best-effort only
+		 */
+		gk20a_alloc_or_get_comptags(g, &remap_os_buf->os_buf,
+				&g->cbc->comp_tags, &comptags);
+
+		if (!comptags.enabled) {
+			/* inform the caller that the buffer does not
+			 * have compbits */
 			op->compr_kind = NVGPU_KIND_INVALID;
 		}
+
+		if (comptags.needs_clear) {
+			nvgpu_assert(g->ops.cbc.ctrl != NULL);
+			if (gk20a_comptags_start_clear(&remap_os_buf->os_buf)) {
+				int err = g->ops.cbc.ctrl(
+					g, nvgpu_cbc_op_clear,
+					comptags.offset,
+					(comptags.offset +
+					 comptags.lines - 1U));
+
+				gk20a_comptags_finish_clear(
+					&remap_os_buf->os_buf, err == 0);
+				if (err) {
+					nvgpu_err(
+						g, "Comptags clear failed: %d",
+						err);
+					op->compr_kind = NVGPU_KIND_INVALID;
+				}
+			}
+		}
 	}
 #endif


@@ -47,7 +47,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 	}
 
 	nvgpu_mutex_init(&mm->tlb_lock);
-	nvgpu_mutex_init(&mm->priv_lock);
 
 	mm->g = g;


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,17 @@ struct gk20a_comptags {
 	 */
 	bool allocated;
 
+	/*
+	 * "enabled" indicates if the comptags are in use for mapping the buffer
+	 * as compressible. Buffer comptags usage may be changed at runtime by
+	 * buffer metadata re-registration. However, comptags once allocated
+	 * are freed only on freeing the buffer.
+	 *
+	 * "enabled" implies that comptags have been successfully allocated
+	 * (offset > 0 and lines > 0)
+	 */
+	bool enabled;
+
 	/*
 	 * Do comptags need to be cleared before mapping?
 	 */
@@ -77,13 +88,16 @@ void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
  * Defined by OS specific code since comptags are stored in a highly OS specific
  * way.
  */
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags);
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator);
 void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 			struct gk20a_comptags *comptags);
 
+/* legacy support */
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags);
+
 /*
  * These functions must be used to synchronize comptags clear. The usage:
  *

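To illustrate how the split API above composes on the legacy map path,
a minimal kernel-side sketch (hypothetical caller; only the nvgpu
symbols declared above are taken from this patch):

/*
 * Read the current comptags state; fall back to best-effort on-demand
 * allocation for unregistered buffers; use the offset only when the
 * comptags are enabled.
 */
static u32 map_time_ctag_offset(struct gk20a *g,
				struct nvgpu_os_buffer *os_buf)
{
	struct gk20a_comptags comptags = { 0 };

	gk20a_get_comptags(os_buf, &comptags);
	if (!comptags.allocated) {
		gk20a_alloc_or_get_comptags(g, os_buf,
					    &g->cbc->comp_tags, &comptags);
	}

	return comptags.enabled ? comptags.offset : 0U;
}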

@@ -215,6 +215,7 @@ struct gk20a;
 	DEFINE_FLAG(NVGPU_CLK_ARB_ENABLED, "CLK_ARB support"),	\
 	DEFINE_FLAG(NVGPU_SUPPORT_VAB_ENABLED, "VAB feature supported"),	\
 	DEFINE_FLAG(NVGPU_SUPPORT_ROP_IN_GPC, "ROP is part of GPC"),	\
+	DEFINE_FLAG(NVGPU_SUPPORT_BUFFER_METADATA, "Buffer metadata support"),	\
 	DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
 
 /**


@@ -327,8 +327,6 @@ struct mm_gk20a {
 	struct nvgpu_mutex l2_op_lock;
 	/** Lock to serialize TLB operations. */
 	struct nvgpu_mutex tlb_lock;
-	/** Lock to serialize mm internal operations. */
-	struct nvgpu_mutex priv_lock;
 
 	struct nvgpu_mem bar2_desc;


@@ -1020,6 +1020,7 @@ __acquires(&l->cde_app->mutex)
 __releases(&l->cde_app->mutex)
 {
 	struct gk20a *g = &l->g;
+	struct gk20a_dmabuf_priv *priv = NULL;
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct nvgpu_cbc *cbc = g->cbc;
 	struct gk20a_comptags comptags;
@@ -1069,10 +1070,13 @@ __releases(&l->cde_app->mutex)
 	/* First, map the buffer to local va */
 
 	/* ensure that the compbits buffer has drvdata */
-	err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
-					 dev_from_gk20a(g));
-	if (err)
+	priv = gk20a_dma_buf_get_drvdata(compbits_scatter_buf,
+					 dev_from_gk20a(g));
+	if (!priv) {
+		err = -EINVAL;
+		nvgpu_err(g, "Compbits buffer has no metadata");
 		goto exit_idle;
+	}
 
 	/* compbits don't start at page aligned offset, so we need to align
 	   the region to be mapped */
@@ -1749,11 +1753,22 @@ int gk20a_prepare_compressible_read(
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
 	u32 missing_bits;
+	struct gk20a_dmabuf_priv *priv = NULL;
 
 	dmabuf = dma_buf_get(buffer_fd);
 	if (IS_ERR(dmabuf))
 		return -EINVAL;
 
+	/* this function is nop for incompressible buffers */
+	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev_from_gk20a(g));
+	if (!priv || !priv->comptags.enabled) {
+		nvgpu_log_info(g, "comptags not enabled for the buffer");
+		*valid_compbits = NVGPU_GPU_COMPBITS_NONE;
+		*zbc_color = 0;
+		dma_buf_put(dmabuf);
+		return 0;
+	}
+
 	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		dma_buf_put(dmabuf);
@@ -1811,6 +1826,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 	int err;
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
+	struct gk20a_dmabuf_priv *priv = NULL;
 
 	dmabuf = dma_buf_get(buffer_fd);
 	if (IS_ERR(dmabuf)) {
@@ -1818,6 +1834,14 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 		return -EINVAL;
 	}
 
+	/* this function is nop for incompressible buffers */
+	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev_from_gk20a(g));
+	if (!priv || !priv->comptags.enabled) {
+		nvgpu_log_info(g, "comptags not allocated for the buffer");
+		dma_buf_put(dmabuf);
+		return 0;
+	}
+
 	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		nvgpu_err(g, "could not get state from dmabuf");


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -26,8 +26,8 @@
 void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 			struct gk20a_comptags *comptags)
 {
-	struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
-							buf->dev);
+	struct gk20a_dmabuf_priv *priv =
+		gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
 
 	if (!comptags)
 		return;
@@ -42,80 +42,85 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 	nvgpu_mutex_release(&priv->lock);
 }
 
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags)
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator)
 {
-	int ret = 0;
 	struct gk20a_dmabuf_priv *priv = NULL;
-	u32 offset;
-	int err;
 	u64 ctag_granularity;
-	u32 lines;
-
-	ret = gk20a_dmabuf_alloc_drvdata(buf->dmabuf, buf->dev);
-	if (ret) {
-		nvgpu_err(g, "error allocating comptags priv data");
-		return ret;
-	}
-
-	priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
-	if (!priv)
-		return -ENOSYS;
-
-	nvgpu_mutex_acquire(&priv->lock);
-
-	if (priv->comptags.allocated) {
-		/*
-		 * already allocated
-		 */
-		*comptags = priv->comptags;
-		err = 0;
-		goto exit_locked;
-	}
+	u32 offset = 0;
+	u32 lines = 0;
+	int err;
 
 	ctag_granularity = g->ops.fb.compression_page_size(g);
 	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
 
 	/* 0-sized buffer? Shouldn't occur, but let's check anyways. */
 	if (lines < 1) {
-		err = -EINVAL;
-		goto exit_locked;
+		nvgpu_err(g, "zero sized buffer. comptags not allocated.");
+		return -EINVAL;
 	}
 
+	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
+	if (err != 0) {
+		/*
+		 * Note: we must prevent reallocation attempt in case the
+		 * allocation failed. Otherwise a later successful allocation
+		 * could cause corruption because interop endpoints have
+		 * conflicting compression states with the maps
+		 */
+		nvgpu_err(g, "Comptags allocation failed %d", err);
+		lines = 0;
	}
+
+	/* Note that privdata is not validated here as it is available here. */
+	priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
+
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
 
-	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
-	if (!err) {
-		priv->comptags.offset = offset;
-		priv->comptags.lines = lines;
-		priv->comptags.needs_clear = true;
-	} else {
-		priv->comptags.offset = 0;
-		priv->comptags.lines = 0;
-		priv->comptags.needs_clear = false;
-	}
-
-	/*
-	 * We don't report an error here if comptag alloc failed. The
-	 * caller will simply fallback to incompressible kinds. It
-	 * would not be safe to re-allocate comptags anyways on
-	 * successive calls, as that would break map aliasing.
-	 */
-	err = 0;
+	priv->comptags.offset = offset;
+	priv->comptags.lines = lines;
+	priv->comptags.needs_clear = (lines != 0);
 	priv->comptags.allocated = true;
+	priv->comptags.enabled = (lines != 0);
 
-	*comptags = priv->comptags;
-
-exit_locked:
-	nvgpu_mutex_release(&priv->lock);
-
 	return err;
 }
 
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags)
+{
+	struct gk20a_dmabuf_priv *priv = NULL;
+	int err;
+
+	if (!comptags)
+		return;
+
+	err = gk20a_dmabuf_alloc_or_get_drvdata(buf->dmabuf, buf->dev, &priv);
+	if (err != 0) {
+		(void) memset(comptags, 0, sizeof(*comptags));
+		return;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	/*
+	 * Try to allocate only if metadata is not locked. However, we
+	 * don't re-enable explicitly disabled comptags.
+	 */
+	if (!priv->registered || priv->mutable_metadata) {
+		if (!priv->comptags.allocated) {
+			gk20a_alloc_comptags(g, buf, allocator);
+		}
+	}
+
+	*comptags = priv->comptags;
+
+	nvgpu_mutex_release(&priv->lock);
+}
+
 bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
 {
 	struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,

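One locking detail worth spelling out (an assumption read off the two
call sites in this patch, not a documented contract): the new
gk20a_alloc_comptags() touches priv->comptags without taking
priv->lock itself, so callers are expected to hold it, as in this
hypothetical sketch:

static void alloc_comptags_under_lock(struct gk20a *g,
				      struct nvgpu_os_buffer *buf,
				      struct gk20a_dmabuf_priv *priv)
{
	nvgpu_mutex_acquire(&priv->lock);
	if (!priv->comptags.allocated) {
		/* the allocator is stored in priv for the later free */
		(void) gk20a_alloc_comptags(g, buf, &g->cbc->comp_tags);
	}
	nvgpu_mutex_release(&priv->lock);
}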

@@ -91,11 +91,10 @@ static void nvgpu_dma_buf_release(struct dma_buf *dmabuf)
 	dmabuf->ops->release(dmabuf);
 }
 
+/* This function must be called with priv->lock held */
 static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *device,
 			struct gk20a_dmabuf_priv *priv)
 {
-	nvgpu_mutex_acquire(&priv->lock);
-
 	priv->dmabuf = dmabuf;
 
 	mutex_lock(&dmabuf->lock);
@@ -109,8 +108,6 @@ static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *devi
 	dmabuf->ops = &priv->local_ops;
 	mutex_unlock(&dmabuf->lock);
 
-	nvgpu_mutex_release(&priv->lock);
-
 	return 0;
 }
 
@@ -210,6 +207,10 @@ void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv)
 	/* Remove this entry from the global tracking list */
 	nvgpu_list_del(&priv->list);
 
+	if (priv->metadata_blob) {
+		nvgpu_kfree(g, priv->metadata_blob);
+	}
+
 	nvgpu_kfree(g, priv);
 }
 
@@ -225,46 +226,50 @@ void gk20a_dma_buf_priv_list_clear(struct nvgpu_os_linux *l)
 	nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
 }
 
-int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
+int gk20a_dmabuf_alloc_or_get_drvdata(struct dma_buf *dmabuf, struct device *dev,
+		struct gk20a_dmabuf_priv **priv_ptr)
 {
 	struct gk20a *g = gk20a_get_platform(dev)->g;
-	struct gk20a_dmabuf_priv *priv;
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct gk20a_dmabuf_priv *priv;
+	int err = 0;
 
 	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
-
-	if (likely(priv))
-		return 0;
-
-	nvgpu_mutex_acquire(&g->mm.priv_lock);
-	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
-	if (priv)
-		goto priv_exist_or_err;
+	if (priv) {
+		nvgpu_log_info(g, "Buffer metadata already allocated");
+		*priv_ptr = priv;
+		goto out;
+	}
 
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
 	if (!priv) {
-		priv = ERR_PTR(-ENOMEM);
-		goto priv_exist_or_err;
+		err = -ENOMEM;
+		nvgpu_err(g, "Buffer metadata allocation failed");
+		goto out;
 	}
 
 	nvgpu_mutex_init(&priv->lock);
-	nvgpu_init_list_node(&priv->states);
+
+	nvgpu_mutex_acquire(&priv->lock);
 	priv->g = g;
-	gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
 	nvgpu_init_list_node(&priv->list);
+	nvgpu_init_list_node(&priv->states);
+	gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
+	nvgpu_mutex_release(&priv->lock);
 
 	/* Append this priv to the global tracker */
 	nvgpu_mutex_acquire(&l->dmabuf_priv_list_lock);
 	nvgpu_list_add_tail(&l->dmabuf_priv_list, &priv->list);
 	nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
 
-priv_exist_or_err:
-	nvgpu_mutex_release(&g->mm.priv_lock);
-	if (IS_ERR(priv))
-		return -ENOMEM;
+	*priv_ptr = priv;
 
-	return 0;
+out:
+	return err;
 }
 
@@ -280,10 +285,6 @@ int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 		return -EINVAL;
 	}
 
-	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
-	if (err)
-		return err;
-
 	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
 	if (!priv) {
 		nvgpu_do_assert();


@@ -74,9 +74,13 @@ struct gk20a_dmabuf_priv {
 	struct gk20a_comptag_allocator *comptag_allocator;
 	struct gk20a_comptags comptags;
 
-	struct nvgpu_list_node states;
+	u8 *metadata_blob;
+	u32 metadata_blob_size;
 
-	u64 buffer_id;
+	bool registered;
+	bool mutable_metadata;
+
+	struct nvgpu_list_node states;
 
 	/* Used for retrieving the associated dmabuf from the priv */
 	struct dma_buf *dmabuf;
@@ -109,7 +113,8 @@ void nvgpu_mm_unpin(struct device *dev,
 void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv);
 
-int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
+int gk20a_dmabuf_alloc_or_get_drvdata(struct dma_buf *dmabuf, struct device *dev,
+		struct gk20a_dmabuf_priv **priv_ptr);
 
 int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 			   u64 offset, struct gk20a_buffer_state **state);


@@ -266,7 +266,6 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
 			platform->force_128K_pmu_vm);
 
 	nvgpu_mutex_init(&g->mm.tlb_lock);
-	nvgpu_mutex_init(&g->mm.priv_lock);
 }
 
 int nvgpu_probe(struct gk20a *g,


@@ -23,6 +23,7 @@
 #include <nvgpu/pmu/clk/clk.h>
 #include <nvgpu/bitops.h>
+#include <nvgpu/comptags.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/nvhost.h>
 #include <nvgpu/bug.h>
@@ -57,6 +58,7 @@
 #include <nvgpu/user_fence.h>
 #include <nvgpu/nvgpu_init.h>
 #include <nvgpu/grmgr.h>
+#include <nvgpu/string.h>
 
 #include "ioctl_ctrl.h"
 #include "ioctl_dbg.h"
@@ -65,6 +67,7 @@
 #include "ioctl_channel.h"
 #include "ioctl.h"
 
+#include "dmabuf_priv.h"
 #include "platform_gk20a.h"
 #include "os_linux.h"
 #include "channel.h"
@@ -300,6 +303,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED},
 	{NVGPU_GPU_FLAGS_SUPPORT_VAB,
 		NVGPU_SUPPORT_VAB_ENABLED},
+	{NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA,
+		NVGPU_SUPPORT_BUFFER_METADATA},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
@@ -1911,6 +1916,190 @@ out:
 }
 #endif
 
+#ifdef CONFIG_NVGPU_COMPRESSION
+static int nvgpu_handle_comptags_control(struct gk20a *g,
+		struct dma_buf *dmabuf,
+		struct gk20a_dmabuf_priv *priv,
+		u8 comptags_alloc_control)
+{
+	struct nvgpu_os_buffer os_buf = {0};
+	int err = 0;
+
+	if (comptags_alloc_control == NVGPU_GPU_COMPTAGS_ALLOC_NONE) {
+		if (priv->comptags.allocated) {
+			/*
+			 * Just mark the comptags as disabled. Comptags will be
+			 * freed on freeing the buffer.
+			 */
+			priv->comptags.enabled = false;
+			nvgpu_log_info(g, "Comptags disabled.");
+		}
+		return 0;
+	}
+
+	/* Allocate the comptags if requested/required. */
+	if (priv->comptags.allocated) {
+		priv->comptags.enabled = priv->comptags.lines > 0;
+		if (priv->comptags.enabled) {
+			nvgpu_log_info(g, "Comptags enabled.");
+			return 0;
+		} else {
+			if (comptags_alloc_control ==
+					NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+				nvgpu_err(g,
+					"Previous allocation has failed, could not enable comptags (required)");
+				return -ENOMEM;
+			} else {
+				nvgpu_log_info(g,
+					"Previous allocation has failed, could not enable comptags (requested)");
+				return 0;
+			}
+		}
+	}
+
+	os_buf.dmabuf = dmabuf;
+	os_buf.dev = dev_from_gk20a(g);
+
+	err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags);
+	if (err != 0) {
+		if (comptags_alloc_control ==
+				NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+			nvgpu_err(g, "Comptags allocation (required) failed (%d)",
+				err);
+		} else {
+			nvgpu_err(g, "Comptags allocation (requested) failed (%d)",
+				err);
+			err = 0;
+		}
+	}
+
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_register_buffer(struct gk20a *g,
+		struct nvgpu_gpu_register_buffer_args *args)
+{
+	struct gk20a_dmabuf_priv *priv = NULL;
+	bool mutable_metadata = false;
+	bool modify_metadata = false;
+	struct dma_buf *dmabuf;
+	u8 *blob_copy = NULL;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_BUFFER_METADATA)) {
+		nvgpu_err(g, "Buffer metadata not supported");
+		return -EINVAL;
+	}
+
+	if (args->metadata_size > NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE) {
+		nvgpu_err(g, "Invalid metadata blob size");
+		return -EINVAL;
+	}
+
+	if (args->comptags_alloc_control > NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+		nvgpu_err(g, "Invalid comptags_alloc_control");
+		return -EINVAL;
+	}
+
+	nvgpu_log_info(g, "dmabuf_fd: %d, comptags control: %u, metadata size: %u, flags: %u",
+		args->dmabuf_fd, args->comptags_alloc_control,
+		args->metadata_size, args->flags);
+
+	mutable_metadata = (args->flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE) != 0;
+	modify_metadata = (args->flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY) != 0;
+
+	dmabuf = dma_buf_get(args->dmabuf_fd);
+	if (IS_ERR(dmabuf)) {
+		nvgpu_warn(g, "%s: fd %d is not a dmabuf",
+			__func__, args->dmabuf_fd);
+		return PTR_ERR(dmabuf);
+	}
+
+	/*
+	 * Allocate or get the buffer metadata state.
+	 */
+	err = gk20a_dmabuf_alloc_or_get_drvdata(
+		dmabuf, dev_from_gk20a(g), &priv);
+	if (err != 0) {
+		nvgpu_err(g, "Error allocating buffer metadata %d", err);
+		goto out;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	/* Check for valid buffer metadata re-registration */
+	if (priv->registered) {
+		if (!modify_metadata) {
+			nvgpu_err(g, "attempt to modify buffer metadata without NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY");
+			err = -EINVAL;
+			goto out_priv_unlock;
+		} else if (!priv->mutable_metadata) {
+			nvgpu_err(g, "attempt to redefine immutable metadata");
+			err = -EINVAL;
+			goto out_priv_unlock;
+		}
+	}
+
+	/* Allocate memory for the metadata blob */
+	blob_copy = nvgpu_kzalloc(g, args->metadata_size);
+	if (!blob_copy) {
+		nvgpu_err(g, "Error allocating memory for blob");
+		err = -ENOMEM;
+		goto out_priv_unlock;
+	}
+
+	/* Copy the metadata blob */
+	if (copy_from_user(blob_copy,
+			(void __user *) args->metadata_addr,
+			args->metadata_size)) {
+		err = -EFAULT;
+		nvgpu_err(g, "Error copying buffer metadata blob");
+		goto out_priv_unlock;
+	}
+
+	/* Comptags allocation */
+	err = nvgpu_handle_comptags_control(g, dmabuf, priv,
+			args->comptags_alloc_control);
+	if (err != 0) {
+		nvgpu_err(g, "Comptags alloc control failed %d", err);
+		goto out_priv_unlock;
+	}
+
+	/* All done, update metadata blob */
+	nvgpu_kfree(g, priv->metadata_blob);
+	priv->metadata_blob = blob_copy;
+	priv->metadata_blob_size = args->metadata_size;
+	blob_copy = NULL;
+
+	/* Mark registered and update mutability */
+	priv->registered = true;
+	priv->mutable_metadata = mutable_metadata;
+
+	/* Output variables */
+	args->flags = 0;
+	if (priv->comptags.enabled) {
+		args->flags |=
+			NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED;
+	}
+
+	nvgpu_log_info(g, "buffer registered: mutable: %s, metadata size: %u, flags: 0x%8x",
+		priv->mutable_metadata ? "yes" : "no", priv->metadata_blob_size,
+		args->flags);
+
+out_priv_unlock:
+	nvgpu_mutex_release(&priv->lock);
+out:
+	dma_buf_put(dmabuf);
+	nvgpu_kfree(g, blob_copy);
+
+	return err;
+}
+#endif
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -2268,6 +2457,13 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
 		break;
 
+#ifdef CONFIG_NVGPU_COMPRESSION
+	case NVGPU_GPU_IOCTL_REGISTER_BUFFER:
+		err = nvgpu_gpu_ioctl_register_buffer(g,
+			(struct nvgpu_gpu_register_buffer_args *)buf);
+		break;
+#endif
+
 	default:
 		nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;


@@ -326,6 +326,9 @@ void gk20a_init_linux_characteristics(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_REMAP, true);
+#ifdef CONFIG_NVGPU_COMPRESSION
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_BUFFER_METADATA, true);
+#endif
 
 	if (!IS_ENABLED(CONFIG_NVGPU_SYNCFD_NONE)) {
 		nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);


@@ -68,12 +68,6 @@ int nvgpu_vm_remap_os_buf_get(struct vm_gk20a *vm,
 		goto clean_up;
 	}
 
-	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
-	if (err) {
-		nvgpu_warn(g, "failed to alloc drvdata");
-		goto clean_up;
-	}
-
 	if ((op->flags & NVGPU_VM_REMAP_OP_FLAGS_ACCESS_NO_WRITE) != 0) {
 		dmabuf_direction = DMA_TO_DEVICE;
 	} else {


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -30,14 +30,19 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 {
 }
 
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags)
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator)
 {
 	return -ENODEV;
 }
 
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags)
+{
+}
+
 bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
 {
 	return false;


@@ -186,7 +186,11 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_SMPC_GLOBAL_MODE	(1ULL << 48)
 /* Retrieving contents of graphics context is supported */
 #define NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT	(1ULL << 49)
-/* Additional buffer metadata association supported */
+/*
+ * Note: Additional buffer metadata association support. This feature is only
+ * for supporting legacy userspace APIs and for compatibility with desktop
+ * RM behavior. Usage of this feature should be avoided.
+ */
 #define NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA	(1ULL << 50)
 /* Flag to indicate whether configuring L2_MAXEVICTLAST_WAYS is supported */
 #define NVGPU_GPU_FLAGS_L2_MAX_WAYS_EVICT_LAST_ENABLED	(1ULL << 51)
@@ -940,6 +944,25 @@ struct nvgpu_gpu_set_deterministic_opts_args {
 	__u64 channels; /* in */
 };
 
+/*
+ * register buffer information ioctl.
+ *
+ * Note: Additional metadata is associated with the buffer only for supporting
+ * legacy userspace APIs and for compatibility with desktop RM. Usage of this
+ * API should be avoided.
+ *
+ * This ioctl allocates comptags for the buffer if requested/required
+ * by libnvrm_gpu and associates the metadata blob sent by libnvrm_gpu
+ * with the buffer in the buffer privdata.
+ *
+ * return 0 on success, < 0 in case of failure.
+ * retval -EINVAL if the enabled flag NVGPU_SUPPORT_BUFFER_METADATA
+ * isn't set or the params are invalid.
+ * retval -ENOMEM if sufficient memory is not available for
+ * privdata or comptags.
+ * retval -EFAULT if the metadata blob copy fails.
+ */
+
 /*
  * NVGPU_GPU_COMPTAGS_ALLOC_NONE: Specified to not allocate comptags
  * for the buffer.
@@ -967,20 +990,46 @@ struct nvgpu_gpu_set_deterministic_opts_args {
 */
 #define NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED	(1U << 0)
 
+/*
+ * Specify buffer registration as mutable. This allows modifying the buffer
+ * attributes by calling this IOCTL again with
+ * NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY.
+ *
+ * Mutable registration is intended for private buffers where the physical
+ * memory allocation may be recycled. Buffers intended for interoperability
+ * should be specified without this flag.
+ */
+#define NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE	(1U << 1)
+
+/*
+ * Re-register the buffer. When this flag is set, the buffer comptags state,
+ * metadata binary blob, and other attributes are re-defined.
+ *
+ * This flag may be set only when the buffer was previously registered as
+ * mutable. This flag is ignored when the buffer is registered for the
+ * first time.
+ *
+ * If the buffer previously had comptags and the re-registration also
+ * specifies comptags, the associated comptags are not cleared.
+ */
+#define NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY	(1U << 2)
+
 /* Maximum size of the user supplied buffer metadata */
 #define NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE	256U
 
 /*
- * REGISTER_BUFFER ioctl is supported when the enabled flag
- * NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA is set. It will
- * return -EINVAL if that enabled flag isn't enabled.
+ * register buffer ioctl arguments struct.
+ *
+ * Note: Additional metadata is associated with the buffer only for supporting
+ * legacy userspace APIs and for compatibility with desktop RM. Usage of this
+ * API should be avoided.
  */
 struct nvgpu_gpu_register_buffer_args {
 	/* [in] dmabuf fd */
 	__s32 dmabuf_fd;
 
 	/*
 	 * [in] Compression tags allocation control.
 	 *
 	 * Set to one of the NVGPU_GPU_COMPTAGS_ALLOC_* values. See the
 	 * description of the values for semantics of this field.
@@ -990,7 +1039,7 @@ struct nvgpu_gpu_register_buffer_args {
 	__u16 reserved1;
 
 	/*
-	 * [in] Pointer to buffer metadata. 
+	 * [in] Pointer to buffer metadata.
 	 *
 	 * This is a binary blob populated by nvrm_gpu that will be associated
 	 * with the dmabuf.
@@ -1000,9 +1049,8 @@ struct nvgpu_gpu_register_buffer_args {
 	/* [in] buffer metadata size */
 	__u32 metadata_size;
 
 	/*
-	 * [out] flags.
+	 * [in/out] flags.
 	 *
 	 * See description of NVGPU_GPU_REGISTER_BUFFER_FLAGS_* for semantics
 	 * of this field.
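
As a usage note for the MUTABLE/MODIFY flags above, a hedged userspace
sketch of re-registration (hypothetical helper; same headers as the
sketch in the commit message):

static int reregister_buffer_metadata(int ctrl_fd, int dmabuf_fd,
				      const void *blob, __u32 blob_size)
{
	struct nvgpu_gpu_register_buffer_args args;

	/* First registration: keep the metadata mutable. */
	memset(&args, 0, sizeof(args));
	args.dmabuf_fd = dmabuf_fd;
	args.comptags_alloc_control = NVGPU_GPU_COMPTAGS_ALLOC_NONE;
	args.metadata_addr = (__u64)(uintptr_t)blob;
	args.metadata_size = blob_size;
	args.flags = NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE;
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_REGISTER_BUFFER, &args) != 0)
		return -1;

	/*
	 * Re-registration: MODIFY is required, and mutability must be
	 * requested again or the metadata becomes immutable from here on.
	 */
	memset(&args, 0, sizeof(args));
	args.dmabuf_fd = dmabuf_fd;
	args.comptags_alloc_control = NVGPU_GPU_COMPTAGS_ALLOC_NONE;
	args.metadata_addr = (__u64)(uintptr_t)blob;
	args.metadata_size = blob_size;
	args.flags = NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE |
		     NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY;
	return ioctl(ctrl_fd, NVGPU_GPU_IOCTL_REGISTER_BUFFER, &args);
}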