diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 5bc677c02..4884305d9 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -1218,21 +1218,20 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 		struct gk20a_comptags comptags = { 0 };
 
 		/*
-		 * Get the comptags state, alloc if necessary
+		 * Get the comptags state
 		 */
-		err = gk20a_alloc_or_get_comptags(g, os_buf,
-						  &g->cbc->comp_tags,
-						  &comptags);
-		if (err != 0) {
+		gk20a_get_comptags(os_buf, &comptags);
+
+		if (!comptags.allocated) {
+			nvgpu_log_info(g, "compr kind %d map requested without comptags allocated, allocating...",
+				binfo_ptr->compr_kind);
+
 			/*
-			 * This is an irrecoverable failure and we need to
-			 * abort. In particular, it is not safe to proceed with
-			 * the incompressible fallback, since we cannot not mark
-			 * our alloc failure anywere. Later we would retry
-			 * allocation and break compressible map aliasing.
+			 * best effort only, we don't really care if
+			 * this fails
 			 */
-			nvgpu_err(g, "Error %d setting up comptags", err);
-			goto ret_err;
+			gk20a_alloc_or_get_comptags(
+				g, os_buf, &g->cbc->comp_tags, &comptags);
 		}
 
 		/*
@@ -1261,9 +1260,9 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 		}
 
 		/*
-		 * Store the ctag offset for later use if we got the comptags
+		 * Store the ctag offset for later use if we have the comptags
 		 */
-		if (comptags.lines != 0U) {
+		if (comptags.enabled) {
 			ctag_offset = comptags.offset;
 		}
 	}
diff --git a/drivers/gpu/nvgpu/common/mm/vm_remap.c b/drivers/gpu/nvgpu/common/mm/vm_remap.c
index ccd729ea8..92fffb368 100644
--- a/drivers/gpu/nvgpu/common/mm/vm_remap.c
+++ b/drivers/gpu/nvgpu/common/mm/vm_remap.c
@@ -174,42 +174,6 @@ nvgpu_vm_remap_mpool_find(struct nvgpu_rbtree_node *root,
 	return nvgpu_vm_remap_mpool_from_tree_entry(node);
 }
 
-#ifdef CONFIG_NVGPU_COMPRESSION
-/*
- * Ensure that compression resources are allocated to the specified
- * physical memory buffer.
- */
-static inline int nvgpu_vm_remap_ensure_comptags(struct vm_gk20a *vm,
-				struct nvgpu_vm_remap_os_buffer *remap_os_buf)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_comptags comptags = { 0 };
-	struct nvgpu_os_buffer *os_buf = &remap_os_buf->os_buf;
-	int err = 0;
-
-	err = gk20a_alloc_or_get_comptags(g, os_buf,
-					  &g->cbc->comp_tags,
-					  &comptags);
-	if (err != 0) {
-		nvgpu_err(g, "cannot alloc comptags: %d", err);
-		return err;
-	}
-
-	if (comptags.needs_clear) {
-		nvgpu_assert(g->ops.cbc.ctrl != NULL);
-		if (gk20a_comptags_start_clear(os_buf)) {
-			err = g->ops.cbc.ctrl(g, nvgpu_cbc_op_clear,
-					      comptags.offset,
-					      (comptags.offset +
-					       comptags.lines - 1U));
-			gk20a_comptags_finish_clear(os_buf, err == 0);
-		}
-	}
-
-	return err;
-}
-#endif
-
 /*
  * Validate that the specified remap operation resides within the target
  * virtual memory pool.
@@ -263,10 +227,41 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
 
 #ifdef CONFIG_NVGPU_COMPRESSION
 	if (op->compr_kind != NVGPU_KIND_INVALID) {
-		if (nvgpu_vm_remap_ensure_comptags(vm, remap_os_buf)) {
-			/* inform caller there are no more compbits */
+
+		struct gk20a *g = gk20a_from_vm(vm);
+		struct gk20a_comptags comptags = { 0 };
+
+		/*
+		 * Note: this is best-effort only
+		 */
+		gk20a_alloc_or_get_comptags(g, &remap_os_buf->os_buf,
+					    &g->cbc->comp_tags, &comptags);
+
+		if (!comptags.enabled) {
+			/* inform the caller that the buffer does not
+			 * have compbits */
 			op->compr_kind = NVGPU_KIND_INVALID;
 		}
+
+		if (comptags.needs_clear) {
+			nvgpu_assert(g->ops.cbc.ctrl != NULL);
+			if (gk20a_comptags_start_clear(&remap_os_buf->os_buf)) {
+				int err = g->ops.cbc.ctrl(
+					g, nvgpu_cbc_op_clear,
+					comptags.offset,
+					(comptags.offset +
+					 comptags.lines - 1U));
+				gk20a_comptags_finish_clear(
+					&remap_os_buf->os_buf, err == 0);
+
+				if (err) {
+					nvgpu_err(
+						g, "Comptags clear failed: %d",
+						err);
+					op->compr_kind = NVGPU_KIND_INVALID;
+				}
+			}
+		}
 	}
 #endif
diff --git a/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
index 4694cf907..044e6bd52 100644
--- a/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
@@ -47,7 +47,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 	}
 
 	nvgpu_mutex_init(&mm->tlb_lock);
-	nvgpu_mutex_init(&mm->priv_lock);
 
 	mm->g = g;
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/comptags.h b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
index bb5eb7d10..4593a81cf 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/comptags.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,17 @@ struct gk20a_comptags {
 	 */
 	bool allocated;
 
+	/*
+	 * "enabled" indicates if the comptags are in use for mapping the buffer
+	 * as compressible. Buffer comptags usage may be changed at runtime by
+	 * buffer metadata re-registration. However, comptags once allocated
+	 * are freed only on freeing the buffer.
+	 *
+	 * "enabled" implies that comptags have been successfully allocated
+	 * (offset > 0 and lines > 0)
+	 */
+	bool enabled;
+
 	/*
 	 * Do comptags need to be cleared before mapping?
 	 */
@@ -77,13 +88,16 @@ void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
  * Defined by OS specific code since comptags are stored in a highly OS specific
  * way.
  */
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags);
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator);
 
 void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 			struct gk20a_comptags *comptags);
 
+/* legacy support */
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags);
+
 /*
  * These functions must be used to synchronize comptags clear. The usage:
  *
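For reference, a minimal sketch of how a consumer of the new "allocated"/"enabled" split is expected to behave (not part of the patch; the helper name is hypothetical, the called functions and fields are the ones declared above). Mapping decisions must come from "enabled" alone; "allocated" only records that an allocation attempt was made for the buffer's lifetime and must not be retried:

    static bool buf_use_compressible_kind(struct nvgpu_os_buffer *os_buf)
    {
    	struct gk20a_comptags comptags = { 0 };

    	/* Read the comptags state stored in the buffer privdata. */
    	gk20a_get_comptags(os_buf, &comptags);

    	/*
    	 * "enabled" implies a successful allocation that is currently in
    	 * use; "allocated" alone may describe a failed attempt, so it is
    	 * not sufficient for this decision.
    	 */
    	return comptags.enabled;
    }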
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index ac7bc3b8d..c4ea23e9f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -215,6 +215,7 @@ struct gk20a;
 	DEFINE_FLAG(NVGPU_CLK_ARB_ENABLED, "CLK_ARB support"),	\
 	DEFINE_FLAG(NVGPU_SUPPORT_VAB_ENABLED, "VAB feature supported"),	\
 	DEFINE_FLAG(NVGPU_SUPPORT_ROP_IN_GPC, "ROP is part of GPC"),	\
+	DEFINE_FLAG(NVGPU_SUPPORT_BUFFER_METADATA, "Buffer metadata support"),	\
 	DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
 
 /**
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h
index 7e8872a63..8fbcea65e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/mm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h
@@ -327,8 +327,6 @@ struct mm_gk20a {
 	struct nvgpu_mutex l2_op_lock;
 	/** Lock to serialize TLB operations. */
 	struct nvgpu_mutex tlb_lock;
-	/** Lock to serialize mm internal operations. */
-	struct nvgpu_mutex priv_lock;
 
 	struct nvgpu_mem bar2_desc;
 
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index c1a4961d1..289ca2c79 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -1020,6 +1020,7 @@ __acquires(&l->cde_app->mutex)
 __releases(&l->cde_app->mutex)
 {
 	struct gk20a *g = &l->g;
+	struct gk20a_dmabuf_priv *priv = NULL;
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct nvgpu_cbc *cbc = g->cbc;
 	struct gk20a_comptags comptags;
@@ -1069,10 +1070,13 @@ __releases(&l->cde_app->mutex)
 	/* First, map the buffer to local va */
 
 	/* ensure that the compbits buffer has drvdata */
-	err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
+	priv = gk20a_dma_buf_get_drvdata(compbits_scatter_buf,
 			dev_from_gk20a(g));
-	if (err)
+	if (!priv) {
+		err = -EINVAL;
+		nvgpu_err(g, "Compbits buffer has no metadata");
 		goto exit_idle;
+	}
 
 	/* compbits don't start at page aligned offset, so we need to align the
	   region to be mapped */
@@ -1749,11 +1753,22 @@ int gk20a_prepare_compressible_read(
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
 	u32 missing_bits;
+	struct gk20a_dmabuf_priv *priv = NULL;
 
 	dmabuf = dma_buf_get(buffer_fd);
 	if (IS_ERR(dmabuf))
 		return -EINVAL;
 
+	/* this function is a nop for incompressible buffers */
+	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev_from_gk20a(g));
+	if (!priv || !priv->comptags.enabled) {
+		nvgpu_log_info(g, "comptags not enabled for the buffer");
+		*valid_compbits = NVGPU_GPU_COMPBITS_NONE;
+		*zbc_color = 0;
+		dma_buf_put(dmabuf);
+		return 0;
+	}
+
 	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		dma_buf_put(dmabuf);
@@ -1811,6 +1826,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 	int err;
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
+	struct gk20a_dmabuf_priv *priv = NULL;
 
 	dmabuf = dma_buf_get(buffer_fd);
 	if (IS_ERR(dmabuf)) {
@@ -1818,6 +1834,14 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 		return -EINVAL;
 	}
 
+	/* this function is a nop for incompressible buffers */
+	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev_from_gk20a(g));
+	if (!priv || !priv->comptags.enabled) {
+		nvgpu_log_info(g, "comptags not allocated for the buffer");
+		dma_buf_put(dmabuf);
+		return 0;
+	}
+
 	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		nvgpu_err(g, "could not get state from dmabuf");
diff --git a/drivers/gpu/nvgpu/os/linux/comptags.c b/drivers/gpu/nvgpu/os/linux/comptags.c
index 0b99015ce..6f9c172f3 100644
--- a/drivers/gpu/nvgpu/os/linux/comptags.c
+++ b/drivers/gpu/nvgpu/os/linux/comptags.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -26,8 +26,8 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 			struct gk20a_comptags *comptags)
 {
-	struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
-							buf->dev);
+	struct gk20a_dmabuf_priv *priv =
+		gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
 
 	if (!comptags)
 		return;
@@ -42,80 +42,85 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 	nvgpu_mutex_release(&priv->lock);
 }
 
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags)
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator)
 {
-	int ret = 0;
-	struct gk20a_dmabuf_priv *priv = NULL;
-	u32 offset;
-	int err;
 	u64 ctag_granularity;
-	u32 lines;
-
-	ret = gk20a_dmabuf_alloc_drvdata(buf->dmabuf, buf->dev);
-	if (ret) {
-		nvgpu_err(g, "error allocating comptags priv data");
-		return ret;
-	}
-
-	priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
-	if (!priv)
-		return -ENOSYS;
-
-	nvgpu_mutex_acquire(&priv->lock);
-
-	if (priv->comptags.allocated) {
-		/*
-		 * already allocated
-		 */
-		*comptags = priv->comptags;
-
-		err = 0;
-		goto exit_locked;
-	}
+	struct gk20a_dmabuf_priv *priv = NULL;
+	u32 offset = 0;
+	u32 lines = 0;
+	int err;
 
 	ctag_granularity = g->ops.fb.compression_page_size(g);
 	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
 
 	/* 0-sized buffer? Shouldn't occur, but let's check anyways. */
 	if (lines < 1) {
-		err = -EINVAL;
-		goto exit_locked;
+		nvgpu_err(g, "zero sized buffer. comptags not allocated.");
+		return -EINVAL;
 	}
 
+	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
+	if (err != 0) {
+		/*
+		 * Note: we must prevent a reallocation attempt if the
+		 * allocation failed. Otherwise a later successful allocation
+		 * could cause corruption, because interop endpoints would
+		 * have compression states that conflict with the maps.
+		 */
+		nvgpu_err(g, "Comptags allocation failed %d", err);
+		lines = 0;
+	}
+
+	/* Note: privdata is not validated here since the callers
+	 * guarantee that it exists. */
+	priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
+
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
 
-	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
-	if (!err) {
-		priv->comptags.offset = offset;
-		priv->comptags.lines = lines;
-		priv->comptags.needs_clear = true;
-	} else {
-		priv->comptags.offset = 0;
-		priv->comptags.lines = 0;
-		priv->comptags.needs_clear = false;
-	}
-
 	/*
-	 * We don't report an error here if comptag alloc failed. The
-	 * caller will simply fallback to incompressible kinds. It
-	 * would not be safe to re-allocate comptags anyways on
-	 * successive calls, as that would break map aliasing.
-	 */
-	err = 0;
+	priv->comptags.offset = offset;
+	priv->comptags.lines = lines;
+	priv->comptags.needs_clear = (lines != 0);
 	priv->comptags.allocated = true;
-
-	*comptags = priv->comptags;
-
-exit_locked:
-	nvgpu_mutex_release(&priv->lock);
+	priv->comptags.enabled = (lines != 0);
 
 	return err;
 }
 
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags)
+{
+	struct gk20a_dmabuf_priv *priv = NULL;
+	int err;
+
+	if (!comptags)
+		return;
+
+	err = gk20a_dmabuf_alloc_or_get_drvdata(buf->dmabuf, buf->dev, &priv);
+	if (err != 0) {
+		(void) memset(comptags, 0, sizeof(*comptags));
+		return;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	/*
+	 * Try to allocate only if metadata is not locked. However, we
+	 * don't re-enable explicitly disabled comptags.
+	 */
+	if (!priv->registered || priv->mutable_metadata) {
+		if (!priv->comptags.allocated) {
+			gk20a_alloc_comptags(g, buf, allocator);
+		}
+	}
+
+	*comptags = priv->comptags;
+	nvgpu_mutex_release(&priv->lock);
+}
+
 bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
 {
 	struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf_priv.c b/drivers/gpu/nvgpu/os/linux/dmabuf_priv.c
index 1c42c919e..b3d7febe7 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf_priv.c
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf_priv.c
@@ -91,11 +91,10 @@ static void nvgpu_dma_buf_release(struct dma_buf *dmabuf)
 	dmabuf->ops->release(dmabuf);
 }
 
+/* This function must be called with priv->lock held */
 static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *device,
 				     struct gk20a_dmabuf_priv *priv)
 {
-	nvgpu_mutex_acquire(&priv->lock);
-
 	priv->dmabuf = dmabuf;
 
 	mutex_lock(&dmabuf->lock);
@@ -109,8 +108,6 @@ static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *devi
 	dmabuf->ops = &priv->local_ops;
 
 	mutex_unlock(&dmabuf->lock);
-	nvgpu_mutex_release(&priv->lock);
-
 	return 0;
 }
 
@@ -210,6 +207,10 @@ void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv)
 	/* Remove this entry from the global tracking list */
 	nvgpu_list_del(&priv->list);
 
+	if (priv->metadata_blob) {
+		nvgpu_kfree(g, priv->metadata_blob);
+	}
+
 	nvgpu_kfree(g, priv);
 }
 
@@ -225,46 +226,50 @@ void gk20a_dma_buf_priv_list_clear(struct nvgpu_os_linux *l)
 	nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
 }
 
-int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
+int gk20a_dmabuf_alloc_or_get_drvdata(struct dma_buf *dmabuf, struct device *dev,
+				      struct gk20a_dmabuf_priv **priv_ptr)
 {
 	struct gk20a *g = gk20a_get_platform(dev)->g;
-	struct gk20a_dmabuf_priv *priv;
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct gk20a_dmabuf_priv *priv;
+	int err = 0;
 
 	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
-
-	if (likely(priv))
-		return 0;
-
-	nvgpu_mutex_acquire(&g->mm.priv_lock);
-	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
-	if (priv)
-		goto priv_exist_or_err;
+	if (priv) {
+		nvgpu_log_info(g, "Buffer metadata already allocated");
+		*priv_ptr = priv;
+		goto out;
+	}
 
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
 	if (!priv) {
-		priv = ERR_PTR(-ENOMEM);
-		goto priv_exist_or_err;
+		err = -ENOMEM;
+		nvgpu_err(g, "Buffer metadata allocation failed");
+		goto out;
	}
 
 	nvgpu_mutex_init(&priv->lock);
-	nvgpu_init_list_node(&priv->states);
+
+	nvgpu_mutex_acquire(&priv->lock);
+
 	priv->g = g;
-	gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
 	nvgpu_init_list_node(&priv->list);
+	nvgpu_init_list_node(&priv->states);
+
+	gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
+
+	nvgpu_mutex_release(&priv->lock);
 
 	/* Append this priv to the global tracker */
 	nvgpu_mutex_acquire(&l->dmabuf_priv_list_lock);
 	nvgpu_list_add_tail(&l->dmabuf_priv_list, &priv->list);
 	nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
 
-priv_exist_or_err:
-	nvgpu_mutex_release(&g->mm.priv_lock);
-	if (IS_ERR(priv))
-		return -ENOMEM;
+	*priv_ptr = priv;
 
-	return 0;
+out:
+	return err;
 }
 
 int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
@@ -280,10 +285,6 @@ int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 		return -EINVAL;
 	}
 
-	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
-	if (err)
-		return err;
-
 	priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
 	if (!priv) {
 		nvgpu_do_assert();
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf_priv.h b/drivers/gpu/nvgpu/os/linux/dmabuf_priv.h
index 3df5b5d44..eb9a32c09 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf_priv.h
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf_priv.h
@@ -74,9 +74,13 @@ struct gk20a_dmabuf_priv {
 	struct gk20a_comptag_allocator *comptag_allocator;
 	struct gk20a_comptags comptags;
 
-	struct nvgpu_list_node states;
+	u8 *metadata_blob;
+	u32 metadata_blob_size;
 
-	u64 buffer_id;
+	bool registered;
+	bool mutable_metadata;
+
+	struct nvgpu_list_node states;
 
 	/* Used for retrieving the associated dmabuf from the priv */
 	struct dma_buf *dmabuf;
@@ -109,7 +113,8 @@ void nvgpu_mm_unpin(struct device *dev,
 
 void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv);
 
-int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
+int gk20a_dmabuf_alloc_or_get_drvdata(struct dma_buf *dmabuf, struct device *dev,
+				      struct gk20a_dmabuf_priv **priv_ptr);
 
 int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 			   u64 offset, struct gk20a_buffer_state **state);
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index f55caf833..5861e3969 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -266,7 +266,6 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
 			platform->force_128K_pmu_vm);
 
 	nvgpu_mutex_init(&g->mm.tlb_lock);
-	nvgpu_mutex_init(&g->mm.priv_lock);
 }
 
 int nvgpu_probe(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 4d9001b8e..e5f0a1aee 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -23,6 +23,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -57,6 +58,7 @@
 #include
 #include
 #include
+#include
 
 #include "ioctl_ctrl.h"
 #include "ioctl_dbg.h"
@@ -65,6 +67,7 @@
 #include "ioctl_channel.h"
 #include "ioctl.h"
 
+#include "dmabuf_priv.h"
 #include "platform_gk20a.h"
 #include "os_linux.h"
 #include "channel.h"
@@ -300,6 +303,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED},
 	{NVGPU_GPU_FLAGS_SUPPORT_VAB,
 		NVGPU_SUPPORT_VAB_ENABLED},
+	{NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA,
+		NVGPU_SUPPORT_BUFFER_METADATA},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
@@ -1911,6 +1916,190 @@ out:
 }
 #endif
 
+#ifdef CONFIG_NVGPU_COMPRESSION
+static int nvgpu_handle_comptags_control(struct gk20a *g,
+		struct dma_buf *dmabuf,
+		struct gk20a_dmabuf_priv *priv,
+		u8 comptags_alloc_control)
+{
+	struct nvgpu_os_buffer os_buf = {0};
+	int err = 0;
+
+	if (comptags_alloc_control == NVGPU_GPU_COMPTAGS_ALLOC_NONE) {
+		if (priv->comptags.allocated) {
+			/*
+			 * Just mark the comptags as disabled. Comptags will be
+			 * freed on freeing the buffer.
+			 */
+			priv->comptags.enabled = false;
+			nvgpu_log_info(g, "Comptags disabled.");
+		}
+
+		return 0;
+	}
+
+	/* Allocate the comptags if requested/required. */
+	if (priv->comptags.allocated) {
+		priv->comptags.enabled = priv->comptags.lines > 0;
+		if (priv->comptags.enabled) {
+			nvgpu_log_info(g, "Comptags enabled.");
+			return 0;
+		} else {
+			if (comptags_alloc_control ==
+					NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+				nvgpu_err(g,
+					"Previous allocation has failed, could not enable comptags (required)");
+				return -ENOMEM;
+			} else {
+				nvgpu_log_info(g,
+					"Previous allocation has failed, could not enable comptags (requested)");
+				return 0;
+			}
+		}
+	}
+
+	os_buf.dmabuf = dmabuf;
+	os_buf.dev = dev_from_gk20a(g);
+
+	err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags);
+	if (err != 0) {
+		if (comptags_alloc_control ==
+				NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+			nvgpu_err(g, "Comptags allocation (required) failed (%d)",
+				err);
+		} else {
+			nvgpu_err(g, "Comptags allocation (requested) failed (%d)",
+				err);
+			err = 0;
+		}
+	}
+
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_register_buffer(struct gk20a *g,
+		struct nvgpu_gpu_register_buffer_args *args)
+{
+	struct gk20a_dmabuf_priv *priv = NULL;
+	bool mutable_metadata = false;
+	bool modify_metadata = false;
+	struct dma_buf *dmabuf;
+	u8 *blob_copy = NULL;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_BUFFER_METADATA)) {
+		nvgpu_err(g, "Buffer metadata not supported");
+		return -EINVAL;
+	}
+
+	if (args->metadata_size > NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE) {
+		nvgpu_err(g, "Invalid metadata blob size");
+		return -EINVAL;
+	}
+
+	if (args->comptags_alloc_control > NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+		nvgpu_err(g, "Invalid comptags_alloc_control");
+		return -EINVAL;
+	}
+
+	nvgpu_log_info(g, "dmabuf_fd: %d, comptags control: %u, metadata size: %u, flags: %u",
+		args->dmabuf_fd, args->comptags_alloc_control,
+		args->metadata_size, args->flags);
+
+	mutable_metadata = (args->flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE) != 0;
+	modify_metadata = (args->flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY) != 0;
+
+	dmabuf = dma_buf_get(args->dmabuf_fd);
+	if (IS_ERR(dmabuf)) {
+		nvgpu_warn(g, "%s: fd %d is not a dmabuf",
+			__func__, args->dmabuf_fd);
+		return PTR_ERR(dmabuf);
+	}
+
+	/*
+	 * Allocate or get the buffer metadata state.
+	 */
+	err = gk20a_dmabuf_alloc_or_get_drvdata(
+		dmabuf, dev_from_gk20a(g), &priv);
+	if (err != 0) {
+		nvgpu_err(g, "Error allocating buffer metadata %d", err);
+		goto out;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	/* Check for valid buffer metadata re-registration */
+	if (priv->registered) {
+		if (!modify_metadata) {
+			nvgpu_err(g, "attempt to modify buffer metadata without NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY");
+			err = -EINVAL;
+			goto out_priv_unlock;
+		} else if (!priv->mutable_metadata) {
+			nvgpu_err(g, "attempt to redefine immutable metadata");
+			err = -EINVAL;
+			goto out_priv_unlock;
+		}
+	}
+
+	/* Allocate memory for the metadata blob */
+	blob_copy = nvgpu_kzalloc(g, args->metadata_size);
+	if (!blob_copy) {
+		nvgpu_err(g, "Error allocating memory for blob");
+		err = -ENOMEM;
+		goto out_priv_unlock;
+	}
+
+	/* Copy the metadata blob */
+	if (copy_from_user(blob_copy,
+			(void __user *) args->metadata_addr,
+			args->metadata_size)) {
+		err = -EFAULT;
+		nvgpu_err(g, "Error copying buffer metadata blob");
+		goto out_priv_unlock;
+	}
+
+	/* Comptags allocation */
+	err = nvgpu_handle_comptags_control(g, dmabuf, priv,
+			args->comptags_alloc_control);
+	if (err != 0) {
+		nvgpu_err(g, "Comptags alloc control failed %d", err);
+		goto out_priv_unlock;
+	}
+
+	/* All done, update metadata blob */
+	nvgpu_kfree(g, priv->metadata_blob);
+
+	priv->metadata_blob = blob_copy;
+	priv->metadata_blob_size = args->metadata_size;
+	blob_copy = NULL;
+
+	/* Mark registered and update mutability */
+	priv->registered = true;
+	priv->mutable_metadata = mutable_metadata;
+
+	/* Output variables */
+	args->flags = 0;
+	if (priv->comptags.enabled) {
+		args->flags |=
+			NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED;
+	}
+
+	nvgpu_log_info(g, "buffer registered: mutable: %s, metadata size: %u, flags: 0x%8x",
+		priv->mutable_metadata ?
"yes" : "no", priv->metadata_blob_size, + args->flags); + +out_priv_unlock: + nvgpu_mutex_release(&priv->lock); +out: + dma_buf_put(dmabuf); + nvgpu_kfree(g, blob_copy); + + return err; +} +#endif + long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct gk20a_ctrl_priv *priv = filp->private_data; @@ -2268,6 +2457,13 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg (struct nvgpu_gpu_set_deterministic_opts_args *)buf); break; +#ifdef CONFIG_NVGPU_COMPRESSION + case NVGPU_GPU_IOCTL_REGISTER_BUFFER: + err = nvgpu_gpu_ioctl_register_buffer(g, + (struct nvgpu_gpu_register_buffer_args *)buf); + break; +#endif + default: nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); err = -ENOTTY; diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index 9372bcb79..7b5d7d56f 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -326,6 +326,9 @@ void gk20a_init_linux_characteristics(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_REMAP, true); +#ifdef CONFIG_NVGPU_COMPRESSION + nvgpu_set_enabled(g, NVGPU_SUPPORT_BUFFER_METADATA, true); +#endif if (!IS_ENABLED(CONFIG_NVGPU_SYNCFD_NONE)) { nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true); diff --git a/drivers/gpu/nvgpu/os/linux/vm_remap.c b/drivers/gpu/nvgpu/os/linux/vm_remap.c index dded7e673..5cabc22e5 100644 --- a/drivers/gpu/nvgpu/os/linux/vm_remap.c +++ b/drivers/gpu/nvgpu/os/linux/vm_remap.c @@ -68,12 +68,6 @@ int nvgpu_vm_remap_os_buf_get(struct vm_gk20a *vm, goto clean_up; } - err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); - if (err) { - nvgpu_warn(g, "failed to alloc drvdata"); - goto clean_up; - } - if ((op->flags & NVGPU_VM_REMAP_OP_FLAGS_ACCESS_NO_WRITE) != 0) { dmabuf_direction = DMA_TO_DEVICE; } else { diff --git a/drivers/gpu/nvgpu/os/posix/posix-comptags.c b/drivers/gpu/nvgpu/os/posix/posix-comptags.c index a00246ddc..710e5cf1e 100644 --- a/drivers/gpu/nvgpu/os/posix/posix-comptags.c +++ b/drivers/gpu/nvgpu/os/posix/posix-comptags.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -30,14 +30,19 @@ void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
 {
 }
 
-int gk20a_alloc_or_get_comptags(struct gk20a *g,
-				struct nvgpu_os_buffer *buf,
-				struct gk20a_comptag_allocator *allocator,
-				struct gk20a_comptags *comptags)
+int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
+			 struct gk20a_comptag_allocator *allocator)
 {
 	return -ENODEV;
 }
 
+void gk20a_alloc_or_get_comptags(struct gk20a *g,
+				 struct nvgpu_os_buffer *buf,
+				 struct gk20a_comptag_allocator *allocator,
+				 struct gk20a_comptags *comptags)
+{
+}
+
 bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
 {
 	return false;
diff --git a/include/uapi/linux/nvgpu-ctrl.h b/include/uapi/linux/nvgpu-ctrl.h
index 9d43b6908..912ada3ab 100644
--- a/include/uapi/linux/nvgpu-ctrl.h
+++ b/include/uapi/linux/nvgpu-ctrl.h
@@ -186,7 +186,11 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_SMPC_GLOBAL_MODE	(1ULL << 48)
 /* Retrieving contents of graphics context is supported */
 #define NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT		(1ULL << 49)
-/* Additional buffer metadata association supported */
+/*
+ * Note: Additional buffer metadata association support. This feature is only
+ * for supporting legacy userspace APIs and for compatibility with desktop
+ * RM behavior. Usage of this feature should be avoided.
+ */
 #define NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA		(1ULL << 50)
 /* Flag to indicate whether configuring L2_MAXEVICTLAST_WAYS is supported */
 #define NVGPU_GPU_FLAGS_L2_MAX_WAYS_EVICT_LAST_ENABLED	(1ULL << 51)
@@ -940,6 +944,25 @@ struct nvgpu_gpu_set_deterministic_opts_args {
 	__u64 channels; /* in */
 };
 
+/*
+ * register buffer information ioctl.
+ *
+ * Note: Additional metadata is associated with the buffer only for supporting
+ * legacy userspace APIs and for compatibility with desktop RM. Usage of this
+ * API should be avoided.
+ *
+ * This ioctl allocates comptags for the buffer if requested/required
+ * by libnvrm_gpu and associates the metadata blob sent by libnvrm_gpu
+ * with the buffer in the buffer privdata.
+ *
+ * return 0 on success, < 0 in case of failure.
+ * retval -EINVAL if the enabled flag NVGPU_SUPPORT_BUFFER_METADATA
+ * isn't set or the parameters are invalid.
+ * retval -ENOMEM if sufficient memory is not available for
+ * privdata or comptags.
+ * retval -EFAULT if the metadata blob copy fails.
+ */
+
 /*
  * NVGPU_GPU_COMPTAGS_ALLOC_NONE: Specified to not allocate comptags
  * for the buffer.
@@ -967,20 +990,46 @@
  */
 #define NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED	(1U << 0)
 
+/*
+ * Specify buffer registration as mutable. This allows modifying the buffer
+ * attributes by calling this IOCTL again with NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY.
+ *
+ * Mutable registration is intended for private buffers where the physical
+ * memory allocation may be recycled. Buffers intended for interoperability
+ * should be specified without this flag.
+ */
+#define NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE		(1U << 1)
+
+/*
+ * Re-register the buffer. When this flag is set, the buffer comptags state,
+ * metadata binary blob, and other attributes are re-defined.
+ *
+ * This flag may be set only when the buffer was previously registered as
+ * mutable. This flag is ignored when the buffer is registered for the
+ * first time.
+ *
+ * If the buffer previously had comptags and the re-registration also specifies
+ * comptags, the associated comptags are not cleared.
+ *
+ */
+#define NVGPU_GPU_REGISTER_BUFFER_FLAGS_MODIFY		(1U << 2)
+
 /* Maximum size of the user supplied buffer metadata */
 #define NVGPU_GPU_REGISTER_BUFFER_METADATA_MAX_SIZE	256U
 
 /*
- * REGISTER_BUFFER ioctl is supported when the enabled flag
- * NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA is set. It will
- * return -EINVAL if that enabled flag isn't enabled.
+ * register buffer ioctl arguments struct.
+ *
+ * Note: Additional metadata is associated with the buffer only for supporting
+ * legacy userspace APIs and for compatibility with desktop RM. Usage of this
+ * API should be avoided.
 */
 struct nvgpu_gpu_register_buffer_args {
 	/* [in] dmabuf fd */
 	__s32 dmabuf_fd;
 
 	/*
-	 * [in] Compression tags allocation control.
+	 * [in] Compression tags allocation control.
 	 *
 	 * Set to one of the NVGPU_GPU_COMPTAGS_ALLOC_* values. See the
 	 * description of the values for semantics of this field.
@@ -990,7 +1039,7 @@ struct nvgpu_gpu_register_buffer_args {
 	__u16 reserved1;
 
 	/*
-	 * [in] Pointer to buffer metadata.
+	 * [in] Pointer to buffer metadata.
 	 *
 	 * This is a binary blob populated by nvrm_gpu that will be associated
 	 * with the dmabuf.
@@ -1000,9 +1049,8 @@ struct nvgpu_gpu_register_buffer_args {
 	/* [in] buffer metadata size */
 	__u32 metadata_size;
 
-
 	/*
-	 * [out] flags.
+	 * [in/out] flags.
 	 *
 	 * See description of NVGPU_GPU_REGISTER_BUFFER_FLAGS_* for semantics
 	 * of this field.
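For reference, a minimal userspace sketch of the registration path added above (not part of the patch; the helper name and the assumption that ctrl_fd is an already-open nvgpu ctrl node are illustrative, while the ioctl, struct fields, and flag names are the ones defined in this header):

    /*
     * Sketch only: register a dmabuf with a metadata blob and require
     * comptags. Assumes ctrl_fd refers to an open nvgpu ctrl device node.
     */
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu-ctrl.h>

    static int register_compressible_buffer(int ctrl_fd, int dmabuf_fd,
    					const void *blob, uint32_t blob_size)
    {
    	struct nvgpu_gpu_register_buffer_args args;

    	memset(&args, 0, sizeof(args));
    	args.dmabuf_fd = dmabuf_fd;
    	args.comptags_alloc_control = NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED;
    	args.metadata_addr = (uintptr_t)blob;
    	args.metadata_size = blob_size;
    	args.flags = NVGPU_GPU_REGISTER_BUFFER_FLAGS_MUTABLE;

    	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_REGISTER_BUFFER, &args) != 0)
    		return -1;

    	/* On return, flags reports whether comptags were actually allocated. */
    	return (args.flags & NVGPU_GPU_REGISTER_BUFFER_FLAGS_COMPTAGS_ALLOCATED) ? 1 : 0;
    }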