diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c index 1a0352f23..92332e04f 100644 --- a/drivers/gpu/nvgpu/common/profiler/profiler.c +++ b/drivers/gpu/nvgpu/common/profiler/profiler.c @@ -30,6 +30,7 @@ #include #include #include +#include static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); static int generate_unique_id(void) @@ -74,6 +75,7 @@ void nvgpu_profiler_free(struct nvgpu_profiler_object *prof) prof->prof_handle); nvgpu_profiler_unbind_context(prof); + nvgpu_profiler_free_pma_stream(prof); nvgpu_list_del(&prof->prof_obj_entry); nvgpu_kfree(g, prof); @@ -329,6 +331,7 @@ static int nvgpu_profiler_unbind_hwpm(struct nvgpu_profiler_object *prof) static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof) { + struct gk20a *g = prof->g; int err; err = nvgpu_profiler_bind_hwpm(prof, true); @@ -336,13 +339,25 @@ static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof return err; } + err = g->ops.perfbuf.perfbuf_enable(g, prof->pma_buffer_va, prof->pma_buffer_size); + if (err) { + nvgpu_profiler_unbind_hwpm(prof); + return err; + } + return 0; } static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *prof) { + struct gk20a *g = prof->g; int err; + err = g->ops.perfbuf.perfbuf_disable(g); + if (err) { + return err; + } + err = nvgpu_profiler_unbind_hwpm(prof); if (err) { return err; @@ -490,3 +505,48 @@ fail: gk20a_idle(g); return err; } + +int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + int err; + + err = nvgpu_profiler_pm_resource_reserve(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + if (err) { + nvgpu_err(g, "failed to reserve PMA stream"); + return err; + } + + err = nvgpu_perfbuf_init_vm(g); + if (err) { + nvgpu_err(g, "failed to initialize perfbuf VM"); + nvgpu_profiler_pm_resource_release(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + return err; + } + + 
return 0; +} + +void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + struct mm_gk20a *mm = &g->mm; + + if (prof->pma_buffer_va == 0U) { + return; + } + + nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_bytes_available_buffer_va, NULL); + prof->pma_bytes_available_buffer_va = 0U; + + nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_buffer_va, NULL); + prof->pma_buffer_va = 0U; + prof->pma_buffer_size = 0U; + + nvgpu_perfbuf_deinit_vm(g); + + nvgpu_profiler_pm_resource_release(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); +} diff --git a/drivers/gpu/nvgpu/include/nvgpu/profiler.h b/drivers/gpu/nvgpu/include/nvgpu/profiler.h index 8de31dd47..5588ee375 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/profiler.h +++ b/drivers/gpu/nvgpu/include/nvgpu/profiler.h @@ -82,6 +82,30 @@ struct nvgpu_profiler_object { * until this flag is set. */ bool bound; + + /* + * GPU VA of the PMA stream buffer (if PMA stream resource is reserved + * successfully) associated with this profiler object. + */ + u64 pma_buffer_va; + + /* + * Size of the PMA stream buffer (if PMA stream resource is reserved + * successfully) associated with this profiler object. + */ + u32 pma_buffer_size; + + /* + * GPU VA of the buffer that would store available bytes in PMA buffer + * (if PMA stream resource is reserved successfully). + */ + u64 pma_bytes_available_buffer_va; + + /* + * CPU VA of the buffer that would store available bytes in PMA buffer + * (if PMA stream resource is reserved successfully). 
+ */ + void *pma_bytes_available_buffer_cpuva; }; static inline struct nvgpu_profiler_object * @@ -108,5 +132,8 @@ int nvgpu_profiler_pm_resource_release(struct nvgpu_profiler_object *prof, int nvgpu_profiler_bind_pm_resources(struct nvgpu_profiler_object *prof); int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof); +int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof); +void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof); + #endif /* CONFIG_NVGPU_PROFILER */ #endif /* NVGPU_PROFILER_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c index b9dd69f63..6810d28d7 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include @@ -61,8 +63,16 @@ struct nvgpu_profiler_object_priv { * execution. */ struct nvgpu_dbg_reg_op *regops_staging_buf; + + /* + * dmabuf handle of the buffer that would store available bytes in PMA buffer + * (if PMA stream resource is reserved successfully). 
+	 */
+	struct dma_buf *pma_bytes_available_buffer_dmabuf;
 };
 
+static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv);
+
 static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
 		enum nvgpu_profiler_pm_reservation_scope scope)
 {
@@ -181,6 +191,8 @@ int nvgpu_prof_fops_release(struct inode *inode, struct file *filp)
 
+	nvgpu_prof_free_pma_stream_priv_data(prof_priv);
+
 	nvgpu_profiler_free(prof);
 
 	nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
 	nvgpu_kfree(g, prof_priv->regops_staging_buf);
 
@@ -325,6 +337,137 @@ static int nvgpu_prof_ioctl_release_pm_resource(struct nvgpu_profiler_object *pr
 	return 0;
 }
 
+static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *priv,
+		struct nvgpu_profiler_alloc_pma_stream_args *args)
+{
+	struct nvgpu_profiler_object *prof = priv->prof;
+	struct gk20a *g = prof->g;
+	struct mm_gk20a *mm = &g->mm;
+	u64 pma_bytes_available_buffer_offset = 0ULL;
+	struct dma_buf *dmabuf;
+	void *cpuva;
+	u32 pma_buffer_size;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_prof, "Request to setup PMA stream for handle %u",
+		prof->prof_handle);
+
+	if (prof->pma_buffer_va != 0U) {
+		nvgpu_err(g, "PMA stream already initialized");
+		return -EINVAL;
+	}
+
+	err = nvgpu_profiler_alloc_pma_stream(prof);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init PMA stream");
+		return err;
+	}
+
+	/*
+	 * Size register is 32-bit in HW, ensure requested size does
+	 * not violate that.
+ */ + pma_buffer_size = nvgpu_safe_cast_u64_to_u32(args->pma_buffer_map_size); + + err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_buffer_fd, + &args->pma_buffer_offset, 0, SZ_4K, 0, 0, + 0, 0, NULL); + if (err != 0) { + nvgpu_err(g, "failed to map PMA buffer"); + goto err_put_vm; + } + + err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_bytes_available_buffer_fd, + &pma_bytes_available_buffer_offset, 0, SZ_4K, 0, 0, + 0, 0, NULL); + if (err != 0) { + nvgpu_err(g, "failed to map available bytes buffer"); + goto err_unmap_pma; + } + + dmabuf = dma_buf_get(args->pma_bytes_available_buffer_fd); + if (IS_ERR(dmabuf)) { + err = -EINVAL; + nvgpu_err(g, "failed to get available bytes buffer FD"); + goto err_unmap_bytes_available; + } + + cpuva = dma_buf_vmap(dmabuf); + if (cpuva == NULL) { + err = -ENOMEM; + nvgpu_err(g, "failed to vmap available bytes buffer FD"); + goto err_dma_buf_put; + } + + prof->pma_buffer_va = args->pma_buffer_offset; + prof->pma_buffer_size = pma_buffer_size; + prof->pma_bytes_available_buffer_va = pma_bytes_available_buffer_offset; + prof->pma_bytes_available_buffer_cpuva = cpuva; + priv->pma_bytes_available_buffer_dmabuf = dmabuf; + + nvgpu_log(g, gpu_dbg_prof, "PMA stream initialized for profiler handle %u, 0x%llx 0x%x 0x%llx", + prof->prof_handle, prof->pma_buffer_va, prof->pma_buffer_size, + prof->pma_bytes_available_buffer_va); + + args->pma_buffer_va = args->pma_buffer_offset; + + return 0; + +err_dma_buf_put: + dma_buf_put(dmabuf); +err_unmap_bytes_available: + nvgpu_vm_unmap(mm->perfbuf.vm, pma_bytes_available_buffer_offset, NULL); +err_unmap_pma: + nvgpu_vm_unmap(mm->perfbuf.vm, args->pma_buffer_offset, NULL); +err_put_vm: + nvgpu_perfbuf_deinit_vm(g); + nvgpu_profiler_pm_resource_release(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + return err; +} + +static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv) +{ + struct nvgpu_profiler_object *prof = priv->prof; + + if 
(priv->pma_bytes_available_buffer_dmabuf == NULL) { + return; + } + + dma_buf_vunmap(priv->pma_bytes_available_buffer_dmabuf, + prof->pma_bytes_available_buffer_cpuva); + dma_buf_put(priv->pma_bytes_available_buffer_dmabuf); + priv->pma_bytes_available_buffer_dmabuf = NULL; + prof->pma_bytes_available_buffer_cpuva = NULL; +} + +static int nvgpu_prof_ioctl_free_pma_stream(struct nvgpu_profiler_object_priv *priv) +{ + struct nvgpu_profiler_object *prof = priv->prof; + struct gk20a *g = prof->g; + + nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u", + prof->prof_handle); + + if (prof->pma_buffer_va == 0U) { + nvgpu_err(g, "PMA stream not initialized"); + return -EINVAL; + } + + if (prof->bound) { + nvgpu_err(g, "PM resources are bound, cannot free PMA"); + return -EINVAL; + } + + nvgpu_profiler_free_pma_stream(prof); + nvgpu_prof_free_pma_stream_priv_data(priv); + + nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u completed", + prof->prof_handle); + + return 0; +} + static int nvgpu_prof_ioctl_bind_pm_resources(struct nvgpu_profiler_object *prof) { return nvgpu_profiler_bind_pm_resources(prof); @@ -563,6 +706,15 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd, err = nvgpu_prof_ioctl_unbind_pm_resources(prof); break; + case NVGPU_PROFILER_IOCTL_ALLOC_PMA_STREAM: + err = nvgpu_prof_ioctl_alloc_pma_stream(prof_priv, + (struct nvgpu_profiler_alloc_pma_stream_args *)buf); + break; + + case NVGPU_PROFILER_IOCTL_FREE_PMA_STREAM: + err = nvgpu_prof_ioctl_free_pma_stream(prof_priv); + break; + case NVGPU_PROFILER_IOCTL_EXEC_REG_OPS: err = nvgpu_prof_ioctl_exec_reg_ops(prof_priv, (struct nvgpu_profiler_exec_reg_ops_args *)buf);