gpu: nvgpu: add profiler apis to alloc/free pma stream

Add two new IOCTL APIs to allocate/free the PMA stream.

Add two new functions to handle them:
nvgpu_prof_ioctl_alloc_pma_stream()
nvgpu_prof_ioctl_free_pma_stream()

Allocation of the PMA stream includes the steps below:
- Initializing perfbuf VM
- Mapping PMA buffer into perfbuf VM
- Mapping PMA byte buffer into perfbuf VM
- Mapping PMA byte buffer to CPU virtual address space

Store all of the above data in struct nvgpu_profiler_object for reference.
OS-specific data is stored in struct nvgpu_profiler_object_priv.

Update the HWPM streamout bind/unbind sequence to enable/disable perfbuf, respectively.

Also take care of releasing the PMA stream resources in the profiler object close path if they are not explicitly released by user space via the IOCTL call.

Bug 2510974
Jira NVGPU-5360

Change-Id: I126633746cabc4e293c7ad7c49806866a897949d
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2406483
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2020-05-18 16:28:14 +05:30
parent 49c9f0c137
commit 5844151a93
3 changed files with 239 additions and 0 deletions
--- a/drivers/gpu/nvgpu/common/profiler/profiler.c
+++ b/drivers/gpu/nvgpu/common/profiler/profiler.c
@@ -30,6 +30,7 @@
 #include <nvgpu/tsg.h>
 #include <nvgpu/nvgpu_init.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/perfbuf.h>

 static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
 static int generate_unique_id(void)
@@ -74,6 +75,7 @@ void nvgpu_profiler_free(struct nvgpu_profiler_object *prof)
 		prof->prof_handle);

 	nvgpu_profiler_unbind_context(prof);
+	nvgpu_profiler_free_pma_stream(prof);

 	nvgpu_list_del(&prof->prof_obj_entry);
 	nvgpu_kfree(g, prof);
@@ -329,6 +331,7 @@ static int nvgpu_profiler_unbind_hwpm(struct nvgpu_profiler_object *prof)

 static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof)
 {
+	struct gk20a *g = prof->g;
 	int err;

 	err = nvgpu_profiler_bind_hwpm(prof, true);
@@ -336,13 +339,25 @@ static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof
 		return err;
 	}

+	err = g->ops.perfbuf.perfbuf_enable(g, prof->pma_buffer_va, prof->pma_buffer_size);
+	if (err) {
+		nvgpu_profiler_unbind_hwpm(prof);
+		return err;
+	}
+
 	return 0;
 }

 static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *prof)
 {
+	struct gk20a *g = prof->g;
 	int err;

+	err = g->ops.perfbuf.perfbuf_disable(g);
+	if (err) {
+		return err;
+	}
+
 	err = nvgpu_profiler_unbind_hwpm(prof);
 	if (err) {
 		return err;
@@ -490,3 +505,48 @@ fail:
 	gk20a_idle(g);
 	return err;
 }
+
+/*
+ * Reserve the PMA stream PM resource for @prof and initialize the perfbuf VM.
+ *
+ * If the perfbuf VM cannot be initialized, the PMA stream reservation taken
+ * here is released again before returning.
+ *
+ * Returns 0 on success, otherwise the error code of the step that failed.
+ */
+int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof)
+{
+	struct gk20a *g = prof->g;
+	int ret;
+
+	ret = nvgpu_profiler_pm_resource_reserve(prof,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
+	if (ret != 0) {
+		nvgpu_err(g, "failed to reserve PMA stream");
+		return ret;
+	}
+
+	ret = nvgpu_perfbuf_init_vm(g);
+	if (ret == 0) {
+		return 0;
+	}
+
+	/* VM setup failed: give back the reservation taken above. */
+	nvgpu_err(g, "failed to initialize perfbuf VM");
+	nvgpu_profiler_pm_resource_release(prof,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
+
+	return ret;
+}
+
+/*
+ * Tear down the PMA stream state tracked in @prof.
+ *
+ * Does nothing when prof->pma_buffer_va is zero. Otherwise, in this order:
+ * unmaps the bytes-available buffer and the PMA buffer from the perfbuf VM
+ * (clearing the matching fields in @prof), deinitializes the perfbuf VM and
+ * releases the PMA stream PM resource reservation.
+ *
+ * prof->pma_bytes_available_buffer_cpuva is not touched here.
+ */
+void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof)
+{
+	struct gk20a *g = prof->g;
+
+	if (prof->pma_buffer_va != 0U) {
+		/* GPU mapping of the bytes-available buffer goes first. */
+		nvgpu_vm_unmap(g->mm.perfbuf.vm,
+			prof->pma_bytes_available_buffer_va, NULL);
+		prof->pma_bytes_available_buffer_va = 0U;
+
+		/* Then the PMA buffer itself, together with its size. */
+		nvgpu_vm_unmap(g->mm.perfbuf.vm, prof->pma_buffer_va, NULL);
+		prof->pma_buffer_va = 0U;
+		prof->pma_buffer_size = 0U;
+
+		nvgpu_perfbuf_deinit_vm(g);
+
+		nvgpu_profiler_pm_resource_release(prof,
+				NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
+	}
+}
--- a/drivers/gpu/nvgpu/include/nvgpu/profiler.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/profiler.h
@@ -82,6 +82,30 @@ struct nvgpu_profiler_object {
 	 * until this flag is set.
 	 */
 	bool bound;
+
+	/*
+	 * GPU VA of the PMA stream buffer (if PMA stream resource is reserved
+	 * successfully) associated with this profiler object.
+	 */
+	u64 pma_buffer_va;
+
+	/*
+	 * Size of the PMA stream buffer (if PMA stream resource is reserved
+	 * successfully) associated with this profiler object.
+	 */
+	u32 pma_buffer_size;
+
+	/*
+	 * GPU VA of the buffer that would store available bytes in PMA buffer
+	 * (if PMA stream resource is reserved successfully).
+	 */
+	u64 pma_bytes_available_buffer_va;
+
+	/*
+	 * CPU VA of the buffer that would store available bytes in PMA buffer
+	 * (if PMA stream resource is reserved successfully).
+	 */
+	void *pma_bytes_available_buffer_cpuva;
 };

 static inline struct nvgpu_profiler_object *
@@ -108,5 +132,8 @@ int nvgpu_profiler_pm_resource_release(struct nvgpu_profiler_object *prof,
 int nvgpu_profiler_bind_pm_resources(struct nvgpu_profiler_object *prof);
 int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof);

+int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof);
+void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof);
+
 #endif /* CONFIG_NVGPU_PROFILER */
 #endif /* NVGPU_PROFILER_H */
--- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
@@ -18,6 +18,7 @@
 #include <linux/file.h>
 #include <linux/cdev.h>
 #include <linux/uaccess.h>
+#include <linux/dma-buf.h>
 #include <uapi/linux/nvgpu.h>

 #include <nvgpu/kmem.h>
@@ -26,6 +27,7 @@
 #include <nvgpu/nvgpu_init.h>
 #include <nvgpu/profiler.h>
 #include <nvgpu/regops.h>
+#include <nvgpu/perfbuf.h>
 #include <nvgpu/pm_reservation.h>
 #include <nvgpu/tsg.h>

@@ -61,8 +63,16 @@ struct nvgpu_profiler_object_priv {
 	 * execution.
 	 */
 	struct nvgpu_dbg_reg_op *regops_staging_buf;
+
+	/*
+	 * dmabuf handle of the buffer that would store available bytes in PMA buffer
+	 * (if PMA stream resource is reserved successfully).
+	 */
+	struct dma_buf *pma_bytes_available_buffer_dmabuf;
 };

+static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv);
+
 static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
 		enum nvgpu_profiler_pm_reservation_scope scope)
 {
@@ -181,6 +191,8 @@ int nvgpu_prof_fops_release(struct inode *inode, struct file *filp)

 	nvgpu_profiler_free(prof);

+	nvgpu_prof_free_pma_stream_priv_data(prof_priv);
+
 	nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
 	nvgpu_kfree(g, prof_priv->regops_staging_buf);

@@ -325,6 +337,137 @@ static int nvgpu_prof_ioctl_release_pm_resource(struct nvgpu_profiler_object *pr
 	return 0;
 }

+/*
+ * Handle NVGPU_PROFILER_IOCTL_ALLOC_PMA_STREAM.
+ *
+ * Sets up the PMA stream for the profiler object behind @priv:
+ *  - reserves the PMA stream resource and initializes the perfbuf VM
+ *    (nvgpu_profiler_alloc_pma_stream()),
+ *  - maps the PMA buffer (args->pma_buffer_fd) into the perfbuf VM,
+ *  - maps the bytes-available buffer (args->pma_bytes_available_buffer_fd)
+ *    into the perfbuf VM,
+ *  - takes a dma-buf reference on the bytes-available buffer and vmaps it
+ *    for CPU access.
+ *
+ * On success the GPU VAs, the buffer size and the CPU VA are recorded in the
+ * profiler object, the dma-buf reference is recorded in @priv, and
+ * args->pma_buffer_va is set to the GPU VA of the PMA buffer. On failure
+ * every step completed so far is undone.
+ *
+ * Returns 0 on success; -EINVAL if a PMA stream is already set up for this
+ * object, if args->pma_buffer_map_size does not fit in 32 bits, or if the
+ * dma-buf reference cannot be obtained; -ENOMEM if the vmap fails; otherwise
+ * the error code of the failing step.
+ */
+static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *priv,
+		struct nvgpu_profiler_alloc_pma_stream_args *args)
+{
+	struct nvgpu_profiler_object *prof = priv->prof;
+	struct gk20a *g = prof->g;
+	struct mm_gk20a *mm = &g->mm;
+	u64 pma_bytes_available_buffer_offset = 0ULL;
+	struct dma_buf *dmabuf;
+	void *cpuva;
+	u32 pma_buffer_size;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_prof, "Request to setup PMA stream for handle %u",
+		prof->prof_handle);
+
+	if (prof->pma_buffer_va != 0U) {
+		nvgpu_err(g, "PMA stream already initialized");
+		return -EINVAL;
+	}
+
+	/*
+	 * Size register is 32-bit in HW, ensure requested size does
+	 * not violate that. The size is supplied by user space, so an
+	 * oversized value is rejected with an error here, before any
+	 * resource is acquired, rather than being handed unchecked to the
+	 * cast helper below.
+	 * NOTE(review): nvgpu_safe_cast_u64_to_u32() is understood to assert
+	 * on out-of-range input - confirm against its definition.
+	 */
+	if (args->pma_buffer_map_size >= (1ULL << 32U)) {
+		nvgpu_err(g, "PMA buffer map size does not fit in 32 bits");
+		return -EINVAL;
+	}
+	pma_buffer_size = nvgpu_safe_cast_u64_to_u32(args->pma_buffer_map_size);
+
+	err = nvgpu_profiler_alloc_pma_stream(prof);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init PMA stream");
+		return err;
+	}
+
+	/* On success args->pma_buffer_offset is later used as the GPU VA. */
+	err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_buffer_fd,
+			&args->pma_buffer_offset, 0, SZ_4K, 0, 0,
+			0, 0, NULL);
+	if (err != 0) {
+		nvgpu_err(g, "failed to map PMA buffer");
+		goto err_put_vm;
+	}
+
+	err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_bytes_available_buffer_fd,
+			&pma_bytes_available_buffer_offset, 0, SZ_4K, 0, 0,
+			0, 0, NULL);
+	if (err != 0) {
+		nvgpu_err(g, "failed to map available bytes buffer");
+		goto err_unmap_pma;
+	}
+
+	/*
+	 * Reference kept in @priv on success; it is dropped again by
+	 * nvgpu_prof_free_pma_stream_priv_data().
+	 */
+	dmabuf = dma_buf_get(args->pma_bytes_available_buffer_fd);
+	if (IS_ERR(dmabuf)) {
+		err = -EINVAL;
+		nvgpu_err(g, "failed to get available bytes buffer FD");
+		goto err_unmap_bytes_available;
+	}
+
+	cpuva = dma_buf_vmap(dmabuf);
+	if (cpuva == NULL) {
+		err = -ENOMEM;
+		nvgpu_err(g, "failed to vmap available bytes buffer FD");
+		goto err_dma_buf_put;
+	}
+
+	/* Everything succeeded: publish the stream state. */
+	prof->pma_buffer_va = args->pma_buffer_offset;
+	prof->pma_buffer_size = pma_buffer_size;
+	prof->pma_bytes_available_buffer_va = pma_bytes_available_buffer_offset;
+	prof->pma_bytes_available_buffer_cpuva = cpuva;
+	priv->pma_bytes_available_buffer_dmabuf = dmabuf;
+
+	nvgpu_log(g, gpu_dbg_prof, "PMA stream initialized for profiler handle %u, 0x%llx 0x%x 0x%llx",
+		prof->prof_handle, prof->pma_buffer_va, prof->pma_buffer_size,
+		prof->pma_bytes_available_buffer_va);
+
+	args->pma_buffer_va = args->pma_buffer_offset;
+
+	return 0;
+
+	/* Unwind in reverse order of acquisition. */
+err_dma_buf_put:
+	dma_buf_put(dmabuf);
+err_unmap_bytes_available:
+	nvgpu_vm_unmap(mm->perfbuf.vm, pma_bytes_available_buffer_offset, NULL);
+err_unmap_pma:
+	nvgpu_vm_unmap(mm->perfbuf.vm, args->pma_buffer_offset, NULL);
+err_put_vm:
+	nvgpu_perfbuf_deinit_vm(g);
+	nvgpu_profiler_pm_resource_release(prof,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
+	return err;
+}
+
+/*
+ * Drop the OS-specific PMA stream state held in @priv.
+ *
+ * Does nothing when no dma-buf reference is recorded. Otherwise removes the
+ * CPU mapping of the bytes-available buffer, drops the dma-buf reference and
+ * clears both the dma-buf pointer in @priv and the CPU VA stored in the
+ * profiler object.
+ *
+ * NOTE(review): when a dma-buf is recorded this reads and writes through
+ * priv->prof, so the profiler object must still be valid at that point.
+ * nvgpu_prof_fops_release() calls this after nvgpu_profiler_free(), which
+ * frees the profiler object - confirm that ordering is safe.
+ */
+static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv)
+{
+	struct nvgpu_profiler_object *prof = priv->prof;
+	struct dma_buf *dmabuf = priv->pma_bytes_available_buffer_dmabuf;
+
+	if (dmabuf != NULL) {
+		dma_buf_vunmap(dmabuf, prof->pma_bytes_available_buffer_cpuva);
+		dma_buf_put(dmabuf);
+
+		priv->pma_bytes_available_buffer_dmabuf = NULL;
+		prof->pma_bytes_available_buffer_cpuva = NULL;
+	}
+}
+
+/*
+ * Handle NVGPU_PROFILER_IOCTL_FREE_PMA_STREAM.
+ *
+ * Releases the PMA stream of the profiler object behind @priv: first the
+ * common state (nvgpu_profiler_free_pma_stream()), then the OS-specific
+ * dma-buf/CPU mapping state (nvgpu_prof_free_pma_stream_priv_data()).
+ *
+ * Returns 0 on success, or -EINVAL when no PMA stream is set up for this
+ * object or when its PM resources are still bound.
+ */
+static int nvgpu_prof_ioctl_free_pma_stream(struct nvgpu_profiler_object_priv *priv)
+{
+	struct nvgpu_profiler_object *prof = priv->prof;
+	struct gk20a *g = prof->g;
+	int ret = 0;
+
+	nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u",
+		prof->prof_handle);
+
+	if (prof->pma_buffer_va == 0U) {
+		nvgpu_err(g, "PMA stream not initialized");
+		ret = -EINVAL;
+	} else if (prof->bound) {
+		/* The stream must not be torn down while resources are bound. */
+		nvgpu_err(g, "PM resources are bound, cannot free PMA");
+		ret = -EINVAL;
+	} else {
+		nvgpu_profiler_free_pma_stream(prof);
+		nvgpu_prof_free_pma_stream_priv_data(priv);
+
+		nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u completed",
+			prof->prof_handle);
+	}
+
+	return ret;
+}
+
 static int nvgpu_prof_ioctl_bind_pm_resources(struct nvgpu_profiler_object *prof)
 {
 	return nvgpu_profiler_bind_pm_resources(prof);
@@ -563,6 +706,15 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 		err = nvgpu_prof_ioctl_unbind_pm_resources(prof);
 		break;

+	case NVGPU_PROFILER_IOCTL_ALLOC_PMA_STREAM:
+		err = nvgpu_prof_ioctl_alloc_pma_stream(prof_priv,
+			(struct nvgpu_profiler_alloc_pma_stream_args *)buf);
+		break;
+
+	case NVGPU_PROFILER_IOCTL_FREE_PMA_STREAM:
+		err = nvgpu_prof_ioctl_free_pma_stream(prof_priv);
+		break;
+
 	case NVGPU_PROFILER_IOCTL_EXEC_REG_OPS:
 		err = nvgpu_prof_ioctl_exec_reg_ops(prof_priv,
 			(struct nvgpu_profiler_exec_reg_ops_args *)buf);