diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c index 1a0352f23..92332e04f 100644 --- a/drivers/gpu/nvgpu/common/profiler/profiler.c +++ b/drivers/gpu/nvgpu/common/profiler/profiler.c @@ -30,6 +30,7 @@ #include #include #include +#include static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); static int generate_unique_id(void) @@ -74,6 +75,7 @@ void nvgpu_profiler_free(struct nvgpu_profiler_object *prof) prof->prof_handle); nvgpu_profiler_unbind_context(prof); + nvgpu_profiler_free_pma_stream(prof); nvgpu_list_del(&prof->prof_obj_entry); nvgpu_kfree(g, prof); @@ -329,6 +331,7 @@ static int nvgpu_profiler_unbind_hwpm(struct nvgpu_profiler_object *prof) static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof) { + struct gk20a *g = prof->g; int err; err = nvgpu_profiler_bind_hwpm(prof, true); @@ -336,13 +339,25 @@ static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof return err; } + err = g->ops.perfbuf.perfbuf_enable(g, prof->pma_buffer_va, prof->pma_buffer_size); + if (err) { + nvgpu_profiler_unbind_hwpm(prof); + return err; + } + return 0; } static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *prof) { + struct gk20a *g = prof->g; int err; + err = g->ops.perfbuf.perfbuf_disable(g); + if (err) { + return err; + } + err = nvgpu_profiler_unbind_hwpm(prof); if (err) { return err; @@ -490,3 +505,48 @@ fail: gk20a_idle(g); return err; } + +int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + int err; + + err = nvgpu_profiler_pm_resource_reserve(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + if (err) { + nvgpu_err(g, "failed to reserve PMA stream"); + return err; + } + + err = nvgpu_perfbuf_init_vm(g); + if (err) { + nvgpu_err(g, "failed to initialize perfbuf VM"); + nvgpu_profiler_pm_resource_release(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + return err; + } + + 
return 0; +} + +void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + struct mm_gk20a *mm = &g->mm; + + if (prof->pma_buffer_va == 0U) { + return; + } + + nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_bytes_available_buffer_va, NULL); + prof->pma_bytes_available_buffer_va = 0U; + + nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_buffer_va, NULL); + prof->pma_buffer_va = 0U; + prof->pma_buffer_size = 0U; + + nvgpu_perfbuf_deinit_vm(g); + + nvgpu_profiler_pm_resource_release(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); +} diff --git a/drivers/gpu/nvgpu/include/nvgpu/profiler.h b/drivers/gpu/nvgpu/include/nvgpu/profiler.h index 8de31dd47..5588ee375 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/profiler.h +++ b/drivers/gpu/nvgpu/include/nvgpu/profiler.h @@ -82,6 +82,30 @@ struct nvgpu_profiler_object { * until this flag is set. */ bool bound; + + /* + * GPU VA of the PMA stream buffer (if PMA stream resource is reserved + * successfully) associated with this profiler object. + */ + u64 pma_buffer_va; + + /* + * Size of the PMA stream buffer (if PMA stream resource is reserved + * successfully) associated with this profiler object. + */ + u32 pma_buffer_size; + + /* + * GPU VA of the buffer that would store available bytes in PMA buffer + * (if PMA stream resource is reserved successfully). + */ + u64 pma_bytes_available_buffer_va; + + /* + * CPU VA of the buffer that would store available bytes in PMA buffer + * (if PMA stream resource is reserved successfully). 
+ */ + void *pma_bytes_available_buffer_cpuva; }; static inline struct nvgpu_profiler_object * @@ -108,5 +132,8 @@ int nvgpu_profiler_pm_resource_release(struct nvgpu_profiler_object *prof, int nvgpu_profiler_bind_pm_resources(struct nvgpu_profiler_object *prof); int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof); +int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof); +void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof); + #endif /* CONFIG_NVGPU_PROFILER */ #endif /* NVGPU_PROFILER_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c index b9dd69f63..6810d28d7 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include @@ -61,8 +63,16 @@ struct nvgpu_profiler_object_priv { * execution. */ struct nvgpu_dbg_reg_op *regops_staging_buf; + + /* + * dmabuf handle of the buffer that would store available bytes in PMA buffer + * (if PMA stream resource is reserved successfully). 
+	 */
+	struct dma_buf *pma_bytes_available_buffer_dmabuf;
 };
 
+static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv);
+
 static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
 		enum nvgpu_profiler_pm_reservation_scope scope)
 {
@@ -181,6 +191,8 @@ int nvgpu_prof_fops_release(struct inode *inode, struct file *filp)
 
+	nvgpu_prof_free_pma_stream_priv_data(prof_priv);
+
 	nvgpu_profiler_free(prof);
 
 	nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
 	nvgpu_kfree(g, prof_priv->regops_staging_buf);
 
@@ -325,6 +337,137 @@ static int nvgpu_prof_ioctl_release_pm_resource(struct nvgpu_profiler_object *pr
 	return 0;
 }
 
+static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *priv,
+		struct nvgpu_profiler_alloc_pma_stream_args *args)
+{
+	struct nvgpu_profiler_object *prof = priv->prof;
+	struct gk20a *g = prof->g;
+	struct mm_gk20a *mm = &g->mm;
+	u64 pma_bytes_available_buffer_offset = 0ULL;
+	struct dma_buf *dmabuf;
+	void *cpuva;
+	u32 pma_buffer_size;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_prof, "Request to setup PMA stream for handle %u",
+		prof->prof_handle);
+
+	if (prof->pma_buffer_va != 0U) {
+		nvgpu_err(g, "PMA stream already initialized");
+		return -EINVAL;
+	}
+
+	err = nvgpu_profiler_alloc_pma_stream(prof);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init PMA stream");
+		return err;
+	}
+
+	/*
+	 * Size register is 32-bit in HW, ensure requested size does
+	 * not violate that.
+ */ + pma_buffer_size = nvgpu_safe_cast_u64_to_u32(args->pma_buffer_map_size); + + err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_buffer_fd, + &args->pma_buffer_offset, 0, SZ_4K, 0, 0, + 0, 0, NULL); + if (err != 0) { + nvgpu_err(g, "failed to map PMA buffer"); + goto err_put_vm; + } + + err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_bytes_available_buffer_fd, + &pma_bytes_available_buffer_offset, 0, SZ_4K, 0, 0, + 0, 0, NULL); + if (err != 0) { + nvgpu_err(g, "failed to map available bytes buffer"); + goto err_unmap_pma; + } + + dmabuf = dma_buf_get(args->pma_bytes_available_buffer_fd); + if (IS_ERR(dmabuf)) { + err = -EINVAL; + nvgpu_err(g, "failed to get available bytes buffer FD"); + goto err_unmap_bytes_available; + } + + cpuva = dma_buf_vmap(dmabuf); + if (cpuva == NULL) { + err = -ENOMEM; + nvgpu_err(g, "failed to vmap available bytes buffer FD"); + goto err_dma_buf_put; + } + + prof->pma_buffer_va = args->pma_buffer_offset; + prof->pma_buffer_size = pma_buffer_size; + prof->pma_bytes_available_buffer_va = pma_bytes_available_buffer_offset; + prof->pma_bytes_available_buffer_cpuva = cpuva; + priv->pma_bytes_available_buffer_dmabuf = dmabuf; + + nvgpu_log(g, gpu_dbg_prof, "PMA stream initialized for profiler handle %u, 0x%llx 0x%x 0x%llx", + prof->prof_handle, prof->pma_buffer_va, prof->pma_buffer_size, + prof->pma_bytes_available_buffer_va); + + args->pma_buffer_va = args->pma_buffer_offset; + + return 0; + +err_dma_buf_put: + dma_buf_put(dmabuf); +err_unmap_bytes_available: + nvgpu_vm_unmap(mm->perfbuf.vm, pma_bytes_available_buffer_offset, NULL); +err_unmap_pma: + nvgpu_vm_unmap(mm->perfbuf.vm, args->pma_buffer_offset, NULL); +err_put_vm: + nvgpu_perfbuf_deinit_vm(g); + nvgpu_profiler_pm_resource_release(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + return err; +} + +static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv) +{ + struct nvgpu_profiler_object *prof = priv->prof; + + if 
(priv->pma_bytes_available_buffer_dmabuf == NULL) { + return; + } + + dma_buf_vunmap(priv->pma_bytes_available_buffer_dmabuf, + prof->pma_bytes_available_buffer_cpuva); + dma_buf_put(priv->pma_bytes_available_buffer_dmabuf); + priv->pma_bytes_available_buffer_dmabuf = NULL; + prof->pma_bytes_available_buffer_cpuva = NULL; +} + +static int nvgpu_prof_ioctl_free_pma_stream(struct nvgpu_profiler_object_priv *priv) +{ + struct nvgpu_profiler_object *prof = priv->prof; + struct gk20a *g = prof->g; + + nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u", + prof->prof_handle); + + if (prof->pma_buffer_va == 0U) { + nvgpu_err(g, "PMA stream not initialized"); + return -EINVAL; + } + + if (prof->bound) { + nvgpu_err(g, "PM resources are bound, cannot free PMA"); + return -EINVAL; + } + + nvgpu_profiler_free_pma_stream(prof); + nvgpu_prof_free_pma_stream_priv_data(priv); + + nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u completed", + prof->prof_handle); + + return 0; +} + static int nvgpu_prof_ioctl_bind_pm_resources(struct nvgpu_profiler_object *prof) { return nvgpu_profiler_bind_pm_resources(prof); @@ -563,6 +706,15 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd, err = nvgpu_prof_ioctl_unbind_pm_resources(prof); break; + case NVGPU_PROFILER_IOCTL_ALLOC_PMA_STREAM: + err = nvgpu_prof_ioctl_alloc_pma_stream(prof_priv, + (struct nvgpu_profiler_alloc_pma_stream_args *)buf); + break; + + case NVGPU_PROFILER_IOCTL_FREE_PMA_STREAM: + err = nvgpu_prof_ioctl_free_pma_stream(prof_priv); + break; + case NVGPU_PROFILER_IOCTL_EXEC_REG_OPS: err = nvgpu_prof_ioctl_exec_reg_ops(prof_priv, (struct nvgpu_profiler_exec_reg_ops_args *)buf);