diff --git a/drivers/gpu/nvgpu/common/perf/perfbuf.c b/drivers/gpu/nvgpu/common/perf/perfbuf.c
index 616972e46..5765d61a5 100644
--- a/drivers/gpu/nvgpu/common/perf/perfbuf.c
+++ b/drivers/gpu/nvgpu/common/perf/perfbuf.c
@@ -25,6 +25,11 @@
 #include
 #include
 #include
+#include
+#include
+
+#define PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_FIXED_GPU_VA 0x4000000ULL
+#define PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_MAX_SIZE NVGPU_CPU_PAGE_SIZE
 
 int nvgpu_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
 {
@@ -92,6 +97,24 @@ int nvgpu_perfbuf_init_vm(struct gk20a *g)
 		return -ENOMEM;
 	}
 
+	/*
+	 * PMA available byte buffer GPU_VA needs to fit in 32 bit
+	 * register, hence use a fixed GPU_VA to map it.
+	 * Only one PMA stream is allowed right now so this works.
+	 * This should be updated later to support multiple PMA streams.
+	 */
+	mm->perfbuf.pma_bytes_available_buffer_gpu_va =
+			PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_FIXED_GPU_VA;
+
+	err = nvgpu_vm_area_alloc(mm->perfbuf.vm,
+			PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_MAX_SIZE / SZ_4K,
+			SZ_4K, &mm->perfbuf.pma_bytes_available_buffer_gpu_va,
+			NVGPU_VM_AREA_ALLOC_FIXED_OFFSET);
+	if (err != 0) {
+		nvgpu_vm_put(mm->perfbuf.vm);
+		return err;
+	}
+
 	err = g->ops.perfbuf.init_inst_block(g);
 	if (err != 0) {
 		nvgpu_vm_put(mm->perfbuf.vm);
@@ -109,7 +132,12 @@ void nvgpu_perfbuf_deinit_inst_block(struct gk20a *g)
 
 void nvgpu_perfbuf_deinit_vm(struct gk20a *g)
 {
+	struct mm_gk20a *mm = &g->mm;
+
 	g->ops.perfbuf.deinit_inst_block(g);
+
+	nvgpu_vm_area_free(mm->perfbuf.vm,
+			mm->perfbuf.pma_bytes_available_buffer_gpu_va);
 	nvgpu_vm_put(g->mm.perfbuf.vm);
 }
 
diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c
index 150947867..c074e67d5 100644
--- a/drivers/gpu/nvgpu/common/profiler/profiler.c
+++ b/drivers/gpu/nvgpu/common/profiler/profiler.c
@@ -538,21 +538,10 @@ int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof)
 void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof)
 {
 	struct gk20a *g = prof->g;
-	struct mm_gk20a *mm = &g->mm;
 
-	if (prof->pma_buffer_va == 0U) {
-		return;
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) {
+		nvgpu_perfbuf_deinit_vm(g);
+		nvgpu_profiler_pm_resource_release(prof,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
 	}
-
-	nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_bytes_available_buffer_va, NULL);
-	prof->pma_bytes_available_buffer_va = 0U;
-
-	nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_buffer_va, NULL);
-	prof->pma_buffer_va = 0U;
-	prof->pma_buffer_size = 0U;
-
-	nvgpu_perfbuf_deinit_vm(g);
-
-	nvgpu_profiler_pm_resource_release(prof,
-		NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h
index 8c828e8b9..dbc11cc0f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/mm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h
@@ -294,6 +294,7 @@ struct mm_gk20a {
 	struct {
 		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
+		u64 pma_bytes_available_buffer_gpu_va;
 	} perfbuf;
 
 	/**
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
index e62035116..be7fe3642 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
@@ -189,10 +189,10 @@ int nvgpu_prof_fops_release(struct inode *inode, struct file *filp)
 		"Request to close profiler session with scope %u and profiler handle %u",
 		prof->scope, prof->prof_handle);
 
-	nvgpu_profiler_free(prof);
-
 	nvgpu_prof_free_pma_stream_priv_data(prof_priv);
 
+	nvgpu_profiler_free(prof);
+
 	nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
 	nvgpu_kfree(g, prof_priv->regops_staging_buf);
 
@@ -343,7 +343,7 @@ static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *
 	struct nvgpu_profiler_object *prof = priv->prof;
 	struct gk20a *g = prof->g;
 	struct mm_gk20a *mm = &g->mm;
-	u64 pma_bytes_available_buffer_offset = 0ULL;
+	u64 pma_bytes_available_buffer_offset;
 	struct dma_buf *dmabuf;
 	void *cpuva;
 	u32 pma_buffer_size;
@@ -363,8 +363,15 @@ static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *
 		return err;
 	}
 
+	/*
+	 * PMA available byte buffer GPU_VA needs to fit in 32 bit
+	 * register, hence use a fixed GPU_VA to map it.
+	 */
+	pma_bytes_available_buffer_offset = mm->perfbuf.pma_bytes_available_buffer_gpu_va;
+
 	err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_bytes_available_buffer_fd,
-			&pma_bytes_available_buffer_offset, 0, SZ_4K, 0, 0,
+			&pma_bytes_available_buffer_offset,
+			NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET, SZ_4K, 0, 0,
 			0, 0, NULL);
 	if (err != 0) {
 		nvgpu_err(g, "failed to map available bytes buffer");
@@ -429,11 +436,20 @@ err_put_vm:
 static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv)
 {
 	struct nvgpu_profiler_object *prof = priv->prof;
+	struct gk20a *g = prof->g;
+	struct mm_gk20a *mm = &g->mm;
 
 	if (priv->pma_bytes_available_buffer_dmabuf == NULL) {
 		return;
 	}
 
+	nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_bytes_available_buffer_va, NULL);
+	prof->pma_bytes_available_buffer_va = 0U;
+
+	nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_buffer_va, NULL);
+	prof->pma_buffer_va = 0U;
+	prof->pma_buffer_size = 0U;
+
 	dma_buf_vunmap(priv->pma_bytes_available_buffer_dmabuf,
 			prof->pma_bytes_available_buffer_cpuva);
 	dma_buf_put(priv->pma_bytes_available_buffer_dmabuf);
@@ -459,8 +475,8 @@ static int nvgpu_prof_ioctl_free_pma_stream(struct nvgpu_profiler_object_priv *p
 		return -EINVAL;
 	}
 
-	nvgpu_profiler_free_pma_stream(prof);
 	nvgpu_prof_free_pma_stream_priv_data(priv);
+	nvgpu_profiler_free_pma_stream(prof);
 
 	nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u completed",
 		prof->prof_handle);