diff --git a/drivers/gpu/nvgpu/hal/perf/perf_gm20b.c b/drivers/gpu/nvgpu/hal/perf/perf_gm20b.c
index e73e7fef0..99a6247e7 100644
--- a/drivers/gpu/nvgpu/hal/perf/perf_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/perf/perf_gm20b.c
@@ -24,8 +24,6 @@
 #include
 #include
 #include
-#include
-#include
 
 #include "perf_gm20b.h"
 
@@ -55,8 +53,6 @@ void gm20b_perf_membuf_reset_streaming(struct gk20a *g)
 	u32 engine_status;
 	u32 num_unread_bytes;
 
-	g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
-
 	engine_status = nvgpu_readl(g, perf_pmasys_enginestatus_r());
 	WARN_ON(0U ==
 		(engine_status & perf_pmasys_enginestatus_rbufempty_empty_f()));
diff --git a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
index e7c1d6087..e72742aa2 100644
--- a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
@@ -24,8 +24,6 @@
 #include
 #include
 #include
-#include
-#include
 
 #include "perf_gv11b.h"
 
@@ -55,8 +53,6 @@ void gv11b_perf_membuf_reset_streaming(struct gk20a *g)
 	u32 engine_status;
 	u32 num_unread_bytes;
 
-	g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
-
 	engine_status = nvgpu_readl(g, perf_pmasys_enginestatus_r());
 	WARN_ON(0U ==
 		(engine_status & perf_pmasys_enginestatus_rbufempty_empty_f()));
diff --git a/drivers/gpu/nvgpu/include/nvgpu/debugger.h b/drivers/gpu/nvgpu/include/nvgpu/debugger.h
index c964ff97b..e48723bf6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/debugger.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/debugger.h
@@ -69,6 +69,13 @@ struct dbg_session_gk20a {
 	bool broadcast_stop_trigger;
 
 	struct nvgpu_mutex ioctl_lock;
+
+	/*
+	 * Dummy profiler object for debug session to synchronize PMA
+	 * reservation and HWPM system reset with new context/device
+	 * profilers.
+	 */
+	struct nvgpu_profiler_object *prof;
 };
 
 struct dbg_session_data {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 9d4e8d560..dc7d4b0c9 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -97,7 +97,8 @@ static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_
 	return 0;
 }
 
-static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset);
+static int gk20a_perfbuf_release_locked(struct gk20a *g,
+	struct dbg_session_gk20a *dbg_s, u64 offset);
 
 static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_exec_reg_ops_args *args);
@@ -203,7 +204,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 
 	/* If this session owned the perf buffer, release it */
 	if (g->perfbuf.owner == dbg_s)
-		gk20a_perfbuf_release_locked(g, g->perfbuf.offset);
+		gk20a_perfbuf_release_locked(g, dbg_s, g->perfbuf.offset);
 
 	/* Per-context profiler objects were released when we called
 	 * dbg_unbind_all_channels. We could still have global ones.
@@ -1433,6 +1434,59 @@ static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
 	return ret;
 }
 
+/*
+ * Allocate the debug session's dummy profiler object and reserve the PMA
+ * stream through it. If the reservation fails, the object is freed again
+ * and dbg_s->prof is cleared. Returns 0 on success, else the error code
+ * from the failing profiler call.
+ */
+static int nvgpu_perfbuf_reserve_pma(struct dbg_session_gk20a *dbg_s)
+{
+	struct gk20a *g = dbg_s->g;
+	int err;
+
+	/* Legacy profiler only supports global PMA stream */
+	err = nvgpu_profiler_alloc(g, &dbg_s->prof,
+			NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE);
+	if (err != 0) {
+		nvgpu_err(g, "Failed to allocate profiler object");
+		return err;
+	}
+
+	err = nvgpu_profiler_pm_resource_reserve(dbg_s->prof,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
+	if (err != 0) {
+		nvgpu_err(g, "Failed to reserve PMA stream");
+		nvgpu_profiler_free(dbg_s->prof);
+		/* Do not leave a dangling pointer in the session. */
+		dbg_s->prof = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Release the PMA stream reservation held through the session's dummy
+ * profiler object, then free that object and clear dbg_s->prof. A failed
+ * release is only logged; the object is freed regardless.
+ */
+static void nvgpu_perfbuf_release_pma(struct dbg_session_gk20a *dbg_s)
+{
+	struct gk20a *g = dbg_s->g;
+	int err;
+
+	err = nvgpu_profiler_pm_resource_release(dbg_s->prof,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
+	if (err != 0) {
+		nvgpu_err(g, "Failed to release PMA stream");
+	}
+
+	nvgpu_profiler_free(dbg_s->prof);
+	/* The object is gone; clear the pointer so it cannot be reused. */
+	dbg_s->prof = NULL;
+}
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
@@ -1448,12 +1502,17 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		return -EBUSY;
 	}
 
-	err = nvgpu_perfbuf_init_vm(g);
-	if (err) {
+	err = nvgpu_perfbuf_reserve_pma(dbg_s);
+	if (err != 0) {
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
 		return err;
 	}
 
+	err = nvgpu_perfbuf_init_vm(g);
+	if (err != 0) {
+		goto err_release_pma;
+	}
+
 	err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
 			args->dmabuf_fd,
 			&args->offset,
@@ -1488,6 +1547,8 @@ err_unmap:
 	nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL);
 err_remove_vm:
 	nvgpu_perfbuf_deinit_vm(g);
+err_release_pma:
+	nvgpu_perfbuf_release_pma(dbg_s);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -1505,7 +1566,7 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 	}
 
-	err = gk20a_perfbuf_release_locked(g, args->offset);
+	err = gk20a_perfbuf_release_locked(g, dbg_s, args->offset);
 
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
@@ -1705,7 +1766,8 @@ static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s,
 		args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE;
 }
 
-static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
+static int gk20a_perfbuf_release_locked(struct gk20a *g,
+	struct dbg_session_gk20a *dbg_s, u64 offset)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = mm->perfbuf.vm;
@@ -1717,6 +1779,8 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 
 	nvgpu_perfbuf_deinit_vm(g);
 
+	nvgpu_perfbuf_release_pma(dbg_s);
+
 	g->perfbuf.owner = NULL;
 	g->perfbuf.offset = 0;
 	return err;