gpu: nvgpu: reserve pma stream for legacy profiler

Legacy profiler does not reserve PMA stream resource with PM
reservation system. Also, HWPM system reset is separately implemented
in membuf disable path. And it does not even restore perf unit SLCG
prod values.

Allocate a dummy profiler object for the debug session in the perfbuf map
path. Free it in the perfbuf unmap path.

This has the advantage of synchronizing PMA stream reservation with the new
profiler stack. It also leverages the HWPM system reset and SLCG
handling code during resource reservation.

Remove the explicit HWPM reset from the gops.perf.membuf_reset_streaming()
HALs.

Bug 2510974
Jira NVGPU-5360

Change-Id: I54c5202b6251dea3d80a4dfc011e8a296339e07f
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2399595
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2020-08-17 12:13:50 +05:30
committed by Alex Waterman
parent d90b9a3d4e
commit d020778c55
4 changed files with 62 additions and 14 deletions

View File

@@ -24,8 +24,6 @@
#include <nvgpu/mm.h>
#include <nvgpu/bug.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/mc.h>
#include <nvgpu/gops_mc.h>
#include "perf_gm20b.h"
@@ -55,8 +53,6 @@ void gm20b_perf_membuf_reset_streaming(struct gk20a *g)
u32 engine_status;
u32 num_unread_bytes;
g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
engine_status = nvgpu_readl(g, perf_pmasys_enginestatus_r());
WARN_ON(0U ==
(engine_status & perf_pmasys_enginestatus_rbufempty_empty_f()));

View File

@@ -24,8 +24,6 @@
#include <nvgpu/mm.h>
#include <nvgpu/bug.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/mc.h>
#include <nvgpu/gops_mc.h>
#include "perf_gv11b.h"
@@ -55,8 +53,6 @@ void gv11b_perf_membuf_reset_streaming(struct gk20a *g)
u32 engine_status;
u32 num_unread_bytes;
g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));
engine_status = nvgpu_readl(g, perf_pmasys_enginestatus_r());
WARN_ON(0U ==
(engine_status & perf_pmasys_enginestatus_rbufempty_empty_f()));

View File

@@ -69,6 +69,13 @@ struct dbg_session_gk20a {
bool broadcast_stop_trigger;
struct nvgpu_mutex ioctl_lock;
/*
* Dummy profiler object for debug session to synchronize PMA
* reservation and HWPM system reset with new context/device
* profilers.
*/
struct nvgpu_profiler_object *prof;
};
struct dbg_session_data {

View File

@@ -97,7 +97,8 @@ static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_
return 0;
}
static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset);
static int gk20a_perfbuf_release_locked(struct gk20a *g,
struct dbg_session_gk20a *dbg_s, u64 offset);
static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_exec_reg_ops_args *args);
@@ -203,7 +204,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
/* If this session owned the perf buffer, release it */
if (g->perfbuf.owner == dbg_s)
gk20a_perfbuf_release_locked(g, g->perfbuf.offset);
gk20a_perfbuf_release_locked(g, dbg_s, g->perfbuf.offset);
/* Per-context profiler objects were released when we called
* dbg_unbind_all_channels. We could still have global ones.
@@ -1433,6 +1434,44 @@ static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
return ret;
}
static int nvgpu_perfbuf_reserve_pma(struct dbg_session_gk20a *dbg_s)
{
struct gk20a *g = dbg_s->g;
int err;
/* Legacy profiler only supports global PMA stream */
err = nvgpu_profiler_alloc(g, &dbg_s->prof,
NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE);
if (err != 0) {
nvgpu_err(g, "Failed to allocate profiler object");
return err;
}
err = nvgpu_profiler_pm_resource_reserve(dbg_s->prof,
NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
if (err != 0) {
nvgpu_err(g, "Failed to reserve PMA stream");
nvgpu_profiler_free(dbg_s->prof);
return err;
}
return err;
}
static void nvgpu_perfbuf_release_pma(struct dbg_session_gk20a *dbg_s)
{
struct gk20a *g = dbg_s->g;
int err;
err = nvgpu_profiler_pm_resource_release(dbg_s->prof,
NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
if (err != 0) {
nvgpu_err(g, "Failed to release PMA stream");
}
nvgpu_profiler_free(dbg_s->prof);
}
static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_perfbuf_map_args *args)
{
@@ -1448,12 +1487,17 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
return -EBUSY;
}
err = nvgpu_perfbuf_init_vm(g);
if (err) {
err = nvgpu_perfbuf_reserve_pma(dbg_s);
if (err != 0) {
nvgpu_mutex_release(&g->dbg_sessions_lock);
return err;
}
err = nvgpu_perfbuf_init_vm(g);
if (err != 0) {
goto err_release_pma;
}
err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
args->dmabuf_fd,
&args->offset,
@@ -1488,6 +1532,8 @@ err_unmap:
nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL);
err_remove_vm:
nvgpu_perfbuf_deinit_vm(g);
err_release_pma:
nvgpu_perfbuf_release_pma(dbg_s);
nvgpu_mutex_release(&g->dbg_sessions_lock);
return err;
}
@@ -1505,7 +1551,7 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
return -EINVAL;
}
err = gk20a_perfbuf_release_locked(g, args->offset);
err = gk20a_perfbuf_release_locked(g, dbg_s, args->offset);
nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -1705,7 +1751,8 @@ static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s,
args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE;
}
static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
static int gk20a_perfbuf_release_locked(struct gk20a *g,
struct dbg_session_gk20a *dbg_s, u64 offset)
{
struct mm_gk20a *mm = &g->mm;
struct vm_gk20a *vm = mm->perfbuf.vm;
@@ -1717,6 +1764,8 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
nvgpu_perfbuf_deinit_vm(g);
nvgpu_perfbuf_release_pma(dbg_s);
g->perfbuf.owner = NULL;
g->perfbuf.offset = 0;
return err;