mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: implement HWPM streamout teardown sequence
Implement the functions below:

- nvgpu_profiler_quiesce_hwpm_streamout_resident
  Teardown sequence when the context is resident, or when the profiling
  session is a device-level session.
- nvgpu_profiler_quiesce_hwpm_streamout_non_resident
  Teardown sequence when the context is not resident.
- nvgpu_profiler_quiesce_hwpm_streamout
  Generic sequence that calls either of the above APIs based on whether
  the context is resident or not.

Trigger the HWPM streamout teardown sequence while unbinding resources in
nvgpu_profiler_unbind_hwpm_streamout().

Add a new HAL, gops.gr.is_tsg_ctx_resident, to call
gk20a_is_tsg_ctx_resident() from common code.

Implement the supporting HALs below for the resident teardown sequence:

- gops.perf.pma_stream_enable()
- gops.perf.disable_all_perfmons()
- gops.perf.wait_for_idle_pmm_routers()
- gops.perf.wait_for_idle_pma()
- gops.gr.disable_cau()
- gops.gr.disable_smpc()

Jira NVGPU-5360

Change-Id: I304ea25d296fae0146937b15228ea21edc091e16
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2461333
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
ce8358ef2b
commit
9221b01968
@@ -31,6 +31,7 @@
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/gr/ctx.h>
|
||||
#include <nvgpu/perfbuf.h>
|
||||
#include <nvgpu/gr/gr.h>
|
||||
|
||||
static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
|
||||
static int generate_unique_id(void)
|
||||
@@ -332,6 +333,128 @@ static int nvgpu_profiler_unbind_hwpm(struct nvgpu_profiler_object *prof)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_profiler_quiesce_hwpm_streamout_resident(struct nvgpu_profiler_object *prof)
|
||||
{
|
||||
struct gk20a *g = prof->g;
|
||||
u64 bytes_available;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_prof,
|
||||
"HWPM streamout quiesce in resident state started for handle %u",
|
||||
prof->prof_handle);
|
||||
|
||||
/* Enable streamout */
|
||||
g->ops.perf.pma_stream_enable(g, true);
|
||||
|
||||
/* Disable all perfmons */
|
||||
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
|
||||
g->ops.perf.disable_all_perfmons(g);
|
||||
}
|
||||
|
||||
/* Disable CAUs */
|
||||
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY] &&
|
||||
prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
|
||||
g->ops.gr.disable_cau != NULL) {
|
||||
g->ops.gr.disable_cau(g);
|
||||
}
|
||||
|
||||
/* Disable SMPC */
|
||||
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
|
||||
g->ops.gr.disable_smpc != NULL) {
|
||||
g->ops.gr.disable_smpc(g);
|
||||
}
|
||||
|
||||
/* Wait for routers to idle/quiescent */
|
||||
err = g->ops.perf.wait_for_idle_pmm_routers(g);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Wait for PMA to idle/quiescent */
|
||||
err = g->ops.perf.wait_for_idle_pma(g);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Disable streamout */
|
||||
g->ops.perf.pma_stream_enable(g, false);
|
||||
|
||||
/* wait for all the inflight records from fb-hub to stream out */
|
||||
err = nvgpu_perfbuf_update_get_put(g, 0U, &bytes_available,
|
||||
prof->pma_bytes_available_buffer_cpuva, true,
|
||||
NULL, NULL);
|
||||
|
||||
fail:
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to quiesce HWPM streamout in resident state");
|
||||
} else {
|
||||
nvgpu_log(g, gpu_dbg_prof,
|
||||
"HWPM streamout quiesce in resident state successfull for handle %u",
|
||||
prof->prof_handle);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct nvgpu_profiler_object *prof)
|
||||
{
|
||||
struct nvgpu_mem *pm_ctx_mem;
|
||||
struct gk20a *g = prof->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_prof,
|
||||
"HWPM streamout quiesce in non-resident state started for handle %u",
|
||||
prof->prof_handle);
|
||||
|
||||
if (prof->tsg == NULL || prof->tsg->gr_ctx == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(prof->tsg->gr_ctx);
|
||||
if (pm_ctx_mem == NULL) {
|
||||
nvgpu_err(g, "No PM context");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nvgpu_memset(g, pm_ctx_mem, 0U, 0U, pm_ctx_mem->size);
|
||||
nvgpu_log(g, gpu_dbg_prof,
|
||||
"HWPM streamout quiesce in non-resident state successfull for handle %u",
|
||||
prof->prof_handle);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvgpu_profiler_quiesce_hwpm_streamout(struct nvgpu_profiler_object *prof)
|
||||
{
|
||||
struct gk20a *g = prof->g;
|
||||
bool ctx_resident;
|
||||
int err, ctxsw_err;
|
||||
|
||||
err = nvgpu_gr_disable_ctxsw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "unable to stop gr ctxsw");
|
||||
return err;
|
||||
}
|
||||
|
||||
ctx_resident = g->ops.gr.is_tsg_ctx_resident(prof->tsg);
|
||||
|
||||
if (ctx_resident) {
|
||||
err = nvgpu_profiler_quiesce_hwpm_streamout_resident(prof);
|
||||
} else {
|
||||
err = nvgpu_profiler_quiesce_hwpm_streamout_non_resident(prof);
|
||||
}
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to quiesce HWPM streamout");
|
||||
}
|
||||
|
||||
ctxsw_err = nvgpu_gr_enable_ctxsw(g);
|
||||
if (ctxsw_err != 0) {
|
||||
nvgpu_err(g, "unable to restart ctxsw!");
|
||||
err = ctxsw_err;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof)
|
||||
{
|
||||
struct gk20a *g = prof->g;
|
||||
@@ -357,6 +480,19 @@ static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *pr
|
||||
struct gk20a *g = prof->g;
|
||||
int err;
|
||||
|
||||
if (prof->scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) {
|
||||
if (prof->ctxsw[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
|
||||
err = nvgpu_profiler_quiesce_hwpm_streamout(prof);
|
||||
} else {
|
||||
err = nvgpu_profiler_quiesce_hwpm_streamout_resident(prof);
|
||||
}
|
||||
} else {
|
||||
err = nvgpu_profiler_quiesce_hwpm_streamout(prof);
|
||||
}
|
||||
if (err) {
|
||||
return err;
|
||||
}
|
||||
|
||||
g->ops.perf.bind_mem_bytes_buffer_addr(g, 0ULL);
|
||||
|
||||
err = g->ops.perfbuf.perfbuf_disable(g);
|
||||
|
||||
@@ -1341,7 +1341,7 @@ bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg)
|
||||
bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg)
|
||||
{
|
||||
u32 curr_gr_tsgid;
|
||||
struct gk20a *g = tsg->g;
|
||||
|
||||
@@ -72,6 +72,7 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
|
||||
int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
|
||||
struct nvgpu_channel *ch, u64 sms, bool enable);
|
||||
bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch);
|
||||
bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg);
|
||||
int gk20a_gr_lock_down_sm(struct gk20a *g,
|
||||
u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
|
||||
bool check_errors);
|
||||
|
||||
@@ -127,3 +127,27 @@ int tu104_gr_update_smpc_global_mode(struct gk20a *g, bool enable)
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void tu104_gr_disable_cau(struct gk20a *g)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0U; i < gr_gpcs_tpcs_cau_control__size_1_v(); ++i) {
|
||||
nvgpu_writel(g, gr_gpcs_tpcs_cau_control_r(i), 0U);
|
||||
}
|
||||
|
||||
if (g->ops.priv_ring.read_pri_fence != NULL) {
|
||||
g->ops.priv_ring.read_pri_fence(g);
|
||||
}
|
||||
}
|
||||
|
||||
void tu104_gr_disable_smpc(struct gk20a *g)
|
||||
{
|
||||
nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control_r(), 0U);
|
||||
nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control0_r(), 0U);
|
||||
nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control5_r(), 0U);
|
||||
|
||||
if (g->ops.priv_ring.read_pri_fence != NULL) {
|
||||
g->ops.priv_ring.read_pri_fence(g);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,5 +40,9 @@ void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
|
||||
u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs,
|
||||
u32 *ctrl_register_stride);
|
||||
int tu104_gr_update_smpc_global_mode(struct gk20a *g, bool enable);
|
||||
|
||||
void tu104_gr_disable_cau(struct gk20a *g);
|
||||
void tu104_gr_disable_smpc(struct gk20a *g);
|
||||
|
||||
#endif /* CONFIG_NVGPU_DEBUGGER */
|
||||
#endif /* NVGPU_GR_TU104_H */
|
||||
|
||||
@@ -660,6 +660,7 @@ static const struct gops_gr gv11b_ops_gr = {
|
||||
.wait_for_pause = NULL,
|
||||
.resume_from_pause = NULL,
|
||||
.clear_sm_errors = gr_gk20a_clear_sm_errors,
|
||||
.is_tsg_ctx_resident = gk20a_is_tsg_ctx_resident,
|
||||
.sm_debugger_attached = gv11b_gr_sm_debugger_attached,
|
||||
.suspend_single_sm = gv11b_gr_suspend_single_sm,
|
||||
.suspend_all_sms = gv11b_gr_suspend_all_sms,
|
||||
@@ -1238,6 +1239,10 @@ static const struct gops_perf gv11b_ops_perf = {
|
||||
.get_num_hwpm_perfmon = gv11b_perf_get_num_hwpm_perfmon,
|
||||
.init_hwpm_pmm_register = gv11b_perf_init_hwpm_pmm_register,
|
||||
.reset_hwpm_pmm_registers = gv11b_perf_reset_hwpm_pmm_registers,
|
||||
.pma_stream_enable = gv11b_perf_pma_stream_enable,
|
||||
.disable_all_perfmons = gv11b_perf_disable_all_perfmons,
|
||||
.wait_for_idle_pmm_routers = gv11b_perf_wait_for_idle_pmm_routers,
|
||||
.wait_for_idle_pma = gv11b_perf_wait_for_idle_pma,
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
@@ -695,6 +695,8 @@ static const struct gops_gr tu104_ops_gr = {
|
||||
.update_smpc_global_mode = tu104_gr_update_smpc_global_mode,
|
||||
.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
|
||||
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
|
||||
.disable_cau = tu104_gr_disable_cau,
|
||||
.disable_smpc = tu104_gr_disable_smpc,
|
||||
.clear_sm_error_state = gv11b_gr_clear_sm_error_state,
|
||||
.suspend_contexts = gr_gp10b_suspend_contexts,
|
||||
.resume_contexts = gr_gk20a_resume_contexts,
|
||||
@@ -702,6 +704,7 @@ static const struct gops_gr tu104_ops_gr = {
|
||||
.wait_for_pause = NULL,
|
||||
.resume_from_pause = NULL,
|
||||
.clear_sm_errors = gr_gk20a_clear_sm_errors,
|
||||
.is_tsg_ctx_resident = gk20a_is_tsg_ctx_resident,
|
||||
.sm_debugger_attached = gv11b_gr_sm_debugger_attached,
|
||||
.suspend_single_sm = gv11b_gr_suspend_single_sm,
|
||||
.suspend_all_sms = gv11b_gr_suspend_all_sms,
|
||||
@@ -1304,6 +1307,10 @@ static const struct gops_perf tu104_ops_perf = {
|
||||
.get_num_hwpm_perfmon = gv11b_perf_get_num_hwpm_perfmon,
|
||||
.init_hwpm_pmm_register = gv11b_perf_init_hwpm_pmm_register,
|
||||
.reset_hwpm_pmm_registers = gv11b_perf_reset_hwpm_pmm_registers,
|
||||
.pma_stream_enable = gv11b_perf_pma_stream_enable,
|
||||
.disable_all_perfmons = gv11b_perf_disable_all_perfmons,
|
||||
.wait_for_idle_pmm_routers = gv11b_perf_wait_for_idle_pmm_routers,
|
||||
.wait_for_idle_pma = gv11b_perf_wait_for_idle_pma,
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
@@ -32,6 +32,8 @@
|
||||
|
||||
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
|
||||
|
||||
#define PMM_ROUTER_OFFSET 0x200U
|
||||
|
||||
bool gv11b_perf_get_membuf_overflow_status(struct gk20a *g)
|
||||
{
|
||||
const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
|
||||
@@ -108,7 +110,9 @@ int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed,
|
||||
{
|
||||
u32 val;
|
||||
|
||||
nvgpu_writel(g, perf_pmasys_mem_bump_r(), bytes_consumed);
|
||||
if (bytes_consumed != 0U) {
|
||||
nvgpu_writel(g, perf_pmasys_mem_bump_r(), bytes_consumed);
|
||||
}
|
||||
|
||||
if (update_available_bytes) {
|
||||
val = nvgpu_readl(g, perf_pmasys_control_r());
|
||||
@@ -497,3 +501,165 @@ void gv11b_perf_init_hwpm_pmm_register(struct gk20a *g)
|
||||
g->ops.perf.get_pmmgpc_per_chiplet_offset(),
|
||||
g->num_gpc_perfmon);
|
||||
}
|
||||
|
||||
/*
 * Set or clear the STREAM field of the PMA control register.
 *
 * Performs a read-modify-write of perf_pmasys_control_r(), so all other
 * fields of the register keep the value that was read.
 *
 * @g:      GPU driver struct.
 * @enable: true writes the "enable" field value, false the "disable" one.
 */
void gv11b_perf_pma_stream_enable(struct gk20a *g, bool enable)
{
	const u32 stream_field = enable ?
		perf_pmasys_control_stream_enable_f() :
		perf_pmasys_control_stream_disable_f();
	u32 control = nvgpu_readl(g, perf_pmasys_control_r());

	control = set_field(control, perf_pmasys_control_stream_m(),
			stream_field);

	nvgpu_writel(g, perf_pmasys_control_r(), control);
}
|
||||
|
||||
void gv11b_perf_disable_all_perfmons(struct gk20a *g)
|
||||
{
|
||||
if (g->num_sys_perfmon == 0U) {
|
||||
g->ops.perf.get_num_hwpm_perfmon(g, &g->num_sys_perfmon,
|
||||
&g->num_fbp_perfmon, &g->num_gpc_perfmon);
|
||||
}
|
||||
|
||||
g->ops.perf.set_pmm_register(g, perf_pmmsys_control_r(0U), 0U, 1U,
|
||||
g->ops.perf.get_pmmsys_per_chiplet_offset(),
|
||||
g->num_sys_perfmon);
|
||||
|
||||
g->ops.perf.set_pmm_register(g, perf_pmmfbp_fbps_control_r(0U), 0U, 1U,
|
||||
g->ops.perf.get_pmmfbp_per_chiplet_offset(),
|
||||
g->num_fbp_perfmon);
|
||||
|
||||
g->ops.perf.set_pmm_register(g, perf_pmmgpc_gpcs_control_r(0U), 0U, 1U,
|
||||
g->ops.perf.get_pmmgpc_per_chiplet_offset(),
|
||||
g->num_gpc_perfmon);
|
||||
|
||||
if (g->ops.priv_ring.read_pri_fence != NULL) {
|
||||
g->ops.priv_ring.read_pri_fence(g);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Poll a PMM router status register until it reads "empty" or "quiescent".
 *
 * The status field is always extracted with
 * perf_pmmsysrouter_enginestatus_status_v(), whichever router register
 * @offset refers to. The register is read at least once; between reads the
 * caller sleeps for 20-40 us.
 *
 * @g:          GPU driver struct.
 * @offset:     register offset to poll.
 * @timeout_ms: polling budget handed to nvgpu_timeout_init().
 *
 * Returns 0 once the register reports idle, -ETIMEDOUT when the timeout
 * expires first, or the error from nvgpu_timeout_init().
 */
static int poll_for_pmm_router_idle(struct gk20a *g, u32 offset, u32 timeout_ms)
{
	struct nvgpu_timeout timeout;
	u32 router_status;
	int err;

	err = nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
	if (err != 0) {
		nvgpu_err(g, "failed to init timeout");
		return err;
	}

	for (;;) {
		router_status = perf_pmmsysrouter_enginestatus_status_v(
					nvgpu_readl(g, offset));

		if (router_status == perf_pmmsysrouter_enginestatus_status_empty_v()) {
			return 0;
		}
		if (router_status == perf_pmmsysrouter_enginestatus_status_quiescent_v()) {
			return 0;
		}

		nvgpu_usleep_range(20, 40);

		/* Expiry is checked only after the sleep. */
		if (nvgpu_timeout_expired(&timeout) != 0) {
			break;
		}
	}

	return -ETIMEDOUT;
}
|
||||
|
||||
int gv11b_perf_wait_for_idle_pmm_routers(struct gk20a *g)
|
||||
{
|
||||
u32 num_gpc, num_fbp;
|
||||
int err;
|
||||
u32 i;
|
||||
|
||||
num_gpc = nvgpu_gr_config_get_gpc_count(nvgpu_gr_get_config_ptr(g));
|
||||
num_fbp = nvgpu_fbp_get_num_fbps(g->fbp);
|
||||
|
||||
/* wait for all perfmons to report idle */
|
||||
err = poll_for_pmm_router_idle(g, perf_pmmsysrouter_perfmonstatus_r(), 1);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
for (i = 0U; i < num_gpc; ++i) {
|
||||
err = poll_for_pmm_router_idle(g,
|
||||
perf_pmmgpcrouter_perfmonstatus_r() + (i * PMM_ROUTER_OFFSET),
|
||||
1);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0U; i < num_fbp; ++i) {
|
||||
err = poll_for_pmm_router_idle(g,
|
||||
perf_pmmfbprouter_perfmonstatus_r() + (i * PMM_ROUTER_OFFSET),
|
||||
1);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* wait for all routers to report idle */
|
||||
err = poll_for_pmm_router_idle(g, perf_pmmsysrouter_enginestatus_r(), 1);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
for (i = 0U; i < num_gpc; ++i) {
|
||||
err = poll_for_pmm_router_idle(g,
|
||||
perf_pmmgpcrouter_enginestatus_r() + (i * PMM_ROUTER_OFFSET),
|
||||
1);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0U; i < num_fbp; ++i) {
|
||||
err = poll_for_pmm_router_idle(g,
|
||||
perf_pmmfbprouter_enginestatus_r() + (i * PMM_ROUTER_OFFSET),
|
||||
1);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gv11b_perf_wait_for_idle_pma(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_timeout timeout;
|
||||
u32 status, rbufempty_status;
|
||||
u32 timeout_ms = 1;
|
||||
u32 reg_val;
|
||||
int err;
|
||||
|
||||
err = nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init timeout");
|
||||
return err;
|
||||
}
|
||||
|
||||
do {
|
||||
reg_val = nvgpu_readl(g, perf_pmasys_enginestatus_r());
|
||||
|
||||
status = perf_pmasys_enginestatus_status_v(reg_val);
|
||||
rbufempty_status = perf_pmasys_enginestatus_rbufempty_v(reg_val);
|
||||
|
||||
if ((status == perf_pmasys_enginestatus_status_empty_v()) &&
|
||||
(rbufempty_status == perf_pmasys_enginestatus_rbufempty_empty_v())) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_usleep_range(20, 40);
|
||||
} while (nvgpu_timeout_expired(&timeout) == 0);
|
||||
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
@@ -62,5 +62,11 @@ void gv11b_perf_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon,
|
||||
u32 *num_fbp_perfmon, u32 *num_gpc_perfmon);
|
||||
void gv11b_perf_reset_hwpm_pmm_registers(struct gk20a *g);
|
||||
void gv11b_perf_init_hwpm_pmm_register(struct gk20a *g);
|
||||
|
||||
void gv11b_perf_pma_stream_enable(struct gk20a *g, bool enable);
|
||||
void gv11b_perf_disable_all_perfmons(struct gk20a *g);
|
||||
int gv11b_perf_wait_for_idle_pmm_routers(struct gk20a *g);
|
||||
int gv11b_perf_wait_for_idle_pma(struct gk20a *g);
|
||||
|
||||
#endif /* CONFIG_NVGPU_DEBUGGER */
|
||||
#endif
|
||||
|
||||
@@ -70,6 +70,10 @@ struct gops_perf {
|
||||
void (*set_pmm_register)(struct gk20a *g, u32 offset, u32 val,
|
||||
u32 num_chiplets, u32 chiplet_stride, u32 num_perfmons);
|
||||
void (*reset_hwpm_pmm_registers)(struct gk20a *g);
|
||||
void (*pma_stream_enable)(struct gk20a *g, bool enable);
|
||||
void (*disable_all_perfmons)(struct gk20a *g);
|
||||
int (*wait_for_idle_pmm_routers)(struct gk20a *g);
|
||||
int (*wait_for_idle_pma)(struct gk20a *g);
|
||||
};
|
||||
struct gops_perfbuf {
|
||||
int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size);
|
||||
|
||||
@@ -1127,6 +1127,7 @@ struct gops_gr {
|
||||
struct nvgpu_warpstate *w_state);
|
||||
int (*resume_from_pause)(struct gk20a *g);
|
||||
int (*clear_sm_errors)(struct gk20a *g);
|
||||
bool (*is_tsg_ctx_resident)(struct nvgpu_tsg *tsg);
|
||||
bool (*sm_debugger_attached)(struct gk20a *g);
|
||||
void (*suspend_single_sm)(struct gk20a *g,
|
||||
u32 gpc, u32 tpc, u32 sm,
|
||||
@@ -1190,6 +1191,8 @@ struct gops_gr {
|
||||
int (*set_boosted_ctx)(struct nvgpu_channel *ch, bool boost);
|
||||
#endif
|
||||
#endif
|
||||
void (*disable_cau)(struct gk20a *g);
|
||||
void (*disable_smpc)(struct gk20a *g);
|
||||
/** @endcond */
|
||||
|
||||
/** This structure stores the GR ecc subunit hal pointers. */
|
||||
|
||||
@@ -77,6 +77,9 @@
|
||||
#define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U)
|
||||
#define perf_pmasys_control_update_bytes_m() (U32(0x1U) << 3U)
|
||||
#define perf_pmasys_control_update_bytes_doit_f() (0x8U)
|
||||
#define perf_pmasys_control_stream_m() (U32(0x1U) << 0U)
|
||||
#define perf_pmasys_control_stream_enable_f() (0x1U)
|
||||
#define perf_pmasys_control_stream_disable_f() (0x0U)
|
||||
#define perf_pmasys_mem_block_r() (0x0024a070U)
|
||||
#define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U)
|
||||
#define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U)
|
||||
@@ -109,8 +112,11 @@
|
||||
#define perf_pmasys_mem_bytes_addr_ptr_b() (2U)
|
||||
#define perf_pmasys_enginestatus_r() (0x0024a0a4U)
|
||||
#define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U)
|
||||
#define perf_pmasys_enginestatus_rbufempty_v(r) (((r) >> 4U) & 0x1U)
|
||||
#define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U)
|
||||
#define perf_pmasys_enginestatus_rbufempty_empty_f() (0x10U)
|
||||
#define perf_pmasys_enginestatus_status_v(r) (((r) >> 0U) & 0x7U)
|
||||
#define perf_pmasys_enginestatus_status_empty_v() (0x00000000U)
|
||||
#define perf_pmmsys_engine_sel_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0024006cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmsys_engine_sel__size_1_v() (0x00000020U)
|
||||
@@ -120,4 +126,19 @@
|
||||
#define perf_pmmgpc_engine_sel_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0018006cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmgpc_engine_sel__size_1_v() (0x00000020U)
|
||||
#define perf_pmmsys_control_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0024009cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmfbp_fbps_control_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0027c09cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmgpc_gpcs_control_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0027809cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmsysrouter_perfmonstatus_r() (0x00248014U)
|
||||
#define perf_pmmsysrouter_enginestatus_r() (0x00248010U)
|
||||
#define perf_pmmsysrouter_enginestatus_status_v(r) (((r) >> 0U) & 0x7U)
|
||||
#define perf_pmmsysrouter_enginestatus_status_empty_v() (0x00000000U)
|
||||
#define perf_pmmsysrouter_enginestatus_status_quiescent_v() (0x00000003U)
|
||||
#define perf_pmmgpcrouter_perfmonstatus_r() (0x00244014U)
|
||||
#define perf_pmmgpcrouter_enginestatus_r() (0x00244010U)
|
||||
#define perf_pmmfbprouter_perfmonstatus_r() (0x00246014U)
|
||||
#define perf_pmmfbprouter_enginestatus_r() (0x00246010U)
|
||||
#endif
|
||||
|
||||
@@ -1230,4 +1230,10 @@
|
||||
#define gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(v)\
|
||||
((U32(v) & 0x1ffU) << 0U)
|
||||
#define gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m() (U32(0x1ffU) << 0U)
|
||||
#define gr_gpcs_tpcs_cau_control_r(i)\
|
||||
(nvgpu_safe_add_u32(0x00419980U, nvgpu_safe_mult_u32((i), 64U)))
|
||||
#define gr_gpcs_tpcs_cau_control__size_1_v() (0x00000002U)
|
||||
#define gr_egpcs_etpcs_sm_dsm_perf_counter_control_r() (0x00481a48U)
|
||||
#define gr_egpcs_etpcs_sm_dsm_perf_counter_control0_r() (0x00481a08U)
|
||||
#define gr_egpcs_etpcs_sm_dsm_perf_counter_control5_r() (0x00481a0cU)
|
||||
#endif
|
||||
|
||||
@@ -77,6 +77,9 @@
|
||||
#define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U)
|
||||
#define perf_pmasys_control_update_bytes_m() (U32(0x1U) << 3U)
|
||||
#define perf_pmasys_control_update_bytes_doit_f() (0x8U)
|
||||
#define perf_pmasys_control_stream_m() (U32(0x1U) << 0U)
|
||||
#define perf_pmasys_control_stream_enable_f() (0x1U)
|
||||
#define perf_pmasys_control_stream_disable_f() (0x0U)
|
||||
#define perf_pmasys_mem_block_r() (0x0024a070U)
|
||||
#define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U)
|
||||
#define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U)
|
||||
@@ -109,8 +112,11 @@
|
||||
#define perf_pmasys_mem_bytes_addr_ptr_b() (2U)
|
||||
#define perf_pmasys_enginestatus_r() (0x0024a0a4U)
|
||||
#define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U)
|
||||
#define perf_pmasys_enginestatus_rbufempty_v(r) (((r) >> 4U) & 0x1U)
|
||||
#define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U)
|
||||
#define perf_pmasys_enginestatus_rbufempty_empty_f() (0x10U)
|
||||
#define perf_pmasys_enginestatus_status_v(r) (((r) >> 0U) & 0x7U)
|
||||
#define perf_pmasys_enginestatus_status_empty_v() (0x00000000U)
|
||||
#define perf_pmmsys_engine_sel_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0024006cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmsys_engine_sel__size_1_v() (0x00000020U)
|
||||
@@ -120,4 +126,19 @@
|
||||
#define perf_pmmgpc_engine_sel_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0018006cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmgpc_engine_sel__size_1_v() (0x00000020U)
|
||||
#define perf_pmmsys_control_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0024009cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmfbp_fbps_control_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0027c09cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmgpc_gpcs_control_r(i)\
|
||||
(nvgpu_safe_add_u32(0x0027809cU, nvgpu_safe_mult_u32((i), 512U)))
|
||||
#define perf_pmmsysrouter_perfmonstatus_r() (0x00248014U)
|
||||
#define perf_pmmsysrouter_enginestatus_r() (0x00248010U)
|
||||
#define perf_pmmsysrouter_enginestatus_status_v(r) (((r) >> 0U) & 0x7U)
|
||||
#define perf_pmmsysrouter_enginestatus_status_empty_v() (0x00000000U)
|
||||
#define perf_pmmsysrouter_enginestatus_status_quiescent_v() (0x00000003U)
|
||||
#define perf_pmmgpcrouter_perfmonstatus_r() (0x00244014U)
|
||||
#define perf_pmmgpcrouter_enginestatus_r() (0x00244010U)
|
||||
#define perf_pmmfbprouter_perfmonstatus_r() (0x00246014U)
|
||||
#define perf_pmmfbprouter_enginestatus_r() (0x00246010U)
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user