From 9221b0196839957a0347daf634e9bee210cdb6d9 Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Mon, 14 Dec 2020 17:13:50 +0530
Subject: [PATCH] gpu: nvgpu: implement HWPM streamout teardown sequence

Implement the following functions:
- nvgpu_profiler_quiesce_hwpm_streamout_resident
  Teardown sequence used when the context is resident, or when the
  profiling session is a device-level session.
- nvgpu_profiler_quiesce_hwpm_streamout_non_resident
  Teardown sequence used when the context is not resident.
- nvgpu_profiler_quiesce_hwpm_streamout
  Generic sequence that calls one of the above APIs depending on
  whether the context is resident.

Trigger the HWPM streamout teardown sequence while unbinding
resources in nvgpu_profiler_unbind_hwpm_streamout().

Add a new HAL gops.gr.is_tsg_ctx_resident to call
gk20a_is_tsg_ctx_resident() from common code.

Implement the following supporting HALs for the resident teardown
sequence:
- gops.perf.pma_stream_enable()
- gops.perf.disable_all_perfmons()
- gops.perf.wait_for_idle_pmm_routers()
- gops.perf.wait_for_idle_pma()
- gops.gr.disable_cau()
- gops.gr.disable_smpc()

Jira NVGPU-5360

Change-Id: I304ea25d296fae0146937b15228ea21edc091e16
Signed-off-by: Deepak Nibade
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2461333
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-cert
Reviewed-by: svc-mobile-misra
Reviewed-by: Antony Clince Alex
Reviewed-by: mobile promotions
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/profiler/profiler.c  | 136 ++++++++++++++
 drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c        |   2 +-
 drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h        |   1 +
 drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.c        |  24 +++
 drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.h        |   4 +
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |   5 +
 drivers/gpu/nvgpu/hal/init/hal_tu104.c        |   7 +
 drivers/gpu/nvgpu/hal/perf/perf_gv11b.c       | 168 +++++++++++++++++-
 drivers/gpu/nvgpu/hal/perf/perf_gv11b.h       |   6 +
 .../gpu/nvgpu/include/nvgpu/gops/debugger.h   |   4 +
 drivers/gpu/nvgpu/include/nvgpu/gops/gr.h     |   3 +
 .../include/nvgpu/hw/gv11b/hw_perf_gv11b.h    |  21 +++
 .../include/nvgpu/hw/tu104/hw_gr_tu104.h      |   6 +
 .../include/nvgpu/hw/tu104/hw_perf_tu104.h    |  21 +++
 14 files changed, 406 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c
index c074e67d5..43b7a5592 100644
--- a/drivers/gpu/nvgpu/common/profiler/profiler.c
+++ b/drivers/gpu/nvgpu/common/profiler/profiler.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include

 static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
 static int generate_unique_id(void)
@@ -332,6 +333,128 @@ static int nvgpu_profiler_unbind_hwpm(struct nvgpu_profiler_object *prof)
 	return err;
 }

+static int nvgpu_profiler_quiesce_hwpm_streamout_resident(struct nvgpu_profiler_object *prof)
+{
+	struct gk20a *g = prof->g;
+	u64 bytes_available;
+	int err = 0;
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"HWPM streamout quiesce in resident state started for handle %u",
+		prof->prof_handle);
+
+	/* Enable streamout */
+	g->ops.perf.pma_stream_enable(g, true);
+
+	/* Disable all perfmons */
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
+		g->ops.perf.disable_all_perfmons(g);
+	}
+
+	/* Disable CAUs */
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY] &&
+	    prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
+	    g->ops.gr.disable_cau != NULL) {
+		g->ops.gr.disable_cau(g);
+	}
+
+	/* Disable SMPC */
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
+	    g->ops.gr.disable_smpc != NULL) {
+		g->ops.gr.disable_smpc(g);
+	}
+
+	/* Wait for routers to idle/quiescent */
+	err = g->ops.perf.wait_for_idle_pmm_routers(g);
+	if (err != 0) {
+		goto fail;
+	}
+
+	/* Wait for PMA to idle/quiescent */
+	err = g->ops.perf.wait_for_idle_pma(g);
+	if (err != 0) {
+		goto fail;
+	}
+
+	/* Disable streamout */
+	g->ops.perf.pma_stream_enable(g, false);
+
+	/* wait for all the inflight records from fb-hub to stream out */
+	err = nvgpu_perfbuf_update_get_put(g, 0U, &bytes_available,
+			prof->pma_bytes_available_buffer_cpuva, true,
+			NULL, NULL);
+
+fail:
+	if (err != 0) {
+		nvgpu_err(g, "Failed to quiesce HWPM streamout in resident state");
+	} else {
+		nvgpu_log(g, gpu_dbg_prof,
+			"HWPM streamout quiesce in resident state successful for handle %u",
+			prof->prof_handle);
+	}
+
+	return 0;
+}
+
+static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct nvgpu_profiler_object *prof)
+{
+	struct nvgpu_mem *pm_ctx_mem;
+	struct gk20a *g = prof->g;
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"HWPM streamout quiesce in non-resident state started for handle %u",
+		prof->prof_handle);
+
+	if (prof->tsg == NULL || prof->tsg->gr_ctx == NULL) {
+		return -EINVAL;
+	}
+
+	pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(prof->tsg->gr_ctx);
+	if (pm_ctx_mem == NULL) {
+		nvgpu_err(g, "No PM context");
+		return -EINVAL;
+	}
+
+	nvgpu_memset(g, pm_ctx_mem, 0U, 0U, pm_ctx_mem->size);
+	nvgpu_log(g, gpu_dbg_prof,
+		"HWPM streamout quiesce in non-resident state successful for handle %u",
+		prof->prof_handle);
+
+	return 0;
+}
+
+static int nvgpu_profiler_quiesce_hwpm_streamout(struct nvgpu_profiler_object *prof)
+{
+	struct gk20a *g = prof->g;
+	bool ctx_resident;
+	int err, ctxsw_err;
+
+	err = nvgpu_gr_disable_ctxsw(g);
+	if (err != 0) {
+		nvgpu_err(g, "unable to stop gr ctxsw");
+		return err;
+	}
+
+	ctx_resident = g->ops.gr.is_tsg_ctx_resident(prof->tsg);
+
+	if (ctx_resident) {
+		err = nvgpu_profiler_quiesce_hwpm_streamout_resident(prof);
+	} else {
+		err = nvgpu_profiler_quiesce_hwpm_streamout_non_resident(prof);
+	}
+	if (err != 0) {
+		nvgpu_err(g, "Failed to quiesce HWPM streamout");
+	}
+
+	ctxsw_err = nvgpu_gr_enable_ctxsw(g);
+	if (ctxsw_err != 0) {
+		nvgpu_err(g, "unable to restart ctxsw!");
+		err = ctxsw_err;
+	}
+
+	return err;
+}
+
 static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof)
 {
 	struct gk20a *g = prof->g;
@@ -357,6 +480,19 @@ static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *pr
 	struct gk20a *g = prof->g;
 	int err;

+	if (prof->scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) {
+		if (prof->ctxsw[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
+			err = nvgpu_profiler_quiesce_hwpm_streamout(prof);
+		} else {
+			err = nvgpu_profiler_quiesce_hwpm_streamout_resident(prof);
+		}
+	} else {
+		err = nvgpu_profiler_quiesce_hwpm_streamout(prof);
+	}
+	if (err) {
+		return err;
+	}
+
 	g->ops.perf.bind_mem_bytes_buffer_addr(g, 0ULL);

 	err = g->ops.perfbuf.perfbuf_disable(g);
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
index 6088d2f05..f2e69d146 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
@@ -1341,7 +1341,7 @@ bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
 	return ret;
 }

-static bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg)
+bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg)
 {
 	u32 curr_gr_tsgid;
 	struct gk20a *g = tsg->g;
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
index e112b9e9c..fd69e7fc6 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
@@ -72,6 +72,7 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
 int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 	struct nvgpu_channel *ch, u64 sms, bool enable);
 bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch);
+bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg);
 int gk20a_gr_lock_down_sm(struct gk20a *g,
 	u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
 	bool check_errors);
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.c
index 3aa2d82fb..773f2260d 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.c
@@ -127,3 +127,27 @@ int tu104_gr_update_smpc_global_mode(struct gk20a *g, bool enable)

 	return err;
 }
+
+void tu104_gr_disable_cau(struct gk20a *g)
+{
+	u32 i;
+
+	for (i = 0U; i < gr_gpcs_tpcs_cau_control__size_1_v(); ++i) {
+		nvgpu_writel(g, gr_gpcs_tpcs_cau_control_r(i), 0U);
+	}
+
+	if (g->ops.priv_ring.read_pri_fence != NULL) {
+		g->ops.priv_ring.read_pri_fence(g);
+	}
+}
+
+void tu104_gr_disable_smpc(struct gk20a *g)
+{
+	nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control_r(), 0U);
+	nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control0_r(), 0U);
+	nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control5_r(), 0U);
+
+	if (g->ops.priv_ring.read_pri_fence != NULL) {
+		g->ops.priv_ring.read_pri_fence(g);
+	}
+}
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.h b/drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.h
index f029e4da3..181bbe557 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.h
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_tu104.h
@@ -40,5 +40,9 @@ void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 	u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs,
 	u32 *ctrl_register_stride);
 int tu104_gr_update_smpc_global_mode(struct gk20a *g, bool enable);
+
+void tu104_gr_disable_cau(struct gk20a *g);
+void tu104_gr_disable_smpc(struct gk20a *g);
+
 #endif /* CONFIG_NVGPU_DEBUGGER */
 #endif /* NVGPU_GR_TU104_H */
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index 3dd7ef8e2..a2caee408 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -660,6 +660,7 @@ static const struct gops_gr gv11b_ops_gr = {
 	.wait_for_pause = NULL,
 	.resume_from_pause = NULL,
 	.clear_sm_errors = gr_gk20a_clear_sm_errors,
+	.is_tsg_ctx_resident = gk20a_is_tsg_ctx_resident,
 	.sm_debugger_attached = gv11b_gr_sm_debugger_attached,
 	.suspend_single_sm = gv11b_gr_suspend_single_sm,
 	.suspend_all_sms = gv11b_gr_suspend_all_sms,
@@ -1238,6 +1239,10 @@ static const struct gops_perf gv11b_ops_perf = {
 	.get_num_hwpm_perfmon = gv11b_perf_get_num_hwpm_perfmon,
 	.init_hwpm_pmm_register = gv11b_perf_init_hwpm_pmm_register,
 	.reset_hwpm_pmm_registers = gv11b_perf_reset_hwpm_pmm_registers,
+	.pma_stream_enable = gv11b_perf_pma_stream_enable,
+	.disable_all_perfmons = gv11b_perf_disable_all_perfmons,
+	.wait_for_idle_pmm_routers = gv11b_perf_wait_for_idle_pmm_routers,
+	.wait_for_idle_pma = gv11b_perf_wait_for_idle_pma,
 };
 #endif
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 7417ce177..952d82c93 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -695,6 +695,8 @@ static const struct gops_gr tu104_ops_gr = {
 	.update_smpc_global_mode = tu104_gr_update_smpc_global_mode,
 	.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
 	.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+	.disable_cau = tu104_gr_disable_cau,
+	.disable_smpc = tu104_gr_disable_smpc,
 	.clear_sm_error_state = gv11b_gr_clear_sm_error_state,
 	.suspend_contexts = gr_gp10b_suspend_contexts,
 	.resume_contexts = gr_gk20a_resume_contexts,
@@ -702,6 +704,7 @@ static const struct gops_gr tu104_ops_gr = {
 	.wait_for_pause = NULL,
 	.resume_from_pause = NULL,
 	.clear_sm_errors = gr_gk20a_clear_sm_errors,
+	.is_tsg_ctx_resident = gk20a_is_tsg_ctx_resident,
 	.sm_debugger_attached = gv11b_gr_sm_debugger_attached,
 	.suspend_single_sm = gv11b_gr_suspend_single_sm,
 	.suspend_all_sms = gv11b_gr_suspend_all_sms,
@@ -1304,6 +1307,10 @@ static const struct gops_perf tu104_ops_perf = {
 	.get_num_hwpm_perfmon = gv11b_perf_get_num_hwpm_perfmon,
 	.init_hwpm_pmm_register = gv11b_perf_init_hwpm_pmm_register,
 	.reset_hwpm_pmm_registers = gv11b_perf_reset_hwpm_pmm_registers,
+	.pma_stream_enable = gv11b_perf_pma_stream_enable,
+	.disable_all_perfmons = gv11b_perf_disable_all_perfmons,
+	.wait_for_idle_pmm_routers = gv11b_perf_wait_for_idle_pmm_routers,
+	.wait_for_idle_pma = gv11b_perf_wait_for_idle_pma,
 };
 #endif
diff --git a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
index 28cf07e37..d5337661b 100644
--- a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
@@ -32,6 +32,8 @@

 #include

+#define PMM_ROUTER_OFFSET	0x200U
+
 bool gv11b_perf_get_membuf_overflow_status(struct gk20a *g)
 {
 	const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
@@ -108,7 +110,9 @@ int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed,
 {
 	u32 val;

-	nvgpu_writel(g, perf_pmasys_mem_bump_r(), bytes_consumed);
+	if (bytes_consumed != 0U) {
+		nvgpu_writel(g, perf_pmasys_mem_bump_r(), bytes_consumed);
+	}

 	if (update_available_bytes) {
 		val = nvgpu_readl(g, perf_pmasys_control_r());
@@ -497,3 +501,165 @@ void gv11b_perf_init_hwpm_pmm_register(struct gk20a *g)
 		g->ops.perf.get_pmmgpc_per_chiplet_offset(),
 		g->num_gpc_perfmon);
 }
+
+void gv11b_perf_pma_stream_enable(struct gk20a *g, bool enable)
+{
+	u32 reg_val;
+
+	reg_val = nvgpu_readl(g, perf_pmasys_control_r());
+
+	if (enable) {
+		reg_val = set_field(reg_val,
+				perf_pmasys_control_stream_m(),
+				perf_pmasys_control_stream_enable_f());
+	} else {
+		reg_val = set_field(reg_val,
+				perf_pmasys_control_stream_m(),
+				perf_pmasys_control_stream_disable_f());
+	}
+
+	nvgpu_writel(g, perf_pmasys_control_r(), reg_val);
+}
+
+void gv11b_perf_disable_all_perfmons(struct gk20a *g)
+{
+	if (g->num_sys_perfmon == 0U) {
+		g->ops.perf.get_num_hwpm_perfmon(g, &g->num_sys_perfmon,
+			&g->num_fbp_perfmon, &g->num_gpc_perfmon);
+	}
+
+	g->ops.perf.set_pmm_register(g, perf_pmmsys_control_r(0U), 0U, 1U,
+		g->ops.perf.get_pmmsys_per_chiplet_offset(),
+		g->num_sys_perfmon);
+
+	g->ops.perf.set_pmm_register(g, perf_pmmfbp_fbps_control_r(0U), 0U, 1U,
+		g->ops.perf.get_pmmfbp_per_chiplet_offset(),
+		g->num_fbp_perfmon);
+
+	g->ops.perf.set_pmm_register(g, perf_pmmgpc_gpcs_control_r(0U), 0U, 1U,
+		g->ops.perf.get_pmmgpc_per_chiplet_offset(),
+		g->num_gpc_perfmon);
+
+	if (g->ops.priv_ring.read_pri_fence != NULL) {
+		g->ops.priv_ring.read_pri_fence(g);
+	}
+}
+
+static int poll_for_pmm_router_idle(struct gk20a *g, u32 offset, u32 timeout_ms)
+{
+	struct nvgpu_timeout timeout;
+	u32 reg_val;
+	u32 status;
+	int err;
+
+	err = nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init timeout");
+		return err;
+	}
+
+	do {
+		reg_val = nvgpu_readl(g, offset);
+		status = perf_pmmsysrouter_enginestatus_status_v(reg_val);
+
+		if ((status == perf_pmmsysrouter_enginestatus_status_empty_v()) ||
+		    (status == perf_pmmsysrouter_enginestatus_status_quiescent_v())) {
+			return 0;
+		}
+
+		nvgpu_usleep_range(20, 40);
+	} while (nvgpu_timeout_expired(&timeout) == 0);
+
+	return -ETIMEDOUT;
+}
+
+int gv11b_perf_wait_for_idle_pmm_routers(struct gk20a *g)
+{
+	u32 num_gpc, num_fbp;
+	int err;
+	u32 i;
+
+	num_gpc = nvgpu_gr_config_get_gpc_count(nvgpu_gr_get_config_ptr(g));
+	num_fbp = nvgpu_fbp_get_num_fbps(g->fbp);
+
+	/* wait for all perfmons to report idle */
+	err = poll_for_pmm_router_idle(g, perf_pmmsysrouter_perfmonstatus_r(), 1);
+	if (err != 0) {
+		return err;
+	}
+
+	for (i = 0U; i < num_gpc; ++i) {
+		err = poll_for_pmm_router_idle(g,
+			perf_pmmgpcrouter_perfmonstatus_r() + (i * PMM_ROUTER_OFFSET),
+			1);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	for (i = 0U; i < num_fbp; ++i) {
+		err = poll_for_pmm_router_idle(g,
+			perf_pmmfbprouter_perfmonstatus_r() + (i * PMM_ROUTER_OFFSET),
+			1);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	/* wait for all routers to report idle */
+	err = poll_for_pmm_router_idle(g, perf_pmmsysrouter_enginestatus_r(), 1);
+	if (err != 0) {
+		return err;
+	}
+
+	for (i = 0U; i < num_gpc; ++i) {
+		err = poll_for_pmm_router_idle(g,
+			perf_pmmgpcrouter_enginestatus_r() + (i * PMM_ROUTER_OFFSET),
+			1);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	for (i = 0U; i < num_fbp; ++i) {
+		err = poll_for_pmm_router_idle(g,
+			perf_pmmfbprouter_enginestatus_r() + (i * PMM_ROUTER_OFFSET),
+			1);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int gv11b_perf_wait_for_idle_pma(struct gk20a *g)
+{
+	struct nvgpu_timeout timeout;
+	u32 status, rbufempty_status;
+	u32 timeout_ms = 1;
+	u32 reg_val;
+	int err;
+
+	err = nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init timeout");
+		return err;
+	}
+
+	do {
+		reg_val = nvgpu_readl(g, perf_pmasys_enginestatus_r());
+
+		status = perf_pmasys_enginestatus_status_v(reg_val);
+		rbufempty_status = perf_pmasys_enginestatus_rbufempty_v(reg_val);
+
+		if ((status == perf_pmasys_enginestatus_status_empty_v()) &&
+		    (rbufempty_status == perf_pmasys_enginestatus_rbufempty_empty_v())) {
+			return 0;
+		}
+
+		nvgpu_usleep_range(20, 40);
+	} while (nvgpu_timeout_expired(&timeout) == 0);
+
+	return -ETIMEDOUT;
+}
diff --git a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.h b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.h
index ca5fb295d..d487f9a2d 100644
--- a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.h
@@ -62,5 +62,11 @@ void gv11b_perf_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon,
 	u32 *num_fbp_perfmon, u32 *num_gpc_perfmon);
 void gv11b_perf_reset_hwpm_pmm_registers(struct gk20a *g);
 void gv11b_perf_init_hwpm_pmm_register(struct gk20a *g);
+
+void gv11b_perf_pma_stream_enable(struct gk20a *g, bool enable);
+void gv11b_perf_disable_all_perfmons(struct gk20a *g);
+int gv11b_perf_wait_for_idle_pmm_routers(struct gk20a *g);
+int gv11b_perf_wait_for_idle_pma(struct gk20a *g);
+
 #endif /* CONFIG_NVGPU_DEBUGGER */
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h b/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
index 4d75cde86..b4854eaf7 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
@@ -70,6 +70,10 @@ struct gops_perf {
 	void (*set_pmm_register)(struct gk20a *g, u32 offset, u32 val,
 		u32 num_chiplets, u32 chiplet_stride, u32 num_perfmons);
 	void (*reset_hwpm_pmm_registers)(struct gk20a *g);
+	void (*pma_stream_enable)(struct gk20a *g, bool enable);
+	void (*disable_all_perfmons)(struct gk20a *g);
+	int (*wait_for_idle_pmm_routers)(struct gk20a *g);
+	int (*wait_for_idle_pma)(struct gk20a *g);
 };
 struct gops_perfbuf {
 	int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
index 8e4dfe5a2..9ab37271d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
@@ -1127,6 +1127,7 @@ struct gops_gr {
 			struct nvgpu_warpstate *w_state);
 	int (*resume_from_pause)(struct gk20a *g);
 	int (*clear_sm_errors)(struct gk20a *g);
+	bool (*is_tsg_ctx_resident)(struct nvgpu_tsg *tsg);
 	bool (*sm_debugger_attached)(struct gk20a *g);
 	void (*suspend_single_sm)(struct gk20a *g,
 			u32 gpc, u32 tpc, u32 sm,
@@ -1190,6 +1191,8 @@ struct gops_gr {
 	int (*set_boosted_ctx)(struct nvgpu_channel *ch, bool boost);
 #endif
 #endif
+	void (*disable_cau)(struct gk20a *g);
+	void (*disable_smpc)(struct gk20a *g);
 	/** @endcond */

 	/** This structure stores the GR ecc subunit hal pointers. */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
index 49333af22..35f6ddec2 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
@@ -77,6 +77,9 @@
 #define perf_pmasys_control_membuf_clear_status_doit_f()        (0x20U)
 #define perf_pmasys_control_update_bytes_m()            (U32(0x1U) << 3U)
 #define perf_pmasys_control_update_bytes_doit_f()                (0x8U)
+#define perf_pmasys_control_stream_m()                  (U32(0x1U) << 0U)
+#define perf_pmasys_control_stream_enable_f()                    (0x1U)
+#define perf_pmasys_control_stream_disable_f()                   (0x0U)
 #define perf_pmasys_mem_block_r()                          (0x0024a070U)
 #define perf_pmasys_mem_block_base_f(v)       ((U32(v) & 0xfffffffU) << 0U)
 #define perf_pmasys_mem_block_target_f(v)        ((U32(v) & 0x3U) << 28U)
@@ -109,8 +112,11 @@
 #define perf_pmasys_mem_bytes_addr_ptr_b()                          (2U)
 #define perf_pmasys_enginestatus_r()                       (0x0024a0a4U)
 #define perf_pmasys_enginestatus_rbufempty_f(v)     ((U32(v) & 0x1U) << 4U)
+#define perf_pmasys_enginestatus_rbufempty_v(r)      (((r) >> 4U) & 0x1U)
 #define perf_pmasys_enginestatus_rbufempty_empty_v()       (0x00000001U)
 #define perf_pmasys_enginestatus_rbufempty_empty_f()             (0x10U)
+#define perf_pmasys_enginestatus_status_v(r)         (((r) >> 0U) & 0x7U)
+#define perf_pmasys_enginestatus_status_empty_v()          (0x00000000U)
 #define perf_pmmsys_engine_sel_r(i)\
 		(nvgpu_safe_add_u32(0x0024006cU, nvgpu_safe_mult_u32((i), 512U)))
 #define perf_pmmsys_engine_sel__size_1_v()                 (0x00000020U)
@@ -120,4 +126,19 @@
 #define perf_pmmgpc_engine_sel_r(i)\
 		(nvgpu_safe_add_u32(0x0018006cU, nvgpu_safe_mult_u32((i), 512U)))
 #define perf_pmmgpc_engine_sel__size_1_v()                 (0x00000020U)
+#define perf_pmmsys_control_r(i)\
+		(nvgpu_safe_add_u32(0x0024009cU, nvgpu_safe_mult_u32((i), 512U)))
+#define perf_pmmfbp_fbps_control_r(i)\
+		(nvgpu_safe_add_u32(0x0027c09cU, nvgpu_safe_mult_u32((i), 512U)))
+#define perf_pmmgpc_gpcs_control_r(i)\
+		(nvgpu_safe_add_u32(0x0027809cU, nvgpu_safe_mult_u32((i), 512U)))
+#define perf_pmmsysrouter_perfmonstatus_r()                (0x00248014U)
+#define perf_pmmsysrouter_enginestatus_r()                 (0x00248010U)
+#define perf_pmmsysrouter_enginestatus_status_v(r)   (((r) >> 0U) & 0x7U)
+#define perf_pmmsysrouter_enginestatus_status_empty_v()    (0x00000000U)
+#define perf_pmmsysrouter_enginestatus_status_quiescent_v() (0x00000003U)
+#define perf_pmmgpcrouter_perfmonstatus_r()                (0x00244014U)
+#define perf_pmmgpcrouter_enginestatus_r()                 (0x00244010U)
+#define perf_pmmfbprouter_perfmonstatus_r()                (0x00246014U)
+#define perf_pmmfbprouter_enginestatus_r()                 (0x00246010U)
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_gr_tu104.h b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_gr_tu104.h
index a6e02e28d..49308e4d3 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_gr_tu104.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_gr_tu104.h
@@ -1230,4 +1230,10 @@
 #define gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(v)\
 		((U32(v) & 0x1ffU) << 0U)
 #define gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m()  (U32(0x1ffU) << 0U)
+#define gr_gpcs_tpcs_cau_control_r(i)\
+		(nvgpu_safe_add_u32(0x00419980U, nvgpu_safe_mult_u32((i), 64U)))
+#define gr_gpcs_tpcs_cau_control__size_1_v()               (0x00000002U)
+#define gr_egpcs_etpcs_sm_dsm_perf_counter_control_r()     (0x00481a48U)
+#define gr_egpcs_etpcs_sm_dsm_perf_counter_control0_r()    (0x00481a08U)
+#define gr_egpcs_etpcs_sm_dsm_perf_counter_control5_r()    (0x00481a0cU)
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_perf_tu104.h b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_perf_tu104.h
index 0f9ea8bc1..b2d08feb2 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_perf_tu104.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_perf_tu104.h
@@ -77,6 +77,9 @@
 #define perf_pmasys_control_membuf_clear_status_doit_f()        (0x20U)
 #define perf_pmasys_control_update_bytes_m()            (U32(0x1U) << 3U)
 #define perf_pmasys_control_update_bytes_doit_f()                (0x8U)
+#define perf_pmasys_control_stream_m()                  (U32(0x1U) << 0U)
+#define perf_pmasys_control_stream_enable_f()                    (0x1U)
+#define perf_pmasys_control_stream_disable_f()                   (0x0U)
 #define perf_pmasys_mem_block_r()                          (0x0024a070U)
 #define perf_pmasys_mem_block_base_f(v)       ((U32(v) & 0xfffffffU) << 0U)
 #define perf_pmasys_mem_block_target_f(v)        ((U32(v) & 0x3U) << 28U)
@@ -109,8 +112,11 @@
 #define perf_pmasys_mem_bytes_addr_ptr_b()                          (2U)
 #define perf_pmasys_enginestatus_r()                       (0x0024a0a4U)
 #define perf_pmasys_enginestatus_rbufempty_f(v)     ((U32(v) & 0x1U) << 4U)
+#define perf_pmasys_enginestatus_rbufempty_v(r)      (((r) >> 4U) & 0x1U)
 #define perf_pmasys_enginestatus_rbufempty_empty_v()       (0x00000001U)
 #define perf_pmasys_enginestatus_rbufempty_empty_f()             (0x10U)
+#define perf_pmasys_enginestatus_status_v(r)         (((r) >> 0U) & 0x7U)
+#define perf_pmasys_enginestatus_status_empty_v()          (0x00000000U)
 #define perf_pmmsys_engine_sel_r(i)\
 		(nvgpu_safe_add_u32(0x0024006cU, nvgpu_safe_mult_u32((i), 512U)))
 #define perf_pmmsys_engine_sel__size_1_v()                 (0x00000020U)
@@ -120,4 +126,19 @@
 #define perf_pmmgpc_engine_sel_r(i)\
 		(nvgpu_safe_add_u32(0x0018006cU, nvgpu_safe_mult_u32((i), 512U)))
 #define perf_pmmgpc_engine_sel__size_1_v()                 (0x00000020U)
+#define perf_pmmsys_control_r(i)\
+		(nvgpu_safe_add_u32(0x0024009cU, nvgpu_safe_mult_u32((i), 512U)))
+#define perf_pmmfbp_fbps_control_r(i)\
+		(nvgpu_safe_add_u32(0x0027c09cU, nvgpu_safe_mult_u32((i), 512U)))
+#define perf_pmmgpc_gpcs_control_r(i)\
+		(nvgpu_safe_add_u32(0x0027809cU, nvgpu_safe_mult_u32((i), 512U)))
+#define perf_pmmsysrouter_perfmonstatus_r()                (0x00248014U)
+#define perf_pmmsysrouter_enginestatus_r()                 (0x00248010U)
+#define perf_pmmsysrouter_enginestatus_status_v(r)   (((r) >> 0U) & 0x7U)
+#define perf_pmmsysrouter_enginestatus_status_empty_v()    (0x00000000U)
+#define perf_pmmsysrouter_enginestatus_status_quiescent_v() (0x00000003U)
+#define perf_pmmgpcrouter_perfmonstatus_r()                (0x00244014U)
+#define perf_pmmgpcrouter_enginestatus_r()                 (0x00244010U)
+#define perf_pmmfbprouter_perfmonstatus_r()                (0x00246014U)
+#define perf_pmmfbprouter_enginestatus_r()                 (0x00246010U)
 #endif
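
Reviewer note: the following is a minimal, self-contained sketch of the teardown-path
selection described in the commit message above. It is illustrative only and not part
of the change: struct profiler, quiesce_resident(), quiesce_non_resident(), quiesce()
and unbind_streamout() are stand-ins for the nvgpu types and functions named in the
diff, and the function bodies merely print which branch the real code would take.

/* Illustrative sketch only -- assumed stand-in names, not nvgpu API. */
#include <stdbool.h>
#include <stdio.h>

enum scope { SCOPE_DEVICE, SCOPE_CONTEXT };

struct profiler {            /* stand-in for struct nvgpu_profiler_object */
	enum scope scope;
	bool hwpm_ctxsw;     /* stand-in for prof->ctxsw[HWPM_LEGACY] */
	bool ctx_resident;   /* stand-in for gops.gr.is_tsg_ctx_resident() */
};

/* Resident teardown: quiesce streamout while the context owns the HW. */
static void quiesce_resident(struct profiler *p)
{
	(void)p;
	printf("resident teardown: disable perfmons/CAU/SMPC, wait for "
	       "PMM routers and PMA to idle, flush membuf\n");
}

/* Non-resident teardown: the context is not on the GPU, so clearing the
 * saved PM context image is sufficient. */
static void quiesce_non_resident(struct profiler *p)
{
	(void)p;
	printf("non-resident teardown: zero the saved PM ctx buffer\n");
}

/* Generic path: pick resident vs. non-resident; the real code brackets
 * this decision with nvgpu_gr_disable/enable_ctxsw(). */
static void quiesce(struct profiler *p)
{
	if (p->ctx_resident)
		quiesce_resident(p);
	else
		quiesce_non_resident(p);
}

/* Mirrors the branch added to nvgpu_profiler_unbind_hwpm_streamout(). */
static void unbind_streamout(struct profiler *p)
{
	if (p->scope == SCOPE_DEVICE && !p->hwpm_ctxsw)
		quiesce_resident(p);	/* device-level session: always resident */
	else
		quiesce(p);
}

int main(void)
{
	struct profiler dev = { SCOPE_DEVICE, false, true };
	struct profiler ctx = { SCOPE_CONTEXT, true, false };

	unbind_streamout(&dev);
	unbind_streamout(&ctx);
	return 0;
}

The split keeps the register-polling sequence (perfmon disable, router and PMA idle
waits) on the resident path only; for a non-resident context the patch simply zeroes
the saved PM context image instead.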