gpu: nvgpu: implement HWPM streamout teardown sequence

Implement the following functions:

- nvgpu_profiler_quiesce_hwpm_streamout_resident
Teardown sequence used when the context is resident, or when the
profiling session is a device-level session.

- nvgpu_profiler_quiesce_hwpm_streamout_non_resident
Teardown sequence used when the context is not resident.

- nvgpu_profiler_quiesce_hwpm_streamout
Generic sequence that calls one of the above APIs, depending on
whether the context is resident.

Trigger the HWPM streamout teardown sequence while unbinding resources
in nvgpu_profiler_unbind_hwpm_streamout().

Add a new HAL gops.gr.is_tsg_ctx_resident so that
gk20a_is_tsg_ctx_resident() can be called from common code.

Implement the following supporting HALs for the resident teardown
sequence:
- gops.perf.pma_stream_enable()
- gops.perf.disable_all_perfmons()
- gops.perf.wait_for_idle_pmm_routers()
- gops.perf.wait_for_idle_pma()
- gops.gr.disable_cau()
- gops.gr.disable_smpc()

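At a glance, the teardown dispatch added here boils down to the
following (condensed sketch; the full version is
nvgpu_profiler_quiesce_hwpm_streamout() in the diff below):

  err = nvgpu_gr_disable_ctxsw(g);        /* freeze residency */
  if (err != 0)
          return err;
  if (g->ops.gr.is_tsg_ctx_resident(prof->tsg))
          err = nvgpu_profiler_quiesce_hwpm_streamout_resident(prof);
  else
          err = nvgpu_profiler_quiesce_hwpm_streamout_non_resident(prof);
  ctxsw_err = nvgpu_gr_enable_ctxsw(g);   /* always restart ctxsw */
  return (ctxsw_err != 0) ? ctxsw_err : err;
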
Jira NVGPU-5360

Change-Id: I304ea25d296fae0146937b15228ea21edc091e16
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2461333
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Deepak Nibade
Date:   2020-12-14 17:13:50 +05:30
Committed-by: mobile promotions
Parent: ce8358ef2b
Commit: 9221b01968

14 changed files with 406 additions and 2 deletions


@@ -31,6 +31,7 @@
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/perfbuf.h>
#include <nvgpu/gr/gr.h>
static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
static int generate_unique_id(void)
@@ -332,6 +333,128 @@ static int nvgpu_profiler_unbind_hwpm(struct nvgpu_profiler_object *prof)
return err;
}
static int nvgpu_profiler_quiesce_hwpm_streamout_resident(struct nvgpu_profiler_object *prof)
{
struct gk20a *g = prof->g;
u64 bytes_available;
int err = 0;
nvgpu_log(g, gpu_dbg_prof,
"HWPM streamout quiesce in resident state started for handle %u",
prof->prof_handle);
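/*
 * Ordering: streamout is (re)enabled first and kept on while the
 * counter sources (perfmons, CAUs, SMPC) are disabled, so that
 * records already in flight can drain before streamout itself is
 * turned off below.
 */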
/* Enable streamout */
g->ops.perf.pma_stream_enable(g, true);
/* Disable all perfmons */
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
g->ops.perf.disable_all_perfmons(g);
}
/* Disable CAUs */
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY] &&
prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
g->ops.gr.disable_cau != NULL) {
g->ops.gr.disable_cau(g);
}
/* Disable SMPC */
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
g->ops.gr.disable_smpc != NULL) {
g->ops.gr.disable_smpc(g);
}
/* Wait for PMM routers to go idle/quiescent */
err = g->ops.perf.wait_for_idle_pmm_routers(g);
if (err != 0) {
goto fail;
}
/* Wait for PMA to go idle/quiescent */
err = g->ops.perf.wait_for_idle_pma(g);
if (err != 0) {
goto fail;
}
/* Disable streamout */
g->ops.perf.pma_stream_enable(g, false);
/* wait for all the inflight records from fb-hub to stream out */
err = nvgpu_perfbuf_update_get_put(g, 0U, &bytes_available,
prof->pma_bytes_available_buffer_cpuva, true,
NULL, NULL);
fail:
if (err != 0) {
nvgpu_err(g, "Failed to quiesce HWPM streamout in resident state");
} else {
nvgpu_log(g, gpu_dbg_prof,
"HWPM streamout quiesce in resident state successfull for handle %u",
prof->prof_handle);
}
return err;
}
static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct nvgpu_profiler_object *prof)
{
struct nvgpu_mem *pm_ctx_mem;
struct gk20a *g = prof->g;
nvgpu_log(g, gpu_dbg_prof,
"HWPM streamout quiesce in non-resident state started for handle %u",
prof->prof_handle);
if (prof->tsg == NULL || prof->tsg->gr_ctx == NULL) {
return -EINVAL;
}
pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(prof->tsg->gr_ctx);
if (pm_ctx_mem == NULL) {
nvgpu_err(g, "No PM context");
return -EINVAL;
}
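/*
 * The context is off the GPU, so its HWPM state lives only in the
 * saved PM context image; zeroing the image ensures no stale
 * streamout state is restored when the context next becomes resident.
 */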
nvgpu_memset(g, pm_ctx_mem, 0U, 0U, pm_ctx_mem->size);
nvgpu_log(g, gpu_dbg_prof,
"HWPM streamout quiesce in non-resident state successfull for handle %u",
prof->prof_handle);
return 0;
}
static int nvgpu_profiler_quiesce_hwpm_streamout(struct nvgpu_profiler_object *prof)
{
struct gk20a *g = prof->g;
bool ctx_resident;
int err, ctxsw_err;
err = nvgpu_gr_disable_ctxsw(g);
if (err != 0) {
nvgpu_err(g, "unable to stop gr ctxsw");
return err;
}
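/* With ctxsw stopped, residency cannot change under us. */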
ctx_resident = g->ops.gr.is_tsg_ctx_resident(prof->tsg);
if (ctx_resident) {
err = nvgpu_profiler_quiesce_hwpm_streamout_resident(prof);
} else {
err = nvgpu_profiler_quiesce_hwpm_streamout_non_resident(prof);
}
if (err != 0) {
nvgpu_err(g, "Failed to quiesce HWPM streamout");
}
ctxsw_err = nvgpu_gr_enable_ctxsw(g);
if (ctxsw_err != 0) {
nvgpu_err(g, "unable to restart ctxsw!");
err = ctxsw_err;
}
return err;
}
static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof)
{
struct gk20a *g = prof->g;
@@ -357,6 +480,19 @@ static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *pr
struct gk20a *g = prof->g;
int err;
if (prof->scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) {
if (prof->ctxsw[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
err = nvgpu_profiler_quiesce_hwpm_streamout(prof);
} else {
err = nvgpu_profiler_quiesce_hwpm_streamout_resident(prof);
}
} else {
err = nvgpu_profiler_quiesce_hwpm_streamout(prof);
}
if (err != 0) {
return err;
}
g->ops.perf.bind_mem_bytes_buffer_addr(g, 0ULL);
err = g->ops.perfbuf.perfbuf_disable(g);


@@ -1341,7 +1341,7 @@ bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
return ret;
}
-static bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg)
+bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg)
{
u32 curr_gr_tsgid;
struct gk20a *g = tsg->g;


@@ -72,6 +72,7 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
struct nvgpu_channel *ch, u64 sms, bool enable);
bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch);
bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg);
int gk20a_gr_lock_down_sm(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
bool check_errors);


@@ -127,3 +127,27 @@ int tu104_gr_update_smpc_global_mode(struct gk20a *g, bool enable)
return err;
}
void tu104_gr_disable_cau(struct gk20a *g)
{
u32 i;
for (i = 0U; i < gr_gpcs_tpcs_cau_control__size_1_v(); ++i) {
nvgpu_writel(g, gr_gpcs_tpcs_cau_control_r(i), 0U);
}
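/* Fence the priv writes so the CAU disables have reached the TPCs. */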
if (g->ops.priv_ring.read_pri_fence != NULL) {
g->ops.priv_ring.read_pri_fence(g);
}
}
void tu104_gr_disable_smpc(struct gk20a *g)
{
nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control_r(), 0U);
nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control0_r(), 0U);
nvgpu_writel(g, gr_egpcs_etpcs_sm_dsm_perf_counter_control5_r(), 0U);
if (g->ops.priv_ring.read_pri_fence != NULL) {
g->ops.priv_ring.read_pri_fence(g);
}
}


@@ -40,5 +40,9 @@ void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs,
u32 *ctrl_register_stride);
int tu104_gr_update_smpc_global_mode(struct gk20a *g, bool enable);
void tu104_gr_disable_cau(struct gk20a *g);
void tu104_gr_disable_smpc(struct gk20a *g);
#endif /* CONFIG_NVGPU_DEBUGGER */
#endif /* NVGPU_GR_TU104_H */


@@ -660,6 +660,7 @@ static const struct gops_gr gv11b_ops_gr = {
.wait_for_pause = NULL,
.resume_from_pause = NULL,
.clear_sm_errors = gr_gk20a_clear_sm_errors,
.is_tsg_ctx_resident = gk20a_is_tsg_ctx_resident,
.sm_debugger_attached = gv11b_gr_sm_debugger_attached,
.suspend_single_sm = gv11b_gr_suspend_single_sm,
.suspend_all_sms = gv11b_gr_suspend_all_sms,
@@ -1238,6 +1239,10 @@ static const struct gops_perf gv11b_ops_perf = {
.get_num_hwpm_perfmon = gv11b_perf_get_num_hwpm_perfmon,
.init_hwpm_pmm_register = gv11b_perf_init_hwpm_pmm_register,
.reset_hwpm_pmm_registers = gv11b_perf_reset_hwpm_pmm_registers,
.pma_stream_enable = gv11b_perf_pma_stream_enable,
.disable_all_perfmons = gv11b_perf_disable_all_perfmons,
.wait_for_idle_pmm_routers = gv11b_perf_wait_for_idle_pmm_routers,
.wait_for_idle_pma = gv11b_perf_wait_for_idle_pma,
};
#endif


@@ -695,6 +695,8 @@ static const struct gops_gr tu104_ops_gr = {
.update_smpc_global_mode = tu104_gr_update_smpc_global_mode,
.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
.disable_cau = tu104_gr_disable_cau,
.disable_smpc = tu104_gr_disable_smpc,
.clear_sm_error_state = gv11b_gr_clear_sm_error_state,
.suspend_contexts = gr_gp10b_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
@@ -702,6 +704,7 @@ static const struct gops_gr tu104_ops_gr = {
.wait_for_pause = NULL,
.resume_from_pause = NULL,
.clear_sm_errors = gr_gk20a_clear_sm_errors,
.is_tsg_ctx_resident = gk20a_is_tsg_ctx_resident,
.sm_debugger_attached = gv11b_gr_sm_debugger_attached,
.suspend_single_sm = gv11b_gr_suspend_single_sm,
.suspend_all_sms = gv11b_gr_suspend_all_sms,
@@ -1304,6 +1307,10 @@ static const struct gops_perf tu104_ops_perf = {
.get_num_hwpm_perfmon = gv11b_perf_get_num_hwpm_perfmon,
.init_hwpm_pmm_register = gv11b_perf_init_hwpm_pmm_register,
.reset_hwpm_pmm_registers = gv11b_perf_reset_hwpm_pmm_registers,
.pma_stream_enable = gv11b_perf_pma_stream_enable,
.disable_all_perfmons = gv11b_perf_disable_all_perfmons,
.wait_for_idle_pmm_routers = gv11b_perf_wait_for_idle_pmm_routers,
.wait_for_idle_pma = gv11b_perf_wait_for_idle_pma,
};
#endif


@@ -32,6 +32,8 @@
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
#define PMM_ROUTER_OFFSET 0x200U
bool gv11b_perf_get_membuf_overflow_status(struct gk20a *g)
{
const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
@@ -108,7 +110,9 @@ int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed,
{
u32 val;
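+/* Skip the GET-pointer bump when no bytes were consumed. */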
-nvgpu_writel(g, perf_pmasys_mem_bump_r(), bytes_consumed);
+if (bytes_consumed != 0U) {
+nvgpu_writel(g, perf_pmasys_mem_bump_r(), bytes_consumed);
+}
if (update_available_bytes) {
val = nvgpu_readl(g, perf_pmasys_control_r());
@@ -497,3 +501,165 @@ void gv11b_perf_init_hwpm_pmm_register(struct gk20a *g)
g->ops.perf.get_pmmgpc_per_chiplet_offset(),
g->num_gpc_perfmon);
}
void gv11b_perf_pma_stream_enable(struct gk20a *g, bool enable)
{
u32 reg_val;
reg_val = nvgpu_readl(g, perf_pmasys_control_r());
if (enable) {
reg_val = set_field(reg_val,
perf_pmasys_control_stream_m(),
perf_pmasys_control_stream_enable_f());
} else {
reg_val = set_field(reg_val,
perf_pmasys_control_stream_m(),
perf_pmasys_control_stream_disable_f());
}
nvgpu_writel(g, perf_pmasys_control_r(), reg_val);
}
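/*
 * For reference, set_field() used above is nvgpu's read-modify-write
 * helper for register fields; roughly (sketch, not verbatim):
 *
 *	static inline u32 set_field(u32 val, u32 mask, u32 field_val)
 *	{
 *		return (val & ~mask) | field_val;
 *	}
 */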
void gv11b_perf_disable_all_perfmons(struct gk20a *g)
{
if (g->num_sys_perfmon == 0U) {
g->ops.perf.get_num_hwpm_perfmon(g, &g->num_sys_perfmon,
&g->num_fbp_perfmon, &g->num_gpc_perfmon);
}
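/* Write CONTROL = 0 across the SYS, FBP and GPC perfmon domains. */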
g->ops.perf.set_pmm_register(g, perf_pmmsys_control_r(0U), 0U, 1U,
g->ops.perf.get_pmmsys_per_chiplet_offset(),
g->num_sys_perfmon);
g->ops.perf.set_pmm_register(g, perf_pmmfbp_fbps_control_r(0U), 0U, 1U,
g->ops.perf.get_pmmfbp_per_chiplet_offset(),
g->num_fbp_perfmon);
g->ops.perf.set_pmm_register(g, perf_pmmgpc_gpcs_control_r(0U), 0U, 1U,
g->ops.perf.get_pmmgpc_per_chiplet_offset(),
g->num_gpc_perfmon);
if (g->ops.priv_ring.read_pri_fence != NULL) {
g->ops.priv_ring.read_pri_fence(g);
}
}
static int poll_for_pmm_router_idle(struct gk20a *g, u32 offset, u32 timeout_ms)
{
struct nvgpu_timeout timeout;
u32 reg_val;
u32 status;
int err;
err = nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
if (err != 0) {
nvgpu_err(g, "failed to init timeout");
return err;
}
do {
reg_val = nvgpu_readl(g, offset);
status = perf_pmmsysrouter_enginestatus_status_v(reg_val);
if ((status == perf_pmmsysrouter_enginestatus_status_empty_v()) ||
(status == perf_pmmsysrouter_enginestatus_status_quiescent_v())) {
return 0;
}
nvgpu_usleep_range(20, 40);
} while (nvgpu_timeout_expired(&timeout) == 0);
return -ETIMEDOUT;
}
int gv11b_perf_wait_for_idle_pmm_routers(struct gk20a *g)
{
u32 num_gpc, num_fbp;
int err;
u32 i;
num_gpc = nvgpu_gr_config_get_gpc_count(nvgpu_gr_get_config_ptr(g));
num_fbp = nvgpu_fbp_get_num_fbps(g->fbp);
/* wait for all perfmons to report idle */
err = poll_for_pmm_router_idle(g, perf_pmmsysrouter_perfmonstatus_r(), 1);
if (err != 0) {
return err;
}
for (i = 0U; i < num_gpc; ++i) {
err = poll_for_pmm_router_idle(g,
perf_pmmgpcrouter_perfmonstatus_r() + (i * PMM_ROUTER_OFFSET),
1);
if (err != 0) {
return err;
}
}
for (i = 0U; i < num_fbp; ++i) {
err = poll_for_pmm_router_idle(g,
perf_pmmfbprouter_perfmonstatus_r() + (i * PMM_ROUTER_OFFSET),
1);
if (err != 0) {
return err;
}
}
/* wait for all routers to report idle */
err = poll_for_pmm_router_idle(g, perf_pmmsysrouter_enginestatus_r(), 1);
if (err != 0) {
return err;
}
for (i = 0U; i < num_gpc; ++i) {
err = poll_for_pmm_router_idle(g,
perf_pmmgpcrouter_enginestatus_r() + (i * PMM_ROUTER_OFFSET),
1);
if (err != 0) {
return err;
}
}
for (i = 0U; i < num_fbp; ++i) {
err = poll_for_pmm_router_idle(g,
perf_pmmfbprouter_enginestatus_r() + (i * PMM_ROUTER_OFFSET),
1);
if (err != 0) {
return err;
}
}
return 0;
}
int gv11b_perf_wait_for_idle_pma(struct gk20a *g)
{
struct nvgpu_timeout timeout;
u32 status, rbufempty_status;
u32 timeout_ms = 1;
u32 reg_val;
int err;
err = nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
if (err != 0) {
nvgpu_err(g, "failed to init timeout");
return err;
}
do {
reg_val = nvgpu_readl(g, perf_pmasys_enginestatus_r());
status = perf_pmasys_enginestatus_status_v(reg_val);
rbufempty_status = perf_pmasys_enginestatus_rbufempty_v(reg_val);
if ((status == perf_pmasys_enginestatus_status_empty_v()) &&
(rbufempty_status == perf_pmasys_enginestatus_rbufempty_empty_v())) {
return 0;
}
nvgpu_usleep_range(20, 40);
} while (nvgpu_timeout_expired(&timeout) == 0);
return -ETIMEDOUT;
}


@@ -62,5 +62,11 @@ void gv11b_perf_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon,
u32 *num_fbp_perfmon, u32 *num_gpc_perfmon);
void gv11b_perf_reset_hwpm_pmm_registers(struct gk20a *g);
void gv11b_perf_init_hwpm_pmm_register(struct gk20a *g);
void gv11b_perf_pma_stream_enable(struct gk20a *g, bool enable);
void gv11b_perf_disable_all_perfmons(struct gk20a *g);
int gv11b_perf_wait_for_idle_pmm_routers(struct gk20a *g);
int gv11b_perf_wait_for_idle_pma(struct gk20a *g);
#endif /* CONFIG_NVGPU_DEBUGGER */
#endif


@@ -70,6 +70,10 @@ struct gops_perf {
void (*set_pmm_register)(struct gk20a *g, u32 offset, u32 val,
u32 num_chiplets, u32 chiplet_stride, u32 num_perfmons);
void (*reset_hwpm_pmm_registers)(struct gk20a *g);
void (*pma_stream_enable)(struct gk20a *g, bool enable);
void (*disable_all_perfmons)(struct gk20a *g);
int (*wait_for_idle_pmm_routers)(struct gk20a *g);
int (*wait_for_idle_pma)(struct gk20a *g);
};
struct gops_perfbuf {
int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size);


@@ -1127,6 +1127,7 @@ struct gops_gr {
struct nvgpu_warpstate *w_state);
int (*resume_from_pause)(struct gk20a *g);
int (*clear_sm_errors)(struct gk20a *g);
bool (*is_tsg_ctx_resident)(struct nvgpu_tsg *tsg);
bool (*sm_debugger_attached)(struct gk20a *g);
void (*suspend_single_sm)(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm,
@@ -1190,6 +1191,8 @@ struct gops_gr {
int (*set_boosted_ctx)(struct nvgpu_channel *ch, bool boost);
#endif
#endif
void (*disable_cau)(struct gk20a *g);
void (*disable_smpc)(struct gk20a *g);
/** @endcond */
/** This structure stores the GR ecc subunit hal pointers. */


@@ -77,6 +77,9 @@
#define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U)
#define perf_pmasys_control_update_bytes_m() (U32(0x1U) << 3U)
#define perf_pmasys_control_update_bytes_doit_f() (0x8U)
#define perf_pmasys_control_stream_m() (U32(0x1U) << 0U)
#define perf_pmasys_control_stream_enable_f() (0x1U)
#define perf_pmasys_control_stream_disable_f() (0x0U)
#define perf_pmasys_mem_block_r() (0x0024a070U)
#define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U)
#define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U)
@@ -109,8 +112,11 @@
#define perf_pmasys_mem_bytes_addr_ptr_b() (2U)
#define perf_pmasys_enginestatus_r() (0x0024a0a4U)
#define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U)
#define perf_pmasys_enginestatus_rbufempty_v(r) (((r) >> 4U) & 0x1U)
#define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U)
#define perf_pmasys_enginestatus_rbufempty_empty_f() (0x10U)
#define perf_pmasys_enginestatus_status_v(r) (((r) >> 0U) & 0x7U)
#define perf_pmasys_enginestatus_status_empty_v() (0x00000000U)
#define perf_pmmsys_engine_sel_r(i)\
(nvgpu_safe_add_u32(0x0024006cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmsys_engine_sel__size_1_v() (0x00000020U)
@@ -120,4 +126,19 @@
#define perf_pmmgpc_engine_sel_r(i)\
(nvgpu_safe_add_u32(0x0018006cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmgpc_engine_sel__size_1_v() (0x00000020U)
#define perf_pmmsys_control_r(i)\
(nvgpu_safe_add_u32(0x0024009cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmfbp_fbps_control_r(i)\
(nvgpu_safe_add_u32(0x0027c09cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmgpc_gpcs_control_r(i)\
(nvgpu_safe_add_u32(0x0027809cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmsysrouter_perfmonstatus_r() (0x00248014U)
#define perf_pmmsysrouter_enginestatus_r() (0x00248010U)
#define perf_pmmsysrouter_enginestatus_status_v(r) (((r) >> 0U) & 0x7U)
#define perf_pmmsysrouter_enginestatus_status_empty_v() (0x00000000U)
#define perf_pmmsysrouter_enginestatus_status_quiescent_v() (0x00000003U)
#define perf_pmmgpcrouter_perfmonstatus_r() (0x00244014U)
#define perf_pmmgpcrouter_enginestatus_r() (0x00244010U)
#define perf_pmmfbprouter_perfmonstatus_r() (0x00246014U)
#define perf_pmmfbprouter_enginestatus_r() (0x00246010U)
#endif


@@ -1230,4 +1230,10 @@
#define gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(v)\
((U32(v) & 0x1ffU) << 0U)
#define gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m() (U32(0x1ffU) << 0U)
#define gr_gpcs_tpcs_cau_control_r(i)\
(nvgpu_safe_add_u32(0x00419980U, nvgpu_safe_mult_u32((i), 64U)))
#define gr_gpcs_tpcs_cau_control__size_1_v() (0x00000002U)
#define gr_egpcs_etpcs_sm_dsm_perf_counter_control_r() (0x00481a48U)
#define gr_egpcs_etpcs_sm_dsm_perf_counter_control0_r() (0x00481a08U)
#define gr_egpcs_etpcs_sm_dsm_perf_counter_control5_r() (0x00481a0cU)
#endif


@@ -77,6 +77,9 @@
#define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U)
#define perf_pmasys_control_update_bytes_m() (U32(0x1U) << 3U)
#define perf_pmasys_control_update_bytes_doit_f() (0x8U)
#define perf_pmasys_control_stream_m() (U32(0x1U) << 0U)
#define perf_pmasys_control_stream_enable_f() (0x1U)
#define perf_pmasys_control_stream_disable_f() (0x0U)
#define perf_pmasys_mem_block_r() (0x0024a070U)
#define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U)
#define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U)
@@ -109,8 +112,11 @@
#define perf_pmasys_mem_bytes_addr_ptr_b() (2U)
#define perf_pmasys_enginestatus_r() (0x0024a0a4U)
#define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U)
#define perf_pmasys_enginestatus_rbufempty_v(r) (((r) >> 4U) & 0x1U)
#define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U)
#define perf_pmasys_enginestatus_rbufempty_empty_f() (0x10U)
#define perf_pmasys_enginestatus_status_v(r) (((r) >> 0U) & 0x7U)
#define perf_pmasys_enginestatus_status_empty_v() (0x00000000U)
#define perf_pmmsys_engine_sel_r(i)\
(nvgpu_safe_add_u32(0x0024006cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmsys_engine_sel__size_1_v() (0x00000020U)
@@ -120,4 +126,19 @@
#define perf_pmmgpc_engine_sel_r(i)\
(nvgpu_safe_add_u32(0x0018006cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmgpc_engine_sel__size_1_v() (0x00000020U)
#define perf_pmmsys_control_r(i)\
(nvgpu_safe_add_u32(0x0024009cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmfbp_fbps_control_r(i)\
(nvgpu_safe_add_u32(0x0027c09cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmgpc_gpcs_control_r(i)\
(nvgpu_safe_add_u32(0x0027809cU, nvgpu_safe_mult_u32((i), 512U)))
#define perf_pmmsysrouter_perfmonstatus_r() (0x00248014U)
#define perf_pmmsysrouter_enginestatus_r() (0x00248010U)
#define perf_pmmsysrouter_enginestatus_status_v(r) (((r) >> 0U) & 0x7U)
#define perf_pmmsysrouter_enginestatus_status_empty_v() (0x00000000U)
#define perf_pmmsysrouter_enginestatus_status_quiescent_v() (0x00000003U)
#define perf_pmmgpcrouter_perfmonstatus_r() (0x00244014U)
#define perf_pmmgpcrouter_enginestatus_r() (0x00244010U)
#define perf_pmmfbprouter_perfmonstatus_r() (0x00246014U)
#define perf_pmmfbprouter_enginestatus_r() (0x00246010U)
#endif