mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: Initialize hwpm perfmons (engine_sel)
- For Mode-E ctxsw it is required that engine_sel is set to 0xFFFFFFFF. - Default 0 is a valid signal and causes problems. Bug 2106999 Change-Id: I5cdb4441a8e6d7e8133c31a9e361b54611dd2995 Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1770755 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
e14fdcd8f1
commit
503d489dba
@@ -312,6 +312,7 @@ struct gpu_ops {
|
||||
struct channel_gk20a *c,
|
||||
u64 gpu_va,
|
||||
u32 mode);
|
||||
void (*init_hwpm_pmm_register)(struct gk20a *g);
|
||||
int (*dump_gr_regs)(struct gk20a *g,
|
||||
struct gk20a_debug_output *o);
|
||||
int (*update_pc_sampling)(struct channel_gk20a *ch,
|
||||
|
||||
@@ -1782,6 +1782,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW &&
|
||||
g->ops.gr.init_hwpm_pmm_register) {
|
||||
g->ops.gr.init_hwpm_pmm_register(g);
|
||||
}
|
||||
}
|
||||
|
||||
data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include <nvgpu/hw/gv100/hw_fuse_gv100.h>
|
||||
#include <nvgpu/hw/gv100/hw_top_gv100.h>
|
||||
#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
|
||||
#include <nvgpu/hw/gv100/hw_perf_gv100.h>
|
||||
|
||||
|
||||
/*
|
||||
@@ -459,3 +460,89 @@ u32 gr_gv100_get_hw_accessor_stream_out_mode()
|
||||
{
|
||||
return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
|
||||
}
|
||||
|
||||
/*
 * Write @val to one PMM register in every perfmon of every chiplet.
 *
 * @offset is the register offset for perfmon 0 of chiplet 0. Successive
 * perfmons are perf_pmmgpc_perdomain_offset_v() apart and successive chiplets
 * are g->ops.gr.get_pmm_per_chiplet_offset() apart. Nothing is written when
 * either @num_chiplets or @num_perfmons is zero.
 */
static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
				u32 num_chiplets, u32 num_perfmons)
{
	u32 per_chiplet = g->ops.gr.get_pmm_per_chiplet_offset();
	u32 chiplet;
	u32 pm;

	for (chiplet = 0; chiplet < num_chiplets; chiplet++) {
		/* Offset of perfmon 0 inside the current chiplet. */
		u32 chiplet_base = offset + chiplet * per_chiplet;

		for (pm = 0; pm < num_perfmons; pm++) {
			u32 reg = chiplet_base +
				pm * perf_pmmgpc_perdomain_offset_v();

			nvgpu_writel(g, reg, val);
		}
	}
}
|
||||
|
||||
static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon,
|
||||
int *num_fbp_perfmon, int *num_gpc_perfmon)
|
||||
{
|
||||
int err;
|
||||
u32 buf_offset_lo, buf_offset_addr, num_offsets;
|
||||
u32 perfmon_index = 0;
|
||||
|
||||
for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v();
|
||||
perfmon_index++) {
|
||||
err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
|
||||
perf_pmmsys_engine_sel_r(perfmon_index),
|
||||
1,
|
||||
&buf_offset_lo,
|
||||
&buf_offset_addr,
|
||||
&num_offsets);
|
||||
if (err) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
*num_sys_perfmon = perfmon_index;
|
||||
|
||||
for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v();
|
||||
perfmon_index++) {
|
||||
err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
|
||||
perf_pmmfbp_engine_sel_r(perfmon_index),
|
||||
1,
|
||||
&buf_offset_lo,
|
||||
&buf_offset_addr,
|
||||
&num_offsets);
|
||||
if (err) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
*num_fbp_perfmon = perfmon_index;
|
||||
|
||||
for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v();
|
||||
perfmon_index++) {
|
||||
err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
|
||||
perf_pmmgpc_engine_sel_r(perfmon_index),
|
||||
1,
|
||||
&buf_offset_lo,
|
||||
&buf_offset_addr,
|
||||
&num_offsets);
|
||||
if (err) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
*num_gpc_perfmon = perfmon_index;
|
||||
}
|
||||
|
||||
void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
|
||||
{
|
||||
int num_sys_perfmon = 0;
|
||||
int num_fbp_perfmon = 0;
|
||||
int num_gpc_perfmon = 0;
|
||||
|
||||
gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon,
|
||||
&num_fbp_perfmon, &num_gpc_perfmon);
|
||||
|
||||
gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
|
||||
1, 0xFFFFFFFF, num_sys_perfmon);
|
||||
gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
|
||||
nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS), 0xFFFFFFFF, num_fbp_perfmon);
|
||||
gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
|
||||
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS), 0xFFFFFFFF, num_gpc_perfmon);
|
||||
}
|
||||
|
||||
@@ -48,4 +48,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
|
||||
u32 *priv_addr_table, u32 *t);
|
||||
void gr_gv100_init_gpc_mmu(struct gk20a *g);
|
||||
u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
|
||||
void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
|
||||
#endif
|
||||
|
||||
@@ -361,6 +361,7 @@ static const struct gpu_ops gv100_ops = {
|
||||
.get_hw_accessor_stream_out_mode =
|
||||
gr_gv100_get_hw_accessor_stream_out_mode,
|
||||
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
|
||||
.init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register,
|
||||
.record_sm_error_state = gv11b_gr_record_sm_error_state,
|
||||
.update_sm_error_state = gv11b_gr_update_sm_error_state,
|
||||
.clear_sm_error_state = gm20b_gr_clear_sm_error_state,
|
||||
|
||||
@@ -232,4 +232,28 @@ static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void)
|
||||
{
|
||||
return 0x10U;
|
||||
}
|
||||
/*
 * Register offset of the PMMSYS engine_sel register for index @i:
 * 0x0024006c plus 512 (0x200) per index.
 */
static inline u32 perf_pmmsys_engine_sel_r(u32 i)
{
	const u32 base = 0x0024006cU;
	const u32 stride = 0x200U;

	return base + (i * stride);
}
|
||||
/*
 * Returns 0x20 (32), the value used as the exclusive bound when iterating
 * perf_pmmsys_engine_sel_r() indices.
 */
static inline u32 perf_pmmsys_engine_sel__size_1_v(void)
{
	return 32U;
}
|
||||
/*
 * Register offset of the PMMFBP engine_sel register for index @i:
 * 0x0020006c plus 512 (0x200) per index.
 */
static inline u32 perf_pmmfbp_engine_sel_r(u32 i)
{
	const u32 base = 0x0020006cU;
	const u32 stride = 0x200U;

	return base + (i * stride);
}
|
||||
/*
 * Returns 0x20 (32), the value used as the exclusive bound when iterating
 * perf_pmmfbp_engine_sel_r() indices.
 */
static inline u32 perf_pmmfbp_engine_sel__size_1_v(void)
{
	return 32U;
}
|
||||
/*
 * Register offset of the PMMGPC engine_sel register for index @i:
 * 0x0018006c plus 512 (0x200) per index.
 */
static inline u32 perf_pmmgpc_engine_sel_r(u32 i)
{
	const u32 base = 0x0018006cU;
	const u32 stride = 0x200U;

	return base + (i * stride);
}
|
||||
/*
 * Returns 0x20 (32), the value used as the exclusive bound when iterating
 * perf_pmmgpc_engine_sel_r() indices.
 */
static inline u32 perf_pmmgpc_engine_sel__size_1_v(void)
{
	return 32U;
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user