gpu: nvgpu: Initialize hwpm perfmons (engine_sel)

- Mode-E ctxsw requires that engine_sel be set to 0xFFFFFFFF.
- The default value, 0, is itself a valid signal and therefore causes problems.

Bug 2106999

Change-Id: I5cdb4441a8e6d7e8133c31a9e361b54611dd2995
Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1770755
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Vaibhav Kachore
2018-07-03 17:21:13 +05:30
committed by mobile promotions
parent e14fdcd8f1
commit 503d489dba
6 changed files with 119 additions and 0 deletions

View File

@@ -312,6 +312,7 @@ struct gpu_ops {
struct channel_gk20a *c,
u64 gpu_va,
u32 mode);
void (*init_hwpm_pmm_register)(struct gk20a *g);
int (*dump_gr_regs)(struct gk20a *g,
struct gk20a_debug_output *o);
int (*update_pc_sampling)(struct channel_gk20a *ch,

View File

@@ -1782,6 +1782,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
return -ENOMEM;
}
}
if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW &&
g->ops.gr.init_hwpm_pmm_register) {
g->ops.gr.init_hwpm_pmm_register(g);
}
}
data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());

View File

@@ -39,6 +39,7 @@
#include <nvgpu/hw/gv100/hw_fuse_gv100.h>
#include <nvgpu/hw/gv100/hw_top_gv100.h>
#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
#include <nvgpu/hw/gv100/hw_perf_gv100.h>
/*
@@ -459,3 +460,89 @@ u32 gr_gv100_get_hw_accessor_stream_out_mode()
{
return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
}
/*
 * Write the same value to one PMM register in every perfmon of every chiplet.
 *
 * @g:            GPU instance; also supplies the per-chiplet stride through
 *                g->ops.gr.get_pmm_per_chiplet_offset().
 * @offset:       address of the register for chiplet 0, perfmon 0.
 * @val:          value written to each register instance.
 * @num_chiplets: number of chiplets to iterate over.
 * @num_perfmons: number of perfmons to iterate over within each chiplet.
 *
 * The address written for (chiplet, perfmon) is
 *   offset + perfmon * perf_pmmgpc_perdomain_offset_v()
 *          + chiplet * per-chiplet stride.
 * Nothing is written when either count is zero.
 */
static void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
		u32 num_chiplets, u32 num_perfmons)
{
	const u32 per_chiplet = g->ops.gr.get_pmm_per_chiplet_offset();
	u32 chiplet;

	for (chiplet = 0; chiplet < num_chiplets; chiplet++) {
		/* Address of perfmon 0 inside the current chiplet. */
		const u32 chiplet_base = offset + chiplet * per_chiplet;
		u32 perfmon;

		for (perfmon = 0; perfmon < num_perfmons; perfmon++) {
			nvgpu_writel(g,
				chiplet_base +
				perfmon * perf_pmmgpc_perdomain_offset_v(),
				val);
		}
	}
}
static void gr_gv100_get_num_hwpm_perfmon(struct gk20a *g, int *num_sys_perfmon,
int *num_fbp_perfmon, int *num_gpc_perfmon)
{
int err;
u32 buf_offset_lo, buf_offset_addr, num_offsets;
u32 perfmon_index = 0;
for (perfmon_index = 0; perfmon_index < perf_pmmsys_engine_sel__size_1_v();
perfmon_index++) {
err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
perf_pmmsys_engine_sel_r(perfmon_index),
1,
&buf_offset_lo,
&buf_offset_addr,
&num_offsets);
if (err) {
break;
}
}
*num_sys_perfmon = perfmon_index;
for (perfmon_index = 0; perfmon_index < perf_pmmfbp_engine_sel__size_1_v();
perfmon_index++) {
err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
perf_pmmfbp_engine_sel_r(perfmon_index),
1,
&buf_offset_lo,
&buf_offset_addr,
&num_offsets);
if (err) {
break;
}
}
*num_fbp_perfmon = perfmon_index;
for (perfmon_index = 0; perfmon_index < perf_pmmgpc_engine_sel__size_1_v();
perfmon_index++) {
err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
perf_pmmgpc_engine_sel_r(perfmon_index),
1,
&buf_offset_lo,
&buf_offset_addr,
&num_offsets);
if (err) {
break;
}
}
*num_gpc_perfmon = perfmon_index;
}
/*
 * Program engine_sel of every detected HWPM perfmon to 0xFFFFFFFF.
 *
 * The perfmon counts for the SYS, FBP and GPC domains are obtained from
 * gr_gv100_get_num_hwpm_perfmon(); each domain's engine_sel registers are
 * then written through gr_gv100_set_pmm_register(), whose argument order is
 * (g, offset, val, num_chiplets, num_perfmons).
 *
 * - SYS is written as a single chiplet.
 * - FBP is written once per GPU_LIT_NUM_FBPS chiplet.
 * - GPC is written once per GPU_LIT_NUM_GPCS chiplet.
 *
 * Each domain must be addressed through its own engine_sel base
 * (pmmsys / pmmfbp / pmmgpc); using the SYS base for all three would leave
 * the FBP and GPC perfmons untouched.
 */
void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
{
	int num_sys_perfmon = 0;
	int num_fbp_perfmon = 0;
	int num_gpc_perfmon = 0;

	gr_gv100_get_num_hwpm_perfmon(g, &num_sys_perfmon,
			&num_fbp_perfmon, &num_gpc_perfmon);

	/* val comes before num_chiplets in the helper's parameter list. */
	gr_gv100_set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
		0xFFFFFFFFU, 1U, num_sys_perfmon);
	gr_gv100_set_pmm_register(g, perf_pmmfbp_engine_sel_r(0),
		0xFFFFFFFFU, nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS),
		num_fbp_perfmon);
	gr_gv100_set_pmm_register(g, perf_pmmgpc_engine_sel_r(0),
		0xFFFFFFFFU, nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS),
		num_gpc_perfmon);
}

View File

@@ -48,4 +48,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
u32 *priv_addr_table, u32 *t);
void gr_gv100_init_gpc_mmu(struct gk20a *g);
u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
#endif

View File

@@ -361,6 +361,7 @@ static const struct gpu_ops gv100_ops = {
.get_hw_accessor_stream_out_mode =
gr_gv100_get_hw_accessor_stream_out_mode,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
.init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register,
.record_sm_error_state = gv11b_gr_record_sm_error_state,
.update_sm_error_state = gv11b_gr_update_sm_error_state,
.clear_sm_error_state = gm20b_gr_clear_sm_error_state,

View File

@@ -232,4 +232,28 @@ static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void)
{
return 0x10U;
}
/*
 * Address of instance i of the perf_pmmsys engine_sel register:
 * 0x0024006c + i * 512.
 */
static inline u32 perf_pmmsys_engine_sel_r(u32 i)
{
	const u32 base = 0x0024006cU;
	const u32 stride = 512U;

	return base + (i * stride);
}
/*
 * Returns 0x20 (32).
 * NOTE(review): presumably the number of valid indices accepted by
 * perf_pmmsys_engine_sel_r() - confirm against the hardware manual.
 */
static inline u32 perf_pmmsys_engine_sel__size_1_v(void)
{
	const u32 size = 0x00000020U;

	return size;
}
/*
 * Address of instance i of the perf_pmmfbp engine_sel register:
 * 0x0020006c + i * 512.
 */
static inline u32 perf_pmmfbp_engine_sel_r(u32 i)
{
	const u32 base = 0x0020006cU;
	const u32 stride = 512U;

	return base + (i * stride);
}
/*
 * Returns 0x20 (32).
 * NOTE(review): presumably the number of valid indices accepted by
 * perf_pmmfbp_engine_sel_r() - confirm against the hardware manual.
 */
static inline u32 perf_pmmfbp_engine_sel__size_1_v(void)
{
	const u32 size = 0x00000020U;

	return size;
}
/*
 * Address of instance i of the perf_pmmgpc engine_sel register:
 * 0x0018006c + i * 512.
 */
static inline u32 perf_pmmgpc_engine_sel_r(u32 i)
{
	const u32 base = 0x0018006cU;
	const u32 stride = 512U;

	return base + (i * stride);
}
/*
 * Returns 0x20 (32).
 * NOTE(review): presumably the number of valid indices accepted by
 * perf_pmmgpc_engine_sel_r() - confirm against the hardware manual.
 */
static inline u32 perf_pmmgpc_engine_sel__size_1_v(void)
{
	const u32 size = 0x00000020U;

	return size;
}
#endif