gpu: nvgpu: move perf unit accesses to common.perf unit

Below HALs are implemented in common.gr unit, but they really belong
to common.perf unit since they access registers from perf unit.
gops.gr.init_hwpm_pmm_register()
gops.gr.get_num_hwpm_perfmon()
gops.gr.set_pmm_register()
gops.gr.reset_hwpm_pmm_registers()

Move them to common.perf unit, and update all the code accordingly
gops.perf.init_hwpm_pmm_register()
gops.perf.get_num_hwpm_perfmon()
gops.perf.set_pmm_register()
gops.perf.reset_hwpm_pmm_registers()

Add new HAL gops.gr.get_pm_ctx_buffer_offsets() and set it to
gr_gk20a_get_pm_ctx_buffer_offsets() for all chips.

Bug 2510974
Jira NVGPU-5360

Change-Id: Ib5e84ed5c8b6e72cc6923161e55fc2c3a6a4070e
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2418306
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2020-09-22 11:49:31 +05:30
committed by Alex Waterman
parent 9652764b65
commit dd9298c959
12 changed files with 170 additions and 159 deletions

View File

@@ -22,6 +22,9 @@
#include <nvgpu/io.h>
#include <nvgpu/mm.h>
#include <nvgpu/fbp.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/bug.h>
#include <nvgpu/gk20a.h>
@@ -358,3 +361,135 @@ const u32 *gv11b_perf_get_hwpm_fbp_perfmon_regs(u32 *count)
*count = sizeof(hwpm_fbp_perfmon_regs) / sizeof(hwpm_fbp_perfmon_regs[0]);
return hwpm_fbp_perfmon_regs;
}
void gv11b_perf_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
u32 num_chiplets, u32 chiplet_stride, u32 num_perfmons)
{
u32 perfmon_index = 0;
u32 chiplet_index = 0;
u32 reg_offset = 0;
for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) {
for (perfmon_index = 0; perfmon_index < num_perfmons;
perfmon_index++) {
reg_offset = offset + perfmon_index *
perf_pmmsys_perdomain_offset_v() +
chiplet_index * chiplet_stride;
nvgpu_writel(g, reg_offset, val);
}
}
}
void gv11b_perf_get_num_hwpm_perfmon(struct gk20a *g, u32 *num_sys_perfmon,
u32 *num_fbp_perfmon, u32 *num_gpc_perfmon)
{
int err;
u32 buf_offset_lo, buf_offset_addr, num_offsets;
u32 perfmon_index = 0;
for (perfmon_index = 0; perfmon_index <
perf_pmmsys_engine_sel__size_1_v();
perfmon_index++) {
err = g->ops.gr.get_pm_ctx_buffer_offsets(g,
perf_pmmsys_engine_sel_r(perfmon_index),
1,
&buf_offset_lo,
&buf_offset_addr,
&num_offsets);
if (err != 0) {
break;
}
}
*num_sys_perfmon = perfmon_index;
for (perfmon_index = 0; perfmon_index <
perf_pmmfbp_engine_sel__size_1_v();
perfmon_index++) {
err = g->ops.gr.get_pm_ctx_buffer_offsets(g,
perf_pmmfbp_engine_sel_r(perfmon_index),
1,
&buf_offset_lo,
&buf_offset_addr,
&num_offsets);
if (err != 0) {
break;
}
}
*num_fbp_perfmon = perfmon_index;
for (perfmon_index = 0; perfmon_index <
perf_pmmgpc_engine_sel__size_1_v();
perfmon_index++) {
err = g->ops.gr.get_pm_ctx_buffer_offsets(g,
perf_pmmgpc_engine_sel_r(perfmon_index),
1,
&buf_offset_lo,
&buf_offset_addr,
&num_offsets);
if (err != 0) {
break;
}
}
*num_gpc_perfmon = perfmon_index;
}
void gv11b_perf_reset_hwpm_pmm_registers(struct gk20a *g)
{
u32 count;
const u32 *perfmon_regs;
u32 i;
if (g->num_sys_perfmon == 0U) {
g->ops.perf.get_num_hwpm_perfmon(g, &g->num_sys_perfmon,
&g->num_fbp_perfmon, &g->num_gpc_perfmon);
}
perfmon_regs = g->ops.perf.get_hwpm_sys_perfmon_regs(&count);
for (i = 0U; i < count; i++) {
g->ops.perf.set_pmm_register(g, perfmon_regs[i], 0U, 1U,
g->ops.perf.get_pmmsys_per_chiplet_offset(),
g->num_sys_perfmon);
}
/*
* All the registers are broadcast ones so trigger
* g->ops.gr.set_pmm_register() only with 1 chiplet even for
* GPC and FBP chiplets.
*/
perfmon_regs = g->ops.perf.get_hwpm_fbp_perfmon_regs(&count);
for (i = 0U; i < count; i++) {
g->ops.perf.set_pmm_register(g, perfmon_regs[i], 0U, 1U,
g->ops.perf.get_pmmfbp_per_chiplet_offset(),
g->num_fbp_perfmon);
}
perfmon_regs = g->ops.perf.get_hwpm_gpc_perfmon_regs(&count);
for (i = 0U; i < count; i++) {
g->ops.perf.set_pmm_register(g, perfmon_regs[i], 0U, 1U,
g->ops.perf.get_pmmgpc_per_chiplet_offset(),
g->num_gpc_perfmon);
}
}
void gv11b_perf_init_hwpm_pmm_register(struct gk20a *g)
{
if (g->num_sys_perfmon == 0U) {
g->ops.perf.get_num_hwpm_perfmon(g, &g->num_sys_perfmon,
&g->num_fbp_perfmon, &g->num_gpc_perfmon);
}
g->ops.perf.set_pmm_register(g, perf_pmmsys_engine_sel_r(0), 0xFFFFFFFFU,
1U, g->ops.perf.get_pmmsys_per_chiplet_offset(),
g->num_sys_perfmon);
g->ops.perf.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0), 0xFFFFFFFFU,
nvgpu_fbp_get_num_fbps(g->fbp),
g->ops.perf.get_pmmfbp_per_chiplet_offset(),
g->num_fbp_perfmon);
g->ops.perf.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0), 0xFFFFFFFFU,
nvgpu_gr_config_get_gpc_count(nvgpu_gr_get_config_ptr(g)),
g->ops.perf.get_pmmgpc_per_chiplet_offset(),
g->num_gpc_perfmon);
}