mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: using pmu counters for load estimate
PMU counters #0 and #4 are used to count total cycles and busy cycles, respectively. These counts are used by podgov to estimate GPU load.

The PMU idle interrupt status register is used to monitor counter overflow. Overflow rarely occurs because the frequency governor reads and resets the counters at a high cadence. When overflow does occur, 100% workload is reported to the frequency governor.

Bug 1963732

Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69
Signed-off-by: Peng Liu <pengliu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1939547
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
75ff0feeff
commit
34df003519
@@ -803,6 +803,39 @@ void gk20a_pmu_init_perfmon_counter(struct gk20a *g)
|
|||||||
pwr_pmu_idle_ctrl_value_always_f() |
|
pwr_pmu_idle_ctrl_value_always_f() |
|
||||||
pwr_pmu_idle_ctrl_filter_disabled_f());
|
pwr_pmu_idle_ctrl_filter_disabled_f());
|
||||||
gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
|
gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* use counters 4 and 0 for perfmon to log busy cycles and total cycles
|
||||||
|
* counter #0 overflow sets pmu idle intr status bit
|
||||||
|
*/
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_intr_r(),
|
||||||
|
pwr_pmu_idle_intr_en_f(0));
|
||||||
|
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_threshold_r(0),
|
||||||
|
pwr_pmu_idle_threshold_value_f(0x7FFFFFFF));
|
||||||
|
|
||||||
|
data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(0));
|
||||||
|
data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
|
||||||
|
pwr_pmu_idle_ctrl_filter_m(),
|
||||||
|
pwr_pmu_idle_ctrl_value_always_f() |
|
||||||
|
pwr_pmu_idle_ctrl_filter_disabled_f());
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_ctrl_r(0), data);
|
||||||
|
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_mask_r(4),
|
||||||
|
pwr_pmu_idle_mask_gr_enabled_f() |
|
||||||
|
pwr_pmu_idle_mask_ce_2_enabled_f());
|
||||||
|
|
||||||
|
data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(4));
|
||||||
|
data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
|
||||||
|
pwr_pmu_idle_ctrl_filter_m(),
|
||||||
|
pwr_pmu_idle_ctrl_value_busy_f() |
|
||||||
|
pwr_pmu_idle_ctrl_filter_disabled_f());
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_ctrl_r(4), data);
|
||||||
|
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_count_r(0), pwr_pmu_idle_count_reset_f(1));
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_count_r(4), pwr_pmu_idle_count_reset_f(1));
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
|
||||||
|
pwr_pmu_idle_intr_status_intr_f(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
|
u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
|
||||||
@@ -817,6 +850,18 @@ void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
|
|||||||
pwr_pmu_idle_count_reset_f(1));
|
pwr_pmu_idle_count_reset_f(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Read the PMU idle interrupt status register and return only its INTR
 * field (bit 0), as extracted by pwr_pmu_idle_intr_status_intr_v().
 */
u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g)
{
	u32 status_reg = gk20a_readl(g, pwr_pmu_idle_intr_status_r());

	return pwr_pmu_idle_intr_status_intr_v(status_reg);
}
|
||||||
|
|
||||||
|
void gk20a_pmu_clear_idle_intr_status(struct gk20a *g)
|
||||||
|
{
|
||||||
|
gk20a_writel(g, pwr_pmu_idle_intr_status_r(),
|
||||||
|
pwr_pmu_idle_intr_status_intr_f(1));
|
||||||
|
}
|
||||||
|
|
||||||
void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
|
void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
|
||||||
struct pmu_pg_stats_data *pg_stat_data)
|
struct pmu_pg_stats_data *pg_stat_data)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -58,6 +58,9 @@ void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set);
|
|||||||
u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
|
u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
|
||||||
void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
|
void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
|
||||||
|
|
||||||
|
u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g);
|
||||||
|
void gk20a_pmu_clear_idle_intr_status(struct gk20a *g);
|
||||||
|
|
||||||
void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr);
|
void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr);
|
||||||
bool gk20a_is_pmu_supported(struct gk20a *g);
|
bool gk20a_is_pmu_supported(struct gk20a *g);
|
||||||
|
|
||||||
|
|||||||
@@ -236,6 +236,48 @@ int nvgpu_pmu_load_update(struct gk20a *g)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * nvgpu_pmu_busy_cycles_norm - estimate GPU load from the PMU idle counters.
 * @g:    GPU instance.
 * @norm: output; busy/total ratio scaled to [0, PMU_BUSY_CYCLES_NORM_MAX].
 *
 * Samples idle counter #4 (busy cycles) and idle counter #0 (total cycles),
 * then resets both so the next call measures a fresh interval.
 *
 * *norm is set to:
 *   - 0 when the GPU is not powered on;
 *   - PMU_BUSY_CYCLES_NORM_MAX when any of the required PMU ops is missing,
 *     when the idle interrupt status is set (the pending status is then
 *     cleared), or when the sampled counts are unusable (total is zero or
 *     busy exceeds total);
 *   - busy * PMU_BUSY_CYCLES_NORM_MAX / total otherwise.
 *
 * Always returns 0.
 */
int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm)
{
	u64 busy_cycles, total_cycles;
	u32 intr_status;

	/* Hold a usage reference without forcing a power-up. */
	gk20a_busy_noresume(g);
	if (!g->power_on) {
		*norm = 0U;
		goto exit;
	}

	/* Without all four ops a load cannot be measured: report full load. */
	if (g->ops.pmu.pmu_read_idle_counter == NULL ||
	    g->ops.pmu.pmu_reset_idle_counter == NULL ||
	    g->ops.pmu.pmu_read_idle_intr_status == NULL ||
	    g->ops.pmu.pmu_clear_idle_intr_status == NULL) {
		*norm = PMU_BUSY_CYCLES_NORM_MAX;
		goto exit;
	}

	busy_cycles = g->ops.pmu.pmu_read_idle_counter(g, 4);
	total_cycles = g->ops.pmu.pmu_read_idle_counter(g, 0);
	intr_status = g->ops.pmu.pmu_read_idle_intr_status(g);

	g->ops.pmu.pmu_reset_idle_counter(g, 4);
	g->ops.pmu.pmu_reset_idle_counter(g, 0);

	if (intr_status != 0U) {
		/* Interrupt status set: sampled counts are not trusted. */
		*norm = PMU_BUSY_CYCLES_NORM_MAX;
		g->ops.pmu.pmu_clear_idle_intr_status(g);
	} else if (total_cycles == 0ULL || busy_cycles > total_cycles) {
		/* Avoid divide-by-zero and ratios above 100%. */
		*norm = PMU_BUSY_CYCLES_NORM_MAX;
	} else {
		/* 64-bit intermediate keeps busy * NORM_MAX from wrapping. */
		*norm = (u32)(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX
				/ total_cycles);
	}

exit:
	gk20a_idle_nosuspend(g);

	return 0;
}
|
||||||
|
|
||||||
void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
|
void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
|
||||||
u32 *total_cycles)
|
u32 *total_cycles)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -605,6 +605,8 @@ static const struct gpu_ops gm20b_ops = {
|
|||||||
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
||||||
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
||||||
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
||||||
|
.pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
|
||||||
|
.pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
|
||||||
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
||||||
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
||||||
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
||||||
|
|||||||
@@ -718,6 +718,8 @@ static const struct gpu_ops gp106_ops = {
|
|||||||
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
||||||
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
||||||
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
||||||
|
.pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
|
||||||
|
.pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
|
||||||
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
||||||
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
||||||
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
||||||
|
|||||||
@@ -681,6 +681,8 @@ static const struct gpu_ops gp10b_ops = {
|
|||||||
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
||||||
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
||||||
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
||||||
|
.pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
|
||||||
|
.pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
|
||||||
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
||||||
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
||||||
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
||||||
|
|||||||
@@ -854,6 +854,8 @@ static const struct gpu_ops gv100_ops = {
|
|||||||
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
||||||
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
||||||
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
||||||
|
.pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
|
||||||
|
.pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
|
||||||
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
||||||
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
||||||
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
||||||
|
|||||||
@@ -810,6 +810,8 @@ static const struct gpu_ops gv11b_ops = {
|
|||||||
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
||||||
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
||||||
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
||||||
|
.pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status,
|
||||||
|
.pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status,
|
||||||
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
||||||
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
||||||
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
||||||
|
|||||||
@@ -1153,6 +1153,8 @@ struct gpu_ops {
|
|||||||
void (*pmu_init_perfmon_counter)(struct gk20a *g);
|
void (*pmu_init_perfmon_counter)(struct gk20a *g);
|
||||||
void (*pmu_pg_idle_counter_config)(struct gk20a *g, u32 pg_engine_id);
|
void (*pmu_pg_idle_counter_config)(struct gk20a *g, u32 pg_engine_id);
|
||||||
u32 (*pmu_read_idle_counter)(struct gk20a *g, u32 counter_id);
|
u32 (*pmu_read_idle_counter)(struct gk20a *g, u32 counter_id);
|
||||||
|
u32 (*pmu_read_idle_intr_status)(struct gk20a *g);
|
||||||
|
void (*pmu_clear_idle_intr_status)(struct gk20a *g);
|
||||||
void (*pmu_reset_idle_counter)(struct gk20a *g, u32 counter_id);
|
void (*pmu_reset_idle_counter)(struct gk20a *g, u32 counter_id);
|
||||||
void (*pmu_dump_elpg_stats)(struct nvgpu_pmu *pmu);
|
void (*pmu_dump_elpg_stats)(struct nvgpu_pmu *pmu);
|
||||||
void (*pmu_dump_falcon_stats)(struct nvgpu_pmu *pmu);
|
void (*pmu_dump_falcon_stats)(struct nvgpu_pmu *pmu);
|
||||||
|
|||||||
@@ -674,6 +674,46 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
|
|||||||
{
|
{
|
||||||
return 0x0U;
|
return 0x0U;
|
||||||
}
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_r(u32 i)
|
||||||
|
{
|
||||||
|
return 0x0010a8a0U + i*4U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x7fffffffU) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9e8U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000000U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9ecU;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
|
||||||
|
{
|
||||||
|
return U32(0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
|
||||||
|
{
|
||||||
|
return (r >> 0U) & 0x1U;
|
||||||
|
}
|
||||||
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
||||||
{
|
{
|
||||||
return 0x0010a9f0U + i*8U;
|
return 0x0010a9f0U + i*8U;
|
||||||
|
|||||||
@@ -718,6 +718,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
|
|||||||
{
|
{
|
||||||
return 0x0U;
|
return 0x0U;
|
||||||
}
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_r(u32 i)
|
||||||
|
{
|
||||||
|
return 0x0010a8a0U + i*4U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x7fffffffU) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9e8U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000000U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9ecU;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
|
||||||
|
{
|
||||||
|
return U32(0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
|
||||||
|
{
|
||||||
|
return (r >> 0U) & 0x1U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
||||||
{
|
{
|
||||||
return 0x0010a9f0U + i*8U;
|
return 0x0010a9f0U + i*8U;
|
||||||
|
|||||||
@@ -726,6 +726,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
|
|||||||
{
|
{
|
||||||
return 0x0U;
|
return 0x0U;
|
||||||
}
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_r(u32 i)
|
||||||
|
{
|
||||||
|
return 0x0010a8a0U + i*4U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x7fffffffU) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9e8U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000000U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9ecU;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
|
||||||
|
{
|
||||||
|
return U32(0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
|
||||||
|
{
|
||||||
|
return (r >> 0U) & 0x1U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
||||||
{
|
{
|
||||||
return 0x0010a9f0U + i*8U;
|
return 0x0010a9f0U + i*8U;
|
||||||
|
|||||||
@@ -722,6 +722,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
|
|||||||
{
|
{
|
||||||
return 0x0U;
|
return 0x0U;
|
||||||
}
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_r(u32 i)
|
||||||
|
{
|
||||||
|
return 0x0010a8a0U + i*4U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x7fffffffU) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9e8U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000000U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9ecU;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
|
||||||
|
{
|
||||||
|
return U32(0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
|
||||||
|
{
|
||||||
|
return (r >> 0U) & 0x1U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
||||||
{
|
{
|
||||||
return 0x0010a9f0U + i*8U;
|
return 0x0010a9f0U + i*8U;
|
||||||
|
|||||||
@@ -826,6 +826,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
|
|||||||
{
|
{
|
||||||
return 0x0U;
|
return 0x0U;
|
||||||
}
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_r(u32 i)
|
||||||
|
{
|
||||||
|
return 0x0010a8a0U + i*4U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x7fffffffU) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9e8U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000000U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9ecU;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
|
||||||
|
{
|
||||||
|
return U32(0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
|
||||||
|
{
|
||||||
|
return (r >> 0U) & 0x1U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
||||||
{
|
{
|
||||||
return 0x0010a9f0U + i*8U;
|
return 0x0010a9f0U + i*8U;
|
||||||
|
|||||||
@@ -882,6 +882,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
|
|||||||
{
|
{
|
||||||
return 0x0U;
|
return 0x0U;
|
||||||
}
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_r(u32 i)
|
||||||
|
{
|
||||||
|
return 0x0010a8a0U + i*4U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x7fffffffU) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9e8U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000000U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9ecU;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
|
||||||
|
{
|
||||||
|
return U32(0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
|
||||||
|
{
|
||||||
|
return (r >> 0U) & 0x1U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
||||||
{
|
{
|
||||||
return 0x0010a9f0U + i*8U;
|
return 0x0010a9f0U + i*8U;
|
||||||
|
|||||||
@@ -826,6 +826,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
|
|||||||
{
|
{
|
||||||
return 0x0U;
|
return 0x0U;
|
||||||
}
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_r(u32 i)
|
||||||
|
{
|
||||||
|
return 0x0010be00U + i*4U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_threshold_value_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x7fffffffU) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9e8U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_disabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000000U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_en_enabled_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_r(void)
|
||||||
|
{
|
||||||
|
return 0x0010a9ecU;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v)
|
||||||
|
{
|
||||||
|
return (v & 0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_m(void)
|
||||||
|
{
|
||||||
|
return U32(0x1U) << 0U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r)
|
||||||
|
{
|
||||||
|
return (r >> 0U) & 0x1U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
|
static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void)
|
||||||
|
{
|
||||||
|
return 0x00000001U;
|
||||||
|
}
|
||||||
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
|
||||||
{
|
{
|
||||||
return 0x0010a9f0U + i*8U;
|
return 0x0010a9f0U + i*8U;
|
||||||
|
|||||||
@@ -142,6 +142,9 @@ enum pmu_seq_state {
|
|||||||
#define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000U)
|
#define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000U)
|
||||||
#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (200U)
|
#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (200U)
|
||||||
|
|
||||||
|
/* pmu load const defines */
|
||||||
|
#define PMU_BUSY_CYCLES_NORM_MAX (1000U)
|
||||||
|
|
||||||
/* RPC */
|
/* RPC */
|
||||||
#define PMU_RPC_EXECUTE(_stat, _pmu, _unit, _func, _prpc, _size)\
|
#define PMU_RPC_EXECUTE(_stat, _pmu, _unit, _func, _prpc, _size)\
|
||||||
do { \
|
do { \
|
||||||
@@ -447,6 +450,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
|
|||||||
int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
|
int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
|
||||||
int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
|
int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
|
||||||
int nvgpu_pmu_load_update(struct gk20a *g);
|
int nvgpu_pmu_load_update(struct gk20a *g);
|
||||||
|
int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm);
|
||||||
void nvgpu_pmu_reset_load_counters(struct gk20a *g);
|
void nvgpu_pmu_reset_load_counters(struct gk20a *g);
|
||||||
void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
|
void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
|
||||||
u32 *total_cycles);
|
u32 *total_cycles);
|
||||||
|
|||||||
@@ -211,18 +211,18 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* update_load_estimate_gpmu(profile)
|
* update_load_estimate_busy_cycles(dev)
|
||||||
*
|
*
|
||||||
* Update load estimate using gpmu. The gpmu value is normalised
|
* Update load estimate using pmu idle counters. Result is normalised
|
||||||
* based on the time it was asked last time.
|
* based on the time it was asked last time.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void update_load_estimate_gpmu(struct device *dev)
|
static void update_load_estimate_busy_cycles(struct device *dev)
|
||||||
{
|
{
|
||||||
struct gk20a *g = get_gk20a(dev);
|
struct gk20a *g = get_gk20a(dev);
|
||||||
struct gk20a_scale_profile *profile = g->scale_profile;
|
struct gk20a_scale_profile *profile = g->scale_profile;
|
||||||
unsigned long dt;
|
unsigned long dt;
|
||||||
u32 busy_time;
|
u32 busy_cycles_norm;
|
||||||
ktime_t t;
|
ktime_t t;
|
||||||
|
|
||||||
t = ktime_get();
|
t = ktime_get();
|
||||||
@@ -230,8 +230,9 @@ static void update_load_estimate_gpmu(struct device *dev)
|
|||||||
|
|
||||||
profile->dev_stat.total_time = dt;
|
profile->dev_stat.total_time = dt;
|
||||||
profile->last_event_time = t;
|
profile->last_event_time = t;
|
||||||
nvgpu_pmu_load_norm(g, &busy_time);
|
nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm);
|
||||||
profile->dev_stat.busy_time = (busy_time * dt) / 1000;
|
profile->dev_stat.busy_time =
|
||||||
|
(busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -284,9 +285,6 @@ static int gk20a_scale_get_dev_status(struct device *dev,
|
|||||||
struct gk20a_scale_profile *profile = g->scale_profile;
|
struct gk20a_scale_profile *profile = g->scale_profile;
|
||||||
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
||||||
|
|
||||||
/* update the software shadow */
|
|
||||||
nvgpu_pmu_load_update(g);
|
|
||||||
|
|
||||||
/* inform edp about new constraint */
|
/* inform edp about new constraint */
|
||||||
if (platform->prescale)
|
if (platform->prescale)
|
||||||
platform->prescale(dev);
|
platform->prescale(dev);
|
||||||
@@ -296,7 +294,7 @@ static int gk20a_scale_get_dev_status(struct device *dev,
|
|||||||
g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
|
g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
|
||||||
|
|
||||||
/* Update load estimate */
|
/* Update load estimate */
|
||||||
update_load_estimate_gpmu(dev);
|
update_load_estimate_busy_cycles(dev);
|
||||||
|
|
||||||
/* Copy the contents of the current device status */
|
/* Copy the contents of the current device status */
|
||||||
*stat = profile->dev_stat;
|
*stat = profile->dev_stat;
|
||||||
|
|||||||
@@ -883,6 +883,9 @@ static const struct gpu_ops tu104_ops = {
|
|||||||
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
.pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config,
|
||||||
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
.pmu_read_idle_counter = gk20a_pmu_read_idle_counter,
|
||||||
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
.pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter,
|
||||||
|
/* TODO: implement for tu104 */
|
||||||
|
.pmu_read_idle_intr_status = NULL,
|
||||||
|
.pmu_clear_idle_intr_status = NULL,
|
||||||
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
.pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats,
|
||||||
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
.pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats,
|
||||||
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
.pmu_enable_irq = gk20a_pmu_enable_irq,
|
||||||
|
|||||||
@@ -510,6 +510,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
|||||||
.pmu_pg_idle_counter_config = NULL,
|
.pmu_pg_idle_counter_config = NULL,
|
||||||
.pmu_read_idle_counter = NULL,
|
.pmu_read_idle_counter = NULL,
|
||||||
.pmu_reset_idle_counter = NULL,
|
.pmu_reset_idle_counter = NULL,
|
||||||
|
.pmu_read_idle_intr_status = NULL,
|
||||||
|
.pmu_clear_idle_intr_status = NULL,
|
||||||
.pmu_dump_elpg_stats = NULL,
|
.pmu_dump_elpg_stats = NULL,
|
||||||
.pmu_dump_falcon_stats = NULL,
|
.pmu_dump_falcon_stats = NULL,
|
||||||
.pmu_enable_irq = NULL,
|
.pmu_enable_irq = NULL,
|
||||||
|
|||||||
@@ -585,6 +585,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
|||||||
.pmu_pg_idle_counter_config = NULL,
|
.pmu_pg_idle_counter_config = NULL,
|
||||||
.pmu_read_idle_counter = NULL,
|
.pmu_read_idle_counter = NULL,
|
||||||
.pmu_reset_idle_counter = NULL,
|
.pmu_reset_idle_counter = NULL,
|
||||||
|
.pmu_read_idle_intr_status = NULL,
|
||||||
|
.pmu_clear_idle_intr_status = NULL,
|
||||||
.pmu_dump_elpg_stats = NULL,
|
.pmu_dump_elpg_stats = NULL,
|
||||||
.pmu_dump_falcon_stats = NULL,
|
.pmu_dump_falcon_stats = NULL,
|
||||||
.pmu_enable_irq = NULL,
|
.pmu_enable_irq = NULL,
|
||||||
|
|||||||
Reference in New Issue
Block a user