From 34df0035194e0203f68f679acdd84e5533a48149 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 30 Oct 2018 13:45:43 -0700 Subject: [PATCH] gpu: nvgpu: using pmu counters for load estimate PMU counters #0 and #4 are used to count total cycles and busy cycles, respectively. These counts are used by podgov to estimate GPU load. The PMU idle interrupt status register is used to detect overflow of counter #0. Overflow rarely occurs because the frequency governor reads and resets the counters at a high cadence. When overflow occurs, a 100% workload is reported to the frequency governor. Bug 1963732 Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69 Signed-off-by: Peng Liu Reviewed-on: https://git-master.nvidia.com/r/1939547 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c | 45 +++++++++++++++++ drivers/gpu/nvgpu/common/pmu/pmu_gk20a.h | 3 ++ drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c | 42 ++++++++++++++++ drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 2 + drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 + drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 + drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 2 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 + .../include/nvgpu/hw/gk20a/hw_pwr_gk20a.h | 40 ++++++++++++++++ .../include/nvgpu/hw/gm20b/hw_pwr_gm20b.h | 48 +++++++++++++++++++ .../include/nvgpu/hw/gp106/hw_pwr_gp106.h | 48 +++++++++++++++++++ .../include/nvgpu/hw/gp10b/hw_pwr_gp10b.h | 48 +++++++++++++++++++ .../include/nvgpu/hw/gv100/hw_pwr_gv100.h | 48 +++++++++++++++++++ .../include/nvgpu/hw/gv11b/hw_pwr_gv11b.h | 48 +++++++++++++++++++ .../include/nvgpu/hw/tu104/hw_pwr_tu104.h | 48 +++++++++++++++++++ drivers/gpu/nvgpu/include/nvgpu/pmu.h | 4 ++ drivers/gpu/nvgpu/os/linux/scale.c | 18 ++++--- drivers/gpu/nvgpu/tu104/hal_tu104.c | 3 ++ drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 2 + drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 2 + 21 files changed, 449 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c 
b/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c index e568b4120..90c1d901e 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c @@ -803,6 +803,39 @@ void gk20a_pmu_init_perfmon_counter(struct gk20a *g) pwr_pmu_idle_ctrl_value_always_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); + + /* + * use counters 4 and 0 for perfmon to log busy cycles and total cycles + * counter #0 overflow sets pmu idle intr status bit + */ + gk20a_writel(g, pwr_pmu_idle_intr_r(), + pwr_pmu_idle_intr_en_f(0)); + + gk20a_writel(g, pwr_pmu_idle_threshold_r(0), + pwr_pmu_idle_threshold_value_f(0x7FFFFFFF)); + + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(0)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_always_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(0), data); + + gk20a_writel(g, pwr_pmu_idle_mask_r(4), + pwr_pmu_idle_mask_gr_enabled_f() | + pwr_pmu_idle_mask_ce_2_enabled_f()); + + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(4)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_busy_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(4), data); + + gk20a_writel(g, pwr_pmu_idle_count_r(0), pwr_pmu_idle_count_reset_f(1)); + gk20a_writel(g, pwr_pmu_idle_count_r(4), pwr_pmu_idle_count_reset_f(1)); + gk20a_writel(g, pwr_pmu_idle_intr_status_r(), + pwr_pmu_idle_intr_status_intr_f(1)); } u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id) @@ -817,6 +850,18 @@ void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id) pwr_pmu_idle_count_reset_f(1)); } +u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g) +{ + return pwr_pmu_idle_intr_status_intr_v( + gk20a_readl(g, pwr_pmu_idle_intr_status_r())); +} + +void gk20a_pmu_clear_idle_intr_status(struct gk20a *g) +{ + gk20a_writel(g, 
pwr_pmu_idle_intr_status_r(), + pwr_pmu_idle_intr_status_intr_f(1)); +} + void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, struct pmu_pg_stats_data *pg_stat_data) { diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.h b/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.h index 514d021f4..37a52762c 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.h @@ -58,6 +58,9 @@ void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set); u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id); void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id); +u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g); +void gk20a_pmu_clear_idle_intr_status(struct gk20a *g); + void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr); bool gk20a_is_pmu_supported(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c index d60c4f074..c0e67c621 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c @@ -236,6 +236,48 @@ int nvgpu_pmu_load_update(struct gk20a *g) return 0; } +int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm) +{ + u64 busy_cycles, total_cycles; + u32 intr_status; + + gk20a_busy_noresume(g); + if (!g->power_on) { + *norm = 0; + goto exit; + } + + if (g->ops.pmu.pmu_read_idle_counter == NULL || + g->ops.pmu.pmu_reset_idle_counter == NULL || + g->ops.pmu.pmu_read_idle_intr_status == NULL || + g->ops.pmu.pmu_clear_idle_intr_status == NULL) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + goto exit; + } + + busy_cycles = g->ops.pmu.pmu_read_idle_counter(g, 4); + total_cycles = g->ops.pmu.pmu_read_idle_counter(g, 0); + intr_status = g->ops.pmu.pmu_read_idle_intr_status(g); + + g->ops.pmu.pmu_reset_idle_counter(g, 4); + g->ops.pmu.pmu_reset_idle_counter(g, 0); + + if (intr_status != 0UL) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + g->ops.pmu.pmu_clear_idle_intr_status(g); + } else if (total_cycles 
== 0ULL || busy_cycles > total_cycles) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + } else { + *norm = (u32)(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX + / total_cycles); + } + +exit: + gk20a_idle_nosuspend(g); + + return 0; +} + void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, u32 *total_cycles) { diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index a99cfa72c..5182b4aee 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -605,6 +605,8 @@ static const struct gpu_ops gm20b_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 75920a2fb..a58cfec72 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -718,6 +718,8 @@ static const struct gpu_ops gp106_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 4c3f0c029..2f61ccba0 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -681,6 +681,8 @@ static const struct gpu_ops 
gp10b_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index b2b854f54..dfbfc7d10 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -854,6 +854,8 @@ static const struct gpu_ops gv100_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index f3a5547b5..c85e75831 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -810,6 +810,8 @@ static const struct gpu_ops gv11b_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 
bed617dd4..8d1eee1f1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1153,6 +1153,8 @@ struct gpu_ops { void (*pmu_init_perfmon_counter)(struct gk20a *g); void (*pmu_pg_idle_counter_config)(struct gk20a *g, u32 pg_engine_id); u32 (*pmu_read_idle_counter)(struct gk20a *g, u32 counter_id); + u32 (*pmu_read_idle_intr_status)(struct gk20a *g); + void (*pmu_clear_idle_intr_status)(struct gk20a *g); void (*pmu_reset_idle_counter)(struct gk20a *g, u32 counter_id); void (*pmu_dump_elpg_stats)(struct nvgpu_pmu *pmu); void (*pmu_dump_falcon_stats)(struct nvgpu_pmu *pmu); diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h index 290d4d490..9c545f02e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h @@ -674,6 +674,46 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git 
a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h index 5a1221e7f..47f11aae9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h @@ -718,6 +718,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h index dcf5070a2..84d815e86 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h @@ -726,6 +726,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 
pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h index e1527e36a..8d3ef82d3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h @@ -722,6 +722,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static 
inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h index 86190e117..b667f0eb5 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h @@ -826,6 +826,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 
i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h index b51cc7a44..4e8bbbb88 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h @@ -882,6 +882,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_pwr_tu104.h b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_pwr_tu104.h index 184d3965c..2c4c0750e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_pwr_tu104.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_pwr_tu104.h @@ -826,6 +826,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010be00U + 
i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index 0a7746a8f..abf9c4b65 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -142,6 +142,9 @@ enum pmu_seq_state { #define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000U) #define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (200U) +/* pmu load const defines */ +#define PMU_BUSY_CYCLES_NORM_MAX (1000U) + /* RPC */ #define PMU_RPC_EXECUTE(_stat, _pmu, _unit, _func, _prpc, _size)\ do { \ @@ -447,6 +450,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu); int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); int nvgpu_pmu_load_update(struct gk20a *g); +int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm); void nvgpu_pmu_reset_load_counters(struct gk20a *g); void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, u32 *total_cycles); diff --git 
a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c index d7a961c76..e6bb5c505 100644 --- a/drivers/gpu/nvgpu/os/linux/scale.c +++ b/drivers/gpu/nvgpu/os/linux/scale.c @@ -211,18 +211,18 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq, } /* - * update_load_estimate_gpmu(profile) + * update_load_estimate_busy_cycles(dev) * - * Update load estimate using gpmu. The gpmu value is normalised + * Update load estimate using pmu idle counters. Result is normalised * based on the time it was asked last time. */ -static void update_load_estimate_gpmu(struct device *dev) +static void update_load_estimate_busy_cycles(struct device *dev) { struct gk20a *g = get_gk20a(dev); struct gk20a_scale_profile *profile = g->scale_profile; unsigned long dt; - u32 busy_time; + u32 busy_cycles_norm; ktime_t t; t = ktime_get(); @@ -230,8 +230,9 @@ static void update_load_estimate_gpmu(struct device *dev) profile->dev_stat.total_time = dt; profile->last_event_time = t; - nvgpu_pmu_load_norm(g, &busy_time); - profile->dev_stat.busy_time = (busy_time * dt) / 1000; + nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm); + profile->dev_stat.busy_time = + (busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX; } /* @@ -284,9 +285,6 @@ static int gk20a_scale_get_dev_status(struct device *dev, struct gk20a_scale_profile *profile = g->scale_profile; struct gk20a_platform *platform = dev_get_drvdata(dev); - /* update the software shadow */ - nvgpu_pmu_load_update(g); - /* inform edp about new constraint */ if (platform->prescale) platform->prescale(dev); @@ -296,7 +294,7 @@ static int gk20a_scale_get_dev_status(struct device *dev, g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); /* Update load estimate */ - update_load_estimate_gpmu(dev); + update_load_estimate_busy_cycles(dev); /* Copy the contents of the current device status */ *stat = profile->dev_stat; diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 
4268aad88..5042e95ac 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -883,6 +883,9 @@ static const struct gpu_ops tu104_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + /* TODO: implement for tu104 */ + .pmu_read_idle_intr_status = NULL, + .pmu_clear_idle_intr_status = NULL, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index c20dd6ef4..2548d7797 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -510,6 +510,8 @@ static const struct gpu_ops vgpu_gp10b_ops = { .pmu_pg_idle_counter_config = NULL, .pmu_read_idle_counter = NULL, .pmu_reset_idle_counter = NULL, + .pmu_read_idle_intr_status = NULL, + .pmu_clear_idle_intr_status = NULL, .pmu_dump_elpg_stats = NULL, .pmu_dump_falcon_stats = NULL, .pmu_enable_irq = NULL, diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index e659f2dd0..6b8ca0db0 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -585,6 +585,8 @@ static const struct gpu_ops vgpu_gv11b_ops = { .pmu_pg_idle_counter_config = NULL, .pmu_read_idle_counter = NULL, .pmu_reset_idle_counter = NULL, + .pmu_read_idle_intr_status = NULL, + .pmu_clear_idle_intr_status = NULL, .pmu_dump_elpg_stats = NULL, .pmu_dump_falcon_stats = NULL, .pmu_enable_irq = NULL,