gpu: nvgpu: using pmu counters for load estimate

PMU counters #0 and #4 are used to count total cycles and busy cycles. These counts are used by podgov to estimate the GPU load. The PMU idle interrupt status register is used to monitor counter overflow. Overflow rarely occurs because the frequency governor reads and resets the counters at a high cadence. When an overflow does occur, a 100% workload is reported to the frequency governor. Bug 1963732 Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69 Signed-off-by: Peng Liu <pengliu@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1939547 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 09:12:24 +03:00 · 2018-10-30 13:45:43 -07:00
parent 75ff0feeff
commit 34df003519
21 changed files with 449 additions and 10 deletions
--- a/drivers/gpu/nvgpu/os/linux/scale.c
+++ b/drivers/gpu/nvgpu/os/linux/scale.c
@@ -211,18 +211,18 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq,
 }

 /*
- * update_load_estimate_gpmu(profile)
+ * update_load_estimate_busy_cycles(dev)
 *
- * Update load estimate using gpmu. The gpmu value is normalised
+ * Update load estimate using pmu idle counters. Result is normalised
 * based on the time it was asked last time.
 */

-static void update_load_estimate_gpmu(struct device *dev)
+static void update_load_estimate_busy_cycles(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
 	struct gk20a_scale_profile *profile = g->scale_profile;
 	unsigned long dt;
-	u32 busy_time;
+	u32 busy_cycles_norm;
 	ktime_t t;

 	t = ktime_get();
@@ -230,8 +230,9 @@ static void update_load_estimate_gpmu(struct device *dev)

 	profile->dev_stat.total_time = dt;
 	profile->last_event_time = t;
-	nvgpu_pmu_load_norm(g, &busy_time);
-	profile->dev_stat.busy_time = (busy_time * dt) / 1000;
+	nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm);
+	profile->dev_stat.busy_time =
+		(busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX;
 }

 /*
@@ -284,9 +285,6 @@ static int gk20a_scale_get_dev_status(struct device *dev,
 	struct gk20a_scale_profile *profile = g->scale_profile;
 	struct gk20a_platform *platform = dev_get_drvdata(dev);

-	/* update the software shadow */
-	nvgpu_pmu_load_update(g);
-
 	/* inform edp about new constraint */
 	if (platform->prescale)
 		platform->prescale(dev);
@@ -296,7 +294,7 @@ static int gk20a_scale_get_dev_status(struct device *dev,
 				g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);

 	/* Update load estimate */
-	update_load_estimate_gpmu(dev);
+	update_load_estimate_busy_cycles(dev);

 	/* Copy the contents of the current device status */
 	*stat = profile->dev_stat;