gpu: nvgpu: don't skip setting same clk in arbiter

Currently, the clock arbiter skips setting the clock if the requested rate matches the one it set previously. The value set by the arbiter is stored in "struct nvgpu_clk_arb->actual" whenever the clock is updated via the arbiter. However, DVFS might also update the clock, and those updates are not synchronized with the arbiter. Hence, ensure that clock requests are always applied, i.e. the requested rate is set even if it matches the previously recorded rate. In the devfreq scale() path, when clk_arb is active, skip setting the clocks but still scale EMC. Bug 3666615 Signed-off-by: Debarshi Dutta <ddutta@nvidia.com> Change-Id: I32bf4dbf81b19fdd6fa0bdec3a6c9a9312b78eca Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2727787 Reviewed-by: svcacv <svcacv@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2022-06-13 12:18:42 +05:30
parent 4194c35e17
commit 21ab579341
3 changed files with 24 additions and 4 deletions
--- a/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c
+++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -305,11 +305,23 @@ void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)

 	gpc2clk_session_target = gpc2clk_target;

+/* When DVFS is enabled, arb->actual->gpc2clk may not match
+ * the real clock rate, because DVFS does not synchronize
+ * its updates with the arbiter. Hence, always set the
+ * clock, even if the target equals the recorded value.
+ *
+ * CONFIG_GK20A_DEVFREQ is defined only for Linux,
+ * and DVFS is supported only on Linux. For other
+ * platforms, arb->actual->gpc2clk contains the correct
+ * value.
+ */
+#ifndef CONFIG_GK20A_DEVFREQ
 	if (arb->actual->gpc2clk == gpc2clk_target) {
 		nvgpu_atomic_inc(&arb->req_nr);
 		nvgpu_cond_signal_interruptible(&arb->request_wq);
 		goto exit_arb;
 	}
+#endif

 	nvgpu_mutex_acquire(&arb->pstate_lock);

@@ -344,6 +356,8 @@ void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
 		goto exit_arb;
 	}

+	g->last_freq = rounded_rate;
+
 	actual = ((NV_READ_ONCE(arb->actual)) == &arb->actual_pool[0] ?
 			&arb->actual_pool[1] : &arb->actual_pool[0]);

--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -100,6 +100,9 @@ static void nvgpu_init_vars(struct gk20a *g)
 	/* Init the clock req count to 0 */
 	nvgpu_atomic_set(&g->clk_arb_global_nr, 0);

+	/* Atomic set doesn't guarantee a barrier */
+	nvgpu_smp_wmb();
+
 	nvgpu_mutex_init(&l->ctrl_privs_lock);
 	nvgpu_init_list_node(&l->ctrl_privs);

--- a/drivers/gpu/nvgpu/os/linux/scale.c
+++ b/drivers/gpu/nvgpu/os/linux/scale.c
@@ -163,11 +163,13 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq,
 	struct devfreq *devfreq = l->devfreq;
 #endif
 	unsigned long local_freq = *freq;
-	unsigned long rounded_rate;
+	unsigned long rounded_rate = 0;
 	unsigned long min_freq = 0, max_freq = 0;

-	if (nvgpu_clk_arb_has_active_req(g))
-		return 0;
+	if (nvgpu_clk_arb_has_active_req(g)) {
+		rounded_rate = g->last_freq;
+		goto post_scale;
+	}
 	/*
 	 * Calculate floor and cap frequency values
 	 *
@@ -222,6 +224,7 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq,

 	g->last_freq = *freq;

+post_scale:
 	/* postscale will only scale emc (dram clock) if evaluating
 	 * gk20a_tegra_get_emc_rate() produces a new or different emc
 	 * target because the load or_and gpufreq has changed */