gpu: nvgpu: Remove fixed wait time for change seq completion

Currently, after sending the change seq RPC, nvgpu waits for a fixed time of 20 ms. This CL replaces that fixed sleep with pmu_wait_message_cond, which returns immediately once the change seq completion event is received. It also adds a debugfs node that reports the change seq execution time. Bug 200545366 Change-Id: Iba283f65d4949858be9cbff88de4d21a8c92ff81 Signed-off-by: Abdul Salam <absalam@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2202423 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-25 02:52:51 +03:00 · 2019-09-20 20:01:32 +05:30
parent 8b575a3fff
commit 65ecd7a181
9 changed files with 41 additions and 1 deletions
--- a/drivers/gpu/nvgpu/common/pmu/clk/clk.c
+++ b/drivers/gpu/nvgpu/common/pmu/clk/clk.c
@@ -282,6 +282,7 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,
 	struct nvgpu_pmu *pmu = g->pmu;
 	struct nv_pmu_rpc_perf_change_seq_queue_change rpc;
 	struct ctrl_perf_change_seq_change_input change_input;
+	struct change_seq_pmu *change_seq_pmu = &g->perf_pmu->changeseq_pmu;
 	int status = 0;
 	u8 gpcclk_domain = 0U;
 	u32 gpcclk_voltuv = 0U, gpcclk_clkmhz = 0U;
@@ -340,6 +341,8 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,
 	rpc.change = change_input;
 	rpc.change.pstate_index =
 			nvgpu_get_pstate_entry_idx(g, CTRL_PERF_PSTATE_P0);
+	change_seq_pmu->change_state = 0U;
+	change_seq_pmu->start_time = nvgpu_current_time_us();
 	PMU_RPC_EXECUTE_CPB(status, pmu, PERF,
 			CHANGE_SEQ_QUEUE_CHANGE, &rpc, 0);
 	if (status != 0) {
@@ -349,8 +352,11 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,

 	/* Wait for sync change to complete. */
 	if ((rpc.change.flags & CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC) == 0U) {
-		nvgpu_msleep(20);
+		pmu_wait_message_cond(g->pmu,
+			nvgpu_get_poll_timeout(g),
+			&change_seq_pmu->change_state, 1U);
 	}
+	change_seq_pmu->stop_time = nvgpu_current_time_us();
 	return status;
 }

--- a/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c
+++ b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c
@@ -100,6 +100,7 @@ int nvgpu_perf_change_seq_sw_setup(struct gk20a *g)
 	perf_change_seq_pmu->b_lock = false;
 	perf_change_seq_pmu->cpu_step_id_mask = 0;
 	perf_change_seq_pmu->cpu_adverised_step_id_mask = 0;
+	perf_change_seq_pmu->change_state = 0U;

 exit:
 	return status;
--- a/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c
+++ b/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c
@@ -59,6 +59,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg)
 	struct pmu_nvgpu_rpc_perf_event *msg =
 			(struct pmu_nvgpu_rpc_perf_event *)pmumsg;
 	struct perf_pmupstate *perf_pmu = g->perf_pmu;
+	struct change_seq_pmu *change_pmu = &g->perf_pmu->changeseq_pmu;

 	nvgpu_log_fn(g, " ");
 	switch (msg->rpc_hdr.function) {
@@ -67,6 +68,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg)
 		(void) nvgpu_cond_signal_interruptible(&perf_pmu->vfe_init.wq);
 		break;
 	case NV_PMU_RPC_ID_PERF_SEQ_COMPLETION:
+		change_pmu->change_state = 1U;
 		nvgpu_log_info(g, "Change Seq Completed");
 		break;
 	case NV_PMU_RPC_ID_PERF_PSTATES_INVALIDATE:
--- a/drivers/gpu/nvgpu/hal/clk/clk_tu104.c
+++ b/drivers/gpu/nvgpu/hal/clk/clk_tu104.c
@@ -34,6 +34,7 @@
 #include <nvgpu/soc.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/clk.h>
+#include <nvgpu/pmu/perf.h>
 #include <nvgpu/clk_arb.h>
 #include <nvgpu/pmu/clk/clk.h>
 #include <nvgpu/pmu/clk/clk_domain.h>
@@ -368,3 +369,11 @@ unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain)

 	return (max_mhz * 1000UL * 1000UL);
 }
+
+void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time)
+{
+	/* Report stop_time - start_time of the change sequencer state. */
+	struct change_seq_pmu *seq = &g->perf_pmu->changeseq_pmu;
+
+	*change_time = seq->stop_time - seq->start_time;
+}
--- a/drivers/gpu/nvgpu/hal/clk/clk_tu104.h
+++ b/drivers/gpu/nvgpu/hal/clk/clk_tu104.h
@@ -36,4 +36,5 @@ int tu104_clk_domain_get_f_points(
 	u32 *pfpointscount,
 	u16 *pfreqpointsinmhz);
 unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain);
+void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time);
 #endif /* CLK_TU104_H */
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1227,6 +1227,7 @@ static const struct gpu_ops tu104_ops = {
 		.perf_pmu_vfe_load = nvgpu_perf_pmu_vfe_load_ps35,
 		.clk_domain_get_f_points = tu104_clk_domain_get_f_points,
 		.get_maxrate = tu104_clk_maxrate,
+		.get_change_seq_time = tu104_get_change_seq_time,
 	},
 #ifdef CONFIG_NVGPU_CLK_ARB
 	.clk_arb = {
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -1570,6 +1570,7 @@ struct gpu_ops {
 		int (*mclk_init)(struct gk20a *g);
 		void (*mclk_deinit)(struct gk20a *g);
 		int (*mclk_change)(struct gk20a *g, u16 val);
+		void (*get_change_seq_time)(struct gk20a *g, s64 *change_time);
 		bool split_rail_support;
 		bool support_clk_freq_controller;
 		bool support_pmgr_domain;
--- a/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h
@@ -78,6 +78,9 @@ struct change_seq_pmu {
 	struct change_seq_pmu_script script_curr;
 	struct change_seq_pmu_script script_last;
 	struct change_seq_pmu_script script_query;
+	u32 change_state;
+	s64 start_time;
+	s64 stop_time;
 };

 struct perf_pmupstate {
--- a/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c
+++ b/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c
@@ -203,6 +203,20 @@ static const struct file_operations vftable_fops = {
 	.release = single_release,
 };

+/* debugfs getter: fetch the change seq time through the clk HAL hook. */
+static int tu104_change_seq_time(void *data, u64 *val)
+{
+	struct gk20a *g = data;
+	s64 elapsed;
+
+	if (g->ops.clk.get_change_seq_time == NULL)
+		return -EINVAL; /* hook not installed in the ops table */
+	g->ops.clk.get_change_seq_time(g, &elapsed);
+	*val = (u64)elapsed; /* formatted by change_seq_fops below */
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(change_seq_fops, tu104_change_seq_time, NULL, "%llu\n");
+
 int tu104_clk_init_debugfs(struct gk20a *g)
 {
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
@@ -226,6 +240,8 @@ int tu104_clk_init_debugfs(struct gk20a *g)
 				g, &xbar_cfc_fops);
 	d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
 				g, &gpc_cfc_fops);
+	d = debugfs_create_file("change_seq_time_us", S_IRUGO, clocks_root,
+				g, &change_seq_fops);

 	nvgpu_log(g, gpu_dbg_info, "g=%p", g);