From 65ecd7a1813c1c86bdbb2085777a88f43a463ed0 Mon Sep 17 00:00:00 2001
From: Abdul Salam <absalam@nvidia.com>
Date: Fri, 20 Sep 2019 20:01:32 +0530
Subject: [PATCH] gpu: nvgpu: Remove fixed wait time for change seq completion

Currently, after sending the change seq RPC, nvgpu waits for a fixed
time of 20ms.
This CL replaces the fixed wait with pmu_wait_message_cond, which returns
as soon as the change seq completion event is received.
Also add a debugfs node to read the change seq execution time.

Bug 200545366

Change-Id: Iba283f65d4949858be9cbff88de4d21a8c92ff81
Signed-off-by: Abdul Salam <absalam@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2202423
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/pmu/clk/clk.c         |  8 +++++++-
 drivers/gpu/nvgpu/common/pmu/perf/change_seq.c |  1 +
 drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c  |  2 ++
 drivers/gpu/nvgpu/hal/clk/clk_tu104.c          |  9 +++++++++
 drivers/gpu/nvgpu/hal/clk/clk_tu104.h          |  1 +
 drivers/gpu/nvgpu/hal/init/hal_tu104.c         |  1 +
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h        |  1 +
 drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h     |  3 +++
 drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c   | 16 ++++++++++++++++
 9 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk.c b/drivers/gpu/nvgpu/common/pmu/clk/clk.c
index 7f5357b23..b4610e88c 100644
--- a/drivers/gpu/nvgpu/common/pmu/clk/clk.c
+++ b/drivers/gpu/nvgpu/common/pmu/clk/clk.c
@@ -282,6 +282,7 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,
 	struct nvgpu_pmu *pmu = g->pmu;
 	struct nv_pmu_rpc_perf_change_seq_queue_change rpc;
 	struct ctrl_perf_change_seq_change_input change_input;
+	struct change_seq_pmu *change_seq_pmu = &g->perf_pmu->changeseq_pmu;
 	int status = 0;
 	u8 gpcclk_domain = 0U;
 	u32 gpcclk_voltuv = 0U, gpcclk_clkmhz = 0U;
@@ -340,6 +341,8 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,
 	rpc.change = change_input;
 	rpc.change.pstate_index =
 			nvgpu_get_pstate_entry_idx(g, CTRL_PERF_PSTATE_P0);
+	change_seq_pmu->change_state = 0U;
+	change_seq_pmu->start_time = nvgpu_current_time_us();
 	PMU_RPC_EXECUTE_CPB(status, pmu, PERF,
 			CHANGE_SEQ_QUEUE_CHANGE, &rpc, 0);
 	if (status != 0) {
@@ -349,8 +352,11 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,
 
 	/* Wait for sync change to complete. */
 	if ((rpc.change.flags & CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC) == 0U) {
-		nvgpu_msleep(20);
+		pmu_wait_message_cond(g->pmu,
+			nvgpu_get_poll_timeout(g),
+			&change_seq_pmu->change_state, 1U);
 	}
+	change_seq_pmu->stop_time = nvgpu_current_time_us();
 	return status;
 }
 
diff --git a/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c
index 20382105a..8487f46df 100644
--- a/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c
+++ b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c
@@ -100,6 +100,7 @@ int nvgpu_perf_change_seq_sw_setup(struct gk20a *g)
 	perf_change_seq_pmu->b_lock = false;
 	perf_change_seq_pmu->cpu_step_id_mask = 0;
 	perf_change_seq_pmu->cpu_adverised_step_id_mask = 0;
+	perf_change_seq_pmu->change_state = 0U;
 
 exit:
 	return status;
diff --git a/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c b/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c
index 7323bf792..13f89a8d2 100644
--- a/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c
+++ b/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c
@@ -59,6 +59,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg)
 	struct pmu_nvgpu_rpc_perf_event *msg =
 			(struct pmu_nvgpu_rpc_perf_event *)pmumsg;
 	struct perf_pmupstate *perf_pmu = g->perf_pmu;
+	struct change_seq_pmu *change_pmu = &g->perf_pmu->changeseq_pmu;
 
 	nvgpu_log_fn(g, " ");
 	switch (msg->rpc_hdr.function) {
@@ -67,6 +68,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg)
 		(void) nvgpu_cond_signal_interruptible(&perf_pmu->vfe_init.wq);
 		break;
 	case NV_PMU_RPC_ID_PERF_SEQ_COMPLETION:
+		change_pmu->change_state = 1U;
 		nvgpu_log_info(g, "Change Seq Completed");
 		break;
 	case NV_PMU_RPC_ID_PERF_PSTATES_INVALIDATE:
diff --git a/drivers/gpu/nvgpu/hal/clk/clk_tu104.c b/drivers/gpu/nvgpu/hal/clk/clk_tu104.c
index ebc64e754..bc8d22342 100644
--- a/drivers/gpu/nvgpu/hal/clk/clk_tu104.c
+++ b/drivers/gpu/nvgpu/hal/clk/clk_tu104.c
@@ -34,6 +34,7 @@
 #include <nvgpu/soc.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/clk.h>
+#include <nvgpu/pmu/perf.h>
 #include <nvgpu/clk_arb.h>
 #include <nvgpu/pmu/clk/clk.h>
 #include <nvgpu/pmu/clk/clk_domain.h>
@@ -368,3 +369,11 @@ unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain)
 
 	return (max_mhz * 1000UL * 1000UL);
 }
+
+/* Store (stop_time - start_time) of the change sequencer in *change_time. */
+void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time)
+{
+	const struct change_seq_pmu *seq = &g->perf_pmu->changeseq_pmu;
+
+	*change_time = seq->stop_time - seq->start_time;
+}
diff --git a/drivers/gpu/nvgpu/hal/clk/clk_tu104.h b/drivers/gpu/nvgpu/hal/clk/clk_tu104.h
index d39066d01..4aa813040 100644
--- a/drivers/gpu/nvgpu/hal/clk/clk_tu104.h
+++ b/drivers/gpu/nvgpu/hal/clk/clk_tu104.h
@@ -36,4 +36,5 @@ int tu104_clk_domain_get_f_points(
 	u32 *pfpointscount,
 	u16 *pfreqpointsinmhz);
 unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain);
+void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time);
 #endif /* CLK_TU104_H */
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 2720dd303..983b109d1 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1227,6 +1227,7 @@ static const struct gpu_ops tu104_ops = {
 		.perf_pmu_vfe_load = nvgpu_perf_pmu_vfe_load_ps35,
 		.clk_domain_get_f_points = tu104_clk_domain_get_f_points,
 		.get_maxrate = tu104_clk_maxrate,
+		.get_change_seq_time = tu104_get_change_seq_time,
 	},
 #ifdef CONFIG_NVGPU_CLK_ARB
 	.clk_arb = {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 08474a5a2..c4f47bd85 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -1570,6 +1570,7 @@ struct gpu_ops {
 		int (*mclk_init)(struct gk20a *g);
 		void (*mclk_deinit)(struct gk20a *g);
 		int (*mclk_change)(struct gk20a *g, u16 val);
+		void (*get_change_seq_time)(struct gk20a *g, s64 *change_time);
 		bool split_rail_support;
 		bool support_clk_freq_controller;
 		bool support_pmgr_domain;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h b/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h
index 5839964a2..2eeb5a87a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h
@@ -78,6 +78,9 @@ struct change_seq_pmu {
 	struct change_seq_pmu_script script_curr;
 	struct change_seq_pmu_script script_last;
 	struct change_seq_pmu_script script_query;
+	u32 change_state;
+	s64 start_time;
+	s64 stop_time;
 };
 
 struct perf_pmupstate {
diff --git a/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c b/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c
index bce26bb9e..cc585169a 100644
--- a/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c
+++ b/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c
@@ -203,6 +203,20 @@ static const struct file_operations vftable_fops = {
 	.release = single_release,
 };
 
+/* debugfs getter: *val receives the clk HAL's get_change_seq_time result. */
+static int tu104_change_seq_time(void *data, u64 *val)
+{
+	struct gk20a *g = data;
+	s64 elapsed;
+
+	if (g->ops.clk.get_change_seq_time == NULL)
+		return -EINVAL;
+	g->ops.clk.get_change_seq_time(g, &elapsed);
+	*val = (u64)elapsed;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(change_seq_fops, tu104_change_seq_time, NULL, "%llu\n");
+
 int tu104_clk_init_debugfs(struct gk20a *g)
 {
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
@@ -226,6 +240,8 @@ int tu104_clk_init_debugfs(struct gk20a *g)
 				g, &xbar_cfc_fops);
 	d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
 				g, &gpc_cfc_fops);
+	d = debugfs_create_file("change_seq_time_us", S_IRUGO, clocks_root,
+				g, &change_seq_fops);
 
 	nvgpu_log(g, gpu_dbg_info, "g=%p", g);