From 65ecd7a1813c1c86bdbb2085777a88f43a463ed0 Mon Sep 17 00:00:00 2001 From: Abdul Salam Date: Fri, 20 Sep 2019 20:01:32 +0530 Subject: [PATCH] gpu: nvgpu: Remove fixed wait time for change seq completion Currently after sending change seq RPC, nvgpu waits for a fixed time of 20ms. This CL replaces this with pmu_wait_message_cond, which will return immediately after getting change seq completion event. Also added debug fs node to get the change seq execution time. Bug 200545366 Change-Id: Iba283f65d4949858be9cbff88de4d21a8c92ff81 Signed-off-by: Abdul Salam Reviewed-on: https://git-master.nvidia.com/r/2202423 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/pmu/clk/clk.c | 8 +++++++- drivers/gpu/nvgpu/common/pmu/perf/change_seq.c | 1 + drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c | 2 ++ drivers/gpu/nvgpu/hal/clk/clk_tu104.c | 9 +++++++++ drivers/gpu/nvgpu/hal/clk/clk_tu104.h | 1 + drivers/gpu/nvgpu/hal/init/hal_tu104.c | 1 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 1 + drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h | 3 +++ drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c | 16 ++++++++++++++++ 9 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk.c b/drivers/gpu/nvgpu/common/pmu/clk/clk.c index 7f5357b23..b4610e88c 100644 --- a/drivers/gpu/nvgpu/common/pmu/clk/clk.c +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk.c @@ -282,6 +282,7 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g, struct nvgpu_pmu *pmu = g->pmu; struct nv_pmu_rpc_perf_change_seq_queue_change rpc; struct ctrl_perf_change_seq_change_input change_input; + struct change_seq_pmu *change_seq_pmu = &g->perf_pmu->changeseq_pmu; int status = 0; u8 gpcclk_domain = 0U; u32 gpcclk_voltuv = 0U, gpcclk_clkmhz = 0U; @@ -340,6 +341,8 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g, rpc.change = change_input; rpc.change.pstate_index = nvgpu_get_pstate_entry_idx(g, CTRL_PERF_PSTATE_P0); + change_seq_pmu->change_state = 0U; + change_seq_pmu->start_time = nvgpu_current_time_us(); PMU_RPC_EXECUTE_CPB(status, pmu, PERF, CHANGE_SEQ_QUEUE_CHANGE, &rpc, 0); if (status != 0) { @@ -349,8 +352,11 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g, /* Wait for sync change to complete. */ if ((rpc.change.flags & CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC) == 0U) { - nvgpu_msleep(20); + pmu_wait_message_cond(g->pmu, + nvgpu_get_poll_timeout(g), + &change_seq_pmu->change_state, 1U); } + change_seq_pmu->stop_time = nvgpu_current_time_us(); return status; } diff --git a/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c index 20382105a..8487f46df 100644 --- a/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c +++ b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c @@ -100,6 +100,7 @@ int nvgpu_perf_change_seq_sw_setup(struct gk20a *g) perf_change_seq_pmu->b_lock = false; perf_change_seq_pmu->cpu_step_id_mask = 0; perf_change_seq_pmu->cpu_adverised_step_id_mask = 0; + perf_change_seq_pmu->change_state = 0U; exit: return status; diff --git a/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c b/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c index 7323bf792..13f89a8d2 100644 --- a/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c +++ b/drivers/gpu/nvgpu/common/pmu/perf/perf_ps35.c @@ -59,6 +59,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg) struct pmu_nvgpu_rpc_perf_event *msg = (struct pmu_nvgpu_rpc_perf_event *)pmumsg; struct perf_pmupstate *perf_pmu = g->perf_pmu; + struct change_seq_pmu *change_pmu = &g->perf_pmu->changeseq_pmu; nvgpu_log_fn(g, " "); switch (msg->rpc_hdr.function) { @@ -67,6 +68,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg) (void) nvgpu_cond_signal_interruptible(&perf_pmu->vfe_init.wq); break; case NV_PMU_RPC_ID_PERF_SEQ_COMPLETION: + change_pmu->change_state = 1U; nvgpu_log_info(g, "Change Seq Completed"); break; case NV_PMU_RPC_ID_PERF_PSTATES_INVALIDATE: diff --git a/drivers/gpu/nvgpu/hal/clk/clk_tu104.c b/drivers/gpu/nvgpu/hal/clk/clk_tu104.c index ebc64e754..bc8d22342 100644 --- a/drivers/gpu/nvgpu/hal/clk/clk_tu104.c +++ b/drivers/gpu/nvgpu/hal/clk/clk_tu104.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -368,3 +369,11 @@ unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain) return (max_mhz * 1000UL * 1000UL); } + +void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time) +{ + struct change_seq_pmu *change_seq_pmu = &g->perf_pmu->changeseq_pmu; + s64 diff = change_seq_pmu->stop_time - change_seq_pmu->start_time; + + *change_time = diff; +} diff --git a/drivers/gpu/nvgpu/hal/clk/clk_tu104.h b/drivers/gpu/nvgpu/hal/clk/clk_tu104.h index d39066d01..4aa813040 100644 --- a/drivers/gpu/nvgpu/hal/clk/clk_tu104.h +++ b/drivers/gpu/nvgpu/hal/clk/clk_tu104.h @@ -36,4 +36,5 @@ int tu104_clk_domain_get_f_points( u32 *pfpointscount, u16 *pfreqpointsinmhz); unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain); +void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time); #endif /* CLK_TU104_H */ diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 2720dd303..983b109d1 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -1227,6 +1227,7 @@ static const struct gpu_ops tu104_ops = { .perf_pmu_vfe_load = nvgpu_perf_pmu_vfe_load_ps35, .clk_domain_get_f_points = tu104_clk_domain_get_f_points, .get_maxrate = tu104_clk_maxrate, + .get_change_seq_time = tu104_get_change_seq_time, }, #ifdef CONFIG_NVGPU_CLK_ARB .clk_arb = { diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 08474a5a2..c4f47bd85 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1570,6 +1570,7 @@ struct gpu_ops { int (*mclk_init)(struct gk20a *g); void (*mclk_deinit)(struct gk20a *g); int (*mclk_change)(struct gk20a *g, u16 val); + void (*get_change_seq_time)(struct gk20a *g, s64 *change_time); bool split_rail_support; bool support_clk_freq_controller; bool support_pmgr_domain; diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h b/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h index 5839964a2..2eeb5a87a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/perf.h @@ -78,6 +78,9 @@ struct change_seq_pmu { struct change_seq_pmu_script script_curr; struct change_seq_pmu_script script_last; struct change_seq_pmu_script script_query; + u32 change_state; + s64 start_time; + s64 stop_time; }; struct perf_pmupstate { diff --git a/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c b/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c index bce26bb9e..cc585169a 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c +++ b/drivers/gpu/nvgpu/os/linux/debug_clk_tu104.c @@ -203,6 +203,20 @@ static const struct file_operations vftable_fops = { .release = single_release, }; +static int tu104_change_seq_time(void *data , u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + s64 readval; + + if (!g->ops.clk.get_change_seq_time) + return -EINVAL; + + g->ops.clk.get_change_seq_time(g, &readval); + *val = (u64)readval; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(change_seq_fops, tu104_change_seq_time, NULL, "%llu\n"); + int tu104_clk_init_debugfs(struct gk20a *g) { struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); @@ -226,6 +240,8 @@ int tu104_clk_init_debugfs(struct gk20a *g) g, &xbar_cfc_fops); d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root, g, &gpc_cfc_fops); + d = debugfs_create_file("change_seq_time_us", S_IRUGO, clocks_root, + g, &change_seq_fops); nvgpu_log(g, gpu_dbg_info, "g=%p", g);