gpu: nvgpu: Remove fixed wait time for change seq completion

Currently, after sending the change seq RPC, nvgpu waits for a fixed time
of 20ms.
This CL replaces the fixed wait with pmu_wait_message_cond, which returns
as soon as the change seq completion event is received.
Also add a debugfs node to read the change seq execution time.

Bug 200545366

Change-Id: Iba283f65d4949858be9cbff88de4d21a8c92ff81
Signed-off-by: Abdul Salam <absalam@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2202423
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Abdul Salam
2019-09-20 20:01:32 +05:30
committed by Alex Waterman
parent 8b575a3fff
commit 65ecd7a181
9 changed files with 41 additions and 1 deletions

View File

@@ -282,6 +282,7 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,
struct nvgpu_pmu *pmu = g->pmu;
struct nv_pmu_rpc_perf_change_seq_queue_change rpc;
struct ctrl_perf_change_seq_change_input change_input;
struct change_seq_pmu *change_seq_pmu = &g->perf_pmu->changeseq_pmu;
int status = 0;
u8 gpcclk_domain = 0U;
u32 gpcclk_voltuv = 0U, gpcclk_clkmhz = 0U;
@@ -340,6 +341,8 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,
rpc.change = change_input;
rpc.change.pstate_index =
nvgpu_get_pstate_entry_idx(g, CTRL_PERF_PSTATE_P0);
change_seq_pmu->change_state = 0U;
change_seq_pmu->start_time = nvgpu_current_time_us();
PMU_RPC_EXECUTE_CPB(status, pmu, PERF,
CHANGE_SEQ_QUEUE_CHANGE, &rpc, 0);
if (status != 0) {
@@ -349,8 +352,11 @@ int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g,
/* Wait for sync change to complete. */
if ((rpc.change.flags & CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC) == 0U) {
nvgpu_msleep(20);
pmu_wait_message_cond(g->pmu,
nvgpu_get_poll_timeout(g),
&change_seq_pmu->change_state, 1U);
}
change_seq_pmu->stop_time = nvgpu_current_time_us();
return status;
}

View File

@@ -100,6 +100,7 @@ int nvgpu_perf_change_seq_sw_setup(struct gk20a *g)
perf_change_seq_pmu->b_lock = false;
perf_change_seq_pmu->cpu_step_id_mask = 0;
perf_change_seq_pmu->cpu_adverised_step_id_mask = 0;
perf_change_seq_pmu->change_state = 0U;
exit:
return status;

View File

@@ -59,6 +59,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg)
struct pmu_nvgpu_rpc_perf_event *msg =
(struct pmu_nvgpu_rpc_perf_event *)pmumsg;
struct perf_pmupstate *perf_pmu = g->perf_pmu;
struct change_seq_pmu *change_pmu = &g->perf_pmu->changeseq_pmu;
nvgpu_log_fn(g, " ");
switch (msg->rpc_hdr.function) {
@@ -67,6 +68,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg)
(void) nvgpu_cond_signal_interruptible(&perf_pmu->vfe_init.wq);
break;
case NV_PMU_RPC_ID_PERF_SEQ_COMPLETION:
change_pmu->change_state = 1U;
nvgpu_log_info(g, "Change Seq Completed");
break;
case NV_PMU_RPC_ID_PERF_PSTATES_INVALIDATE:

View File

@@ -34,6 +34,7 @@
#include <nvgpu/soc.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/clk.h>
#include <nvgpu/pmu/perf.h>
#include <nvgpu/clk_arb.h>
#include <nvgpu/pmu/clk/clk.h>
#include <nvgpu/pmu/clk/clk_domain.h>
@@ -368,3 +369,11 @@ unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain)
return (max_mhz * 1000UL * 1000UL);
}
/*
 * Report the duration of the most recently recorded change sequence.
 *
 * Reads the start_time and stop_time stamps stored in the change sequence
 * state embedded in g->perf_pmu and writes (stop_time - start_time) to
 * *change_time. The stamps are taken with nvgpu_current_time_us() by the
 * clock-set path, so the result is presumably in microseconds.
 *
 * The value is signed; no clamping is done here.
 */
void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time)
{
	struct change_seq_pmu *seq = &g->perf_pmu->changeseq_pmu;

	*change_time = seq->stop_time - seq->start_time;
}

View File

@@ -36,4 +36,5 @@ int tu104_clk_domain_get_f_points(
u32 *pfpointscount,
u16 *pfreqpointsinmhz);
unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain);
void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time);
#endif /* CLK_TU104_H */

View File

@@ -1227,6 +1227,7 @@ static const struct gpu_ops tu104_ops = {
.perf_pmu_vfe_load = nvgpu_perf_pmu_vfe_load_ps35,
.clk_domain_get_f_points = tu104_clk_domain_get_f_points,
.get_maxrate = tu104_clk_maxrate,
.get_change_seq_time = tu104_get_change_seq_time,
},
#ifdef CONFIG_NVGPU_CLK_ARB
.clk_arb = {

View File

@@ -1570,6 +1570,7 @@ struct gpu_ops {
int (*mclk_init)(struct gk20a *g);
void (*mclk_deinit)(struct gk20a *g);
int (*mclk_change)(struct gk20a *g, u16 val);
void (*get_change_seq_time)(struct gk20a *g, s64 *change_time);
bool split_rail_support;
bool support_clk_freq_controller;
bool support_pmgr_domain;

View File

@@ -78,6 +78,9 @@ struct change_seq_pmu {
struct change_seq_pmu_script script_curr;
struct change_seq_pmu_script script_last;
struct change_seq_pmu_script script_query;
u32 change_state;
s64 start_time;
s64 stop_time;
};
struct perf_pmupstate {

View File

@@ -203,6 +203,20 @@ static const struct file_operations vftable_fops = {
.release = single_release,
};
/*
 * debugfs read handler backing the "change_seq_time_us" node.
 *
 * @data: the struct gk20a pointer registered with debugfs_create_file().
 * @val:  receives the change sequence duration reported by the
 *        clk.get_change_seq_time HAL.
 *
 * Returns 0 on success, or -EINVAL when the chip does not implement the
 * get_change_seq_time HAL.
 */
static int tu104_change_seq_time(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	s64 readval = 0;

	if (!g->ops.clk.get_change_seq_time)
		return -EINVAL;

	g->ops.clk.get_change_seq_time(g, &readval);

	/*
	 * The HAL returns stop_time - start_time as a signed value. The start
	 * stamp is refreshed before the RPC is sent and the stop stamp only
	 * after the wait finishes, so a read while a change is in flight can
	 * see a negative difference. Report 0 in that case instead of letting
	 * the cast to u64 turn it into a huge bogus duration.
	 */
	*val = (readval < 0) ? 0ULL : (u64)readval;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(change_seq_fops, tu104_change_seq_time, NULL, "%llu\n");
int tu104_clk_init_debugfs(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
@@ -226,6 +240,8 @@ int tu104_clk_init_debugfs(struct gk20a *g)
g, &xbar_cfc_fops);
d = debugfs_create_file("gpc", S_IRUGO | S_IWUSR, clk_freq_ctlr_root,
g, &gpc_cfc_fops);
d = debugfs_create_file("change_seq_time_us", S_IRUGO, clocks_root,
g, &change_seq_fops);
nvgpu_log(g, gpu_dbg_info, "g=%p", g);