From 146d8d3ce566af0d06c273a9ef9a5b82d91834de Mon Sep 17 00:00:00 2001
From: Abdul Salam
Date: Mon, 7 Jan 2019 11:38:11 +0530
Subject: [PATCH] gpu: nvgpu: Add clk_arb for TU104

Add clk arbiter support for TU104 and set up the clk_arb supporting
functions in hal_tu104. TU104 supports GPCCLK and not GPC2CLK, so
remove the multiplication and division by 2 that were used to convert
gpcclk to gpc2clk.

Provide support for the following features:
*Domains: currently GPCCLK is supported
*Clk range: from P0 min to P0 max
*Freq points: gives the VF curve from the PMU
*Default: default value (P0 max)
*Current Pstate: P0 is supported

All requests for a change in freq are validated against the P0 range;
out-of-bound values are trimmed to match the Pstate limits. Multiple
requests are supported and the maximum of them is set. Requests are
sent to the PMU via the change sequencer.

Bug 200454682
JIRA NVGPU-1653

Change-Id: I36735fa50c7963830ebc569a2ea2a2d7aafcf2ab
Signed-off-by: Abdul Salam
Reviewed-on: https://git-master.nvidia.com/r/1982078
Reviewed-by: Vijayakumar Subbu
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/Makefile | 3 +-
 drivers/gpu/nvgpu/Makefile.sources | 1 +
 drivers/gpu/nvgpu/clk/clk.c | 112 ++--
 drivers/gpu/nvgpu/clk/clk.h | 26 +-
 drivers/gpu/nvgpu/clk/clk_arb.c | 242 +------
 drivers/gpu/nvgpu/clk/clk_domain.c | 35 +-
 drivers/gpu/nvgpu/clk/clk_prog.c | 62 +-
 drivers/gpu/nvgpu/clk/clk_vf_point.c | 265 +++++++-
 drivers/gpu/nvgpu/clk/clk_vf_point.h | 13 +
 drivers/gpu/nvgpu/common/pmu/pmu_fw.c | 3 +-
 drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c | 35 +-
 drivers/gpu/nvgpu/gv100/clk_arb_gv100.c | 739 +++++++++++++++++++++
 drivers/gpu/nvgpu/gv100/clk_arb_gv100.h | 40 ++
 drivers/gpu/nvgpu/gv100/clk_gv100.c | 40 +-
 drivers/gpu/nvgpu/gv100/clk_gv100.h | 6 +-
 drivers/gpu/nvgpu/include/nvgpu/clk_arb.h | 2 +
 drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c | 4 +-
 drivers/gpu/nvgpu/pmu_perf/perf_tu104.c | 3 +-
 drivers/gpu/nvgpu/tu104/hal_tu104.c | 19 +-
 drivers/gpu/nvgpu/volt/volt_pmu.c | 4 +-
 20 files changed, 1276 insertions(+), 378 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gv100/clk_arb_gv100.c
 create mode 100644 drivers/gpu/nvgpu/gv100/clk_arb_gv100.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 72015e129..980ddd62b 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -405,4 +405,5 @@ nvgpu-y += \ therm/thrmchannel.o \ therm/thrmpmu.o \ lpwr/rppg.o \ - lpwr/lpwr.o + lpwr/lpwr.o \ + gv100/clk_arb_gv100.o
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 0701ce395..6dbe755d7 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -240,6 +240,7 @@ srcs := os/posix/nvgpu.c \ gv100/hal_gv100.c \ gv100/gsp_gv100.c \ gv100/clk_gv100.c \ + gv100/clk_arb_gv100.c \ common/bus/bus_tu104.c \ common/fb/fb_tu104.c \ common/ltc/ltc_tu104.c \
diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index fe24d8f72..e2b764096 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -433,7 +433,7 @@ u32 nvgpu_clk_vf_change_inject_data_fill_gp10x(struct gk20a *g, vfchange = &rpccall->params.clk_vf_change_inject; vfchange->flags = 0; vfchange->clk_list.num_domains = 3; - vfchange->clk_list.clk_domains[0].clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; + vfchange->clk_list.clk_domains[0].clk_domain = CTRL_CLK_DOMAIN_GPCCLK; vfchange->clk_list.clk_domains[0].clk_freq_khz = (u32)setfllclk->gpc2clkmhz * 1000U; vfchange->clk_list.clk_domains[0].clk_flags = 0;
@@ -441,7 +441,7 @@ u32 
nvgpu_clk_vf_change_inject_data_fill_gp10x(struct gk20a *g, setfllclk->current_regime_id_gpc; vfchange->clk_list.clk_domains[0].target_regime_id = setfllclk->target_regime_id_gpc; - vfchange->clk_list.clk_domains[1].clk_domain = CTRL_CLK_DOMAIN_XBAR2CLK; + vfchange->clk_list.clk_domains[1].clk_domain = CTRL_CLK_DOMAIN_XBARCLK; vfchange->clk_list.clk_domains[1].clk_freq_khz = (u32)setfllclk->xbar2clkmhz * 1000U; vfchange->clk_list.clk_domains[1].clk_flags = 0; @@ -449,7 +449,7 @@ u32 nvgpu_clk_vf_change_inject_data_fill_gp10x(struct gk20a *g, setfllclk->current_regime_id_xbar; vfchange->clk_list.clk_domains[1].target_regime_id = setfllclk->target_regime_id_xbar; - vfchange->clk_list.clk_domains[2].clk_domain = CTRL_CLK_DOMAIN_SYS2CLK; + vfchange->clk_list.clk_domains[2].clk_domain = CTRL_CLK_DOMAIN_SYSCLK; vfchange->clk_list.clk_domains[2].clk_freq_khz = (u32)setfllclk->sys2clkmhz * 1000U; vfchange->clk_list.clk_domains[2].clk_flags = 0; @@ -630,32 +630,32 @@ int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk) int status = -EINVAL; /*set regime ids */ - status = get_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK, + status = get_regime_id(g, CTRL_CLK_DOMAIN_GPCCLK, &setfllclk->current_regime_id_gpc); if (status != 0) { goto done; } setfllclk->target_regime_id_gpc = find_regime_id(g, - CTRL_CLK_DOMAIN_GPC2CLK, setfllclk->gpc2clkmhz); + CTRL_CLK_DOMAIN_GPCCLK, setfllclk->gpc2clkmhz); - status = get_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK, + status = get_regime_id(g, CTRL_CLK_DOMAIN_SYSCLK, &setfllclk->current_regime_id_sys); if (status != 0) { goto done; } setfllclk->target_regime_id_sys = find_regime_id(g, - CTRL_CLK_DOMAIN_SYS2CLK, setfllclk->sys2clkmhz); + CTRL_CLK_DOMAIN_SYSCLK, setfllclk->sys2clkmhz); - status = get_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK, + status = get_regime_id(g, CTRL_CLK_DOMAIN_XBARCLK, &setfllclk->current_regime_id_xbar); if (status != 0) { goto done; } setfllclk->target_regime_id_xbar = find_regime_id(g, - CTRL_CLK_DOMAIN_XBAR2CLK, setfllclk->xbar2clkmhz); + CTRL_CLK_DOMAIN_XBARCLK, setfllclk->xbar2clkmhz); status = clk_pmu_vf_inject(g, setfllclk); @@ -664,19 +664,19 @@ int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk) } /* save regime ids */ - status = set_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK, + status = set_regime_id(g, CTRL_CLK_DOMAIN_XBARCLK, setfllclk->target_regime_id_xbar); if (status != 0) { goto done; } - status = set_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK, + status = set_regime_id(g, CTRL_CLK_DOMAIN_GPCCLK, setfllclk->target_regime_id_gpc); if (status != 0) { goto done; } - status = set_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK, + status = set_regime_id(g, CTRL_CLK_DOMAIN_SYSCLK, setfllclk->target_regime_id_sys); if (status != 0) { goto done; @@ -692,8 +692,8 @@ int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk) u8 i; struct clk_pmupstate *pclk = g->clk_pmu; u16 clkmhz = 0; - struct clk_domain_3x_master *p3xmaster; - struct clk_domain_3x_slave *p3xslave; + struct clk_domain_35_master *p35master; + struct clk_domain_35_slave *p35slave; unsigned long slaveidxmask; if (setfllclk->gpc2clkmhz == 0U) { @@ -703,42 +703,44 @@ int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk) BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super), struct clk_domain *, pdomain, i) { - if (pdomain->api_domain == CTRL_CLK_DOMAIN_GPC2CLK) { - + if (pdomain->api_domain == CTRL_CLK_DOMAIN_GPCCLK) { if (!pdomain->super.implements(g, &pdomain->super, - CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER)) { + CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER)) { 
status = -EINVAL; goto done; } - p3xmaster = (struct clk_domain_3x_master *)pdomain; - slaveidxmask = p3xmaster->slave_idxs_mask; + p35master = (struct clk_domain_35_master *)pdomain; + slaveidxmask = p35master->master.slave_idxs_mask; for_each_set_bit(i, &slaveidxmask, 32U) { - p3xslave = (struct clk_domain_3x_slave *) + p35slave = (struct clk_domain_35_slave *) CLK_CLK_DOMAIN_GET(pclk, i); - if ((p3xslave->super.super.super.api_domain != - CTRL_CLK_DOMAIN_XBAR2CLK) && - (p3xslave->super.super.super.api_domain != - CTRL_CLK_DOMAIN_SYS2CLK)) { - continue; - } + clkmhz = 0; - status = p3xslave->clkdomainclkgetslaveclk(g, + status = p35slave->slave.clkdomainclkgetslaveclk(g, pclk, - (struct clk_domain *)p3xslave, + (struct clk_domain *)(void *)p35slave, &clkmhz, setfllclk->gpc2clkmhz); if (status != 0) { status = -EINVAL; goto done; } - if (p3xslave->super.super.super.api_domain == - CTRL_CLK_DOMAIN_XBAR2CLK) { + if (p35slave->super.super.super.super.api_domain == + CTRL_CLK_DOMAIN_XBARCLK) { setfllclk->xbar2clkmhz = clkmhz; } - if (p3xslave->super.super.super.api_domain == - CTRL_CLK_DOMAIN_SYS2CLK) { + if (p35slave->super.super.super.super.api_domain == + CTRL_CLK_DOMAIN_SYSCLK) { setfllclk->sys2clkmhz = clkmhz; } + if (p35slave->super.super.super.super.api_domain == + CTRL_CLK_DOMAIN_NVDCLK) { + setfllclk->nvdclkmhz = clkmhz; + } + if (p35slave->super.super.super.super.api_domain == + CTRL_CLK_DOMAIN_HOSTCLK) { + setfllclk->hostclkmhz = clkmhz; + } } } } @@ -914,14 +916,12 @@ int nvgpu_clk_set_boot_fll_clk_gv10x(struct gk20a *g) } voltuv = gpcclk_voltuv; - status = volt_set_voltage(g, voltuv, 0); if (status != 0) { nvgpu_err(g, "attempt to set boot voltage failed %d", voltuv); } - bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPCCLK; bootfllclk.clkmhz = gpcclk_clkmhz; bootfllclk.voltuv = voltuv; @@ -929,9 +929,7 @@ int nvgpu_clk_set_boot_fll_clk_gv10x(struct gk20a *g) if (status != 0) { nvgpu_err(g, "attempt to set boot gpcclk failed"); } - status = clk_pmu_freq_effective_avg_load(g, true); - /* * Read clocks after some delay with below method * & extract clock data from buffer @@ -1053,19 +1051,35 @@ int nvgpu_clk_set_boot_fll_clk_tu10x(struct gk20a *g) return status; } -int clk_domain_freq_to_volt( - struct gk20a *g, - u8 clkdomain_idx, - u32 *pclkmhz, - u32 *pvoltuv, - u8 railidx -) +int clk_domain_volt_to_freq(struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx) { struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt rpc; struct nvgpu_pmu *pmu = &g->pmu; int status = -EINVAL; - (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt )); + (void)memset(&rpc, 0, sizeof(struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt )); + rpc.volt_rail_idx = volt_rail_volt_domain_convert_to_idx(g, railidx); + rpc.clk_domain_idx = clkdomain_idx; + rpc.voltage_type = CTRL_VOLT_DOMAIN_LOGIC; + rpc.input.value = *pvoltuv; + PMU_RPC_EXECUTE_CPB(status, pmu, CLK, CLK_DOMAIN_35_PROG_VOLT_TO_FREQ, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute Freq to Volt RPC status=0x%x", + status); + } + *pclkmhz = rpc.output.value; + return status; +} + +int clk_domain_freq_to_volt(struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx) +{ + struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt rpc; + struct nvgpu_pmu *pmu = &g->pmu; + int status = -EINVAL; + + (void)memset(&rpc, 0, sizeof(struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt )); rpc.volt_rail_idx = volt_rail_volt_domain_convert_to_idx(g, railidx); rpc.clk_domain_idx = clkdomain_idx; 
rpc.voltage_type = CTRL_VOLT_DOMAIN_LOGIC; @@ -1079,13 +1093,9 @@ int clk_domain_freq_to_volt( return status; } -int clk_domain_get_f_or_v( - struct gk20a *g, - u32 clkapidomain, - u16 *pclkmhz, - u32 *pvoltuv, - u8 railidx -) + +int clk_domain_get_f_or_v(struct gk20a *g, u32 clkapidomain, + u16 *pclkmhz, u32 *pvoltuv, u8 railidx) { int status = -EINVAL; struct clk_domain *pdomain; diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h index 52ef4ec2c..a3e3e715b 100644 --- a/drivers/gpu/nvgpu/clk/clk.h +++ b/drivers/gpu/nvgpu/clk/clk.h @@ -78,6 +78,12 @@ struct set_fll_clk { u16 xbar2clkmhz; u8 current_regime_id_xbar; u8 target_regime_id_xbar; + u16 nvdclkmhz; + u8 current_regime_id_nvd; + u8 target_regime_id_nvd; + u16 hostclkmhz; + u8 current_regime_id_host; + u8 target_regime_id_host; }; #define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_MAX_NUMCLKS 9U @@ -126,20 +132,12 @@ void clk_free_pmupstate(struct gk20a *g); int clk_pmu_vin_load(struct gk20a *g); int clk_pmu_clk_domains_load(struct gk20a *g); int clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain); -int clk_domain_get_f_or_v( - struct gk20a *g, - u32 clkapidomain, - u16 *pclkmhz, - u32 *pvoltuv, - u8 railidx -); -int clk_domain_freq_to_volt( - struct gk20a *g, - u8 clkdomain_idx, - u32 *pclkmhz, - u32 *pvoltuv, - u8 railidx -); +int clk_domain_get_f_or_v(struct gk20a *g, u32 clkapidomain, + u16 *pclkmhz, u32 *pvoltuv, u8 railidx); +int clk_domain_freq_to_volt(struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx); +int clk_domain_volt_to_freq( struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx); int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk); int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk); int clk_pmu_freq_controller_load(struct gk20a *g, bool bload, u8 bit_idx); diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index bed98edf1..60a3afddb 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -112,27 +112,25 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) u32 i, j; int status = -EINVAL; - u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; - u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; u16 clk_cur; u32 num_points; - struct clk_set_info *p5_info, *p0_info; + struct clk_set_info *p0_info; table = NV_ACCESS_ONCE(arb->current_vf_table); /* make flag visible when all data has resolved in the tables */ nvgpu_smp_rmb(); - table = (table == &arb->vf_table_pool[0]) ? 
&arb->vf_table_pool[1] : &arb->vf_table_pool[0]; /* Get allowed memory ranges */ - if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK, + if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, &arb->gpc2clk_min, &arb->gpc2clk_max) < 0) { nvgpu_err(g, "failed to fetch GPC2CLK range"); goto exit_vf_table; } + if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK, &arb->mclk_min, &arb->mclk_max) < 0) { @@ -142,102 +140,22 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) table->gpc2clk_num_points = MAX_F_POINTS; table->mclk_num_points = MAX_F_POINTS; - - if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPC2CLK, + if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPCCLK, &table->gpc2clk_num_points, arb->gpc2clk_f_points)) { nvgpu_err(g, "failed to fetch GPC2CLK frequency points"); goto exit_vf_table; } - - if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_MCLK, - &table->mclk_num_points, arb->mclk_f_points)) { - nvgpu_err(g, "failed to fetch MCLK frequency points"); - goto exit_vf_table; - } - if (!table->mclk_num_points || !table->gpc2clk_num_points) { - nvgpu_err(g, "empty queries to f points mclk %d gpc2clk %d", - table->mclk_num_points, table->gpc2clk_num_points); + if (!table->gpc2clk_num_points) { + nvgpu_err(g, "empty queries to f points gpc2clk %d", table->gpc2clk_num_points); status = -EINVAL; goto exit_vf_table; } - (void) memset(table->mclk_points, 0, - table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point)); (void) memset(table->gpc2clk_points, 0, table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point)); - p5_info = pstate_get_clk_set_info(g, - CTRL_PERF_PSTATE_P5, CLKWHICH_MCLK); - if (!p5_info) { - nvgpu_err(g, "failed to get MCLK P5 info"); - goto exit_vf_table; - } p0_info = pstate_get_clk_set_info(g, - CTRL_PERF_PSTATE_P0, CLKWHICH_MCLK); - if (!p0_info) { - nvgpu_err(g, "failed to get MCLK P0 info"); - goto exit_vf_table; - } - - for (i = 0, j = 0, num_points = 0, clk_cur = 0; - i < table->mclk_num_points; i++) { - - if ((arb->mclk_f_points[i] >= arb->mclk_min) && - (arb->mclk_f_points[i] <= arb->mclk_max) && - (arb->mclk_f_points[i] != clk_cur)) { - - table->mclk_points[j].mem_mhz = arb->mclk_f_points[i]; - mclk_voltuv = mclk_voltuv_sram = 0; - - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, - &table->mclk_points[j].mem_mhz, &mclk_voltuv, - CTRL_VOLT_DOMAIN_LOGIC); - if (status < 0) { - nvgpu_err(g, - "failed to get MCLK LOGIC voltage"); - goto exit_vf_table; - } - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, - &table->mclk_points[j].mem_mhz, - &mclk_voltuv_sram, - CTRL_VOLT_DOMAIN_SRAM); - if (status < 0) { - nvgpu_err(g, "failed to get MCLK SRAM voltage"); - goto exit_vf_table; - } - - table->mclk_points[j].uvolt = mclk_voltuv; - table->mclk_points[j].uvolt_sram = mclk_voltuv_sram; - clk_cur = table->mclk_points[j].mem_mhz; - - if ((clk_cur >= p5_info->min_mhz) && - (clk_cur <= p5_info->max_mhz)) - VF_POINT_SET_PSTATE_SUPPORTED( - &table->mclk_points[j], - CTRL_PERF_PSTATE_P5); - if ((clk_cur >= p0_info->min_mhz) && - (clk_cur <= p0_info->max_mhz)) - VF_POINT_SET_PSTATE_SUPPORTED( - &table->mclk_points[j], - CTRL_PERF_PSTATE_P0); - - j++; - num_points++; - - } - } - table->mclk_num_points = num_points; - - p5_info = pstate_get_clk_set_info(g, - CTRL_PERF_PSTATE_P5, CLKWHICH_GPC2CLK); - if (!p5_info) { - status = -EINVAL; - nvgpu_err(g, "failed to get GPC2CLK P5 info"); - goto exit_vf_table; - } - - p0_info = pstate_get_clk_set_info(g, - 
CTRL_PERF_PSTATE_P0, CLKWHICH_GPC2CLK); + CTRL_PERF_PSTATE_P0, CLKWHICH_GPCCLK); if (!p0_info) { status = -EINVAL; nvgpu_err(g, "failed to get GPC2CLK P0 info"); @@ -247,7 +165,7 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) /* GPC2CLK needs to be checked in two passes. The first determines the * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the * second verifies that the clocks minimum is satisfied and sets - * the voltages + * the voltages,the later part is done in nvgpu_clk_set_req_fll_clk_ps35 */ for (i = 0, j = 0, num_points = 0, clk_cur = 0; i < table->gpc2clk_num_points; i++) { @@ -260,6 +178,7 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) table->gpc2clk_points[j].gpc_mhz = arb->gpc2clk_f_points[i]; setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i]; + status = clk_get_fll_clks(g, &setfllclk); if (status < 0) { nvgpu_err(g, @@ -271,14 +190,13 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) setfllclk.sys2clkmhz; table->gpc2clk_points[j].xbar_mhz = setfllclk.xbar2clkmhz; + table->gpc2clk_points[j].nvd_mhz = + setfllclk.nvdclkmhz; + table->gpc2clk_points[j].host_mhz = + setfllclk.hostclkmhz; clk_cur = table->gpc2clk_points[j].gpc_mhz; - if ((clk_cur >= p5_info->min_mhz) && - (clk_cur <= p5_info->max_mhz)) - VF_POINT_SET_PSTATE_SUPPORTED( - &table->gpc2clk_points[j], - CTRL_PERF_PSTATE_P5); if ((clk_cur >= p0_info->min_mhz) && (clk_cur <= p0_info->max_mhz)) VF_POINT_SET_PSTATE_SUPPORTED( @@ -291,110 +209,6 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) } table->gpc2clk_num_points = num_points; - /* Second pass */ - for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) { - - u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz; - - gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; - - /* Check sysclk */ - p5_info = pstate_get_clk_set_info(g, - VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]), - CLKWHICH_SYS2CLK); - if (!p5_info) { - status = -EINVAL; - nvgpu_err(g, "failed to get SYS2CLK P5 info"); - goto exit_vf_table; - } - - /* sys2clk below clk min, need to find correct clock */ - if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) { - for (j = i + 1; j < table->gpc2clk_num_points; j++) { - - if (table->gpc2clk_points[j].sys_mhz >= - p5_info->min_mhz) { - - - table->gpc2clk_points[i].sys_mhz = - p5_info->min_mhz; - - alt_gpc2clk = alt_gpc2clk < - table->gpc2clk_points[j]. - gpc_mhz ? - table->gpc2clk_points[j]. - gpc_mhz : - alt_gpc2clk; - break; - } - } - /* no VF exists that satisfies condition */ - if (j == table->gpc2clk_num_points) { - nvgpu_err(g, "NO SYS2CLK VF point possible"); - status = -EINVAL; - goto exit_vf_table; - } - } - - /* Check xbarclk */ - p5_info = pstate_get_clk_set_info(g, - VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]), - CLKWHICH_XBAR2CLK); - if (!p5_info) { - status = -EINVAL; - nvgpu_err(g, "failed to get SYS2CLK P5 info"); - goto exit_vf_table; - } - - /* xbar2clk below clk min, need to find correct clock */ - if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) { - for (j = i; j < table->gpc2clk_num_points; j++) { - if (table->gpc2clk_points[j].xbar_mhz >= - p5_info->min_mhz) { - - table->gpc2clk_points[i].xbar_mhz = - p5_info->min_mhz; - - alt_gpc2clk = alt_gpc2clk < - table->gpc2clk_points[j]. - gpc_mhz ? - table->gpc2clk_points[j]. 
- gpc_mhz : - alt_gpc2clk; - break; - } - } - /* no VF exists that satisfies condition */ - if (j == table->gpc2clk_num_points) { - status = -EINVAL; - nvgpu_err(g, "NO XBAR2CLK VF point possible"); - - goto exit_vf_table; - } - } - - /* Calculate voltages */ - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, - &alt_gpc2clk, &gpc2clk_voltuv, - CTRL_VOLT_DOMAIN_LOGIC); - if (status < 0) { - nvgpu_err(g, "failed to get GPC2CLK LOGIC voltage"); - goto exit_vf_table; - } - - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, - &alt_gpc2clk, - &gpc2clk_voltuv_sram, - CTRL_VOLT_DOMAIN_SRAM); - if (status < 0) { - nvgpu_err(g, "failed to get GPC2CLK SRAM voltage"); - goto exit_vf_table; - } - - table->gpc2clk_points[i].uvolt = gpc2clk_voltuv; - table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram; - } - /* make table visible when all data has resolved in the tables */ nvgpu_smp_wmb(); arb->current_vf_table = table; @@ -758,11 +572,10 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) { int err = 0; - if (!g->ops.clk.support_clk_freq_controller || + if (!g->ops.clk.support_clk_freq_controller && !g->ops.clk_arb.get_arbiter_clk_domains) { return 0; } - nvgpu_mutex_acquire(&g->clk_arb_enable_lock); err = g->ops.clk_arb.arbiter_clk_init(g); @@ -822,7 +635,7 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, clk_arb_dbg(g, " "); - if (!g->ops.clk.support_clk_freq_controller || + if (!g->ops.clk.support_clk_freq_controller && !g->ops.clk_arb.get_arbiter_clk_domains) { return 0; } @@ -941,7 +754,7 @@ bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) return (clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0; case NVGPU_CLK_DOMAIN_GPCCLK: - return (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0; + return (clk_domains & CTRL_CLK_DOMAIN_GPCCLK) != 0; default: return false; @@ -961,11 +774,7 @@ int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, case NVGPU_CLK_DOMAIN_GPCCLK: ret = g->ops.clk_arb.get_arbiter_clk_range(g, - CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz); - if (ret == 0) { - *min_mhz /= 2; - *max_mhz /= 2; - } + CTRL_CLK_DOMAIN_GPCCLK, min_mhz, max_mhz); return ret; default: @@ -977,16 +786,13 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, u32 api_domain, u32 *max_points, u16 *fpoints) { int err; - u32 i; switch (api_domain) { case NVGPU_CLK_DOMAIN_GPCCLK: err = g->ops.clk_arb.get_arbiter_f_points(g, - CTRL_CLK_DOMAIN_GPC2CLK, max_points, fpoints); + CTRL_CLK_DOMAIN_GPCCLK, max_points, fpoints); if (err || !fpoints) return err; - for (i = 0; i < *max_points; i++) - fpoints[i] /= 2; return 0; case NVGPU_CLK_DOMAIN_MCLK: return g->ops.clk_arb.get_arbiter_f_points(g, @@ -1012,7 +818,7 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, break; case NVGPU_CLK_DOMAIN_GPCCLK: - *freq_mhz = target->gpc2clk / 2ULL; + *freq_mhz = target->gpc2clk; break; default: @@ -1039,7 +845,7 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, break; case NVGPU_CLK_DOMAIN_GPCCLK: - *freq_mhz = actual->gpc2clk / 2ULL; + *freq_mhz = actual->gpc2clk ; break; default: @@ -1054,8 +860,8 @@ unsigned long nvgpu_clk_measure_freq(struct gk20a *g, u32 api_domain) unsigned long freq = 0UL; switch (api_domain) { - case CTRL_CLK_DOMAIN_GPC2CLK: - freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) * 2UL; + case CTRL_CLK_DOMAIN_GPCCLK: + freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); break; default: break; @@ -1077,8 +883,8 @@ int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, return 0; case 
NVGPU_CLK_DOMAIN_GPCCLK: - *freq_mhz = g->ops.clk.measure_freq(g, - CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL; + *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_GPCCLK) / + 1000000ULL; return 0; default: diff --git a/drivers/gpu/nvgpu/clk/clk_domain.c b/drivers/gpu/nvgpu/clk/clk_domain.c index 099a38bd2..49c16155a 100644 --- a/drivers/gpu/nvgpu/clk/clk_domain.c +++ b/drivers/gpu/nvgpu/clk/clk_domain.c @@ -965,8 +965,9 @@ static int clkdomaingetslaveclk(struct gk20a *g, struct clk_prog *pprog = NULL; struct clk_prog_1x_master *pprog1xmaster = NULL; u8 slaveidx; + struct clk_domain_35_master *p35master; struct clk_domain_3x_master *p3xmaster; - + u32 ver = g->params.gpu_arch + g->params.gpu_impl; nvgpu_log_info(g, " "); if (pclkmhz == NULL) { @@ -977,16 +978,30 @@ static int clkdomaingetslaveclk(struct gk20a *g, return -EINVAL; } - slaveidx = BOARDOBJ_GET_IDX(pdomain); - p3xmaster = (struct clk_domain_3x_master *) - CLK_CLK_DOMAIN_GET(pclk, - ((struct clk_domain_3x_slave *) - pdomain)->master_idx); - pprog = CLK_CLK_PROG_GET(pclk, p3xmaster->super.clk_prog_idx_first); - pprog1xmaster = (struct clk_prog_1x_master *)pprog; + if(ver == NVGPU_GPUID_GV100) { + slaveidx = BOARDOBJ_GET_IDX(pdomain); + p3xmaster = (struct clk_domain_3x_master *) + CLK_CLK_DOMAIN_GET(pclk, + ((struct clk_domain_3x_slave *) + pdomain)->master_idx); + pprog = CLK_CLK_PROG_GET(pclk, p3xmaster->super.clk_prog_idx_first); + pprog1xmaster = (struct clk_prog_1x_master *)pprog; - status = pprog1xmaster->getslaveclk(g, pclk, pprog1xmaster, - slaveidx, pclkmhz, masterclkmhz); + status = pprog1xmaster->getslaveclk(g, pclk, pprog1xmaster, + slaveidx, pclkmhz, masterclkmhz); + } else { + slaveidx = BOARDOBJ_GET_IDX(pdomain); + p35master = (struct clk_domain_35_master *) + CLK_CLK_DOMAIN_GET(pclk, + ((struct clk_domain_35_slave *) + pdomain)->slave.master_idx); + + pprog = CLK_CLK_PROG_GET(pclk, p35master->master.super.clk_prog_idx_first); + pprog1xmaster = (struct clk_prog_1x_master *)pprog; + + status = pprog1xmaster->getslaveclk(g, pclk, pprog1xmaster, + slaveidx, pclkmhz, masterclkmhz); + } return status; } diff --git a/drivers/gpu/nvgpu/clk/clk_prog.c b/drivers/gpu/nvgpu/clk/clk_prog.c index f84f5be5d..f0dc32133 100644 --- a/drivers/gpu/nvgpu/clk/clk_prog.c +++ b/drivers/gpu/nvgpu/clk/clk_prog.c @@ -1673,7 +1673,7 @@ static int getfpoints_prog_1x_master pvfentry = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)( (u8 *)pvfentry + ((u8)sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) * - (rail+1U))); + rail)); fpointscount = (u32)pvfentry->vf_point_idx_last - (u32)pvfentry->vf_point_idx_first + 1U; @@ -1707,10 +1707,11 @@ static int getslaveclk_prog_1x_master(struct gk20a *g, { struct clk_progs *pclkprogobjs; struct clk_prog_1x_master_ratio *p1xmasterratio; + struct clk_prog_35_master_ratio *p35masterratio; u8 slaveentrycount; u8 i; struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *pslaveents; - + u32 ver = g->params.gpu_arch + g->params.gpu_impl; if (pclkmhz == NULL) { return -EINVAL; } @@ -1723,27 +1724,50 @@ static int getslaveclk_prog_1x_master(struct gk20a *g, pclkprogobjs = &(pclk->clk_progobjs); slaveentrycount = pclkprogobjs->slave_entry_count; - - if (p1xmaster->super.super.super.implements(g, - &p1xmaster->super.super.super, - CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) { - p1xmasterratio = - (struct clk_prog_1x_master_ratio *)p1xmaster; - pslaveents = p1xmasterratio->p_slave_entries; - for (i = 0; i < slaveentrycount; i++) { - if (pslaveents->clk_dom_idx == - slave_clk_domain) { - break; + if(ver == 
NVGPU_GPUID_GV100) { + if (p1xmaster->super.super.super.implements(g, + &p1xmaster->super.super.super, + CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) { + p1xmasterratio = + (struct clk_prog_1x_master_ratio *)p1xmaster; + pslaveents = p1xmasterratio->p_slave_entries; + for (i = 0; i < slaveentrycount; i++) { + if (pslaveents->clk_dom_idx == + slave_clk_domain) { + break; + } + pslaveents++; } - pslaveents++; - } - if (i == slaveentrycount) { + if (i == slaveentrycount) { + return -EINVAL; + } + *pclkmhz = (masterclkmhz * pslaveents->ratio)/100U; + } else { + /* only support ratio for now */ return -EINVAL; } - *pclkmhz = (masterclkmhz * pslaveents->ratio)/100U; } else { - /* only support ratio for now */ - return -EINVAL; + if (p1xmaster->super.super.super.implements(g, + &p1xmaster->super.super.super, + CTRL_CLK_CLK_PROG_TYPE_35_MASTER_RATIO)) { + p35masterratio = + (struct clk_prog_35_master_ratio *)p1xmaster; + pslaveents = p35masterratio->ratio.p_slave_entries; + for (i = 0; i < slaveentrycount; i++) { + if (pslaveents->clk_dom_idx == + slave_clk_domain) { + break; + } + pslaveents++; + } + if (i == slaveentrycount) { + return -EINVAL; + } + *pclkmhz = (masterclkmhz * pslaveents->ratio)/100U; + } else { + /* only support ratio for now */ + return -EINVAL; + } } return 0; } diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.c b/drivers/gpu/nvgpu/clk/clk_vf_point.c index 23d0dc134..72c976ea4 100644 --- a/drivers/gpu/nvgpu/clk/clk_vf_point.c +++ b/drivers/gpu/nvgpu/clk/clk_vf_point.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include "clk.h" #include "clk_vf_point.h" @@ -484,6 +486,210 @@ static int clk_vf_point_update(struct gk20a *g, return 0; } +int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g, struct nvgpu_clk_slave_freq *vf_point) +{ + struct nvgpu_pmu *pmu = &g->pmu; + struct nv_pmu_rpc_perf_change_seq_queue_change rpc; + struct ctrl_perf_change_seq_change_input change_input; + struct clk_domain *pclk_domain; + int status = 0; + u8 i = 0, gpcclk_domain=0; + u32 gpcclk_voltuv=0,gpcclk_clkmhz=0; + u32 max_clkmhz; + u16 max_ratio; + struct clk_set_info *p0_info; + + (void) memset(&change_input, 0, + sizeof(struct ctrl_perf_change_seq_change_input)); + BOARDOBJGRP_FOR_EACH(&(g->clk_pmu->clk_domainobjs.super.super), + struct clk_domain *, pclk_domain, i) { + + switch (pclk_domain->api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + gpcclk_domain = i; + gpcclk_clkmhz = vf_point->gpc_mhz; + + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_GPCCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get GPCCLK P0 info"); + break; + } + if ( vf_point->gpc_mhz < p0_info->min_mhz ) { + vf_point->gpc_mhz = p0_info->min_mhz; + } + if (vf_point->gpc_mhz > p0_info->max_mhz) { + vf_point->gpc_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->gpc_mhz * 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + break; + case CTRL_CLK_DOMAIN_XBARCLK: + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_XBARCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get XBARCLK P0 info"); + break; + } + max_ratio = (vf_point->xbar_mhz*100U)/vf_point->gpc_mhz; + if ( vf_point->xbar_mhz < p0_info->min_mhz ) { + vf_point->xbar_mhz = p0_info->min_mhz; + } + if (vf_point->xbar_mhz > p0_info->max_mhz) { + vf_point->xbar_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->xbar_mhz * 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + max_clkmhz = (((u32)vf_point->xbar_mhz * 
100U)/ (u32)max_ratio); + if (gpcclk_clkmhz < max_clkmhz) { + gpcclk_clkmhz = max_clkmhz; + } + break; + case CTRL_CLK_DOMAIN_SYSCLK: + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_SYSCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get SYSCLK P0 info"); + break; + } + max_ratio = (vf_point->sys_mhz*100U)/vf_point->gpc_mhz; + if ( vf_point->sys_mhz < p0_info->min_mhz ) { + vf_point->sys_mhz = p0_info->min_mhz; + } + if (vf_point->sys_mhz > p0_info->max_mhz) { + vf_point->sys_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->sys_mhz * 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + max_clkmhz = (((u32)vf_point->sys_mhz * 100U)/ (u32)max_ratio); + if (gpcclk_clkmhz < max_clkmhz) { + gpcclk_clkmhz = max_clkmhz; + } + break; + case CTRL_CLK_DOMAIN_NVDCLK: + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_NVDCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get NVDCLK P0 info"); + break; + } + max_ratio = (vf_point->nvd_mhz*100U)/vf_point->gpc_mhz; + if ( vf_point->nvd_mhz < p0_info->min_mhz ) { + vf_point->nvd_mhz = p0_info->min_mhz; + } + if (vf_point->nvd_mhz > p0_info->max_mhz) { + vf_point->nvd_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->nvd_mhz * 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + max_clkmhz = (((u32)vf_point->nvd_mhz * 100U)/ (u32)max_ratio); + if (gpcclk_clkmhz < max_clkmhz) { + gpcclk_clkmhz = max_clkmhz; + } + break; + case CTRL_CLK_DOMAIN_HOSTCLK: + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_HOSTCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get HOSTCLK P0 info"); + break; + } + max_ratio = (vf_point->host_mhz*100U)/vf_point->gpc_mhz; + if ( vf_point->host_mhz < p0_info->min_mhz ) { + vf_point->host_mhz = p0_info->min_mhz; + } + if (vf_point->host_mhz > p0_info->max_mhz) { + vf_point->host_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->host_mhz * 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + max_clkmhz = (((u32)vf_point->host_mhz * 100U)/ (u32)max_ratio); + if (gpcclk_clkmhz < max_clkmhz) { + gpcclk_clkmhz = max_clkmhz; + } + break; + default: + nvgpu_pmu_dbg(g, "Fixed clock domain"); + break; + } + } + + change_input.pstate_index = 0U; + change_input.flags = (u32)CTRL_PERF_CHANGE_SEQ_CHANGE_FORCE; + change_input.vf_points_cache_counter = 0xFFFFFFFFU; + + status = clk_domain_freq_to_volt(g, gpcclk_domain, + &gpcclk_clkmhz, &gpcclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); + gpcclk_voltuv += VMIN_PAD_UV; + change_input.volt[0].voltage_uv = gpcclk_voltuv; + change_input.volt[0].voltage_min_noise_unaware_uv = gpcclk_voltuv; + change_input.volt_rails_mask.super.data[0] = 1U; + + /* RPC to PMU to queue to execute change sequence request*/ + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_perf_change_seq_queue_change )); + rpc.change = change_input; + rpc.change.pstate_index = 0; + PMU_RPC_EXECUTE_CPB(status, pmu, PERF, CHANGE_SEQ_QUEUE_CHANGE, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute Change Seq RPC status=0x%x", + status); + } + + /* Wait for sync change to complete. 
*/ + if ((rpc.change.flags & CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC) == 0U) { + nvgpu_msleep(20); + } + return status; +} + + int nvgpu_clk_arb_find_slave_points(struct nvgpu_clk_arb *arb, + struct nvgpu_clk_slave_freq *vf_point) +{ + + u16 gpc2clk_target; + struct nvgpu_clk_vf_table *table; + u32 index; + int status = 0; + do { + gpc2clk_target = vf_point->gpc_mhz; + + table = NV_ACCESS_ONCE(arb->current_vf_table); + /* pointer to table can be updated by callback */ + nvgpu_smp_rmb(); + + if (table == NULL) { + continue; + } + if ((table->gpc2clk_num_points == 0U)) { + nvgpu_err(arb->g, "found empty table"); + status = -EINVAL; ; + } + + /* round up the freq requests */ + for (index = 0; index < table->gpc2clk_num_points; index++) { + if ((table->gpc2clk_points[index].gpc_mhz >= + gpc2clk_target)) { + gpc2clk_target = + table->gpc2clk_points[index].gpc_mhz; + vf_point->sys_mhz = + table->gpc2clk_points[index].sys_mhz; + vf_point->xbar_mhz = + table->gpc2clk_points[index].xbar_mhz; + vf_point->nvd_mhz = + table->gpc2clk_points[index].nvd_mhz; + vf_point->host_mhz = + table->gpc2clk_points[index].host_mhz; + break; + } + } + vf_point->gpc_mhz = gpc2clk_target < vf_point->gpc_mhz ? gpc2clk_target : vf_point->gpc_mhz; + } while ((table == NULL) || + (NV_ACCESS_ONCE(arb->current_vf_table) != table)); + + return status; + +} + /*get latest vf point data from PMU */ int clk_vf_point_cache(struct gk20a *g) { @@ -495,36 +701,57 @@ int clk_vf_point_cache(struct gk20a *g) struct boardobj *pboardobj = NULL; struct nv_pmu_boardobj_query *pboardobjpmustatus = NULL; int status; + struct clk_vf_point *pclk_vf_point; u8 index; + u32 voltage_min_uv,voltage_step_size_uv; + u32 gpcclk_clkmhz=0, gpcclk_voltuv=0; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; nvgpu_log_info(g, " "); pclk_vf_points = &g->clk_pmu->clk_vf_pointobjs; pboardobjgrp = &pclk_vf_points->super.super; pboardobjgrpmask = &pclk_vf_points->super.mask.super; - status = pboardobjgrp->pmugetstatus(g, pboardobjgrp, pboardobjgrpmask); - if (status != 0) { - nvgpu_err(g, "err getting boardobjs from pmu"); - return status; - } - pboardobjgrppmu = pboardobjgrp->pmu.getstatus.buf; - - BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct boardobj*, pboardobj, index) { - status = pboardobjgrp->pmustatusinstget(g, - (struct nv_pmu_boardobjgrp *)pboardobjgrppmu, - &pboardobjpmustatus, index); + if (ver == NVGPU_GPUID_GV100) { + status = pboardobjgrp->pmugetstatus(g, pboardobjgrp, pboardobjgrpmask); if (status != 0) { - nvgpu_err(g, "could not get status object instance"); + nvgpu_err(g, "err getting boardobjs from pmu"); return status; } + pboardobjgrppmu = pboardobjgrp->pmu.getstatus.buf; - status = clk_vf_point_update(g, pboardobj, - (struct nv_pmu_boardobj *)pboardobjpmustatus); - if (status != 0) { - nvgpu_err(g, "invalid data from pmu at %d", index); - return status; + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct boardobj*, pboardobj, index) { + status = pboardobjgrp->pmustatusinstget(g, + (struct nv_pmu_boardobjgrp *)pboardobjgrppmu, + &pboardobjpmustatus, index); + if (status != 0) { + nvgpu_err(g, "could not get status object instance"); + return status; + } + status = clk_vf_point_update(g, pboardobj, + (struct nv_pmu_boardobj *)pboardobjpmustatus); + if (status != 0) { + nvgpu_err(g, "invalid data from pmu at %d", index); + return status; + } + + } + } else { + voltage_min_uv = g->clk_pmu->avfs_fllobjs.lut_min_voltage_uv; + voltage_step_size_uv = g->clk_pmu->avfs_fllobjs.lut_step_size_uv * 2U; + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct boardobj*, 
pboardobj, index) { + pclk_vf_point = (struct clk_vf_point *)(void *)pboardobj; + gpcclk_voltuv = + voltage_min_uv + index * voltage_step_size_uv; + status = clk_domain_volt_to_freq(g, 0, + &gpcclk_clkmhz, &gpcclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); + if (status != 0) { + nvgpu_err(g, "Failed to get freq for requested voltage"); + return status; + } + pclk_vf_point->pair.freq_mhz = (u16)gpcclk_clkmhz; + pclk_vf_point->pair.voltage_uv = gpcclk_voltuv; } } - - return 0; + return status; } diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.h b/drivers/gpu/nvgpu/clk/clk_vf_point.h index 7afaecf51..4c02f6723 100644 --- a/drivers/gpu/nvgpu/clk/clk_vf_point.h +++ b/drivers/gpu/nvgpu/clk/clk_vf_point.h @@ -28,9 +28,22 @@ #include #include +#define VMIN_PAD_UV 50000U + int clk_vf_point_sw_setup(struct gk20a *g); int clk_vf_point_pmu_setup(struct gk20a *g); int clk_vf_point_cache(struct gk20a *g); +struct nvgpu_clk_arb; +struct nvgpu_clk_slave_freq{ + u16 gpc_mhz; + u16 sys_mhz; + u16 xbar_mhz; + u16 host_mhz; + u16 nvd_mhz; +}; + +int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g, struct nvgpu_clk_slave_freq *vf_point); +int nvgpu_clk_arb_find_slave_points(struct nvgpu_clk_arb *arb,struct nvgpu_clk_slave_freq *vf_point); struct clk_vf_points { struct boardobjgrp_e255 super; diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c index ae17d4601..da1b63547 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c @@ -1329,8 +1329,7 @@ int nvgpu_init_pmu_fw_ver_ops(struct nvgpu_pmu *pmu) g->ops.pmu_ver.clk.clk_set_boot_clk = nvgpu_clk_set_boot_fll_clk_gv10x; } else { - g->ops.pmu_ver.clk.clk_set_boot_clk = - nvgpu_clk_set_boot_fll_clk_tu10x; + g->ops.pmu_ver.clk.clk_set_boot_clk = NULL; } } else { g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params = diff --git a/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c index 5b937cbf0..695cc6ed4 100644 --- a/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c @@ -29,33 +29,20 @@ u32 gp10b_get_arbiter_clk_domains(struct gk20a *g) { (void)g; clk_arb_dbg(g, " "); - return CTRL_CLK_DOMAIN_GPC2CLK; + return CTRL_CLK_DOMAIN_GPCCLK; } int gp10b_get_arbiter_f_points(struct gk20a *g,u32 api_domain, u32 *num_points, u16 *freqs_in_mhz) { int ret = 0; - u32 i; - bool is_freq_list_available = false; - - if (*num_points != 0U) { - is_freq_list_available = true; - } clk_arb_dbg(g, " "); switch (api_domain) { - case CTRL_CLK_DOMAIN_GPC2CLK: + case CTRL_CLK_DOMAIN_GPCCLK: ret = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK, num_points, freqs_in_mhz); - - /* multiply by 2 for GPC2CLK */ - if (ret == 0 && is_freq_list_available) { - for (i = 0U; i < *num_points; i++) { - freqs_in_mhz[i] *= 2U; - } - } break; default: ret = -EINVAL; @@ -73,14 +60,9 @@ int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, clk_arb_dbg(g, " "); switch (api_domain) { - case CTRL_CLK_DOMAIN_GPC2CLK: + case CTRL_CLK_DOMAIN_GPCCLK: ret = g->ops.clk.get_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, min_mhz, max_mhz); - - if (ret == 0) { - *min_mhz *= 2U; - *max_mhz *= 2U; - } break; default: @@ -100,7 +82,7 @@ int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, clk_arb_dbg(g, " "); switch (api_domain) { - case CTRL_CLK_DOMAIN_GPC2CLK: + case CTRL_CLK_DOMAIN_GPCCLK: ret = gp10b_get_arbiter_clk_range(g, api_domain, &min_mhz, &max_mhz); @@ -168,7 +150,7 @@ int gp10b_init_clk_arbiter(struct gk20a *g) arb->g = g; err = 
g->ops.clk_arb.get_arbiter_clk_default(g, - CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz); + CTRL_CLK_DOMAIN_GPCCLK, &default_mhz); if (err < 0) { err = -EINVAL; goto init_fail; @@ -176,7 +158,7 @@ int gp10b_init_clk_arbiter(struct gk20a *g) arb->gpc2clk_default_mhz = default_mhz; - err = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK, + err = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, &arb->gpc2clk_min, &arb->gpc2clk_max); if (err < 0) { @@ -325,11 +307,8 @@ void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) nvgpu_mutex_acquire(&arb->pstate_lock); - /* get the rounded_rate in terms of Hz for igpu - * pass (gpcclk) freq = (gpc2clk) freq / 2 - */ status = g->ops.clk.clk_get_round_rate(g, - CTRL_CLK_DOMAIN_GPCCLK, ((unsigned long)gpc2clk_session_target / 2UL) * 1000000UL, &rounded_rate); + CTRL_CLK_DOMAIN_GPCCLK, gpc2clk_session_target * 1000000UL, &rounded_rate); clk_arb_dbg(g, "rounded_rate: %lu\n", rounded_rate); diff --git a/drivers/gpu/nvgpu/gv100/clk_arb_gv100.c b/drivers/gpu/nvgpu/gv100/clk_arb_gv100.c new file mode 100644 index 000000000..d834160a3 --- /dev/null +++ b/drivers/gpu/nvgpu/gv100/clk_arb_gv100.c @@ -0,0 +1,739 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include "clk_arb_gv100.h" + +u32 gv100_get_arbiter_clk_domains(struct gk20a *g) +{ + (void)g; + return (CTRL_CLK_DOMAIN_GPCCLK); +} + +int gv100_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz) +{ + return g->ops.clk.clk_domain_get_f_points(g, + api_domain, num_points, freqs_in_mhz); +} + +int gv100_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz) +{ + u32 clkwhich; + struct clk_set_info *p0_info; + struct avfsfllobjs *pfllobjs = &(g->clk_pmu->avfs_fllobjs); + u16 limit_min_mhz; + bool error_status = false; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_MCLK: + clkwhich = CLKWHICH_MCLK; + break; + + case CTRL_CLK_DOMAIN_GPCCLK: + clkwhich = CLKWHICH_GPCCLK; + break; + + default: + error_status = true; + break; + } + + if (error_status == true) { + return -EINVAL; + } + + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, clkwhich); + if (p0_info == NULL) { + return -EINVAL; + } + + limit_min_mhz = p0_info->min_mhz; + /* WAR for DVCO min */ + if (api_domain == CTRL_CLK_DOMAIN_GPCCLK) { + if ((pfllobjs->max_min_freq_mhz != 0U) && + (pfllobjs->max_min_freq_mhz >= limit_min_mhz)) { + limit_min_mhz = pfllobjs->max_min_freq_mhz + 1U; + } + } + *min_mhz = limit_min_mhz; + *max_mhz = p0_info->max_mhz; + + return 0; +} + +int gv100_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz) +{ + u32 clkwhich; + struct clk_set_info *p0_info; + bool error_status = false; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_MCLK: + clkwhich = CLKWHICH_MCLK; + break; + + case CTRL_CLK_DOMAIN_GPCCLK: + clkwhich = CLKWHICH_GPCCLK; + break; + + default: + error_status = true; + break; + } + + if (error_status == true) { + return -EINVAL; + } + + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, clkwhich); + if (p0_info == NULL) { + return -EINVAL; + } + + *default_mhz = p0_info->max_mhz; + return 0; +} + +int gv100_init_clk_arbiter(struct gk20a *g) +{ + struct nvgpu_clk_arb *arb; + u16 default_mhz; + int err; + int index; + struct nvgpu_clk_vf_table *table; + clk_arb_dbg(g, " "); + + if (g->clk_arb != NULL) { + return 0; + } + arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); + if (arb == NULL) { + return -ENOMEM; + } + + err = nvgpu_mutex_init(&arb->pstate_lock); + if (err != 0) { + goto mutex_fail; + } + nvgpu_spinlock_init(&arb->sessions_lock); + nvgpu_spinlock_init(&arb->users_lock); + nvgpu_spinlock_init(&arb->requests_lock); + + arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (arb->mclk_f_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (arb->gpc2clk_f_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + for (index = 0; index < 2; index++) { + table = &arb->vf_table_pool[index]; + table->gpc2clk_num_points = MAX_F_POINTS; + table->mclk_num_points = MAX_F_POINTS; + + table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (table->gpc2clk_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + + table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (table->mclk_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + } + + g->clk_arb = arb; + arb->g = g; + + err = g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_MCLK, &default_mhz); + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->mclk_default_mhz = default_mhz; + + err = 
g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_GPCCLK, &default_mhz); + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->gpc2clk_default_mhz = default_mhz; + + arb->actual = &arb->actual_pool[0]; + + nvgpu_atomic_set(&arb->req_nr, 0); + + nvgpu_atomic64_set(&arb->alarm_mask, 0); + err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, + DEFAULT_EVENT_NUMBER); + if (err < 0) { + goto init_fail; + } + nvgpu_init_list_node(&arb->users); + nvgpu_init_list_node(&arb->sessions); + nvgpu_init_list_node(&arb->requests); + + (void)nvgpu_cond_init(&arb->request_wq); + + nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item); + nvgpu_init_list_node(&arb->update_arb_work_item.worker_item); + arb->update_vf_table_work_item.arb = arb; + arb->update_arb_work_item.arb = arb; + arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE; + arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB; + err = nvgpu_clk_arb_worker_init(g); + if (err < 0) { + goto init_fail; + } + +#ifdef CONFIG_DEBUG_FS + arb->debug = &arb->debug_pool[0]; + + if (!arb->debugfs_set) { + if (nvgpu_clk_arb_debugfs_init(g)) + arb->debugfs_set = true; + } +#endif + err = clk_vf_point_cache(g); + if (err < 0) { + goto init_fail; + } + + err = nvgpu_clk_arb_update_vf_table(arb); + if (err < 0) { + goto init_fail; + } + + do { + /* Check that first run is completed */ + nvgpu_smp_mb(); + NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq, + nvgpu_atomic_read(&arb->req_nr), 0); + } while (nvgpu_atomic_read(&arb->req_nr) == 0); + return arb->status; + +init_fail: + nvgpu_kfree(g, arb->gpc2clk_f_points); + nvgpu_kfree(g, arb->mclk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); + nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); + } + + nvgpu_mutex_destroy(&arb->pstate_lock); + +mutex_fail: + nvgpu_kfree(g, arb); + + return err; +} + +static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, + u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, + u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram) +{ + u16 gpc2clk_target, mclk_target; + u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; + u32 mclk_voltuv, mclk_voltuv_sram; + u32 current_pstate = VF_POINT_INVALID_PSTATE; + struct nvgpu_clk_vf_table *table; + u32 index, index_mclk; + struct nvgpu_clk_vf_point *mclk_vf = NULL; + + do { + gpc2clk_target = *gpc2clk; + mclk_target = *mclk; + gpc2clk_voltuv = 0; + gpc2clk_voltuv_sram = 0; + mclk_voltuv = 0; + mclk_voltuv_sram = 0; + + table = NV_ACCESS_ONCE(arb->current_vf_table); + /* pointer to table can be updated by callback */ + nvgpu_smp_rmb(); + + if (table == NULL) { + continue; + } + if ((table->gpc2clk_num_points == 0U) || (table->mclk_num_points == 0U)) { + nvgpu_err(arb->g, "found empty table"); + goto find_exit; + } + /* First we check MCLK to find out which PSTATE we are + * are requesting, and from there try to find the minimum + * GPC2CLK on the same PSTATE that satisfies the request. 
+ * If no GPC2CLK can be found, then we need to up the PSTATE + */ + +recalculate_vf_point: + for (index = 0; index < table->mclk_num_points; index++) { + if (table->mclk_points[index].mem_mhz >= mclk_target) { + mclk_vf = &table->mclk_points[index]; + break; + } + } + if (index == table->mclk_num_points) { + mclk_vf = &table->mclk_points[index-1U]; + index = table->mclk_num_points - 1U; + } + index_mclk = index; + + /* round up the freq requests */ + for (index = 0; index < table->gpc2clk_num_points; index++) { + current_pstate = VF_POINT_COMMON_PSTATE( + &table->gpc2clk_points[index], mclk_vf); + + if ((table->gpc2clk_points[index].gpc_mhz >= + gpc2clk_target) && + (current_pstate != VF_POINT_INVALID_PSTATE)) { + gpc2clk_target = + table->gpc2clk_points[index].gpc_mhz; + *sys2clk = + table->gpc2clk_points[index].sys_mhz; + *xbar2clk = + table->gpc2clk_points[index].xbar_mhz; + + gpc2clk_voltuv = + table->gpc2clk_points[index].uvolt; + gpc2clk_voltuv_sram = + table->gpc2clk_points[index].uvolt_sram; + break; + } + } + + if (index == table->gpc2clk_num_points) { + current_pstate = VF_POINT_COMMON_PSTATE( + &table->gpc2clk_points[index-1U], mclk_vf); + if (current_pstate != VF_POINT_INVALID_PSTATE) { + gpc2clk_target = + table->gpc2clk_points[index-1U].gpc_mhz; + *sys2clk = + table->gpc2clk_points[index-1U].sys_mhz; + *xbar2clk = + table->gpc2clk_points[index-1U].xbar_mhz; + + gpc2clk_voltuv = + table->gpc2clk_points[index-1U].uvolt; + gpc2clk_voltuv_sram = + table->gpc2clk_points[index-1U]. + uvolt_sram; + } else if (index_mclk >= table->mclk_num_points - 1U) { + /* There is no available combination of MCLK + * and GPC2CLK, we need to fail this + */ + gpc2clk_target = 0; + mclk_target = 0; + current_pstate = VF_POINT_INVALID_PSTATE; + goto find_exit; + } else { + /* recalculate with higher PSTATE */ + gpc2clk_target = *gpc2clk; + mclk_target = table->mclk_points[index_mclk + 1U]. + mem_mhz; + goto recalculate_vf_point; + } + } + + mclk_target = mclk_vf->mem_mhz; + mclk_voltuv = mclk_vf->uvolt; + mclk_voltuv_sram = mclk_vf->uvolt_sram; + + } while ((table == NULL) || + (NV_ACCESS_ONCE(arb->current_vf_table) != table)); + +find_exit: + *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; + *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? + gpc2clk_voltuv_sram : mclk_voltuv_sram; + /* noise unaware vmin */ + *nuvmin = mclk_voltuv; + *nuvmin_sram = mclk_voltuv_sram; + *gpc2clk = gpc2clk_target < *gpc2clk ? 
gpc2clk_target : *gpc2clk; + *mclk = mclk_target; + return (u8)current_pstate; +} + +static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, + u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, + u32 voltuv_sram) +{ + struct set_fll_clk fllclk; + struct nvgpu_clk_arb *arb = g->clk_arb; + int status; + + fllclk.gpc2clkmhz = gpc2clk_target; + fllclk.sys2clkmhz = sys2clk_target; + fllclk.xbar2clkmhz = xbar2clk_target; + + fllclk.voltuv = voltuv; + + /* if voltage ascends we do: + * (1) FLL change + * (2) Voltage change + * (3) MCLK change + * If it goes down + * (1) MCLK change + * (2) Voltage change + * (3) FLL change + */ + + /* descending */ + if (voltuv < arb->voltuv_actual) { + if (g->ops.clk.mclk_change != NULL) { + status = g->ops.clk.mclk_change(g, mclk_target); + if (status < 0) { + return status; + } + } + status = volt_set_voltage(g, voltuv, voltuv_sram); + if (status < 0) { + return status; + } + + status = clk_set_fll_clks(g, &fllclk); + if (status < 0) { + return status; + } + } else { + status = clk_set_fll_clks(g, &fllclk); + if (status < 0) { + return status; + } + status = volt_set_voltage(g, voltuv, voltuv_sram); + if (status < 0) { + return status; + } + if (g->ops.clk.mclk_change != NULL) { + status = g->ops.clk.mclk_change(g, mclk_target); + if (status < 0) { + return status; + } + } + } + return 0; +} + +void gv100_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) +{ + struct nvgpu_clk_session *session; + struct nvgpu_clk_dev *dev; + struct nvgpu_clk_dev *tmp; + struct nvgpu_clk_arb_target *target, *actual; + struct gk20a *g = arb->g; + + u32 current_pstate = VF_POINT_INVALID_PSTATE; + u32 voltuv=0, voltuv_sram; + bool mclk_set, gpc2clk_set; + u32 nuvmin, nuvmin_sram; + u32 alarms_notified = 0; + u32 current_alarm; + int status = 0; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; + /* Temporary variables for checking target frequency */ + u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target; + u16 gpc2clk_session_target, mclk_session_target; + struct nvgpu_clk_slave_freq vf_point; + +#ifdef CONFIG_DEBUG_FS + u64 t0, t1; + struct nvgpu_clk_arb_debug *debug; + +#endif + + clk_arb_dbg(g, " "); + + /* bail out if gpu is down */ + if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) { + goto exit_arb; + } + +#ifdef CONFIG_DEBUG_FS + g->ops.ptimer.read_ptimer(g, &t0); +#endif + + /* Only one arbiter should be running */ + gpc2clk_target = 0; + mclk_target = 0; + nvgpu_spinlock_acquire(&arb->sessions_lock); + nvgpu_list_for_each_entry(session, &arb->sessions, + nvgpu_clk_session, link) { + if (!session->zombie) { + mclk_set = false; + gpc2clk_set = false; + target = (session->target == &session->target_pool[0] ? 
+ &session->target_pool[1] : + &session->target_pool[0]); + nvgpu_spinlock_acquire(&session->session_lock); + if (!nvgpu_list_empty(&session->targets)) { + /* Copy over state */ + target->mclk = session->target->mclk; + target->gpc2clk = session->target->gpc2clk; + /* Query the latest committed request */ + nvgpu_list_for_each_entry_safe(dev, tmp, + &session->targets, nvgpu_clk_dev, node) { + if ((mclk_set == false) && (dev->mclk_target_mhz != 0U)) { + target->mclk = + dev->mclk_target_mhz; + mclk_set = true; + } + if ((gpc2clk_set == false) && + (dev->gpc2clk_target_mhz != 0U)) { + target->gpc2clk = + dev->gpc2clk_target_mhz; + gpc2clk_set = true; + } + nvgpu_ref_get(&dev->refcount); + nvgpu_list_del(&dev->node); + nvgpu_spinlock_acquire( + &arb->requests_lock); + nvgpu_list_add( + &dev->node, &arb->requests); + nvgpu_spinlock_release(&arb->requests_lock); + } + session->target = target; + } + nvgpu_spinlock_release( + &session->session_lock); + + mclk_target = mclk_target > session->target->mclk ? + mclk_target : session->target->mclk; + + gpc2clk_target = + gpc2clk_target > session->target->gpc2clk ? + gpc2clk_target : session->target->gpc2clk; + } + } + nvgpu_spinlock_release(&arb->sessions_lock); + + gpc2clk_target = (gpc2clk_target > 0U) ? gpc2clk_target : + arb->gpc2clk_default_mhz; + + if (gpc2clk_target < arb->gpc2clk_min) { + gpc2clk_target = arb->gpc2clk_min; + } + + if (gpc2clk_target > arb->gpc2clk_max) { + gpc2clk_target = arb->gpc2clk_max; + } + + mclk_target = (mclk_target > 0U) ? mclk_target : + arb->mclk_default_mhz; + + if (mclk_target < arb->mclk_min) { + mclk_target = arb->mclk_min; + } + + if (mclk_target > arb->mclk_max) { + mclk_target = arb->mclk_max; + } + + sys2clk_target = 0; + xbar2clk_target = 0; + + gpc2clk_session_target = gpc2clk_target; + mclk_session_target = mclk_target; + + if (ver == NVGPU_GPUID_GV100) { + /* Query the table for the closest vf point to program */ + current_pstate = (u8)nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, + &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv, + &voltuv_sram, &nuvmin, &nuvmin_sram); + + if ((gpc2clk_target < gpc2clk_session_target) || + (mclk_target < mclk_session_target)) { + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_TARGET_VF_NOT_POSSIBLE)); + } + + if ((arb->actual->gpc2clk == gpc2clk_target) && + (arb->actual->mclk == mclk_target) && + (arb->voltuv_actual == voltuv)) { + goto exit_arb; + } + + /* Program clocks */ + /* A change in both mclk of gpc2clk may require a change in voltage */ + + nvgpu_mutex_acquire(&arb->pstate_lock); + + status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, + sys2clk_target, xbar2clk_target, mclk_target, voltuv, + voltuv_sram); + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + } + else { + vf_point.gpc_mhz=gpc2clk_target; + (void)nvgpu_clk_arb_find_slave_points(arb, &vf_point); + if (status != 0) { + nvgpu_err(g, "Unable to get slave frequency"); + goto exit_arb; + } + + status = nvgpu_clk_set_req_fll_clk_ps35(g, &vf_point); + if (status != 0) { + nvgpu_err(g, "Unable to program frequency"); + goto exit_arb; + } + } + actual = NV_ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ? 
+ &arb->actual_pool[1] : &arb->actual_pool[0]; + + /* do not reorder this pointer */ + nvgpu_smp_rmb(); + actual->gpc2clk = gpc2clk_target; + actual->mclk = mclk_target; + arb->voltuv_actual = voltuv; + actual->pstate = current_pstate; + arb->status = status; + + /* Make changes visible to other threads */ + nvgpu_smp_wmb(); + arb->actual = actual; + + /* status must be visible before atomic inc */ + nvgpu_smp_wmb(); + nvgpu_atomic_inc(&arb->req_nr); + + if (ver == NVGPU_GPUID_GV100) { + /* Unlock pstate change for PG */ + nvgpu_mutex_release(&arb->pstate_lock); + } + + /* VF Update complete */ + nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE)); + + (void)nvgpu_cond_signal_interruptible(&arb->request_wq); +#ifdef CONFIG_DEBUG_FS + g->ops.ptimer.read_ptimer(g, &t1); + + debug = arb->debug == &arb->debug_pool[0] ? + &arb->debug_pool[1] : &arb->debug_pool[0]; + + memcpy(debug, arb->debug, sizeof(arb->debug_pool[0])); + debug->switch_num++; + + if (debug->switch_num == 1) { + debug->switch_max = debug->switch_min = + debug->switch_avg = (t1-t0)/1000; + debug->switch_std = 0; + } else { + s64 prev_avg; + s64 curr = (t1-t0)/1000; + + debug->switch_max = curr > debug->switch_max ? + curr : debug->switch_max; + debug->switch_min = debug->switch_min ? + (curr < debug->switch_min ? + curr : debug->switch_min) : curr; + prev_avg = debug->switch_avg; + debug->switch_avg = (curr + + (debug->switch_avg * (debug->switch_num-1))) / + debug->switch_num; + debug->switch_std += + (curr - debug->switch_avg) * (curr - prev_avg); + } + /* commit changes before exchanging debug pointer */ + nvgpu_smp_wmb(); + arb->debug = debug; +#endif + +exit_arb: + if (status < 0) { + nvgpu_err(g, "Error in arbiter update"); + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_CLOCK_ARBITER_FAILED)); + } + + current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask); + /* notify completion for all requests */ + nvgpu_spinlock_acquire(&arb->requests_lock); + nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests, + nvgpu_clk_dev, node) { + nvgpu_atomic_set(&dev->poll_mask, + NVGPU_POLLIN | NVGPU_POLLRDNORM); + nvgpu_clk_arb_event_post_event(dev); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + nvgpu_list_del(&dev->node); + } + nvgpu_spinlock_release(&arb->requests_lock); + + nvgpu_atomic_set(&arb->notification_queue.head, + nvgpu_atomic_read(&arb->notification_queue.tail)); + /* notify event for all users */ + nvgpu_spinlock_acquire(&arb->users_lock); + nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) { + alarms_notified |= + nvgpu_clk_arb_notify(dev, arb->actual, current_alarm); + } + nvgpu_spinlock_release(&arb->users_lock); + + /* clear alarms */ + nvgpu_clk_arb_clear_global_alarm(g, alarms_notified & + ~EVENT(ALARM_GPU_LOST)); +} + +void gv100_clk_arb_cleanup(struct nvgpu_clk_arb *arb) +{ + struct gk20a *g = arb->g; + int index; + + nvgpu_kfree(g, arb->gpc2clk_f_points); + nvgpu_kfree(g, arb->mclk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, + arb->vf_table_pool[index].gpc2clk_points); + nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); + } + + nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); + nvgpu_kfree(g, g->clk_arb); + + g->clk_arb = NULL; +} diff --git a/drivers/gpu/nvgpu/gv100/clk_arb_gv100.h b/drivers/gpu/nvgpu/gv100/clk_arb_gv100.h new file mode 100644 index 000000000..e9dd415e9 --- /dev/null +++ b/drivers/gpu/nvgpu/gv100/clk_arb_gv100.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef CLK_ARB_GV100_H +#define CLK_ARB_GV100_H + +struct nvgpu_clk_session; +struct nvgpu_clk_arb; + + +u32 gv100_get_arbiter_clk_domains(struct gk20a *g); +int gv100_get_arbiter_f_points(struct gk20a *g, u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz); +int gv100_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz); +int gv100_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz); +int gv100_init_clk_arbiter(struct gk20a *g); +void gv100_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb); +void gv100_clk_arb_cleanup(struct nvgpu_clk_arb *arb); + +#endif /* CLK_ARB_GV100_H */ diff --git a/drivers/gpu/nvgpu/gv100/clk_gv100.c b/drivers/gpu/nvgpu/gv100/clk_gv100.c index 33778b5a0..cdf59c4f9 100644 --- a/drivers/gpu/nvgpu/gv100/clk_gv100.c +++ b/drivers/gpu/nvgpu/gv100/clk_gv100.c @@ -83,7 +83,7 @@ unsigned long gv100_clk_measure_freq(struct gk20a *g, u32 api_domain) } /* Convert to HZ */ - return freq_khz * 1000UL; + return (freq_khz * 1000UL); } int gv100_init_clk_support(struct gk20a *g) @@ -173,6 +173,7 @@ u32 gv100_get_rate_cntr(struct gk20a *g, struct namemap_cfg *c) { u32 cntr = 0; u64 cntr_start = 0; u64 cntr_stop = 0; + u64 start_time, stop_time; struct clk_gk20a *clk = &g->clk; @@ -188,17 +189,48 @@ u32 gv100_get_rate_cntr(struct gk20a *g, struct namemap_cfg *c) { /* Counter is 36bits , 32 bits on addr[0] and 4 lsb on addr[1] others zero*/ cntr_start = (u64)gk20a_readl(g, c->cntr.reg_cntr_addr[0]); cntr_start += ((u64)gk20a_readl(g, c->cntr.reg_cntr_addr[1]) << 32); + start_time = (u64)nvgpu_current_time_ms(); nvgpu_udelay(XTAL_CNTR_DELAY); - cntr_stop = (u64) gk20a_readl(g, c->cntr.reg_cntr_addr[0]); + stop_time = (u64)nvgpu_current_time_ms(); + cntr_stop = (u64)gk20a_readl(g, c->cntr.reg_cntr_addr[0]); cntr_stop += ((u64)gk20a_readl(g, c->cntr.reg_cntr_addr[1]) << 32); - /*Calculate the difference and convert to KHz*/ - cntr = (u32)((cntr_stop - cntr_start) / 10ULL); + /*Calculate the difference with actual time and convert to KHz*/ + cntr = ((u32)(cntr_stop - cntr_start) / (u32)(stop_time-start_time)); nvgpu_mutex_release(&clk->clk_mutex); return cntr; } +int gv100_clk_domain_get_f_points( + struct gk20a *g, + u32 clkapidomain, + u32 *pfpointscount, + u16 *pfreqpointsinmhz) +{ + int status = -EINVAL; + struct clk_domain *pdomain; + u8 i; + struct clk_pmupstate *pclk = g->clk_pmu; + if (pfpointscount == NULL) { + return -EINVAL; + } + + if
((pfreqpointsinmhz == NULL) && (*pfpointscount != 0U)) { + return -EINVAL; + } + BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super), + struct clk_domain *, pdomain, i) { + if (pdomain->api_domain == clkapidomain) { + status = pdomain->clkdomainclkgetfpoints(g, pclk, + pdomain, pfpointscount, + pfreqpointsinmhz, + CLK_PROG_VFE_ENTRY_LOGIC); + return status; + } + } + return status; +} int gv100_suspend_clk_support(struct gk20a *g) { nvgpu_mutex_destroy(&g->clk.clk_mutex); diff --git a/drivers/gpu/nvgpu/gv100/clk_gv100.h b/drivers/gpu/nvgpu/gv100/clk_gv100.h index fd3229ab6..5455464f3 100644 --- a/drivers/gpu/nvgpu/gv100/clk_gv100.h +++ b/drivers/gpu/nvgpu/gv100/clk_gv100.h @@ -30,5 +30,9 @@ int gv100_init_clk_support(struct gk20a *g); u32 gv100_crystal_clk_hz(struct gk20a *g); unsigned long gv100_clk_measure_freq(struct gk20a *g, u32 api_domain); int gv100_suspend_clk_support(struct gk20a *g); - +int gv100_clk_domain_get_f_points( + struct gk20a *g, + u32 clkapidomain, + u32 *pfpointscount, + u16 *pfreqpointsinmhz); #endif /* CLK_GV100_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h index 7ccafe204..f1ece0f9b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h +++ b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h @@ -138,6 +138,8 @@ struct nvgpu_clk_vf_point { u16 gpc_mhz; u16 sys_mhz; u16 xbar_mhz; + u16 host_mhz; + u16 nvd_mhz; }; u16 mem_mhz; }; diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c index 51aadce7e..d8af9a768 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c @@ -480,7 +480,7 @@ int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, break; case NVGPU_CLK_DOMAIN_GPCCLK: - dev->gpc2clk_target_mhz = target_mhz * 2ULL; + dev->gpc2clk_target_mhz = target_mhz; break; default: @@ -497,7 +497,7 @@ u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); u32 api_domains = 0; - if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) + if (clk_domains & CTRL_CLK_DOMAIN_GPCCLK) api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); if (clk_domains & CTRL_CLK_DOMAIN_MCLK) diff --git a/drivers/gpu/nvgpu/pmu_perf/perf_tu104.c b/drivers/gpu/nvgpu/pmu_perf/perf_tu104.c index 133d353f8..c5c4a0dd0 100644 --- a/drivers/gpu/nvgpu/pmu_perf/perf_tu104.c +++ b/drivers/gpu/nvgpu/pmu_perf/perf_tu104.c @@ -25,7 +25,7 @@ #include #include #include - +#include #include "perf_tu104.h" #include "pmu_perf/pmu_perf.h" @@ -68,6 +68,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg) case NV_PMU_PERF_MSG_ID_VFE_CALLBACK: perf_pmu->vfe_init.state_change = true; (void) nvgpu_cond_signal(&perf_pmu->vfe_init.wq); + nvgpu_clk_arb_schedule_vf_table_update(g); break; case NV_PMU_PERF_MSG_ID_CHANGE_SEQ_COMPLETION: nvgpu_log_fn(g, "Change Seq Completed"); diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 8ee425075..cfb8b2ebb 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -114,6 +114,7 @@ #include "gv100/gr_gv100.h" #include "gv100/mm_gv100.h" #include "gv100/regops_gv100.h" +#include "gv100/clk_arb_gv100.h" #include "pmu_perf/perf_tu104.h" #include "tu104/fifo_tu104.h" @@ -942,12 +943,17 @@ static const struct gpu_ops tu104_ops = { .measure_freq = gv100_clk_measure_freq, .suspend_clk_support = gv100_suspend_clk_support, .perf_pmu_vfe_load = tu104_perf_pmu_vfe_load, + 
.clk_domain_get_f_points = gv100_clk_domain_get_f_points, }, .clk_arb = { - .get_arbiter_clk_domains = NULL, - .get_arbiter_clk_range = NULL, - .get_arbiter_clk_default = NULL, - .get_current_pstate = NULL, + .get_arbiter_clk_domains = gv100_get_arbiter_clk_domains, + .get_arbiter_f_points = gv100_get_arbiter_f_points, + .get_arbiter_clk_range = gv100_get_arbiter_clk_range, + .get_arbiter_clk_default = gv100_get_arbiter_clk_default, + .get_current_pstate = nvgpu_clk_arb_get_current_pstate, + .arbiter_clk_init = gv100_init_clk_arbiter, + .clk_arb_run_arbiter_cb = gv100_clk_arb_run_arbiter_cb, + .clk_arb_cleanup = gv100_clk_arb_cleanup, }, .regops = { .exec_regops = exec_regops_gk20a, @@ -1189,6 +1195,9 @@ int tu104_init_hal(struct gk20a *g) gops->clk.get_crystal_clk_hz = tu104_ops.clk.get_crystal_clk_hz; gops->clk.measure_freq = tu104_ops.clk.measure_freq; gops->clk.suspend_clk_support = tu104_ops.clk.suspend_clk_support; + gops->clk_arb = tu104_ops.clk_arb; + gops->clk.clk_domain_get_f_points = tu104_ops.clk.clk_domain_get_f_points; + gops->clk = tu104_ops.clk; /* Lone functions */ gops->chip_init_gpu_characteristics = @@ -1204,7 +1213,7 @@ int tu104_init_hal(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_RTOS, true); /* for now */ - gops->clk.support_clk_freq_controller = false; + gops->clk.support_clk_freq_controller = true; gops->clk.support_pmgr_domain = false; gops->clk.support_lpwr_pg = false; gops->clk.support_clk_freq_domain = true; diff --git a/drivers/gpu/nvgpu/volt/volt_pmu.c b/drivers/gpu/nvgpu/volt/volt_pmu.c index 2550854ef..444feb87d 100644 --- a/drivers/gpu/nvgpu/volt/volt_pmu.c +++ b/drivers/gpu/nvgpu/volt/volt_pmu.c @@ -375,11 +375,9 @@ int volt_set_noiseaware_vmin(struct gk20a *g, u32 logic_voltage_uv, int status = 0; struct ctrl_volt_volt_rail_list rail_list = { 0 }; - rail_list.num_rails = RAIL_COUNT_GP; + rail_list.num_rails = RAIL_COUNT_GV; rail_list.rails[0].rail_idx = 0; rail_list.rails[0].voltage_uv = logic_voltage_uv; - rail_list.rails[1].rail_idx = 1; - rail_list.rails[1].voltage_uv = sram_voltage_uv; status = volt_policy_set_noiseaware_vmin(g, &rail_list);