diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 72015e129..980ddd62b 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -405,4 +405,5 @@ nvgpu-y += \ therm/thrmchannel.o \ therm/thrmpmu.o \ lpwr/rppg.o \ - lpwr/lpwr.o + lpwr/lpwr.o \ + gv100/clk_arb_gv100.o diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 0701ce395..6dbe755d7 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -240,6 +240,7 @@ srcs := os/posix/nvgpu.c \ gv100/hal_gv100.c \ gv100/gsp_gv100.c \ gv100/clk_gv100.c \ + gv100/clk_arb_gv100.c \ common/bus/bus_tu104.c \ common/fb/fb_tu104.c \ common/ltc/ltc_tu104.c \ diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c index fe24d8f72..e2b764096 100644 --- a/drivers/gpu/nvgpu/clk/clk.c +++ b/drivers/gpu/nvgpu/clk/clk.c @@ -433,7 +433,7 @@ u32 nvgpu_clk_vf_change_inject_data_fill_gp10x(struct gk20a *g, vfchange = &rpccall->params.clk_vf_change_inject; vfchange->flags = 0; vfchange->clk_list.num_domains = 3; - vfchange->clk_list.clk_domains[0].clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; + vfchange->clk_list.clk_domains[0].clk_domain = CTRL_CLK_DOMAIN_GPCCLK; vfchange->clk_list.clk_domains[0].clk_freq_khz = (u32)setfllclk->gpc2clkmhz * 1000U; vfchange->clk_list.clk_domains[0].clk_flags = 0; @@ -441,7 +441,7 @@ u32 nvgpu_clk_vf_change_inject_data_fill_gp10x(struct gk20a *g, setfllclk->current_regime_id_gpc; vfchange->clk_list.clk_domains[0].target_regime_id = setfllclk->target_regime_id_gpc; - vfchange->clk_list.clk_domains[1].clk_domain = CTRL_CLK_DOMAIN_XBAR2CLK; + vfchange->clk_list.clk_domains[1].clk_domain = CTRL_CLK_DOMAIN_XBARCLK; vfchange->clk_list.clk_domains[1].clk_freq_khz = (u32)setfllclk->xbar2clkmhz * 1000U; vfchange->clk_list.clk_domains[1].clk_flags = 0; @@ -449,7 +449,7 @@ u32 nvgpu_clk_vf_change_inject_data_fill_gp10x(struct gk20a *g, setfllclk->current_regime_id_xbar; 
vfchange->clk_list.clk_domains[1].target_regime_id = setfllclk->target_regime_id_xbar; - vfchange->clk_list.clk_domains[2].clk_domain = CTRL_CLK_DOMAIN_SYS2CLK; + vfchange->clk_list.clk_domains[2].clk_domain = CTRL_CLK_DOMAIN_SYSCLK; vfchange->clk_list.clk_domains[2].clk_freq_khz = (u32)setfllclk->sys2clkmhz * 1000U; vfchange->clk_list.clk_domains[2].clk_flags = 0; @@ -630,32 +630,32 @@ int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk) int status = -EINVAL; /*set regime ids */ - status = get_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK, + status = get_regime_id(g, CTRL_CLK_DOMAIN_GPCCLK, &setfllclk->current_regime_id_gpc); if (status != 0) { goto done; } setfllclk->target_regime_id_gpc = find_regime_id(g, - CTRL_CLK_DOMAIN_GPC2CLK, setfllclk->gpc2clkmhz); + CTRL_CLK_DOMAIN_GPCCLK, setfllclk->gpc2clkmhz); - status = get_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK, + status = get_regime_id(g, CTRL_CLK_DOMAIN_SYSCLK, &setfllclk->current_regime_id_sys); if (status != 0) { goto done; } setfllclk->target_regime_id_sys = find_regime_id(g, - CTRL_CLK_DOMAIN_SYS2CLK, setfllclk->sys2clkmhz); + CTRL_CLK_DOMAIN_SYSCLK, setfllclk->sys2clkmhz); - status = get_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK, + status = get_regime_id(g, CTRL_CLK_DOMAIN_XBARCLK, &setfllclk->current_regime_id_xbar); if (status != 0) { goto done; } setfllclk->target_regime_id_xbar = find_regime_id(g, - CTRL_CLK_DOMAIN_XBAR2CLK, setfllclk->xbar2clkmhz); + CTRL_CLK_DOMAIN_XBARCLK, setfllclk->xbar2clkmhz); status = clk_pmu_vf_inject(g, setfllclk); @@ -664,19 +664,19 @@ int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk) } /* save regime ids */ - status = set_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK, + status = set_regime_id(g, CTRL_CLK_DOMAIN_XBARCLK, setfllclk->target_regime_id_xbar); if (status != 0) { goto done; } - status = set_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK, + status = set_regime_id(g, CTRL_CLK_DOMAIN_GPCCLK, setfllclk->target_regime_id_gpc); if (status != 0) { goto done; } - status = 
set_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK, + status = set_regime_id(g, CTRL_CLK_DOMAIN_SYSCLK, setfllclk->target_regime_id_sys); if (status != 0) { goto done; @@ -692,8 +692,8 @@ int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk) u8 i; struct clk_pmupstate *pclk = g->clk_pmu; u16 clkmhz = 0; - struct clk_domain_3x_master *p3xmaster; - struct clk_domain_3x_slave *p3xslave; + struct clk_domain_35_master *p35master; + struct clk_domain_35_slave *p35slave; unsigned long slaveidxmask; if (setfllclk->gpc2clkmhz == 0U) { @@ -703,42 +703,44 @@ int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk) BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super), struct clk_domain *, pdomain, i) { - if (pdomain->api_domain == CTRL_CLK_DOMAIN_GPC2CLK) { - + if (pdomain->api_domain == CTRL_CLK_DOMAIN_GPCCLK) { if (!pdomain->super.implements(g, &pdomain->super, - CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER)) { + CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER)) { status = -EINVAL; goto done; } - p3xmaster = (struct clk_domain_3x_master *)pdomain; - slaveidxmask = p3xmaster->slave_idxs_mask; + p35master = (struct clk_domain_35_master *)pdomain; + slaveidxmask = p35master->master.slave_idxs_mask; for_each_set_bit(i, &slaveidxmask, 32U) { - p3xslave = (struct clk_domain_3x_slave *) + p35slave = (struct clk_domain_35_slave *) CLK_CLK_DOMAIN_GET(pclk, i); - if ((p3xslave->super.super.super.api_domain != - CTRL_CLK_DOMAIN_XBAR2CLK) && - (p3xslave->super.super.super.api_domain != - CTRL_CLK_DOMAIN_SYS2CLK)) { - continue; - } + clkmhz = 0; - status = p3xslave->clkdomainclkgetslaveclk(g, + status = p35slave->slave.clkdomainclkgetslaveclk(g, pclk, - (struct clk_domain *)p3xslave, + (struct clk_domain *)(void *)p35slave, &clkmhz, setfllclk->gpc2clkmhz); if (status != 0) { status = -EINVAL; goto done; } - if (p3xslave->super.super.super.api_domain == - CTRL_CLK_DOMAIN_XBAR2CLK) { + if (p35slave->super.super.super.super.api_domain == + CTRL_CLK_DOMAIN_XBARCLK) { setfllclk->xbar2clkmhz = 
clkmhz; } - if (p3xslave->super.super.super.api_domain == - CTRL_CLK_DOMAIN_SYS2CLK) { + if (p35slave->super.super.super.super.api_domain == + CTRL_CLK_DOMAIN_SYSCLK) { setfllclk->sys2clkmhz = clkmhz; } + if (p35slave->super.super.super.super.api_domain == + CTRL_CLK_DOMAIN_NVDCLK) { + setfllclk->nvdclkmhz = clkmhz; + } + if (p35slave->super.super.super.super.api_domain == + CTRL_CLK_DOMAIN_HOSTCLK) { + setfllclk->hostclkmhz = clkmhz; + } } } } @@ -914,14 +916,12 @@ int nvgpu_clk_set_boot_fll_clk_gv10x(struct gk20a *g) } voltuv = gpcclk_voltuv; - status = volt_set_voltage(g, voltuv, 0); if (status != 0) { nvgpu_err(g, "attempt to set boot voltage failed %d", voltuv); } - bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPCCLK; bootfllclk.clkmhz = gpcclk_clkmhz; bootfllclk.voltuv = voltuv; @@ -929,9 +929,7 @@ int nvgpu_clk_set_boot_fll_clk_gv10x(struct gk20a *g) if (status != 0) { nvgpu_err(g, "attempt to set boot gpcclk failed"); } - status = clk_pmu_freq_effective_avg_load(g, true); - /* * Read clocks after some delay with below method * & extract clock data from buffer @@ -1053,19 +1051,35 @@ int nvgpu_clk_set_boot_fll_clk_tu10x(struct gk20a *g) return status; } -int clk_domain_freq_to_volt( - struct gk20a *g, - u8 clkdomain_idx, - u32 *pclkmhz, - u32 *pvoltuv, - u8 railidx -) +int clk_domain_volt_to_freq(struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx) { struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt rpc; struct nvgpu_pmu *pmu = &g->pmu; int status = -EINVAL; - (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt )); + (void)memset(&rpc, 0, sizeof(struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt )); + rpc.volt_rail_idx = volt_rail_volt_domain_convert_to_idx(g, railidx); + rpc.clk_domain_idx = clkdomain_idx; + rpc.voltage_type = CTRL_VOLT_DOMAIN_LOGIC; + rpc.input.value = *pvoltuv; /* the caller's voltage is the RPC input */ + PMU_RPC_EXECUTE_CPB(status, pmu, CLK, CLK_DOMAIN_35_PROG_VOLT_TO_FREQ, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to
execute Volt to Freq RPC status=0x%x", + status); + } + *pclkmhz = rpc.output.value; /* copied out even when the RPC reported failure */ + return status; +} + +int clk_domain_freq_to_volt(struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx) +{ + struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt rpc; + struct nvgpu_pmu *pmu = &g->pmu; + int status = -EINVAL; + + (void)memset(&rpc, 0, sizeof(struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt )); rpc.volt_rail_idx = volt_rail_volt_domain_convert_to_idx(g, railidx); rpc.clk_domain_idx = clkdomain_idx; rpc.voltage_type = CTRL_VOLT_DOMAIN_LOGIC; @@ -1079,13 +1093,9 @@ int clk_domain_freq_to_volt( return status; } -int clk_domain_get_f_or_v( - struct gk20a *g, - u32 clkapidomain, - u16 *pclkmhz, - u32 *pvoltuv, - u8 railidx -) + +int clk_domain_get_f_or_v(struct gk20a *g, u32 clkapidomain, + u16 *pclkmhz, u32 *pvoltuv, u8 railidx) { int status = -EINVAL; struct clk_domain *pdomain; diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h index 52ef4ec2c..a3e3e715b 100644 --- a/drivers/gpu/nvgpu/clk/clk.h +++ b/drivers/gpu/nvgpu/clk/clk.h @@ -78,6 +78,12 @@ struct set_fll_clk { u16 xbar2clkmhz; u8 current_regime_id_xbar; u8 target_regime_id_xbar; + u16 nvdclkmhz; + u8 current_regime_id_nvd; + u8 target_regime_id_nvd; + u16 hostclkmhz; + u8 current_regime_id_host; + u8 target_regime_id_host; }; #define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_MAX_NUMCLKS 9U @@ -126,20 +132,12 @@ void clk_free_pmupstate(struct gk20a *g); int clk_pmu_vin_load(struct gk20a *g); int clk_pmu_clk_domains_load(struct gk20a *g); int clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain); -int clk_domain_get_f_or_v( - struct gk20a *g, - u32 clkapidomain, - u16 *pclkmhz, - u32 *pvoltuv, - u8 railidx -); -int clk_domain_freq_to_volt( - struct gk20a *g, - u8 clkdomain_idx, - u32 *pclkmhz, - u32 *pvoltuv, - u8 railidx -); +int clk_domain_get_f_or_v(struct gk20a *g, u32 clkapidomain, + u16 *pclkmhz, u32 *pvoltuv, u8 railidx); +int clk_domain_freq_to_volt(struct gk20a *g,
u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx); +int clk_domain_volt_to_freq( struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx); int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk); int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk); int clk_pmu_freq_controller_load(struct gk20a *g, bool bload, u8 bit_idx); diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index bed98edf1..60a3afddb 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -112,27 +112,25 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) u32 i, j; int status = -EINVAL; - u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; - u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; u16 clk_cur; u32 num_points; - struct clk_set_info *p5_info, *p0_info; + struct clk_set_info *p0_info; table = NV_ACCESS_ONCE(arb->current_vf_table); /* make flag visible when all data has resolved in the tables */ nvgpu_smp_rmb(); - table = (table == &arb->vf_table_pool[0]) ? 
&arb->vf_table_pool[1] : &arb->vf_table_pool[0]; /* Get allowed memory ranges */ - if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK, + if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, &arb->gpc2clk_min, &arb->gpc2clk_max) < 0) { nvgpu_err(g, "failed to fetch GPC2CLK range"); goto exit_vf_table; } + if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK, &arb->mclk_min, &arb->mclk_max) < 0) { @@ -142,102 +140,22 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) table->gpc2clk_num_points = MAX_F_POINTS; table->mclk_num_points = MAX_F_POINTS; - - if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPC2CLK, + if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPCCLK, &table->gpc2clk_num_points, arb->gpc2clk_f_points)) { nvgpu_err(g, "failed to fetch GPC2CLK frequency points"); goto exit_vf_table; } - - if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_MCLK, - &table->mclk_num_points, arb->mclk_f_points)) { - nvgpu_err(g, "failed to fetch MCLK frequency points"); - goto exit_vf_table; - } - if (!table->mclk_num_points || !table->gpc2clk_num_points) { - nvgpu_err(g, "empty queries to f points mclk %d gpc2clk %d", - table->mclk_num_points, table->gpc2clk_num_points); + if (!table->gpc2clk_num_points) { + nvgpu_err(g, "empty queries to f points gpc2clk %d", table->gpc2clk_num_points); status = -EINVAL; goto exit_vf_table; } - (void) memset(table->mclk_points, 0, - table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point)); (void) memset(table->gpc2clk_points, 0, table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point)); - p5_info = pstate_get_clk_set_info(g, - CTRL_PERF_PSTATE_P5, CLKWHICH_MCLK); - if (!p5_info) { - nvgpu_err(g, "failed to get MCLK P5 info"); - goto exit_vf_table; - } p0_info = pstate_get_clk_set_info(g, - CTRL_PERF_PSTATE_P0, CLKWHICH_MCLK); - if (!p0_info) { - nvgpu_err(g, "failed to get MCLK P0 info"); - goto exit_vf_table; - } - - for (i = 0, j = 0, 
num_points = 0, clk_cur = 0; - i < table->mclk_num_points; i++) { - - if ((arb->mclk_f_points[i] >= arb->mclk_min) && - (arb->mclk_f_points[i] <= arb->mclk_max) && - (arb->mclk_f_points[i] != clk_cur)) { - - table->mclk_points[j].mem_mhz = arb->mclk_f_points[i]; - mclk_voltuv = mclk_voltuv_sram = 0; - - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, - &table->mclk_points[j].mem_mhz, &mclk_voltuv, - CTRL_VOLT_DOMAIN_LOGIC); - if (status < 0) { - nvgpu_err(g, - "failed to get MCLK LOGIC voltage"); - goto exit_vf_table; - } - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, - &table->mclk_points[j].mem_mhz, - &mclk_voltuv_sram, - CTRL_VOLT_DOMAIN_SRAM); - if (status < 0) { - nvgpu_err(g, "failed to get MCLK SRAM voltage"); - goto exit_vf_table; - } - - table->mclk_points[j].uvolt = mclk_voltuv; - table->mclk_points[j].uvolt_sram = mclk_voltuv_sram; - clk_cur = table->mclk_points[j].mem_mhz; - - if ((clk_cur >= p5_info->min_mhz) && - (clk_cur <= p5_info->max_mhz)) - VF_POINT_SET_PSTATE_SUPPORTED( - &table->mclk_points[j], - CTRL_PERF_PSTATE_P5); - if ((clk_cur >= p0_info->min_mhz) && - (clk_cur <= p0_info->max_mhz)) - VF_POINT_SET_PSTATE_SUPPORTED( - &table->mclk_points[j], - CTRL_PERF_PSTATE_P0); - - j++; - num_points++; - - } - } - table->mclk_num_points = num_points; - - p5_info = pstate_get_clk_set_info(g, - CTRL_PERF_PSTATE_P5, CLKWHICH_GPC2CLK); - if (!p5_info) { - status = -EINVAL; - nvgpu_err(g, "failed to get GPC2CLK P5 info"); - goto exit_vf_table; - } - - p0_info = pstate_get_clk_set_info(g, - CTRL_PERF_PSTATE_P0, CLKWHICH_GPC2CLK); + CTRL_PERF_PSTATE_P0, CLKWHICH_GPCCLK); if (!p0_info) { status = -EINVAL; nvgpu_err(g, "failed to get GPC2CLK P0 info"); @@ -247,7 +165,7 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) /* GPC2CLK needs to be checked in two passes. 
The first determines the * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the * second verifies that the clocks minimum is satisfied and sets - * the voltages + * the voltages; the latter part is done in nvgpu_clk_set_req_fll_clk_ps35 */ for (i = 0, j = 0, num_points = 0, clk_cur = 0; i < table->gpc2clk_num_points; i++) { @@ -260,6 +178,7 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) table->gpc2clk_points[j].gpc_mhz = arb->gpc2clk_f_points[i]; setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i]; + status = clk_get_fll_clks(g, &setfllclk); if (status < 0) { nvgpu_err(g, @@ -271,14 +190,13 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) setfllclk.sys2clkmhz; table->gpc2clk_points[j].xbar_mhz = setfllclk.xbar2clkmhz; + table->gpc2clk_points[j].nvd_mhz = + setfllclk.nvdclkmhz; + table->gpc2clk_points[j].host_mhz = + setfllclk.hostclkmhz; clk_cur = table->gpc2clk_points[j].gpc_mhz; - if ((clk_cur >= p5_info->min_mhz) && - (clk_cur <= p5_info->max_mhz)) - VF_POINT_SET_PSTATE_SUPPORTED( - &table->gpc2clk_points[j], - CTRL_PERF_PSTATE_P5); if ((clk_cur >= p0_info->min_mhz) && (clk_cur <= p0_info->max_mhz)) VF_POINT_SET_PSTATE_SUPPORTED( @@ -291,110 +209,6 @@ int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) } table->gpc2clk_num_points = num_points; - /* Second pass */ - for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) { - - u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz; - - gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; - - /* Check sysclk */ - p5_info = pstate_get_clk_set_info(g, - VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]), - CLKWHICH_SYS2CLK); - if (!p5_info) { - status = -EINVAL; - nvgpu_err(g, "failed to get SYS2CLK P5 info"); - goto exit_vf_table; - } - - /* sys2clk below clk min, need to find correct clock */ - if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) { - for (j = i + 1; j < table->gpc2clk_num_points; j++) { - - if (table->gpc2clk_points[j].sys_mhz >= - p5_info->min_mhz) { - - -
table->gpc2clk_points[i].sys_mhz = - p5_info->min_mhz; - - alt_gpc2clk = alt_gpc2clk < - table->gpc2clk_points[j]. - gpc_mhz ? - table->gpc2clk_points[j]. - gpc_mhz : - alt_gpc2clk; - break; - } - } - /* no VF exists that satisfies condition */ - if (j == table->gpc2clk_num_points) { - nvgpu_err(g, "NO SYS2CLK VF point possible"); - status = -EINVAL; - goto exit_vf_table; - } - } - - /* Check xbarclk */ - p5_info = pstate_get_clk_set_info(g, - VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]), - CLKWHICH_XBAR2CLK); - if (!p5_info) { - status = -EINVAL; - nvgpu_err(g, "failed to get SYS2CLK P5 info"); - goto exit_vf_table; - } - - /* xbar2clk below clk min, need to find correct clock */ - if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) { - for (j = i; j < table->gpc2clk_num_points; j++) { - if (table->gpc2clk_points[j].xbar_mhz >= - p5_info->min_mhz) { - - table->gpc2clk_points[i].xbar_mhz = - p5_info->min_mhz; - - alt_gpc2clk = alt_gpc2clk < - table->gpc2clk_points[j]. - gpc_mhz ? - table->gpc2clk_points[j]. 
- gpc_mhz : - alt_gpc2clk; - break; - } - } - /* no VF exists that satisfies condition */ - if (j == table->gpc2clk_num_points) { - status = -EINVAL; - nvgpu_err(g, "NO XBAR2CLK VF point possible"); - - goto exit_vf_table; - } - } - - /* Calculate voltages */ - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, - &alt_gpc2clk, &gpc2clk_voltuv, - CTRL_VOLT_DOMAIN_LOGIC); - if (status < 0) { - nvgpu_err(g, "failed to get GPC2CLK LOGIC voltage"); - goto exit_vf_table; - } - - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, - &alt_gpc2clk, - &gpc2clk_voltuv_sram, - CTRL_VOLT_DOMAIN_SRAM); - if (status < 0) { - nvgpu_err(g, "failed to get GPC2CLK SRAM voltage"); - goto exit_vf_table; - } - - table->gpc2clk_points[i].uvolt = gpc2clk_voltuv; - table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram; - } - /* make table visible when all data has resolved in the tables */ nvgpu_smp_wmb(); arb->current_vf_table = table; @@ -758,11 +572,10 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) { int err = 0; - if (!g->ops.clk.support_clk_freq_controller || + if (!g->ops.clk.support_clk_freq_controller && !g->ops.clk_arb.get_arbiter_clk_domains) { return 0; } - nvgpu_mutex_acquire(&g->clk_arb_enable_lock); err = g->ops.clk_arb.arbiter_clk_init(g); @@ -822,7 +635,7 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, clk_arb_dbg(g, " "); - if (!g->ops.clk.support_clk_freq_controller || + if (!g->ops.clk.support_clk_freq_controller && !g->ops.clk_arb.get_arbiter_clk_domains) { return 0; } @@ -941,7 +754,7 @@ bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) return (clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0; case NVGPU_CLK_DOMAIN_GPCCLK: - return (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0; + return (clk_domains & CTRL_CLK_DOMAIN_GPCCLK) != 0; default: return false; @@ -961,11 +774,7 @@ int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, case NVGPU_CLK_DOMAIN_GPCCLK: ret = g->ops.clk_arb.get_arbiter_clk_range(g, - 
CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz); - if (ret == 0) { - *min_mhz /= 2; - *max_mhz /= 2; - } + CTRL_CLK_DOMAIN_GPCCLK, min_mhz, max_mhz); return ret; default: @@ -977,16 +786,13 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, u32 api_domain, u32 *max_points, u16 *fpoints) { int err; - u32 i; switch (api_domain) { case NVGPU_CLK_DOMAIN_GPCCLK: err = g->ops.clk_arb.get_arbiter_f_points(g, - CTRL_CLK_DOMAIN_GPC2CLK, max_points, fpoints); + CTRL_CLK_DOMAIN_GPCCLK, max_points, fpoints); if (err || !fpoints) return err; - for (i = 0; i < *max_points; i++) - fpoints[i] /= 2; return 0; case NVGPU_CLK_DOMAIN_MCLK: return g->ops.clk_arb.get_arbiter_f_points(g, @@ -1012,7 +818,7 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, break; case NVGPU_CLK_DOMAIN_GPCCLK: - *freq_mhz = target->gpc2clk / 2ULL; + *freq_mhz = target->gpc2clk; break; default: @@ -1039,7 +845,7 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, break; case NVGPU_CLK_DOMAIN_GPCCLK: - *freq_mhz = actual->gpc2clk / 2ULL; + *freq_mhz = actual->gpc2clk ; break; default: @@ -1054,8 +860,8 @@ unsigned long nvgpu_clk_measure_freq(struct gk20a *g, u32 api_domain) unsigned long freq = 0UL; switch (api_domain) { - case CTRL_CLK_DOMAIN_GPC2CLK: - freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) * 2UL; + case CTRL_CLK_DOMAIN_GPCCLK: + freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); break; default: break; @@ -1077,8 +883,8 @@ int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, return 0; case NVGPU_CLK_DOMAIN_GPCCLK: - *freq_mhz = g->ops.clk.measure_freq(g, - CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL; + *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_GPCCLK) / + 1000000ULL; return 0; default: diff --git a/drivers/gpu/nvgpu/clk/clk_domain.c b/drivers/gpu/nvgpu/clk/clk_domain.c index 099a38bd2..49c16155a 100644 --- a/drivers/gpu/nvgpu/clk/clk_domain.c +++ b/drivers/gpu/nvgpu/clk/clk_domain.c @@ -965,8 +965,9 @@ static int 
clkdomaingetslaveclk(struct gk20a *g, struct clk_prog *pprog = NULL; struct clk_prog_1x_master *pprog1xmaster = NULL; u8 slaveidx; + struct clk_domain_35_master *p35master; struct clk_domain_3x_master *p3xmaster; - + u32 ver = g->params.gpu_arch + g->params.gpu_impl; nvgpu_log_info(g, " "); if (pclkmhz == NULL) { @@ -977,16 +978,30 @@ static int clkdomaingetslaveclk(struct gk20a *g, return -EINVAL; } - slaveidx = BOARDOBJ_GET_IDX(pdomain); - p3xmaster = (struct clk_domain_3x_master *) - CLK_CLK_DOMAIN_GET(pclk, - ((struct clk_domain_3x_slave *) - pdomain)->master_idx); - pprog = CLK_CLK_PROG_GET(pclk, p3xmaster->super.clk_prog_idx_first); - pprog1xmaster = (struct clk_prog_1x_master *)pprog; + if(ver == NVGPU_GPUID_GV100) { + slaveidx = BOARDOBJ_GET_IDX(pdomain); + p3xmaster = (struct clk_domain_3x_master *) + CLK_CLK_DOMAIN_GET(pclk, + ((struct clk_domain_3x_slave *) + pdomain)->master_idx); + pprog = CLK_CLK_PROG_GET(pclk, p3xmaster->super.clk_prog_idx_first); + pprog1xmaster = (struct clk_prog_1x_master *)pprog; - status = pprog1xmaster->getslaveclk(g, pclk, pprog1xmaster, - slaveidx, pclkmhz, masterclkmhz); + status = pprog1xmaster->getslaveclk(g, pclk, pprog1xmaster, + slaveidx, pclkmhz, masterclkmhz); + } else { + slaveidx = BOARDOBJ_GET_IDX(pdomain); + p35master = (struct clk_domain_35_master *) + CLK_CLK_DOMAIN_GET(pclk, + ((struct clk_domain_35_slave *) + pdomain)->slave.master_idx); + + pprog = CLK_CLK_PROG_GET(pclk, p35master->master.super.clk_prog_idx_first); + pprog1xmaster = (struct clk_prog_1x_master *)pprog; + + status = pprog1xmaster->getslaveclk(g, pclk, pprog1xmaster, + slaveidx, pclkmhz, masterclkmhz); + } return status; } diff --git a/drivers/gpu/nvgpu/clk/clk_prog.c b/drivers/gpu/nvgpu/clk/clk_prog.c index f84f5be5d..f0dc32133 100644 --- a/drivers/gpu/nvgpu/clk/clk_prog.c +++ b/drivers/gpu/nvgpu/clk/clk_prog.c @@ -1673,7 +1673,7 @@ static int getfpoints_prog_1x_master pvfentry = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)( (u8 *)pvfentry + 
((u8)sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) * - (rail+1U))); + rail)); fpointscount = (u32)pvfentry->vf_point_idx_last - (u32)pvfentry->vf_point_idx_first + 1U; @@ -1707,10 +1707,11 @@ static int getslaveclk_prog_1x_master(struct gk20a *g, { struct clk_progs *pclkprogobjs; struct clk_prog_1x_master_ratio *p1xmasterratio; + struct clk_prog_35_master_ratio *p35masterratio; u8 slaveentrycount; u8 i; struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *pslaveents; - + u32 ver = g->params.gpu_arch + g->params.gpu_impl; if (pclkmhz == NULL) { return -EINVAL; } @@ -1723,27 +1724,50 @@ static int getslaveclk_prog_1x_master(struct gk20a *g, pclkprogobjs = &(pclk->clk_progobjs); slaveentrycount = pclkprogobjs->slave_entry_count; - - if (p1xmaster->super.super.super.implements(g, - &p1xmaster->super.super.super, - CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) { - p1xmasterratio = - (struct clk_prog_1x_master_ratio *)p1xmaster; - pslaveents = p1xmasterratio->p_slave_entries; - for (i = 0; i < slaveentrycount; i++) { - if (pslaveents->clk_dom_idx == - slave_clk_domain) { - break; + if(ver == NVGPU_GPUID_GV100) { + if (p1xmaster->super.super.super.implements(g, + &p1xmaster->super.super.super, + CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) { + p1xmasterratio = + (struct clk_prog_1x_master_ratio *)p1xmaster; + pslaveents = p1xmasterratio->p_slave_entries; + for (i = 0; i < slaveentrycount; i++) { + if (pslaveents->clk_dom_idx == + slave_clk_domain) { + break; + } + pslaveents++; } - pslaveents++; - } - if (i == slaveentrycount) { + if (i == slaveentrycount) { + return -EINVAL; + } + *pclkmhz = (masterclkmhz * pslaveents->ratio)/100U; + } else { + /* only support ratio for now */ return -EINVAL; } - *pclkmhz = (masterclkmhz * pslaveents->ratio)/100U; } else { - /* only support ratio for now */ - return -EINVAL; + if (p1xmaster->super.super.super.implements(g, + &p1xmaster->super.super.super, + CTRL_CLK_CLK_PROG_TYPE_35_MASTER_RATIO)) { + p35masterratio = + (struct 
clk_prog_35_master_ratio *)p1xmaster; + pslaveents = p35masterratio->ratio.p_slave_entries; + for (i = 0; i < slaveentrycount; i++) { + if (pslaveents->clk_dom_idx == + slave_clk_domain) { + break; + } + pslaveents++; + } + if (i == slaveentrycount) { + return -EINVAL; + } + *pclkmhz = (masterclkmhz * pslaveents->ratio)/100U; + } else { + /* only support ratio for now */ + return -EINVAL; + } } return 0; } diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.c b/drivers/gpu/nvgpu/clk/clk_vf_point.c index 23d0dc134..72c976ea4 100644 --- a/drivers/gpu/nvgpu/clk/clk_vf_point.c +++ b/drivers/gpu/nvgpu/clk/clk_vf_point.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include "clk.h" #include "clk_vf_point.h" @@ -484,6 +486,210 @@ static int clk_vf_point_update(struct gk20a *g, return 0; } +int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g, struct nvgpu_clk_slave_freq *vf_point) +{ /* Clamp each requested clock in vf_point to its P0 min/max, look up the GPC voltage and queue one perf change-sequence RPC; returns the status left by that RPC macro. */ + struct nvgpu_pmu *pmu = &g->pmu; + struct nv_pmu_rpc_perf_change_seq_queue_change rpc; + struct ctrl_perf_change_seq_change_input change_input; + struct clk_domain *pclk_domain; + int status = 0; + u8 i = 0, gpcclk_domain=0; + u32 gpcclk_voltuv=0,gpcclk_clkmhz=0; + u32 max_clkmhz; + u16 max_ratio; + struct clk_set_info *p0_info; + + (void) memset(&change_input, 0, + sizeof(struct ctrl_perf_change_seq_change_input)); + BOARDOBJGRP_FOR_EACH(&(g->clk_pmu->clk_domainobjs.super.super), + struct clk_domain *, pclk_domain, i) { + + switch (pclk_domain->api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + gpcclk_domain = i; + gpcclk_clkmhz = vf_point->gpc_mhz; + + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_GPCCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get GPCCLK P0 info"); + break; + } + if ( vf_point->gpc_mhz < p0_info->min_mhz ) { + vf_point->gpc_mhz = p0_info->min_mhz; + } + if (vf_point->gpc_mhz > p0_info->max_mhz) { + vf_point->gpc_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->gpc_mhz * 1000U; +
change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + break; + case CTRL_CLK_DOMAIN_XBARCLK: + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_XBARCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get XBARCLK P0 info"); + break; + } + max_ratio = (vf_point->gpc_mhz != 0U) ? ((vf_point->xbar_mhz*100U)/vf_point->gpc_mhz) : 0U; /* slave/GPC ratio in percent; 0 if the GPC request is 0 */ + if ( vf_point->xbar_mhz < p0_info->min_mhz ) { + vf_point->xbar_mhz = p0_info->min_mhz; + } + if (vf_point->xbar_mhz > p0_info->max_mhz) { + vf_point->xbar_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->xbar_mhz * 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + max_clkmhz = (max_ratio != 0U) ? (((u32)vf_point->xbar_mhz * 100U)/ (u32)max_ratio) : 0U; /* ratio 0: leave gpcclk_clkmhz untouched */ + if (gpcclk_clkmhz < max_clkmhz) { + gpcclk_clkmhz = max_clkmhz; + } + break; + case CTRL_CLK_DOMAIN_SYSCLK: + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_SYSCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get SYSCLK P0 info"); + break; + } + max_ratio = (vf_point->gpc_mhz != 0U) ? ((vf_point->sys_mhz*100U)/vf_point->gpc_mhz) : 0U; /* slave/GPC ratio in percent; 0 if the GPC request is 0 */ + if ( vf_point->sys_mhz < p0_info->min_mhz ) { + vf_point->sys_mhz = p0_info->min_mhz; + } + if (vf_point->sys_mhz > p0_info->max_mhz) { + vf_point->sys_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->sys_mhz * 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + max_clkmhz = (max_ratio != 0U) ? (((u32)vf_point->sys_mhz * 100U)/ (u32)max_ratio) : 0U; /* ratio 0: leave gpcclk_clkmhz untouched */ + if (gpcclk_clkmhz < max_clkmhz) { + gpcclk_clkmhz = max_clkmhz; + } + break; + case CTRL_CLK_DOMAIN_NVDCLK: + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_NVDCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get NVDCLK P0 info"); + break; + } + max_ratio = (vf_point->gpc_mhz != 0U) ? ((vf_point->nvd_mhz*100U)/vf_point->gpc_mhz) : 0U; /* slave/GPC ratio in percent; 0 if the GPC request is 0 */ + if ( vf_point->nvd_mhz < p0_info->min_mhz ) { + vf_point->nvd_mhz = p0_info->min_mhz; + } + if (vf_point->nvd_mhz > p0_info->max_mhz) { + vf_point->nvd_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->nvd_mhz
* 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + max_clkmhz = (max_ratio != 0U) ? (((u32)vf_point->nvd_mhz * 100U)/ (u32)max_ratio) : 0U; /* ratio 0: leave gpcclk_clkmhz untouched */ + if (gpcclk_clkmhz < max_clkmhz) { + gpcclk_clkmhz = max_clkmhz; + } + break; + case CTRL_CLK_DOMAIN_HOSTCLK: + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_HOSTCLK); + if(p0_info == NULL){ + nvgpu_err(g, "failed to get HOSTCLK P0 info"); + break; + } + max_ratio = (vf_point->gpc_mhz != 0U) ? ((vf_point->host_mhz*100U)/vf_point->gpc_mhz) : 0U; /* slave/GPC ratio in percent; 0 if the GPC request is 0 */ + if ( vf_point->host_mhz < p0_info->min_mhz ) { + vf_point->host_mhz = p0_info->min_mhz; + } + if (vf_point->host_mhz > p0_info->max_mhz) { + vf_point->host_mhz = p0_info->max_mhz; + } + change_input.clk[i].clk_freq_khz = (u32)vf_point->host_mhz * 1000U; + change_input.clk_domains_mask.super.data[0] |= (u32) BIT(i); + max_clkmhz = (max_ratio != 0U) ? (((u32)vf_point->host_mhz * 100U)/ (u32)max_ratio) : 0U; /* ratio 0: leave gpcclk_clkmhz untouched */ + if (gpcclk_clkmhz < max_clkmhz) { + gpcclk_clkmhz = max_clkmhz; + } + break; + default: + nvgpu_pmu_dbg(g, "Fixed clock domain"); + break; + } + } + + change_input.pstate_index = 0U; + change_input.flags = (u32)CTRL_PERF_CHANGE_SEQ_CHANGE_FORCE; + change_input.vf_points_cache_counter = 0xFFFFFFFFU; + + status = clk_domain_freq_to_volt(g, gpcclk_domain, + &gpcclk_clkmhz, &gpcclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); /* NOTE(review): this status is never checked; the same variable is handed to the RPC macro below */ + gpcclk_voltuv += VMIN_PAD_UV; + change_input.volt[0].voltage_uv = gpcclk_voltuv; + change_input.volt[0].voltage_min_noise_unaware_uv = gpcclk_voltuv; + change_input.volt_rails_mask.super.data[0] = 1U; + + /* RPC to PMU to queue to execute change sequence request*/ + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_perf_change_seq_queue_change )); + rpc.change = change_input; + rpc.change.pstate_index = 0; + PMU_RPC_EXECUTE_CPB(status, pmu, PERF, CHANGE_SEQ_QUEUE_CHANGE, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute Change Seq RPC status=0x%x", + status); + } + + /* Wait for sync change to complete.
*/ + if ((rpc.change.flags & CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC) == 0U) { + nvgpu_msleep(20); + } + return status; +} + + int nvgpu_clk_arb_find_slave_points(struct nvgpu_clk_arb *arb, + struct nvgpu_clk_slave_freq *vf_point) +{ + + u16 gpc2clk_target; + struct nvgpu_clk_vf_table *table; + u32 index; + int status = 0; + do { + gpc2clk_target = vf_point->gpc_mhz; + + table = NV_ACCESS_ONCE(arb->current_vf_table); + /* pointer to table can be updated by callback */ + nvgpu_smp_rmb(); + + if (table == NULL) { + continue; + } + if ((table->gpc2clk_num_points == 0U)) { + nvgpu_err(arb->g, "found empty table"); + status = -EINVAL; ; + } + + /* round up the freq requests */ + for (index = 0; index < table->gpc2clk_num_points; index++) { + if ((table->gpc2clk_points[index].gpc_mhz >= + gpc2clk_target)) { + gpc2clk_target = + table->gpc2clk_points[index].gpc_mhz; + vf_point->sys_mhz = + table->gpc2clk_points[index].sys_mhz; + vf_point->xbar_mhz = + table->gpc2clk_points[index].xbar_mhz; + vf_point->nvd_mhz = + table->gpc2clk_points[index].nvd_mhz; + vf_point->host_mhz = + table->gpc2clk_points[index].host_mhz; + break; + } + } + vf_point->gpc_mhz = gpc2clk_target < vf_point->gpc_mhz ?
gpc2clk_target : vf_point->gpc_mhz; + } while ((table == NULL) || + (NV_ACCESS_ONCE(arb->current_vf_table) != table)); + + return status; + +} + /*get latest vf point data from PMU */ int clk_vf_point_cache(struct gk20a *g) { @@ -495,36 +701,57 @@ int clk_vf_point_cache(struct gk20a *g) struct boardobj *pboardobj = NULL; struct nv_pmu_boardobj_query *pboardobjpmustatus = NULL; int status; + struct clk_vf_point *pclk_vf_point; u8 index; + u32 voltage_min_uv,voltage_step_size_uv; + u32 gpcclk_clkmhz=0, gpcclk_voltuv=0; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; nvgpu_log_info(g, " "); pclk_vf_points = &g->clk_pmu->clk_vf_pointobjs; pboardobjgrp = &pclk_vf_points->super.super; pboardobjgrpmask = &pclk_vf_points->super.mask.super; - status = pboardobjgrp->pmugetstatus(g, pboardobjgrp, pboardobjgrpmask); - if (status != 0) { - nvgpu_err(g, "err getting boardobjs from pmu"); - return status; - } - pboardobjgrppmu = pboardobjgrp->pmu.getstatus.buf; - - BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct boardobj*, pboardobj, index) { - status = pboardobjgrp->pmustatusinstget(g, - (struct nv_pmu_boardobjgrp *)pboardobjgrppmu, - &pboardobjpmustatus, index); + if (ver == NVGPU_GPUID_GV100) { + status = pboardobjgrp->pmugetstatus(g, pboardobjgrp, pboardobjgrpmask); if (status != 0) { - nvgpu_err(g, "could not get status object instance"); + nvgpu_err(g, "err getting boardobjs from pmu"); return status; } + pboardobjgrppmu = pboardobjgrp->pmu.getstatus.buf; - status = clk_vf_point_update(g, pboardobj, - (struct nv_pmu_boardobj *)pboardobjpmustatus); - if (status != 0) { - nvgpu_err(g, "invalid data from pmu at %d", index); - return status; + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct boardobj*, pboardobj, index) { + status = pboardobjgrp->pmustatusinstget(g, + (struct nv_pmu_boardobjgrp *)pboardobjgrppmu, + &pboardobjpmustatus, index); + if (status != 0) { + nvgpu_err(g, "could not get status object instance"); + return status; + } + status = clk_vf_point_update(g, pboardobj, + 
(struct nv_pmu_boardobj *)pboardobjpmustatus); + if (status != 0) { + nvgpu_err(g, "invalid data from pmu at %d", index); + return status; + } + + } + } else { + voltage_min_uv = g->clk_pmu->avfs_fllobjs.lut_min_voltage_uv; + voltage_step_size_uv = g->clk_pmu->avfs_fllobjs.lut_step_size_uv * 2U; + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct boardobj*, pboardobj, index) { + pclk_vf_point = (struct clk_vf_point *)(void *)pboardobj; + gpcclk_voltuv = + voltage_min_uv + index * voltage_step_size_uv; + status = clk_domain_volt_to_freq(g, 0, + &gpcclk_clkmhz, &gpcclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); + if (status != 0) { + nvgpu_err(g, "Failed to get freq for requested voltage"); + return status; + } + pclk_vf_point->pair.freq_mhz = (u16)gpcclk_clkmhz; + pclk_vf_point->pair.voltage_uv = gpcclk_voltuv; } } - - return 0; + return status; } diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.h b/drivers/gpu/nvgpu/clk/clk_vf_point.h index 7afaecf51..4c02f6723 100644 --- a/drivers/gpu/nvgpu/clk/clk_vf_point.h +++ b/drivers/gpu/nvgpu/clk/clk_vf_point.h @@ -28,9 +28,22 @@ #include #include +#define VMIN_PAD_UV 50000U + int clk_vf_point_sw_setup(struct gk20a *g); int clk_vf_point_pmu_setup(struct gk20a *g); int clk_vf_point_cache(struct gk20a *g); +struct nvgpu_clk_arb; +struct nvgpu_clk_slave_freq{ + u16 gpc_mhz; + u16 sys_mhz; + u16 xbar_mhz; + u16 host_mhz; + u16 nvd_mhz; +}; + +int nvgpu_clk_set_req_fll_clk_ps35(struct gk20a *g, struct nvgpu_clk_slave_freq *vf_point); +int nvgpu_clk_arb_find_slave_points(struct nvgpu_clk_arb *arb,struct nvgpu_clk_slave_freq *vf_point); struct clk_vf_points { struct boardobjgrp_e255 super; diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c index ae17d4601..da1b63547 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c @@ -1329,8 +1329,7 @@ int nvgpu_init_pmu_fw_ver_ops(struct nvgpu_pmu *pmu) g->ops.pmu_ver.clk.clk_set_boot_clk = nvgpu_clk_set_boot_fll_clk_gv10x; } else 
{ - g->ops.pmu_ver.clk.clk_set_boot_clk = - nvgpu_clk_set_boot_fll_clk_tu10x; + g->ops.pmu_ver.clk.clk_set_boot_clk = NULL; } } else { g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params = diff --git a/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c index 5b937cbf0..695cc6ed4 100644 --- a/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c @@ -29,33 +29,20 @@ u32 gp10b_get_arbiter_clk_domains(struct gk20a *g) { (void)g; clk_arb_dbg(g, " "); - return CTRL_CLK_DOMAIN_GPC2CLK; + return CTRL_CLK_DOMAIN_GPCCLK; } int gp10b_get_arbiter_f_points(struct gk20a *g,u32 api_domain, u32 *num_points, u16 *freqs_in_mhz) { int ret = 0; - u32 i; - bool is_freq_list_available = false; - - if (*num_points != 0U) { - is_freq_list_available = true; - } clk_arb_dbg(g, " "); switch (api_domain) { - case CTRL_CLK_DOMAIN_GPC2CLK: + case CTRL_CLK_DOMAIN_GPCCLK: ret = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK, num_points, freqs_in_mhz); - - /* multiply by 2 for GPC2CLK */ - if (ret == 0 && is_freq_list_available) { - for (i = 0U; i < *num_points; i++) { - freqs_in_mhz[i] *= 2U; - } - } break; default: ret = -EINVAL; @@ -73,14 +60,9 @@ int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, clk_arb_dbg(g, " "); switch (api_domain) { - case CTRL_CLK_DOMAIN_GPC2CLK: + case CTRL_CLK_DOMAIN_GPCCLK: ret = g->ops.clk.get_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, min_mhz, max_mhz); - - if (ret == 0) { - *min_mhz *= 2U; - *max_mhz *= 2U; - } break; default: @@ -100,7 +82,7 @@ int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, clk_arb_dbg(g, " "); switch (api_domain) { - case CTRL_CLK_DOMAIN_GPC2CLK: + case CTRL_CLK_DOMAIN_GPCCLK: ret = gp10b_get_arbiter_clk_range(g, api_domain, &min_mhz, &max_mhz); @@ -168,7 +150,7 @@ int gp10b_init_clk_arbiter(struct gk20a *g) arb->g = g; err = g->ops.clk_arb.get_arbiter_clk_default(g, - CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz); + CTRL_CLK_DOMAIN_GPCCLK, 
&default_mhz); if (err < 0) { err = -EINVAL; goto init_fail; @@ -176,7 +158,7 @@ int gp10b_init_clk_arbiter(struct gk20a *g) arb->gpc2clk_default_mhz = default_mhz; - err = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK, + err = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, &arb->gpc2clk_min, &arb->gpc2clk_max); if (err < 0) { @@ -325,11 +307,8 @@ void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) nvgpu_mutex_acquire(&arb->pstate_lock); - /* get the rounded_rate in terms of Hz for igpu - * pass (gpcclk) freq = (gpc2clk) freq / 2 - */ status = g->ops.clk.clk_get_round_rate(g, - CTRL_CLK_DOMAIN_GPCCLK, ((unsigned long)gpc2clk_session_target / 2UL) * 1000000UL, &rounded_rate); + CTRL_CLK_DOMAIN_GPCCLK, gpc2clk_session_target * 1000000UL, &rounded_rate); clk_arb_dbg(g, "rounded_rate: %lu\n", rounded_rate); diff --git a/drivers/gpu/nvgpu/gv100/clk_arb_gv100.c b/drivers/gpu/nvgpu/gv100/clk_arb_gv100.c new file mode 100644 index 000000000..d834160a3 --- /dev/null +++ b/drivers/gpu/nvgpu/gv100/clk_arb_gv100.c @@ -0,0 +1,739 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include "clk_arb_gv100.h" + +u32 gv100_get_arbiter_clk_domains(struct gk20a *g) +{ + (void)g; + return (CTRL_CLK_DOMAIN_GPCCLK); +} + +int gv100_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz) +{ + return g->ops.clk.clk_domain_get_f_points(g, + api_domain, num_points, freqs_in_mhz); +} + +int gv100_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz) +{ + u32 clkwhich; + struct clk_set_info *p0_info; + struct avfsfllobjs *pfllobjs = &(g->clk_pmu->avfs_fllobjs); + u16 limit_min_mhz; + bool error_status = false; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_MCLK: + clkwhich = CLKWHICH_MCLK; + break; + + case CTRL_CLK_DOMAIN_GPCCLK: + clkwhich = CLKWHICH_GPCCLK; + break; + + default: + error_status = true; + break; + } + + if (error_status == true) { + return -EINVAL; + } + + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, clkwhich); + if (p0_info == NULL) { + return -EINVAL; + } + + limit_min_mhz = p0_info->min_mhz; + /* WAR for DVCO min */ + if (api_domain == CTRL_CLK_DOMAIN_GPCCLK) { + if ((pfllobjs->max_min_freq_mhz != 0U) && + (pfllobjs->max_min_freq_mhz >= limit_min_mhz)) { + limit_min_mhz = pfllobjs->max_min_freq_mhz + 1U; + } + } + *min_mhz = limit_min_mhz; + *max_mhz = p0_info->max_mhz; + + return 0; +} + +int gv100_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz) +{ + u32 clkwhich; + struct clk_set_info *p0_info; + bool error_status = false; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_MCLK: + clkwhich = CLKWHICH_MCLK; + break; + + case CTRL_CLK_DOMAIN_GPCCLK: + clkwhich = CLKWHICH_GPCCLK; + break; + + default: + error_status = true; + 
break; + } + + if (error_status == true) { + return -EINVAL; + } + + p0_info = pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, clkwhich); + if (p0_info == NULL) { + return -EINVAL; + } + + *default_mhz = p0_info->max_mhz; + return 0; +} + +int gv100_init_clk_arbiter(struct gk20a *g) +{ + struct nvgpu_clk_arb *arb; + u16 default_mhz; + int err; + int index; + struct nvgpu_clk_vf_table *table; + clk_arb_dbg(g, " "); + + if (g->clk_arb != NULL) { + return 0; + } + arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); + if (arb == NULL) { + return -ENOMEM; + } + + err = nvgpu_mutex_init(&arb->pstate_lock); + if (err != 0) { + goto mutex_fail; + } + nvgpu_spinlock_init(&arb->sessions_lock); + nvgpu_spinlock_init(&arb->users_lock); + nvgpu_spinlock_init(&arb->requests_lock); + + arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (arb->mclk_f_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (arb->gpc2clk_f_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + for (index = 0; index < 2; index++) { + table = &arb->vf_table_pool[index]; + table->gpc2clk_num_points = MAX_F_POINTS; + table->mclk_num_points = MAX_F_POINTS; + + table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (table->gpc2clk_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + + table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (table->mclk_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + } + + g->clk_arb = arb; + arb->g = g; + + err = g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_MCLK, &default_mhz); + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->mclk_default_mhz = default_mhz; + + err = g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_GPCCLK, &default_mhz); + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->gpc2clk_default_mhz = 
default_mhz; + + arb->actual = &arb->actual_pool[0]; + + nvgpu_atomic_set(&arb->req_nr, 0); + + nvgpu_atomic64_set(&arb->alarm_mask, 0); + err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, + DEFAULT_EVENT_NUMBER); + if (err < 0) { + goto init_fail; + } + nvgpu_init_list_node(&arb->users); + nvgpu_init_list_node(&arb->sessions); + nvgpu_init_list_node(&arb->requests); + + (void)nvgpu_cond_init(&arb->request_wq); + + nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item); + nvgpu_init_list_node(&arb->update_arb_work_item.worker_item); + arb->update_vf_table_work_item.arb = arb; + arb->update_arb_work_item.arb = arb; + arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE; + arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB; + err = nvgpu_clk_arb_worker_init(g); + if (err < 0) { + goto init_fail; + } + +#ifdef CONFIG_DEBUG_FS + arb->debug = &arb->debug_pool[0]; + + if (!arb->debugfs_set) { + if (nvgpu_clk_arb_debugfs_init(g)) + arb->debugfs_set = true; + } +#endif + err = clk_vf_point_cache(g); + if (err < 0) { + goto init_fail; + } + + err = nvgpu_clk_arb_update_vf_table(arb); + if (err < 0) { + goto init_fail; + } + + do { + /* Check that first run is completed */ + nvgpu_smp_mb(); + NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq, + nvgpu_atomic_read(&arb->req_nr), 0); + } while (nvgpu_atomic_read(&arb->req_nr) == 0); + return arb->status; + +init_fail: + nvgpu_kfree(g, arb->gpc2clk_f_points); + nvgpu_kfree(g, arb->mclk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); + nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); + } + + nvgpu_mutex_destroy(&arb->pstate_lock); + +mutex_fail: + nvgpu_kfree(g, arb); + + return err; +} + +static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, + u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, + u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram) +{ + u16 gpc2clk_target, 
mclk_target; + u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; + u32 mclk_voltuv, mclk_voltuv_sram; + u32 current_pstate = VF_POINT_INVALID_PSTATE; + struct nvgpu_clk_vf_table *table; + u32 index, index_mclk; + struct nvgpu_clk_vf_point *mclk_vf = NULL; + + do { + gpc2clk_target = *gpc2clk; + mclk_target = *mclk; + gpc2clk_voltuv = 0; + gpc2clk_voltuv_sram = 0; + mclk_voltuv = 0; + mclk_voltuv_sram = 0; + + table = NV_ACCESS_ONCE(arb->current_vf_table); + /* pointer to table can be updated by callback */ + nvgpu_smp_rmb(); + + if (table == NULL) { + continue; + } + if ((table->gpc2clk_num_points == 0U) || (table->mclk_num_points == 0U)) { + nvgpu_err(arb->g, "found empty table"); + goto find_exit; + } + /* First we check MCLK to find out which PSTATE we are + * are requesting, and from there try to find the minimum + * GPC2CLK on the same PSTATE that satisfies the request. + * If no GPC2CLK can be found, then we need to up the PSTATE + */ + +recalculate_vf_point: + for (index = 0; index < table->mclk_num_points; index++) { + if (table->mclk_points[index].mem_mhz >= mclk_target) { + mclk_vf = &table->mclk_points[index]; + break; + } + } + if (index == table->mclk_num_points) { + mclk_vf = &table->mclk_points[index-1U]; + index = table->mclk_num_points - 1U; + } + index_mclk = index; + + /* round up the freq requests */ + for (index = 0; index < table->gpc2clk_num_points; index++) { + current_pstate = VF_POINT_COMMON_PSTATE( + &table->gpc2clk_points[index], mclk_vf); + + if ((table->gpc2clk_points[index].gpc_mhz >= + gpc2clk_target) && + (current_pstate != VF_POINT_INVALID_PSTATE)) { + gpc2clk_target = + table->gpc2clk_points[index].gpc_mhz; + *sys2clk = + table->gpc2clk_points[index].sys_mhz; + *xbar2clk = + table->gpc2clk_points[index].xbar_mhz; + + gpc2clk_voltuv = + table->gpc2clk_points[index].uvolt; + gpc2clk_voltuv_sram = + table->gpc2clk_points[index].uvolt_sram; + break; + } + } + + if (index == table->gpc2clk_num_points) { + current_pstate = 
VF_POINT_COMMON_PSTATE( + &table->gpc2clk_points[index-1U], mclk_vf); + if (current_pstate != VF_POINT_INVALID_PSTATE) { + gpc2clk_target = + table->gpc2clk_points[index-1U].gpc_mhz; + *sys2clk = + table->gpc2clk_points[index-1U].sys_mhz; + *xbar2clk = + table->gpc2clk_points[index-1U].xbar_mhz; + + gpc2clk_voltuv = + table->gpc2clk_points[index-1U].uvolt; + gpc2clk_voltuv_sram = + table->gpc2clk_points[index-1U]. + uvolt_sram; + } else if (index_mclk >= table->mclk_num_points - 1U) { + /* There is no available combination of MCLK + * and GPC2CLK, we need to fail this + */ + gpc2clk_target = 0; + mclk_target = 0; + current_pstate = VF_POINT_INVALID_PSTATE; + goto find_exit; + } else { + /* recalculate with higher PSTATE */ + gpc2clk_target = *gpc2clk; + mclk_target = table->mclk_points[index_mclk + 1U]. + mem_mhz; + goto recalculate_vf_point; + } + } + + mclk_target = mclk_vf->mem_mhz; + mclk_voltuv = mclk_vf->uvolt; + mclk_voltuv_sram = mclk_vf->uvolt_sram; + + } while ((table == NULL) || + (NV_ACCESS_ONCE(arb->current_vf_table) != table)); + +find_exit: + *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; + *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? + gpc2clk_voltuv_sram : mclk_voltuv_sram; + /* noise unaware vmin */ + *nuvmin = mclk_voltuv; + *nuvmin_sram = mclk_voltuv_sram; + *gpc2clk = gpc2clk_target < *gpc2clk ? 
gpc2clk_target : *gpc2clk; + *mclk = mclk_target; + return (u8)current_pstate; +} + +static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, + u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, + u32 voltuv_sram) +{ + struct set_fll_clk fllclk; + struct nvgpu_clk_arb *arb = g->clk_arb; + int status; + + fllclk.gpc2clkmhz = gpc2clk_target; + fllclk.sys2clkmhz = sys2clk_target; + fllclk.xbar2clkmhz = xbar2clk_target; + + fllclk.voltuv = voltuv; + + /* if voltage ascends we do: + * (1) FLL change + * (2) Voltage change + * (3) MCLK change + * If it goes down + * (1) MCLK change + * (2) Voltage change + * (3) FLL change + */ + + /* descending */ + if (voltuv < arb->voltuv_actual) { + if (g->ops.clk.mclk_change != NULL) { + status = g->ops.clk.mclk_change(g, mclk_target); + if (status < 0) { + return status; + } + } + status = volt_set_voltage(g, voltuv, voltuv_sram); + if (status < 0) { + return status; + } + + status = clk_set_fll_clks(g, &fllclk); + if (status < 0) { + return status; + } + } else { + status = clk_set_fll_clks(g, &fllclk); + if (status < 0) { + return status; + } + status = volt_set_voltage(g, voltuv, voltuv_sram); + if (status < 0) { + return status; + } + if (g->ops.clk.mclk_change != NULL) { + status = g->ops.clk.mclk_change(g, mclk_target); + if (status < 0) { + return status; + } + } + } + return 0; +} + +void gv100_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) +{ + struct nvgpu_clk_session *session; + struct nvgpu_clk_dev *dev; + struct nvgpu_clk_dev *tmp; + struct nvgpu_clk_arb_target *target, *actual; + struct gk20a *g = arb->g; + + u32 current_pstate = VF_POINT_INVALID_PSTATE; + u32 voltuv=0, voltuv_sram; + bool mclk_set, gpc2clk_set; + u32 nuvmin, nuvmin_sram; + u32 alarms_notified = 0; + u32 current_alarm; + int status = 0; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; + /* Temporary variables for checking target frequency */ + u16 gpc2clk_target, sys2clk_target, xbar2clk_target, 
mclk_target; + u16 gpc2clk_session_target, mclk_session_target; + struct nvgpu_clk_slave_freq vf_point; + +#ifdef CONFIG_DEBUG_FS + u64 t0, t1; + struct nvgpu_clk_arb_debug *debug; + +#endif + + clk_arb_dbg(g, " "); + + /* bail out if gpu is down */ + if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) { + goto exit_arb; + } + +#ifdef CONFIG_DEBUG_FS + g->ops.ptimer.read_ptimer(g, &t0); +#endif + + /* Only one arbiter should be running */ + gpc2clk_target = 0; + mclk_target = 0; + nvgpu_spinlock_acquire(&arb->sessions_lock); + nvgpu_list_for_each_entry(session, &arb->sessions, + nvgpu_clk_session, link) { + if (!session->zombie) { + mclk_set = false; + gpc2clk_set = false; + target = (session->target == &session->target_pool[0] ? + &session->target_pool[1] : + &session->target_pool[0]); + nvgpu_spinlock_acquire(&session->session_lock); + if (!nvgpu_list_empty(&session->targets)) { + /* Copy over state */ + target->mclk = session->target->mclk; + target->gpc2clk = session->target->gpc2clk; + /* Query the latest committed request */ + nvgpu_list_for_each_entry_safe(dev, tmp, + &session->targets, nvgpu_clk_dev, node) { + if ((mclk_set == false) && (dev->mclk_target_mhz != 0U)) { + target->mclk = + dev->mclk_target_mhz; + mclk_set = true; + } + if ((gpc2clk_set == false) && + (dev->gpc2clk_target_mhz != 0U)) { + target->gpc2clk = + dev->gpc2clk_target_mhz; + gpc2clk_set = true; + } + nvgpu_ref_get(&dev->refcount); + nvgpu_list_del(&dev->node); + nvgpu_spinlock_acquire( + &arb->requests_lock); + nvgpu_list_add( + &dev->node, &arb->requests); + nvgpu_spinlock_release(&arb->requests_lock); + } + session->target = target; + } + nvgpu_spinlock_release( + &session->session_lock); + + mclk_target = mclk_target > session->target->mclk ? + mclk_target : session->target->mclk; + + gpc2clk_target = + gpc2clk_target > session->target->gpc2clk ? 
+ gpc2clk_target : session->target->gpc2clk; + } + } + nvgpu_spinlock_release(&arb->sessions_lock); + + gpc2clk_target = (gpc2clk_target > 0U) ? gpc2clk_target : + arb->gpc2clk_default_mhz; + + if (gpc2clk_target < arb->gpc2clk_min) { + gpc2clk_target = arb->gpc2clk_min; + } + + if (gpc2clk_target > arb->gpc2clk_max) { + gpc2clk_target = arb->gpc2clk_max; + } + + mclk_target = (mclk_target > 0U) ? mclk_target : + arb->mclk_default_mhz; + + if (mclk_target < arb->mclk_min) { + mclk_target = arb->mclk_min; + } + + if (mclk_target > arb->mclk_max) { + mclk_target = arb->mclk_max; + } + + sys2clk_target = 0; + xbar2clk_target = 0; + + gpc2clk_session_target = gpc2clk_target; + mclk_session_target = mclk_target; + + if (ver == NVGPU_GPUID_GV100) { + /* Query the table for the closest vf point to program */ + current_pstate = (u8)nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, + &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv, + &voltuv_sram, &nuvmin, &nuvmin_sram); + + if ((gpc2clk_target < gpc2clk_session_target) || + (mclk_target < mclk_session_target)) { + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_TARGET_VF_NOT_POSSIBLE)); + } + + if ((arb->actual->gpc2clk == gpc2clk_target) && + (arb->actual->mclk == mclk_target) && + (arb->voltuv_actual == voltuv)) { + goto exit_arb; + } + + /* Program clocks */ + /* A change in both mclk of gpc2clk may require a change in voltage */ + + nvgpu_mutex_acquire(&arb->pstate_lock); + + status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, + sys2clk_target, xbar2clk_target, mclk_target, voltuv, + voltuv_sram); + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + } + else { + vf_point.gpc_mhz=gpc2clk_target; + (void)nvgpu_clk_arb_find_slave_points(arb, &vf_point); + if (status != 0) { + nvgpu_err(g, "Unable to get slave frequency"); + goto exit_arb; + } + + status = nvgpu_clk_set_req_fll_clk_ps35(g, &vf_point); + if 
(status != 0) { + nvgpu_err(g, "Unable to program frequency"); + goto exit_arb; + } + } + actual = NV_ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ? + &arb->actual_pool[1] : &arb->actual_pool[0]; + + /* do not reorder this pointer */ + nvgpu_smp_rmb(); + actual->gpc2clk = gpc2clk_target; + actual->mclk = mclk_target; + arb->voltuv_actual = voltuv; + actual->pstate = current_pstate; + arb->status = status; + + /* Make changes visible to other threads */ + nvgpu_smp_wmb(); + arb->actual = actual; + + /* status must be visible before atomic inc */ + nvgpu_smp_wmb(); + nvgpu_atomic_inc(&arb->req_nr); + + if (ver == NVGPU_GPUID_GV100) { + /* Unlock pstate change for PG */ + nvgpu_mutex_release(&arb->pstate_lock); + } + + /* VF Update complete */ + nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE)); + + (void)nvgpu_cond_signal_interruptible(&arb->request_wq); +#ifdef CONFIG_DEBUG_FS + g->ops.ptimer.read_ptimer(g, &t1); + + debug = arb->debug == &arb->debug_pool[0] ? + &arb->debug_pool[1] : &arb->debug_pool[0]; + + memcpy(debug, arb->debug, sizeof(arb->debug_pool[0])); + debug->switch_num++; + + if (debug->switch_num == 1) { + debug->switch_max = debug->switch_min = + debug->switch_avg = (t1-t0)/1000; + debug->switch_std = 0; + } else { + s64 prev_avg; + s64 curr = (t1-t0)/1000; + + debug->switch_max = curr > debug->switch_max ? + curr : debug->switch_max; + debug->switch_min = debug->switch_min ? + (curr < debug->switch_min ? 
+ curr : debug->switch_min) : curr; + prev_avg = debug->switch_avg; + debug->switch_avg = (curr + + (debug->switch_avg * (debug->switch_num-1))) / + debug->switch_num; + debug->switch_std += + (curr - debug->switch_avg) * (curr - prev_avg); + } + /* commit changes before exchanging debug pointer */ + nvgpu_smp_wmb(); + arb->debug = debug; +#endif + +exit_arb: + if (status < 0) { + nvgpu_err(g, "Error in arbiter update"); + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_CLOCK_ARBITER_FAILED)); + } + + current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask); + /* notify completion for all requests */ + nvgpu_spinlock_acquire(&arb->requests_lock); + nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests, + nvgpu_clk_dev, node) { + nvgpu_atomic_set(&dev->poll_mask, + NVGPU_POLLIN | NVGPU_POLLRDNORM); + nvgpu_clk_arb_event_post_event(dev); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + nvgpu_list_del(&dev->node); + } + nvgpu_spinlock_release(&arb->requests_lock); + + nvgpu_atomic_set(&arb->notification_queue.head, + nvgpu_atomic_read(&arb->notification_queue.tail)); + /* notify event for all users */ + nvgpu_spinlock_acquire(&arb->users_lock); + nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) { + alarms_notified |= + nvgpu_clk_arb_notify(dev, arb->actual, current_alarm); + } + nvgpu_spinlock_release(&arb->users_lock); + + /* clear alarms */ + nvgpu_clk_arb_clear_global_alarm(g, alarms_notified & + ~EVENT(ALARM_GPU_LOST)); +} + +void gv100_clk_arb_cleanup(struct nvgpu_clk_arb *arb) +{ + struct gk20a *g = arb->g; + int index; + + nvgpu_kfree(g, arb->gpc2clk_f_points); + nvgpu_kfree(g, arb->mclk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, + arb->vf_table_pool[index].gpc2clk_points); + nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); + } + + nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); + nvgpu_kfree(g, g->clk_arb); + + g->clk_arb = NULL; +} diff --git a/drivers/gpu/nvgpu/gv100/clk_arb_gv100.h 
b/drivers/gpu/nvgpu/gv100/clk_arb_gv100.h new file mode 100644 index 000000000..e9dd415e9 --- /dev/null +++ b/drivers/gpu/nvgpu/gv100/clk_arb_gv100.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef CLK_ARB_GV100_H +#define CLK_ARB_GV100_H + +struct nvgpu_clk_session; +struct nvgpu_clk_arb; + + +u32 gv100_get_arbiter_clk_domains(struct gk20a *g); +int gv100_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz); +int gv100_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz); +int gv100_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz); +int gv100_init_clk_arbiter(struct gk20a *g); +void gv100_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb); +void gv100_clk_arb_cleanup(struct nvgpu_clk_arb *arb); + +#endif /* CLK_ARB_GV100_H */ diff --git a/drivers/gpu/nvgpu/gv100/clk_gv100.c b/drivers/gpu/nvgpu/gv100/clk_gv100.c index 33778b5a0..cdf59c4f9 100644 --- a/drivers/gpu/nvgpu/gv100/clk_gv100.c +++ b/drivers/gpu/nvgpu/gv100/clk_gv100.c @@ -83,7 +83,7 @@ unsigned long gv100_clk_measure_freq(struct gk20a *g, u32 api_domain) } /* Convert to HZ */ - return freq_khz * 1000UL; + return (freq_khz * 1000UL); } int gv100_init_clk_support(struct gk20a *g) @@ -173,6 +173,7 @@ u32 gv100_get_rate_cntr(struct gk20a *g, struct namemap_cfg *c) { u32 cntr = 0; u64 cntr_start = 0; u64 cntr_stop = 0; + u64 start_time, stop_time; struct clk_gk20a *clk = &g->clk; @@ -188,17 +189,48 @@ u32 gv100_get_rate_cntr(struct gk20a *g, struct namemap_cfg *c) { /* Counter is 36bits , 32 bits on addr[0] and 4 lsb on addr[1] others zero*/ cntr_start = (u64)gk20a_readl(g, c->cntr.reg_cntr_addr[0]); cntr_start += ((u64)gk20a_readl(g, c->cntr.reg_cntr_addr[1]) << 32); + start_time = (u64)nvgpu_current_time_ms(); nvgpu_udelay(XTAL_CNTR_DELAY); - cntr_stop = (u64) gk20a_readl(g, c->cntr.reg_cntr_addr[0]); + stop_time = (u64)nvgpu_current_time_ms(); + cntr_stop = (u64)gk20a_readl(g, c->cntr.reg_cntr_addr[0]); cntr_stop += ((u64)gk20a_readl(g, c->cntr.reg_cntr_addr[1]) << 32); - /*Calculate the difference and convert to KHz*/ - cntr = (u32)((cntr_stop - cntr_start) / 10ULL); + /*Calculate the 
difference with Acutal time and convert to KHz*/ + cntr = ((u32)(cntr_stop - cntr_start) / (u32)(stop_time-start_time)); nvgpu_mutex_release(&clk->clk_mutex); return cntr; } +int gv100_clk_domain_get_f_points( + struct gk20a *g, + u32 clkapidomain, + u32 *pfpointscount, + u16 *pfreqpointsinmhz) +{ + int status = -EINVAL; + struct clk_domain *pdomain; + u8 i; + struct clk_pmupstate *pclk = g->clk_pmu; + if (pfpointscount == NULL) { + return -EINVAL; + } + + if ((pfreqpointsinmhz == NULL) && (*pfpointscount != 0U)) { + return -EINVAL; + } + BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super), + struct clk_domain *, pdomain, i) { + if (pdomain->api_domain == clkapidomain) { + status = pdomain->clkdomainclkgetfpoints(g, pclk, + pdomain, pfpointscount, + pfreqpointsinmhz, + CLK_PROG_VFE_ENTRY_LOGIC); + return status; + } + } + return status; +} int gv100_suspend_clk_support(struct gk20a *g) { nvgpu_mutex_destroy(&g->clk.clk_mutex); diff --git a/drivers/gpu/nvgpu/gv100/clk_gv100.h b/drivers/gpu/nvgpu/gv100/clk_gv100.h index fd3229ab6..5455464f3 100644 --- a/drivers/gpu/nvgpu/gv100/clk_gv100.h +++ b/drivers/gpu/nvgpu/gv100/clk_gv100.h @@ -30,5 +30,9 @@ int gv100_init_clk_support(struct gk20a *g); u32 gv100_crystal_clk_hz(struct gk20a *g); unsigned long gv100_clk_measure_freq(struct gk20a *g, u32 api_domain); int gv100_suspend_clk_support(struct gk20a *g); - +int gv100_clk_domain_get_f_points( + struct gk20a *g, + u32 clkapidomain, + u32 *pfpointscount, + u16 *pfreqpointsinmhz); #endif /* CLK_GV100_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h index 7ccafe204..f1ece0f9b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h +++ b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h @@ -138,6 +138,8 @@ struct nvgpu_clk_vf_point { u16 gpc_mhz; u16 sys_mhz; u16 xbar_mhz; + u16 host_mhz; + u16 nvd_mhz; }; u16 mem_mhz; }; diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c index 
51aadce7e..d8af9a768 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c @@ -480,7 +480,7 @@ int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, break; case NVGPU_CLK_DOMAIN_GPCCLK: - dev->gpc2clk_target_mhz = target_mhz * 2ULL; + dev->gpc2clk_target_mhz = target_mhz; break; default: @@ -497,7 +497,7 @@ u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); u32 api_domains = 0; - if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) + if (clk_domains & CTRL_CLK_DOMAIN_GPCCLK) api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); if (clk_domains & CTRL_CLK_DOMAIN_MCLK) diff --git a/drivers/gpu/nvgpu/pmu_perf/perf_tu104.c b/drivers/gpu/nvgpu/pmu_perf/perf_tu104.c index 133d353f8..c5c4a0dd0 100644 --- a/drivers/gpu/nvgpu/pmu_perf/perf_tu104.c +++ b/drivers/gpu/nvgpu/pmu_perf/perf_tu104.c @@ -25,7 +25,7 @@ #include #include #include - +#include #include "perf_tu104.h" #include "pmu_perf/pmu_perf.h" @@ -68,6 +68,7 @@ static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg) case NV_PMU_PERF_MSG_ID_VFE_CALLBACK: perf_pmu->vfe_init.state_change = true; (void) nvgpu_cond_signal(&perf_pmu->vfe_init.wq); + nvgpu_clk_arb_schedule_vf_table_update(g); break; case NV_PMU_PERF_MSG_ID_CHANGE_SEQ_COMPLETION: nvgpu_log_fn(g, "Change Seq Completed"); diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 8ee425075..cfb8b2ebb 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -114,6 +114,7 @@ #include "gv100/gr_gv100.h" #include "gv100/mm_gv100.h" #include "gv100/regops_gv100.h" +#include "gv100/clk_arb_gv100.h" #include "pmu_perf/perf_tu104.h" #include "tu104/fifo_tu104.h" @@ -942,12 +943,17 @@ static const struct gpu_ops tu104_ops = { .measure_freq = gv100_clk_measure_freq, .suspend_clk_support = gv100_suspend_clk_support, .perf_pmu_vfe_load = tu104_perf_pmu_vfe_load, 
+ .clk_domain_get_f_points = gv100_clk_domain_get_f_points, }, .clk_arb = { - .get_arbiter_clk_domains = NULL, - .get_arbiter_clk_range = NULL, - .get_arbiter_clk_default = NULL, - .get_current_pstate = NULL, + .get_arbiter_clk_domains = gv100_get_arbiter_clk_domains, + .get_arbiter_f_points = gv100_get_arbiter_f_points, + .get_arbiter_clk_range = gv100_get_arbiter_clk_range, + .get_arbiter_clk_default = gv100_get_arbiter_clk_default, + .get_current_pstate = nvgpu_clk_arb_get_current_pstate, + .arbiter_clk_init = gv100_init_clk_arbiter, + .clk_arb_run_arbiter_cb = gv100_clk_arb_run_arbiter_cb, + .clk_arb_cleanup = gv100_clk_arb_cleanup, }, .regops = { .exec_regops = exec_regops_gk20a, @@ -1189,6 +1195,9 @@ int tu104_init_hal(struct gk20a *g) gops->clk.get_crystal_clk_hz = tu104_ops.clk.get_crystal_clk_hz; gops->clk.measure_freq = tu104_ops.clk.measure_freq; gops->clk.suspend_clk_support = tu104_ops.clk.suspend_clk_support; + gops->clk_arb = tu104_ops.clk_arb; + gops->clk.clk_domain_get_f_points = tu104_ops.clk.clk_domain_get_f_points; + gops->clk = tu104_ops.clk; /* Lone functions */ gops->chip_init_gpu_characteristics = @@ -1204,7 +1213,7 @@ int tu104_init_hal(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_RTOS, true); /* for now */ - gops->clk.support_clk_freq_controller = false; + gops->clk.support_clk_freq_controller = true; gops->clk.support_pmgr_domain = false; gops->clk.support_lpwr_pg = false; gops->clk.support_clk_freq_domain = true; diff --git a/drivers/gpu/nvgpu/volt/volt_pmu.c b/drivers/gpu/nvgpu/volt/volt_pmu.c index 2550854ef..444feb87d 100644 --- a/drivers/gpu/nvgpu/volt/volt_pmu.c +++ b/drivers/gpu/nvgpu/volt/volt_pmu.c @@ -375,11 +375,9 @@ int volt_set_noiseaware_vmin(struct gk20a *g, u32 logic_voltage_uv, int status = 0; struct ctrl_volt_volt_rail_list rail_list = { 0 }; - rail_list.num_rails = RAIL_COUNT_GP; + rail_list.num_rails = RAIL_COUNT_GV; rail_list.rails[0].rail_idx = 0; rail_list.rails[0].voltage_uv = logic_voltage_uv; - 
rail_list.rails[1].rail_idx = 1; - rail_list.rails[1].voltage_uv = sram_voltage_uv; status = volt_policy_set_noiseaware_vmin(g, &rail_list);