diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 2a60a6f63..ee54f9d4c 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -374,6 +374,7 @@ nvgpu-y += \ pmu_perf/vfe_equ.o \ pmu_perf/pmu_perf.o \ pmu_perf/perf_gv100.o \ + pmu_perf/change_seq.o \ clk/clk.o \ gp106/clk_gp106.o \ gp106/clk_arb_gp106.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 5e1f4de6f..4c97e0839 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -173,6 +173,7 @@ srcs := os/posix/nvgpu.c \ pmu_perf/vfe_equ.c \ pmu_perf/vfe_var.c \ pmu_perf/perf_gv100.c \ + pmu_perf/change_seq.c \ pmgr/pmgr.c \ pmgr/pmgrpmu.c \ pmgr/pwrdev.c \ diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 682242b5c..d03d4a385 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -913,6 +913,7 @@ int gp106_init_hal(struct gk20a *g) gops->clk.support_pmgr_domain = true; gops->clk.support_lpwr_pg = true; gops->clk.lut_num_entries = CTRL_CLK_LUT_NUM_ENTRIES_GP10x; + gops->pmu_perf.support_changeseq = false; g->name = "gp10x"; diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 85be10b8f..8353a9663 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -1059,6 +1059,7 @@ int gv100_init_hal(struct gk20a *g) gops->clk.support_pmgr_domain = false; gops->clk.support_lpwr_pg = false; gops->clk.lut_num_entries = CTRL_CLK_LUT_NUM_ENTRIES_GV10x; + gops->pmu_perf.support_changeseq = false; g->name = "gv10x"; diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index a62792e3c..853c497db 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1155,6 +1155,7 @@ struct gpu_ops { } clk_arb; struct { int (*handle_pmu_perf_event)(struct gk20a *g, void *pmu_msg); + 
bool support_changeseq; } pmu_perf; struct { int (*exec_regops)(struct dbg_session_gk20a *dbg_s, diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlclk.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlclk.h index f15dbdc7b..74991e6d6 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlclk.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlclk.h @@ -85,9 +85,16 @@ #define CTRL_CLK_CLK_VF_POINT_IDX_INVALID 255U -#define CTRL_CLK_CLK_VF_POINT_TYPE_FREQ 0x01U -#define CTRL_CLK_CLK_VF_POINT_TYPE_VOLT 0x02U -#define CTRL_CLK_CLK_VF_POINT_TYPE_UNKNOWN 255U +#define CTRL_CLK_CLK_VF_POINT_TYPE_FREQ 0x01U +#define CTRL_CLK_CLK_VF_POINT_TYPE_VOLT 0x02U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35 0x03U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35_FREQ 0x04U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT 0x05U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT_SEC 0x06U +#define CTRL_CLK_CLK_VF_POINT_TYPE_UNKNOWN 255U + +#define CTRL_CLK_CLK_DOMAIN_CLIENT_MAX_DOMAINS 16 + struct ctrl_clk_clk_prog_1x_master_source_fll { u32 base_vfsmooth_volt_uv; diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlperf.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlperf.h index 6e30c3dfd..04e017684 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlperf.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlperf.h @@ -24,6 +24,9 @@ #ifndef NVGPU_CTRLPERF_H #define NVGPU_CTRLPERF_H +#include "ctrlvolt.h" +#include "ctrlclk.h" + struct ctrl_perf_volt_rail_list_item { u8 volt_domain; u32 voltage_uv; @@ -100,4 +103,174 @@ struct ctrl_perf_vfe_var_single_sensed_fuse_ver_vfield_info { bool b_use_default_on_ver_check_fail; u8 v_field_id_ver; }; + +/*----------------------------- CHANGES_SEQ --------------------------------*/ + +/*! + * Enumeration of the PERF CHANGE_SEQ feature version. + * + * _2X - Legacy implementation of CHANGE_SEQ used in pstates 3.0 and earlier. + * _PMU - Represents PMU based perf change sequence class and its sub-classes. + * _31 - CHANGE_SEQ implementation used with pstates 3.1 and later. 
+ * _35 - CHANGE_SEQ implementation used with pstates 3.5 and later.
+ */
+#define CTRL_PERF_CHANGE_SEQ_VERSION_UNKNOWN 0xFF
+#define CTRL_PERF_CHANGE_SEQ_VERSION_2X 0x01
+#define CTRL_PERF_CHANGE_SEQ_VERSION_PMU 0x02
+#define CTRL_PERF_CHANGE_SEQ_VERSION_31 0x03
+#define CTRL_PERF_CHANGE_SEQ_VERSION_35 0x04
+
+/*!
+ * Flags to provide information about the input perf change request.
+ * These flags are used to understand the type of perf change request.
+ */
+#define CTRL_PERF_CHANGE_SEQ_CHANGE_NONE 0x00
+#define CTRL_PERF_CHANGE_SEQ_CHANGE_FORCE BIT(0)
+#define CTRL_PERF_CHANGE_SEQ_CHANGE_FORCE_CLOCKS BIT(1)
+#define CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC BIT(2)
+#define CTRL_PERF_CHANGE_SEQ_CHANGE_SKIP_VBLANK_WAIT BIT(3)
+
+#define CTRL_PERF_CHANGE_SEQ_SYNC_CHANGE_QUEUE_SIZE 0x04
+#define CTRL_PERF_CHANGE_SEQ_SCRIPT_MAX_PROFILING_THREADS 8
+/* Client IDs for synchronous change requests; 0 is the invalid ID. */
+enum ctrl_perf_change_seq_sync_change_client {
+	CTRL_PERF_CHANGE_SEQ_SYNC_CHANGE_CLIENT_INVALID = 0,
+	CTRL_PERF_CHANGE_SEQ_SYNC_CHANGE_CLIENT_RM_NVGPU = 1,
+	CTRL_PERF_CHANGE_SEQ_SYNC_CHANGE_CLIENT_PMU = 2,
+};
+/* PMU-specific part of a change: only the sequence id. Tag is spelled "chage" (sic). */
+struct ctrl_perf_chage_seq_change_pmu {
+	u32 seq_id;
+};
+/* One perf change: clock list, volt rail list, pstate index, flags and version. */
+struct ctrl_perf_change_seq_change {
+	struct ctrl_clk_clk_domain_list clk_list;
+	struct ctrl_volt_volt_rail_list_v1 volt_list;
+	u32 pstate_index;
+	u32 flags; /* CTRL_PERF_CHANGE_SEQ_CHANGE_* bits */
+	u32 vf_points_cache_counter;
+	u8 version; /* CTRL_PERF_CHANGE_SEQ_VERSION_* */
+	struct ctrl_perf_chage_seq_change_pmu data;
+};
+/* Frequency (kHz) of one clock domain in a change input. */
+struct ctrl_perf_chage_seq_input_clk {
+	u32 clk_freq_khz;
+};
+/* Voltages (uV) of one volt rail in a change input. */
+struct ctrl_perf_chage_seq_input_volt {
+	u32 voltage_uv;
+	u32 voltage_min_noise_unaware_uv;
+};
+/* Change request input: per-domain clocks and per-rail voltages, each with a mask. */
+struct ctrl_perf_change_seq_change_input {
+	u32 pstate_index;
+	u32 flags; /* CTRL_PERF_CHANGE_SEQ_CHANGE_* bits */
+	u32 vf_points_cache_counter;
+	struct ctrl_boardobjgrp_mask_e32 clk_domains_mask;
+	struct ctrl_perf_chage_seq_input_clk clk[CTRL_CLK_CLK_DOMAIN_CLIENT_MAX_DOMAINS];
+	struct ctrl_boardobjgrp_mask_e32 volt_rails_mask;
+	struct ctrl_perf_chage_seq_input_volt volt[CTRL_VOLT_VOLT_RAIL_CLIENT_MAX_RAILS];
+};
+/* 64-bit value split into two u32 halves (presumably so it only needs 32-bit alignment). */
+struct u64_align32 {
+	u32 lo;
+	u32 hi;
+};
+struct ctrl_perf_change_seq_script_profiling_thread {
+	u32 step_mask;
+	struct u64_align32 timens;
+};
+/* Script timing totals plus one profiling entry per thread. */
+struct ctrl_perf_change_seq_script_profiling {
+	struct u64_align32 total_timens; /*align 32 */
+	struct u64_align32 total_build_timens;
+	struct u64_align32 total_execution_timens;
+	u8 num_threads; /*number of threads required to process this script*/
+	struct ctrl_perf_change_seq_script_profiling_thread
+		nvgpu_threads[CTRL_PERF_CHANGE_SEQ_SCRIPT_MAX_PROFILING_THREADS];
+};
+/* Script header: b_increase flag, step count, current step index and profiling data. */
+struct ctrl_perf_change_seq_pmu_script_header {
+	bool b_increase;
+	u8 num_steps;
+	u8 cur_step_index;
+	struct ctrl_perf_change_seq_script_profiling profiling;
+};
+/* Step identifiers; the final MAX_STEPS entry is used to size per-script steps[] arrays. */
+enum ctrl_perf_change_seq_pmu_step_id {
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_NONE,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_PRE_CHANGE_RM,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_PRE_CHANGE_PMU,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_POST_CHANGE_RM,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_POST_CHANGE_PMU,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_PRE_PSTATE_RM,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_PRE_PSTATE_PMU,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_POST_PSTATE_RM,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_POST_PSTATE_PMU,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_VOLT,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_LPWR,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_BIF,
+	CTRL_PERF_CHANGE_SEQ_31_STEP_ID_NOISE_UNAWARE_CLKS,
+	CTRL_PERF_CHANGE_SEQ_31_STEP_ID_NOISE_AWARE_CLKS,
+	CTRL_PERF_CHANGE_SEQ_35_STEP_ID_PRE_VOLT_CLKS,
+	CTRL_PERF_CHANGE_SEQ_35_STEP_ID_POST_VOLT_CLKS,
+	CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_MAX_STEPS,
+};
+/* Per-step timing counters. */
+struct ctrl_perf_change_seq_step_profiling {
+	/* all aligned to 32. NOTE(review): members are plain u64, not struct u64_align32 - confirm the layout. */
+	u64 total_timens;
+	u64 nv_thread_timens;
+	u64 pmu_thread_timens;
+};
+/* Common header of every script step: step id and profiling data. */
+struct ctrl_perf_change_seq_pmu_script_step_super {
+	enum ctrl_perf_change_seq_pmu_step_id step_id;
+	struct ctrl_perf_change_seq_step_profiling profiling;
+};
+/* "change" step: common header plus a pstate index. */
+struct ctrl_perf_change_seq_pmu_script_step_change {
+	struct ctrl_perf_change_seq_pmu_script_step_super super;
+	u32 pstate_index;
+};
+/* "pstate" step: common header plus a pstate index. */
+struct ctrl_perf_change_seq_pmu_script_step_pstate {
+	struct ctrl_perf_change_seq_pmu_script_step_super super;
+	u32 pstate_index;
+};
+/* "lpwr" step: common header plus a pstate index. */
+struct ctrl_perf_change_seq_pmu_script_step_lpwr {
+	struct ctrl_perf_change_seq_pmu_script_step_super super;
+	u32 pstate_index;
+};
+/* "bif" step: common header, pstate index and PCIe / NVLink indices. */
+struct ctrl_perf_change_seq_pmu_script_step_bif {
+	struct ctrl_perf_change_seq_pmu_script_step_super super;
+	u32 pstate_index;
+	u8 pcie_idx;
+	u8 nvlink_idx;
+};
+/* "clks" step: common header plus volt rail and clock domain lists. */
+struct ctrl_perf_change_seq_pmu_script_step_clks {
+	struct ctrl_perf_change_seq_pmu_script_step_super super;
+	struct ctrl_volt_volt_rail_list_v1 volt_list;
+	struct ctrl_clk_clk_domain_list clk_list;
+};
+/* "volt" step: common header plus a volt rail list. */
+struct ctrl_perf_change_seq_pmu_script_step_volt {
+	struct ctrl_perf_change_seq_pmu_script_step_super super;
+	struct ctrl_volt_volt_rail_list_v1 volt_list;
+};
+/* Payload of one script step; every member starts with the common "super" header. */
+union ctrl_perf_change_seq_pmu_script_step_data {
+	struct ctrl_perf_change_seq_pmu_script_step_super super;
+	struct ctrl_perf_change_seq_pmu_script_step_change change;
+	struct ctrl_perf_change_seq_pmu_script_step_pstate pstate;
+	struct ctrl_perf_change_seq_pmu_script_step_lpwr lpwr;
+	struct ctrl_perf_change_seq_pmu_script_step_bif bif;
+	struct ctrl_perf_change_seq_pmu_script_step_clks clk;
+	struct ctrl_perf_change_seq_pmu_script_step_volt volt;
+};
+
 #endif /* NVGPU_CTRLPERF_H */
+
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlvolt.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlvolt.h
index 633bff1ad..d51c4f7fd 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlvolt.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/ctrlvolt.h
@@ -27,9 +27,9 @@ #define CTRL_VOLT_VOLT_RAIL_MAX_RAILS \ CTRL_BOARDOBJGRP_E32_MAX_OBJECTS -#include "ctrlperf.h" #include "ctrlboardobj.h" +#define CTRL_VOLT_VOLT_RAIL_CLIENT_MAX_RAILS 0x04U #define CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES 0x04U #define CTRL_VOLT_VOLT_DEV_VID_VSEL_MAX_ENTRIES (8U) #define CTRL_VOLT_DOMAIN_INVALID 0x00U
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmu_super_surf_if.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmu_super_surf_if.h
index b0f9e1012..4c48144fa 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmu_super_surf_if.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmu_super_surf_if.h
@@ -72,6 +72,11 @@ struct nv_pmu_super_surface { struct nv_pmu_therm_therm_device_boardobj_grp_set therm_device_grp_set; u8 therm_rsvd[0x1460]; } therm; + struct { /* change sequencer scripts; change_seq.c locates script_last here via offsetof() */ + struct perf_change_seq_pmu_script script_curr; + struct perf_change_seq_pmu_script script_last; + struct perf_change_seq_pmu_script script_query; + } change_seq; }; #endif /* NVGPU_PMUIF_GPMU_SUPER_SURF_IF_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuifperf.h b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuifperf.h
index 70b93e12b..952934f64 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuifperf.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmuif/gpmuifperf.h
@@ -151,4 +151,67 @@ struct nv_pmu_perf_msg { }; };
+struct nv_pmu_rpc_perf_change_seq_queue_change {
+	/*[IN/OUT] Must be first field in RPC structure */
+	struct nv_pmu_rpc_header hdr;
+	struct ctrl_perf_change_seq_change_input change;
+	u32 seq_id;
+	u32 scratch[1];
+};
+/* Version-independent part of the INFO_GET payload. */
+struct nv_pmu_perf_change_seq_super_info_get {
+	u8 version;
+};
+/* INFO_GET payload for the PMU sequencer: base part plus a step-id mask. */
+struct nv_pmu_perf_change_seq_pmu_info_get {
+	struct nv_pmu_perf_change_seq_super_info_get super;
+	u32 cpu_advertised_step_id_mask;
+};
+/* Version-independent part of the INFO_SET payload: version and clock-domain masks. */
+struct nv_pmu_perf_change_seq_super_info_set {
+	u8 version;
+	struct ctrl_boardobjgrp_mask_e32 clk_domains_exclusion_mask;
+	struct ctrl_boardobjgrp_mask_e32 clk_domains_inclusion_mask;
+};
+/* INFO_SET payload for the PMU sequencer; change_seq.c fills it from struct change_seq_pmu. */
+struct nv_pmu_perf_change_seq_pmu_info_set {
+	struct nv_pmu_perf_change_seq_super_info_set super;
+	bool b_lock;
+	bool b_vf_point_check_ignore;
+	u32 cpu_step_id_mask;
+};
+/* RPC wrapper used with PMU_RPC_EXECUTE_CPB(..., PERF, CHANGE_SEQ_INFO_GET, ...). */
+struct nv_pmu_rpc_perf_change_seq_info_get {
+	/*[IN/OUT] Must be first field in RPC structure */
+	struct nv_pmu_rpc_header hdr;
+	struct nv_pmu_perf_change_seq_pmu_info_get info_get;
+	u32 scratch[1];
+
+};
+/* RPC wrapper used with PMU_RPC_EXECUTE_CPB(..., PERF, CHANGE_SEQ_INFO_SET, ...). */
+struct nv_pmu_rpc_perf_change_seq_info_set {
+	/*[IN/OUT] Must
be first field in RPC structure */
+	struct nv_pmu_rpc_header hdr;
+	struct nv_pmu_perf_change_seq_pmu_info_set info_set;
+	u32 scratch[1];
+
+};
+
+NV_PMU_MAKE_ALIGNED_STRUCT(ctrl_perf_change_seq_change,
+		sizeof(struct ctrl_perf_change_seq_change));
+
+NV_PMU_MAKE_ALIGNED_STRUCT(ctrl_perf_change_seq_pmu_script_header,
+		sizeof(struct ctrl_perf_change_seq_pmu_script_header));
+
+NV_PMU_MAKE_ALIGNED_UNION(ctrl_perf_change_seq_pmu_script_step_data,
+		sizeof(union ctrl_perf_change_seq_pmu_script_step_data));
+/* Script layout in the super surface: header, change, then one slot per step id. */
+struct perf_change_seq_pmu_script {
+	union ctrl_perf_change_seq_pmu_script_header_aligned hdr;
+	union ctrl_perf_change_seq_change_aligned change;
+	/* below should be an aligned structure */
+	union ctrl_perf_change_seq_pmu_script_step_data_aligned
+		steps[CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_MAX_STEPS];
+};
+
 #endif /* NVGPU_PMUIF_GPMUIFPERF_H*/
diff --git a/drivers/gpu/nvgpu/pmu_perf/change_seq.c b/drivers/gpu/nvgpu/pmu_perf/change_seq.c
new file mode 100644
index 000000000..6ebf03e22
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmu_perf/change_seq.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "clk/clk_domain.h"
+#include "pstate/pstate.h"
+#include "pmu_perf.h"
+
+#include "change_seq.h"
+
+/*
+ * Initialise the base (struct change_seq) part of the change sequencer:
+ * clear the client lock mask, select sequencer version 3.5 and initialise
+ * the clock-domain exclusion and inclusion masks with a bit size of 32.
+ *
+ * Returns 0 on success, otherwise the boardobjgrpmask_init() error.
+ */
+static int perf_change_seq_sw_setup_super(struct gk20a *g,
+		struct change_seq *p_change_seq)
+{
+	int status = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	/* Initialize parameters */
+	p_change_seq->client_lock_mask = 0;
+
+	p_change_seq->version = CTRL_PERF_CHANGE_SEQ_VERSION_35;
+
+	status = boardobjgrpmask_init(
+		&p_change_seq->clk_domains_exclusion_mask.super,
+		32U, ((void*)0));
+	if (status != 0) {
+		nvgpu_err(g, "clk_domains_exclusion_mask failed to init %d",
+			status);
+		goto perf_change_seq_sw_setup_super_exit;
+	}
+
+	status = boardobjgrpmask_init(
+		&p_change_seq->clk_domains_inclusion_mask.super,
+		32U, ((void*)0));
+	if (status != 0) {
+		nvgpu_err(g, "clk_domains_inclusion_mask failed to init %d",
+			status);
+		goto perf_change_seq_sw_setup_super_exit;
+	}
+
+perf_change_seq_sw_setup_super_exit:
+	return status;
+}
+
+/*
+ * Zero g->perf_pmu->changeseq_pmu and fill in the software defaults of the
+ * PMU change sequencer.
+ *
+ * Returns 0 on success, otherwise the error from the base setup.
+ */
+int nvgpu_perf_change_seq_sw_setup(struct gk20a *g)
+{
+	struct change_seq_pmu *perf_change_seq_pmu =
+		&(g->perf_pmu->changeseq_pmu);
+	int status = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	(void) memset(perf_change_seq_pmu, 0, sizeof(*perf_change_seq_pmu));
+
+	status = perf_change_seq_sw_setup_super(g, &perf_change_seq_pmu->super);
+	if (status != 0) {
+		goto exit;
+	}
+
+	perf_change_seq_pmu->super.b_enabled_pmu_support = true;
+	/*exclude MCLK, may not be needed as MCLK is already fixed */
+	perf_change_seq_pmu->super.clk_domains_exclusion_mask.super.data[0]
+		= 0x04U;
+	perf_change_seq_pmu->b_vf_point_check_ignore = true;
+	perf_change_seq_pmu->b_lock = false;
+	perf_change_seq_pmu->cpu_step_id_mask = 0;
+	perf_change_seq_pmu->cpu_adverised_step_id_mask = 0;
+
+exit:
+	return status;
+}
+
+/*
+ * Seed the "last" change sequencer script in the PMU super surface with the
+ * boot state: every clock domain at its P0 nominal frequency in the FFR
+ * regime, and pstate index 0.
+ *
+ * Returns 0 on success, or -EINVAL when a clock domain has no P0 entry.
+ */
+static int build_change_seq_boot(struct gk20a *g)
+{
+	struct nvgpu_pmu *pmu = &g->pmu;
+	struct change_seq_pmu *perf_change_seq_pmu =
+		&(g->perf_pmu->changeseq_pmu);
+	struct clk_domain *pdomain;
+	struct clk_set_info *p0_info;
+	struct change_seq_pmu_script *script_last =
+		&perf_change_seq_pmu->script_last;
+	u8 i = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	script_last->super_surface_offset =
+		(u32) offsetof(struct nv_pmu_super_surface,
+		change_seq.script_last);
+
+	nvgpu_mem_rd_n(g, &pmu->super_surface_buf,
+		script_last->super_surface_offset,
+		&script_last->buf,
+		(u32) sizeof(struct perf_change_seq_pmu_script));
+
+	script_last->buf.change.data.flags = CTRL_PERF_CHANGE_SEQ_CHANGE_NONE;
+
+	/*
+	 * The domain list is rebuilt from scratch below, so do not count on
+	 * top of whatever value was just read back from the super surface.
+	 */
+	script_last->buf.change.data.clk_list.num_domains = 0;
+
+	BOARDOBJGRP_FOR_EACH(&(g->clk_pmu->clk_domainobjs.super.super),
+		struct clk_domain *, pdomain, i) {
+
+		p0_info = pstate_get_clk_set_info(g, CTRL_PERF_PSTATE_P0,
+			pdomain->domain);
+		if (p0_info == NULL) {
+			nvgpu_err(g, "no P0 clk info for domain 0x%x",
+				pdomain->api_domain);
+			return -EINVAL;
+		}
+
+		script_last->buf.change.data.clk_list.clk_domains[i].clk_domain =
+			pdomain->api_domain;
+
+		script_last->buf.change.data.clk_list.clk_domains[i].clk_freq_khz =
+			p0_info->nominal_mhz * 1000U;
+
+		/* VBIOS always boots with FFR*/
+		script_last->buf.change.data.clk_list.clk_domains[i].regime_id =
+			CTRL_CLK_FLL_REGIME_ID_FFR;
+
+		script_last->buf.change.data.clk_list.num_domains++;
+
+		nvgpu_pmu_dbg(g, "Domain %x, Nom Freq = %d Max Freq =%d, regime %d",
+			pdomain->api_domain,p0_info->nominal_mhz, p0_info->max_mhz,
+			CTRL_CLK_FLL_REGIME_ID_FFR);
+	}
+
+	nvgpu_pmu_dbg(g,"Total domains = %d\n",
+		script_last->buf.change.data.clk_list.num_domains);
+
+	/* Assume everything is P0 - Need to find the index for P0 */
+	script_last->buf.change.data.pstate_index = 0;
+
+	nvgpu_mem_wr_n(g, &pmu->super_surface_buf,
+		script_last->super_surface_offset,
+		&script_last->buf,
+		(u32) sizeof(struct perf_change_seq_pmu_script));
+
+	return 0;
+}
+
+/* Send the PERF LOAD RPC to the PMU and return its status (0 on success). */
+static int perf_pmu_load(struct gk20a *g)
+{
+	int status = 0;
+	struct nv_pmu_rpc_struct_perf_load rpc;
+	struct nvgpu_pmu *pmu = &g->pmu;
+
+	(void) memset(&rpc, 0, sizeof(rpc));
+	PMU_RPC_EXECUTE_CPB(status, pmu, PERF, LOAD, &rpc, 0);
+	if (status != 0) {
+		nvgpu_err(g, "Failed to execute RPC status=0x%x",
+			status);
+	}
+	return status;
+}
+
+/*
+ * Bring up the change sequencer on the PMU: seed the boot script, query the
+ * sequencer info, send the software configuration (version, clock-domain
+ * masks, lock and step settings) and finally issue the PERF LOAD RPC.
+ *
+ * Returns 0 on success, otherwise the first error encountered.
+ */
+int nvgpu_perf_change_seq_pmu_setup(struct gk20a *g)
+{
+	struct nv_pmu_rpc_perf_change_seq_info_get info_get;
+	struct nv_pmu_rpc_perf_change_seq_info_set info_set;
+	struct nvgpu_pmu *pmu = &g->pmu;
+	struct change_seq_pmu *perf_change_seq_pmu =
+		&(g->perf_pmu->changeseq_pmu);
+	int status;
+
+	/* Do this till we enable performance table */
+	status = build_change_seq_boot(g);
+	if (status != 0) {
+		nvgpu_err(g, "Failed to build boot change seq script");
+		goto perf_change_seq_pmu_setup_exit;
+	}
+
+	(void) memset(&info_get, 0, sizeof(info_get));
+	(void) memset(&info_set, 0, sizeof(info_set));
+
+	PMU_RPC_EXECUTE_CPB(status, pmu, PERF, CHANGE_SEQ_INFO_GET, &info_get, 0);
+	if (status != 0) {
+		nvgpu_err(g,
+			"Failed to execute Change Seq GET RPC status=0x%x",
+			status);
+		goto perf_change_seq_pmu_setup_exit;
+	}
+
+	/* Record the step mask carried in the GET reply. */
+	perf_change_seq_pmu->cpu_adverised_step_id_mask =
+		info_get.info_get.cpu_advertised_step_id_mask;
+
+	info_set.info_set.super.version = perf_change_seq_pmu->super.version;
+
+	status = boardobjgrpmask_export(
+		&perf_change_seq_pmu->super.clk_domains_exclusion_mask.super,
+		perf_change_seq_pmu->super.clk_domains_exclusion_mask.super.bitcount,
+		&info_set.info_set.super.clk_domains_exclusion_mask.super);
+	if ( status != 0 ) {
+		nvgpu_err(g, "Could not export clkdomains exclusion mask");
+		goto perf_change_seq_pmu_setup_exit;
+	}
+
+	status = boardobjgrpmask_export(
+		&perf_change_seq_pmu->super.clk_domains_inclusion_mask.super,
+		perf_change_seq_pmu->super.clk_domains_inclusion_mask.super.bitcount,
+		&info_set.info_set.super.clk_domains_inclusion_mask.super);
+	if ( status != 0 ) {
+		nvgpu_err(g, "Could not export clkdomains inclusion mask");
+		goto perf_change_seq_pmu_setup_exit;
+	}
+
+	info_set.info_set.b_vf_point_check_ignore =
+		perf_change_seq_pmu->b_vf_point_check_ignore;
+	info_set.info_set.cpu_step_id_mask =
+		perf_change_seq_pmu->cpu_step_id_mask;
+	info_set.info_set.b_lock =
+		perf_change_seq_pmu->b_lock;
+
+	perf_change_seq_pmu->script_last.super_surface_offset =
+		(u32) offsetof(struct nv_pmu_super_surface,
+		change_seq.script_last);
+
+	nvgpu_mem_rd_n(g, &pmu->super_surface_buf,
+		perf_change_seq_pmu->script_last.super_surface_offset,
+		&perf_change_seq_pmu->script_last.buf,
+		(u32) sizeof(struct perf_change_seq_pmu_script));
+
+	/* Assume everything is P0 - Need to find the index for P0 */
+	perf_change_seq_pmu->script_last.buf.change.data.pstate_index = 0;
+
+	nvgpu_mem_wr_n(g, &pmu->super_surface_buf,
+		perf_change_seq_pmu->script_last.super_surface_offset,
+		&perf_change_seq_pmu->script_last.buf,
+		(u32) sizeof(struct perf_change_seq_pmu_script));
+
+	/* Continue with PMU setup, assume FB map is done */
+	PMU_RPC_EXECUTE_CPB(status, pmu, PERF, CHANGE_SEQ_INFO_SET, &info_set, 0);
+	if (status != 0) {
+		nvgpu_err(g,
+			"Failed to execute Change Seq SET RPC status=0x%x",
+			status);
+		goto perf_change_seq_pmu_setup_exit;
+	}
+
+	/* Perf Load*/
+	status = perf_pmu_load(g);
+	if (status != 0) {
+		nvgpu_err(g, "Failed to Load Perf");
+	}
+
+perf_change_seq_pmu_setup_exit:
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/pmu_perf/change_seq.h b/drivers/gpu/nvgpu/pmu_perf/change_seq.h
new file mode 100644
index 000000000..b82f0ef57
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmu_perf/change_seq.h
@@ -0,0 +1,63 @@
+/*
+ * PMU perf change sequencer structures & definitions
+ *
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_CHANGE_SEQ_H
+#define NVGPU_CHANGE_SEQ_H
+
+#include
+/* Host copy of one PMU script plus its byte offset inside struct nv_pmu_super_surface. */
+struct change_seq_pmu_script {
+	struct perf_change_seq_pmu_script buf;
+	u32 super_surface_offset;
+};
+/* Base change sequencer state; embedded as "super" in struct change_seq_pmu. */
+struct change_seq {
+	u8 version; /* CTRL_PERF_CHANGE_SEQ_VERSION_* */
+	bool b_enabled_pmu_support;
+	u32 thread_seq_id_last;
+	u64 thread_carry_over_timens;
+	struct ctrl_perf_change_seq_change last_pstate_values;
+	struct boardobjgrpmask_e32 clk_domains_exclusion_mask;
+	struct boardobjgrpmask_e32 clk_domains_inclusion_mask;
+	u32 client_lock_mask;
+};
+/* PMU change sequencer state; stored as perf_pmupstate.changeseq_pmu. */
+struct change_seq_pmu {
+	struct change_seq super;
+	bool b_lock;
+	bool b_vf_point_check_ignore;
+	u32 cpu_adverised_step_id_mask; /* (sic) "adverised" */
+	u32 cpu_step_id_mask;
+	u32 event_mask_pending;
+	u32 event_mask_received;
+	u32 last_completed_change_Seq_id;
+	struct change_seq_pmu_script script_curr;
+	struct change_seq_pmu_script script_last; /* the only script change_seq.c reads/writes */
+	struct change_seq_pmu_script script_query;
+};
+/* Both return 0 on success and a non-zero error code otherwise. */
+int nvgpu_perf_change_seq_sw_setup(struct gk20a *g);
+int nvgpu_perf_change_seq_pmu_setup(struct gk20a *g);
+
+#endif /* NVGPU_CHANGE_SEQ_H */
diff --git a/drivers/gpu/nvgpu/pmu_perf/pmu_perf.h b/drivers/gpu/nvgpu/pmu_perf/pmu_perf.h
index c66c53946..ee0d1dfcf 100644
--- a/drivers/gpu/nvgpu/pmu_perf/pmu_perf.h
+++ b/drivers/gpu/nvgpu/pmu_perf/pmu_perf.h
@@ -28,6 +28,7 @@ #include "pstate/pstate.h" #include "volt/volt.h" #include "lpwr/lpwr.h" +#include "change_seq.h" #define CTRL_PERF_VFE_VAR_TYPE_INVALID 0x00U #define CTRL_PERF_VFE_VAR_TYPE_DERIVED 0x01U
@@ -77,6 +78,7 @@ struct perf_pmupstate { struct obj_volt volt; struct obj_lwpr lpwr; struct nvgpu_vfe_invalidate vfe_init; + struct change_seq_pmu changeseq_pmu; }; int perf_pmu_init_pmupstate(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index a2cce532a..a3caa1a14 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -28,6 +28,7 @@ #include "clk/clk.h" #include "pmu_perf/pmu_perf.h" +#include "pmu_perf/change_seq.h" #include "pmgr/pmgr.h" #include "pstate/pstate.h" #include "therm/thrm.h"
@@ -150,6 +151,13 @@ int gk20a_init_pstate_support(struct gk20a *g) } } + if(g->ops.pmu_perf.support_changeseq) { /* change sequencer SW state is only set up on chips whose HAL enables it */ + err = nvgpu_perf_change_seq_sw_setup(g); + if (err != 0) { + goto err_clk_init_pmupstate; /* NOTE(review): this skips the err_therm_pmu_init_pmupstate and err_perf_pmu_init_pmupstate labels - confirm nothing set up earlier needs freeing here */ + } + } + return 0; err_therm_pmu_init_pmupstate:
@@ -158,6 +166,7 @@ err_perf_pmu_init_pmupstate: perf_pmu_free_pmupstate(g); err_clk_init_pmupstate: clk_free_pmupstate(g); + return err; }
@@ -251,6 +260,7 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g) return err; } } + err = clk_pmu_vin_load(g); if (err != 0U) { return err;
@@ -265,6 +275,13 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g) err = pmgr_domain_pmu_setup(g); } + if(g->ops.pmu_perf.support_changeseq) { /* PMU-side change sequencer bring-up, gated by the same HAL flag */ + err = nvgpu_perf_change_seq_pmu_setup(g); + if (err != 0U) { + return err; + } + } + return err; }