gpu: nvgpu: Freq Mon/Clock Mon Common Implementation

FMON/Clock Mon detects faults in a particular clock domain.
To do this we need to poll a specified master register to know if there
is any fault. If it is set, we scan all the available clock domains
to see which domain has faulted and the type of fault.
This CL adds all the required common functions to monitor
the registers of the different clock domains.

Bug 2182063
NVGPU-3846

Change-Id: I6a2bdb65335eaeef995eb163d480ee722c230311
Signed-off-by: Abdul Salam <absalam@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2170887
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Abdul Salam
2019-11-18 15:15:37 +05:30
committed by Alex Waterman
parent 86ab9c2ebc
commit 2879c4f21e
7 changed files with 385 additions and 2 deletions

View File

@@ -59,6 +59,281 @@
#define XTAL4X_KHZ 108000
#define BOOT_GPCCLK_MHZ 645U
/**
 * FMON register types
 *
 * Each value is used as an index into the reg_add[] array of a
 * struct clk_mon_address_map entry, selecting which of a clock domain's
 * monitor register addresses is returned by nvgpu_clk_mon_reg_get().
 */
/* Slot holding the fault threshold high register address. */
#define FMON_THRESHOLD_HIGH 0x0U
/* Slot holding the fault threshold low register address. */
#define FMON_THRESHOLD_LOW 0x1U
/* Slot holding the fault status register address. */
#define FMON_FAULT_STATUS 0x2U
/* Slot holding the fault priv level mask register address. */
#define FMON_FAULT_STATUS_PRIV_MASK 0x3U
/**
* Mapping between the clk api domain and the various clock monitor registers
*/
static struct clk_mon_address_map clock_mon_map_tu104[] = {
{
CTRL_CLK_DOMAIN_GPCCLK,
{
trim_gpcclk_fault_threshold_high_r(),
trim_gpcclk_fault_threshold_low_r(),
trim_gpcclk_fault_status_r(),
trim_gpcclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_SYSCLK,
{
trim_sysclk_fault_threshold_high_r(),
trim_sysclk_fault_threshold_low_r(),
trim_sysclk_fault_status_r(),
trim_sysclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_HUBCLK,
{
trim_hubclk_fault_threshold_high_r(),
trim_hubclk_fault_threshold_low_r(),
trim_hubclk_fault_status_r(),
trim_hubclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_HOSTCLK,
{
trim_hostclk_fault_threshold_high_r(),
trim_hostclk_fault_threshold_low_r(),
trim_hostclk_fault_status_r(),
trim_hostclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_XBARCLK,
{
trim_xbarclk_fault_threshold_high_r(),
trim_xbarclk_fault_threshold_low_r(),
trim_xbarclk_fault_status_r(),
trim_xbarclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_NVDCLK,
{
trim_nvdclk_fault_threshold_high_r(),
trim_nvdclk_fault_threshold_low_r(),
trim_nvdclk_fault_status_r(),
trim_nvdclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_MCLK,
{
trim_dramclk_fault_threshold_high_r(),
trim_dramclk_fault_threshold_low_r(),
trim_dramclk_fault_status_r(),
trim_dramclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_PWRCLK,
{
trim_pwrclk_fault_threshold_high_r(),
trim_pwrclk_fault_threshold_low_r(),
trim_pwrclk_fault_status_r(),
trim_pwrclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_UTILSCLK,
{
trim_utilsclk_fault_threshold_high_r(),
trim_utilsclk_fault_threshold_low_r(),
trim_utilsclk_fault_status_r(),
trim_utilsclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_PEX_REFCLK,
{
trim_pex_refclk_fault_threshold_high_r(),
trim_pex_refclk_fault_threshold_low_r(),
trim_pex_refclk_fault_status_r(),
trim_pex_refclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_NVL_COMMON,
{
trim_nvl_commonclk_fault_threshold_high_r(),
trim_nvl_commonclk_fault_threshold_low_r(),
trim_nvl_commonclk_fault_status_r(),
trim_nvl_commonclk_fault_priv_level_mask_r(),
}
},
{
CTRL_CLK_DOMAIN_XCLK,
{
trim_xclk_fault_threshold_high_r(),
trim_xclk_fault_threshold_low_r(),
trim_xclk_fault_status_r(),
trim_xclk_fault_priv_level_mask_r(),
}
},
};
/**
 * Find the clock_mon_map_tu104 slot that describes a clk api domain.
 *
 * @param g              GPU driver context (not used by the lookup itself).
 * @param clk_api_domain CTRL_CLK_DOMAIN_* value to search for.
 * @param idx            Receives the matching table index; left untouched
 *                       when no entry matches.
 *
 * @return 0 when a matching entry is found, -EINVAL otherwise.
 */
static int nvgpu_clk_mon_idx_get(struct gk20a *g, u32 clk_api_domain, u8 *idx)
{
	int ret = -EINVAL;
	u8 slot = 0;

	while (slot < CTRL_CLK_CLK_DOMAIN_ARCH_MAX_DOMAINS) {
		if (clock_mon_map_tu104[slot].clk_api_domain ==
				clk_api_domain) {
			/* First match wins; stop scanning. */
			*idx = slot;
			ret = 0;
			break;
		}
		slot++;
	}

	return ret;
}
/**
 * Look up the address of one clock monitor register of a clk api domain.
 *
 * @param g              GPU driver context, used for error logging.
 * @param clk_api_domain CTRL_CLK_DOMAIN_* value identifying the domain.
 * @param reg_address    Receives the register address on success; left
 *                       untouched on failure.
 * @param reg_type       One of the FMON_* register type indices; must be
 *                       below CLK_CLOCK_MON_REG_TYPE_COUNT.
 *
 * @return 0 on success, -EINVAL if reg_type is out of range or the domain
 *         has no entry in clock_mon_map_tu104.
 */
static int nvgpu_clk_mon_reg_get(struct gk20a *g, u32 clk_api_domain,
		u32 *reg_address, u32 reg_type)
{
	u8 index = 0;
	int status;

	/*
	 * reg_type indexes reg_add[], which holds exactly
	 * CLK_CLOCK_MON_REG_TYPE_COUNT entries; reject anything beyond that
	 * instead of reading past the end of the array.
	 */
	if (reg_type >= CLK_CLOCK_MON_REG_TYPE_COUNT) {
		nvgpu_err(g, "Invalid clk mon register type 0x%x", reg_type);
		return -EINVAL;
	}

	status = nvgpu_clk_mon_idx_get(g, clk_api_domain, &index);
	if (status != 0) {
		nvgpu_err(g, "Failed to get clk_domain index");
		return -EINVAL;
	}

	*reg_address = clock_mon_map_tu104[index].reg_add[reg_type];

	return 0;
}
/**
 * Extract the DC fault indication from a fault status register value.
 *
 * @param data Value read from a clock domain's fault status register.
 *
 * @return trim_fault_status_dc_m() when the dc field reads true, 0 otherwise.
 */
static u32 nvgpu_check_for_dc_fault(u32 data)
{
	u32 fault_mask = 0U;

	if (trim_fault_status_dc_v(data) == trim_fault_status_dc_true_v()) {
		fault_mask = trim_fault_status_dc_m();
	}

	return fault_mask;
}
/**
 * Extract the lower threshold fault indication from a fault status value.
 *
 * @param data Value read from a clock domain's fault status register.
 *
 * @return trim_fault_status_lower_threshold_m() when the lower_threshold
 *         field reads true, 0 otherwise.
 */
static u32 nvgpu_check_for_lower_threshold_fault(u32 data)
{
	u32 fault_mask = 0U;

	if (trim_fault_status_lower_threshold_v(data) ==
			trim_fault_status_lower_threshold_true_v()) {
		fault_mask = trim_fault_status_lower_threshold_m();
	}

	return fault_mask;
}
/**
 * Extract the higher threshold fault indication from a fault status value.
 *
 * @param data Value read from a clock domain's fault status register.
 *
 * @return trim_fault_status_higher_threshold_m() when the higher_threshold
 *         field reads true, 0 otherwise.
 */
static u32 nvgpu_check_for_higher_threshold_fault(u32 data)
{
	u32 fault_mask = 0U;

	if (trim_fault_status_higher_threshold_v(data) ==
			trim_fault_status_higher_threshold_true_v()) {
		fault_mask = trim_fault_status_higher_threshold_m();
	}

	return fault_mask;
}
/**
 * Extract the overflow indication from a fault status register value.
 *
 * @param data Value read from a clock domain's fault status register.
 *
 * @return trim_fault_status_overflow_m() when the overflow field reads true,
 *         0 otherwise.
 */
static u32 nvgpu_check_for_overflow_err(u32 data)
{
	u32 fault_mask = 0U;

	if (trim_fault_status_overflow_v(data) ==
			trim_fault_status_overflow_true_v()) {
		fault_mask = trim_fault_status_overflow_m();
	}

	return fault_mask;
}
/**
 * Record the fault details of a faulted clock domain.
 *
 * Decodes the individual fault bits from an already-read fault status value,
 * logs the faulted domain, then reads the domain's low and high threshold
 * registers into the result entry.
 *
 * @param g              GPU driver context used for logging and register
 *                       reads.
 * @param i              Index of the domain in both clock_mon_map_tu104 and
 *                       clk_mon_status->clk_mon_list.
 * @param data           Value read from the domain's fault status register.
 * @param clk_mon_status Result structure whose entry i is filled in.
 *
 * @return 0 on success, -EINVAL if a register address lookup fails.
 */
static int nvgpu_clk_mon_get_fault(struct gk20a *g, u32 i, u32 data,
		struct clk_domains_mon_status_params *clk_mon_status)
{
	struct clk_domain_mon_status *entry = &clk_mon_status->clk_mon_list[i];
	const u32 domain = clock_mon_map_tu104[i].clk_api_domain;
	u32 threshold_reg;
	u32 threshold_raw;
	u32 fault_bits;

	/* Fields for faults are same for all clock domains */
	fault_bits = nvgpu_check_for_dc_fault(data);
	fault_bits |= nvgpu_check_for_lower_threshold_fault(data);
	fault_bits |= nvgpu_check_for_higher_threshold_fault(data);
	fault_bits |= nvgpu_check_for_overflow_err(data);
	entry->clk_domain_fault_status = fault_bits;

	nvgpu_err(g, "FMON faulted domain 0x%x value 0x%x",
			entry->clk_api_domain,
			entry->clk_domain_fault_status);

	/* Get the low threshold limit */
	if (nvgpu_clk_mon_reg_get(g, domain, &threshold_reg,
			FMON_THRESHOLD_LOW) != 0) {
		nvgpu_err(g, "Failed to get register address");
		return -EINVAL;
	}
	threshold_raw = nvgpu_readl(g, threshold_reg);
	entry->low_threshold = trim_fault_threshold_low_count_v(threshold_raw);

	/* Get the high threshold limit */
	if (nvgpu_clk_mon_reg_get(g, domain, &threshold_reg,
			FMON_THRESHOLD_HIGH) != 0) {
		nvgpu_err(g, "Failed to get register address");
		return -EINVAL;
	}
	threshold_raw = nvgpu_readl(g, threshold_reg);
	entry->high_threshold =
			trim_fault_threshold_high_count_v(threshold_raw);

	return 0;
}
/**
 * Report whether the FMON master status register flags a fault.
 *
 * @param g GPU driver context used for the register read.
 *
 * @return true when the fault_out field of the master status register reads
 *         true, false otherwise.
 */
bool nvgpu_clk_mon_check_master_fault_status(struct gk20a *g)
{
	const u32 master_status = nvgpu_readl(g, trim_fmon_master_status_r());

	return trim_fmon_master_status_fault_out_v(master_status) ==
			trim_fmon_master_status_fault_out_true_v();
}
int nvgpu_clk_mon_check_status(struct gk20a *g,
struct clk_domains_mon_status_params *clk_mon_status)
{
u32 reg_address;
u32 data, i;
int status;
clk_mon_status->clk_mon_list_size =
CTRL_CLK_CLK_DOMAIN_ARCH_MAX_DOMAINS;
for (i = 0; i < clk_mon_status->clk_mon_list_size; i++) {
clk_mon_status->clk_mon_list[i].clk_api_domain =
clock_mon_map_tu104[i].clk_api_domain;
status = nvgpu_clk_mon_reg_get(g,
clk_mon_status->clk_mon_list[i].clk_api_domain,
&reg_address, FMON_FAULT_STATUS);
if (status != 0) {
nvgpu_err(g, "Failed to get register address");
return -EINVAL;
}
data = nvgpu_readl(g, reg_address);
clk_mon_status->clk_mon_list[i].clk_domain_fault_status = 0U;
/* Check FMON fault status, fault_out field is same for all */
if (trim_fault_status_fault_out_v(data) ==
trim_fault_status_fault_out_true_v()) {
status = nvgpu_clk_mon_get_fault(g, i, data,
clk_mon_status);
if (status != 0) {
nvgpu_err(g, "Failed to get fault status");
return -EINVAL;
}
}
}
return 0;
}
u32 tu104_crystal_clk_hz(struct gk20a *g)
{
return (XTAL4X_KHZ * 1000);

View File

@@ -38,4 +38,8 @@ int tu104_clk_domain_get_f_points(
unsigned long tu104_clk_maxrate(struct gk20a *g, u32 api_domain);
void tu104_get_change_seq_time(struct gk20a *g, s64 *change_time);
void tu104_change_host_clk_source(struct gk20a *g);
/**
 * Read the FMON master status register and report whether its fault_out
 * field is set.
 *
 * @param g GPU driver context.
 *
 * @return true if fault_out reads true, false otherwise.
 */
bool nvgpu_clk_mon_check_master_fault_status(struct gk20a *g);
/**
 * Read the fault status of every monitored clock domain into clk_mon_status.
 * For domains whose fault_out field is set, the decoded fault bits and the
 * low/high threshold values are recorded as well.
 *
 * @param g              GPU driver context.
 * @param clk_mon_status Caller-provided structure that receives the results.
 *
 * @return 0 on success, -EINVAL on failure.
 */
int nvgpu_clk_mon_check_status(struct gk20a *g, struct
clk_domains_mon_status_params *clk_mon_status);
#endif /* CLK_TU104_H */

View File

@@ -181,7 +181,6 @@
#include "common/clk_arb/clk_arb_gv100.h"
#include "hal/clk/clk_tu104.h"
#include "hal/fbpa/fbpa_tu104.h"
#include "hal_tu104.h"
#include "hal_tu104_litter.h"
@@ -1242,6 +1241,9 @@ static const struct gpu_ops tu104_ops = {
.get_maxrate = tu104_clk_maxrate,
.get_change_seq_time = tu104_get_change_seq_time,
.change_host_clk_source = tu104_change_host_clk_source,
.clk_mon_check_master_fault_status =
nvgpu_clk_mon_check_master_fault_status,
.clk_mon_check_status = nvgpu_clk_mon_check_status,
},
#ifdef CONFIG_NVGPU_CLK_ARB
.clk_arb = {
@@ -1646,6 +1648,7 @@ int tu104_init_hal(struct gk20a *g)
nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_DGPU_PCIE_SCRIPT_EXECUTE, true);
nvgpu_set_enabled(g, NVGPU_FMON_SUPPORT_ENABLE, true);
/* for now */
gops->clk.support_pmgr_domain = false;

View File

@@ -240,10 +240,13 @@ struct gk20a;
/** DGPU PCIe Script Update */
#define NVGPU_SUPPORT_DGPU_PCIE_SCRIPT_EXECUTE 82U
/** FMON feature Enable */
#define NVGPU_FMON_SUPPORT_ENABLE 83U
/*
* Must be greater than the largest bit offset in the above list.
*/
#define NVGPU_MAX_ENABLED_BITS 83U
#define NVGPU_MAX_ENABLED_BITS 84U
/**
* @brief Check if the passed flag is enabled.

View File

@@ -108,6 +108,7 @@ struct _resmgr_context;
struct nvgpu_gpfifo_entry;
struct vm_gk20a_mapping_batch;
struct pmu_pg_stats_data;
struct clk_domains_mon_status_params;
enum nvgpu_flush_op;
enum gk20a_mem_rw_flag;
@@ -367,6 +368,9 @@ struct gpu_ops {
bool support_clk_freq_domain;
bool support_vf_point;
u8 lut_num_entries;
bool (*clk_mon_check_master_fault_status)(struct gk20a *g);
int (*clk_mon_check_status)(struct gk20a *g,
struct clk_domains_mon_status_params *clk_mon_status);
} clk;
#ifdef CONFIG_NVGPU_CLK_ARB
struct {

View File

@@ -139,4 +139,75 @@
#define trim_sys_fr_clk_cntr_sysclk_cntr1_r() (0x00137634U)
#define trim_sys_ind_clk_sys_core_clksrc_r() (0x00137c00U)
#define trim_sys_ind_clk_sys_core_clksrc_hostclk_fll_f() (0x180U)
/*
 * Generic fault threshold / fault status definitions.
 * The three _r() addresses in this group have the same values as the
 * trim_gpcclk_fault_* register definitions.
 */
/* Threshold registers: _count_v() extracts the full 32-bit count field. */
#define trim_fault_threshold_high_r() (0x00132af0U)
#define trim_fault_threshold_high_count_v(r) (((r) >> 0U) & 0xffffffffU)
#define trim_fault_threshold_low_r() (0x00132af4U)
#define trim_fault_threshold_low_count_v(r) (((r) >> 0U) & 0xffffffffU)
/*
 * Fault status register fields. For each field, _m() is the in-register bit
 * mask, _v(r) extracts the field value from a register value r, and
 * _true_v() is the field value to compare against.
 */
#define trim_fault_status_r() (0x00132b0cU)
/* dc: bit 0 */
#define trim_fault_status_dc_m() (U32(0x1U) << 0U)
#define trim_fault_status_dc_v(r) (((r) >> 0U) & 0x1U)
#define trim_fault_status_dc_true_v() (0x00000001U)
/* lower_threshold: bit 1 */
#define trim_fault_status_lower_threshold_m() (U32(0x1U) << 1U)
#define trim_fault_status_lower_threshold_v(r) (((r) >> 1U) & 0x1U)
#define trim_fault_status_lower_threshold_true_v() (0x00000001U)
/* higher_threshold: bit 2 */
#define trim_fault_status_higher_threshold_m() (U32(0x1U) << 2U)
#define trim_fault_status_higher_threshold_v(r) (((r) >> 2U) & 0x1U)
#define trim_fault_status_higher_threshold_true_v() (0x00000001U)
/* overflow: bit 3 */
#define trim_fault_status_overflow_m() (U32(0x1U) << 3U)
#define trim_fault_status_overflow_v(r) (((r) >> 3U) & 0x1U)
#define trim_fault_status_overflow_true_v() (0x00000001U)
/* fault_out: bit 4 */
#define trim_fault_status_fault_out_v(r) (((r) >> 4U) & 0x1U)
#define trim_fault_status_fault_out_true_v() (0x00000001U)
/*
 * Per clock domain monitor register addresses. Each domain defines four
 * registers: priv level mask, threshold high, threshold low and fault status.
 * Several domains below share the same priv level mask address (0x00137b80).
 */
/* gpcclk */
#define trim_gpcclk_fault_priv_level_mask_r() (0x00132bb0U)
#define trim_gpcclk_fault_threshold_high_r() (0x00132af0U)
#define trim_gpcclk_fault_threshold_low_r() (0x00132af4U)
#define trim_gpcclk_fault_status_r() (0x00132b0cU)
/* sysclk */
#define trim_sysclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_sysclk_fault_threshold_high_r() (0x00137674U)
#define trim_sysclk_fault_threshold_low_r() (0x00137678U)
#define trim_sysclk_fault_status_r() (0x00137690U)
/* hubclk */
#define trim_hubclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_hubclk_fault_threshold_high_r() (0x001376b4U)
#define trim_hubclk_fault_threshold_low_r() (0x001376b8U)
#define trim_hubclk_fault_status_r() (0x001376d0U)
/* hostclk */
#define trim_hostclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_hostclk_fault_threshold_high_r() (0x00137774U)
#define trim_hostclk_fault_threshold_low_r() (0x00137778U)
#define trim_hostclk_fault_status_r() (0x00137790U)
/* xbarclk */
#define trim_xbarclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_xbarclk_fault_threshold_high_r() (0x00137980U)
#define trim_xbarclk_fault_threshold_low_r() (0x00137984U)
#define trim_xbarclk_fault_status_r() (0x0013799cU)
/* nvdclk */
#define trim_nvdclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_nvdclk_fault_threshold_high_r() (0x001379c0U)
#define trim_nvdclk_fault_threshold_low_r() (0x001379c4U)
#define trim_nvdclk_fault_status_r() (0x001379dcU)
/* dramclk */
#define trim_dramclk_fault_priv_level_mask_r() (0x001321e4U)
#define trim_dramclk_fault_threshold_high_r() (0x001321a0U)
#define trim_dramclk_fault_threshold_low_r() (0x001321a4U)
#define trim_dramclk_fault_status_r() (0x001321bcU)
/* pwrclk */
#define trim_pwrclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_pwrclk_fault_threshold_high_r() (0x001376f4U)
#define trim_pwrclk_fault_threshold_low_r() (0x001376f8U)
#define trim_pwrclk_fault_status_r() (0x00137710U)
/* utilsclk */
#define trim_utilsclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_utilsclk_fault_threshold_high_r() (0x00137734U)
#define trim_utilsclk_fault_threshold_low_r() (0x00137738U)
#define trim_utilsclk_fault_status_r() (0x00137750U)
/* pex_refclk */
#define trim_pex_refclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_pex_refclk_fault_threshold_high_r() (0x001377b4U)
#define trim_pex_refclk_fault_threshold_low_r() (0x001377b8U)
#define trim_pex_refclk_fault_status_r() (0x001377d0U)
/* nvl_commonclk */
#define trim_nvl_commonclk_fault_priv_level_mask_r() (0x00137b80U)
#define trim_nvl_commonclk_fault_threshold_high_r() (0x00137940U)
#define trim_nvl_commonclk_fault_threshold_low_r() (0x00137944U)
#define trim_nvl_commonclk_fault_status_r() (0x0013795cU)
/* xclk */
#define trim_xclk_fault_priv_level_mask_r() (0x00137b00U)
#define trim_xclk_fault_threshold_high_r() (0x00137900U)
#define trim_xclk_fault_threshold_low_r() (0x00137904U)
#define trim_xclk_fault_status_r() (0x0013791cU)
/*
 * FMON master status register. fault_out is bit 0; _v(r) extracts it from a
 * register value r and _true_v() is the value to compare against.
 */
#define trim_fmon_master_status_priv_mask_r() (0x00137b40U)
#define trim_fmon_master_status_r() (0x00137a00U)
#define trim_fmon_master_status_fault_out_v(r) (((r) >> 0U) & 0x1U)
#define trim_fmon_master_status_fault_out_true_v() (0x00000001U)
#endif

View File

@@ -42,6 +42,9 @@
#define CTRL_CLK_DOMAIN_PWRCLK (0x00080000U)
#define CTRL_CLK_DOMAIN_NVDCLK (0x00100000U)
#define CTRL_CLK_DOMAIN_PCIEGENCLK (0x00200000U)
#define CTRL_CLK_DOMAIN_XCLK (0x04000000U)
#define CTRL_CLK_DOMAIN_NVL_COMMON (0x08000000U)
#define CTRL_CLK_DOMAIN_PEX_REFCLK (0x10000000U)
#define CTRL_CLK_DOMAIN_GPCCLK (0x00000001U)
#define CTRL_CLK_DOMAIN_XBARCLK (0x00000002U)
@@ -95,6 +98,8 @@
#define CTRL_CLK_CLK_VF_POINT_TYPE_UNKNOWN 255U
#define CTRL_CLK_CLK_DOMAIN_CLIENT_MAX_DOMAINS 16
/* Number of entries in clk_domains_mon_status_params::clk_mon_list. */
#define CTRL_CLK_CLK_DOMAIN_ARCH_MAX_DOMAINS 12U
/* Number of register addresses kept per domain in clk_mon_address_map::reg_add. */
#define CLK_CLOCK_MON_REG_TYPE_COUNT 4U
struct ctrl_clk_domain_control_35_prog_clk_mon {
u32 flags;
@@ -278,6 +283,24 @@ struct ctrl_clk_domain_clk_mon_list {
clk_domain[CTRL_CLK_CLK_DOMAIN_CLIENT_MAX_DOMAINS];
};
/* Clock monitor status reported for a single clock domain. */
struct clk_domain_mon_status {
/* CTRL_CLK_DOMAIN_* identifier of the domain this entry describes. */
u32 clk_api_domain;
/* Count field read from the domain's fault threshold low register. */
u32 low_threshold;
/* Count field read from the domain's fault threshold high register. */
u32 high_threshold;
/* OR of the trim_fault_status_*_m() masks of the detected faults; 0 if none. */
u32 clk_domain_fault_status;
};
/* Associates a clk api domain with its clock monitor register addresses. */
struct clk_mon_address_map {
/* CTRL_CLK_DOMAIN_* identifier used as the lookup key. */
u32 clk_api_domain;
/* Register addresses for this domain, one per clock monitor register type. */
u32 reg_add[CLK_CLOCK_MON_REG_TYPE_COUNT];
};
/* Clock monitor status for all monitored clock domains. */
struct clk_domains_mon_status_params {
/* Number of entries of clk_mon_list that are populated. */
u32 clk_mon_list_size;
/* Per-domain status entries. */
struct clk_domain_mon_status
clk_mon_list[CTRL_CLK_CLK_DOMAIN_ARCH_MAX_DOMAINS];
};
#define CTRL_CLK_VF_PAIR_FREQ_MHZ_GET(pvfpair) \
((pvfpair)->freq_mhz)