mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-25 02:52:51 +03:00
gpu: nvgpu: gv11b: Add GCC L1.5 parity support
Add handling of GCC L1.5 parity exception.

JIRA GPUT19X-86

Change-Id: Ie83fc306d3dff79b0ddaf2616dcf0ff71fccd4ca
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1485834
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
5a08eafbe0
commit
45ca7cb8c5
@@ -556,6 +556,84 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
bool *post_event, struct channel_gk20a *fault_ch,
|
||||
u32 *hww_global_esr)
|
||||
{
|
||||
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
||||
u32 offset = gpc_stride * gpc;
|
||||
u32 gcc_l15_ecc_status, gcc_l15_ecc_corrected_err_status = 0;
|
||||
u32 gcc_l15_ecc_uncorrected_err_status = 0;
|
||||
u32 gcc_l15_corrected_err_count_delta = 0;
|
||||
u32 gcc_l15_uncorrected_err_count_delta = 0;
|
||||
bool is_gcc_l15_ecc_corrected_total_err_overflow = 0;
|
||||
bool is_gcc_l15_ecc_uncorrected_total_err_overflow = 0;
|
||||
|
||||
/* Check for gcc l15 ECC errors. */
|
||||
gcc_l15_ecc_status = gk20a_readl(g,
|
||||
gr_pri_gpc0_gcc_l15_ecc_status_r() + offset);
|
||||
gcc_l15_ecc_corrected_err_status = gcc_l15_ecc_status &
|
||||
(gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m() |
|
||||
gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m());
|
||||
gcc_l15_ecc_uncorrected_err_status = gcc_l15_ecc_status &
|
||||
(gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m() |
|
||||
gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m());
|
||||
|
||||
if ((gcc_l15_ecc_corrected_err_status == 0) && (gcc_l15_ecc_uncorrected_err_status == 0))
|
||||
return 0;
|
||||
|
||||
gcc_l15_corrected_err_count_delta =
|
||||
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v(
|
||||
gk20a_readl(g,
|
||||
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() +
|
||||
offset));
|
||||
gcc_l15_uncorrected_err_count_delta =
|
||||
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v(
|
||||
gk20a_readl(g,
|
||||
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() +
|
||||
offset));
|
||||
is_gcc_l15_ecc_corrected_total_err_overflow =
|
||||
gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(gcc_l15_ecc_status);
|
||||
is_gcc_l15_ecc_uncorrected_total_err_overflow =
|
||||
gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(gcc_l15_ecc_status);
|
||||
|
||||
if ((gcc_l15_corrected_err_count_delta > 0) || is_gcc_l15_ecc_corrected_total_err_overflow) {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
|
||||
"corrected error (SBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]",
|
||||
gcc_l15_ecc_corrected_err_status, is_gcc_l15_ecc_corrected_total_err_overflow);
|
||||
|
||||
/* HW uses 16-bits counter */
|
||||
gcc_l15_corrected_err_count_delta +=
|
||||
(is_gcc_l15_ecc_corrected_total_err_overflow <<
|
||||
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
|
||||
g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count.counters[gpc] +=
|
||||
gcc_l15_corrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
|
||||
0);
|
||||
}
|
||||
if ((gcc_l15_uncorrected_err_count_delta > 0) || is_gcc_l15_ecc_uncorrected_total_err_overflow) {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
|
||||
"Uncorrected error (DBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]",
|
||||
gcc_l15_ecc_uncorrected_err_status, is_gcc_l15_ecc_uncorrected_total_err_overflow);
|
||||
|
||||
/* HW uses 16-bits counter */
|
||||
gcc_l15_uncorrected_err_count_delta +=
|
||||
(is_gcc_l15_ecc_uncorrected_total_err_overflow <<
|
||||
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
|
||||
g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count.counters[gpc] +=
|
||||
gcc_l15_uncorrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
|
||||
0);
|
||||
}
|
||||
|
||||
gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_status_r() + offset,
|
||||
gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
@@ -567,7 +645,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
|
||||
tpc_mask =
|
||||
gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1);
|
||||
|
||||
gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask);
|
||||
gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
|
||||
(tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1)));
|
||||
}
|
||||
|
||||
static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
@@ -2113,6 +2192,7 @@ void gv11b_init_gr(struct gpu_ops *gops)
|
||||
gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask;
|
||||
gops->gr.get_access_map = gr_gv11b_get_access_map;
|
||||
gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception;
|
||||
gops->gr.handle_gcc_exception = gr_gv11b_handle_gcc_exception;
|
||||
gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception;
|
||||
gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions;
|
||||
gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr;
|
||||
|
||||
@@ -45,6 +45,8 @@ struct gr_t19x {
|
||||
struct gr_gp10b_ecc_stat sm_l1_data_uncorrected_err_count;
|
||||
struct gr_gp10b_ecc_stat sm_icache_corrected_err_count;
|
||||
struct gr_gp10b_ecc_stat sm_icache_uncorrected_err_count;
|
||||
struct gr_gp10b_ecc_stat gcc_l15_corrected_err_count;
|
||||
struct gr_gp10b_ecc_stat gcc_l15_uncorrected_err_count;
|
||||
} ecc_stats;
|
||||
};
|
||||
|
||||
|
||||
@@ -131,6 +131,8 @@ static struct device_attribute *dev_attr_sm_l1_data_ecc_corrected_err_count_arra
|
||||
static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array;
|
||||
static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array;
|
||||
static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array;
|
||||
static struct device_attribute *dev_attr_gcc_l15_ecc_corrected_err_count_array;
|
||||
static struct device_attribute *dev_attr_gcc_l15_ecc_uncorrected_err_count_array;
|
||||
|
||||
void gr_gv11b_create_sysfs(struct device *dev)
|
||||
{
|
||||
@@ -193,6 +195,18 @@ void gr_gv11b_create_sysfs(struct device *dev)
|
||||
&g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count,
|
||||
dev_attr_sm_icache_ecc_uncorrected_err_count_array);
|
||||
|
||||
error |= gr_gp10b_ecc_stat_create(dev,
|
||||
0,
|
||||
"gcc_l15_ecc_corrected_err_count",
|
||||
&g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count,
|
||||
dev_attr_gcc_l15_ecc_corrected_err_count_array);
|
||||
|
||||
error |= gr_gp10b_ecc_stat_create(dev,
|
||||
0,
|
||||
"gcc_l15_ecc_uncorrected_err_count",
|
||||
&g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count,
|
||||
dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
|
||||
|
||||
if (error)
|
||||
dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
|
||||
}
|
||||
@@ -241,4 +255,14 @@ static void gr_gv11b_remove_sysfs(struct device *dev)
|
||||
&g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count,
|
||||
dev_attr_sm_icache_ecc_uncorrected_err_count_array);
|
||||
|
||||
gr_gp10b_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count,
|
||||
dev_attr_gcc_l15_ecc_corrected_err_count_array);
|
||||
|
||||
gr_gp10b_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count,
|
||||
dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
|
||||
|
||||
}
|
||||
|
||||
@@ -3370,6 +3370,10 @@ static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
|
||||
{
|
||||
return 0x0041ac94;
|
||||
}
|
||||
/*
 * Build the GCC field of a GPC exception-enable value: only bit 0 of @v is
 * kept, and it is placed at bit position 2 of the result.
 */
static inline u32 gr_gpcs_gpccs_gpc_exception_en_gcc_f(u32 v)
{
	u32 field = v & 0x1;

	return field << 2;
}
|
||||
static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v)
|
||||
{
|
||||
return (v & 0xff) << 16;
|
||||
@@ -3378,6 +3382,10 @@ static inline u32 gr_gpc0_gpccs_gpc_exception_r(void)
|
||||
{
|
||||
return 0x00502c90;
|
||||
}
|
||||
/*
 * Extract the GCC field from a GPC exception register value: returns bit 2
 * of @r as 0 or 1.
 */
static inline u32 gr_gpc0_gpccs_gpc_exception_gcc_v(u32 r)
{
	u32 shifted = r >> 2;

	return shifted & 0x1;
}
|
||||
static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r)
|
||||
{
|
||||
return (r >> 16) & 0xff;
|
||||
@@ -3386,6 +3394,62 @@ static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_0_pending_v(void)
|
||||
{
|
||||
return 0x00000001;
|
||||
}
|
||||
/*
 * Constant 0x00501048. The GCC exception handler adds a per-GPC offset to
 * this value and uses the sum as the address of the L1.5 ECC status
 * register.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_r(void)
{
	const u32 addr = 0x00501048;

	return addr;
}
|
||||
/* Mask with only bit 0 set (corrected error, bank 0, per the name). */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m(void)
{
	return 1U << 0;
}
|
||||
/* Mask with only bit 1 set (corrected error, bank 1, per the name). */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m(void)
{
	return 1U << 1;
}
|
||||
/* Mask with only bit 4 set (uncorrected error, bank 0, per the name). */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m(void)
{
	return 1U << 4;
}
|
||||
/* Mask with only bit 5 set (uncorrected error, bank 1, per the name). */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m(void)
{
	return 1U << 5;
}
|
||||
/*
 * Extract bit 8 of an L1.5 ECC status value @r as 0 or 1. The GCC exception
 * handler treats a non-zero result as "corrected error counter overflowed".
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(u32 r)
{
	u32 shifted = r >> 8;

	return shifted & 0x1;
}
|
||||
/*
 * Extract bit 10 of an L1.5 ECC status value @r as 0 or 1. The GCC exception
 * handler treats a non-zero result as "uncorrected error counter overflowed".
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(u32 r)
{
	u32 shifted = r >> 10;

	return shifted & 0x1;
}
|
||||
/*
 * Constant 0x40000000 (bit 30 set). The GCC exception handler writes this
 * value to the L1.5 ECC status register once it has finished servicing the
 * error counters.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f(void)
{
	const u32 reset_task = 0x40000000;

	return reset_task;
}
|
||||
/*
 * Constant 0x0050104c. The GCC exception handler adds a per-GPC offset to
 * this value and uses the sum as the address of the L1.5 corrected error
 * count register.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r(void)
{
	const u32 addr = 0x0050104c;

	return addr;
}
|
||||
/*
 * Constant 16. The GCC exception handler uses it as the shift amount when
 * accounting for an overflow of the corrected error counter, which matches
 * the 16-bit mask applied by the corresponding _total_v() accessor.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s(void)
{
	const u32 width = 16;

	return width;
}
|
||||
/*
 * Extract the TOTAL field from a corrected error count register value:
 * returns the low 16 bits of @r.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v(u32 r)
{
	/* The field starts at bit 0, so no shift is required. */
	return r & 0xffff;
}
|
||||
/*
 * Constant 0x00501054. The GCC exception handler adds a per-GPC offset to
 * this value and uses the sum as the address of the L1.5 uncorrected error
 * count register.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r(void)
{
	const u32 addr = 0x00501054;

	return addr;
}
|
||||
/*
 * Constant 16. The GCC exception handler uses it as the shift amount when
 * accounting for an overflow of the uncorrected error counter, which matches
 * the 16-bit mask applied by the corresponding _total_v() accessor.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s(void)
{
	const u32 width = 16;

	return width;
}
|
||||
/*
 * Extract the TOTAL field from an uncorrected error count register value:
 * returns the low 16 bits of @r.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v(u32 r)
{
	/* The field starts at bit 0, so no shift is required. */
	return r & 0xffff;
}
|
||||
static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void)
|
||||
{
|
||||
return 0x00504508;
|
||||
|
||||
Reference in New Issue
Block a user