mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-25 02:52:51 +03:00
gpu: nvgpu: L2 cache tag ECC support
Adding support for L2 cache tag ECC error handling

JIRA: GPUT19X-112

Change-Id: I9a8ebefe97814b341f57a024dfb126013adaac1c
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1489029
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
81172b5df4
commit
6bc36bded0
@@ -33,4 +33,9 @@ struct ecc_gr_t19x {
|
||||
struct gk20a_ecc_stat gpccs_uncorrected_err_count;
|
||||
};
|
||||
|
||||
struct ecc_ltc_t19x {
|
||||
struct gk20a_ecc_stat l2_cache_corrected_err_count;
|
||||
struct gk20a_ecc_stat l2_cache_uncorrected_err_count;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "ltc_gv11b.h"
|
||||
|
||||
#include <nvgpu/hw/gv11b/hw_ltc_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_top_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_pri_ringmaster_gv11b.h>
|
||||
|
||||
@@ -74,6 +75,111 @@ static void gv11b_ltc_init_fs_state(struct gk20a *g)
|
||||
ltc_intr);
|
||||
}
|
||||
|
||||
static void gv11b_ltc_isr(struct gk20a *g)
|
||||
{
|
||||
u32 mc_intr, ltc_intr3;
|
||||
unsigned int ltc, slice;
|
||||
u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
|
||||
u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
|
||||
u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
|
||||
u32 corrected_delta, uncorrected_delta;
|
||||
u32 corrected_overflow, uncorrected_overflow;
|
||||
u32 ltc_corrected, ltc_uncorrected;
|
||||
|
||||
mc_intr = gk20a_readl(g, mc_intr_ltc_r());
|
||||
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
||||
if ((mc_intr & 1 << ltc) == 0)
|
||||
continue;
|
||||
ltc_corrected = ltc_uncorrected = 0;
|
||||
|
||||
for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
|
||||
u32 offset = ltc_stride * ltc + lts_stride * slice;
|
||||
ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() +
|
||||
offset);
|
||||
|
||||
/* Detect and handle ECC PARITY errors */
|
||||
|
||||
if (ltc_intr3 &
|
||||
(ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
|
||||
ltc_ltcs_ltss_intr3_ecc_corrected_m())) {
|
||||
|
||||
ecc_status = gk20a_readl(g,
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_r() +
|
||||
offset);
|
||||
ecc_addr = gk20a_readl(g,
|
||||
ltc_ltc0_lts0_l2_cache_ecc_address_r() +
|
||||
offset);
|
||||
corrected_cnt = gk20a_readl(g,
|
||||
ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset);
|
||||
uncorrected_cnt = gk20a_readl(g,
|
||||
ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset);
|
||||
|
||||
corrected_delta =
|
||||
ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
|
||||
uncorrected_delta =
|
||||
ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
|
||||
corrected_overflow = ecc_status &
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
|
||||
|
||||
uncorrected_overflow = ecc_status &
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();
|
||||
|
||||
/* clear the interrupt */
|
||||
if ((corrected_delta > 0) || corrected_overflow) {
|
||||
gk20a_writel(g, ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0);
|
||||
}
|
||||
if ((uncorrected_delta > 0) || uncorrected_overflow) {
|
||||
gk20a_writel(g,
|
||||
ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0);
|
||||
}
|
||||
|
||||
gk20a_writel(g, ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset,
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());
|
||||
|
||||
/* update counters per slice */
|
||||
if (corrected_overflow)
|
||||
corrected_delta += (0x1UL << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
|
||||
if (uncorrected_overflow)
|
||||
uncorrected_delta += (0x1UL << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
|
||||
|
||||
ltc_corrected += corrected_delta;
|
||||
ltc_uncorrected += uncorrected_delta;
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
|
||||
|
||||
if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m())
|
||||
nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
|
||||
if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m())
|
||||
nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
|
||||
if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m())
|
||||
nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
|
||||
if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m())
|
||||
nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
|
||||
if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m())
|
||||
nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected");
|
||||
if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m())
|
||||
nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
|
||||
|
||||
if (corrected_overflow || uncorrected_overflow)
|
||||
nvgpu_info(g, "ecc counter overflow!");
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error address: 0x%x", ecc_addr);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
g->ecc.ltc.t19x.l2_cache_corrected_err_count.counters[ltc] +=
|
||||
ltc_corrected;
|
||||
g->ecc.ltc.t19x.l2_cache_uncorrected_err_count.counters[ltc] +=
|
||||
ltc_uncorrected;
|
||||
|
||||
}
|
||||
|
||||
/* fallback to other interrupts */
|
||||
gp10b_ltc_isr(g);
|
||||
}
|
||||
|
||||
static u32 gv11b_ltc_cbc_fix_config(struct gk20a *g, int base)
|
||||
{
|
||||
u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
|
||||
@@ -93,4 +199,5 @@ void gv11b_init_ltc(struct gpu_ops *gops)
|
||||
gops->ltc.set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry;
|
||||
gops->ltc.init_fs_state = gv11b_ltc_init_fs_state;
|
||||
gops->ltc.cbc_fix_config = gv11b_ltc_cbc_fix_config;
|
||||
gops->ltc.isr = gv11b_ltc_isr;
|
||||
}
|
||||
|
||||
@@ -177,6 +177,9 @@ static struct device_attribute *dev_attr_fecs_ecc_uncorrected_err_count_array;
|
||||
static struct device_attribute *dev_attr_gpccs_ecc_corrected_err_count_array;
|
||||
static struct device_attribute *dev_attr_gpccs_ecc_uncorrected_err_count_array;
|
||||
|
||||
/*
 * Attribute arrays for the L2 cache ECC error counters. They are handed
 * to gp10b_ecc_stat_create() / gp10b_ecc_stat_remove() together with
 * g->ltc_count.
 */
static struct device_attribute *dev_attr_l2_cache_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_l2_cache_ecc_uncorrected_err_count_array;
|
||||
|
||||
void gr_gv11b_create_sysfs(struct device *dev)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
@@ -250,6 +253,20 @@ void gr_gv11b_create_sysfs(struct device *dev)
|
||||
&g->ecc.gr.t19x.gcc_l15_uncorrected_err_count,
|
||||
dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
|
||||
|
||||
error |= gp10b_ecc_stat_create(dev,
|
||||
g->ltc_count,
|
||||
"ltc",
|
||||
"l2_cache_uncorrected_err_count",
|
||||
&g->ecc.ltc.t19x.l2_cache_uncorrected_err_count,
|
||||
dev_attr_l2_cache_ecc_uncorrected_err_count_array);
|
||||
|
||||
error |= gp10b_ecc_stat_create(dev,
|
||||
g->ltc_count,
|
||||
"ltc",
|
||||
"l2_cache_corrected_err_count",
|
||||
&g->ecc.ltc.t19x.l2_cache_corrected_err_count,
|
||||
dev_attr_l2_cache_ecc_corrected_err_count_array);
|
||||
|
||||
error |= gp10b_ecc_stat_create(dev,
|
||||
1,
|
||||
"gpc",
|
||||
@@ -336,6 +353,16 @@ static void gr_gv11b_remove_sysfs(struct device *dev)
|
||||
&g->ecc.gr.t19x.gcc_l15_uncorrected_err_count,
|
||||
dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
|
||||
|
||||
gp10b_ecc_stat_remove(dev,
|
||||
g->ltc_count,
|
||||
&g->ecc.ltc.t19x.l2_cache_uncorrected_err_count,
|
||||
dev_attr_l2_cache_ecc_uncorrected_err_count_array);
|
||||
|
||||
gp10b_ecc_stat_remove(dev,
|
||||
g->ltc_count,
|
||||
&g->ecc.ltc.t19x.l2_cache_corrected_err_count,
|
||||
dev_attr_l2_cache_ecc_corrected_err_count_array);
|
||||
|
||||
gp10b_ecc_stat_remove(dev,
|
||||
1,
|
||||
&g->ecc.gr.t19x.fecs_uncorrected_err_count,
|
||||
|
||||
@@ -374,6 +374,190 @@ static inline u32 ltc_ltc0_lts0_intr_r(void)
|
||||
{
|
||||
return 0x0014040c;
|
||||
}
|
||||
/*
 * INTR3 register accessors.
 *
 * NOTE(review): the ltcs_ltss_* name presumably denotes the all-LTC /
 * all-slice alias of the register, and ltc0_lts0_* the LTC 0 / slice 0
 * instance - confirm against the hardware manual.
 */

/* Offset of the LTCS_LTSS_INTR3 register. */
static inline u32 ltc_ltcs_ltss_intr3_r(void)
{
	return 0x0017e388U;
}

/* Mask of the ECC_CORRECTED bit (bit 7) in INTR3. */
static inline u32 ltc_ltcs_ltss_intr3_ecc_corrected_m(void)
{
	return 0x1U << 7U;
}

/* Mask of the ECC_UNCORRECTED bit (bit 8) in INTR3. */
static inline u32 ltc_ltcs_ltss_intr3_ecc_uncorrected_m(void)
{
	return 0x1U << 8U;
}

/* Offset of the LTC0_LTS0_INTR3 register. */
static inline u32 ltc_ltc0_lts0_intr3_r(void)
{
	return 0x00140588U;
}
|
||||
/*
 * LTC0_LTS0_L2_CACHE_ECC_STATUS register accessors.
 *
 * Suffix convention used below:
 *   _r  register offset
 *   _f  place a value into the field's bit position
 *   _m  mask of the field within the register
 */

/* Offset of the L2_CACHE_ECC_STATUS register. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_r(void)
{
	return 0x001404f0U;
}

/* CORRECTED_ERR_RSTG: 1-bit field at bit 1. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_f(u32 v)
{
	return (v & 0x1U) << 1U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m(void)
{
	return 0x1U << 1U;
}

/* CORRECTED_ERR_TSTG: 1-bit field at bit 3. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_f(u32 v)
{
	return (v & 0x1U) << 3U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m(void)
{
	return 0x1U << 3U;
}

/* CORRECTED_ERR_DSTG: 1-bit field at bit 5. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_f(u32 v)
{
	return (v & 0x1U) << 5U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m(void)
{
	return 0x1U << 5U;
}

/* UNCORRECTED_ERR_RSTG: 1-bit field at bit 0. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_f(u32 v)
{
	return (v & 0x1U) << 0U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m(void)
{
	return 0x1U << 0U;
}

/* UNCORRECTED_ERR_TSTG: 1-bit field at bit 2. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_f(u32 v)
{
	return (v & 0x1U) << 2U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m(void)
{
	return 0x1U << 2U;
}

/* UNCORRECTED_ERR_DSTG: 1-bit field at bit 4. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_f(u32 v)
{
	return (v & 0x1U) << 4U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m(void)
{
	return 0x1U << 4U;
}

/* UNCORRECTED_ERR_TOTAL_COUNTER_OVERFLOW: 1-bit field at bit 18. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_f(u32 v)
{
	return (v & 0x1U) << 18U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m(void)
{
	return 0x1U << 18U;
}

/* CORRECTED_ERR_TOTAL_COUNTER_OVERFLOW: 1-bit field at bit 16. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_f(u32 v)
{
	return (v & 0x1U) << 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m(void)
{
	return 0x1U << 16U;
}

/* UNCORRECTED_ERR_UNIQUE_COUNTER_OVERFLOW: 1-bit field at bit 19. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_unique_counter_overflow_f(u32 v)
{
	return (v & 0x1U) << 19U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_unique_counter_overflow_m(void)
{
	return 0x1U << 19U;
}

/* CORRECTED_ERR_UNIQUE_COUNTER_OVERFLOW: 1-bit field at bit 17. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_unique_counter_overflow_f(u32 v)
{
	return (v & 0x1U) << 17U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_unique_counter_overflow_m(void)
{
	return 0x1U << 17U;
}

/* RESET: 1-bit field at bit 30. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_reset_f(u32 v)
{
	return (v & 0x1U) << 30U;
}

/* RESET field set to its TASK value (bit 30 set). */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f(void)
{
	return 0x40000000U;
}
|
||||
/* Offset of the LTC0_LTS0_L2_CACHE_ECC_ADDRESS register. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_address_r(void)
{
	return 0x001404fcU;
}
|
||||
/*
 * LTC0_LTS0_L2_CACHE_ECC_CORRECTED_ERR_COUNT register accessors.
 *
 * The register holds two 16-bit fields: TOTAL in bits 15:0 and
 * UNIQUE_TOTAL in bits 31:16.
 *
 * Suffix convention:
 *   _r  register offset
 *   _s  field width in bits
 *   _f  place a value into the field's bit position
 *   _m  mask of the field within the register
 *   _v  extract the field from a register value
 *
 * All constants are unsigned: the original "0xffff << 16" shifted a
 * signed int past INT_MAX, which is undefined behaviour in C.
 */

/* Offset of the CORRECTED_ERR_COUNT register. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r(void)
{
	return 0x001404f4U;
}

/* TOTAL field: bits 15:0. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s(void)
{
	return 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_f(u32 v)
{
	return (v & 0xffffU) << 0U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_m(void)
{
	return 0xffffU << 0U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(u32 r)
{
	return (r >> 0U) & 0xffffU;
}

/* UNIQUE_TOTAL field: bits 31:16. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_unique_total_s(void)
{
	return 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_unique_total_f(u32 v)
{
	return (v & 0xffffU) << 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_unique_total_m(void)
{
	/* Unsigned operand: 0xffff << 16 does not fit in a signed int. */
	return 0xffffU << 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_unique_total_v(u32 r)
{
	return (r >> 16U) & 0xffffU;
}
|
||||
/*
 * LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT register accessors.
 *
 * The register holds two 16-bit fields: TOTAL in bits 15:0 and
 * UNIQUE_TOTAL in bits 31:16.
 *
 * Suffix convention:
 *   _r  register offset
 *   _s  field width in bits
 *   _f  place a value into the field's bit position
 *   _m  mask of the field within the register
 *   _v  extract the field from a register value
 *
 * All constants are unsigned: the original "0xffff << 16" shifted a
 * signed int past INT_MAX, which is undefined behaviour in C.
 */

/* Offset of the UNCORRECTED_ERR_COUNT register. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r(void)
{
	return 0x001404f8U;
}

/* TOTAL field: bits 15:0. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s(void)
{
	return 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_f(u32 v)
{
	return (v & 0xffffU) << 0U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_m(void)
{
	return 0xffffU << 0U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(u32 r)
{
	return (r >> 0U) & 0xffffU;
}

/* UNIQUE_TOTAL field: bits 31:16. */
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_unique_total_s(void)
{
	return 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_unique_total_f(u32 v)
{
	return (v & 0xffffU) << 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_unique_total_m(void)
{
	/* Unsigned operand: 0xffff << 16 does not fit in a signed int. */
	return 0xffffU << 16U;
}
static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_unique_total_v(u32 r)
{
	return (r >> 16U) & 0xffffU;
}
|
||||
static inline u32 ltc_ltc0_lts0_dstg_ecc_report_r(void)
|
||||
{
|
||||
return 0x0014051c;
|
||||
|
||||
Reference in New Issue
Block a user