gpu: nvgpu: ga10x: fix LTC ecc handling

Notable differences from GV11B are below: 1. RSTG/TSTG uncorrected errors are supported. 2. PLTS_INTR doesn't report SEC/DED errors. Instead, PLTS_INTR3 will indicate the SEC/DED errors through CORRECTED_ERR_DSTG and UNCORRECTED_ERR_DSTG fields respectively. 3. DSTG_ECC_ADDRESS and DSTG_ECC_REPORT are deprecated. Bug 3446731 Change-Id: I60018d1b3825adcbb287dea05bc96a87f559c969 Signed-off-by: Sagar Kamble <skamble@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2633959 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Antony Clince Alex <aalex@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 09:12:24 +03:00 · 2021-11-29 17:01:48 +05:30
parent c463810bcd
commit 6a6562cd4d
8 changed files with 281 additions and 49 deletions
--- a/drivers/gpu/nvgpu/common/ltc/ltc.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc.c
@@ -227,6 +227,10 @@ void nvgpu_ltc_ecc_free(struct gk20a *g)
 			ecc->ltc.ecc_ded_count[ltc] = NULL;
 		}

+		if (ecc->ltc.rstg_ecc_parity_count != NULL) {
+			nvgpu_kfree(g, ecc->ltc.rstg_ecc_parity_count[ltc]);
+		}
+
 		if (ecc->ltc.tstg_ecc_parity_count != NULL) {
 			nvgpu_kfree(g, ecc->ltc.tstg_ecc_parity_count[ltc]);
 		}
@@ -242,6 +246,9 @@ void nvgpu_ltc_ecc_free(struct gk20a *g)
 	nvgpu_kfree(g, ecc->ltc.ecc_ded_count);
 	ecc->ltc.ecc_ded_count = NULL;

+	nvgpu_kfree(g, ecc->ltc.rstg_ecc_parity_count);
+	ecc->ltc.rstg_ecc_parity_count = NULL;
+
 	nvgpu_kfree(g, ecc->ltc.tstg_ecc_parity_count);
 	ecc->ltc.tstg_ecc_parity_count = NULL;

--- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
@@ -374,7 +374,7 @@ static const struct gops_ltc_intr ga100_ops_ltc_intr = {
 };

 static const struct gops_ltc ga100_ops_ltc = {
-	.ecc_init = gv11b_lts_ecc_init,
+	.ecc_init = ga10b_lts_ecc_init,
 	.init_ltc_support = nvgpu_init_ltc_support,
 	.ltc_remove_support = nvgpu_ltc_remove_support,
 	.determine_L2_size_bytes = gp10b_determine_L2_size_bytes,
--- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
@@ -343,7 +343,7 @@ static const struct gops_ltc_intr ga10b_ops_ltc_intr = {
 };

 static const struct gops_ltc ga10b_ops_ltc = {
-	.ecc_init = gv11b_lts_ecc_init,
+	.ecc_init = ga10b_lts_ecc_init,
 	.init_ltc_support = nvgpu_init_ltc_support,
 	.ltc_remove_support = nvgpu_ltc_remove_support,
 	.determine_L2_size_bytes = ga10b_determine_L2_size_bytes,
--- a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_ga10b_fusa.c
+++ b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_ga10b_fusa.c
@@ -28,7 +28,6 @@
 #include <nvgpu/nvgpu_err.h>
 #include <nvgpu/utils.h>

-#include "hal/ltc/intr/ltc_intr_gv11b.h"
 #include "ltc_intr_ga10b.h"

 #include <nvgpu/hw/ga10b/hw_ltc_ga10b.h>
@@ -388,6 +387,210 @@ void ga10b_ltc_intr_configure(struct gk20a *g)
 	ga10b_ltc_intr3_configure(g);
 }

+/*
+ * Handle the RSTG bits of the L2 cache ECC status read for one LTC slice.
+ *
+ * An uncorrected RSTG error is added to rstg_ecc_parity_count[ltc][slice]
+ * and reported, but only if the subunit field of ecc_addr says RSTG. If the
+ * address belongs to another subunit the function returns right away.
+ * A corrected RSTG error is not expected and is treated as fatal.
+ */
+static void ga10b_ltc_intr_handle_rstg_ecc_interrupts(struct gk20a *g,
+			u32 ltc, u32 slice, u32 ecc_status, u32 ecc_addr,
+			u32 uncorrected_delta)
+{
+	const bool addr_in_rstg =
+		(ltc_ltc0_lts0_l2_cache_ecc_address_subunit_v(ecc_addr) ==
+		 ltc_ltc0_lts0_l2_cache_ecc_address_subunit_rstg_v());
+	const bool uncorrected_pending = (ecc_status &
+		ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) != 0U;
+	const bool corrected_pending = (ecc_status &
+		ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) != 0U;
+	/* LTC id goes above the low 8 bits, slice id is OR-ed into them. */
+	const u32 inst_id = (ltc << 8U) | slice;
+	struct nvgpu_ecc_stat *rstg_stat;
+
+	if (uncorrected_pending) {
+		nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
+
+		if (!addr_in_rstg) {
+			nvgpu_log(g, gpu_dbg_intr, "ECC address doesn't belong to RSTG");
+			return;
+		}
+
+		/* Look the counter up only once the error is confirmed. */
+		rstg_stat = &g->ecc.ltc.rstg_ecc_parity_count[ltc][slice];
+		rstg_stat->counter = nvgpu_wrapping_add_u32(rstg_stat->counter,
+							    uncorrected_delta);
+
+		nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_LTC, inst_id,
+				     GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED,
+				     ecc_addr, rstg_stat->counter);
+	}
+
+	if (corrected_pending) {
+		nvgpu_err(g, "rstg ecc error corrected");
+		/*
+		 * Corrected RSTG errors are not expected on ga10x, so hitting
+		 * one is considered a fatal condition.
+		 */
+		BUG();
+	}
+}
+
+/*
+ * Handle the TSTG bits of the L2 cache ECC status read for one LTC slice.
+ *
+ * An uncorrected TSTG error is added to tstg_ecc_parity_count[ltc][slice]
+ * and reported, but only if the subunit field of ecc_addr says TSTG. If the
+ * address belongs to another subunit the function returns right away.
+ * A corrected TSTG error is not expected and is treated as fatal.
+ */
+static void ga10b_ltc_intr_handle_tstg_ecc_interrupts(struct gk20a *g,
+			u32 ltc, u32 slice, u32 ecc_status, u32 ecc_addr,
+			u32 uncorrected_delta)
+{
+	const bool addr_in_tstg =
+		(ltc_ltc0_lts0_l2_cache_ecc_address_subunit_v(ecc_addr) ==
+		 ltc_ltc0_lts0_l2_cache_ecc_address_subunit_tstg_v());
+	const bool uncorrected_pending = (ecc_status &
+		ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) != 0U;
+	const bool corrected_pending = (ecc_status &
+		ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) != 0U;
+	/* LTC id goes above the low 8 bits, slice id is OR-ed into them. */
+	const u32 inst_id = (ltc << 8U) | slice;
+	struct nvgpu_ecc_stat *tstg_stat;
+
+	if (uncorrected_pending) {
+		nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
+
+		if (!addr_in_tstg) {
+			nvgpu_log(g, gpu_dbg_intr, "ECC address doesn't belong to TSTG");
+			return;
+		}
+
+		/* Look the counter up only once the error is confirmed. */
+		tstg_stat = &g->ecc.ltc.tstg_ecc_parity_count[ltc][slice];
+		tstg_stat->counter = nvgpu_wrapping_add_u32(tstg_stat->counter,
+							    uncorrected_delta);
+
+		nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_LTC, inst_id,
+				     GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED,
+				     ecc_addr, tstg_stat->counter);
+	}
+
+	if (corrected_pending) {
+		nvgpu_err(g, "tstg ecc error corrected");
+		/*
+		 * Corrected TSTG errors are not expected on ga10b, so hitting
+		 * one is considered a fatal condition.
+		 */
+		BUG();
+	}
+}
+
+/*
+ * Tell whether the RAM field of an L2 cache ECC address matches one of the
+ * four DSTG data banks (bank0..bank3).
+ */
+static bool ga10b_ltc_intr_is_dstg_data_bank(u32 ecc_addr)
+{
+	const u32 ram_id = ltc_ltc0_lts0_l2_cache_ecc_address_ram_v(ecc_addr);
+
+	return (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_bank0_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_bank1_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_bank2_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_bank3_v());
+}
+
+/*
+ * Tell whether the RAM field of an L2 cache ECC address matches one of the
+ * eight DSTG CLRBE TRLRAM instances (trlram0..trlram7).
+ */
+static bool ga10b_ltc_intr_is_dstg_be_ram(u32 ecc_addr)
+{
+	const u32 ram_id = ltc_ltc0_lts0_l2_cache_ecc_address_ram_v(ecc_addr);
+
+	return (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_clrbe_trlram0_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_clrbe_trlram1_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_clrbe_trlram2_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_clrbe_trlram3_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_clrbe_trlram4_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_clrbe_trlram5_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_clrbe_trlram6_v()) ||
+	       (ram_id == ltc_ltc0_lts0_l2_cache_ecc_address_ram_dstg_db_clrbe_trlram7_v());
+}
+
+/*
+ * Handle the DSTG bits of the L2 cache ECC status read for one LTC slice.
+ *
+ * Corrected DSTG errors (SEC) are added to ecc_sec_count[ltc][slice],
+ * reported, and followed by an L2 flush. Uncorrected DSTG errors are
+ * classified by the RAM field of ecc_addr: data banks count as DED in
+ * ecc_ded_count, byte-enable RAMs count in dstg_be_ecc_parity_count, and
+ * anything else is fatal. In both cases nothing is counted unless the
+ * subunit field of ecc_addr says DSTG; a mismatch ends the handler.
+ */
+static void ga10b_ltc_intr_handle_dstg_ecc_interrupts(struct gk20a *g,
+			u32 ltc, u32 slice, u32 ecc_status, u32 ecc_addr,
+			u32 corrected_delta, u32 uncorrected_delta)
+{
+	const bool addr_in_dstg =
+		(ltc_ltc0_lts0_l2_cache_ecc_address_subunit_v(ecc_addr) ==
+		 ltc_ltc0_lts0_l2_cache_ecc_address_subunit_dstg_v());
+	const bool corrected_pending = (ecc_status &
+		ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) != 0U;
+	const bool uncorrected_pending = (ecc_status &
+		ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) != 0U;
+	/* LTC id goes above the low 8 bits, slice id is OR-ed into them. */
+	const u32 inst_id = (ltc << 8U) | slice;
+
+	if (corrected_pending) {
+		nvgpu_log(g, gpu_dbg_intr, "dstg ecc error (SEC) corrected");
+
+		if (!addr_in_dstg) {
+			nvgpu_log(g, gpu_dbg_intr, "ECC address doesn't belong to DSTG");
+			return;
+		}
+
+		g->ecc.ltc.ecc_sec_count[ltc][slice].counter =
+			nvgpu_wrapping_add_u32(
+				g->ecc.ltc.ecc_sec_count[ltc][slice].counter,
+				corrected_delta);
+
+		nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_LTC, inst_id,
+				     GPU_LTC_CACHE_DSTG_ECC_CORRECTED, ecc_addr,
+				     g->ecc.ltc.ecc_sec_count[ltc][slice].counter);
+
+		/*
+		 * The SEC code corrects a single bit error on the fly, but
+		 * the current HW cannot scrub that error out of the RAM on a
+		 * read access. Flush L2 from SW before the single bit error
+		 * has a chance to become a double bit error.
+		 */
+		if (g->ops.mm.cache.l2_flush(g, true) != 0) {
+			nvgpu_err(g, "l2_flush failed");
+			BUG();
+		}
+	}
+
+	if (!uncorrected_pending) {
+		return;
+	}
+
+	nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
+
+	if (!addr_in_dstg) {
+		nvgpu_log(g, gpu_dbg_intr, "ECC address doesn't belong to DSTG");
+		return;
+	}
+
+	if (ga10b_ltc_intr_is_dstg_data_bank(ecc_addr)) {
+		/* Uncorrected error in a data bank: counted as DED. */
+		nvgpu_err(g, "Double bit error detected in GPU L2!");
+
+		g->ecc.ltc.ecc_ded_count[ltc][slice].counter =
+			nvgpu_wrapping_add_u32(
+				g->ecc.ltc.ecc_ded_count[ltc][slice].counter,
+				uncorrected_delta);
+
+		nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_LTC, inst_id,
+				     GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED, ecc_addr,
+				     g->ecc.ltc.ecc_ded_count[ltc][slice].counter);
+	} else if (ga10b_ltc_intr_is_dstg_be_ram(ecc_addr)) {
+		/* Uncorrected error in a byte-enable RAM. */
+		nvgpu_log(g, gpu_dbg_intr, "dstg be ecc error uncorrected");
+
+		g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter =
+			nvgpu_wrapping_add_u32(
+				g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter,
+				uncorrected_delta);
+
+		nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_LTC, inst_id,
+				     GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED, ecc_addr,
+				     g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter);
+	} else {
+		/* Neither a data bank nor a byte-enable RAM. */
+		nvgpu_err(g, "unsupported uncorrected dstg ecc error");
+		BUG();
+	}
+}
+
+/*
+ * Write zero to the L2 cache ECC error count registers that need clearing.
+ *
+ * The uncorrected (respectively corrected) count register located at the
+ * LTC0/LTS0 register address plus offset is zeroed when the matching delta
+ * is non-zero or the matching overflow flag is set. A register whose delta
+ * and overflow are both zero is left untouched.
+ */
+static void ga10b_ltc_intr_init_counters(struct gk20a *g,
+			u32 uncorrected_delta, u32 uncorrected_overflow,
+			u32 corrected_delta, u32 corrected_overflow,
+			u32 offset)
+{
+	const bool clear_uncorrected =
+		(uncorrected_delta != 0U) || (uncorrected_overflow != 0U);
+	const bool clear_corrected =
+		(corrected_delta != 0U) || (corrected_overflow != 0U);
+
+	if (clear_uncorrected) {
+		u32 uncorrected_reg = nvgpu_safe_add_u32(
+			ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r(),
+			offset);
+
+		nvgpu_writel(g, uncorrected_reg, 0);
+	}
+
+	if (clear_corrected) {
+		u32 corrected_reg = nvgpu_safe_add_u32(
+			ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r(),
+			offset);
+
+		nvgpu_writel(g, corrected_reg, 0);
+	}
+}
+
 static void ga10b_ltc_intr3_ecc_interrupts(struct gk20a *g, u32 ltc, u32 slice,
 				u32 offset, u32 ltc_intr3)
 {
@@ -395,7 +598,18 @@ static void ga10b_ltc_intr3_ecc_interrupts(struct gk20a *g, u32 ltc, u32 slice,
 	u32 corrected_delta, uncorrected_delta;
 	u32 corrected_overflow, uncorrected_overflow;

-	/* Detect and handle ECC PARITY errors */
+	/*
+	 * Detect and handle ECC PARITY errors and SEC-DED errors.
+	 * SEC errors are reported as DSTG corrected errors and
+	 * DED errors are reported as DSTG uncorrected errors.
+	 * Below are the supported errors:
+	 *
+	 *   1. UNCORRECTED_ERR_RSTG - signals a parity error in RSTG RAMS, for now only CBC RAMS
+	 *   2. UNCORRECTED_ERR_TSTG - signals a parity error in TSTG RAMS
+	 *   3. UNCORRECTED_ERR_DSTG - signals a parity error in DSTG RAMS, non-data RAMS
+	 *                             and DED in data RAMS.
+	 *   4. CORRECTED_ERR_DSTG - signals an ecc corrected error in DSTG data RAMS (SEC)
+	 */
 	if ((ltc_intr3 &
 		(ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
 		 ltc_ltcs_ltss_intr3_ecc_corrected_m())) != 0U) {
@@ -404,81 +618,65 @@ static void ga10b_ltc_intr3_ecc_interrupts(struct gk20a *g, u32 ltc, u32 slice,
 				ltc_ltc0_lts0_l2_cache_ecc_status_r(), offset));
 		ecc_addr = nvgpu_readl(g, nvgpu_safe_add_u32(
 			ltc_ltc0_lts0_l2_cache_ecc_address_r(), offset));
-		corrected_cnt = nvgpu_readl(g, nvgpu_safe_add_u32(
-			ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r(),
-			offset));
+
 		uncorrected_cnt = nvgpu_readl(g, nvgpu_safe_add_u32(
 			ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r(),
 			offset));

-		corrected_delta =
-			ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(
-					corrected_cnt);
 		uncorrected_delta =
 			ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
-		corrected_overflow = ecc_status &
-			ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();

 		uncorrected_overflow = ecc_status &
 			ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();

-		gv11b_ltc_intr_init_counters(g,
-			uncorrected_delta, uncorrected_overflow, offset);
+		corrected_cnt = nvgpu_readl(g, nvgpu_safe_add_u32(
+			ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r(),
+			offset));
+
+		corrected_delta =
+			ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
+
+		corrected_overflow = ecc_status &
+			ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
+
+		ga10b_ltc_intr_init_counters(g,
+			uncorrected_delta, uncorrected_overflow,
+			corrected_delta, corrected_overflow, offset);

 		nvgpu_writel(g, nvgpu_safe_add_u32(
 				ltc_ltc0_lts0_l2_cache_ecc_status_r(), offset),
 			ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());

 		/* update counters per slice */
-		if (corrected_overflow != 0U) {
-			corrected_delta += BIT32(
-				ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
-		}
 		if (uncorrected_overflow != 0U) {
+			nvgpu_info(g, "uncorrected ecc counter overflow!");
 			uncorrected_delta += BIT32(
 				ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
 		}

-		g->ecc.ltc.ecc_sec_count[ltc][slice].counter =
-				nvgpu_safe_add_u32(
-				g->ecc.ltc.ecc_sec_count[ltc][slice].counter,
-					corrected_delta);
-		g->ecc.ltc.ecc_ded_count[ltc][slice].counter =
-				nvgpu_safe_add_u32(
-				g->ecc.ltc.ecc_ded_count[ltc][slice].counter,
-					uncorrected_delta);
+		if (corrected_overflow != 0U) {
+			nvgpu_info(g, "corrected ecc counter overflow!");
+			corrected_delta += BIT32(
+				ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
+		}
+
 		nvgpu_log(g, gpu_dbg_intr,
-			"ltc:%d lts: %d cache ecc interrupt intr: 0x%x",
-			ltc, slice, ltc_intr3);
+			  "ecc status 0x%08x error address: 0x%08x subunit: %u corrected_delta: 0x%08x uncorrected_delta: 0x%08x",
+			  ecc_status, ecc_addr,
+			  ltc_ltc0_lts0_l2_cache_ecc_address_subunit_v(ecc_addr),
+			  corrected_delta, uncorrected_delta);

-		/* This check has been added to ensure that the slice id is less
-		 * than 8-bits and hence, it can be packed as part of LSB 8-bits
-		 * along with the LTC id while reporting LTC related ECC errors.
-		 */
-		if (slice > U8_MAX) {
-			nvgpu_log(g, gpu_dbg_intr, "Invalid slice id=%d",
-					slice);
-			slice = slice & 0xFFU;
-		}
-
-		gv11b_ltc_intr_handle_rstg_ecc_interrupts(g, ltc, slice,
+		ga10b_ltc_intr_handle_rstg_ecc_interrupts(g, ltc, slice,
 						ecc_status, ecc_addr,
 						uncorrected_delta);

-		gv11b_ltc_intr_handle_tstg_ecc_interrupts(g, ltc, slice,
+		ga10b_ltc_intr_handle_tstg_ecc_interrupts(g, ltc, slice,
 						ecc_status, ecc_addr,
 						uncorrected_delta);

-		gv11b_ltc_intr_handle_dstg_ecc_interrupts(g, ltc, slice,
+		ga10b_ltc_intr_handle_dstg_ecc_interrupts(g, ltc, slice,
 						ecc_status, ecc_addr,
-						uncorrected_delta);
-
-		if ((corrected_overflow != 0U) ||
-				(uncorrected_overflow != 0U)) {
-			nvgpu_info(g, "ecc counter overflow!");
-		}
-
-		nvgpu_log(g, gpu_dbg_intr, "ecc error address: 0x%x", ecc_addr);
+						corrected_delta, uncorrected_delta);
 	}
 }

--- a/drivers/gpu/nvgpu/hal/ltc/ltc_ga10b.h
+++ b/drivers/gpu/nvgpu/hal/ltc/ltc_ga10b.h
@@ -40,6 +40,7 @@ void ga10b_ltc_set_zbc_depth_entry(struct gk20a *g, u32 depth_val, u32 index);
 void ga10b_ltc_init_fs_state(struct gk20a *g);
 void ga10b_ltc_lts_set_mgmt_setup(struct gk20a *g);
 u64 ga10b_determine_L2_size_bytes(struct gk20a *g);
+int ga10b_lts_ecc_init(struct gk20a *g);

 #ifdef CONFIG_NVGPU_DEBUGGER
 u32 ga10b_ltc_pri_shared_addr(struct gk20a *g, u32 addr);
--- a/drivers/gpu/nvgpu/hal/ltc/ltc_ga10b_fusa.c
+++ b/drivers/gpu/nvgpu/hal/ltc/ltc_ga10b_fusa.c
@@ -29,6 +29,7 @@
 #include <nvgpu/errata.h>

 #include "hal/gr/gr/gr_gk20a.h"
+#include "ltc_gv11b.h"
 #include "ltc_ga10b.h"

 #include <nvgpu/hw/ga10b/hw_ltc_ga10b.h>
@@ -234,3 +235,25 @@ u64 ga10b_determine_L2_size_bytes(struct gk20a *g)

 	return size;
 }
+
+/*
+ * Set up the LTS ECC counters for ga10b.
+ *
+ * Runs the GV11B LTS ECC initialisation first and, if that succeeds,
+ * additionally initialises the per-LTS rstg_ecc_parity_count counters.
+ *
+ * Returns 0 on success, otherwise the error code of the step that failed
+ * (an error message is logged in that case).
+ *
+ * NOTE(review): nothing is released here when the RSTG step fails after the
+ * GV11B step succeeded - confirm that the caller frees the LTC ECC counters
+ * on error.
+ */
+int ga10b_lts_ecc_init(struct gk20a *g)
+{
+	int err = gv11b_lts_ecc_init(g);
+
+	if (err == 0) {
+		err = NVGPU_ECC_COUNTER_INIT_PER_LTS(rstg_ecc_parity_count);
+	}
+
+	if (err != 0) {
+		nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
+	}
+
+	return err;
+}
--- a/drivers/gpu/nvgpu/include/nvgpu/ecc.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/ecc.h
@@ -204,6 +204,8 @@ struct nvgpu_ecc {
 	 * unit.
 	 */
 	struct {
+		/** L2 cache slice RSTG ECC PARITY error count. */
+		struct nvgpu_ecc_stat **rstg_ecc_parity_count;
 		/** L2 cache slice TSTG ECC PARITY error count. */
 		struct nvgpu_ecc_stat **tstg_ecc_parity_count;
 		/** L2 cache slice DSTG BE ECC PARITY error count. */
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h
@@ -293,6 +293,7 @@ struct gr_exception_info {
 #define GPU_LTC_CACHE_DSTG_ECC_CORRECTED	(0U)
 #define GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED	(1U)
 #define GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED	(3U)
+#define GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED	(5U)
 #define GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED	(7U)
 /**
 * @}