mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: address CCM deviations for gv11b_pmu_handle_ext_irq
The CCM value of gv11b_pmu_handle_ext_irq was higher than 10. Address this by moving the ECC handling into two new functions, gv11b_pmu_handle_ecc_irq and gv11b_pmu_correct_ecc. Also address checkpatch issues. JIRA NVGPU-3194 Change-Id: I3549c79227508a36635225ff4c19b1f8388b4a1f Signed-off-by: Sagar Kamble <skamble@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2101946 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman <alexw@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
81c4f24a0d
commit
538a471c97
@@ -236,117 +236,131 @@ int gv11b_pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu,
|
||||
return err;
|
||||
}
|
||||
|
||||
void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
|
||||
/*
 * Report every PMU falcon ECC error flagged in ecc_status.
 *
 * Four status bits are tested independently, in this order: IMEM corrected,
 * IMEM uncorrected, DMEM corrected, DMEM uncorrected. For each bit that is
 * set, the error is handed to nvgpu_pmu_report_ecc_error() together with
 * ecc_addr and the current value of the matching (corrected or uncorrected)
 * PMU error counter, and a gpu_dbg_intr log line is emitted. More than one
 * bit may be set, in which case more than one report is issued.
 *
 * @g:          driver instance whose PMU ECC counters are reported.
 * @ecc_status: ECC status word to decode.
 * @ecc_addr:   ECC address value, forwarded unmodified to the report.
 */
static void gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr)
{
	u32 flagged;

	/* IMEM, corrected */
	flagged = ecc_status &
		pwr_pmu_falcon_ecc_status_corrected_err_imem_m();
	if (flagged != 0U) {
		nvgpu_pmu_report_ecc_error(g, 0,
			GPU_PMU_FALCON_IMEM_ECC_CORRECTED, ecc_addr,
			g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter);
		nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
	}

	/* IMEM, uncorrected */
	flagged = ecc_status &
		pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m();
	if (flagged != 0U) {
		nvgpu_pmu_report_ecc_error(g, 0,
			GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, ecc_addr,
			g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
		nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected");
	}

	/* DMEM, corrected */
	flagged = ecc_status &
		pwr_pmu_falcon_ecc_status_corrected_err_dmem_m();
	if (flagged != 0U) {
		nvgpu_pmu_report_ecc_error(g, 0,
			GPU_PMU_FALCON_DMEM_ECC_CORRECTED, ecc_addr,
			g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter);
		nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
	}

	/* DMEM, uncorrected */
	flagged = ecc_status &
		pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m();
	if (flagged != 0U) {
		nvgpu_pmu_report_ecc_error(g, 0,
			GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, ecc_addr,
			g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
		nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected");
	}
}
|
||||
|
||||
static void gv11b_pmu_handle_ecc_irq(struct gk20a *g)
|
||||
{
|
||||
u32 intr1;
|
||||
u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
|
||||
u32 corrected_delta, uncorrected_delta;
|
||||
u32 corrected_overflow, uncorrected_overflow;
|
||||
|
||||
intr1 = gk20a_readl(g, pwr_pmu_ecc_intr_status_r());
|
||||
if ((intr1 &
|
||||
(pwr_pmu_ecc_intr_status_corrected_m() |
|
||||
pwr_pmu_ecc_intr_status_uncorrected_m())) == 0U) {
|
||||
return;
|
||||
}
|
||||
|
||||
ecc_status = gk20a_readl(g,
|
||||
pwr_pmu_falcon_ecc_status_r());
|
||||
ecc_addr = gk20a_readl(g,
|
||||
pwr_pmu_falcon_ecc_address_r());
|
||||
corrected_cnt = gk20a_readl(g,
|
||||
pwr_pmu_falcon_ecc_corrected_err_count_r());
|
||||
uncorrected_cnt = gk20a_readl(g,
|
||||
pwr_pmu_falcon_ecc_uncorrected_err_count_r());
|
||||
|
||||
corrected_delta =
|
||||
pwr_pmu_falcon_ecc_corrected_err_count_total_v(corrected_cnt);
|
||||
uncorrected_delta =
|
||||
pwr_pmu_falcon_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
|
||||
corrected_overflow = ecc_status &
|
||||
pwr_pmu_falcon_ecc_status_corrected_err_total_counter_overflow_m();
|
||||
|
||||
uncorrected_overflow = ecc_status &
|
||||
pwr_pmu_falcon_ecc_status_uncorrected_err_total_counter_overflow_m();
|
||||
corrected_overflow = ecc_status &
|
||||
pwr_pmu_falcon_ecc_status_corrected_err_total_counter_overflow_m();
|
||||
|
||||
/* clear the interrupt */
|
||||
if (((intr1 & pwr_pmu_ecc_intr_status_corrected_m()) != 0U) ||
|
||||
(corrected_overflow != 0U)) {
|
||||
gk20a_writel(g, pwr_pmu_falcon_ecc_corrected_err_count_r(), 0);
|
||||
}
|
||||
if (((intr1 & pwr_pmu_ecc_intr_status_uncorrected_m()) != 0U) ||
|
||||
(uncorrected_overflow != 0U)) {
|
||||
gk20a_writel(g,
|
||||
pwr_pmu_falcon_ecc_uncorrected_err_count_r(), 0);
|
||||
}
|
||||
|
||||
gk20a_writel(g, pwr_pmu_falcon_ecc_status_r(),
|
||||
pwr_pmu_falcon_ecc_status_reset_task_f());
|
||||
|
||||
/* update counters per slice */
|
||||
if (corrected_overflow != 0U) {
|
||||
corrected_delta +=
|
||||
BIT32(pwr_pmu_falcon_ecc_corrected_err_count_total_s());
|
||||
}
|
||||
if (uncorrected_overflow != 0U) {
|
||||
uncorrected_delta +=
|
||||
BIT32(pwr_pmu_falcon_ecc_uncorrected_err_count_total_s());
|
||||
}
|
||||
|
||||
g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter += corrected_delta;
|
||||
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter +=
|
||||
uncorrected_delta;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"pmu ecc interrupt intr1: 0x%x", intr1);
|
||||
|
||||
gv11b_pmu_correct_ecc(g, ecc_status, ecc_addr);
|
||||
|
||||
if ((corrected_overflow != 0U) || (uncorrected_overflow != 0U)) {
|
||||
nvgpu_info(g, "ecc counter overflow!");
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error row address: 0x%x",
|
||||
pwr_pmu_falcon_ecc_address_row_address_v(ecc_addr));
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error count corrected: %d, uncorrected %d",
|
||||
g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter,
|
||||
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
|
||||
}
|
||||
|
||||
/*
 * Handle PMU external interrupts.
 *
 * Only the ECC/parity source is acted on here: when intr0 has the
 * pwr_falcon_irqstat_ext_ecc_parity "true" field set, servicing is delegated
 * to gv11b_pmu_handle_ecc_irq(), which reads, clears and accounts for the
 * ECC error state. Other bits in intr0 are not examined by this function.
 *
 * @g:     driver instance passed through to the ECC handler.
 * @intr0: interrupt status word to test for the ECC/parity bit.
 */
void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
{
	/*
	 * handle the ECC interrupt
	 */
	if ((intr0 & pwr_falcon_irqstat_ext_ecc_parity_true_f()) != 0U) {
		gv11b_pmu_handle_ecc_irq(g);
	}
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user