diff --git a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c index afdb522f3..35ebb6afd 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c +++ b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c @@ -83,6 +83,17 @@ static int acr_wait_for_completion(struct gk20a *g, nvgpu_acr_dbg(g, "flcn-%d: sctl reg %x cpuctl reg %x", flcn_id, sctl, cpuctl); + /* + * When engine-falcon is used for ACR bootstrap, validate the integrity + * of falcon IMEM and DMEM. + */ + if (g->acr->acr.acr_validate_mem_integrity != NULL) { + if (!g->acr->acr.acr_validate_mem_integrity(g)) { + nvgpu_err(g, "flcn-%d: memcheck failed", flcn_id); + completion = -EAGAIN; + error_type = ACR_BOOT_FAILED; + } + } exit: if (completion != 0) { if (g->acr->acr.report_acr_engine_bus_err_status != NULL) { diff --git a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h index f39290ea5..a577ef23a 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h +++ b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h @@ -196,6 +196,7 @@ struct hs_acr { u32 bar0_status, u32 error_type); int (*acr_engine_bus_err_status)(struct gk20a *g, u32 *bar0_status, u32 *error_type); + bool (*acr_validate_mem_integrity)(struct gk20a *g); }; int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr, diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.c b/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.c index f2e65487f..e7c67ba43 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.c +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.c @@ -208,6 +208,7 @@ static void gv11b_acr_default_sw_init(struct gk20a *g, struct hs_acr *hs_acr) nvgpu_pmu_report_bar0_pri_err_status; hs_acr->acr_engine_bus_err_status = g->ops.pmu.bar0_error_status;; + hs_acr->acr_validate_mem_integrity = g->ops.pmu.validate_mem_integrity; } void nvgpu_gv11b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr) diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 9bf517d05..97784fad8 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -1001,6 +1001,7 @@ static const struct gpu_ops gv11b_ops = { .write_dmatrfbase = gp10b_write_dmatrfbase, /* ISR */ .pmu_enable_irq = gk20a_pmu_enable_irq, + .validate_mem_integrity = gv11b_pmu_validate_mem_integrity, #ifdef NVGPU_LS_PMU .get_irqdest = gv11b_pmu_get_irqdest, .handle_ext_irq = gv11b_pmu_handle_ext_irq, diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c index 3e9892104..ec1126654 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c @@ -236,8 +236,10 @@ int gv11b_pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu, return err; } -static void gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr) +static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr) { + int ret = 0; + if ((ecc_status & pwr_pmu_falcon_ecc_status_corrected_err_imem_m()) != 0U) { nvgpu_pmu_report_ecc_error(g, 0, @@ -253,6 +255,7 @@ static void gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr) ecc_addr, g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); + ret = -EFAULT; } if ((ecc_status & pwr_pmu_falcon_ecc_status_corrected_err_dmem_m()) != 0U) { @@ -269,7 +272,10 @@ static void gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr) ecc_addr, g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); + ret = -EFAULT; } + + return ret; } static void gv11b_pmu_handle_ecc_irq(struct gk20a *g) @@ -338,7 +344,7 @@ static void gv11b_pmu_handle_ecc_irq(struct gk20a *g) nvgpu_log(g, gpu_dbg_intr, "pmu ecc interrupt intr1: 0x%x", intr1); - gv11b_pmu_correct_ecc(g, ecc_status, ecc_addr); + (void)gv11b_pmu_correct_ecc(g, ecc_status, ecc_addr); if ((corrected_overflow != 0U) || (uncorrected_overflow != 0U)) { nvgpu_info(g, "ecc counter overflow!"); @@ -354,6 +360,17 @@ static void gv11b_pmu_handle_ecc_irq(struct gk20a *g) g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); } +bool gv11b_pmu_validate_mem_integrity(struct gk20a *g) +{ + u32 ecc_status, ecc_addr; + + ecc_status = nvgpu_readl(g, pwr_pmu_falcon_ecc_status_r()); + ecc_addr = nvgpu_readl(g, pwr_pmu_falcon_ecc_address_r()); + + return ((gv11b_pmu_correct_ecc(g, ecc_status, ecc_addr) == 0) ? true : + false); +} + void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0) { /* diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h index b938fa2e5..8cf52d7dd 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h @@ -34,5 +34,6 @@ void gv11b_pmu_setup_elpg(struct gk20a *g); u32 gv11b_pmu_get_irqdest(struct gk20a *g); void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0); void gv11b_setup_apertures(struct gk20a *g); +bool gv11b_pmu_validate_mem_integrity(struct gk20a *g); #endif /* PMU_GV11B_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 180c26e0b..de089f288 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1386,6 +1386,7 @@ struct gpu_ops { void (*set_irqmask)(struct gk20a *g); u32 (*get_irqdest)(struct gk20a *g); void (*pmu_enable_irq)(struct nvgpu_pmu *pmu, bool enable); + bool (*validate_mem_integrity)(struct gk20a *g); void (*handle_ext_irq)(struct gk20a *g, u32 intr); /* perfmon */ void (*pmu_init_perfmon_counter)(struct gk20a *g);