diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c index 373dc67df..830e5a05a 100644 --- a/drivers/gpu/nvgpu/common/ecc.c +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,26 +30,58 @@ void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat) nvgpu_init_list_node(&stat->node); + nvgpu_mutex_acquire(&ecc->stats_lock); + nvgpu_list_add_tail(&stat->node, &ecc->stats_list); ecc->stats_count = nvgpu_safe_add_s32(ecc->stats_count, 1); + + nvgpu_mutex_release(&ecc->stats_lock); +} + +void nvgpu_ecc_stat_del(struct gk20a *g, struct nvgpu_ecc_stat *stat) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_mutex_acquire(&ecc->stats_lock); + + nvgpu_list_del(&stat->node); + ecc->stats_count = nvgpu_safe_sub_s32(ecc->stats_count, 1); + + nvgpu_mutex_release(&ecc->stats_lock); } int nvgpu_ecc_counter_init(struct gk20a *g, - struct nvgpu_ecc_stat **stat, const char *name) + struct nvgpu_ecc_stat **statp, const char *name) { - struct nvgpu_ecc_stat *stats; + struct nvgpu_ecc_stat *stat; - stats = nvgpu_kzalloc(g, sizeof(*stats)); - if (stats == NULL) { + stat = nvgpu_kzalloc(g, sizeof(*stat)); + if (stat == NULL) { + nvgpu_err(g, "ecc counter alloc failed"); return -ENOMEM; } - (void)strncpy(stats->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1U); - nvgpu_ecc_stat_add(g, stats); - *stat = stats; + (void)strncpy(stat->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1U); + nvgpu_ecc_stat_add(g, stat); + *statp = stat; return 0; } +void nvgpu_ecc_counter_deinit(struct gk20a *g, struct nvgpu_ecc_stat **statp) +{ + struct nvgpu_ecc_stat *stat; + + if (*statp == NULL) { + return; + } + + stat = *statp; + + nvgpu_ecc_stat_del(g, stat); + nvgpu_kfree(g, stat); 
+ *statp = NULL; +} + /* release all ecc_stat */ void nvgpu_ecc_free(struct gk20a *g) { @@ -72,6 +104,10 @@ void nvgpu_ecc_free(struct gk20a *g) g->ops.pmu.ecc_free(g); } + nvgpu_mutex_acquire(&ecc->stats_lock); + WARN_ON(!nvgpu_list_empty(&ecc->stats_list)); + nvgpu_mutex_release(&ecc->stats_lock); + (void)memset(ecc, 0, sizeof(*ecc)); } @@ -83,6 +119,7 @@ int nvgpu_ecc_init_support(struct gk20a *g) return 0; } + nvgpu_mutex_init(&ecc->stats_lock); nvgpu_init_list_node(&ecc->stats_list); return 0; @@ -125,4 +162,6 @@ void nvgpu_ecc_remove_support(struct gk20a *g) nvgpu_ecc_sysfs_remove(g); #endif nvgpu_ecc_free(g); + + nvgpu_mutex_destroy(&g->ecc.stats_lock); } diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index 96812e8d8..bc428c084 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -996,7 +996,7 @@ int nvgpu_gr_alloc(struct gk20a *g) * FECS ECC errors during FECS load need to be handled and reported * using the ECC counters. */ - if (g->ops.gr.ecc.fecs_ecc_init != NULL) { + if ((g->ops.gr.ecc.fecs_ecc_init != NULL) && !g->ecc.initialized) { err = g->ops.gr.ecc.fecs_ecc_init(g); if (err != 0) { nvgpu_err(g, "failed to init gr fecs ecc"); diff --git a/drivers/gpu/nvgpu/common/gr/gr_ecc.c b/drivers/gpu/nvgpu/common/gr/gr_ecc.c index fc25438cb..827131e3f 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_ecc.c +++ b/drivers/gpu/nvgpu/common/gr/gr_ecc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -80,6 +80,7 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, if (stats == NULL) { return -ENOMEM; } + for (gpc = 0; gpc < gpc_count; gpc++) { stats[gpc] = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats[gpc]), @@ -128,11 +129,10 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, fail: if (err != 0) { -#ifdef CONFIG_NVGPU_DGPU while (gpc-- != 0u) { nvgpu_kfree(g, stats[gpc]); } -#endif + nvgpu_kfree(g, stats); } @@ -178,85 +178,92 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, return 0; } -/* helper function that frees the count array if non-NULL. */ -static void free_ecc_stat_count_array(struct gk20a *g, - struct nvgpu_ecc_stat **stat, - u32 gpc_count) +void nvgpu_ecc_counter_deinit_per_gr(struct gk20a *g, + struct nvgpu_ecc_stat **stats_p) { + struct nvgpu_ecc_stat *stats = NULL; u32 i; - if (stat != NULL) { - for (i = 0; i < gpc_count; i++) { - nvgpu_kfree(g, stat[i]); + if (*stats_p != NULL) { + stats = *stats_p; + + for (i = 0; i < g->num_gr_instances; i++) { + nvgpu_ecc_stat_del(g, &stats[i]); } - nvgpu_kfree(g, stat); + + nvgpu_kfree(g, stats); + *stats_p = NULL; + } +} + +void nvgpu_ecc_counter_deinit_per_tpc(struct gk20a *g, + struct nvgpu_ecc_stat ***stats_p) +{ + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + struct nvgpu_ecc_stat **stats = NULL; + u32 gpc_count; + u32 gpc, tpc; + + if (*stats_p != NULL) { + gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + stats = *stats_p; + + for (gpc = 0; gpc < gpc_count; gpc++) { + if (stats[gpc] == NULL) { + continue; + } + + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc); + tpc++) { + nvgpu_ecc_stat_del(g, &stats[gpc][tpc]); + } + + nvgpu_kfree(g, stats[gpc]); + stats[gpc] = NULL; + } + + nvgpu_kfree(g, stats); + *stats_p = NULL; + } +} + +void nvgpu_ecc_counter_deinit_per_gpc(struct gk20a 
*g, + struct nvgpu_ecc_stat **stats_p) +{ + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + struct nvgpu_ecc_stat *stats = NULL; + u32 gpc_count; + u32 gpc; + + if (*stats_p != NULL) { + gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + stats = *stats_p; + + for (gpc = 0; gpc < gpc_count; gpc++) { + nvgpu_ecc_stat_del(g, &stats[gpc]); + } + + nvgpu_kfree(g, stats); + *stats_p = NULL; } } void nvgpu_gr_ecc_free(struct gk20a *g) { - struct nvgpu_ecc *ecc = &g->ecc; struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); - u32 gpc_count; + + nvgpu_log(g, gpu_dbg_gr, " "); if (gr_config == NULL) { return; } - gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + if (g->ops.gr.ecc.fecs_ecc_deinit != NULL) { + g->ops.gr.ecc.fecs_ecc_deinit(g); + } - free_ecc_stat_count_array(g, ecc->gr.sm_lrf_ecc_single_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_lrf_ecc_double_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_sec_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_sed_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_ded_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_sec_pipe0_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_ded_pipe0_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_sec_pipe0_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_ded_pipe0_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_sec_pipe1_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_ded_pipe1_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_sec_pipe1_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_ded_pipe1_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, - 
ecc->gr.sm_l1_tag_ecc_uncorrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_cbu_ecc_corrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_l1_data_ecc_corrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, - ecc->gr.sm_l1_data_ecc_uncorrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_icache_ecc_corrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, - ecc->gr.sm_icache_ecc_uncorrected_err_count, - gpc_count); - - nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count); + if (g->ops.gr.ecc.gpc_tpc_ecc_deinit != NULL) { + g->ops.gr.ecc.gpc_tpc_ecc_deinit(g); + } } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc.c b/drivers/gpu/nvgpu/common/ltc/ltc.c index 60c52804f..bb8dce3ad 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc.c @@ -126,6 +126,7 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, if (stats == NULL) { return -ENOMEM; } + for (ltc = 0; ltc < ltc_count; ltc++) { stats[ltc] = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats[ltc]), @@ -184,17 +185,45 @@ fail: void nvgpu_ltc_ecc_free(struct gk20a *g) { struct nvgpu_ecc *ecc = &g->ecc; - u32 i; + struct nvgpu_ecc_stat *stat; + u32 slices_per_ltc; + u32 ltc_count; + u32 ltc, lts; - for (i = 0; i < nvgpu_ltc_get_ltc_count(g); i++) { - if (ecc->ltc.ecc_sec_count != NULL) { - nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]); + if (g->ltc == NULL) { + return; + } + + ltc_count = 
nvgpu_ltc_get_ltc_count(g); + slices_per_ltc = nvgpu_ltc_get_slices_per_ltc(g); + + for (ltc = 0; ltc < ltc_count; ltc++) { + if (ecc->ltc.ecc_sec_count != NULL && + ecc->ltc.ecc_sec_count[ltc] != NULL) { + for (lts = 0; lts < slices_per_ltc; lts++) { + stat = &ecc->ltc.ecc_sec_count[ltc][lts]; + nvgpu_ecc_stat_del(g, stat); + } + + nvgpu_kfree(g, ecc->ltc.ecc_sec_count[ltc]); + ecc->ltc.ecc_sec_count[ltc] = NULL; } - if (ecc->ltc.ecc_ded_count != NULL) { - nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]); + if (ecc->ltc.ecc_ded_count != NULL && + ecc->ltc.ecc_ded_count[ltc] != NULL) { + for (lts = 0; lts < slices_per_ltc; lts++) { + stat = &ecc->ltc.ecc_ded_count[ltc][lts]; + nvgpu_ecc_stat_del(g, stat); + } + + nvgpu_kfree(g, ecc->ltc.ecc_ded_count[ltc]); + ecc->ltc.ecc_ded_count[ltc] = NULL; } } + nvgpu_kfree(g, ecc->ltc.ecc_sec_count); + ecc->ltc.ecc_sec_count = NULL; + nvgpu_kfree(g, ecc->ltc.ecc_ded_count); + ecc->ltc.ecc_ded_count = NULL; } diff --git a/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_ga10b_fusa.c index 46b80ce41..240a2de15 100644 --- a/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_ga10b_fusa.c @@ -1,7 +1,7 @@ /* * GA10B FB ECC * - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -50,61 +50,52 @@ int ga10b_fb_ecc_init(struct gk20a *g) err = gv11b_fb_ecc_init(g); if (err != 0) { - goto init_fb_gv11b_counters_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_unique_err_count); if (err != 0) { - goto init_l2tlb_ecc_uncorrected_unique_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_unique_err_count); if (err != 0) { - goto init_l2tlb_ecc_corrected_unique_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_unique_err_count); if (err != 0) { - goto init_hubtlb_ecc_uncorrected_unique_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_unique_err_count); if (err != 0) { - goto init_hubtlb_ecc_corrected_unique_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_fillunit_ecc_uncorrected_unique_err_count); if (err != 0) { - goto init_fillunit_ecc_uncorrected_unique_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_fillunit_ecc_corrected_unique_err_count); if (err != 0) { - goto init_fillunit_ecc_corrected_unique_fail; + goto init_fb_ecc_err; } - return 0; +init_fb_ecc_err: + + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + ga10b_fb_ecc_free(g); + } -init_fillunit_ecc_corrected_unique_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_fillunit_ecc_uncorrected_unique_err_count); -init_fillunit_ecc_uncorrected_unique_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_hubtlb_ecc_corrected_unique_err_count); -init_hubtlb_ecc_corrected_unique_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_hubtlb_ecc_uncorrected_unique_err_count); -init_hubtlb_ecc_uncorrected_unique_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_l2tlb_ecc_corrected_unique_err_count); -init_l2tlb_ecc_corrected_unique_fail: - 
NVGPU_ECC_COUNTER_FREE_FB(mmu_l2tlb_ecc_uncorrected_unique_err_count); -init_l2tlb_ecc_uncorrected_unique_fail: - gv11b_fb_ecc_free(g); -init_fb_gv11b_counters_fail: return err; } void ga10b_fb_ecc_free(struct gk20a *g) { - struct nvgpu_ecc *ecc = &g->ecc; + NVGPU_ECC_COUNTER_FREE_FB(mmu_l2tlb_ecc_corrected_unique_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_l2tlb_ecc_uncorrected_unique_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_hubtlb_ecc_corrected_unique_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_hubtlb_ecc_uncorrected_unique_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_fillunit_ecc_corrected_unique_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_fillunit_ecc_uncorrected_unique_err_count); - nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_unique_err_count); - nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_unique_err_count); - nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_unique_err_count); - nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_unique_err_count); - nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_unique_err_count); - nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_unique_err_count); gv11b_fb_ecc_free(g); } diff --git a/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_gv11b.h b/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_gv11b.h index 3e9d8ac6d..284aaeaca 100644 --- a/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_gv11b.h @@ -1,7 +1,7 @@ /* * GV11B FB ECC * - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,8 +36,8 @@ struct gk20a; #define NVGPU_ECC_COUNTER_INIT_FB(stat) \ nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat) -#define NVGPU_ECC_COUNTER_FREE_FB(stat) \ - nvgpu_kfree(g, g->ecc.fb.stat) +#define NVGPU_ECC_COUNTER_FREE_FB(stat) \ + nvgpu_ecc_counter_deinit(g, &g->ecc.fb.stat) int gv11b_fb_ecc_init(struct gk20a *g); void gv11b_fb_ecc_free(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_gv11b_fusa.c index c21399f32..718ed0c67 100644 --- a/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fb/ecc/fb_ecc_gv11b_fusa.c @@ -1,7 +1,7 @@ /* * GV11B FB ECC * - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,57 +36,49 @@ int gv11b_fb_ecc_init(struct gk20a *g) err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count); if (err != 0) { - goto init_l2tlb_ecc_uncorrected_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count); if (err != 0) { - goto init_l2tlb_ecc_corrected_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count); if (err != 0) { - goto init_hubtlb_ecc_uncorrected_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count); if (err != 0) { - goto init_hubtlb_ecc_corrected_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB( mmu_fillunit_ecc_uncorrected_err_count); if (err != 0) { - goto init_fillunit_ecc_uncorrected_fail; + goto init_fb_ecc_err; } err = NVGPU_ECC_COUNTER_INIT_FB( mmu_fillunit_ecc_corrected_err_count); 
if (err != 0) { - goto init_fillunit_ecc_corrected_fail; + goto init_fb_ecc_err; } - return 0; +init_fb_ecc_err: + + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + gv11b_fb_ecc_free(g); + } -init_fillunit_ecc_corrected_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_fillunit_ecc_uncorrected_err_count); -init_fillunit_ecc_uncorrected_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_hubtlb_ecc_corrected_err_count); -init_hubtlb_ecc_corrected_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_hubtlb_ecc_uncorrected_err_count); -init_hubtlb_ecc_uncorrected_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_l2tlb_ecc_corrected_err_count); -init_l2tlb_ecc_corrected_fail: - NVGPU_ECC_COUNTER_FREE_FB(mmu_l2tlb_ecc_uncorrected_err_count); -init_l2tlb_ecc_uncorrected_fail: return err; } void gv11b_fb_ecc_free(struct gk20a *g) { - struct nvgpu_ecc *ecc = &g->ecc; - - nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_l2tlb_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_l2tlb_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_hubtlb_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_hubtlb_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_fillunit_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_FREE_FB(mmu_fillunit_ecc_uncorrected_err_count); } void gv11b_fb_ecc_l2tlb_error_mask(u32 *corrected_error_mask, diff --git a/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.c b/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.c index 9a129bbe6..a5a2d61c9 100644 --- a/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.c +++ b/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.c @@ -152,6 +152,25 @@ int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g, return 0; } 
+static void free_fbpa_ecc_stat_count_array(struct gk20a *g, + struct nvgpu_ecc_stat **stats_p) +{ + u32 num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); + struct nvgpu_ecc_stat *stats; + u32 i; + + if (*stats_p != NULL) { + stats = *stats_p; + + for (i = 0; i < num_fbpa; i++) { + nvgpu_ecc_stat_del(g, &stats[i]); + } + + nvgpu_kfree(g, stats); + *stats_p = NULL; + } +} + int tu104_fbpa_ecc_init(struct gk20a *g) { int err; @@ -168,7 +187,7 @@ int tu104_fbpa_ecc_init(struct gk20a *g) done: if (err != 0) { nvgpu_err(g, "ecc counter allocate failed, err=%d", err); - nvgpu_ecc_free(g); + tu104_fbpa_ecc_free(g); } return err; @@ -178,6 +197,6 @@ void tu104_fbpa_ecc_free(struct gk20a *g) { struct nvgpu_ecc *ecc = &g->ecc; - nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count); - nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count); + free_fbpa_ecc_stat_count_array(g, &ecc->fbpa.fbpa_ecc_sec_err_count); + free_fbpa_ecc_stat_count_array(g, &ecc->fbpa.fbpa_ecc_ded_err_count); } diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_ga10b.h b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_ga10b.h index a462b7fd3..677105d24 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_ga10b.h +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_ga10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,5 +36,6 @@ struct nvgpu_hw_err_inject_info_desc * void ga10b_ecc_detect_enabled_units(struct gk20a *g); int ga10b_gr_gpc_tpc_ecc_init(struct gk20a *g); +void ga10b_gr_gpc_tpc_ecc_deinit(struct gk20a *g); #endif /* NVGPU_ECC_GA10B_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_ga10b_fusa.c index 2d1b91c38..55d2b4864 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_ga10b_fusa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -158,11 +158,40 @@ void ga10b_ecc_detect_enabled_units(struct gk20a *g) } } -int ga10b_gr_gpc_tpc_ecc_init(struct gk20a *g) +static int _ga10b_gr_gpc_tpc_ecc_init(struct gk20a *g) { - gv11b_gr_gpc_tpc_ecc_init(g); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_rams_ecc_corrected_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_rams_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_rams_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_rams_ecc_uncorrected_err_count); return 0; } + +int ga10b_gr_gpc_tpc_ecc_init(struct gk20a *g) +{ + int err; + + err = gv11b_gr_gpc_tpc_ecc_init(g); + if (err != 0) { + goto done; + } + + err = _ga10b_gr_gpc_tpc_ecc_init(g); + if (err != 0) { + goto done; + } + +done: + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + ga10b_gr_gpc_tpc_ecc_deinit(g); + } + + return 0; +} + +void ga10b_gr_gpc_tpc_ecc_deinit(struct gk20a *g) +{ + gv11b_gr_gpc_tpc_ecc_deinit(g); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_rams_ecc_corrected_err_count); + 
NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_rams_ecc_uncorrected_err_count); +} diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.c b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.c index 7ac2bc59f..ae7cebfc8 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -126,25 +126,25 @@ void gp10b_ecc_detect_enabled_units(struct gk20a *g) static int gp10b_ecc_init_tpc_sm(struct gk20a *g) { - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sec_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sed_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_ded_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_lrf_ecc_single_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_lrf_ecc_double_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_shm_ecc_sec_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_shm_ecc_sed_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_shm_ecc_ded_count); return 0; } static int gp10b_ecc_init_tpc_tex(struct gk20a *g) { - NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe0_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe0_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe0_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe0_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe1_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe1_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe1_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe1_count); + 
NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(tex_ecc_total_sec_pipe0_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(tex_ecc_total_ded_pipe0_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(tex_unique_ecc_sec_pipe0_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(tex_unique_ecc_ded_pipe0_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(tex_ecc_total_sec_pipe1_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(tex_ecc_total_ded_pipe1_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(tex_unique_ecc_sec_pipe1_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(tex_unique_ecc_ded_pipe1_count); return 0; } @@ -170,8 +170,36 @@ int gp10b_gr_ecc_init(struct gk20a *g) err = gp10b_ecc_init_tpc(g); if (err != 0) { nvgpu_err(g, "ecc counter allocate failed, err=%d", err); - nvgpu_ecc_free(g); + gp10b_gr_ecc_deinit(g); } return err; } + +static void gp10b_ecc_deinit_tpc_sm(struct gk20a *g) +{ + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_lrf_ecc_single_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_lrf_ecc_double_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_shm_ecc_sec_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_shm_ecc_sed_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_shm_ecc_ded_count); +} + +static void gp10b_ecc_deinit_tpc_tex(struct gk20a *g) +{ + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(tex_ecc_total_sec_pipe0_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(tex_ecc_total_ded_pipe0_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(tex_unique_ecc_sec_pipe0_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(tex_unique_ecc_ded_pipe0_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(tex_ecc_total_sec_pipe1_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(tex_ecc_total_ded_pipe1_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(tex_unique_ecc_sec_pipe1_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(tex_unique_ecc_ded_pipe1_count); +} + +void gp10b_gr_ecc_deinit(struct gk20a *g) +{ + gp10b_ecc_deinit_tpc_sm(g); + + gp10b_ecc_deinit_tpc_tex(g); +} diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.h 
b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.h index 94eaa1751..a70b7ca06 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.h +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,5 +27,6 @@ struct gk20a; void gp10b_ecc_detect_enabled_units(struct gk20a *g); int gp10b_gr_ecc_init(struct gk20a *g); +void gp10b_gr_ecc_deinit(struct gk20a *g); #endif /* NVGPU_ECC_GP10B_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b.h b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b.h index 1774632c5..e9a4a4c9e 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -35,6 +35,8 @@ void gv11b_ecc_detect_enabled_units(struct gk20a *g); int gv11b_gr_gpc_tpc_ecc_init(struct gk20a *g); int gv11b_gr_fecs_ecc_init(struct gk20a *g); +void gv11b_gr_gpc_tpc_ecc_deinit(struct gk20a *g); +void gv11b_gr_fecs_ecc_deinit(struct gk20a *g); #ifdef CONFIG_NVGPU_INJECT_HWERR void gv11b_gr_intr_inject_fecs_ecc_error(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b_fusa.c index 351a860f1..20e13fe43 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b_fusa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -177,20 +177,20 @@ void gv11b_ecc_detect_enabled_units(struct gk20a *g) static int gv11b_ecc_init_sm_corrected_err_count(struct gk20a *g) { - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_l1_tag_ecc_corrected_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_cbu_ecc_corrected_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_l1_data_ecc_corrected_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_icache_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_l1_tag_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_cbu_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_l1_data_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_icache_ecc_corrected_err_count); return 0; } static int gv11b_ecc_init_sm_uncorrected_err_count(struct gk20a *g) { - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_l1_tag_ecc_uncorrected_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_cbu_ecc_uncorrected_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_l1_data_ecc_uncorrected_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_icache_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_l1_tag_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_cbu_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_l1_data_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_icache_ecc_uncorrected_err_count); return 0; } @@ -199,8 +199,8 @@ static int gv11b_ecc_init_tpc(struct gk20a *g) { int ret; - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count); - NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_lrf_ecc_single_err_count); + NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(sm_lrf_ecc_double_err_count); ret = gv11b_ecc_init_sm_corrected_err_count(g); if (ret != 0) { @@ 
-268,22 +268,23 @@ int gv11b_gr_gpc_tpc_ecc_init(struct gk20a *g) done: if (err != 0) { nvgpu_err(g, "ecc counter allocate failed, err=%d", err); - nvgpu_ecc_free(g); + gv11b_gr_gpc_tpc_ecc_deinit(g); } return err; } + int gv11b_gr_fecs_ecc_init(struct gk20a *g) { int err; nvgpu_log(g, gpu_dbg_gr, " "); - err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count); + err = NVGPU_ECC_COUNTER_INIT_PER_GR(fecs_ecc_uncorrected_err_count); if (err != 0) { goto done; } - err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count); + err = NVGPU_ECC_COUNTER_INIT_PER_GR(fecs_ecc_corrected_err_count); if (err != 0) { goto done; } @@ -291,8 +292,60 @@ int gv11b_gr_fecs_ecc_init(struct gk20a *g) done: if (err != 0) { nvgpu_err(g, "ecc counter allocate failed, err=%d", err); - nvgpu_ecc_free(g); + gv11b_gr_fecs_ecc_deinit(g); } return err; } + +static void gv11b_ecc_deinit_sm_corrected_err_count(struct gk20a *g) +{ + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_l1_tag_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_cbu_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_l1_data_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_icache_ecc_corrected_err_count); +} + +static void gv11b_ecc_deinit_sm_uncorrected_err_count(struct gk20a *g) +{ + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_l1_tag_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_cbu_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_l1_data_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_icache_ecc_uncorrected_err_count); +} + +static void gv11b_ecc_deinit_tpc(struct gk20a *g) +{ + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_lrf_ecc_single_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_TPC(sm_lrf_ecc_double_err_count); + + gv11b_ecc_deinit_sm_corrected_err_count(g); + gv11b_ecc_deinit_sm_uncorrected_err_count(g); +} + +static void gv11b_ecc_deinit_gpc(struct gk20a *g) +{ + NVGPU_ECC_COUNTER_DEINIT_PER_GPC(gcc_l15_ecc_corrected_err_count); + 
NVGPU_ECC_COUNTER_DEINIT_PER_GPC(gcc_l15_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_GPC(gpccs_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_GPC(gpccs_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_GPC(mmu_l1tlb_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_GPC(mmu_l1tlb_ecc_corrected_err_count); +} + +void gv11b_gr_gpc_tpc_ecc_deinit(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_gr, " "); + + gv11b_ecc_deinit_tpc(g); + + gv11b_ecc_deinit_gpc(g); +} + +void gv11b_gr_fecs_ecc_deinit(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_gr, " "); + + NVGPU_ECC_COUNTER_DEINIT_PER_GR(fecs_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_DEINIT_PER_GR(fecs_ecc_corrected_err_count); +} diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c index d446f6ebb..b520cf981 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c @@ -430,6 +430,8 @@ static const struct gops_gr_ecc ga100_ops_gr_ecc = { .detect = ga10b_ecc_detect_enabled_units, .gpc_tpc_ecc_init = ga10b_gr_gpc_tpc_ecc_init, .fecs_ecc_init = gv11b_gr_fecs_ecc_init, + .gpc_tpc_ecc_deinit = ga10b_gr_gpc_tpc_ecc_deinit, + .fecs_ecc_deinit = gv11b_gr_fecs_ecc_deinit, #ifdef CONFIG_NVGPU_INJECT_HWERR .get_mmu_err_desc = ga10b_gr_ecc_get_mmu_err_desc, .get_gcc_err_desc = gv11b_gr_intr_get_gcc_err_desc, diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c index ed2ba3f9e..5c4f976d0 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c @@ -394,6 +394,8 @@ static const struct gops_gr_ecc ga10b_ops_gr_ecc = { .detect = ga10b_ecc_detect_enabled_units, .gpc_tpc_ecc_init = ga10b_gr_gpc_tpc_ecc_init, .fecs_ecc_init = gv11b_gr_fecs_ecc_init, + .gpc_tpc_ecc_deinit = ga10b_gr_gpc_tpc_ecc_deinit, + .fecs_ecc_deinit = gv11b_gr_fecs_ecc_deinit, #ifdef CONFIG_NVGPU_INJECT_HWERR .get_mmu_err_desc = ga10b_gr_ecc_get_mmu_err_desc, 
.get_gcc_err_desc = gv11b_gr_intr_get_gcc_err_desc, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 3be1f7190..eca3dbf52 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -247,6 +247,7 @@ static const struct gops_ce gp10b_ops_ce = { static const struct gops_gr_ecc gp10b_ops_gr_ecc = { .detect = gp10b_ecc_detect_enabled_units, .gpc_tpc_ecc_init = gp10b_gr_ecc_init, + .gpc_tpc_ecc_deinit = gp10b_gr_ecc_deinit, }; static const struct gops_gr_ctxsw_prog gp10b_ops_gr_ctxsw_prog = { diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index e6433c198..212fe2498 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -316,6 +316,8 @@ static const struct gops_gr_ecc gv11b_ops_gr_ecc = { .detect = gv11b_ecc_detect_enabled_units, .gpc_tpc_ecc_init = gv11b_gr_gpc_tpc_ecc_init, .fecs_ecc_init = gv11b_gr_fecs_ecc_init, + .gpc_tpc_ecc_deinit = gv11b_gr_gpc_tpc_ecc_deinit, + .fecs_ecc_deinit = gv11b_gr_fecs_ecc_deinit, #ifdef CONFIG_NVGPU_INJECT_HWERR .get_mmu_err_desc = gv11b_gr_intr_get_mmu_err_desc, .get_gcc_err_desc = gv11b_gr_intr_get_gcc_err_desc, diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 931ee2a74..f433402d8 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -368,6 +368,8 @@ static const struct gops_gr_ecc tu104_ops_gr_ecc = { .detect = NULL, .gpc_tpc_ecc_init = gv11b_gr_gpc_tpc_ecc_init, .fecs_ecc_init = gv11b_gr_fecs_ecc_init, + .gpc_tpc_ecc_deinit = gv11b_gr_gpc_tpc_ecc_deinit, + .fecs_ecc_deinit = gv11b_gr_fecs_ecc_deinit, }; static const struct gops_gr_ctxsw_prog tu104_ops_gr_ctxsw_prog = { diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c index f2c71be5b..531aa7931 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c +++ 
b/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B L2 * - * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,7 +56,7 @@ int gp10b_lts_ecc_init(struct gk20a *g) init_lts_err: if (err != 0) { nvgpu_err(g, "ecc counter allocate failed, err=%d", err); - nvgpu_ecc_free(g); + nvgpu_ltc_ecc_free(g); } return err; diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b_fusa.c index 6e0ab1d04..d7933963c 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b_fusa.c @@ -73,7 +73,7 @@ int gv11b_lts_ecc_init(struct gk20a *g) done: if (err != 0) { nvgpu_err(g, "ecc counter allocate failed, err=%d", err); - nvgpu_ecc_free(g); + nvgpu_ltc_ecc_free(g); } return err; diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b_fusa.c index dde962cf8..7b951f277 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b_fusa.c @@ -328,7 +328,7 @@ int gv11b_pmu_ecc_init(struct gk20a *g) done: if (err != 0) { nvgpu_err(g, "ecc counter allocate failed, err=%d", err); - nvgpu_ecc_free(g); + gv11b_pmu_ecc_free(g); } return err; @@ -336,10 +336,8 @@ done: void gv11b_pmu_ecc_free(struct gk20a *g) { - struct nvgpu_ecc *ecc = &g->ecc; - - nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count); + NVGPU_ECC_COUNTER_FREE_PMU(pmu_ecc_corrected_err_count); + NVGPU_ECC_COUNTER_FREE_PMU(pmu_ecc_uncorrected_err_count); } static void gv11b_pmu_handle_ecc_irq(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/include/nvgpu/ecc.h b/drivers/gpu/nvgpu/include/nvgpu/ecc.h index 7d0d9279c..e4ab2fbdd 100644 --- 
a/drivers/gpu/nvgpu/include/nvgpu/ecc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/ecc.h @@ -77,6 +77,7 @@ #include #include +#include #define NVGPU_ECC_STAT_NAME_MAX_SIZE 100UL @@ -268,6 +269,8 @@ struct nvgpu_ecc { /** Contains the head to the list of error statistics. */ struct nvgpu_list_node stats_list; + /** Lock to protect the stats_list updates. */ + struct nvgpu_mutex stats_lock; /** Contains the number of error statistics. */ int stats_count; /** @@ -281,29 +284,50 @@ struct nvgpu_ecc { * @brief Allocates, initializes an error counter with specified name. * * @param g [in] The GPU driver struct. - * @param stat [out] Pointer to array of tpc error counters. + * @param statp [out] Pointer to error counter pointer. * @param name [in] Unique name for error counter. * * Allocate memory for one error counter, initializes the counter with 0 and the - * specified string identifier. Finally the counter is added to the status_list + * specified string identifier. Finally the counter is added to the stats_list * of struct nvgpu_ecc. * * @return 0 in case of success, less than 0 for failure. * @return -ENOMEM if there is not enough memory to allocate ecc stats. */ int nvgpu_ecc_counter_init(struct gk20a *g, - struct nvgpu_ecc_stat **stat, const char *name); + struct nvgpu_ecc_stat **statp, const char *name); /** - * @brief Concatenates the error counter to status list. + * @brief Deallocates an error counter. + * + * @param g [in] The GPU driver struct. + * @param statp [in] Pointer to error counter pointer. + * + * Delete the counter from the nvgpu_ecc stats_list. Deallocate memory for the + * error counter. + */ +void nvgpu_ecc_counter_deinit(struct gk20a *g, struct nvgpu_ecc_stat **statp); + +/** + * @brief Concatenates the error counter to stats list. * * @param g [in] The GPU driver struct. * @param stat [in] Pointer to error counter. * - * The counter is added to the status_list of struct nvgpu_ecc. + * The counter is added to the stats_list of struct nvgpu_ecc. 
*/ void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat); +/** + * @brief Deletes the error counter from the stats list. + * + * @param g [in] The GPU driver struct. + * @param stat [in] Pointer to error counter. + * + * The counter is removed from the stats_list of struct nvgpu_ecc. + */ +void nvgpu_ecc_stat_del(struct gk20a *g, struct nvgpu_ecc_stat *stat); + /** * @brief Release memory associated with all error counters. * diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h index 37c8aad39..c59d77167 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h @@ -105,6 +105,27 @@ struct gops_gr_ecc { */ int (*fecs_ecc_init)(struct gk20a *g); + /** + * @brief Deinitialize GR unit ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function deallocates memory to track the ecc error counts + * for GR unit and subunits of GR (like GPCs, TPCs etc) and removes + * the counters from the global list. + */ + void (*gpc_tpc_ecc_deinit)(struct gk20a *g); + + /** + * @brief Deinitialize FECS ECC support in GR unit. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function deallocates memory to track the ecc error counts + * for FECS in GR and removes the counters from the global list. + */ + void (*fecs_ecc_deinit)(struct gk20a *g); + /** * @brief Detect ECC enabled units in GR engine. * diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h index 4f8ca4391..c5c8cbeda 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h @@ -49,7 +49,7 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, * @param stat [in] Address of pointer to struct nvgpu_ecc_stat.
* */ -#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \ +#define NVGPU_ECC_COUNTER_INIT_PER_TPC_OR_RETURN(stat) \ do { \ int err = 0; \ err = nvgpu_ecc_counter_init_per_tpc(g, \ @@ -59,6 +59,27 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, } \ } while (false) +/** + * @brief Free error counter of all tpc instances in all gpc instances. + * + * @param g [in] The GPU driver struct. + * @param stats_p [out] Pointer to 2D array of error counters in tpcs in gpcs. + * + * Removes the error counter of all tpc instances from stats_list in struct + * nvgpu_ecc and frees the memory allocated for it. + */ +void nvgpu_ecc_counter_deinit_per_tpc(struct gk20a *g, + struct nvgpu_ecc_stat ***stats_p); + +/* + * @brief Frees counters for memories shared across TPCs in GPCs. + * + * @param stat [in] error counter member from g->ecc.gr. + * + */ +#define NVGPU_ECC_COUNTER_DEINIT_PER_TPC(stat) \ + nvgpu_ecc_counter_deinit_per_tpc(g, &g->ecc.gr.stat) + /** * @brief Allocate and initialize error counter specified by name for all gpc * instances. @@ -79,12 +100,33 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, /* * @brief Allocate and initialize counters for memories shared across a GPC. * - * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. + * @param stat [in] error counter member from g->ecc.gr. * */ #define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \ nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat) +/** + * @brief Free error counter of all gpc instances. + * + * @param g [in] The GPU driver struct. + * @param stats_p [out] Pointer to array of gpc error counters. + * + * Removes the error counter of all gpc instances from stats_list in struct + * nvgpu_ecc and frees the memory allocated for it. + */ +void nvgpu_ecc_counter_deinit_per_gpc(struct gk20a *g, + struct nvgpu_ecc_stat **stats_p); + +/* + * @brief Frees counters for memories shared across a GPC. + * + * @param stat [in] error counter member from g->ecc.gr.
+ * + */ +#define NVGPU_ECC_COUNTER_DEINIT_PER_GPC(stat) \ + nvgpu_ecc_counter_deinit_per_gpc(g, &g->ecc.gr.stat) + /** * @brief Allocate and initialize error counter specified by name for all gr * instances. @@ -107,9 +149,30 @@ int nvgpu_ecc_counter_init_per_gr(struct gk20a *g, * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. * */ -#define NVGPU_ECC_COUNTER_INIT_GR(stat) \ +#define NVGPU_ECC_COUNTER_INIT_PER_GR(stat) \ nvgpu_ecc_counter_init_per_gr(g, &g->ecc.gr.stat, #stat) +/** + * @brief Free error counter of all gr instances. + * + * @param g [in] The GPU driver struct. + * @param stats_p [out] Pointer to array of gr error counters. + * + * Removes the error counter of all gr instances from stats_list in struct + * nvgpu_ecc and frees the memory allocated for it. + */ +void nvgpu_ecc_counter_deinit_per_gr(struct gk20a *g, + struct nvgpu_ecc_stat **stats_p); + +/* + * @brief Frees counters for memories shared across GR instances. + * + * @param stat [in] error counter member from g->ecc.gr. + * + */ +#define NVGPU_ECC_COUNTER_DEINIT_PER_GR(stat) \ + nvgpu_ecc_counter_deinit_per_gr(g, &g->ecc.gr.stat) + /** * @brief Release all GR ECC stats counters. * diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index 1141dcd39..44a84dc36 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -528,7 +528,7 @@ int nvgpu_pmu_early_init(struct gk20a *g); void nvgpu_pmu_remove_support(struct gk20a *g, struct nvgpu_pmu *pmu); /* - * @brief Allocate and initialize counter for memories within PMU. + * @brief Allocate and initialize ECC counter for memories within PMU. * * @param stat [in] Address of pointer to struct nvgpu_ecc_stat.
* @@ -536,5 +536,14 @@ void nvgpu_pmu_remove_support(struct gk20a *g, struct nvgpu_pmu *pmu); #define NVGPU_ECC_COUNTER_INIT_PMU(stat) \ nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat) +/* + * @brief Remove ECC counter from the list and free the counter. + * + * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. + * + */ +#define NVGPU_ECC_COUNTER_FREE_PMU(stat) \ + nvgpu_ecc_counter_deinit(g, &g->ecc.pmu.stat) + #endif /* NVGPU_PMU_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c b/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c index 73ae3dc2b..02ddc5799 100644 --- a/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -28,9 +28,13 @@ int nvgpu_ecc_sysfs_init(struct gk20a *g) struct nvgpu_ecc_stat *stat; int i = 0, err; + nvgpu_mutex_acquire(&ecc->stats_lock); + attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count); - if (!attr) + if (!attr) { + nvgpu_mutex_release(&ecc->stats_lock); return -ENOMEM; + } nvgpu_list_for_each_entry(stat, &ecc->stats_list, nvgpu_ecc_stat, node) { @@ -54,6 +58,8 @@ int nvgpu_ecc_sysfs_init(struct gk20a *g) i++; } + nvgpu_mutex_release(&ecc->stats_lock); + if (err) { while (i-- > 0) device_remove_file(dev, &attr[i].attr); @@ -73,8 +79,13 @@ void nvgpu_ecc_sysfs_remove(struct gk20a *g) struct nvgpu_ecc *ecc = &g->ecc; int i; + nvgpu_mutex_acquire(&ecc->stats_lock); + for (i = 0; i < ecc->stats_count; i++) device_remove_file(dev, &l->ecc_attrs[i].attr); + + nvgpu_mutex_release(&ecc->stats_lock); + nvgpu_kfree(g, l->ecc_attrs); l->ecc_attrs = NULL; } diff --git a/libs/dgpu/libnvgpu-drv-dgpu_safe.export b/libs/dgpu/libnvgpu-drv-dgpu_safe.export index 5ace22e07..19728d0da 100644 --- 
a/libs/dgpu/libnvgpu-drv-dgpu_safe.export +++ b/libs/dgpu/libnvgpu-drv-dgpu_safe.export @@ -779,6 +779,7 @@ nvgpu_get_nvhost_dev nvgpu_free_nvhost_dev nvgpu_ecc_free nvgpu_ecc_counter_init +nvgpu_ecc_counter_deinit nvgpu_ecc_finalize_support nvgpu_rc_fifo_recover nvgpu_rc_ctxsw_timeout diff --git a/libs/igpu/libnvgpu-drv-igpu_safe.export b/libs/igpu/libnvgpu-drv-igpu_safe.export index 0feb422f9..e82a72bdd 100644 --- a/libs/igpu/libnvgpu-drv-igpu_safe.export +++ b/libs/igpu/libnvgpu-drv-igpu_safe.export @@ -796,6 +796,7 @@ nvgpu_get_nvhost_dev nvgpu_free_nvhost_dev nvgpu_ecc_free nvgpu_ecc_counter_init +nvgpu_ecc_counter_deinit nvgpu_ecc_finalize_support nvgpu_rc_fifo_recover nvgpu_rc_ctxsw_timeout diff --git a/userspace/units/ecc/nvgpu-ecc.c b/userspace/units/ecc/nvgpu-ecc.c index 70118f5c6..4e0230d3f 100644 --- a/userspace/units/ecc/nvgpu-ecc.c +++ b/userspace/units/ecc/nvgpu-ecc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -121,7 +121,7 @@ int test_ecc_counter_init(struct unit_module *m, struct gk20a *g, ret = UNIT_FAIL; goto cleanup; } - nvgpu_kfree(g, stat); + nvgpu_ecc_counter_deinit(g, &stat); /* * Case #2: @@ -147,12 +147,17 @@ int test_ecc_counter_init(struct unit_module *m, struct gk20a *g, ret = UNIT_FAIL; goto cleanup; } - nvgpu_kfree(g, stat); - stat = NULL; + + nvgpu_ecc_counter_deinit(g, &stat); + + if (!nvgpu_list_empty(&g->ecc.stats_list)) { + ret = UNIT_FAIL; + goto cleanup; + } cleanup: if (stat != NULL) { - nvgpu_kfree(g, stat); + nvgpu_ecc_counter_deinit(g, &stat); } nvgpu_kfree(g, name); diff --git a/userspace/units/ecc/nvgpu-ecc.h b/userspace/units/ecc/nvgpu-ecc.h index 88751b495..20d45b9d0 100644 --- a/userspace/units/ecc/nvgpu-ecc.h +++ b/userspace/units/ecc/nvgpu-ecc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -110,6 +110,8 @@ int test_ecc_finalize_support(struct unit_module *m, * - Set counter name to string with invalid length equal to * NVGPU_ECC_STAT_NAME_MAX_SIZE. * - "nvgpu_ecc_counter_init" will truncate the counter name and return 0. + * - Test case #4 + * - Verify that the g->ecc.stats_list is empty. 
* * Output: * - UNIT_FAIL under the following conditions: diff --git a/userspace/units/fb/fb_gv11b_fusa.c b/userspace/units/fb/fb_gv11b_fusa.c index 337b98e69..5fafc1e20 100644 --- a/userspace/units/fb/fb_gv11b_fusa.c +++ b/userspace/units/fb/fb_gv11b_fusa.c @@ -120,6 +120,8 @@ int fb_gv11b_init_test(struct unit_module *m, struct gk20a *g, void *args) if (err != -ENOMEM) { unit_return_fail(m, "gv11b_fb_ecc_init did not fail as expected (%d)\n", i); } + + g->ops.ecc.ecc_init_support(g); } err = g->ops.fb.ecc.init(g); diff --git a/userspace/units/ltc/nvgpu-ltc.c b/userspace/units/ltc/nvgpu-ltc.c index 0301a9ffd..2b2794ce3 100644 --- a/userspace/units/ltc/nvgpu-ltc.c +++ b/userspace/units/ltc/nvgpu-ltc.c @@ -440,7 +440,6 @@ static int mock_l2_flush(struct gk20a *g, bool inv) int test_ltc_intr(struct unit_module *m, struct gk20a *g, void *args) { int err = UNIT_SUCCESS; - u32 i; const u32 offset1 = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) * nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); int (*save_func)(struct gk20a *g, bool inv); @@ -560,15 +559,7 @@ int test_ltc_intr(struct unit_module *m, struct gk20a *g, void *args) g->ops.mm.cache.l2_flush = save_func; done: - for (i = 0; i < nvgpu_ltc_get_ltc_count(g); i++) { - if (g->ecc.ltc.ecc_sec_count != NULL) { - nvgpu_kfree(g, g->ecc.ltc.ecc_sec_count[i]); - } - - if (g->ecc.ltc.ecc_ded_count != NULL) { - nvgpu_kfree(g, g->ecc.ltc.ecc_ded_count[i]); - } - } + nvgpu_ltc_ecc_free(g); return err; }