diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml index daabf0a1f..cb561d826 100644 --- a/arch/nvgpu-common.yaml +++ b/arch/nvgpu-common.yaml @@ -66,7 +66,8 @@ ecc: safe: yes owner: Antony C sources: [ common/ecc.c, - include/nvgpu/ecc.h ] + include/nvgpu/ecc.h, + include/nvgpu/gops_ecc.h ] deps: log: @@ -458,6 +459,10 @@ gr: safe: yes sources: [ common/gr/gr_setup.c, include/nvgpu/gr/setup.h ] + ecc: + safe: yes + sources: [ common/gr/gr_ecc.c, + include/nvgpu/gr/gr_ecc.h ] fbp: safe: yes diff --git a/arch/nvgpu-hal-new.yaml b/arch/nvgpu-hal-new.yaml index 6b0d6295d..61d595877 100644 --- a/arch/nvgpu-hal-new.yaml +++ b/arch/nvgpu-hal-new.yaml @@ -474,9 +474,7 @@ gr: safe: no sources: [hal/gr/ecc/ecc_gv11b.c, hal/gr/ecc/ecc_gp10b.c, - hal/gr/ecc/ecc_tu104.c, - hal/gr/ecc/ecc_gp10b.h, - hal/gr/ecc/ecc_tu104.h ] + hal/gr/ecc/ecc_gp10b.h ] ctxsw_prog_fusa: safe: yes sources: [ hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c, diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 11758411d..7ed5d8da8 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -109,6 +109,7 @@ nvgpu-y += \ common/gr/hwpm_map.o \ common/gr/obj_ctx.o \ common/gr/fs_state.o \ + common/gr/gr_ecc.o \ common/netlist/netlist.o \ common/init/nvgpu_init.o \ common/pmu/pmu.o \ @@ -206,7 +207,6 @@ nvgpu-y += \ hal/clk/clk_tu104.o \ hal/gr/ecc/ecc_gp10b.o \ hal/gr/ecc/ecc_gv11b.o \ - hal/gr/ecc/ecc_tu104.o \ hal/gr/zcull/zcull_gm20b.o \ hal/gr/zcull/zcull_gv11b.o \ hal/gr/ctxsw_prog/ctxsw_prog_gp10b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 091845223..8ceffa0c8 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -119,6 +119,7 @@ srcs += common/utils/enabled.c \ common/gr/gr_setup.c \ common/gr/obj_ctx.c \ common/gr/fs_state.c \ + common/gr/gr_ecc.c \ common/netlist/netlist.c \ common/pmu/pmu.c \ common/acr/acr.c \ @@ -593,7 +594,6 @@ srcs += common/sec2/sec2.c \ 
hal/ce/ce_tu104.c \ hal/class/class_tu104.c \ hal/clk/clk_tu104.c \ - hal/gr/ecc/ecc_tu104.c \ hal/gr/init/gr_init_gv100.c \ hal/gr/init/gr_init_tu104.c \ hal/gr/intr/gr_intr_tu104.c \ diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c index 54517106c..a46665570 100644 --- a/drivers/gpu/nvgpu/common/ecc.c +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -21,14 +21,10 @@ */ #include -#include -#include -#include +#include #include -#include -#include -static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat) +void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat) { struct nvgpu_ecc *ecc = &g->ecc; @@ -38,125 +34,6 @@ static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat) ecc->stats_count = nvgpu_safe_add_s32(ecc->stats_count, 1); } -static void nvgpu_ecc_init(struct gk20a *g) -{ - struct nvgpu_ecc *ecc = &g->ecc; - - nvgpu_init_list_node(&ecc->stats_list); -} - -int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, - struct nvgpu_ecc_stat ***stat, const char *name) -{ - struct nvgpu_ecc_stat **stats; - struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); - u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); - u32 gpc, tpc; - char gpc_str[10] = {0}, tpc_str[10] = {0}; - int err = 0; - - stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), - gpc_count)); - if (stats == NULL) { - return -ENOMEM; - } - for (gpc = 0; gpc < gpc_count; gpc++) { - stats[gpc] = nvgpu_kzalloc(g, - nvgpu_safe_mult_u64(sizeof(*stats[gpc]), - nvgpu_gr_config_get_gpc_tpc_count(gr_config, - gpc))); - if (stats[gpc] == NULL) { - err = -ENOMEM; - goto fail; - } - } - - for (gpc = 0; gpc < gpc_count; gpc++) { - for (tpc = 0; - tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc); - tpc++) { - /** - * Store stats name as below: - * gpc_tpc_ - */ - (void)strcpy(stats[gpc][tpc].name, "gpc"); - (void)nvgpu_strnadd_u32(gpc_str, gpc, - sizeof(gpc_str), 10U); - (void)strncat(stats[gpc][tpc].name, gpc_str, - 
NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[gpc][tpc].name)); - (void)strncat(stats[gpc][tpc].name, "_tpc", - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[gpc][tpc].name)); - (void)nvgpu_strnadd_u32(tpc_str, tpc, - sizeof(tpc_str), 10U); - (void)strncat(stats[gpc][tpc].name, tpc_str, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[gpc][tpc].name)); - (void)strncat(stats[gpc][tpc].name, "_", - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[gpc][tpc].name)); - (void)strncat(stats[gpc][tpc].name, name, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[gpc][tpc].name)); - - nvgpu_ecc_stat_add(g, &stats[gpc][tpc]); - } - } - - *stat = stats; - -fail: - if (err != 0) { - while (gpc-- != 0u) { - nvgpu_kfree(g, stats[gpc]); - } - - nvgpu_kfree(g, stats); - } - - return err; -} - -int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, - struct nvgpu_ecc_stat **stat, const char *name) -{ - struct nvgpu_ecc_stat *stats; - struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); - u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); - u32 gpc; - char gpc_str[10] = {0}; - - stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), - gpc_count)); - if (stats == NULL) { - return -ENOMEM; - } - - for (gpc = 0; gpc < gpc_count; gpc++) { - /** - * Store stats name as below: - * gpc_ - */ - (void)strcpy(stats[gpc].name, "gpc"); - (void)nvgpu_strnadd_u32(gpc_str, gpc, sizeof(gpc_str), 10U); - (void)strncat(stats[gpc].name, gpc_str, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[gpc].name)); - (void)strncat(stats[gpc].name, "_", - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[gpc].name)); - (void)strncat(stats[gpc].name, name, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[gpc].name)); - - nvgpu_ecc_stat_add(g, &stats[gpc]); - } - - *stat = stats; - return 0; -} - int nvgpu_ecc_counter_init(struct gk20a *g, struct nvgpu_ecc_stat **stat, const char *name) { @@ -173,246 +50,56 @@ int nvgpu_ecc_counter_init(struct gk20a *g, return 0; } -int 
nvgpu_ecc_counter_init_per_lts(struct gk20a *g, - struct nvgpu_ecc_stat ***stat, const char *name) -{ - struct nvgpu_ecc_stat **stats; - u32 ltc, lts; - char ltc_str[10] = {0}, lts_str[10] = {0}; - int err = 0; - u32 ltc_count = nvgpu_ltc_get_ltc_count(g); - u32 slices_per_ltc = nvgpu_ltc_get_slices_per_ltc(g); - - stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), - ltc_count)); - if (stats == NULL) { - return -ENOMEM; - } - for (ltc = 0; ltc < ltc_count; ltc++) { - stats[ltc] = nvgpu_kzalloc(g, - nvgpu_safe_mult_u64(sizeof(*stats[ltc]), - slices_per_ltc)); - if (stats[ltc] == NULL) { - err = -ENOMEM; - goto fail; - } - } - - for (ltc = 0; ltc < ltc_count; ltc++) { - for (lts = 0; lts < slices_per_ltc; lts++) { - /** - * Store stats name as below: - * ltc_lts_ - */ - (void)strcpy(stats[ltc][lts].name, "ltc"); - (void)nvgpu_strnadd_u32(ltc_str, ltc, - sizeof(ltc_str), 10U); - (void)strncat(stats[ltc][lts].name, ltc_str, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[ltc][lts].name)); - (void)strncat(stats[ltc][lts].name, "_lts", - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[ltc][lts].name)); - (void)nvgpu_strnadd_u32(lts_str, lts, - sizeof(lts_str), 10U); - (void)strncat(stats[ltc][lts].name, lts_str, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[ltc][lts].name)); - (void)strncat(stats[ltc][lts].name, "_", - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[ltc][lts].name)); - (void)strncat(stats[ltc][lts].name, name, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[ltc][lts].name)); - - nvgpu_ecc_stat_add(g, &stats[ltc][lts]); - } - } - - *stat = stats; - -fail: - if (err != 0) { - while (ltc-- > 0u) { - nvgpu_kfree(g, stats[ltc]); - } - - nvgpu_kfree(g, stats); - } - - return err; -} - -int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g, - struct nvgpu_ecc_stat **stat, const char *name) -{ - u32 i; - u32 num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); - struct nvgpu_ecc_stat *stats; - char fbpa_str[10] = {0}; - - stats = nvgpu_kzalloc(g, 
nvgpu_safe_mult_u64(sizeof(*stats), - (size_t)num_fbpa)); - if (stats == NULL) { - return -ENOMEM; - } - - for (i = 0; i < num_fbpa; i++) { - /** - * Store stats name as below: - * fbpa_ - */ - (void)strcpy(stats[i].name, "fbpa"); - (void)nvgpu_strnadd_u32(fbpa_str, i, sizeof(fbpa_str), 10U); - (void)strncat(stats[i].name, fbpa_str, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[i].name)); - (void)strncat(stats[i].name, "_", - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[i].name)); - (void)strncat(stats[i].name, name, - NVGPU_ECC_STAT_NAME_MAX_SIZE - - strlen(stats[i].name)); - - nvgpu_ecc_stat_add(g, &stats[i]); - } - - *stat = stats; - return 0; -} - -/* helper function that frees the count array if non-NULL. */ -static void free_ecc_stat_count_array(struct gk20a *g, - struct nvgpu_ecc_stat **stat, - u32 gpc_count) -{ - u32 i; - - if (stat != NULL) { - for (i = 0; i < gpc_count; i++) { - nvgpu_kfree(g, stat[i]); - } - nvgpu_kfree(g, stat); - } -} - /* release all ecc_stat */ void nvgpu_ecc_free(struct gk20a *g) { struct nvgpu_ecc *ecc = &g->ecc; - struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); - u32 gpc_count; - u32 i; - if (gr_config == NULL) { - return; + nvgpu_gr_ecc_free(g); + nvgpu_ltc_ecc_free(g); + + if (g->ops.fb.fb_ecc_free != NULL) { + g->ops.fb.fb_ecc_free(g); } - gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); - - free_ecc_stat_count_array(g, ecc->gr.sm_lrf_ecc_single_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_lrf_ecc_double_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_sec_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_sed_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_ded_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_sec_pipe0_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_ded_pipe0_count, - gpc_count); - free_ecc_stat_count_array(g, 
ecc->gr.tex_unique_ecc_sec_pipe0_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_ded_pipe0_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_sec_pipe1_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_ded_pipe1_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_sec_pipe1_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_ded_pipe1_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, - ecc->gr.sm_l1_tag_ecc_uncorrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_cbu_ecc_corrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_l1_data_ecc_corrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, - ecc->gr.sm_l1_data_ecc_uncorrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, ecc->gr.sm_icache_ecc_corrected_err_count, - gpc_count); - free_ecc_stat_count_array(g, - ecc->gr.sm_icache_ecc_uncorrected_err_count, - gpc_count); - - nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count); - - for (i = 0; i < nvgpu_ltc_get_ltc_count(g); i++) { - if (ecc->ltc.ecc_sec_count != NULL) { - nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]); - } - - if (ecc->ltc.ecc_ded_count != NULL) { - nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]); - } + if (g->ops.fb.fbpa_ecc_free != NULL) { + g->ops.fb.fbpa_ecc_free(g); } - nvgpu_kfree(g, 
ecc->ltc.ecc_sec_count); - nvgpu_kfree(g, ecc->ltc.ecc_ded_count); - nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count); - - nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count); - nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count); - - nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count); - nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count); + if (g->ops.pmu.ecc_free != NULL) { + g->ops.pmu.ecc_free(g); + } (void)memset(ecc, 0, sizeof(*ecc)); - - ecc->initialized = false; } int nvgpu_ecc_init_support(struct gk20a *g) { + struct nvgpu_ecc *ecc = &g->ecc; + + if (ecc->initialized) { + return 0; + } + + nvgpu_init_list_node(&ecc->stats_list); + + return 0; +} + +/** + * Note that this function is to be called after all units requiring ecc stats + * have added entries to ecc->stats_list. 
+ */ +int nvgpu_ecc_finalize_support(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_SYSFS int err; +#endif if (g->ecc.initialized) { return 0; } - if (g->ops.gr.ecc.init == NULL) { - return 0; - } - - nvgpu_ecc_init(g); - err = g->ops.gr.ecc.init(g); - if (err != 0) { - return err; - } - #ifdef CONFIG_NVGPU_SYSFS err = nvgpu_ecc_sysfs_init(g); if (err != 0) { @@ -428,7 +115,7 @@ int nvgpu_ecc_init_support(struct gk20a *g) void nvgpu_ecc_remove_support(struct gk20a *g) { - if (g->ops.gr.ecc.init == NULL) { + if (!g->ecc.initialized) { return; } diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index 1d312a263..ee831fc01 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -487,6 +487,13 @@ static int gr_init_setup_sw(struct gk20a *g) goto clean_up; } + if (g->ops.gr.ecc.init != NULL && !g->ecc.initialized) { + err = g->ops.gr.ecc.init(g); + if (err != 0) { + goto clean_up; + } + } + gr->remove_support = gr_remove_support; gr->sw_ready = true; diff --git a/drivers/gpu/nvgpu/common/gr/gr_ecc.c b/drivers/gpu/nvgpu/common/gr/gr_ecc.c new file mode 100644 index 000000000..3d9356721 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_ecc.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, + struct nvgpu_ecc_stat ***stat, const char *name) +{ + struct nvgpu_ecc_stat **stats; + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + u32 gpc, tpc; + char gpc_str[10] = {0}, tpc_str[10] = {0}; + int err = 0; + + stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), + gpc_count)); + if (stats == NULL) { + return -ENOMEM; + } + for (gpc = 0; gpc < gpc_count; gpc++) { + stats[gpc] = nvgpu_kzalloc(g, + nvgpu_safe_mult_u64(sizeof(*stats[gpc]), + nvgpu_gr_config_get_gpc_tpc_count(gr_config, + gpc))); + if (stats[gpc] == NULL) { + err = -ENOMEM; + goto fail; + } + } + + for (gpc = 0; gpc < gpc_count; gpc++) { + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc); + tpc++) { + /** + * Store stats name as below: + * gpc_tpc_ + */ + (void)strcpy(stats[gpc][tpc].name, "gpc"); + (void)nvgpu_strnadd_u32(gpc_str, gpc, + sizeof(gpc_str), 10U); + (void)strncat(stats[gpc][tpc].name, gpc_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + (void)strncat(stats[gpc][tpc].name, "_tpc", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + (void)nvgpu_strnadd_u32(tpc_str, tpc, + sizeof(tpc_str), 10U); + (void)strncat(stats[gpc][tpc].name, tpc_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + 
(void)strncat(stats[gpc][tpc].name, "_", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + (void)strncat(stats[gpc][tpc].name, name, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + + nvgpu_ecc_stat_add(g, &stats[gpc][tpc]); + } + } + + *stat = stats; + +fail: + if (err != 0) { + while (gpc-- != 0u) { + nvgpu_kfree(g, stats[gpc]); + } + + nvgpu_kfree(g, stats); + } + + return err; +} + +int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name) +{ + struct nvgpu_ecc_stat *stats; + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + u32 gpc; + char gpc_str[10] = {0}; + + stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), + gpc_count)); + if (stats == NULL) { + return -ENOMEM; + } + + for (gpc = 0; gpc < gpc_count; gpc++) { + /** + * Store stats name as below: + * gpc_ + */ + (void)strcpy(stats[gpc].name, "gpc"); + (void)nvgpu_strnadd_u32(gpc_str, gpc, sizeof(gpc_str), 10U); + (void)strncat(stats[gpc].name, gpc_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc].name)); + (void)strncat(stats[gpc].name, "_", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc].name)); + (void)strncat(stats[gpc].name, name, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc].name)); + + nvgpu_ecc_stat_add(g, &stats[gpc]); + } + + *stat = stats; + return 0; +} + +/* helper function that frees the count array if non-NULL. 
*/ +static void free_ecc_stat_count_array(struct gk20a *g, + struct nvgpu_ecc_stat **stat, + u32 gpc_count) +{ + u32 i; + + if (stat != NULL) { + for (i = 0; i < gpc_count; i++) { + nvgpu_kfree(g, stat[i]); + } + nvgpu_kfree(g, stat); + } +} + +void nvgpu_gr_ecc_free(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + u32 gpc_count; + + if (gr_config == NULL) { + return; + } + + gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + + free_ecc_stat_count_array(g, ecc->gr.sm_lrf_ecc_single_err_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.sm_lrf_ecc_double_err_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_sec_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_sed_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.sm_shm_ecc_ded_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_sec_pipe0_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_ded_pipe0_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_sec_pipe0_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_ded_pipe0_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_sec_pipe1_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.tex_ecc_total_ded_pipe1_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_sec_pipe1_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.tex_unique_ecc_ded_pipe1_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count, + gpc_count); + free_ecc_stat_count_array(g, + ecc->gr.sm_l1_tag_ecc_uncorrected_err_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.sm_cbu_ecc_corrected_err_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count, + gpc_count); + free_ecc_stat_count_array(g, 
ecc->gr.sm_l1_data_ecc_corrected_err_count, + gpc_count); + free_ecc_stat_count_array(g, + ecc->gr.sm_l1_data_ecc_uncorrected_err_count, + gpc_count); + free_ecc_stat_count_array(g, ecc->gr.sm_icache_ecc_corrected_err_count, + gpc_count); + free_ecc_stat_count_array(g, + ecc->gr.sm_icache_ecc_uncorrected_err_count, + gpc_count); + + nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count); +} diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index d1a5e1149..1182c139e 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -502,6 +502,13 @@ int nvgpu_finalize_poweron(struct gk20a *g) * Do this early so any early VMs that get made are capable of * mapping buffers. */ + /** + * ECC support initialization is split into generic init + * followed by per unit initialization and ends with sysfs + * support init. This is done to setup ECC data structures + * prior to enabling interrupts for corresponding units. 
+ */ + NVGPU_INIT_TABLE_ENTRY(g->ops.ecc.ecc_init_support, NO_FLAG), NVGPU_INIT_TABLE_ENTRY(g->ops.mm.pd_cache_init, NO_FLAG), NVGPU_INIT_TABLE_ENTRY(&nvgpu_falcons_sw_init, NO_FLAG), NVGPU_INIT_TABLE_ENTRY(g->ops.pmu.pmu_early_init, NO_FLAG), @@ -561,7 +568,12 @@ int nvgpu_finalize_poweron(struct gk20a *g) #endif NVGPU_INIT_TABLE_ENTRY(g->ops.fbp.fbp_init_support, NO_FLAG), NVGPU_INIT_TABLE_ENTRY(g->ops.gr.gr_init_support, NO_FLAG), - NVGPU_INIT_TABLE_ENTRY(g->ops.gr.ecc.ecc_init_support, NO_FLAG), + /** + * All units requiring ECC stats must initialize ECC counters + * before this call to finalize ECC support. + */ + NVGPU_INIT_TABLE_ENTRY(g->ops.ecc.ecc_finalize_support, + NO_FLAG), NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_release_tpc_pg_lock, NO_FLAG), #ifdef CONFIG_NVGPU_LS_PMU @@ -732,8 +744,8 @@ static void gk20a_free_cb(struct nvgpu_ref *refcount) } #endif - if (g->ops.gr.ecc.ecc_remove_support != NULL) { - g->ops.gr.ecc.ecc_remove_support(g); + if (g->ops.ecc.ecc_remove_support != NULL) { + g->ops.ecc.ecc_remove_support(g); } if (g->remove_support != NULL) { diff --git a/drivers/gpu/nvgpu/common/ltc/ltc.c b/drivers/gpu/nvgpu/common/ltc/ltc.c index c4ed4c14d..c16f59b96 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc.c @@ -24,6 +24,7 @@ #include #include #include +#include void nvgpu_ltc_remove_support(struct gk20a *g) { @@ -43,10 +44,10 @@ void nvgpu_ltc_remove_support(struct gk20a *g) int nvgpu_init_ltc_support(struct gk20a *g) { struct nvgpu_ltc *ltc = g->ltc; + int err; nvgpu_log_fn(g, " "); - g->mm.ltc_enabled_current = true; g->mm.ltc_enabled_target = true; @@ -63,6 +64,15 @@ int nvgpu_init_ltc_support(struct gk20a *g) g->ops.ltc.init_fs_state(g); } + if (g->ops.ltc.ecc_init != NULL && !g->ecc.initialized) { + err = g->ops.ltc.ecc_init(g); + if (err != 0) { + nvgpu_kfree(g, ltc); + g->ltc = NULL; + return err; + } + } + return 0; } @@ -94,3 +104,91 @@ u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g) { return 
g->ltc->cacheline_size; } + +int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, + struct nvgpu_ecc_stat ***stat, const char *name) +{ + struct nvgpu_ecc_stat **stats; + u32 ltc, lts; + char ltc_str[10] = {0}, lts_str[10] = {0}; + int err = 0; + u32 ltc_count = nvgpu_ltc_get_ltc_count(g); + u32 slices_per_ltc = nvgpu_ltc_get_slices_per_ltc(g); + + stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), + ltc_count)); + if (stats == NULL) { + return -ENOMEM; + } + for (ltc = 0; ltc < ltc_count; ltc++) { + stats[ltc] = nvgpu_kzalloc(g, + nvgpu_safe_mult_u64(sizeof(*stats[ltc]), + slices_per_ltc)); + if (stats[ltc] == NULL) { + err = -ENOMEM; + goto fail; + } + } + + for (ltc = 0; ltc < ltc_count; ltc++) { + for (lts = 0; lts < slices_per_ltc; lts++) { + /** + * Store stats name as below: + * ltc_lts_ + */ + (void)strcpy(stats[ltc][lts].name, "ltc"); + (void)nvgpu_strnadd_u32(ltc_str, ltc, + sizeof(ltc_str), 10U); + (void)strncat(stats[ltc][lts].name, ltc_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + (void)strncat(stats[ltc][lts].name, "_lts", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + (void)nvgpu_strnadd_u32(lts_str, lts, + sizeof(lts_str), 10U); + (void)strncat(stats[ltc][lts].name, lts_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + (void)strncat(stats[ltc][lts].name, "_", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + (void)strncat(stats[ltc][lts].name, name, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + + nvgpu_ecc_stat_add(g, &stats[ltc][lts]); + } + } + + *stat = stats; + +fail: + if (err != 0) { + while (ltc-- > 0u) { + nvgpu_kfree(g, stats[ltc]); + } + + nvgpu_kfree(g, stats); + } + + return err; +} + +void nvgpu_ltc_ecc_free(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + u32 i; + + for (i = 0; i < nvgpu_ltc_get_ltc_count(g); i++) { + if (ecc->ltc.ecc_sec_count != NULL) { + nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]); + } + + if 
(ecc->ltc.ecc_ded_count != NULL) { + nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]); + } + } + nvgpu_kfree(g, ecc->ltc.ecc_sec_count); + nvgpu_kfree(g, ecc->ltc.ecc_ded_count); +} diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index 587c60164..ec1d5faa2 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -543,6 +543,20 @@ static int nvgpu_init_mm_setup_sw(struct gk20a *g) } } + if (g->ops.fb.fb_ecc_init != NULL && !g->ecc.initialized) { + err = g->ops.fb.fb_ecc_init(g); + if (err != 0) { + return err; + } + } + + if (g->ops.fb.fbpa_ecc_init != NULL && !g->ecc.initialized) { + err = g->ops.fb.fbpa_ecc_init(g); + if (err != 0) { + return err; + } + } + mm->remove_support = nvgpu_remove_mm_support; #ifdef CONFIG_NVGPU_DGPU mm->remove_ce_support = nvgpu_remove_mm_ce_support; diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c index dce4ef5e9..b875726ed 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu.c @@ -181,6 +181,15 @@ int nvgpu_pmu_early_init(struct gk20a *g) goto exit; } + if (g->ops.pmu.ecc_init != NULL && !g->ecc.initialized) { + err = g->ops.pmu.ecc_init(g); + if (err != 0) { + nvgpu_kfree(g, pmu); + g->pmu = NULL; + goto exit; + } + } + #ifdef CONFIG_NVGPU_LS_PMU err = nvgpu_pmu_rtos_early_init(g, pmu); #endif diff --git a/drivers/gpu/nvgpu/hal/fb/fb_gv11b.h b/drivers/gpu/nvgpu/hal/fb/fb_gv11b.h index 8b939403c..7aa8b18e9 100644 --- a/drivers/gpu/nvgpu/hal/fb/fb_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fb/fb_gv11b.h @@ -35,4 +35,16 @@ struct nvgpu_cbc; void gv11b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc); #endif +/* + * @brief Allocate and initialize counters for memories within FB. + * + * @param stat [in] Name of the nvgpu_ecc_stat pointer field in g->ecc.fb. 
+ * + */ +#define NVGPU_ECC_COUNTER_INIT_FB(stat) \ + nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat) + +int gv11b_fb_ecc_init(struct gk20a *g); +void gv11b_fb_ecc_free(struct gk20a *g); + #endif /* NVGPU_FB_GV11B_H */ diff --git a/drivers/gpu/nvgpu/hal/fb/fb_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fb/fb_gv11b_fusa.c index 91f61cb18..81c60846b 100644 --- a/drivers/gpu/nvgpu/hal/fb/fb_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fb/fb_gv11b_fusa.c @@ -115,3 +115,47 @@ void gv11b_fb_init_fs_state(struct gk20a *g) nvgpu_writel(g, fb_priv_mmu_phy_secure_r(), U32_MAX); } } + +int gv11b_fb_ecc_init(struct gk20a *g) +{ + int err = 0; + + err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count); + if (err != 0) { + goto init_fb_done; + } + err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count); + if (err != 0) { + goto init_fb_done; + } + err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count); + if (err != 0) { + goto init_fb_done; + } + err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count); + if (err != 0) { + goto init_fb_done; + } + err = NVGPU_ECC_COUNTER_INIT_FB( + mmu_fillunit_ecc_uncorrected_err_count); + if (err != 0) { + goto init_fb_done; + } + err = NVGPU_ECC_COUNTER_INIT_FB( + mmu_fillunit_ecc_corrected_err_count); + +init_fb_done: + return err; +} + +void gv11b_fb_ecc_free(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count); +} diff --git a/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.c b/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.c index 4659a2ef0..63312d791 100644 --- a/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.c +++ 
b/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.c @@ -22,6 +22,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include #include #include #include @@ -52,11 +53,6 @@ static void tu104_fbpa_handle_ecc_intr(struct gk20a *g, status = gk20a_readl(g, offset + fbpa_0_ecc_status_r(subp_id)); - if (!g->ecc.initialized) { - nvgpu_info(g, "ecc support is not initialized"); - goto out; - } - if ((status & fbpa_0_ecc_status_sec_counter_overflow_pending_f()) != 0U) { nvgpu_err(g, "fbpa %u subp %u ecc sec counter overflow", fbpa_id, subp_id); @@ -81,7 +77,6 @@ static void tu104_fbpa_handle_ecc_intr(struct gk20a *g, g->ecc.fbpa.fbpa_ecc_ded_err_count[cnt_idx].counter += ded_cnt; } -out: gk20a_writel(g, offset + fbpa_0_ecc_status_r(subp_id), status); } @@ -109,3 +104,71 @@ void tu104_fbpa_handle_intr(struct gk20a *g, u32 fbpa_id) tu104_fbpa_handle_ecc_intr(g, fbpa_id, 1u); } } + +int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name) +{ + u32 i; + u32 num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); + struct nvgpu_ecc_stat *stats; + char fbpa_str[10] = {0}; + + stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), + (size_t)num_fbpa)); + if (stats == NULL) { + return -ENOMEM; + } + + for (i = 0; i < num_fbpa; i++) { + /** + * Store stats name as below: + * fbpa_ + */ + (void)strcpy(stats[i].name, "fbpa"); + (void)nvgpu_strnadd_u32(fbpa_str, i, sizeof(fbpa_str), 10U); + (void)strncat(stats[i].name, fbpa_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[i].name)); + (void)strncat(stats[i].name, "_", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[i].name)); + (void)strncat(stats[i].name, name, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[i].name)); + + nvgpu_ecc_stat_add(g, &stats[i]); + } + + *stat = stats; + return 0; +} + +int tu104_fbpa_ecc_init(struct gk20a *g) +{ + int err; + + err = NVGPU_ECC_COUNTER_INIT_PER_FBPA(fbpa_ecc_sec_err_count); + if (err != 0) { + goto done; + } + err = 
NVGPU_ECC_COUNTER_INIT_PER_FBPA(fbpa_ecc_ded_err_count); + if (err != 0) { + goto done; + } + +done: + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + nvgpu_ecc_free(g); + } + + return err; +} + +void tu104_fbpa_ecc_free(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count); + nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count); +} diff --git a/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.h b/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.h index 326c6ad12..f15ff259a 100644 --- a/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.h +++ b/drivers/gpu/nvgpu/hal/fbpa/fbpa_tu104.h @@ -1,7 +1,7 @@ /* * TU104 FBPA * - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,4 +30,26 @@ struct gk20a; int tu104_fbpa_init(struct gk20a *g); void tu104_fbpa_handle_intr(struct gk20a *g, u32 fbpa_id); +/** + * @brief Allocate and initialize error counters for all fbpa instances. + * + * @param g [in] The GPU driver struct. + * @param stat [out] Pointer to array of fbpa error counters. + * @param name [in] Unique name for error counter. + * + * Calculates the total number of fbpa instances, allocates memory for each + * instance of error counter, initializes the counter with 0 and the specified + * string identifier. Finally the counter is added to the stats_list of + * struct nvgpu_ecc. + * + * @return 0 in case of success, less than 0 for failure. 
+ */ +int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name); +#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \ + nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat) + +int tu104_fbpa_ecc_init(struct gk20a *g); +void tu104_fbpa_ecc_free(struct gk20a *g); + #endif /* NVGPU_FBPA_TU104_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.c b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.c index 19a5a38fc..7ac2bc59f 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.c @@ -21,8 +21,8 @@ */ #include -#include #include +#include #include @@ -163,32 +163,11 @@ init_tpc_err: return err; } -static int gp10b_ecc_init_lts(struct gk20a *g) -{ - int err = 0; - - err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count); - if (err != 0) { - goto init_lts_err; - } - err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count); - -init_lts_err: - return err; -} - -int gp10b_ecc_init(struct gk20a *g) +int gp10b_gr_ecc_init(struct gk20a *g) { int err = 0; err = gp10b_ecc_init_tpc(g); - if (err != 0) { - goto done; - } - - err = gp10b_ecc_init_lts(g); - -done: if (err != 0) { nvgpu_err(g, "ecc counter allocate failed, err=%d", err); nvgpu_ecc_free(g); diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.h b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.h index 7d62d0428..94eaa1751 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.h +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gp10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,6 +26,6 @@ struct gk20a; void gp10b_ecc_detect_enabled_units(struct gk20a *g); -int gp10b_ecc_init(struct gk20a *g); +int gp10b_gr_ecc_init(struct gk20a *g); #endif /* NVGPU_ECC_GP10B_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b.h b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b.h index efffa1a16..a61f72101 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b.h @@ -31,7 +31,8 @@ struct nvgpu_hw_err_inject_info; struct nvgpu_hw_err_inject_info_desc; void gv11b_ecc_detect_enabled_units(struct gk20a *g); -int gv11b_ecc_init(struct gk20a *g); + +int gv11b_gr_ecc_init(struct gk20a *g); #ifdef CONFIG_NVGPU_INJECT_HWERR void gv11b_gr_intr_inject_fecs_ecc_error(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b_fusa.c index f1857d443..0b2cd9f20 100644 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_gv11b_fusa.c @@ -21,7 +21,7 @@ */ #include -#include +#include #include #include @@ -251,69 +251,7 @@ init_gpc_done: return err; } -static int gv11b_ecc_init_fb(struct gk20a *g) -{ - int err = 0; - - err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count); - if (err != 0) { - goto init_fb_done; - } - err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count); - if (err != 0) { - goto init_fb_done; - } - err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count); - if (err != 0) { - goto init_fb_done; - } - err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count); - if (err != 0) { - goto init_fb_done; - } - err = NVGPU_ECC_COUNTER_INIT_FB( - mmu_fillunit_ecc_uncorrected_err_count); - if (err != 0) { - goto init_fb_done; - } - err = NVGPU_ECC_COUNTER_INIT_FB( - mmu_fillunit_ecc_corrected_err_count); - -init_fb_done: - return 
err; -} - -static int gv11b_ecc_init_other_units(struct gk20a *g) -{ - int err = 0; - - err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count); - if (err != 0) { - goto init_other_done; - } - err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count); - if (err != 0) { - goto init_other_done; - } - err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count); - if (err != 0) { - goto init_other_done; - } - err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count); - if (err != 0) { - goto init_other_done; - } - err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count); - if (err != 0) { - goto init_other_done; - } - err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count); - -init_other_done: - return err; -} - -int gv11b_ecc_init(struct gk20a *g) +int gv11b_gr_ecc_init(struct gk20a *g) { int err; @@ -327,12 +265,14 @@ int gv11b_ecc_init(struct gk20a *g) goto done; } - err = gv11b_ecc_init_fb(g); + err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count); if (err != 0) { goto done; } - - err = gv11b_ecc_init_other_units(g); done: if (err != 0) { diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_tu104.c b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_tu104.c deleted file mode 100644 index 3caf4724b..000000000 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_tu104.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include -#include - -#include "ecc_gv11b.h" -#include "ecc_tu104.h" - -int tu104_ecc_init(struct gk20a *g) -{ - int err; - - err = gv11b_ecc_init(g); - if (err != 0) { - return err; - } - - err = NVGPU_ECC_COUNTER_INIT_PER_FBPA(fbpa_ecc_sec_err_count); - if (err != 0) { - goto done; - } - err = NVGPU_ECC_COUNTER_INIT_PER_FBPA(fbpa_ecc_ded_err_count); - if (err != 0) { - goto done; - } - -done: - if (err != 0) { - nvgpu_err(g, "ecc counter allocate failed, err=%d", err); - nvgpu_ecc_free(g); - } - - return err; -} diff --git a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_tu104.h b/drivers/gpu/nvgpu/hal/gr/ecc/ecc_tu104.h deleted file mode 100644 index ab846dcfd..000000000 --- a/drivers/gpu/nvgpu/hal/gr/ecc/ecc_tu104.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef NVGPU_ECC_TU104_H -#define NVGPU_ECC_TU104_H - -struct gk20a; - -int tu104_ecc_init(struct gk20a *g); - -#endif /* NVGPU_ECC_TU104_H */ diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 3e046708d..b3742d36c 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -179,7 +179,13 @@ static const struct gpu_ops gp10b_ops = { .bios_sw_init = nvgpu_bios_sw_init, }, #endif /* CONFIG_NVGPU_DGPU */ + .ecc = { + .ecc_init_support = nvgpu_ecc_init_support, + .ecc_finalize_support = nvgpu_ecc_finalize_support, + .ecc_remove_support = nvgpu_ecc_remove_support, + }, .ltc = { + .ecc_init = gp10b_lts_ecc_init, .init_ltc_support = nvgpu_init_ltc_support, .ltc_remove_support = nvgpu_ltc_remove_support, .determine_L2_size_bytes = gp10b_determine_L2_size_bytes, @@ -280,10 +286,8 @@ static const struct gpu_ops gp10b_ops = { .esr_bpt_pending_events = gm20b_gr_esr_bpt_pending_events, #endif /* CONFIG_NVGPU_DEBUGGER */ .ecc = { - .ecc_init_support = nvgpu_ecc_init_support, - .ecc_remove_support = nvgpu_ecc_remove_support, .detect = gp10b_ecc_detect_enabled_units, - .init = gp10b_ecc_init, + .init = gp10b_gr_ecc_init, }, .ctxsw_prog = { .hw_get_fecs_header_size = @@ -1233,6 +1237,7 @@ int gp10b_init_hal(struct gk20a *g) gops->acr = gp10b_ops.acr; gops->bios = gp10b_ops.bios; + gops->ecc = gp10b_ops.ecc; gops->fbp = gp10b_ops.fbp; gops->ltc = gp10b_ops.ltc; #ifdef CONFIG_NVGPU_COMPRESSION diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 7cf3517f9..061ea89c5 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -217,7 +217,13 @@ static const struct gpu_ops gv11b_ops = { .bios_sw_init = nvgpu_bios_sw_init, }, #endif /* CONFIG_NVGPU_DGPU */ + .ecc = { + .ecc_init_support = nvgpu_ecc_init_support, + .ecc_finalize_support = nvgpu_ecc_finalize_support, + .ecc_remove_support = 
nvgpu_ecc_remove_support, + }, .ltc = { + .ecc_init = gv11b_lts_ecc_init, .init_ltc_support = nvgpu_init_ltc_support, .ltc_remove_support = nvgpu_ltc_remove_support, #ifdef CONFIG_NVGPU_INJECT_HWERR @@ -337,10 +343,8 @@ static const struct gpu_ops gv11b_ops = { .esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events, #endif /* CONFIG_NVGPU_DEBUGGER */ .ecc = { - .ecc_init_support = nvgpu_ecc_init_support, - .ecc_remove_support = nvgpu_ecc_remove_support, .detect = gv11b_ecc_detect_enabled_units, - .init = gv11b_ecc_init, + .init = gv11b_gr_ecc_init, #ifdef CONFIG_NVGPU_INJECT_HWERR .get_mmu_err_desc = gv11b_gr_intr_get_mmu_err_desc, @@ -772,6 +776,8 @@ static const struct gpu_ops gv11b_ops = { #endif }, .fb = { + .fb_ecc_init = gv11b_fb_ecc_init, + .fb_ecc_free = gv11b_fb_ecc_free, #ifdef CONFIG_NVGPU_INJECT_HWERR .get_hubmmu_err_desc = gv11b_fb_intr_get_hubmmu_err_desc, @@ -1160,6 +1166,8 @@ static const struct gpu_ops gv11b_ops = { .elcg_init_idle_filters = gv11b_elcg_init_idle_filters, }, .pmu = { + .ecc_init = gv11b_pmu_ecc_init, + .ecc_free = gv11b_pmu_ecc_free, #ifdef CONFIG_NVGPU_INJECT_HWERR .get_pmu_err_desc = gv11b_pmu_intr_get_err_desc, @@ -1438,6 +1446,7 @@ int gv11b_init_hal(struct gk20a *g) gops->acr = gv11b_ops.acr; gops->bios = gv11b_ops.bios; + gops->ecc = gv11b_ops.ecc; gops->fbp = gv11b_ops.fbp; gops->ltc = gv11b_ops.ltc; #ifdef CONFIG_NVGPU_COMPRESSION diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index a4e50e2b6..497f72c6c 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -111,7 +111,7 @@ #include "hal/fifo/engine_status_gv100.h" #include "hal/fifo/pbdma_status_gm20b.h" #include "hal/fifo/ctxsw_timeout_gv11b.h" -#include "hal/gr/ecc/ecc_tu104.h" +#include "hal/gr/ecc/ecc_gv11b.h" #include "hal/gr/fecs_trace/fecs_trace_gm20b.h" #include "hal/gr/fecs_trace/fecs_trace_gv11b.h" #include "hal/gr/falcon/gr_falcon_gm20b.h" @@ -256,7 +256,13 @@ static 
const struct gpu_ops tu104_ops = { #endif /* CONFIG_NVGPU_DGPU */ .get_aon_secure_scratch_reg = tu104_get_aon_secure_scratch_reg, }, + .ecc = { + .ecc_init_support = nvgpu_ecc_init_support, + .ecc_finalize_support = nvgpu_ecc_finalize_support, + .ecc_remove_support = nvgpu_ecc_remove_support, + }, .ltc = { + .ecc_init = gv11b_lts_ecc_init, .init_ltc_support = nvgpu_init_ltc_support, .ltc_remove_support = nvgpu_ltc_remove_support, .determine_L2_size_bytes = gp10b_determine_L2_size_bytes, @@ -374,10 +380,8 @@ static const struct gpu_ops tu104_ops = { .esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events, #endif /* CONFIG_NVGPU_DEBUGGER */ .ecc = { - .ecc_init_support = nvgpu_ecc_init_support, - .ecc_remove_support = nvgpu_ecc_remove_support, .detect = NULL, - .init = tu104_ecc_init, + .init = gv11b_gr_ecc_init, }, .ctxsw_prog = { .hw_get_fecs_header_size = @@ -792,6 +796,10 @@ static const struct gpu_ops tu104_ops = { #endif }, .fb = { + .fb_ecc_init = gv11b_fb_ecc_init, + .fb_ecc_free = gv11b_fb_ecc_free, + .fbpa_ecc_init = tu104_fbpa_ecc_init, + .fbpa_ecc_free = tu104_fbpa_ecc_free, .init_hw = gv11b_fb_init_hw, .init_fs_state = gp106_fb_init_fs_state, .set_mmu_page_size = NULL, @@ -1186,6 +1194,9 @@ static const struct gpu_ops tu104_ops = { }, #ifdef CONFIG_NVGPU_LS_PMU .pmu = { + .ecc_init = gv11b_pmu_ecc_init, + .ecc_free = gv11b_pmu_ecc_free, + /* Init */ .pmu_early_init = nvgpu_pmu_early_init, .pmu_rtos_init = nvgpu_pmu_rtos_init, @@ -1553,6 +1564,7 @@ int tu104_init_hal(struct gk20a *g) gops->bios = tu104_ops.bios; gops->acr = tu104_ops.acr; + gops->ecc = tu104_ops.ecc; gops->fbp = tu104_ops.fbp; gops->ltc = tu104_ops.ltc; #ifdef CONFIG_NVGPU_COMPRESSION diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c index df398c788..f2c71be5b 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -41,3 +42,22 @@ 
void gp10b_ltc_init_fs_state(struct gk20a *g) ltc_ltca_g_axi_pctrl_user_sid_f(g->ltc_streamid)); } + +int gp10b_lts_ecc_init(struct gk20a *g) +{ + int err = 0; + + err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count); + if (err != 0) { + goto init_lts_err; + } + err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count); + +init_lts_err: + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + nvgpu_ecc_free(g); + } + + return err; +} diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.h b/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.h index cd55f3ec3..304aeebcc 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.h +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.h @@ -27,6 +27,7 @@ struct gk20a; u64 gp10b_determine_L2_size_bytes(struct gk20a *g); #ifdef CONFIG_NVGPU_HAL_NON_FUSA void gp10b_ltc_init_fs_state(struct gk20a *g); +int gp10b_lts_ecc_init(struct gk20a *g); #endif void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled); diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.h b/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.h index f370cd737..460e7ebe1 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.h +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.h @@ -31,6 +31,8 @@ struct nvgpu_hw_err_inject_info; struct nvgpu_hw_err_inject_info_desc; void gv11b_ltc_init_fs_state(struct gk20a *g); +int gv11b_lts_ecc_init(struct gk20a *g); + #ifdef CONFIG_NVGPU_GRAPHICS void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g, u32 stencil_depth, diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b_fusa.c index 33c449a41..c2882fc07 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b_fusa.c @@ -57,3 +57,26 @@ void gv11b_ltc_init_fs_state(struct gk20a *g) g->ops.ltc.intr.configure(g); } + +int gv11b_lts_ecc_init(struct gk20a *g) +{ + int err = 0; + + err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count); + if (err != 0) { + goto 
done; + } + +done: + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + nvgpu_ecc_free(g); + } + + return err; +} diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h index 48ba6a5bc..c82b77f82 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h @@ -66,4 +66,7 @@ void gv11b_pmu_inject_ecc_error(struct gk20a *g, #endif /* CONFIG_NVGPU_INJECT_HWERR */ +int gv11b_pmu_ecc_init(struct gk20a *g); +void gv11b_pmu_ecc_free(struct gk20a *g); + #endif /* PMU_GV11B_H */ diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b_fusa.c index 24c01dc57..adb757966 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b_fusa.c @@ -315,3 +315,34 @@ bool gv11b_is_pmu_supported(struct gk20a *g) return false; #endif } + +int gv11b_pmu_ecc_init(struct gk20a *g) +{ + int err = 0; + + err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count); + if (err != 0) { + goto done; + } + +done: + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + nvgpu_ecc_free(g); + } + + return err; +} + +void gv11b_pmu_ecc_free(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count); +} diff --git a/drivers/gpu/nvgpu/include/nvgpu/ecc.h b/drivers/gpu/nvgpu/include/nvgpu/ecc.h index 04e818753..e0bf3e4d5 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/ecc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/ecc.h @@ -37,13 +37,13 @@ * Overview * ======== * The memories within the GPU are protected using data integrity protection - * mechanism like ecc or parity. 
This unit is responsible for allocating, - * initializing and maintaining error counters for all memories which support - * ecc/parity protection. + * mechanism like ecc or parity. This unit is responsible for maintaining + * error counters for all memories which support ecc/parity protection in + * a list. * * + Initialization: - * This unit allocates and initializes error counters (corrected and - * uncorrected) for each memory and concatenates them into a list. + * This unit concatenates error counters (corrected and uncorrected) for + * each memory into a list. * * Data Structures * =============== @@ -250,67 +250,13 @@ struct nvgpu_ecc { struct nvgpu_list_node stats_list; /** Contains the number of error statistics. */ int stats_count; - /** Flag stores the initialization status of ECC unit. */ + /** + * Indicates if ECC initialization (counters allocation and sysfs + * setup) is completed. + */ bool initialized; }; -/** - * @brief Allocate and initialize error counter specified by name for all - * gpc-tpc instances. - * - * @param g [in] The GPU driver struct. - * @param stat [out] Pointer to array of pointers of error counters. - * @param name [in] Unique name for error counter. - * - * Calculates the total number of tpcs across all gpcs within the gr unit. - * Then allocates, initializes memory to hold error counters associated with all - * tpcs, which is then added to the stats_list in struct nvgpu_ecc. - * - * @return 0 in case of success, less than 0 for failure. - */ -int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, - struct nvgpu_ecc_stat ***stat, const char *name); -/* - * @brief Allocate and initalize counter for memories common across a TPC. - * - * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. 
- * - */ -#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \ - do { \ - int err = 0; \ - err = nvgpu_ecc_counter_init_per_tpc(g, \ - &g->ecc.gr.stat, #stat);\ - if (err != 0) { \ - return err; \ - } \ - } while (false) - -/** - * @brief Allocate and initialize error counter specified by name for all gpc - * instances. - * - * @param g [in] The GPU driver struct. - * @param stat [out] Pointer to array of tpc error counters. - * @param name [in] Unique name for error counter. - * - * Calculates the total number of gpcs within the gr unit. Then allocates, - * initializes memory to hold error counters associated with all gpcs, which is - * then added to the stats_list in struct nvgpu_ecc. - * - * @return 0 in case of success, less than 0 for failure. - */ -int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, - struct nvgpu_ecc_stat **stat, const char *name); -/* - * @brief Allocate and initalize counters for memories shared across a GPC. - * - * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. - * - */ -#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \ - nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat) - /** * @brief Allocates, initializes an error counter with specified name. * @@ -326,74 +272,16 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, */ int nvgpu_ecc_counter_init(struct gk20a *g, struct nvgpu_ecc_stat **stat, const char *name); -/* - * @brief Allocate and initalize counters for memories shared within GR. - * - * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. - * - */ -#define NVGPU_ECC_COUNTER_INIT_GR(stat) \ - nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat) -/* - * @brief Allocate and initalize counters for memories within FB. - * - * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. - * - */ -#define NVGPU_ECC_COUNTER_INIT_FB(stat) \ - nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat) -/* - * @brief Allocate and initalize counter for memories within PMU. 
- * - * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. - * - */ -#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \ - nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat) /** - * @brief Allocate and initialize a error counters for all ltc-lts instances. + * @brief Concatenates the error counter to the stats list. * * @param g [in] The GPU driver struct. - * @param stat [out] Pointer to array of tpc error counters. - * @param name [in] Unique name for error counter. + * @param stat [out] Pointer to error counter. * - * Calculates the total number of ltc-lts instances, allocates memory for each - * instance of error counter, initializes the counter with 0 and the specified - * string identifier. Finally the counter is added to the stats_list of - * struct nvgpu_ecc. - * - * @return 0 in case of success, less than 0 for failure. + * The counter is added to the stats_list of struct nvgpu_ecc. */ -int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, - struct nvgpu_ecc_stat ***stat, const char *name); -/* - * @brief Allocate and initalize counters for memories within ltc-lts - * - * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. - * - */ -#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \ - nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat) - -/** - * @brief Allocate and initialize error counters for all fbpa instances. - * - * @param g [in] The GPU driver struct. - * @param stat [out] Pointer to array of tpc error counters. - * @param name [in] Unique name for error counter. - * - * Calculates the total number of fbpa instances, allocates memory for each - * instance of error counter, initializes the counter with 0 and the specified - * string identifier. Finally the counter is added to the stats_list of - * struct nvgpu_ecc. - * - * @return 0 in case of success, less than 0 for failure.
- */ -int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g, - struct nvgpu_ecc_stat **stat, const char *name); -#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \ - nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat) +void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat); /** * @brief Release memory associated with all error counters. @@ -406,8 +294,7 @@ int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g, void nvgpu_ecc_free(struct gk20a *g); /** - * @brief Allocates and initializes error counters for memories within gpu - * hardware units. + * @brief Initializes error counters list. * * @param g [in] The GPU driver struct. * @@ -422,6 +309,13 @@ int nvgpu_ecc_init_support(struct gk20a *g); */ void nvgpu_ecc_remove_support(struct gk20a *g); +/** + * @brief Finish ECC support initialization. + * + * @param g [in] The GPU driver struct. + */ +int nvgpu_ecc_finalize_support(struct gk20a *g); + #ifdef CONFIG_NVGPU_SYSFS int nvgpu_ecc_sysfs_init(struct gk20a *g); void nvgpu_ecc_sysfs_remove(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 967f40b37..ee9bf526a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -168,6 +168,7 @@ enum nvgpu_unit; #include #include #include +#include #include "hal/clk/clk_gk20a.h" @@ -272,6 +273,7 @@ struct gpu_ops { int (*acr_construct_execute)(struct gk20a *g); } acr; + struct gops_ecc ecc; struct gops_ltc ltc; #ifdef CONFIG_NVGPU_COMPRESSION struct { diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_ecc.h b/drivers/gpu/nvgpu/include/nvgpu/gops_ecc.h new file mode 100644 index 000000000..1ff9f162f --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/gops_ecc.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_GOPS_ECC_H +#define NVGPU_GOPS_ECC_H + +#include + +/** + * @file + * + * ECC HAL interface. + */ +struct gk20a; + +/** + * ECC unit hal operations. + * + * This structure stores the ECC unit hal pointers. + * + * @see gops + */ +struct gops_ecc { + /** + * @brief Initialize ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function initializes the list head for tracking the list + * of ecc error counts for all units (like GR/LTC/FB/PMU) and + * subunits of GR (like falcon/sm/gpccs/etc). + * + * @return 0 in case of success, < 0 in case of failure. + */ + int (*ecc_init_support)(struct gk20a *g); + + /** + * @brief Remove ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function frees all the memory allocated for keeping + * track of ecc error counts for each GR engine units. 
+ */ + void (*ecc_remove_support)(struct gk20a *g); + + /** + * @brief Finish ECC support initialization. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function initializes the sysfs nodes for ECC counters and + * marks ECC as initialized. + * + * @return 0 in case of success, < 0 in case of failure. + */ + int (*ecc_finalize_support)(struct gk20a *g); +}; + +#endif /* NVGPU_GOPS_ECC_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_fb.h b/drivers/gpu/nvgpu/include/nvgpu/gops_fb.h index b8bcf50db..b09a2865a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops_fb.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops_fb.h @@ -92,6 +92,50 @@ struct gops_fb_intr { * @see gpu_ops */ struct gops_fb { + /** + * @brief Initialize FB unit ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function allocates memory to track the ecc error counts + * for FB unit. + * + * @return 0 in case of success, < 0 in case of failure. + */ + int (*fb_ecc_init)(struct gk20a *g); + + /** + * @brief Free FB unit ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function deallocates memory allocated for ecc error counts + * for FB unit. + */ + void (*fb_ecc_free)(struct gk20a *g); + + /** + * @brief Initialize FBPA unit ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function allocates memory to track the ecc error counts + * for FBPA unit. + * + * @return 0 in case of success, < 0 in case of failure. + */ + int (*fbpa_ecc_init)(struct gk20a *g); + + /** + * @brief Free FBPA unit ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function deallocates memory allocated for ecc error counts + * for FBPA unit. + */ + void (*fbpa_ecc_free)(struct gk20a *g); + /** * @brief Initializes frame buffer h/w configuration. 
* diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h index c9b69f03a..d7cda1c5b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h @@ -80,29 +80,16 @@ enum nvgpu_event_id_type; */ struct gops_gr_ecc { /** - * @brief Initialize ECC support. + * @brief Initialize GR unit ECC support. * * @param g [in] Pointer to GPU driver struct. * * This function allocates memory to track the ecc error counts - * for all units (like GR/LTC/FB/PMU) and subunits of GR - * (like falcon/sm/gpccs/etc). All these allocated memory - * is tracked as a list. + * for GR unit and subunits of GR (like falcon/sm/gpccs/etc). * * @return 0 in case of success, < 0 in case of failure. - * @retval -ENOMEM if memory allocation fail for any unit. */ - int (*ecc_init_support)(struct gk20a *g); - - /** - * @brief Remove ECC support. - * - * @param g [in] Pointer to GPU driver struct. - * - * This function frees all the memory allocated for keeping - * track of ecc error counts for each GR engine units. - */ - void (*ecc_remove_support)(struct gk20a *g); + int (*init)(struct gk20a *g); /** * @brief Detect ECC enabled units in GR engine. @@ -118,7 +105,6 @@ struct gops_gr_ecc { void (*detect)(struct gk20a *g); /** @cond DOXYGEN_SHOULD_SKIP_THIS */ - int (*init)(struct gk20a *g); struct nvgpu_hw_err_inject_info_desc * (*get_mmu_err_desc) (struct gk20a *g); struct nvgpu_hw_err_inject_info_desc * (*get_gcc_err_desc) diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_ltc.h b/drivers/gpu/nvgpu/include/nvgpu/gops_ltc.h index 643792f6b..5eeb98352 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops_ltc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops_ltc.h @@ -63,6 +63,18 @@ struct gops_ltc_intr { * @see gpu_ops */ struct gops_ltc { + /** + * @brief Initialize LTC unit ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function allocates memory to track the ecc error counts + * for LTC unit. 
+ * + * @return 0 in case of success, < 0 in case of failure. + */ + int (*ecc_init)(struct gk20a *g); + /** * @brief Initialize LTC support. * diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_pmu.h b/drivers/gpu/nvgpu/include/nvgpu/gops_pmu.h index 010325244..a008b71c9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops_pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops_pmu.h @@ -37,6 +37,28 @@ struct nvgpu_hw_err_inject_info_desc; * @see gpu_ops */ struct gops_pmu { + /** + * @brief Initialize PMU unit ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function allocates memory to track the ecc error counts + * for PMU unit. + * + * @return 0 in case of success, < 0 in case of failure. + */ + int (*ecc_init)(struct gk20a *g); + + /** + * @brief Free PMU unit ECC support. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function deallocates memory allocated for ecc error counts + * for PMU unit. + */ + void (*ecc_free)(struct gk20a *g); + /** @cond DOXYGEN_SHOULD_SKIP_THIS */ /** diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h new file mode 100644 index 000000000..f7974b388 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_ECC_H +#define NVGPU_GR_ECC_H + +struct gk20a; +struct nvgpu_ecc_stat; + +/** + * @brief Allocate and initialize error counter specified by name for all + * gpc-tpc instances. + * + * @param g [in] The GPU driver struct. + * @param stat [out] Pointer to array of pointers of error counters. + * @param name [in] Unique name for error counter. + * + * Calculates the total number of tpcs across all gpcs within the gr unit. + * Then allocates, initializes memory to hold error counters associated with all + * tpcs, which is then added to the stats_list in struct nvgpu_ecc. + * + * @return 0 in case of success, less than 0 for failure. + */ +int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, + struct nvgpu_ecc_stat ***stat, const char *name); +/* + * @brief Allocate and initialize counter for memories common across a TPC. + * + * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. + * + */ +#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \ + do { \ + int err = 0; \ + err = nvgpu_ecc_counter_init_per_tpc(g, \ + &g->ecc.gr.stat, #stat);\ + if (err != 0) { \ + return err; \ + } \ + } while (false) + +/** + * @brief Allocate and initialize error counter specified by name for all gpc + * instances. + * + * @param g [in] The GPU driver struct. + * @param stat [out] Pointer to array of gpc error counters. + * @param name [in] Unique name for error counter. + * + * Calculates the total number of gpcs within the gr unit.
Then allocates, + * initializes memory to hold error counters associated with all gpcs, which is + * then added to the stats_list in struct nvgpu_ecc. + * + * @return 0 in case of success, less than 0 for failure. + */ +int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name); +/* + * @brief Allocate and initialize counters for memories shared across a GPC. + * + * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. + * + */ +#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \ + nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat) + +/* + * @brief Allocate and initialize counters for memories shared within GR. + * + * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. + * + */ +#define NVGPU_ECC_COUNTER_INIT_GR(stat) \ + nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat) + +/** + * @brief Release all GR ECC stats counters. + * + * @param g [in] The GPU driver struct. + * + * Frees all error counters associated with all gpcs in the GR unit. + */ +void nvgpu_gr_ecc_free(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/ltc.h b/drivers/gpu/nvgpu/include/nvgpu/ltc.h index 7e71b6d44..49fdd9a42 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/ltc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/ltc.h @@ -32,7 +32,7 @@ #include struct gk20a; - +struct nvgpu_ecc_stat; /** * LTC data structure. * @@ -53,6 +53,41 @@ struct nvgpu_ltc { u32 cacheline_size; }; +/** + * @brief Allocate and initialize error counters for all ltc-lts instances. + * + * @param g [in] The GPU driver struct. + * @param stat [out] Pointer to array of pointers of error counters. + * @param name [in] Unique name for error counter. + * + * Calculates the total number of ltc-lts instances, allocates memory for each + * instance of error counter, initializes the counter with 0 and the specified + * string identifier. Finally the counter is added to the stats_list of + * struct nvgpu_ecc.
+ * + * @return 0 in case of success, less than 0 for failure. + */ +int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, + struct nvgpu_ecc_stat ***stat, const char *name); + +/* + * @brief Allocate and initialize counters for memories within ltc-lts + * + * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. + * + */ +#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \ + nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat) + +/** + * @brief Release all LTC ECC stats counters. + * + * @param g [in] The GPU driver struct. + * + * Frees all error counters associated with the LTC unit. + */ +void nvgpu_ltc_ecc_free(struct gk20a *g); + /** * @brief Initialize #nvgpu_ltc structure. * diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index cd6947331..dc52d5d7f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -385,5 +385,14 @@ int nvgpu_pmu_early_init(struct gk20a *g); */ void nvgpu_pmu_remove_support(struct gk20a *g, struct nvgpu_pmu *pmu); +/* + * @brief Allocate and initialize counter for memories within PMU. + * + * @param stat [in] Address of pointer to struct nvgpu_ecc_stat. 
+ * + */ +#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \ + nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat) + #endif /* NVGPU_PMU_H */ diff --git a/userspace/units/acr/nvgpu-acr.c b/userspace/units/acr/nvgpu-acr.c index 09c59934f..19e0c313f 100644 --- a/userspace/units/acr/nvgpu-acr.c +++ b/userspace/units/acr/nvgpu-acr.c @@ -242,6 +242,11 @@ static int init_test_env(struct unit_module *m, struct gk20a *g) * preparation */ + err = g->ops.ecc.ecc_init_support(g); + if (err != 0) { + unit_return_fail(m, "ecc init failed\n"); + } + err = g->ops.mm.init_mm_support(g); if (err != 0) { unit_return_fail(m, "failed to init gk20a mm"); @@ -384,6 +389,11 @@ int test_acr_construct_execute(struct unit_module *m, unit_return_fail(m, "Bootstrap HS ACR failed"); } + err = g->ops.ecc.ecc_init_support(g); + if (err != 0) { + unit_return_fail(m, "ecc init failed\n"); + } + /* * case 2: pass g->acr as NULL to create fail scenario */ @@ -470,7 +480,6 @@ int test_acr_prepare_ucode_blob(struct unit_module *m, unit_return_fail(m, "Test env init failed\n"); } - nvgpu_mutex_acquire(&g->tpc_pg_lock); /* diff --git a/userspace/units/fifo/nvgpu-fifo.c b/userspace/units/fifo/nvgpu-fifo.c index c06568b10..a1a602bea 100644 --- a/userspace/units/fifo/nvgpu-fifo.c +++ b/userspace/units/fifo/nvgpu-fifo.c @@ -121,6 +121,7 @@ int test_fifo_init_support(struct unit_module *m, struct gk20a *g, void *args) */ g->ops.userd.setup_sw = stub_userd_setup_sw; #endif + g->ops.ecc.ecc_init_support(g); g->ops.mm.init_mm_support(g); err = nvgpu_fifo_init_support(g); diff --git a/userspace/units/gr/nvgpu-gr.c b/userspace/units/gr/nvgpu-gr.c index 8db93aed7..987226b71 100644 --- a/userspace/units/gr/nvgpu-gr.c +++ b/userspace/units/gr/nvgpu-gr.c @@ -117,6 +117,7 @@ int test_gr_init_support(struct unit_module *m, struct gk20a *g, void *args) nvgpu_gr_init(g); + g->ops.ecc.ecc_init_support(g); g->ops.ltc.init_ltc_support(g); g->ops.mm.init_mm_support(g); @@ -131,14 +132,6 @@ int test_gr_init_support(struct unit_module 
*m, struct gk20a *g, void *args) unit_return_fail(m, "nvgpu_gr_init_support returned fail\n"); } - /* gr ecc init */ - if (g->ops.gr.ecc.ecc_init_support != NULL) { - err = g->ops.gr.ecc.ecc_init_support(g); - if (err != 0) { - unit_return_fail(m, "gr_ecc_init failed\n"); - } - } - return UNIT_SUCCESS; } diff --git a/userspace/units/init/nvgpu-init.c b/userspace/units/init/nvgpu-init.c index e03fd5d19..96bfc4c1e 100644 --- a/userspace/units/init/nvgpu-init.c +++ b/userspace/units/init/nvgpu-init.c @@ -356,7 +356,7 @@ int test_get_put(struct unit_module *m, /* to cover the cases where these are set */ g->remove_support = no_return; g->gfree = no_return; - g->ops.gr.ecc.ecc_remove_support = no_return; + g->ops.ecc.ecc_remove_support = no_return; g->ops.ltc.ltc_remove_support = no_return; if (g != nvgpu_get(g)) { @@ -494,6 +494,7 @@ static void set_poweron_funcs_success(struct gk20a *g) unsigned int i = 0; /* these are the simple case of just taking a g param */ + setup_simple_init_func_success(&g->ops.ecc.ecc_init_support, i++); setup_simple_init_func_success(&g->ops.mm.pd_cache_init, i++); setup_simple_init_func_success(&g->ops.clk.init_clk_support, i++); setup_simple_init_func_success(&g->ops.nvlink.init, i++); @@ -508,7 +509,7 @@ static void set_poweron_funcs_success(struct gk20a *g) setup_simple_init_func_success(&g->ops.gr.gr_enable_hw, i++); setup_simple_init_func_success(&g->ops.fbp.fbp_init_support, i++); setup_simple_init_func_success(&g->ops.gr.gr_init_support, i++); - setup_simple_init_func_success(&g->ops.gr.ecc.ecc_init_support, i++); + setup_simple_init_func_success(&g->ops.ecc.ecc_finalize_support, i++); setup_simple_init_func_success(&g->ops.therm.init_therm_support, i++); setup_simple_init_func_success(&g->ops.ce.ce_init_support, i++); setup_simple_init_func_success(&g->ops.bus.init_hw, i++); @@ -618,7 +619,7 @@ int test_poweron_branches(struct unit_module *m, struct gk20a *g, void *args) g->ops.fb.mem_unlock = NULL; g->ops.tpc.tpc_powergate = NULL; 
g->ops.therm.elcg_init_idle_filters = NULL; - g->ops.gr.ecc.ecc_init_support = NULL; + g->ops.ecc.ecc_init_support = NULL; g->ops.channel.resume_all_serviceable_ch = NULL; nvgpu_set_power_state(g, NVGPU_STATE_POWERED_OFF); err = nvgpu_finalize_poweron(g);