gpu: nvgpu: rework ecc structure and sysfs

- Create common file common/ecc.c, which holds the common functions for
  adding and removing ECC counters.
- The common code keeps all counters on a single list, which makes it
  easy to iterate over every counter.
- Add chip-specific files that register the ECC counters.
- Add Linux-specific file os/linux/ecc_sysfs.c to export the counters
  to sysfs.
- Remove obsolete code.
- The MISRA violation for using snprintf is not resolved here; it is
  tracked in Jira NVGPU-859.

Jira NVGPUT-115

Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1763536
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Richard Zhao
Date:      2018-06-26 17:37:40 -07:00
Committer: mobile promotions
Commit:    7f14aafc2c (parent 5ff1b3fe5a)
32 changed files with 1044 additions and 1006 deletions
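
For orientation, a minimal sketch of the common structures this rework introduces. The real declarations live in include/nvgpu/ecc.h, which is not part of this excerpt, so the exact field types and the name-size constant are assumptions inferred from the code in common/ecc.c shown below:

struct nvgpu_ecc_stat {
    char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];  /* sysfs node name */
    u32 counter;                              /* error count */
    struct nvgpu_list_node node;              /* linked into nvgpu_ecc.stats_list */
};

struct nvgpu_ecc {
    struct {
        /* per-TPC counters are arrays indexed [gpc][tpc] */
        struct nvgpu_ecc_stat **sm_lrf_ecc_single_err_count;
        /* ... per-GPC and per-engine counters follow the same pattern ... */
    } gr;
    /* ltc, fb, pmu and fbpa groups are organized the same way */

    struct nvgpu_list_node stats_list;  /* every counter, in creation order */
    int stats_count;
};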


@@ -69,7 +69,8 @@ nvgpu-y += \
     os/linux/sim_pci.o \
     os/linux/os_sched.o \
     os/linux/nvlink.o \
-    os/linux/dt.o
+    os/linux/dt.o \
+    os/linux/ecc_sysfs.o

 nvgpu-$(CONFIG_GK20A_VIDMEM) += \
     os/linux/dmabuf_vidmem.o
@@ -100,7 +101,6 @@ nvgpu-$(CONFIG_TEGRA_GK20A) += \
     os/linux/module_usermode.o \
     os/linux/soc.o \
     os/linux/fuse.o \
-    os/linux/platform_ecc_sysfs.o \
     os/linux/platform_gk20a_tegra.o \
     os/linux/platform_gp10b_tegra.o \
     os/linux/platform_gv11b_tegra.o
@@ -185,6 +185,7 @@ nvgpu-y += \
     common/sim.o \
     common/sim_pci.o \
     common/fifo/submit.o \
+    common/ecc.o \
     gk20a/gk20a.o \
     gk20a/ce2_gk20a.o \
     gk20a/fifo_gk20a.o \
@@ -267,6 +268,7 @@ nvgpu-y += \
     gp10b/priv_ring_gp10b.o \
     gp10b/gp10b.o \
     gp10b/fuse_gp10b.o \
+    gp10b/ecc_gp10b.o \
     gp106/hal_gp106.o \
     gp106/mm_gp106.o \
     gp106/flcn_gp106.o \
@@ -296,6 +298,7 @@ nvgpu-y += \
     gv11b/subctx_gv11b.o \
     gv11b/regops_gv11b.o \
     gv11b/therm_gv11b.o \
+    gv11b/ecc_gv11b.o \
     gv100/mm_gv100.o \
     gv100/gr_ctx_gv100.o \
     gv100/bios_gv100.o \


@@ -49,6 +49,7 @@ srcs := common/mm/nvgpu_allocator.c \
     common/rbtree.c \
     common/ltc.c \
     common/io_common.c \
+    common/ecc.c \
     common/vbios/bios.c \
     common/falcon/falcon.c \
     common/pmu/pmu.c \
@@ -166,6 +167,7 @@ srcs := common/mm/nvgpu_allocator.c \
     gp10b/priv_ring_gp10b.c \
     gp10b/gp10b.c \
     gp10b/fuse_gp10b.c \
+    gp10b/ecc_gp10b.c \
     gv11b/gv11b.c \
     gv11b/dbg_gpu_gv11b.c \
     gv11b/mc_gv11b.c \
@@ -181,6 +183,7 @@ srcs := common/mm/nvgpu_allocator.c \
     gv11b/subctx_gv11b.c \
     gv11b/regops_gv11b.c \
     gv11b/therm_gv11b.c \
+    gv11b/ecc_gv11b.c \
     gp106/hal_gp106.c \
     gp106/mm_gp106.c \
     gp106/flcn_gp106.c \


@@ -0,0 +1,369 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gk20a/gk20a.h"
static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
{
struct nvgpu_ecc *ecc = &g->ecc;
nvgpu_init_list_node(&stat->node);
nvgpu_list_add_tail(&stat->node, &ecc->stats_list);
ecc->stats_count++;
}
static void nvgpu_ecc_init(struct gk20a *g)
{
struct nvgpu_ecc *ecc = &g->ecc;
nvgpu_init_list_node(&ecc->stats_list);
}
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
struct nvgpu_ecc_stat ***stat, const char *name)
{
struct gr_gk20a *gr = &g->gr;
struct nvgpu_ecc_stat **stats;
u32 gpc, tpc;
int err = 0;
stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
if (stats == NULL) {
return -ENOMEM;
}
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
stats[gpc] = nvgpu_kzalloc(g,
sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
if (stats[gpc] == NULL) {
err = -ENOMEM;
break;
}
}
if (err != 0) {
while (gpc-- != 0u) {
nvgpu_kfree(g, stats[gpc]);
}
nvgpu_kfree(g, stats);
return err;
}
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
snprintf(stats[gpc][tpc].name,
NVGPU_ECC_STAT_NAME_MAX_SIZE,
"gpc%d_tpc%d_%s", gpc, tpc, name);
nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
}
}
*stat = stats;
return 0;
}
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name)
{
struct gr_gk20a *gr = &g->gr;
struct nvgpu_ecc_stat *stats;
u32 gpc;
stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
if (stats == NULL) {
return -ENOMEM;
}
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
"gpc%d_%s", gpc, name);
nvgpu_ecc_stat_add(g, &stats[gpc]);
}
*stat = stats;
return 0;
}
int nvgpu_ecc_counter_init(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name)
{
struct nvgpu_ecc_stat *stats;
stats = nvgpu_kzalloc(g, sizeof(*stats));
if (stats == NULL) {
return -ENOMEM;
}
(void)strncpy(stats->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1);
nvgpu_ecc_stat_add(g, stats);
*stat = stats;
return 0;
}
int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
struct nvgpu_ecc_stat ***stat, const char *name)
{
struct gr_gk20a *gr = &g->gr;
struct nvgpu_ecc_stat **stats;
u32 ltc, lts;
int err = 0;
stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count);
if (stats == NULL) {
return -ENOMEM;
}
for (ltc = 0; ltc < g->ltc_count; ltc++) {
stats[ltc] = nvgpu_kzalloc(g,
sizeof(*stats[ltc]) * gr->slices_per_ltc);
if (stats[ltc] == NULL) {
err = -ENOMEM;
break;
}
}
if (err != 0) {
while (ltc-- > 0u) {
nvgpu_kfree(g, stats[ltc]);
}
nvgpu_kfree(g, stats);
return err;
}
for (ltc = 0; ltc < g->ltc_count; ltc++) {
for (lts = 0; lts < gr->slices_per_ltc; lts++) {
snprintf(stats[ltc][lts].name,
NVGPU_ECC_STAT_NAME_MAX_SIZE,
"ltc%d_lts%d_%s", ltc, lts, name);
nvgpu_ecc_stat_add(g, &stats[ltc][lts]);
}
}
*stat = stats;
return 0;
}
int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name)
{
int i;
int num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
struct nvgpu_ecc_stat *stats;
stats = nvgpu_kzalloc(g, sizeof(*stats) * num_fbpa);
if (stats == NULL) {
return -ENOMEM;
}
for (i = 0; i < num_fbpa; i++) {
snprintf(stats[i].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
"fbpa%d_%s", i, name);
nvgpu_ecc_stat_add(g, &stats[i]);
}
*stat = stats;
return 0;
}
/* release all ecc_stat */
void nvgpu_ecc_free(struct gk20a *g)
{
struct nvgpu_ecc *ecc = &g->ecc;
struct gr_gk20a *gr = &g->gr;
u32 i;
for (i = 0; i < gr->gpc_count; i++) {
if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
}
if (ecc->gr.sm_lrf_ecc_double_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count[i]);
}
if (ecc->gr.sm_shm_ecc_sec_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count[i]);
}
if (ecc->gr.sm_shm_ecc_sed_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count[i]);
}
if (ecc->gr.sm_shm_ecc_ded_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count[i]);
}
if (ecc->gr.tex_ecc_total_sec_pipe0_count != NULL) {
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count[i]);
}
if (ecc->gr.tex_ecc_total_ded_pipe0_count != NULL) {
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count[i]);
}
if (ecc->gr.tex_unique_ecc_sec_pipe0_count != NULL) {
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count[i]);
}
if (ecc->gr.tex_unique_ecc_ded_pipe0_count != NULL) {
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count[i]);
}
if (ecc->gr.tex_ecc_total_sec_pipe1_count != NULL) {
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count[i]);
}
if (ecc->gr.tex_ecc_total_ded_pipe1_count != NULL) {
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count[i]);
}
if (ecc->gr.tex_unique_ecc_sec_pipe1_count != NULL) {
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count[i]);
}
if (ecc->gr.tex_unique_ecc_ded_pipe1_count != NULL) {
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count[i]);
}
if (ecc->gr.sm_l1_tag_ecc_corrected_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count[i]);
}
if (ecc->gr.sm_l1_tag_ecc_uncorrected_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count[i]);
}
if (ecc->gr.sm_cbu_ecc_corrected_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count[i]);
}
if (ecc->gr.sm_cbu_ecc_uncorrected_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count[i]);
}
if (ecc->gr.sm_l1_data_ecc_corrected_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count[i]);
}
if (ecc->gr.sm_l1_data_ecc_uncorrected_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count[i]);
}
if (ecc->gr.sm_icache_ecc_corrected_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count[i]);
}
if (ecc->gr.sm_icache_ecc_uncorrected_err_count != NULL) {
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count[i]);
}
}
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count);
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count);
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count);
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count);
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count);
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count);
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count);
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count);
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count);
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count);
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count);
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count);
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count);
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count);
for (i = 0; i < g->ltc_count; i++) {
if (ecc->ltc.ecc_sec_count != NULL) {
nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]);
}
if (ecc->ltc.ecc_ded_count != NULL) {
nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]);
}
}
nvgpu_kfree(g, ecc->ltc.ecc_sec_count);
nvgpu_kfree(g, ecc->ltc.ecc_ded_count);
nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count);
nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count);
nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count);
nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count);
(void)memset(ecc, 0, sizeof(*ecc));
}
int nvgpu_ecc_init_support(struct gk20a *g)
{
int err;
if (g->ops.gr.init_ecc == NULL) {
return 0;
}
nvgpu_ecc_init(g);
err = g->ops.gr.init_ecc(g);
if (err != 0) {
return err;
}
err = nvgpu_ecc_sysfs_init(g);
if (err != 0) {
nvgpu_ecc_free(g);
return err;
}
return 0;
}
void nvgpu_ecc_remove_support(struct gk20a *g)
{
if (g->ops.gr.init_ecc == NULL) {
return;
}
nvgpu_ecc_sysfs_remove(g);
nvgpu_ecc_free(g);
}
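
Because every counter is now linked onto g->ecc.stats_list by nvgpu_ecc_stat_add(), a consumer such as the sysfs exporter only has to walk one list. A minimal sketch, assuming the list iterator nvgpu_list_for_each_entry() from the same API family as the nvgpu_init_list_node()/nvgpu_list_add_tail() calls above:

static void example_dump_ecc_counters(struct gk20a *g)
{
    struct nvgpu_ecc_stat *stat;

    /* illustrative only: print every registered ECC counter */
    nvgpu_list_for_each_entry(stat, &g->ecc.stats_list,
                              nvgpu_ecc_stat, node) {
        nvgpu_info(g, "%s: %u", stat->name, stat->counter);
    }
}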


@@ -445,9 +445,9 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
        uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s());

-    g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0] +=
+    g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter +=
        corrected_delta;
-    g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0] +=
+    g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter +=
        uncorrected_delta;

     if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m())
@@ -461,8 +461,8 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
            "ecc error address: 0x%x", ecc_addr);
     nvgpu_log(g, gpu_dbg_intr,
        "ecc error count corrected: %d, uncorrected %d",
-       g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0],
-       g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0]);
+       g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter,
+       g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter);
 }

 void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
@@ -503,9 +503,9 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
        uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s());

-    g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0] +=
+    g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter +=
        corrected_delta;
-    g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0] +=
+    g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter +=
        uncorrected_delta;

     if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m())
@@ -519,8 +519,8 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
            "ecc error address: 0x%x", ecc_addr);
     nvgpu_log(g, gpu_dbg_intr,
        "ecc error count corrected: %d, uncorrected %d",
-       g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0],
-       g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0]);
+       g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter,
+       g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter);
 }

 void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
@@ -561,9 +561,9 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
        uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s());

-    g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0] +=
+    g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter +=
        corrected_delta;
-    g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0] +=
+    g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter +=
        uncorrected_delta;

     if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m())
@@ -582,8 +582,8 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
            "ecc error address: 0x%x", ecc_addr);
     nvgpu_log(g, gpu_dbg_intr,
        "ecc error count corrected: %d, uncorrected %d",
-       g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0],
-       g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0]);
+       g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter,
+       g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter);
 }

 static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault)


@@ -25,8 +25,19 @@
  * for an implementation.
  */

+#include <nvgpu/ecc.h>
+
 #include "gk20a/dbg_gpu_gk20a.h"

 void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
 {
 }
+
+int nvgpu_ecc_sysfs_init(struct gk20a *g)
+{
+    return 0;
+}
+
+void nvgpu_ecc_sysfs_remove(struct gk20a *g)
+{
+}
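
The stubs above are for the non-Linux builds; the Linux implementation in os/linux/ecc_sysfs.c is not shown in this excerpt. A hedged sketch of what it presumably does — walk the common list and create one read-only sysfs file per counter. struct ecc_sysfs_attr and the simplified error handling here are illustrative, not the actual code, and real code would also keep the attributes around so nvgpu_ecc_sysfs_remove() can tear them down:

struct ecc_sysfs_attr {
    struct device_attribute dev_attr;
    struct nvgpu_ecc_stat *stat;
};

static ssize_t ecc_stat_show(struct device *dev,
            struct device_attribute *attr, char *buf)
{
    struct ecc_sysfs_attr *e =
        container_of(attr, struct ecc_sysfs_attr, dev_attr);

    return snprintf(buf, PAGE_SIZE, "%u\n", e->stat->counter);
}

int nvgpu_ecc_sysfs_init(struct gk20a *g)
{
    struct device *dev = dev_from_gk20a(g);
    struct nvgpu_ecc_stat *stat;

    nvgpu_list_for_each_entry(stat, &g->ecc.stats_list,
                              nvgpu_ecc_stat, node) {
        struct ecc_sysfs_attr *e = nvgpu_kzalloc(g, sizeof(*e));

        if (e == NULL)
            return -ENOMEM;  /* real code must also unwind */
        e->stat = stat;
        sysfs_attr_init(&e->dev_attr.attr);
        e->dev_attr.attr.name = stat->name;
        e->dev_attr.attr.mode = 0444;
        e->dev_attr.show = ecc_stat_show;
        if (device_create_file(dev, &e->dev_attr) != 0)
            return -EINVAL;
    }

    return 0;
}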


@@ -1,102 +0,0 @@
/*
* GK20A ECC
*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef ECC_GK20A_H
#define ECC_GK20A_H
struct gk20a_ecc_stat {
char **names;
u32 *counters;
u32 count;
#ifdef CONFIG_SYSFS
struct hlist_node hash_node;
struct device_attribute *attr_array;
#endif
};
struct ecc_gk20a {
/* Stats per engine */
struct {
struct gk20a_ecc_stat sm_lrf_single_err_count;
struct gk20a_ecc_stat sm_lrf_double_err_count;
struct gk20a_ecc_stat sm_shm_sec_count;
struct gk20a_ecc_stat sm_shm_sed_count;
struct gk20a_ecc_stat sm_shm_ded_count;
struct gk20a_ecc_stat tex_total_sec_pipe0_count;
struct gk20a_ecc_stat tex_total_ded_pipe0_count;
struct gk20a_ecc_stat tex_unique_sec_pipe0_count;
struct gk20a_ecc_stat tex_unique_ded_pipe0_count;
struct gk20a_ecc_stat tex_total_sec_pipe1_count;
struct gk20a_ecc_stat tex_total_ded_pipe1_count;
struct gk20a_ecc_stat tex_unique_sec_pipe1_count;
struct gk20a_ecc_stat tex_unique_ded_pipe1_count;
struct gk20a_ecc_stat sm_l1_tag_corrected_err_count;
struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count;
struct gk20a_ecc_stat sm_cbu_corrected_err_count;
struct gk20a_ecc_stat sm_cbu_uncorrected_err_count;
struct gk20a_ecc_stat sm_l1_data_corrected_err_count;
struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count;
struct gk20a_ecc_stat sm_icache_corrected_err_count;
struct gk20a_ecc_stat sm_icache_uncorrected_err_count;
struct gk20a_ecc_stat gcc_l15_corrected_err_count;
struct gk20a_ecc_stat gcc_l15_uncorrected_err_count;
struct gk20a_ecc_stat fecs_corrected_err_count;
struct gk20a_ecc_stat fecs_uncorrected_err_count;
struct gk20a_ecc_stat gpccs_corrected_err_count;
struct gk20a_ecc_stat gpccs_uncorrected_err_count;
struct gk20a_ecc_stat mmu_l1tlb_corrected_err_count;
struct gk20a_ecc_stat mmu_l1tlb_uncorrected_err_count;
} gr;
struct {
struct gk20a_ecc_stat l2_sec_count;
struct gk20a_ecc_stat l2_ded_count;
struct gk20a_ecc_stat l2_cache_corrected_err_count;
struct gk20a_ecc_stat l2_cache_uncorrected_err_count;
} ltc;
struct {
struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count;
struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count;
struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count;
struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count;
struct gk20a_ecc_stat mmu_fillunit_corrected_err_count;
struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count;
} fb;
struct {
struct gk20a_ecc_stat pmu_corrected_err_count;
struct gk20a_ecc_stat pmu_uncorrected_err_count;
} pmu;
struct {
struct gk20a_ecc_stat fbpa_sec_err_count;
struct gk20a_ecc_stat fbpa_ded_err_count;
} fbpa;
};
#endif /*__ECC_GK20A_H__*/
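
The removed header above shows the old layout: one gk20a_ecc_stat per counter type, with all hardware instances packed into flat names/counters arrays and sysfs plumbing (attr_array, hash_node) mixed into the common structure. The new layout, visible in the ISR hunks further down, gives every instance its own nvgpu_ecc_stat, so an update changes from:

    /* old: flat array indexed by TPC only */
    g->ecc.gr.sm_lrf_single_err_count.counters[tpc] += delta;

    /* new: one stat per GPC/TPC instance, OS-agnostic */
    g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter += delta;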


@@ -35,7 +35,6 @@ struct gk20a_ctxsw_trace;
 struct acr_desc;
 struct nvgpu_mem_alloc_tracker;
 struct dbg_profiler_object_data;
-struct ecc_gk20a;
 struct gk20a_debug_output;
 struct nvgpu_clk_pll_debug_data;
 struct nvgpu_nvhost_dev;
@@ -64,6 +63,7 @@ struct nvgpu_ctxsw_trace_filter;
 #include <nvgpu/clk_arb.h>
 #include <nvgpu/nvlink.h>
 #include <nvgpu/sim.h>
+#include <nvgpu/ecc.h>

 #include "clk_gk20a.h"
 #include "ce2_gk20a.h"
@@ -77,7 +77,6 @@ struct nvgpu_ctxsw_trace_filter;
 #include "perf/perf.h"
 #include "pmgr/pmgr.h"
 #include "therm/thrm.h"
-#include "ecc_gk20a.h"

 /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
    32 ns is the resolution of ptimer. */
@@ -384,8 +383,7 @@ struct gpu_ops {
            u32 gpc_exception);
        void (*enable_gpc_exceptions)(struct gk20a *g);
        void (*enable_exceptions)(struct gk20a *g);
-       void (*create_gr_sysfs)(struct gk20a *g);
-       void (*remove_gr_sysfs)(struct gk20a *g);
+       int (*init_ecc)(struct gk20a *g);
        u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
        int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc,
                u32 sm, struct channel_gk20a *fault_ch);
@@ -1385,7 +1383,7 @@ struct gk20a {
     struct mm_gk20a mm;
     struct nvgpu_pmu pmu;
     struct acr_desc acr;
-    struct ecc_gk20a ecc;
+    struct nvgpu_ecc ecc;
     struct clk_pmupstate clk_pmu;
     struct perf_pmupstate perf_pmu;
     struct pmgr_pmupstate pmgr_pmu;


@@ -38,6 +38,7 @@
 #include <nvgpu/mm.h>
 #include <nvgpu/ctxsw_trace.h>
 #include <nvgpu/error_notifier.h>
+#include <nvgpu/ecc.h>

 #include "gk20a.h"
 #include "gr_gk20a.h"
@@ -3127,6 +3128,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
     gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;

     gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
+
+    nvgpu_ecc_remove_support(g);
 }

 static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
@@ -4872,8 +4875,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
     gr->remove_support = gk20a_remove_gr_support;
     gr->sw_ready = true;

-    if (g->ops.gr.create_gr_sysfs)
-        g->ops.gr.create_gr_sysfs(g);
+    err = nvgpu_ecc_init_support(g);
+    if (err)
+        goto clean_up;

     nvgpu_log_fn(g, "done");
     return 0;


@@ -382,10 +382,6 @@ static const struct gpu_ops gp106_ops = {
        .update_boosted_ctx = NULL,
        .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
        .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
-#ifdef CONFIG_SYSFS
-       .create_gr_sysfs = NULL,
-       .remove_gr_sysfs = NULL,
-#endif
        .set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
        .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
        .fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,


@@ -0,0 +1,106 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/ecc.h>
#include "gk20a/gk20a.h"
#include "gp10b/ecc_gp10b.h"
int gp10b_ecc_init(struct gk20a *g)
{
int err = 0;
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sec_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sed_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_ded_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe0_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe0_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe0_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe0_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe1_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe1_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe1_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe1_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
if (err != 0) {
goto done;
}
done:
if (err != 0) {
nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
nvgpu_ecc_free(g);
}
return err;
}
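
gp10b_ecc_init() relies on NVGPU_ECC_COUNTER_INIT_* helper macros that are defined in include/nvgpu/ecc.h and not shown in this excerpt. Presumably they just bind a field of g->ecc to the matching allocator and reuse the field name as the counter name, roughly like this sketch (not the actual definitions):

#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \
    nvgpu_ecc_counter_init_per_tpc(g, &g->ecc.gr.stat, #stat)

#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
    nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)

#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
    nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)

#define NVGPU_ECC_COUNTER_INIT_FB(stat) \
    nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat)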


@@ -0,0 +1,28 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __ECC_GP10B_H__
#define __ECC_GP10B_H__
int gp10b_ecc_init(struct gk20a *g);
#endif


@@ -176,7 +176,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
                lrf_ecc_ded_status,
                &lrf_single_count_delta,
                lrf_double_count_delta);
-       g->ecc.gr.sm_lrf_single_err_count.counters[tpc] +=
+       g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
            lrf_single_count_delta;
     }
     if (lrf_ecc_ded_status) {
@@ -188,7 +188,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
                lrf_ecc_ded_status,
                &lrf_double_count_delta,
                lrf_single_count_delta);
-       g->ecc.gr.sm_lrf_double_err_count.counters[tpc] +=
+       g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
            lrf_double_count_delta;
     }
     gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
@@ -213,9 +213,9 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
        ecc_stats_reg_val =
            gk20a_readl(g,
                gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
-       g->ecc.gr.sm_shm_sec_count.counters[tpc] +=
+       g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
-       g->ecc.gr.sm_shm_sed_count.counters[tpc] +=
+       g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
                gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
@@ -235,7 +235,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
        ecc_stats_reg_val =
            gk20a_readl(g,
                gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
-       g->ecc.gr.sm_shm_ded_count.counters[tpc] +=
+       g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
        gk20a_writel(g,
@@ -276,7 +276,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        ecc_stats_reg_val = gk20a_readl(g,
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
-       g->ecc.gr.tex_total_sec_pipe0_count.counters[tpc] +=
+       g->ecc.gr.tex_ecc_total_sec_pipe0_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
        gk20a_writel(g,
@@ -285,7 +285,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        ecc_stats_reg_val = gk20a_readl(g,
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
-       g->ecc.gr.tex_unique_sec_pipe0_count.counters[tpc] +=
+       g->ecc.gr.tex_unique_ecc_sec_pipe0_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
        gk20a_writel(g,
@@ -300,7 +300,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        ecc_stats_reg_val = gk20a_readl(g,
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
-       g->ecc.gr.tex_total_sec_pipe1_count.counters[tpc] +=
+       g->ecc.gr.tex_ecc_total_sec_pipe1_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
        gk20a_writel(g,
@@ -309,7 +309,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        ecc_stats_reg_val = gk20a_readl(g,
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
-       g->ecc.gr.tex_unique_sec_pipe1_count.counters[tpc] +=
+       g->ecc.gr.tex_unique_ecc_sec_pipe1_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
        gk20a_writel(g,
@@ -332,7 +332,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        ecc_stats_reg_val = gk20a_readl(g,
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
-       g->ecc.gr.tex_total_ded_pipe0_count.counters[tpc] +=
+       g->ecc.gr.tex_ecc_total_ded_pipe0_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
        gk20a_writel(g,
@@ -341,7 +341,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        ecc_stats_reg_val = gk20a_readl(g,
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
-       g->ecc.gr.tex_unique_ded_pipe0_count.counters[tpc] +=
+       g->ecc.gr.tex_unique_ecc_ded_pipe0_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
        gk20a_writel(g,
@@ -356,7 +356,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        ecc_stats_reg_val = gk20a_readl(g,
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
-       g->ecc.gr.tex_total_ded_pipe1_count.counters[tpc] +=
+       g->ecc.gr.tex_ecc_total_ded_pipe1_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
        gk20a_writel(g,
@@ -365,7 +365,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
        ecc_stats_reg_val = gk20a_readl(g,
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
-       g->ecc.gr.tex_unique_ded_pipe1_count.counters[tpc] +=
+       g->ecc.gr.tex_unique_ecc_ded_pipe1_count[gpc][tpc].counter +=
            gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
        ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
        gk20a_writel(g,


@@ -56,6 +56,7 @@
 #include "gp10b/regops_gp10b.h"
 #include "gp10b/therm_gp10b.h"
 #include "gp10b/priv_ring_gp10b.h"
+#include "gp10b/ecc_gp10b.h"

 #include "gm20b/ltc_gm20b.h"
 #include "gm20b/gr_gm20b.h"
@@ -339,11 +340,8 @@ static const struct gpu_ops gp10b_ops = {
        .init_preemption_state = gr_gp10b_init_preemption_state,
        .update_boosted_ctx = gr_gp10b_update_boosted_ctx,
        .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
-#ifdef CONFIG_SYSFS
-       .create_gr_sysfs = gr_gp10b_create_sysfs,
-       .remove_gr_sysfs = gr_gp10b_remove_sysfs,
-#endif
        .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
+       .init_ecc = gp10b_ecc_init,
        .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
        .init_gfxp_wfi_timeout_count =
            gr_gp10b_init_gfxp_wfi_timeout_count,


@@ -249,7 +249,7 @@ void gp10b_ltc_isr(struct gk20a *g)
            ecc_stats_reg_val =
                gk20a_readl(g,
                    ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
-           g->ecc.ltc.l2_sec_count.counters[ltc*g->ltc_count + slice] +=
+           g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
                ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
            ecc_stats_reg_val &=
                ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
@@ -268,7 +268,7 @@ void gp10b_ltc_isr(struct gk20a *g)
            ecc_stats_reg_val =
                gk20a_readl(g,
                    ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
-           g->ecc.ltc.l2_ded_count.counters[ltc*g->ltc_count + slice] +=
+           g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
                ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
            ecc_stats_reg_val &=
                ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());


@@ -415,10 +415,6 @@ static const struct gpu_ops gv100_ops = {
        .update_boosted_ctx = gr_gp10b_update_boosted_ctx,
        .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
        .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
-#ifdef CONFIG_SYSFS
-       .create_gr_sysfs = gr_gv11b_create_sysfs,
-       .remove_gr_sysfs = gr_gv11b_remove_sysfs,
-#endif
        .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
        .is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
        .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,


@@ -0,0 +1,181 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/ecc.h>
#include "gk20a/gk20a.h"
#include "gv11b/ecc_gv11b.h"
int gv11b_ecc_init(struct gk20a *g)
{
int err;
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
sm_l1_tag_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
sm_l1_tag_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
sm_cbu_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
sm_cbu_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
sm_l1_data_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
sm_l1_data_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
sm_icache_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
sm_icache_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
gcc_l15_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
gcc_l15_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
gpccs_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
gpccs_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
mmu_l1tlb_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
mmu_l1tlb_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_FB(
mmu_fillunit_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_FB(
mmu_fillunit_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count);
if (err != 0) {
goto done;
}
err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count);
if (err != 0) {
goto done;
}
done:
if (err != 0) {
nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
nvgpu_ecc_free(g);
}
return err;
}


@@ -0,0 +1,28 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __ECC_GV11B_H__
#define __ECC_GV11B_H__
int gv11b_ecc_init(struct gk20a *g);
#endif


@@ -198,7 +198,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
        l1_tag_corrected_err_count_delta +=
            (is_l1_tag_ecc_corrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s());
-       g->ecc.gr.sm_l1_tag_corrected_err_count.counters[tpc] +=
+       g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter +=
            l1_tag_corrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
@@ -213,7 +213,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
        l1_tag_uncorrected_err_count_delta +=
            (is_l1_tag_ecc_uncorrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s());
-       g->ecc.gr.sm_l1_tag_uncorrected_err_count.counters[tpc] +=
+       g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter +=
            l1_tag_uncorrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
@@ -290,7 +290,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
        lrf_corrected_err_count_delta +=
            (is_lrf_ecc_corrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s());
-       g->ecc.gr.sm_lrf_single_err_count.counters[tpc] +=
+       g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
            lrf_corrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset,
@@ -305,7 +305,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
        lrf_uncorrected_err_count_delta +=
            (is_lrf_ecc_uncorrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s());
-       g->ecc.gr.sm_lrf_double_err_count.counters[tpc] +=
+       g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
            lrf_uncorrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset,
@@ -449,7 +449,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
        cbu_corrected_err_count_delta +=
            (is_cbu_ecc_corrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s());
-       g->ecc.gr.sm_cbu_corrected_err_count.counters[tpc] +=
+       g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter +=
            cbu_corrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset,
@@ -464,7 +464,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
        cbu_uncorrected_err_count_delta +=
            (is_cbu_ecc_uncorrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s());
-       g->ecc.gr.sm_cbu_uncorrected_err_count.counters[tpc] +=
+       g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter +=
            cbu_uncorrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset,
@@ -529,7 +529,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
        l1_data_corrected_err_count_delta +=
            (is_l1_data_ecc_corrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s());
-       g->ecc.gr.sm_l1_data_corrected_err_count.counters[tpc] +=
+       g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter +=
            l1_data_corrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset,
@@ -544,7 +544,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
        l1_data_uncorrected_err_count_delta +=
            (is_l1_data_ecc_uncorrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s());
-       g->ecc.gr.sm_l1_data_uncorrected_err_count.counters[tpc] +=
+       g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter +=
            l1_data_uncorrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset,
@@ -613,7 +613,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
        icache_corrected_err_count_delta +=
            (is_icache_ecc_corrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s());
-       g->ecc.gr.sm_icache_corrected_err_count.counters[tpc] +=
+       g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter +=
            icache_corrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset,
@@ -628,7 +628,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
        icache_uncorrected_err_count_delta +=
            (is_icache_ecc_uncorrected_total_err_overflow <<
             gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s());
-       g->ecc.gr.sm_icache_uncorrected_err_count.counters[tpc] +=
+       g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter +=
            icache_uncorrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset,
@@ -717,7 +717,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
        gcc_l15_corrected_err_count_delta +=
            (is_gcc_l15_ecc_corrected_total_err_overflow <<
             gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
-       g->ecc.gr.gcc_l15_corrected_err_count.counters[gpc] +=
+       g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter +=
            gcc_l15_corrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
@@ -732,7 +732,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
        gcc_l15_uncorrected_err_count_delta +=
            (is_gcc_l15_ecc_uncorrected_total_err_overflow <<
             gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
-       g->ecc.gr.gcc_l15_uncorrected_err_count.counters[gpc] +=
+       g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter +=
            gcc_l15_uncorrected_err_count_delta;
        gk20a_writel(g,
            gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
@@ -802,9 +802,9 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
        uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s());

-    g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc] +=
+    g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter +=
        corrected_delta;
-    g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc] +=
+    g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter +=
        uncorrected_delta;

     nvgpu_log(g, gpu_dbg_intr,
        "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
@@ -824,8 +824,8 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
            "ecc error address: 0x%x", ecc_addr);
     nvgpu_log(g, gpu_dbg_intr,
        "ecc error count corrected: %d, uncorrected %d",
-       g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc],
-       g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc]);
+       g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter,
+       g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter);

     return ret;
 }
@@ -880,9 +880,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
        gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset,
            gr_gpc0_gpccs_falcon_ecc_status_reset_task_f());

-    g->ecc.gr.gpccs_corrected_err_count.counters[gpc] +=
+    g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter +=
        corrected_delta;
-    g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc] +=
+    g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter +=
        uncorrected_delta;

     nvgpu_log(g, gpu_dbg_intr,
        "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
@@ -907,8 +907,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
     nvgpu_log(g, gpu_dbg_intr,
        "ecc error count corrected: %d, uncorrected %d",
-       g->ecc.gr.gpccs_corrected_err_count.counters[gpc],
-       g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc]);
+       g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter,
+       g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter);

     return ret;
 }
@@ -2419,9 +2419,9 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
        gk20a_writel(g, gr_fecs_falcon_ecc_status_r(),
            gr_fecs_falcon_ecc_status_reset_task_f());

-    g->ecc.gr.fecs_corrected_err_count.counters[0] +=
+    g->ecc.gr.fecs_ecc_corrected_err_count[0].counter +=
        corrected_delta;
-    g->ecc.gr.fecs_uncorrected_err_count.counters[0] +=
+    g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter +=
        uncorrected_delta;

     nvgpu_log(g, gpu_dbg_intr,
@@ -2450,8 +2450,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
     nvgpu_log(g, gpu_dbg_intr,
        "ecc error count corrected: %d, uncorrected %d",
-       g->ecc.gr.fecs_corrected_err_count.counters[0],
-       g->ecc.gr.fecs_uncorrected_err_count.counters[0]);
+       g->ecc.gr.fecs_ecc_corrected_err_count[0].counter,
+       g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
     }
 }

View File

@@ -84,6 +84,7 @@
#include "regops_gv11b.h"
#include "subctx_gv11b.h"
#include "therm_gv11b.h"
+#include "ecc_gv11b.h"
#include <nvgpu/ptimer.h>
#include <nvgpu/debug.h>
@@ -369,10 +370,7 @@ static const struct gpu_ops gv11b_ops = {
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
-#ifdef CONFIG_SYSFS
-.create_gr_sysfs = gr_gv11b_create_sysfs,
-.remove_gr_sysfs = gr_gv11b_remove_sysfs,
-#endif
+.init_ecc = gv11b_ecc_init,
.set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,

View File

@@ -90,13 +90,11 @@ void gv11b_ltc_isr(struct gk20a *g)
u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
u32 corrected_delta, uncorrected_delta;
u32 corrected_overflow, uncorrected_overflow;
-u32 ltc_corrected, ltc_uncorrected;
mc_intr = gk20a_readl(g, mc_intr_ltc_r());
for (ltc = 0; ltc < g->ltc_count; ltc++) {
if ((mc_intr & 1U << ltc) == 0)
continue;
-ltc_corrected = ltc_uncorrected = 0U;
for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
u32 offset = ltc_stride * ltc + lts_stride * slice;
@@ -150,8 +148,8 @@ void gv11b_ltc_isr(struct gk20a *g)
if (uncorrected_overflow)
uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
-ltc_corrected += corrected_delta;
-ltc_uncorrected += uncorrected_delta;
+g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
+g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
nvgpu_log(g, gpu_dbg_intr,
"ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
@@ -177,10 +175,6 @@ void gv11b_ltc_isr(struct gk20a *g)
}
}
-g->ecc.ltc.l2_cache_corrected_err_count.counters[ltc] +=
-ltc_corrected;
-g->ecc.ltc.l2_cache_uncorrected_err_count.counters[ltc] +=
-ltc_uncorrected;
}

View File

@@ -343,8 +343,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
if (uncorrected_overflow)
uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s());
-g->ecc.pmu.pmu_corrected_err_count.counters[0] += corrected_delta;
-g->ecc.pmu.pmu_uncorrected_err_count.counters[0] += uncorrected_delta;
+g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter += corrected_delta;
+g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter += uncorrected_delta;
nvgpu_log(g, gpu_dbg_intr,
"pmu ecc interrupt intr1: 0x%x", intr1);
@@ -371,8 +371,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
nvgpu_log(g, gpu_dbg_intr,
"ecc error count corrected: %d, uncorrected %d",
-g->ecc.pmu.pmu_corrected_err_count.counters[0],
-g->ecc.pmu.pmu_uncorrected_err_count.counters[0]);
+g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter,
+g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
}
}
}

View File

@@ -0,0 +1,162 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_ECC_H
#define NVGPU_ECC_H
#include <nvgpu/types.h>
#include <nvgpu/list.h>
#define NVGPU_ECC_STAT_NAME_MAX_SIZE 100
struct gk20a;
struct nvgpu_ecc_stat {
char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];
u32 counter;
struct nvgpu_list_node node;
};
static inline struct nvgpu_ecc_stat *nvgpu_ecc_stat_from_node(
struct nvgpu_list_node *node)
{
return (struct nvgpu_ecc_stat *)(
(uintptr_t)node - offsetof(struct nvgpu_ecc_stat, node)
);
}
struct nvgpu_ecc {
struct {
/* stats per tpc */
struct nvgpu_ecc_stat **sm_lrf_ecc_single_err_count;
struct nvgpu_ecc_stat **sm_lrf_ecc_double_err_count;
struct nvgpu_ecc_stat **sm_shm_ecc_sec_count;
struct nvgpu_ecc_stat **sm_shm_ecc_sed_count;
struct nvgpu_ecc_stat **sm_shm_ecc_ded_count;
struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe0_count;
struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe0_count;
struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe0_count;
struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe0_count;
struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe1_count;
struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe1_count;
struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe1_count;
struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe1_count;
struct nvgpu_ecc_stat **sm_l1_tag_ecc_corrected_err_count;
struct nvgpu_ecc_stat **sm_l1_tag_ecc_uncorrected_err_count;
struct nvgpu_ecc_stat **sm_cbu_ecc_corrected_err_count;
struct nvgpu_ecc_stat **sm_cbu_ecc_uncorrected_err_count;
struct nvgpu_ecc_stat **sm_l1_data_ecc_corrected_err_count;
struct nvgpu_ecc_stat **sm_l1_data_ecc_uncorrected_err_count;
struct nvgpu_ecc_stat **sm_icache_ecc_corrected_err_count;
struct nvgpu_ecc_stat **sm_icache_ecc_uncorrected_err_count;
/* stats per gpc */
struct nvgpu_ecc_stat *gcc_l15_ecc_corrected_err_count;
struct nvgpu_ecc_stat *gcc_l15_ecc_uncorrected_err_count;
struct nvgpu_ecc_stat *gpccs_ecc_corrected_err_count;
struct nvgpu_ecc_stat *gpccs_ecc_uncorrected_err_count;
struct nvgpu_ecc_stat *mmu_l1tlb_ecc_corrected_err_count;
struct nvgpu_ecc_stat *mmu_l1tlb_ecc_uncorrected_err_count;
/* stats per device */
struct nvgpu_ecc_stat *fecs_ecc_corrected_err_count;
struct nvgpu_ecc_stat *fecs_ecc_uncorrected_err_count;
} gr;
struct {
/* stats per lts */
struct nvgpu_ecc_stat **ecc_sec_count;
struct nvgpu_ecc_stat **ecc_ded_count;
} ltc;
struct {
/* stats per device */
struct nvgpu_ecc_stat *mmu_l2tlb_ecc_corrected_err_count;
struct nvgpu_ecc_stat *mmu_l2tlb_ecc_uncorrected_err_count;
struct nvgpu_ecc_stat *mmu_hubtlb_ecc_corrected_err_count;
struct nvgpu_ecc_stat *mmu_hubtlb_ecc_uncorrected_err_count;
struct nvgpu_ecc_stat *mmu_fillunit_ecc_corrected_err_count;
struct nvgpu_ecc_stat *mmu_fillunit_ecc_uncorrected_err_count;
} fb;
struct {
/* stats per device */
struct nvgpu_ecc_stat *pmu_ecc_corrected_err_count;
struct nvgpu_ecc_stat *pmu_ecc_uncorrected_err_count;
} pmu;
struct {
/* stats per fbpa */
struct nvgpu_ecc_stat *fbpa_ecc_sec_err_count;
struct nvgpu_ecc_stat *fbpa_ecc_ded_err_count;
} fbpa;
struct nvgpu_list_node stats_list;
int stats_count;
};
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
struct nvgpu_ecc_stat ***stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \
nvgpu_ecc_counter_init_per_tpc(g, &g->ecc.gr.stat, #stat)
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)
int nvgpu_ecc_counter_init(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_GR(stat) \
nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
#define NVGPU_ECC_COUNTER_INIT_FB(stat) \
nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat)
#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \
nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat)
int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
struct nvgpu_ecc_stat ***stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)
int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \
nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat)
void nvgpu_ecc_free(struct gk20a *g);
int nvgpu_ecc_init_support(struct gk20a *g);
void nvgpu_ecc_remove_support(struct gk20a *g);
/* OSes to implement */
int nvgpu_ecc_sysfs_init(struct gk20a *g);
void nvgpu_ecc_sysfs_remove(struct gk20a *g);
#endif
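
For orientation, below is a minimal, hypothetical sketch (not code from this change) of how a chip-specific init hook could register counters using only the macros and helpers declared in the header above; the function name example_chip_init_ecc and the particular counters chosen are illustrative. The actual per-chip registration is done by hooks such as gv11b_ecc_init, wired up via .init_ecc further down, whose body is not shown here.

/*
 * Hypothetical sketch, not part of this change: register a handful of
 * ECC counters with the common layer. The helpers are expected to
 * allocate the per-unit nvgpu_ecc_stat storage, name each counter after
 * the struct field, and link it into g->ecc.stats_list for the sysfs
 * code to walk later.
 */
#include <nvgpu/ecc.h>

#include "gk20a/gk20a.h"

static int example_chip_init_ecc(struct gk20a *g)
{
	int err;

	/* one counter per GPC/TPC pair */
	err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
	if (err != 0)
		goto fail;

	/* one counter per GPC */
	err = NVGPU_ECC_COUNTER_INIT_PER_GPC(gpccs_ecc_corrected_err_count);
	if (err != 0)
		goto fail;

	/* one counter per LTC slice */
	err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
	if (err != 0)
		goto fail;

	/* a single counter for the PMU falcon */
	err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count);
	if (err != 0)
		goto fail;

	return 0;

fail:
	nvgpu_ecc_free(g);
	return err;
}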

View File

@@ -0,0 +1,80 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/ecc.h>
#include "gk20a/gk20a.h"
#include "os_linux.h"
int nvgpu_ecc_sysfs_init(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
struct nvgpu_ecc *ecc = &g->ecc;
struct dev_ext_attribute *attr;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_ecc_stat *stat;
int i = 0, err;
attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count);
if (!attr)
return -ENOMEM;
nvgpu_list_for_each_entry(stat,
&ecc->stats_list, nvgpu_ecc_stat, node) {
if (i >= ecc->stats_count) {
err = -EINVAL;
nvgpu_err(g, "stats_list longer than stats_count %d",
ecc->stats_count);
break;
}
sysfs_attr_init(&attr[i].attr);
attr[i].attr.attr.name = stat->name;
attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
attr[i].var = &stat->counter;
attr[i].attr.show = device_show_int;
err = device_create_file(dev, &attr[i].attr);
if (err) {
nvgpu_err(g, "sysfs node create failed for %s\n",
stat->name);
break;
}
i++;
}
if (err) {
while (i-- > 0)
device_remove_file(dev, &attr[i].attr);
nvgpu_kfree(g, attr);
return err;
}
l->ecc_attrs = attr;
return 0;
}
void nvgpu_ecc_sysfs_remove(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_ecc *ecc = &g->ecc;
int i;
for (i = 0; i < ecc->stats_count; i++)
device_remove_file(dev, &l->ecc_attrs[i].attr);
nvgpu_kfree(g, l->ecc_attrs);
l->ecc_attrs = NULL;
}
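
As a side note, the sysfs code above leans on the stock Linux dev_ext_attribute / device_show_int pair instead of a custom show() callback per counter. A standalone, hypothetical illustration of that pattern follows (example_counter and example_attr are not part of this change):

#include <linux/device.h>
#include <linux/stat.h>
#include <linux/sysfs.h>

/* Hypothetical illustration of the pattern used above. */
static int example_counter;

static struct dev_ext_attribute example_attr = {
	/*
	 * device_show_int() formats the int pointed to by .var, so no
	 * per-counter show() implementation is needed.
	 */
	.attr = __ATTR(example_counter, S_IRUGO, device_show_int, NULL),
	.var = &example_counter,
};

/*
 * device_create_file(dev, &example_attr.attr) would then expose a
 * read-only decimal file named "example_counter" under the device's
 * sysfs directory, one file per counter, as done for the ECC stats
 * above.
 */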

View File

@@ -141,6 +141,7 @@ struct nvgpu_os_linux {
struct dentry *debugfs_dump_ctxsw_stats;
#endif
DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
+struct dev_ext_attribute *ecc_attrs;
struct gk20a_cde_app cde_app;

View File

@@ -52,11 +52,6 @@ static int nvgpu_pci_tegra_probe(struct device *dev)
static int nvgpu_pci_tegra_remove(struct device *dev)
{
-struct gk20a *g = get_gk20a(dev);
-if (g->ops.gr.remove_gr_sysfs)
-g->ops.gr.remove_gr_sysfs(g);
return 0;
}

View File

@@ -1,269 +0,0 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/hashtable.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include "os_linux.h"
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "platform_gk20a_tegra.h"
#include "platform_gp10b.h"
#include "platform_gp10b_tegra.h"
#include "platform_ecc_sysfs.h"
static u32 gen_ecc_hash_key(char *str)
{
int i = 0;
u32 hash_key = 0x811c9dc5;
while (str[i]) {
hash_key *= 0x1000193;
hash_key ^= (u32)(str[i]);
i++;
};
return hash_key;
}
static ssize_t ecc_stat_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
const char *ecc_stat_full_name = attr->attr.name;
const char *ecc_stat_base_name;
unsigned int hw_unit;
unsigned int subunit;
struct gk20a_ecc_stat *ecc_stat;
u32 hash_key;
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
&subunit) == 2) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
} else {
return snprintf(buf,
PAGE_SIZE,
"Error: Invalid ECC stat name!\n");
}
hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
hash_for_each_possible(l->ecc_sysfs_stats_htable,
ecc_stat,
hash_node,
hash_key) {
if (hw_unit >= ecc_stat->count)
continue;
if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
}
return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
}
int nvgpu_gr_ecc_stat_create(struct device *dev,
int is_l2, char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
char *ltc_unit_name = "ltc";
char *gr_unit_name = "gpc0_tpc";
char *lts_unit_name = "lts";
int num_hw_units = 0;
int num_subunits = 0;
if (is_l2 == 1)
num_hw_units = g->ltc_count;
else if (is_l2 == 2) {
num_hw_units = g->ltc_count;
num_subunits = g->gr.slices_per_ltc;
} else
num_hw_units = g->gr.tpc_count;
return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
is_l2 ? ltc_unit_name : gr_unit_name,
num_subunits ? lts_unit_name: NULL,
ecc_stat_name,
ecc_stat);
}
int nvgpu_ecc_stat_create(struct device *dev,
int num_hw_units, int num_subunits,
char *ecc_unit_name, char *ecc_subunit_name,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat)
{
int error = 0;
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int hw_unit = 0;
int subunit = 0;
int element = 0;
u32 hash_key = 0;
struct device_attribute *dev_attr_array;
int num_elements = num_subunits ? num_subunits * num_hw_units :
num_hw_units;
/* Allocate arrays */
dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
num_elements);
ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
ECC_STAT_NAME_MAX_SIZE);
}
ecc_stat->count = num_elements;
if (num_subunits) {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
for (subunit = 0; subunit < num_subunits; subunit++) {
element = hw_unit*num_subunits + subunit;
snprintf(ecc_stat->names[element],
ECC_STAT_NAME_MAX_SIZE,
"%s%d_%s%d_%s",
ecc_unit_name,
hw_unit,
ecc_subunit_name,
subunit,
ecc_stat_name);
sysfs_attr_init(&dev_attr_array[element].attr);
dev_attr_array[element].attr.name =
ecc_stat->names[element];
dev_attr_array[element].attr.mode =
VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
dev_attr_array[element].show = ecc_stat_show;
dev_attr_array[element].store = NULL;
/* Create sysfs file */
error |= device_create_file(dev,
&dev_attr_array[element]);
}
}
} else {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
/* Fill in struct device_attribute members */
snprintf(ecc_stat->names[hw_unit],
ECC_STAT_NAME_MAX_SIZE,
"%s%d_%s",
ecc_unit_name,
hw_unit,
ecc_stat_name);
sysfs_attr_init(&dev_attr_array[hw_unit].attr);
dev_attr_array[hw_unit].attr.name =
ecc_stat->names[hw_unit];
dev_attr_array[hw_unit].attr.mode =
VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
dev_attr_array[hw_unit].show = ecc_stat_show;
dev_attr_array[hw_unit].store = NULL;
/* Create sysfs file */
error |= device_create_file(dev,
&dev_attr_array[hw_unit]);
}
}
/* Add hash table entry */
hash_key = gen_ecc_hash_key(ecc_stat_name);
hash_add(l->ecc_sysfs_stats_htable,
&ecc_stat->hash_node,
hash_key);
ecc_stat->attr_array = dev_attr_array;
return error;
}
void nvgpu_gr_ecc_stat_remove(struct device *dev,
int is_l2, struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
int num_hw_units = 0;
int num_subunits = 0;
if (is_l2 == 1)
num_hw_units = g->ltc_count;
else if (is_l2 == 2) {
num_hw_units = g->ltc_count;
num_subunits = g->gr.slices_per_ltc;
} else
num_hw_units = g->gr.tpc_count;
nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
}
void nvgpu_ecc_stat_remove(struct device *dev,
int num_hw_units, int num_subunits,
struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
struct device_attribute *dev_attr_array = ecc_stat->attr_array;
int hw_unit = 0;
int subunit = 0;
int element = 0;
int num_elements = num_subunits ? num_subunits * num_hw_units :
num_hw_units;
/* Remove sysfs files */
if (num_subunits) {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
for (subunit = 0; subunit < num_subunits; subunit++) {
element = hw_unit * num_subunits + subunit;
device_remove_file(dev,
&dev_attr_array[element]);
}
}
} else {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
device_remove_file(dev, &dev_attr_array[hw_unit]);
}
/* Remove hash table entry */
hash_del(&ecc_stat->hash_node);
/* Free arrays */
nvgpu_kfree(g, ecc_stat->counters);
for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
nvgpu_kfree(g, ecc_stat->names[hw_unit]);
nvgpu_kfree(g, ecc_stat->names);
nvgpu_kfree(g, dev_attr_array);
}

View File

@@ -1,37 +0,0 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _NVGPU_PLATFORM_SYSFS_H_
#define _NVGPU_PLATFORM_SYSFS_H_
#include "gp10b/gr_gp10b.h"
#define ECC_STAT_NAME_MAX_SIZE 100
int nvgpu_gr_ecc_stat_create(struct device *dev,
int is_l2, char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat);
int nvgpu_ecc_stat_create(struct device *dev,
int num_hw_units, int num_subunits,
char *ecc_unit_name, char *ecc_subunit_name,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat);
void nvgpu_gr_ecc_stat_remove(struct device *dev,
int is_l2, struct gk20a_ecc_stat *ecc_stat);
void nvgpu_ecc_stat_remove(struct device *dev,
int num_hw_units, int num_subunits,
struct gk20a_ecc_stat *ecc_stat);
#endif

View File

@@ -41,7 +41,6 @@
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
-#include "platform_ecc_sysfs.h"
#include "platform_gk20a_tegra.h"
#include "platform_gp10b.h"
#include "platform_gp10b_tegra.h"
@@ -177,11 +176,6 @@ static int gp10b_tegra_late_probe(struct device *dev)
static int gp10b_tegra_remove(struct device *dev)
{
-struct gk20a *g = get_gk20a(dev);
-if (g->ops.gr.remove_gr_sysfs)
-g->ops.gr.remove_gr_sysfs(g);
/* deinitialise tegra specific scaling quirks */
gp10b_tegra_scale_exit(dev);
@@ -476,162 +470,3 @@ struct gk20a_platform gp10b_tegra_platform = {
.secure_buffer_size = 401408,
};
void gr_gp10b_create_sysfs(struct gk20a *g)
{
int error = 0;
struct device *dev = dev_from_gk20a(g);
/* This stat creation function is called on GR init. GR can get
initialized multiple times but we only need to create the ECC
stats once. Therefore, add the following check to avoid
creating duplicate stat sysfs nodes. */
if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
return;
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_lrf_ecc_single_err_count",
&g->ecc.gr.sm_lrf_single_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_lrf_ecc_double_err_count",
&g->ecc.gr.sm_lrf_double_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_sec_count",
&g->ecc.gr.sm_shm_sec_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_sed_count",
&g->ecc.gr.sm_shm_sed_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_ded_count",
&g->ecc.gr.sm_shm_ded_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_sec_pipe0_count",
&g->ecc.gr.tex_total_sec_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_ded_pipe0_count",
&g->ecc.gr.tex_total_ded_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_sec_pipe0_count",
&g->ecc.gr.tex_unique_sec_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_ded_pipe0_count",
&g->ecc.gr.tex_unique_ded_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_sec_pipe1_count",
&g->ecc.gr.tex_total_sec_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_ded_pipe1_count",
&g->ecc.gr.tex_total_ded_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_sec_pipe1_count",
&g->ecc.gr.tex_unique_sec_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_ded_pipe1_count",
&g->ecc.gr.tex_unique_ded_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
2,
"ecc_sec_count",
&g->ecc.ltc.l2_sec_count);
error |= nvgpu_gr_ecc_stat_create(dev,
2,
"ecc_ded_count",
&g->ecc.ltc.l2_ded_count);
if (error)
dev_err(dev, "Failed to create sysfs attributes!\n");
}
void gr_gp10b_remove_sysfs(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
if (!g->ecc.gr.sm_lrf_single_err_count.counters)
return;
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_lrf_single_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_lrf_double_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_sec_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_sed_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_ded_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_sec_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_ded_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_sec_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_ded_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_sec_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_ded_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_sec_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_ded_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
2,
&g->ecc.ltc.l2_sec_count);
nvgpu_gr_ecc_stat_remove(dev,
2,
&g->ecc.ltc.l2_ded_count);
}

View File

@@ -18,6 +18,5 @@
#define _PLATFORM_GP10B_TEGRA_H_
#include "gp10b/gr_gp10b.h"
-#include "platform_ecc_sysfs.h"
#endif

View File

@@ -39,7 +39,6 @@
#include "platform_gp10b.h"
#include "platform_gp10b_tegra.h"
-#include "platform_ecc_sysfs.h"
#include "os_linux.h"
#include "platform_gk20a_tegra.h"
@@ -94,11 +93,6 @@ static int gv11b_tegra_late_probe(struct device *dev)
static int gv11b_tegra_remove(struct device *dev)
{
-struct gk20a *g = get_gk20a(dev);
-if (g->ops.gr.remove_gr_sysfs)
-g->ops.gr.remove_gr_sysfs(g);
gv11b_tegra_scale_exit(dev);
#ifdef CONFIG_TEGRA_GK20A_NVHOST
@@ -261,328 +255,3 @@ struct gk20a_platform gv11b_tegra_platform = {
.secure_buffer_size = 667648,
};
void gr_gv11b_create_sysfs(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
int error = 0;
/* This stat creation function is called on GR init. GR can get
initialized multiple times but we only need to create the ECC
stats once. Therefore, add the following check to avoid
creating duplicate stat sysfs nodes. */
if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL)
return;
gr_gp10b_create_sysfs(g);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_tag_ecc_corrected_err_count",
&g->ecc.gr.sm_l1_tag_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_tag_ecc_uncorrected_err_count",
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_cbu_ecc_corrected_err_count",
&g->ecc.gr.sm_cbu_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_cbu_ecc_uncorrected_err_count",
&g->ecc.gr.sm_cbu_uncorrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_data_ecc_corrected_err_count",
&g->ecc.gr.sm_l1_data_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_data_ecc_uncorrected_err_count",
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_icache_ecc_corrected_err_count",
&g->ecc.gr.sm_icache_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_icache_ecc_uncorrected_err_count",
&g->ecc.gr.sm_icache_uncorrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"gcc_l15_ecc_corrected_err_count",
&g->ecc.gr.gcc_l15_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"gcc_l15_ecc_uncorrected_err_count",
&g->ecc.gr.gcc_l15_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->ltc_count,
0,
"ltc",
NULL,
"l2_cache_uncorrected_err_count",
&g->ecc.ltc.l2_cache_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->ltc_count,
0,
"ltc",
NULL,
"l2_cache_corrected_err_count",
&g->ecc.ltc.l2_cache_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"gpc",
NULL,
"fecs_ecc_uncorrected_err_count",
&g->ecc.gr.fecs_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"gpc",
NULL,
"fecs_ecc_corrected_err_count",
&g->ecc.gr.fecs_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"gpccs_ecc_uncorrected_err_count",
&g->ecc.gr.gpccs_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"gpccs_ecc_corrected_err_count",
&g->ecc.gr.gpccs_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"mmu_l1tlb_ecc_uncorrected_err_count",
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"mmu_l1tlb_ecc_corrected_err_count",
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_l2tlb_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_l2tlb_ecc_corrected_err_count",
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_hubtlb_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_hubtlb_ecc_corrected_err_count",
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_fillunit_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_fillunit_ecc_corrected_err_count",
&g->ecc.fb.mmu_fillunit_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"pmu_ecc_uncorrected_err_count",
&g->ecc.pmu.pmu_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"pmu_ecc_corrected_err_count",
&g->ecc.pmu.pmu_corrected_err_count);
if (error)
dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
}
void gr_gv11b_remove_sysfs(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters)
return;
gr_gp10b_remove_sysfs(g);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_tag_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_cbu_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_cbu_uncorrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_data_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_icache_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_icache_uncorrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.gcc_l15_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.gcc_l15_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->ltc_count,
0,
&g->ecc.ltc.l2_cache_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->ltc_count,
0,
&g->ecc.ltc.l2_cache_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.gr.fecs_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.gr.fecs_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
0,
&g->ecc.gr.gpccs_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
0,
&g->ecc.gr.gpccs_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
0,
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
0,
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_fillunit_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.pmu.pmu_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.pmu.pmu_corrected_err_count);
}

View File

@@ -215,10 +215,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.update_boosted_ctx = NULL,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
-#ifdef CONFIG_SYSFS
-.create_gr_sysfs = gr_gp10b_create_sysfs,
-.remove_gr_sysfs = gr_gp10b_remove_sysfs,
-#endif
.set_ctxsw_preemption_mode =
vgpu_gr_gp10b_set_ctxsw_preemption_mode,
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,

View File

@@ -232,10 +232,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.update_boosted_ctx = NULL,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
-#ifdef CONFIG_SYSFS
-.create_gr_sysfs = gr_gv11b_create_sysfs,
-.remove_gr_sysfs = gr_gv11b_remove_sysfs,
-#endif
.set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,