mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: rework ecc structure and sysfs
- Create common file common/ecc.c, which includes common functions for adding and removing ECC counters. - Common code creates a list of all counters, which makes it easier to iterate over all counters. - Add chip-specific files for adding ECC counters. - Add Linux-specific file os/linux/ecc_sysfs.c to export counters to sysfs. - Remove obsolete code. - The MISRA violation for using snprintf is not solved; it is tracked in Jira NVGPU-859. Jira NVGPUT-115 Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1763536 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
5ff1b3fe5a
commit
7f14aafc2c
@@ -69,7 +69,8 @@ nvgpu-y += \
|
||||
os/linux/sim_pci.o \
|
||||
os/linux/os_sched.o \
|
||||
os/linux/nvlink.o \
|
||||
os/linux/dt.o
|
||||
os/linux/dt.o \
|
||||
os/linux/ecc_sysfs.o
|
||||
|
||||
nvgpu-$(CONFIG_GK20A_VIDMEM) += \
|
||||
os/linux/dmabuf_vidmem.o
|
||||
@@ -100,7 +101,6 @@ nvgpu-$(CONFIG_TEGRA_GK20A) += \
|
||||
os/linux/module_usermode.o \
|
||||
os/linux/soc.o \
|
||||
os/linux/fuse.o \
|
||||
os/linux/platform_ecc_sysfs.o \
|
||||
os/linux/platform_gk20a_tegra.o \
|
||||
os/linux/platform_gp10b_tegra.o \
|
||||
os/linux/platform_gv11b_tegra.o
|
||||
@@ -185,6 +185,7 @@ nvgpu-y += \
|
||||
common/sim.o \
|
||||
common/sim_pci.o \
|
||||
common/fifo/submit.o \
|
||||
common/ecc.o \
|
||||
gk20a/gk20a.o \
|
||||
gk20a/ce2_gk20a.o \
|
||||
gk20a/fifo_gk20a.o \
|
||||
@@ -267,6 +268,7 @@ nvgpu-y += \
|
||||
gp10b/priv_ring_gp10b.o \
|
||||
gp10b/gp10b.o \
|
||||
gp10b/fuse_gp10b.o \
|
||||
gp10b/ecc_gp10b.o \
|
||||
gp106/hal_gp106.o \
|
||||
gp106/mm_gp106.o \
|
||||
gp106/flcn_gp106.o \
|
||||
@@ -296,6 +298,7 @@ nvgpu-y += \
|
||||
gv11b/subctx_gv11b.o \
|
||||
gv11b/regops_gv11b.o \
|
||||
gv11b/therm_gv11b.o \
|
||||
gv11b/ecc_gv11b.o \
|
||||
gv100/mm_gv100.o \
|
||||
gv100/gr_ctx_gv100.o \
|
||||
gv100/bios_gv100.o \
|
||||
|
||||
@@ -49,6 +49,7 @@ srcs := common/mm/nvgpu_allocator.c \
|
||||
common/rbtree.c \
|
||||
common/ltc.c \
|
||||
common/io_common.c \
|
||||
common/ecc.c \
|
||||
common/vbios/bios.c \
|
||||
common/falcon/falcon.c \
|
||||
common/pmu/pmu.c \
|
||||
@@ -166,6 +167,7 @@ srcs := common/mm/nvgpu_allocator.c \
|
||||
gp10b/priv_ring_gp10b.c \
|
||||
gp10b/gp10b.c \
|
||||
gp10b/fuse_gp10b.c \
|
||||
gp10b/ecc_gp10b.c \
|
||||
gv11b/gv11b.c \
|
||||
gv11b/dbg_gpu_gv11b.c \
|
||||
gv11b/mc_gv11b.c \
|
||||
@@ -181,6 +183,7 @@ srcs := common/mm/nvgpu_allocator.c \
|
||||
gv11b/subctx_gv11b.c \
|
||||
gv11b/regops_gv11b.c \
|
||||
gv11b/therm_gv11b.c \
|
||||
gv11b/ecc_gv11b.c \
|
||||
gp106/hal_gp106.c \
|
||||
gp106/mm_gp106.c \
|
||||
gp106/flcn_gp106.c \
|
||||
|
||||
369
drivers/gpu/nvgpu/common/ecc.c
Normal file
369
drivers/gpu/nvgpu/common/ecc.c
Normal file
@@ -0,0 +1,369 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
|
||||
static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
|
||||
{
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
|
||||
nvgpu_init_list_node(&stat->node);
|
||||
|
||||
nvgpu_list_add_tail(&stat->node, &ecc->stats_list);
|
||||
ecc->stats_count++;
|
||||
}
|
||||
|
||||
static void nvgpu_ecc_init(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
|
||||
nvgpu_init_list_node(&ecc->stats_list);
|
||||
}
|
||||
|
||||
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat ***stat, const char *name)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
struct nvgpu_ecc_stat **stats;
|
||||
u32 gpc, tpc;
|
||||
int err = 0;
|
||||
|
||||
stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
|
||||
if (stats == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
|
||||
stats[gpc] = nvgpu_kzalloc(g,
|
||||
sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
|
||||
if (stats[gpc] == NULL) {
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
while (gpc-- != 0u) {
|
||||
nvgpu_kfree(g, stats[gpc]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, stats);
|
||||
return err;
|
||||
}
|
||||
|
||||
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
|
||||
for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
|
||||
snprintf(stats[gpc][tpc].name,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE,
|
||||
"gpc%d_tpc%d_%s", gpc, tpc, name);
|
||||
nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
|
||||
}
|
||||
}
|
||||
|
||||
*stat = stats;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat **stat, const char *name)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
struct nvgpu_ecc_stat *stats;
|
||||
u32 gpc;
|
||||
|
||||
stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
|
||||
if (stats == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
|
||||
snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
|
||||
"gpc%d_%s", gpc, name);
|
||||
nvgpu_ecc_stat_add(g, &stats[gpc]);
|
||||
}
|
||||
|
||||
*stat = stats;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_ecc_counter_init(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat **stat, const char *name)
|
||||
{
|
||||
struct nvgpu_ecc_stat *stats;
|
||||
|
||||
stats = nvgpu_kzalloc(g, sizeof(*stats));
|
||||
if (stats == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
(void)strncpy(stats->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1);
|
||||
nvgpu_ecc_stat_add(g, stats);
|
||||
*stat = stats;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat ***stat, const char *name)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
struct nvgpu_ecc_stat **stats;
|
||||
u32 ltc, lts;
|
||||
int err = 0;
|
||||
|
||||
stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count);
|
||||
if (stats == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
||||
stats[ltc] = nvgpu_kzalloc(g,
|
||||
sizeof(*stats[ltc]) * gr->slices_per_ltc);
|
||||
if (stats[ltc] == NULL) {
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
while (ltc-- > 0u) {
|
||||
nvgpu_kfree(g, stats[ltc]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, stats);
|
||||
return err;
|
||||
}
|
||||
|
||||
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
||||
for (lts = 0; lts < gr->slices_per_ltc; lts++) {
|
||||
snprintf(stats[ltc][lts].name,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE,
|
||||
"ltc%d_lts%d_%s", ltc, lts, name);
|
||||
nvgpu_ecc_stat_add(g, &stats[ltc][lts]);
|
||||
}
|
||||
}
|
||||
|
||||
*stat = stats;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat **stat, const char *name)
|
||||
{
|
||||
int i;
|
||||
int num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
|
||||
struct nvgpu_ecc_stat *stats;
|
||||
|
||||
stats = nvgpu_kzalloc(g, sizeof(*stats) * num_fbpa);
|
||||
if (stats == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_fbpa; i++) {
|
||||
snprintf(stats[i].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
|
||||
"fbpa%d_%s", i, name);
|
||||
nvgpu_ecc_stat_add(g, &stats[i]);
|
||||
}
|
||||
|
||||
*stat = stats;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* release all ecc_stat */
|
||||
void nvgpu_ecc_free(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < gr->gpc_count; i++) {
|
||||
if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_lrf_ecc_double_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_shm_ecc_sec_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_shm_ecc_sed_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_shm_ecc_ded_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.tex_ecc_total_sec_pipe0_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.tex_ecc_total_ded_pipe0_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.tex_unique_ecc_sec_pipe0_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.tex_unique_ecc_ded_pipe0_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.tex_ecc_total_sec_pipe1_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.tex_ecc_total_ded_pipe1_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.tex_unique_ecc_sec_pipe1_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.tex_unique_ecc_ded_pipe1_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_l1_tag_ecc_corrected_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_l1_tag_ecc_uncorrected_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_cbu_ecc_corrected_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_cbu_ecc_uncorrected_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_l1_data_ecc_corrected_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_l1_data_ecc_uncorrected_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_icache_ecc_corrected_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->gr.sm_icache_ecc_uncorrected_err_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count[i]);
|
||||
}
|
||||
}
|
||||
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count);
|
||||
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count);
|
||||
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count);
|
||||
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count);
|
||||
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count);
|
||||
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count);
|
||||
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count);
|
||||
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count);
|
||||
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count);
|
||||
|
||||
nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count);
|
||||
|
||||
for (i = 0; i < g->ltc_count; i++) {
|
||||
if (ecc->ltc.ecc_sec_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]);
|
||||
}
|
||||
|
||||
if (ecc->ltc.ecc_ded_count != NULL) {
|
||||
nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]);
|
||||
}
|
||||
}
|
||||
nvgpu_kfree(g, ecc->ltc.ecc_sec_count);
|
||||
nvgpu_kfree(g, ecc->ltc.ecc_ded_count);
|
||||
|
||||
nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count);
|
||||
nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count);
|
||||
nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count);
|
||||
|
||||
nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count);
|
||||
nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count);
|
||||
|
||||
nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count);
|
||||
nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count);
|
||||
|
||||
(void)memset(ecc, 0, sizeof(*ecc));
|
||||
}
|
||||
|
||||
int nvgpu_ecc_init_support(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (g->ops.gr.init_ecc == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_ecc_init(g);
|
||||
err = g->ops.gr.init_ecc(g);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err = nvgpu_ecc_sysfs_init(g);
|
||||
if (err != 0) {
|
||||
nvgpu_ecc_free(g);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_ecc_remove_support(struct gk20a *g)
|
||||
{
|
||||
if (g->ops.gr.init_ecc == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_ecc_sysfs_remove(g);
|
||||
nvgpu_ecc_free(g);
|
||||
}
|
||||
@@ -445,9 +445,9 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||
uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s());
|
||||
|
||||
|
||||
g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0] +=
|
||||
g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter +=
|
||||
corrected_delta;
|
||||
g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0] +=
|
||||
g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter +=
|
||||
uncorrected_delta;
|
||||
|
||||
if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m())
|
||||
@@ -461,8 +461,8 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||
"ecc error address: 0x%x", ecc_addr);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error count corrected: %d, uncorrected %d",
|
||||
g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0],
|
||||
g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0]);
|
||||
g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter,
|
||||
g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter);
|
||||
}
|
||||
|
||||
void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||
@@ -503,9 +503,9 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||
uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s());
|
||||
|
||||
|
||||
g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0] +=
|
||||
g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter +=
|
||||
corrected_delta;
|
||||
g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0] +=
|
||||
g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter +=
|
||||
uncorrected_delta;
|
||||
|
||||
if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m())
|
||||
@@ -519,8 +519,8 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||
"ecc error address: 0x%x", ecc_addr);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error count corrected: %d, uncorrected %d",
|
||||
g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0],
|
||||
g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0]);
|
||||
g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter,
|
||||
g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter);
|
||||
}
|
||||
|
||||
void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||
@@ -561,9 +561,9 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||
uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s());
|
||||
|
||||
|
||||
g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0] +=
|
||||
g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter +=
|
||||
corrected_delta;
|
||||
g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0] +=
|
||||
g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter +=
|
||||
uncorrected_delta;
|
||||
|
||||
if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m())
|
||||
@@ -582,8 +582,8 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||
"ecc error address: 0x%x", ecc_addr);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error count corrected: %d, uncorrected %d",
|
||||
g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0],
|
||||
g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0]);
|
||||
g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter,
|
||||
g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter);
|
||||
}
|
||||
|
||||
static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault)
|
||||
|
||||
@@ -25,8 +25,19 @@
|
||||
* for an implementation.
|
||||
*/
|
||||
|
||||
#include <nvgpu/ecc.h>
|
||||
|
||||
#include "gk20a/dbg_gpu_gk20a.h"
|
||||
|
||||
/* No-op stub: the debug session argument is ignored. */
void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
{
	(void)dbg_s;
}
|
||||
|
||||
/* No-op stub: nothing is exported; always reports success (0). */
int nvgpu_ecc_sysfs_init(struct gk20a *g)
{
	(void)g;

	return 0;
}
|
||||
|
||||
/* No-op stub: there is nothing to remove. */
void nvgpu_ecc_sysfs_remove(struct gk20a *g)
{
	(void)g;
}
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
/*
|
||||
* GK20A ECC
|
||||
*
|
||||
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef ECC_GK20A_H
|
||||
#define ECC_GK20A_H
|
||||
|
||||
struct gk20a_ecc_stat {
|
||||
char **names;
|
||||
u32 *counters;
|
||||
u32 count;
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct hlist_node hash_node;
|
||||
struct device_attribute *attr_array;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct ecc_gk20a {
|
||||
/* Stats per engine */
|
||||
struct {
|
||||
struct gk20a_ecc_stat sm_lrf_single_err_count;
|
||||
struct gk20a_ecc_stat sm_lrf_double_err_count;
|
||||
|
||||
struct gk20a_ecc_stat sm_shm_sec_count;
|
||||
struct gk20a_ecc_stat sm_shm_sed_count;
|
||||
struct gk20a_ecc_stat sm_shm_ded_count;
|
||||
|
||||
struct gk20a_ecc_stat tex_total_sec_pipe0_count;
|
||||
struct gk20a_ecc_stat tex_total_ded_pipe0_count;
|
||||
struct gk20a_ecc_stat tex_unique_sec_pipe0_count;
|
||||
struct gk20a_ecc_stat tex_unique_ded_pipe0_count;
|
||||
struct gk20a_ecc_stat tex_total_sec_pipe1_count;
|
||||
struct gk20a_ecc_stat tex_total_ded_pipe1_count;
|
||||
struct gk20a_ecc_stat tex_unique_sec_pipe1_count;
|
||||
struct gk20a_ecc_stat tex_unique_ded_pipe1_count;
|
||||
|
||||
struct gk20a_ecc_stat sm_l1_tag_corrected_err_count;
|
||||
struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat sm_cbu_corrected_err_count;
|
||||
struct gk20a_ecc_stat sm_cbu_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat sm_l1_data_corrected_err_count;
|
||||
struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat sm_icache_corrected_err_count;
|
||||
struct gk20a_ecc_stat sm_icache_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat gcc_l15_corrected_err_count;
|
||||
struct gk20a_ecc_stat gcc_l15_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat fecs_corrected_err_count;
|
||||
struct gk20a_ecc_stat fecs_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat gpccs_corrected_err_count;
|
||||
struct gk20a_ecc_stat gpccs_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat mmu_l1tlb_corrected_err_count;
|
||||
struct gk20a_ecc_stat mmu_l1tlb_uncorrected_err_count;
|
||||
} gr;
|
||||
|
||||
struct {
|
||||
struct gk20a_ecc_stat l2_sec_count;
|
||||
struct gk20a_ecc_stat l2_ded_count;
|
||||
struct gk20a_ecc_stat l2_cache_corrected_err_count;
|
||||
struct gk20a_ecc_stat l2_cache_uncorrected_err_count;
|
||||
} ltc;
|
||||
|
||||
struct {
|
||||
struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count;
|
||||
struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count;
|
||||
struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count;
|
||||
struct gk20a_ecc_stat mmu_fillunit_corrected_err_count;
|
||||
struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count;
|
||||
} fb;
|
||||
|
||||
struct {
|
||||
struct gk20a_ecc_stat pmu_corrected_err_count;
|
||||
struct gk20a_ecc_stat pmu_uncorrected_err_count;
|
||||
} pmu;
|
||||
|
||||
struct {
|
||||
struct gk20a_ecc_stat fbpa_sec_err_count;
|
||||
struct gk20a_ecc_stat fbpa_ded_err_count;
|
||||
} fbpa;
|
||||
|
||||
};
|
||||
|
||||
#endif /*__ECC_GK20A_H__*/
|
||||
@@ -35,7 +35,6 @@ struct gk20a_ctxsw_trace;
|
||||
struct acr_desc;
|
||||
struct nvgpu_mem_alloc_tracker;
|
||||
struct dbg_profiler_object_data;
|
||||
struct ecc_gk20a;
|
||||
struct gk20a_debug_output;
|
||||
struct nvgpu_clk_pll_debug_data;
|
||||
struct nvgpu_nvhost_dev;
|
||||
@@ -64,6 +63,7 @@ struct nvgpu_ctxsw_trace_filter;
|
||||
#include <nvgpu/clk_arb.h>
|
||||
#include <nvgpu/nvlink.h>
|
||||
#include <nvgpu/sim.h>
|
||||
#include <nvgpu/ecc.h>
|
||||
|
||||
#include "clk_gk20a.h"
|
||||
#include "ce2_gk20a.h"
|
||||
@@ -77,7 +77,6 @@ struct nvgpu_ctxsw_trace_filter;
|
||||
#include "perf/perf.h"
|
||||
#include "pmgr/pmgr.h"
|
||||
#include "therm/thrm.h"
|
||||
#include "ecc_gk20a.h"
|
||||
|
||||
/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
|
||||
32 ns is the resolution of ptimer. */
|
||||
@@ -384,8 +383,7 @@ struct gpu_ops {
|
||||
u32 gpc_exception);
|
||||
void (*enable_gpc_exceptions)(struct gk20a *g);
|
||||
void (*enable_exceptions)(struct gk20a *g);
|
||||
void (*create_gr_sysfs)(struct gk20a *g);
|
||||
void (*remove_gr_sysfs)(struct gk20a *g);
|
||||
int (*init_ecc)(struct gk20a *g);
|
||||
u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
|
||||
int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
u32 sm, struct channel_gk20a *fault_ch);
|
||||
@@ -1385,7 +1383,7 @@ struct gk20a {
|
||||
struct mm_gk20a mm;
|
||||
struct nvgpu_pmu pmu;
|
||||
struct acr_desc acr;
|
||||
struct ecc_gk20a ecc;
|
||||
struct nvgpu_ecc ecc;
|
||||
struct clk_pmupstate clk_pmu;
|
||||
struct perf_pmupstate perf_pmu;
|
||||
struct pmgr_pmupstate pmgr_pmu;
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include <nvgpu/mm.h>
|
||||
#include <nvgpu/ctxsw_trace.h>
|
||||
#include <nvgpu/error_notifier.h>
|
||||
#include <nvgpu/ecc.h>
|
||||
|
||||
#include "gk20a.h"
|
||||
#include "gr_gk20a.h"
|
||||
@@ -3127,6 +3128,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
|
||||
gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
|
||||
|
||||
gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
|
||||
|
||||
nvgpu_ecc_remove_support(g);
|
||||
}
|
||||
|
||||
static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
||||
@@ -4872,8 +4875,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
|
||||
gr->remove_support = gk20a_remove_gr_support;
|
||||
gr->sw_ready = true;
|
||||
|
||||
if (g->ops.gr.create_gr_sysfs)
|
||||
g->ops.gr.create_gr_sysfs(g);
|
||||
err = nvgpu_ecc_init_support(g);
|
||||
if (err)
|
||||
goto clean_up;
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
|
||||
@@ -382,10 +382,6 @@ static const struct gpu_ops gp106_ops = {
|
||||
.update_boosted_ctx = NULL,
|
||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||
#ifdef CONFIG_SYSFS
|
||||
.create_gr_sysfs = NULL,
|
||||
.remove_gr_sysfs = NULL,
|
||||
#endif
|
||||
.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
|
||||
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
|
||||
.fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,
|
||||
|
||||
106
drivers/gpu/nvgpu/gp10b/ecc_gp10b.c
Normal file
106
drivers/gpu/nvgpu/gp10b/ecc_gp10b.c
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/ecc.h>
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
#include "gp10b/ecc_gp10b.h"
|
||||
|
||||
int gp10b_ecc_init(struct gk20a *g)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sec_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sed_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_ded_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe0_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe0_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe0_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe0_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe1_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe1_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe1_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe1_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
done:
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
|
||||
nvgpu_ecc_free(g);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
28
drivers/gpu/nvgpu/gp10b/ecc_gp10b.h
Normal file
28
drivers/gpu/nvgpu/gp10b/ecc_gp10b.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_ECC_GP10B_H
#define NVGPU_ECC_GP10B_H

/*
 * Forward declaration so this header is self-contained. Without it the
 * struct tag in the prototype below would have prototype scope only, making
 * the parameter type incompatible with the real struct gk20a whenever this
 * header is included before gk20a.h.
 */
struct gk20a;

/*
 * Set up the ECC error counters used on gp10b.
 *
 * Returns 0 on success and a non-zero error code on failure.
 */
int gp10b_ecc_init(struct gk20a *g);

#endif /* NVGPU_ECC_GP10B_H */
|
||||
@@ -176,7 +176,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
|
||||
lrf_ecc_ded_status,
|
||||
&lrf_single_count_delta,
|
||||
lrf_double_count_delta);
|
||||
g->ecc.gr.sm_lrf_single_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
|
||||
lrf_single_count_delta;
|
||||
}
|
||||
if (lrf_ecc_ded_status) {
|
||||
@@ -188,7 +188,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
|
||||
lrf_ecc_ded_status,
|
||||
&lrf_double_count_delta,
|
||||
lrf_single_count_delta);
|
||||
g->ecc.gr.sm_lrf_double_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
|
||||
lrf_double_count_delta;
|
||||
}
|
||||
gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
|
||||
@@ -213,9 +213,9 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
|
||||
ecc_stats_reg_val =
|
||||
gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
|
||||
g->ecc.gr.sm_shm_sec_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
|
||||
g->ecc.gr.sm_shm_sed_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
|
||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
|
||||
@@ -235,7 +235,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
|
||||
ecc_stats_reg_val =
|
||||
gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
|
||||
g->ecc.gr.sm_shm_ded_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
|
||||
gk20a_writel(g,
|
||||
@@ -276,7 +276,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
|
||||
ecc_stats_reg_val = gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
||||
g->ecc.gr.tex_total_sec_pipe0_count.counters[tpc] +=
|
||||
g->ecc.gr.tex_ecc_total_sec_pipe0_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
|
||||
gk20a_writel(g,
|
||||
@@ -285,7 +285,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
|
||||
ecc_stats_reg_val = gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
||||
g->ecc.gr.tex_unique_sec_pipe0_count.counters[tpc] +=
|
||||
g->ecc.gr.tex_unique_ecc_sec_pipe0_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
|
||||
gk20a_writel(g,
|
||||
@@ -300,7 +300,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
|
||||
ecc_stats_reg_val = gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
||||
g->ecc.gr.tex_total_sec_pipe1_count.counters[tpc] +=
|
||||
g->ecc.gr.tex_ecc_total_sec_pipe1_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
|
||||
gk20a_writel(g,
|
||||
@@ -309,7 +309,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
|
||||
ecc_stats_reg_val = gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
||||
g->ecc.gr.tex_unique_sec_pipe1_count.counters[tpc] +=
|
||||
g->ecc.gr.tex_unique_ecc_sec_pipe1_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
|
||||
gk20a_writel(g,
|
||||
@@ -332,7 +332,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
|
||||
ecc_stats_reg_val = gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
||||
g->ecc.gr.tex_total_ded_pipe0_count.counters[tpc] +=
|
||||
g->ecc.gr.tex_ecc_total_ded_pipe0_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
|
||||
gk20a_writel(g,
|
||||
@@ -341,7 +341,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
|
||||
ecc_stats_reg_val = gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
||||
g->ecc.gr.tex_unique_ded_pipe0_count.counters[tpc] +=
|
||||
g->ecc.gr.tex_unique_ecc_ded_pipe0_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
|
||||
gk20a_writel(g,
|
||||
@@ -356,7 +356,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
|
||||
ecc_stats_reg_val = gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
||||
g->ecc.gr.tex_total_ded_pipe1_count.counters[tpc] +=
|
||||
g->ecc.gr.tex_ecc_total_ded_pipe1_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
|
||||
gk20a_writel(g,
|
||||
@@ -365,7 +365,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
|
||||
ecc_stats_reg_val = gk20a_readl(g,
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
||||
g->ecc.gr.tex_unique_ded_pipe1_count.counters[tpc] +=
|
||||
g->ecc.gr.tex_unique_ecc_ded_pipe1_count[gpc][tpc].counter +=
|
||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
|
||||
gk20a_writel(g,
|
||||
|
||||
@@ -56,6 +56,7 @@
|
||||
#include "gp10b/regops_gp10b.h"
|
||||
#include "gp10b/therm_gp10b.h"
|
||||
#include "gp10b/priv_ring_gp10b.h"
|
||||
#include "gp10b/ecc_gp10b.h"
|
||||
|
||||
#include "gm20b/ltc_gm20b.h"
|
||||
#include "gm20b/gr_gm20b.h"
|
||||
@@ -339,11 +340,8 @@ static const struct gpu_ops gp10b_ops = {
|
||||
.init_preemption_state = gr_gp10b_init_preemption_state,
|
||||
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||
#ifdef CONFIG_SYSFS
|
||||
.create_gr_sysfs = gr_gp10b_create_sysfs,
|
||||
.remove_gr_sysfs = gr_gp10b_remove_sysfs,
|
||||
#endif
|
||||
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
|
||||
.init_ecc = gp10b_ecc_init,
|
||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||
.init_gfxp_wfi_timeout_count =
|
||||
gr_gp10b_init_gfxp_wfi_timeout_count,
|
||||
|
||||
@@ -249,7 +249,7 @@ void gp10b_ltc_isr(struct gk20a *g)
|
||||
ecc_stats_reg_val =
|
||||
gk20a_readl(g,
|
||||
ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
|
||||
g->ecc.ltc.l2_sec_count.counters[ltc*g->ltc_count + slice] +=
|
||||
g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
|
||||
ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &=
|
||||
~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
|
||||
@@ -268,7 +268,7 @@ void gp10b_ltc_isr(struct gk20a *g)
|
||||
ecc_stats_reg_val =
|
||||
gk20a_readl(g,
|
||||
ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
|
||||
g->ecc.ltc.l2_ded_count.counters[ltc*g->ltc_count + slice] +=
|
||||
g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
|
||||
ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
|
||||
ecc_stats_reg_val &=
|
||||
~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
|
||||
|
||||
@@ -415,10 +415,6 @@ static const struct gpu_ops gv100_ops = {
|
||||
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||
#ifdef CONFIG_SYSFS
|
||||
.create_gr_sysfs = gr_gv11b_create_sysfs,
|
||||
.remove_gr_sysfs = gr_gv11b_remove_sysfs,
|
||||
#endif
|
||||
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
|
||||
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
||||
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
||||
|
||||
181
drivers/gpu/nvgpu/gv11b/ecc_gv11b.c
Normal file
181
drivers/gpu/nvgpu/gv11b/ecc_gv11b.c
Normal file
@@ -0,0 +1,181 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/ecc.h>
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
#include "gv11b/ecc_gv11b.h"
|
||||
|
||||
int gv11b_ecc_init(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||
sm_l1_tag_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||
sm_l1_tag_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||
sm_cbu_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||
sm_cbu_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||
sm_l1_data_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||
sm_l1_data_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||
sm_icache_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||
sm_icache_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||
gcc_l15_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||
gcc_l15_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||
gpccs_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||
gpccs_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||
mmu_l1tlb_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||
mmu_l1tlb_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_FB(
|
||||
mmu_fillunit_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_FB(
|
||||
mmu_fillunit_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count);
|
||||
if (err != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
done:
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
|
||||
nvgpu_ecc_free(g);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
28
drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
Normal file
28
drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_ECC_GV11B_H
#define NVGPU_ECC_GV11B_H

/*
 * Forward declaration so this header is self-contained. Without it the
 * struct tag in the prototype below would have prototype scope only, making
 * the parameter type incompatible with the real struct gk20a whenever this
 * header is included before gk20a.h.
 */
struct gk20a;

/*
 * Set up the ECC error counters used on gv11b.
 *
 * Returns 0 on success and a non-zero error code on failure.
 */
int gv11b_ecc_init(struct gk20a *g);

#endif /* NVGPU_ECC_GV11B_H */
|
||||
@@ -198,7 +198,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
l1_tag_corrected_err_count_delta +=
|
||||
(is_l1_tag_ecc_corrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s());
|
||||
g->ecc.gr.sm_l1_tag_corrected_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||
l1_tag_corrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
|
||||
@@ -213,7 +213,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
l1_tag_uncorrected_err_count_delta +=
|
||||
(is_l1_tag_ecc_uncorrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s());
|
||||
g->ecc.gr.sm_l1_tag_uncorrected_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||
l1_tag_uncorrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
|
||||
@@ -290,7 +290,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
lrf_corrected_err_count_delta +=
|
||||
(is_lrf_ecc_corrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s());
|
||||
g->ecc.gr.sm_lrf_single_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
|
||||
lrf_corrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset,
|
||||
@@ -305,7 +305,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
lrf_uncorrected_err_count_delta +=
|
||||
(is_lrf_ecc_uncorrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s());
|
||||
g->ecc.gr.sm_lrf_double_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
|
||||
lrf_uncorrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset,
|
||||
@@ -449,7 +449,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
cbu_corrected_err_count_delta +=
|
||||
(is_cbu_ecc_corrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s());
|
||||
g->ecc.gr.sm_cbu_corrected_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||
cbu_corrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset,
|
||||
@@ -464,7 +464,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
cbu_uncorrected_err_count_delta +=
|
||||
(is_cbu_ecc_uncorrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s());
|
||||
g->ecc.gr.sm_cbu_uncorrected_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||
cbu_uncorrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset,
|
||||
@@ -529,7 +529,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
l1_data_corrected_err_count_delta +=
|
||||
(is_l1_data_ecc_corrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s());
|
||||
g->ecc.gr.sm_l1_data_corrected_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||
l1_data_corrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset,
|
||||
@@ -544,7 +544,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
l1_data_uncorrected_err_count_delta +=
|
||||
(is_l1_data_ecc_uncorrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s());
|
||||
g->ecc.gr.sm_l1_data_uncorrected_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||
l1_data_uncorrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset,
|
||||
@@ -613,7 +613,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
icache_corrected_err_count_delta +=
|
||||
(is_icache_ecc_corrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s());
|
||||
g->ecc.gr.sm_icache_corrected_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||
icache_corrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset,
|
||||
@@ -628,7 +628,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
icache_uncorrected_err_count_delta +=
|
||||
(is_icache_ecc_uncorrected_total_err_overflow <<
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s());
|
||||
g->ecc.gr.sm_icache_uncorrected_err_count.counters[tpc] +=
|
||||
g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||
icache_uncorrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset,
|
||||
@@ -717,7 +717,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
gcc_l15_corrected_err_count_delta +=
|
||||
(is_gcc_l15_ecc_corrected_total_err_overflow <<
|
||||
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
|
||||
g->ecc.gr.gcc_l15_corrected_err_count.counters[gpc] +=
|
||||
g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter +=
|
||||
gcc_l15_corrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
|
||||
@@ -732,7 +732,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
gcc_l15_uncorrected_err_count_delta +=
|
||||
(is_gcc_l15_ecc_uncorrected_total_err_overflow <<
|
||||
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
|
||||
g->ecc.gr.gcc_l15_uncorrected_err_count.counters[gpc] +=
|
||||
g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter +=
|
||||
gcc_l15_uncorrected_err_count_delta;
|
||||
gk20a_writel(g,
|
||||
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
|
||||
@@ -802,9 +802,9 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
|
||||
uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s());
|
||||
|
||||
|
||||
g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc] +=
|
||||
g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter +=
|
||||
corrected_delta;
|
||||
g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc] +=
|
||||
g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter +=
|
||||
uncorrected_delta;
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
|
||||
@@ -824,8 +824,8 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
|
||||
"ecc error address: 0x%x", ecc_addr);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error count corrected: %d, uncorrected %d",
|
||||
g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc],
|
||||
g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc]);
|
||||
g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter,
|
||||
g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -880,9 +880,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
|
||||
gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset,
|
||||
gr_gpc0_gpccs_falcon_ecc_status_reset_task_f());
|
||||
|
||||
g->ecc.gr.gpccs_corrected_err_count.counters[gpc] +=
|
||||
g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter +=
|
||||
corrected_delta;
|
||||
g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc] +=
|
||||
g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter +=
|
||||
uncorrected_delta;
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
|
||||
@@ -907,8 +907,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error count corrected: %d, uncorrected %d",
|
||||
g->ecc.gr.gpccs_corrected_err_count.counters[gpc],
|
||||
g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc]);
|
||||
g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter,
|
||||
g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -2419,9 +2419,9 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
|
||||
gk20a_writel(g, gr_fecs_falcon_ecc_status_r(),
|
||||
gr_fecs_falcon_ecc_status_reset_task_f());
|
||||
|
||||
g->ecc.gr.fecs_corrected_err_count.counters[0] +=
|
||||
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter +=
|
||||
corrected_delta;
|
||||
g->ecc.gr.fecs_uncorrected_err_count.counters[0] +=
|
||||
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter +=
|
||||
uncorrected_delta;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
@@ -2450,8 +2450,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error count corrected: %d, uncorrected %d",
|
||||
g->ecc.gr.fecs_corrected_err_count.counters[0],
|
||||
g->ecc.gr.fecs_uncorrected_err_count.counters[0]);
|
||||
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter,
|
||||
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -84,6 +84,7 @@
|
||||
#include "regops_gv11b.h"
|
||||
#include "subctx_gv11b.h"
|
||||
#include "therm_gv11b.h"
|
||||
#include "ecc_gv11b.h"
|
||||
|
||||
#include <nvgpu/ptimer.h>
|
||||
#include <nvgpu/debug.h>
|
||||
@@ -369,10 +370,7 @@ static const struct gpu_ops gv11b_ops = {
|
||||
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||
#ifdef CONFIG_SYSFS
|
||||
.create_gr_sysfs = gr_gv11b_create_sysfs,
|
||||
.remove_gr_sysfs = gr_gv11b_remove_sysfs,
|
||||
#endif
|
||||
.init_ecc = gv11b_ecc_init,
|
||||
.set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode,
|
||||
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
||||
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
||||
|
||||
@@ -90,13 +90,11 @@ void gv11b_ltc_isr(struct gk20a *g)
|
||||
u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
|
||||
u32 corrected_delta, uncorrected_delta;
|
||||
u32 corrected_overflow, uncorrected_overflow;
|
||||
u32 ltc_corrected, ltc_uncorrected;
|
||||
|
||||
mc_intr = gk20a_readl(g, mc_intr_ltc_r());
|
||||
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
||||
if ((mc_intr & 1U << ltc) == 0)
|
||||
continue;
|
||||
ltc_corrected = ltc_uncorrected = 0U;
|
||||
|
||||
for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
|
||||
u32 offset = ltc_stride * ltc + lts_stride * slice;
|
||||
@@ -150,8 +148,8 @@ void gv11b_ltc_isr(struct gk20a *g)
|
||||
if (uncorrected_overflow)
|
||||
uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
|
||||
|
||||
ltc_corrected += corrected_delta;
|
||||
ltc_uncorrected += uncorrected_delta;
|
||||
g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
|
||||
g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
|
||||
|
||||
@@ -177,10 +175,6 @@ void gv11b_ltc_isr(struct gk20a *g)
|
||||
}
|
||||
|
||||
}
|
||||
g->ecc.ltc.l2_cache_corrected_err_count.counters[ltc] +=
|
||||
ltc_corrected;
|
||||
g->ecc.ltc.l2_cache_uncorrected_err_count.counters[ltc] +=
|
||||
ltc_uncorrected;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -343,8 +343,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
|
||||
if (uncorrected_overflow)
|
||||
uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s());
|
||||
|
||||
g->ecc.pmu.pmu_corrected_err_count.counters[0] += corrected_delta;
|
||||
g->ecc.pmu.pmu_uncorrected_err_count.counters[0] += uncorrected_delta;
|
||||
g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter += corrected_delta;
|
||||
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter += uncorrected_delta;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"pmu ecc interrupt intr1: 0x%x", intr1);
|
||||
@@ -371,8 +371,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
|
||||
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ecc error count corrected: %d, uncorrected %d",
|
||||
g->ecc.pmu.pmu_corrected_err_count.counters[0],
|
||||
g->ecc.pmu.pmu_uncorrected_err_count.counters[0]);
|
||||
g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter,
|
||||
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
162
drivers/gpu/nvgpu/include/nvgpu/ecc.h
Normal file
162
drivers/gpu/nvgpu/include/nvgpu/ecc.h
Normal file
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_ECC_H
|
||||
#define NVGPU_ECC_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/list.h>
|
||||
|
||||
#define NVGPU_ECC_STAT_NAME_MAX_SIZE 100
|
||||
|
||||
struct gk20a;
|
||||
|
||||
struct nvgpu_ecc_stat {
|
||||
char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];
|
||||
u32 counter;
|
||||
struct nvgpu_list_node node;
|
||||
};
|
||||
|
||||
static inline struct nvgpu_ecc_stat *nvgpu_ecc_stat_from_node(
|
||||
struct nvgpu_list_node *node)
|
||||
{
|
||||
return (struct nvgpu_ecc_stat *)(
|
||||
(uintptr_t)node - offsetof(struct nvgpu_ecc_stat, node)
|
||||
);
|
||||
}
|
||||
|
||||
struct nvgpu_ecc {
|
||||
struct {
|
||||
/* stats per tpc */
|
||||
|
||||
struct nvgpu_ecc_stat **sm_lrf_ecc_single_err_count;
|
||||
struct nvgpu_ecc_stat **sm_lrf_ecc_double_err_count;
|
||||
|
||||
struct nvgpu_ecc_stat **sm_shm_ecc_sec_count;
|
||||
struct nvgpu_ecc_stat **sm_shm_ecc_sed_count;
|
||||
struct nvgpu_ecc_stat **sm_shm_ecc_ded_count;
|
||||
|
||||
struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe0_count;
|
||||
struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe0_count;
|
||||
struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe0_count;
|
||||
struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe0_count;
|
||||
struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe1_count;
|
||||
struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe1_count;
|
||||
struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe1_count;
|
||||
struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe1_count;
|
||||
|
||||
struct nvgpu_ecc_stat **sm_l1_tag_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat **sm_l1_tag_ecc_uncorrected_err_count;
|
||||
struct nvgpu_ecc_stat **sm_cbu_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat **sm_cbu_ecc_uncorrected_err_count;
|
||||
struct nvgpu_ecc_stat **sm_l1_data_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat **sm_l1_data_ecc_uncorrected_err_count;
|
||||
struct nvgpu_ecc_stat **sm_icache_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat **sm_icache_ecc_uncorrected_err_count;
|
||||
|
||||
/* stats per gpc */
|
||||
|
||||
struct nvgpu_ecc_stat *gcc_l15_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat *gcc_l15_ecc_uncorrected_err_count;
|
||||
|
||||
struct nvgpu_ecc_stat *gpccs_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat *gpccs_ecc_uncorrected_err_count;
|
||||
struct nvgpu_ecc_stat *mmu_l1tlb_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat *mmu_l1tlb_ecc_uncorrected_err_count;
|
||||
|
||||
/* stats per device */
|
||||
struct nvgpu_ecc_stat *fecs_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat *fecs_ecc_uncorrected_err_count;
|
||||
} gr;
|
||||
|
||||
struct {
|
||||
/* stats per lts */
|
||||
struct nvgpu_ecc_stat **ecc_sec_count;
|
||||
struct nvgpu_ecc_stat **ecc_ded_count;
|
||||
} ltc;
|
||||
|
||||
struct {
|
||||
/* stats per device */
|
||||
struct nvgpu_ecc_stat *mmu_l2tlb_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat *mmu_l2tlb_ecc_uncorrected_err_count;
|
||||
struct nvgpu_ecc_stat *mmu_hubtlb_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat *mmu_hubtlb_ecc_uncorrected_err_count;
|
||||
struct nvgpu_ecc_stat *mmu_fillunit_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat *mmu_fillunit_ecc_uncorrected_err_count;
|
||||
} fb;
|
||||
|
||||
struct {
|
||||
/* stats per device */
|
||||
struct nvgpu_ecc_stat *pmu_ecc_corrected_err_count;
|
||||
struct nvgpu_ecc_stat *pmu_ecc_uncorrected_err_count;
|
||||
} pmu;
|
||||
|
||||
struct {
|
||||
/* stats per fbpa */
|
||||
struct nvgpu_ecc_stat *fbpa_ecc_sec_err_count;
|
||||
struct nvgpu_ecc_stat *fbpa_ecc_ded_err_count;
|
||||
} fbpa;
|
||||
|
||||
struct nvgpu_list_node stats_list;
|
||||
int stats_count;
|
||||
};
|
||||
|
||||
/*
 * Counter setup entry points.
 *
 * Every NVGPU_ECC_COUNTER_INIT_* macro below expands to a call of the
 * function declared just above it.  The macros rely on a variable named
 * `g` (struct gk20a *) being in scope at the point of use, take the address
 * of the like-named member of the matching g->ecc sub-struct, and pass the
 * member name, stringified, as the counter name.
 */

/* Counters kept in g->ecc.gr under the "stats per tpc" group. */
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
		struct nvgpu_ecc_stat ***stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \
	nvgpu_ecc_counter_init_per_tpc(g, &g->ecc.gr.stat, #stat)

/* Counters kept in g->ecc.gr under the "stats per gpc" group. */
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
	nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)

/* Single counters; one wrapper macro per g->ecc sub-struct (gr, fb, pmu). */
int nvgpu_ecc_counter_init(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_GR(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
#define NVGPU_ECC_COUNTER_INIT_FB(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat)
#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat)

/* Counters kept in g->ecc.ltc ("stats per lts"). */
int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
		struct nvgpu_ecc_stat ***stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
	nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)

/* Counters kept in g->ecc.fbpa ("stats per fbpa"). */
int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \
	nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat)

void nvgpu_ecc_free(struct gk20a *g);

int nvgpu_ecc_init_support(struct gk20a *g);
void nvgpu_ecc_remove_support(struct gk20a *g);

/* OSes to implement */

int nvgpu_ecc_sysfs_init(struct gk20a *g);
void nvgpu_ecc_sysfs_remove(struct gk20a *g);
|
||||
|
||||
#endif
|
||||
80
drivers/gpu/nvgpu/os/linux/ecc_sysfs.c
Normal file
80
drivers/gpu/nvgpu/os/linux/ecc_sysfs.c
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <nvgpu/ecc.h>
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
#include "os_linux.h"
|
||||
|
||||
int nvgpu_ecc_sysfs_init(struct gk20a *g)
|
||||
{
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
struct dev_ext_attribute *attr;
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
struct nvgpu_ecc_stat *stat;
|
||||
int i = 0, err;
|
||||
|
||||
attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count);
|
||||
if (!attr)
|
||||
return -ENOMEM;
|
||||
|
||||
nvgpu_list_for_each_entry(stat,
|
||||
&ecc->stats_list, nvgpu_ecc_stat, node) {
|
||||
if (i >= ecc->stats_count) {
|
||||
err = -EINVAL;
|
||||
nvgpu_err(g, "stats_list longer than stats_count %d",
|
||||
ecc->stats_count);
|
||||
break;
|
||||
}
|
||||
sysfs_attr_init(&attr[i].attr);
|
||||
attr[i].attr.attr.name = stat->name;
|
||||
attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
|
||||
attr[i].var = &stat->counter;
|
||||
attr[i].attr.show = device_show_int;
|
||||
err = device_create_file(dev, &attr[i].attr);
|
||||
if (err) {
|
||||
nvgpu_err(g, "sysfs node create failed for %s\n",
|
||||
stat->name);
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (err) {
|
||||
while (i-- > 0)
|
||||
device_remove_file(dev, &attr[i].attr);
|
||||
nvgpu_kfree(g, attr);
|
||||
return err;
|
||||
}
|
||||
|
||||
l->ecc_attrs = attr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_ecc_sysfs_remove(struct gk20a *g)
|
||||
{
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ecc->stats_count; i++)
|
||||
device_remove_file(dev, &l->ecc_attrs[i].attr);
|
||||
nvgpu_kfree(g, l->ecc_attrs);
|
||||
l->ecc_attrs = NULL;
|
||||
}
|
||||
@@ -141,6 +141,7 @@ struct nvgpu_os_linux {
|
||||
struct dentry *debugfs_dump_ctxsw_stats;
|
||||
#endif
|
||||
DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
|
||||
struct dev_ext_attribute *ecc_attrs;
|
||||
|
||||
struct gk20a_cde_app cde_app;
|
||||
|
||||
|
||||
@@ -52,11 +52,6 @@ static int nvgpu_pci_tegra_probe(struct device *dev)
|
||||
|
||||
static int nvgpu_pci_tegra_remove(struct device *dev)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
|
||||
if (g->ops.gr.remove_gr_sysfs)
|
||||
g->ops.gr.remove_gr_sysfs(g);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,269 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/hashtable.h>
|
||||
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/hashtable.h>
|
||||
|
||||
#include "os_linux.h"
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
|
||||
#include "platform_gk20a.h"
|
||||
#include "platform_gk20a_tegra.h"
|
||||
#include "platform_gp10b.h"
|
||||
#include "platform_gp10b_tegra.h"
|
||||
#include "platform_ecc_sysfs.h"
|
||||
|
||||
/*
 * Hash a NUL-terminated string with 32-bit FNV-1: start from the offset
 * basis 0x811c9dc5 and, for every character, multiply by the prime
 * 0x1000193 and then XOR the character in.
 *
 * @str is only read, so it is taken as const; callers no longer need to
 * cast away constness.  Returns the offset basis for an empty string.
 */
static u32 gen_ecc_hash_key(const char *str)
{
	u32 hash_key = 0x811c9dc5;

	for (; *str != '\0'; str++) {
		hash_key *= 0x1000193;
		hash_key ^= (u32)(*str);
	}

	return hash_key;
}
|
||||
|
||||
static ssize_t ecc_stat_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
const char *ecc_stat_full_name = attr->attr.name;
|
||||
const char *ecc_stat_base_name;
|
||||
unsigned int hw_unit;
|
||||
unsigned int subunit;
|
||||
struct gk20a_ecc_stat *ecc_stat;
|
||||
u32 hash_key;
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
|
||||
if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
|
||||
&subunit) == 2) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
|
||||
hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
|
||||
} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
|
||||
} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
|
||||
} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
|
||||
} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
|
||||
} else {
|
||||
return snprintf(buf,
|
||||
PAGE_SIZE,
|
||||
"Error: Invalid ECC stat name!\n");
|
||||
}
|
||||
|
||||
hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
|
||||
|
||||
hash_for_each_possible(l->ecc_sysfs_stats_htable,
|
||||
ecc_stat,
|
||||
hash_node,
|
||||
hash_key) {
|
||||
if (hw_unit >= ecc_stat->count)
|
||||
continue;
|
||||
if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
|
||||
}
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
|
||||
}
|
||||
|
||||
int nvgpu_gr_ecc_stat_create(struct device *dev,
|
||||
int is_l2, char *ecc_stat_name,
|
||||
struct gk20a_ecc_stat *ecc_stat)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
char *ltc_unit_name = "ltc";
|
||||
char *gr_unit_name = "gpc0_tpc";
|
||||
char *lts_unit_name = "lts";
|
||||
int num_hw_units = 0;
|
||||
int num_subunits = 0;
|
||||
|
||||
if (is_l2 == 1)
|
||||
num_hw_units = g->ltc_count;
|
||||
else if (is_l2 == 2) {
|
||||
num_hw_units = g->ltc_count;
|
||||
num_subunits = g->gr.slices_per_ltc;
|
||||
} else
|
||||
num_hw_units = g->gr.tpc_count;
|
||||
|
||||
|
||||
return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
|
||||
is_l2 ? ltc_unit_name : gr_unit_name,
|
||||
num_subunits ? lts_unit_name: NULL,
|
||||
ecc_stat_name,
|
||||
ecc_stat);
|
||||
}
|
||||
|
||||
int nvgpu_ecc_stat_create(struct device *dev,
|
||||
int num_hw_units, int num_subunits,
|
||||
char *ecc_unit_name, char *ecc_subunit_name,
|
||||
char *ecc_stat_name,
|
||||
struct gk20a_ecc_stat *ecc_stat)
|
||||
{
|
||||
int error = 0;
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
int hw_unit = 0;
|
||||
int subunit = 0;
|
||||
int element = 0;
|
||||
u32 hash_key = 0;
|
||||
struct device_attribute *dev_attr_array;
|
||||
|
||||
int num_elements = num_subunits ? num_subunits * num_hw_units :
|
||||
num_hw_units;
|
||||
|
||||
/* Allocate arrays */
|
||||
dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
|
||||
num_elements);
|
||||
ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
|
||||
ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
|
||||
|
||||
for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
|
||||
ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
|
||||
ECC_STAT_NAME_MAX_SIZE);
|
||||
}
|
||||
ecc_stat->count = num_elements;
|
||||
if (num_subunits) {
|
||||
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
|
||||
for (subunit = 0; subunit < num_subunits; subunit++) {
|
||||
element = hw_unit*num_subunits + subunit;
|
||||
|
||||
snprintf(ecc_stat->names[element],
|
||||
ECC_STAT_NAME_MAX_SIZE,
|
||||
"%s%d_%s%d_%s",
|
||||
ecc_unit_name,
|
||||
hw_unit,
|
||||
ecc_subunit_name,
|
||||
subunit,
|
||||
ecc_stat_name);
|
||||
|
||||
sysfs_attr_init(&dev_attr_array[element].attr);
|
||||
dev_attr_array[element].attr.name =
|
||||
ecc_stat->names[element];
|
||||
dev_attr_array[element].attr.mode =
|
||||
VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
|
||||
dev_attr_array[element].show = ecc_stat_show;
|
||||
dev_attr_array[element].store = NULL;
|
||||
|
||||
/* Create sysfs file */
|
||||
error |= device_create_file(dev,
|
||||
&dev_attr_array[element]);
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
|
||||
|
||||
/* Fill in struct device_attribute members */
|
||||
snprintf(ecc_stat->names[hw_unit],
|
||||
ECC_STAT_NAME_MAX_SIZE,
|
||||
"%s%d_%s",
|
||||
ecc_unit_name,
|
||||
hw_unit,
|
||||
ecc_stat_name);
|
||||
|
||||
sysfs_attr_init(&dev_attr_array[hw_unit].attr);
|
||||
dev_attr_array[hw_unit].attr.name =
|
||||
ecc_stat->names[hw_unit];
|
||||
dev_attr_array[hw_unit].attr.mode =
|
||||
VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
|
||||
dev_attr_array[hw_unit].show = ecc_stat_show;
|
||||
dev_attr_array[hw_unit].store = NULL;
|
||||
|
||||
/* Create sysfs file */
|
||||
error |= device_create_file(dev,
|
||||
&dev_attr_array[hw_unit]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Add hash table entry */
|
||||
hash_key = gen_ecc_hash_key(ecc_stat_name);
|
||||
hash_add(l->ecc_sysfs_stats_htable,
|
||||
&ecc_stat->hash_node,
|
||||
hash_key);
|
||||
|
||||
ecc_stat->attr_array = dev_attr_array;
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
void nvgpu_gr_ecc_stat_remove(struct device *dev,
|
||||
int is_l2, struct gk20a_ecc_stat *ecc_stat)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
int num_hw_units = 0;
|
||||
int num_subunits = 0;
|
||||
|
||||
if (is_l2 == 1)
|
||||
num_hw_units = g->ltc_count;
|
||||
else if (is_l2 == 2) {
|
||||
num_hw_units = g->ltc_count;
|
||||
num_subunits = g->gr.slices_per_ltc;
|
||||
} else
|
||||
num_hw_units = g->gr.tpc_count;
|
||||
|
||||
nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
|
||||
}
|
||||
|
||||
void nvgpu_ecc_stat_remove(struct device *dev,
|
||||
int num_hw_units, int num_subunits,
|
||||
struct gk20a_ecc_stat *ecc_stat)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
struct device_attribute *dev_attr_array = ecc_stat->attr_array;
|
||||
int hw_unit = 0;
|
||||
int subunit = 0;
|
||||
int element = 0;
|
||||
int num_elements = num_subunits ? num_subunits * num_hw_units :
|
||||
num_hw_units;
|
||||
|
||||
/* Remove sysfs files */
|
||||
if (num_subunits) {
|
||||
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
|
||||
for (subunit = 0; subunit < num_subunits; subunit++) {
|
||||
element = hw_unit * num_subunits + subunit;
|
||||
|
||||
device_remove_file(dev,
|
||||
&dev_attr_array[element]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
|
||||
device_remove_file(dev, &dev_attr_array[hw_unit]);
|
||||
}
|
||||
|
||||
/* Remove hash table entry */
|
||||
hash_del(&ecc_stat->hash_node);
|
||||
|
||||
/* Free arrays */
|
||||
nvgpu_kfree(g, ecc_stat->counters);
|
||||
|
||||
for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
|
||||
nvgpu_kfree(g, ecc_stat->names[hw_unit]);
|
||||
|
||||
nvgpu_kfree(g, ecc_stat->names);
|
||||
nvgpu_kfree(g, dev_attr_array);
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _NVGPU_PLATFORM_SYSFS_H_
#define _NVGPU_PLATFORM_SYSFS_H_

#include "gp10b/gr_gp10b.h"

/* Size in bytes of the buffer holding one generated ECC stat name. */
#define ECC_STAT_NAME_MAX_SIZE 100

/*
 * Create an ECC stat; the unit counts and unit names are selected by
 * @is_l2.
 */
int nvgpu_gr_ecc_stat_create(struct device *dev,
		int is_l2, char *ecc_stat_name,
		struct gk20a_ecc_stat *ecc_stat);

/*
 * Create an ECC stat with explicit unit/subunit counts and name prefixes.
 */
int nvgpu_ecc_stat_create(struct device *dev,
		int num_hw_units, int num_subunits,
		char *ecc_unit_name, char *ecc_subunit_name,
		char *ecc_stat_name,
		struct gk20a_ecc_stat *ecc_stat);

/* Remove a stat created through nvgpu_gr_ecc_stat_create(). */
void nvgpu_gr_ecc_stat_remove(struct device *dev,
		int is_l2, struct gk20a_ecc_stat *ecc_stat);

/* Remove a stat created through nvgpu_ecc_stat_create(). */
void nvgpu_ecc_stat_remove(struct device *dev,
		int num_hw_units, int num_subunits,
		struct gk20a_ecc_stat *ecc_stat);

#endif
|
||||
@@ -41,7 +41,6 @@
|
||||
#include "gk20a/gk20a.h"
|
||||
|
||||
#include "platform_gk20a.h"
|
||||
#include "platform_ecc_sysfs.h"
|
||||
#include "platform_gk20a_tegra.h"
|
||||
#include "platform_gp10b.h"
|
||||
#include "platform_gp10b_tegra.h"
|
||||
@@ -177,11 +176,6 @@ static int gp10b_tegra_late_probe(struct device *dev)
|
||||
|
||||
static int gp10b_tegra_remove(struct device *dev)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
|
||||
if (g->ops.gr.remove_gr_sysfs)
|
||||
g->ops.gr.remove_gr_sysfs(g);
|
||||
|
||||
/* deinitialise tegra specific scaling quirks */
|
||||
gp10b_tegra_scale_exit(dev);
|
||||
|
||||
@@ -476,162 +470,3 @@ struct gk20a_platform gp10b_tegra_platform = {
|
||||
|
||||
.secure_buffer_size = 401408,
|
||||
};
|
||||
|
||||
void gr_gp10b_create_sysfs(struct gk20a *g)
|
||||
{
|
||||
int error = 0;
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
|
||||
/* This stat creation function is called on GR init. GR can get
|
||||
initialized multiple times but we only need to create the ECC
|
||||
stats once. Therefore, add the following check to avoid
|
||||
creating duplicate stat sysfs nodes. */
|
||||
if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
|
||||
return;
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_lrf_ecc_single_err_count",
|
||||
&g->ecc.gr.sm_lrf_single_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_lrf_ecc_double_err_count",
|
||||
&g->ecc.gr.sm_lrf_double_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_shm_ecc_sec_count",
|
||||
&g->ecc.gr.sm_shm_sec_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_shm_ecc_sed_count",
|
||||
&g->ecc.gr.sm_shm_sed_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_shm_ecc_ded_count",
|
||||
&g->ecc.gr.sm_shm_ded_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"tex_ecc_total_sec_pipe0_count",
|
||||
&g->ecc.gr.tex_total_sec_pipe0_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"tex_ecc_total_ded_pipe0_count",
|
||||
&g->ecc.gr.tex_total_ded_pipe0_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"tex_ecc_unique_sec_pipe0_count",
|
||||
&g->ecc.gr.tex_unique_sec_pipe0_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"tex_ecc_unique_ded_pipe0_count",
|
||||
&g->ecc.gr.tex_unique_ded_pipe0_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"tex_ecc_total_sec_pipe1_count",
|
||||
&g->ecc.gr.tex_total_sec_pipe1_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"tex_ecc_total_ded_pipe1_count",
|
||||
&g->ecc.gr.tex_total_ded_pipe1_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"tex_ecc_unique_sec_pipe1_count",
|
||||
&g->ecc.gr.tex_unique_sec_pipe1_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"tex_ecc_unique_ded_pipe1_count",
|
||||
&g->ecc.gr.tex_unique_ded_pipe1_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
2,
|
||||
"ecc_sec_count",
|
||||
&g->ecc.ltc.l2_sec_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
2,
|
||||
"ecc_ded_count",
|
||||
&g->ecc.ltc.l2_ded_count);
|
||||
|
||||
if (error)
|
||||
dev_err(dev, "Failed to create sysfs attributes!\n");
|
||||
}
|
||||
|
||||
void gr_gp10b_remove_sysfs(struct gk20a *g)
|
||||
{
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
|
||||
if (!g->ecc.gr.sm_lrf_single_err_count.counters)
|
||||
return;
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_lrf_single_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_lrf_double_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_shm_sec_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_shm_sed_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_shm_ded_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.tex_total_sec_pipe0_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.tex_total_ded_pipe0_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.tex_unique_sec_pipe0_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.tex_unique_ded_pipe0_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.tex_total_sec_pipe1_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.tex_total_ded_pipe1_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.tex_unique_sec_pipe1_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.tex_unique_ded_pipe1_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
2,
|
||||
&g->ecc.ltc.l2_sec_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
2,
|
||||
&g->ecc.ltc.l2_ded_count);
|
||||
}
|
||||
|
||||
@@ -18,6 +18,5 @@
|
||||
#define _PLATFORM_GP10B_TEGRA_H_
|
||||
|
||||
#include "gp10b/gr_gp10b.h"
|
||||
#include "platform_ecc_sysfs.h"
|
||||
|
||||
#endif
|
||||
|
||||
@@ -39,7 +39,6 @@
|
||||
|
||||
#include "platform_gp10b.h"
|
||||
#include "platform_gp10b_tegra.h"
|
||||
#include "platform_ecc_sysfs.h"
|
||||
|
||||
#include "os_linux.h"
|
||||
#include "platform_gk20a_tegra.h"
|
||||
@@ -94,11 +93,6 @@ static int gv11b_tegra_late_probe(struct device *dev)
|
||||
|
||||
static int gv11b_tegra_remove(struct device *dev)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
|
||||
if (g->ops.gr.remove_gr_sysfs)
|
||||
g->ops.gr.remove_gr_sysfs(g);
|
||||
|
||||
gv11b_tegra_scale_exit(dev);
|
||||
|
||||
#ifdef CONFIG_TEGRA_GK20A_NVHOST
|
||||
@@ -261,328 +255,3 @@ struct gk20a_platform gv11b_tegra_platform = {
|
||||
|
||||
.secure_buffer_size = 667648,
|
||||
};
|
||||
|
||||
void gr_gv11b_create_sysfs(struct gk20a *g)
|
||||
{
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
int error = 0;
|
||||
|
||||
/* This stat creation function is called on GR init. GR can get
|
||||
initialized multiple times but we only need to create the ECC
|
||||
stats once. Therefore, add the following check to avoid
|
||||
creating duplicate stat sysfs nodes. */
|
||||
if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL)
|
||||
return;
|
||||
|
||||
gr_gp10b_create_sysfs(g);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_l1_tag_ecc_corrected_err_count",
|
||||
&g->ecc.gr.sm_l1_tag_corrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_l1_tag_ecc_uncorrected_err_count",
|
||||
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_cbu_ecc_corrected_err_count",
|
||||
&g->ecc.gr.sm_cbu_corrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_cbu_ecc_uncorrected_err_count",
|
||||
&g->ecc.gr.sm_cbu_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_l1_data_ecc_corrected_err_count",
|
||||
&g->ecc.gr.sm_l1_data_corrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_l1_data_ecc_uncorrected_err_count",
|
||||
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_icache_ecc_corrected_err_count",
|
||||
&g->ecc.gr.sm_icache_corrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"sm_icache_ecc_uncorrected_err_count",
|
||||
&g->ecc.gr.sm_icache_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"gcc_l15_ecc_corrected_err_count",
|
||||
&g->ecc.gr.gcc_l15_corrected_err_count);
|
||||
|
||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
||||
0,
|
||||
"gcc_l15_ecc_uncorrected_err_count",
|
||||
&g->ecc.gr.gcc_l15_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
g->ltc_count,
|
||||
0,
|
||||
"ltc",
|
||||
NULL,
|
||||
"l2_cache_uncorrected_err_count",
|
||||
&g->ecc.ltc.l2_cache_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
g->ltc_count,
|
||||
0,
|
||||
"ltc",
|
||||
NULL,
|
||||
"l2_cache_corrected_err_count",
|
||||
&g->ecc.ltc.l2_cache_corrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"gpc",
|
||||
NULL,
|
||||
"fecs_ecc_uncorrected_err_count",
|
||||
&g->ecc.gr.fecs_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"gpc",
|
||||
NULL,
|
||||
"fecs_ecc_corrected_err_count",
|
||||
&g->ecc.gr.fecs_corrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
g->gr.gpc_count,
|
||||
0,
|
||||
"gpc",
|
||||
NULL,
|
||||
"gpccs_ecc_uncorrected_err_count",
|
||||
&g->ecc.gr.gpccs_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
g->gr.gpc_count,
|
||||
0,
|
||||
"gpc",
|
||||
NULL,
|
||||
"gpccs_ecc_corrected_err_count",
|
||||
&g->ecc.gr.gpccs_corrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
g->gr.gpc_count,
|
||||
0,
|
||||
"gpc",
|
||||
NULL,
|
||||
"mmu_l1tlb_ecc_uncorrected_err_count",
|
||||
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
g->gr.gpc_count,
|
||||
0,
|
||||
"gpc",
|
||||
NULL,
|
||||
"mmu_l1tlb_ecc_corrected_err_count",
|
||||
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"eng",
|
||||
NULL,
|
||||
"mmu_l2tlb_ecc_uncorrected_err_count",
|
||||
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"eng",
|
||||
NULL,
|
||||
"mmu_l2tlb_ecc_corrected_err_count",
|
||||
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"eng",
|
||||
NULL,
|
||||
"mmu_hubtlb_ecc_uncorrected_err_count",
|
||||
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"eng",
|
||||
NULL,
|
||||
"mmu_hubtlb_ecc_corrected_err_count",
|
||||
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"eng",
|
||||
NULL,
|
||||
"mmu_fillunit_ecc_uncorrected_err_count",
|
||||
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"eng",
|
||||
NULL,
|
||||
"mmu_fillunit_ecc_corrected_err_count",
|
||||
&g->ecc.fb.mmu_fillunit_corrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"eng",
|
||||
NULL,
|
||||
"pmu_ecc_uncorrected_err_count",
|
||||
&g->ecc.pmu.pmu_uncorrected_err_count);
|
||||
|
||||
error |= nvgpu_ecc_stat_create(dev,
|
||||
1,
|
||||
0,
|
||||
"eng",
|
||||
NULL,
|
||||
"pmu_ecc_corrected_err_count",
|
||||
&g->ecc.pmu.pmu_corrected_err_count);
|
||||
|
||||
if (error)
|
||||
dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
|
||||
}
|
||||
|
||||
void gr_gv11b_remove_sysfs(struct gk20a *g)
|
||||
{
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
|
||||
if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters)
|
||||
return;
|
||||
gr_gp10b_remove_sysfs(g);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_l1_tag_corrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_cbu_corrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_cbu_uncorrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_l1_data_corrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_icache_corrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.sm_icache_uncorrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.gcc_l15_corrected_err_count);
|
||||
|
||||
nvgpu_gr_ecc_stat_remove(dev,
|
||||
0,
|
||||
&g->ecc.gr.gcc_l15_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
g->ltc_count,
|
||||
0,
|
||||
&g->ecc.ltc.l2_cache_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
g->ltc_count,
|
||||
0,
|
||||
&g->ecc.ltc.l2_cache_corrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.gr.fecs_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.gr.fecs_corrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
g->gr.gpc_count,
|
||||
0,
|
||||
&g->ecc.gr.gpccs_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
g->gr.gpc_count,
|
||||
0,
|
||||
&g->ecc.gr.gpccs_corrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
g->gr.gpc_count,
|
||||
0,
|
||||
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
g->gr.gpc_count,
|
||||
0,
|
||||
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.fb.mmu_fillunit_corrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.pmu.pmu_uncorrected_err_count);
|
||||
|
||||
nvgpu_ecc_stat_remove(dev,
|
||||
1,
|
||||
0,
|
||||
&g->ecc.pmu.pmu_corrected_err_count);
|
||||
}
|
||||
|
||||
@@ -215,10 +215,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
||||
.update_boosted_ctx = NULL,
|
||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||
#ifdef CONFIG_SYSFS
|
||||
.create_gr_sysfs = gr_gp10b_create_sysfs,
|
||||
.remove_gr_sysfs = gr_gp10b_remove_sysfs,
|
||||
#endif
|
||||
.set_ctxsw_preemption_mode =
|
||||
vgpu_gr_gp10b_set_ctxsw_preemption_mode,
|
||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||
|
||||
@@ -232,10 +232,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
||||
.update_boosted_ctx = NULL,
|
||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||
#ifdef CONFIG_SYSFS
|
||||
.create_gr_sysfs = gr_gv11b_create_sysfs,
|
||||
.remove_gr_sysfs = gr_gv11b_remove_sysfs,
|
||||
#endif
|
||||
.set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode,
|
||||
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
||||
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
||||
|
||||
Reference in New Issue
Block a user