mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: rework ecc structure and sysfs
- Create common file common/ecc.c, which includes common functions for adding and removing ECC counters. - Common code creates a list of all counters, which makes it easier to iterate over them. - Add chip-specific files for adding ECC counters. - Add Linux-specific file os/linux/ecc_sysfs.c to export counters to sysfs. - Remove obsolete code. - The MISRA violation for using snprintf is not solved; it is tracked in Jira NVGPU-859. Jira NVGPUT-115 Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1763536 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
5ff1b3fe5a
commit
7f14aafc2c
@@ -69,7 +69,8 @@ nvgpu-y += \
|
|||||||
os/linux/sim_pci.o \
|
os/linux/sim_pci.o \
|
||||||
os/linux/os_sched.o \
|
os/linux/os_sched.o \
|
||||||
os/linux/nvlink.o \
|
os/linux/nvlink.o \
|
||||||
os/linux/dt.o
|
os/linux/dt.o \
|
||||||
|
os/linux/ecc_sysfs.o
|
||||||
|
|
||||||
nvgpu-$(CONFIG_GK20A_VIDMEM) += \
|
nvgpu-$(CONFIG_GK20A_VIDMEM) += \
|
||||||
os/linux/dmabuf_vidmem.o
|
os/linux/dmabuf_vidmem.o
|
||||||
@@ -100,7 +101,6 @@ nvgpu-$(CONFIG_TEGRA_GK20A) += \
|
|||||||
os/linux/module_usermode.o \
|
os/linux/module_usermode.o \
|
||||||
os/linux/soc.o \
|
os/linux/soc.o \
|
||||||
os/linux/fuse.o \
|
os/linux/fuse.o \
|
||||||
os/linux/platform_ecc_sysfs.o \
|
|
||||||
os/linux/platform_gk20a_tegra.o \
|
os/linux/platform_gk20a_tegra.o \
|
||||||
os/linux/platform_gp10b_tegra.o \
|
os/linux/platform_gp10b_tegra.o \
|
||||||
os/linux/platform_gv11b_tegra.o
|
os/linux/platform_gv11b_tegra.o
|
||||||
@@ -185,6 +185,7 @@ nvgpu-y += \
|
|||||||
common/sim.o \
|
common/sim.o \
|
||||||
common/sim_pci.o \
|
common/sim_pci.o \
|
||||||
common/fifo/submit.o \
|
common/fifo/submit.o \
|
||||||
|
common/ecc.o \
|
||||||
gk20a/gk20a.o \
|
gk20a/gk20a.o \
|
||||||
gk20a/ce2_gk20a.o \
|
gk20a/ce2_gk20a.o \
|
||||||
gk20a/fifo_gk20a.o \
|
gk20a/fifo_gk20a.o \
|
||||||
@@ -267,6 +268,7 @@ nvgpu-y += \
|
|||||||
gp10b/priv_ring_gp10b.o \
|
gp10b/priv_ring_gp10b.o \
|
||||||
gp10b/gp10b.o \
|
gp10b/gp10b.o \
|
||||||
gp10b/fuse_gp10b.o \
|
gp10b/fuse_gp10b.o \
|
||||||
|
gp10b/ecc_gp10b.o \
|
||||||
gp106/hal_gp106.o \
|
gp106/hal_gp106.o \
|
||||||
gp106/mm_gp106.o \
|
gp106/mm_gp106.o \
|
||||||
gp106/flcn_gp106.o \
|
gp106/flcn_gp106.o \
|
||||||
@@ -296,6 +298,7 @@ nvgpu-y += \
|
|||||||
gv11b/subctx_gv11b.o \
|
gv11b/subctx_gv11b.o \
|
||||||
gv11b/regops_gv11b.o \
|
gv11b/regops_gv11b.o \
|
||||||
gv11b/therm_gv11b.o \
|
gv11b/therm_gv11b.o \
|
||||||
|
gv11b/ecc_gv11b.o \
|
||||||
gv100/mm_gv100.o \
|
gv100/mm_gv100.o \
|
||||||
gv100/gr_ctx_gv100.o \
|
gv100/gr_ctx_gv100.o \
|
||||||
gv100/bios_gv100.o \
|
gv100/bios_gv100.o \
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ srcs := common/mm/nvgpu_allocator.c \
|
|||||||
common/rbtree.c \
|
common/rbtree.c \
|
||||||
common/ltc.c \
|
common/ltc.c \
|
||||||
common/io_common.c \
|
common/io_common.c \
|
||||||
|
common/ecc.c \
|
||||||
common/vbios/bios.c \
|
common/vbios/bios.c \
|
||||||
common/falcon/falcon.c \
|
common/falcon/falcon.c \
|
||||||
common/pmu/pmu.c \
|
common/pmu/pmu.c \
|
||||||
@@ -166,6 +167,7 @@ srcs := common/mm/nvgpu_allocator.c \
|
|||||||
gp10b/priv_ring_gp10b.c \
|
gp10b/priv_ring_gp10b.c \
|
||||||
gp10b/gp10b.c \
|
gp10b/gp10b.c \
|
||||||
gp10b/fuse_gp10b.c \
|
gp10b/fuse_gp10b.c \
|
||||||
|
gp10b/ecc_gp10b.c \
|
||||||
gv11b/gv11b.c \
|
gv11b/gv11b.c \
|
||||||
gv11b/dbg_gpu_gv11b.c \
|
gv11b/dbg_gpu_gv11b.c \
|
||||||
gv11b/mc_gv11b.c \
|
gv11b/mc_gv11b.c \
|
||||||
@@ -181,6 +183,7 @@ srcs := common/mm/nvgpu_allocator.c \
|
|||||||
gv11b/subctx_gv11b.c \
|
gv11b/subctx_gv11b.c \
|
||||||
gv11b/regops_gv11b.c \
|
gv11b/regops_gv11b.c \
|
||||||
gv11b/therm_gv11b.c \
|
gv11b/therm_gv11b.c \
|
||||||
|
gv11b/ecc_gv11b.c \
|
||||||
gp106/hal_gp106.c \
|
gp106/hal_gp106.c \
|
||||||
gp106/mm_gp106.c \
|
gp106/mm_gp106.c \
|
||||||
gp106/flcn_gp106.c \
|
gp106/flcn_gp106.c \
|
||||||
|
|||||||
369
drivers/gpu/nvgpu/common/ecc.c
Normal file
369
drivers/gpu/nvgpu/common/ecc.c
Normal file
@@ -0,0 +1,369 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "gk20a/gk20a.h"
|
||||||
|
|
||||||
|
static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
|
||||||
|
{
|
||||||
|
struct nvgpu_ecc *ecc = &g->ecc;
|
||||||
|
|
||||||
|
nvgpu_init_list_node(&stat->node);
|
||||||
|
|
||||||
|
nvgpu_list_add_tail(&stat->node, &ecc->stats_list);
|
||||||
|
ecc->stats_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void nvgpu_ecc_init(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct nvgpu_ecc *ecc = &g->ecc;
|
||||||
|
|
||||||
|
nvgpu_init_list_node(&ecc->stats_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat ***stat, const char *name)
|
||||||
|
{
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
struct nvgpu_ecc_stat **stats;
|
||||||
|
u32 gpc, tpc;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
|
||||||
|
if (stats == NULL) {
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
|
||||||
|
stats[gpc] = nvgpu_kzalloc(g,
|
||||||
|
sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
|
||||||
|
if (stats[gpc] == NULL) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err != 0) {
|
||||||
|
while (gpc-- != 0u) {
|
||||||
|
nvgpu_kfree(g, stats[gpc]);
|
||||||
|
}
|
||||||
|
|
||||||
|
nvgpu_kfree(g, stats);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
|
||||||
|
for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
|
||||||
|
snprintf(stats[gpc][tpc].name,
|
||||||
|
NVGPU_ECC_STAT_NAME_MAX_SIZE,
|
||||||
|
"gpc%d_tpc%d_%s", gpc, tpc, name);
|
||||||
|
nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*stat = stats;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat **stat, const char *name)
|
||||||
|
{
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
struct nvgpu_ecc_stat *stats;
|
||||||
|
u32 gpc;
|
||||||
|
|
||||||
|
stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
|
||||||
|
if (stats == NULL) {
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
for (gpc = 0; gpc < gr->gpc_count; gpc++) {
|
||||||
|
snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
|
||||||
|
"gpc%d_%s", gpc, name);
|
||||||
|
nvgpu_ecc_stat_add(g, &stats[gpc]);
|
||||||
|
}
|
||||||
|
|
||||||
|
*stat = stats;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat **stat, const char *name)
|
||||||
|
{
|
||||||
|
struct nvgpu_ecc_stat *stats;
|
||||||
|
|
||||||
|
stats = nvgpu_kzalloc(g, sizeof(*stats));
|
||||||
|
if (stats == NULL) {
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)strncpy(stats->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1);
|
||||||
|
nvgpu_ecc_stat_add(g, stats);
|
||||||
|
*stat = stats;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat ***stat, const char *name)
|
||||||
|
{
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
struct nvgpu_ecc_stat **stats;
|
||||||
|
u32 ltc, lts;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count);
|
||||||
|
if (stats == NULL) {
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
||||||
|
stats[ltc] = nvgpu_kzalloc(g,
|
||||||
|
sizeof(*stats[ltc]) * gr->slices_per_ltc);
|
||||||
|
if (stats[ltc] == NULL) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err != 0) {
|
||||||
|
while (ltc-- > 0u) {
|
||||||
|
nvgpu_kfree(g, stats[ltc]);
|
||||||
|
}
|
||||||
|
|
||||||
|
nvgpu_kfree(g, stats);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
||||||
|
for (lts = 0; lts < gr->slices_per_ltc; lts++) {
|
||||||
|
snprintf(stats[ltc][lts].name,
|
||||||
|
NVGPU_ECC_STAT_NAME_MAX_SIZE,
|
||||||
|
"ltc%d_lts%d_%s", ltc, lts, name);
|
||||||
|
nvgpu_ecc_stat_add(g, &stats[ltc][lts]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*stat = stats;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat **stat, const char *name)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
|
||||||
|
struct nvgpu_ecc_stat *stats;
|
||||||
|
|
||||||
|
stats = nvgpu_kzalloc(g, sizeof(*stats) * num_fbpa);
|
||||||
|
if (stats == NULL) {
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < num_fbpa; i++) {
|
||||||
|
snprintf(stats[i].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
|
||||||
|
"fbpa%d_%s", i, name);
|
||||||
|
nvgpu_ecc_stat_add(g, &stats[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
*stat = stats;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* release all ecc_stat */
|
||||||
|
void nvgpu_ecc_free(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct nvgpu_ecc *ecc = &g->ecc;
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
u32 i;
|
||||||
|
|
||||||
|
for (i = 0; i < gr->gpc_count; i++) {
|
||||||
|
if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_lrf_ecc_double_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_shm_ecc_sec_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_shm_ecc_sed_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_shm_ecc_ded_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.tex_ecc_total_sec_pipe0_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.tex_ecc_total_ded_pipe0_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.tex_unique_ecc_sec_pipe0_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.tex_unique_ecc_ded_pipe0_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.tex_ecc_total_sec_pipe1_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.tex_ecc_total_ded_pipe1_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.tex_unique_ecc_sec_pipe1_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.tex_unique_ecc_ded_pipe1_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_l1_tag_ecc_corrected_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_l1_tag_ecc_uncorrected_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_cbu_ecc_corrected_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_cbu_ecc_uncorrected_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_l1_data_ecc_corrected_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_l1_data_ecc_uncorrected_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_icache_ecc_corrected_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->gr.sm_icache_ecc_uncorrected_err_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count);
|
||||||
|
|
||||||
|
nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count);
|
||||||
|
|
||||||
|
for (i = 0; i < g->ltc_count; i++) {
|
||||||
|
if (ecc->ltc.ecc_sec_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecc->ltc.ecc_ded_count != NULL) {
|
||||||
|
nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nvgpu_kfree(g, ecc->ltc.ecc_sec_count);
|
||||||
|
nvgpu_kfree(g, ecc->ltc.ecc_ded_count);
|
||||||
|
|
||||||
|
nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count);
|
||||||
|
|
||||||
|
nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count);
|
||||||
|
|
||||||
|
nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count);
|
||||||
|
nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count);
|
||||||
|
|
||||||
|
(void)memset(ecc, 0, sizeof(*ecc));
|
||||||
|
}
|
||||||
|
|
||||||
|
int nvgpu_ecc_init_support(struct gk20a *g)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
|
||||||
|
if (g->ops.gr.init_ecc == NULL) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
nvgpu_ecc_init(g);
|
||||||
|
err = g->ops.gr.init_ecc(g);
|
||||||
|
if (err != 0) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = nvgpu_ecc_sysfs_init(g);
|
||||||
|
if (err != 0) {
|
||||||
|
nvgpu_ecc_free(g);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void nvgpu_ecc_remove_support(struct gk20a *g)
|
||||||
|
{
|
||||||
|
if (g->ops.gr.init_ecc == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
nvgpu_ecc_sysfs_remove(g);
|
||||||
|
nvgpu_ecc_free(g);
|
||||||
|
}
|
||||||
@@ -445,9 +445,9 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
|||||||
uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s());
|
uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s());
|
||||||
|
|
||||||
|
|
||||||
g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0] +=
|
g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter +=
|
||||||
corrected_delta;
|
corrected_delta;
|
||||||
g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0] +=
|
g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter +=
|
||||||
uncorrected_delta;
|
uncorrected_delta;
|
||||||
|
|
||||||
if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m())
|
if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m())
|
||||||
@@ -461,8 +461,8 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
|||||||
"ecc error address: 0x%x", ecc_addr);
|
"ecc error address: 0x%x", ecc_addr);
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"ecc error count corrected: %d, uncorrected %d",
|
"ecc error count corrected: %d, uncorrected %d",
|
||||||
g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0],
|
g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter,
|
||||||
g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0]);
|
g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||||
@@ -503,9 +503,9 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
|||||||
uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s());
|
uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s());
|
||||||
|
|
||||||
|
|
||||||
g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0] +=
|
g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter +=
|
||||||
corrected_delta;
|
corrected_delta;
|
||||||
g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0] +=
|
g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter +=
|
||||||
uncorrected_delta;
|
uncorrected_delta;
|
||||||
|
|
||||||
if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m())
|
if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m())
|
||||||
@@ -519,8 +519,8 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
|
|||||||
"ecc error address: 0x%x", ecc_addr);
|
"ecc error address: 0x%x", ecc_addr);
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"ecc error count corrected: %d, uncorrected %d",
|
"ecc error count corrected: %d, uncorrected %d",
|
||||||
g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0],
|
g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter,
|
||||||
g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0]);
|
g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
|
void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
|
||||||
@@ -561,9 +561,9 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
|
|||||||
uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s());
|
uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s());
|
||||||
|
|
||||||
|
|
||||||
g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0] +=
|
g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter +=
|
||||||
corrected_delta;
|
corrected_delta;
|
||||||
g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0] +=
|
g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter +=
|
||||||
uncorrected_delta;
|
uncorrected_delta;
|
||||||
|
|
||||||
if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m())
|
if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m())
|
||||||
@@ -582,8 +582,8 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
|
|||||||
"ecc error address: 0x%x", ecc_addr);
|
"ecc error address: 0x%x", ecc_addr);
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"ecc error count corrected: %d, uncorrected %d",
|
"ecc error count corrected: %d, uncorrected %d",
|
||||||
g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0],
|
g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter,
|
||||||
g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0]);
|
g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault)
|
static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault)
|
||||||
|
|||||||
@@ -25,8 +25,19 @@
|
|||||||
* for an implementation.
|
* for an implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <nvgpu/ecc.h>
|
||||||
|
|
||||||
#include "gk20a/dbg_gpu_gk20a.h"
|
#include "gk20a/dbg_gpu_gk20a.h"
|
||||||
|
|
||||||
void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
|
void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Stub: exports nothing and always reports success. */
int nvgpu_ecc_sysfs_init(struct gk20a *g)
{
	(void)g;

	return 0;
}
|
||||||
|
|
||||||
|
/* Stub: there is nothing to remove. */
void nvgpu_ecc_sysfs_remove(struct gk20a *g)
{
	(void)g;
}
|
||||||
|
|||||||
@@ -1,102 +0,0 @@
|
|||||||
/*
|
|
||||||
* GK20A ECC
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
#ifndef ECC_GK20A_H
|
|
||||||
#define ECC_GK20A_H
|
|
||||||
|
|
||||||
struct gk20a_ecc_stat {
|
|
||||||
char **names;
|
|
||||||
u32 *counters;
|
|
||||||
u32 count;
|
|
||||||
#ifdef CONFIG_SYSFS
|
|
||||||
struct hlist_node hash_node;
|
|
||||||
struct device_attribute *attr_array;
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ecc_gk20a {
|
|
||||||
/* Stats per engine */
|
|
||||||
struct {
|
|
||||||
struct gk20a_ecc_stat sm_lrf_single_err_count;
|
|
||||||
struct gk20a_ecc_stat sm_lrf_double_err_count;
|
|
||||||
|
|
||||||
struct gk20a_ecc_stat sm_shm_sec_count;
|
|
||||||
struct gk20a_ecc_stat sm_shm_sed_count;
|
|
||||||
struct gk20a_ecc_stat sm_shm_ded_count;
|
|
||||||
|
|
||||||
struct gk20a_ecc_stat tex_total_sec_pipe0_count;
|
|
||||||
struct gk20a_ecc_stat tex_total_ded_pipe0_count;
|
|
||||||
struct gk20a_ecc_stat tex_unique_sec_pipe0_count;
|
|
||||||
struct gk20a_ecc_stat tex_unique_ded_pipe0_count;
|
|
||||||
struct gk20a_ecc_stat tex_total_sec_pipe1_count;
|
|
||||||
struct gk20a_ecc_stat tex_total_ded_pipe1_count;
|
|
||||||
struct gk20a_ecc_stat tex_unique_sec_pipe1_count;
|
|
||||||
struct gk20a_ecc_stat tex_unique_ded_pipe1_count;
|
|
||||||
|
|
||||||
struct gk20a_ecc_stat sm_l1_tag_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat sm_cbu_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat sm_cbu_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat sm_l1_data_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat sm_icache_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat sm_icache_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat gcc_l15_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat gcc_l15_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat fecs_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat fecs_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat gpccs_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat gpccs_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat mmu_l1tlb_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat mmu_l1tlb_uncorrected_err_count;
|
|
||||||
} gr;
|
|
||||||
|
|
||||||
struct {
|
|
||||||
struct gk20a_ecc_stat l2_sec_count;
|
|
||||||
struct gk20a_ecc_stat l2_ded_count;
|
|
||||||
struct gk20a_ecc_stat l2_cache_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat l2_cache_uncorrected_err_count;
|
|
||||||
} ltc;
|
|
||||||
|
|
||||||
struct {
|
|
||||||
struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count;
|
|
||||||
struct gk20a_ecc_stat mmu_fillunit_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count;
|
|
||||||
} fb;
|
|
||||||
|
|
||||||
struct {
|
|
||||||
struct gk20a_ecc_stat pmu_corrected_err_count;
|
|
||||||
struct gk20a_ecc_stat pmu_uncorrected_err_count;
|
|
||||||
} pmu;
|
|
||||||
|
|
||||||
struct {
|
|
||||||
struct gk20a_ecc_stat fbpa_sec_err_count;
|
|
||||||
struct gk20a_ecc_stat fbpa_ded_err_count;
|
|
||||||
} fbpa;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /*__ECC_GK20A_H__*/
|
|
||||||
@@ -35,7 +35,6 @@ struct gk20a_ctxsw_trace;
|
|||||||
struct acr_desc;
|
struct acr_desc;
|
||||||
struct nvgpu_mem_alloc_tracker;
|
struct nvgpu_mem_alloc_tracker;
|
||||||
struct dbg_profiler_object_data;
|
struct dbg_profiler_object_data;
|
||||||
struct ecc_gk20a;
|
|
||||||
struct gk20a_debug_output;
|
struct gk20a_debug_output;
|
||||||
struct nvgpu_clk_pll_debug_data;
|
struct nvgpu_clk_pll_debug_data;
|
||||||
struct nvgpu_nvhost_dev;
|
struct nvgpu_nvhost_dev;
|
||||||
@@ -64,6 +63,7 @@ struct nvgpu_ctxsw_trace_filter;
|
|||||||
#include <nvgpu/clk_arb.h>
|
#include <nvgpu/clk_arb.h>
|
||||||
#include <nvgpu/nvlink.h>
|
#include <nvgpu/nvlink.h>
|
||||||
#include <nvgpu/sim.h>
|
#include <nvgpu/sim.h>
|
||||||
|
#include <nvgpu/ecc.h>
|
||||||
|
|
||||||
#include "clk_gk20a.h"
|
#include "clk_gk20a.h"
|
||||||
#include "ce2_gk20a.h"
|
#include "ce2_gk20a.h"
|
||||||
@@ -77,7 +77,6 @@ struct nvgpu_ctxsw_trace_filter;
|
|||||||
#include "perf/perf.h"
|
#include "perf/perf.h"
|
||||||
#include "pmgr/pmgr.h"
|
#include "pmgr/pmgr.h"
|
||||||
#include "therm/thrm.h"
|
#include "therm/thrm.h"
|
||||||
#include "ecc_gk20a.h"
|
|
||||||
|
|
||||||
/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
|
/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
|
||||||
32 ns is the resolution of ptimer. */
|
32 ns is the resolution of ptimer. */
|
||||||
@@ -384,8 +383,7 @@ struct gpu_ops {
|
|||||||
u32 gpc_exception);
|
u32 gpc_exception);
|
||||||
void (*enable_gpc_exceptions)(struct gk20a *g);
|
void (*enable_gpc_exceptions)(struct gk20a *g);
|
||||||
void (*enable_exceptions)(struct gk20a *g);
|
void (*enable_exceptions)(struct gk20a *g);
|
||||||
void (*create_gr_sysfs)(struct gk20a *g);
|
int (*init_ecc)(struct gk20a *g);
|
||||||
void (*remove_gr_sysfs)(struct gk20a *g);
|
|
||||||
u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
|
u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
|
||||||
int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc,
|
int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc,
|
||||||
u32 sm, struct channel_gk20a *fault_ch);
|
u32 sm, struct channel_gk20a *fault_ch);
|
||||||
@@ -1385,7 +1383,7 @@ struct gk20a {
|
|||||||
struct mm_gk20a mm;
|
struct mm_gk20a mm;
|
||||||
struct nvgpu_pmu pmu;
|
struct nvgpu_pmu pmu;
|
||||||
struct acr_desc acr;
|
struct acr_desc acr;
|
||||||
struct ecc_gk20a ecc;
|
struct nvgpu_ecc ecc;
|
||||||
struct clk_pmupstate clk_pmu;
|
struct clk_pmupstate clk_pmu;
|
||||||
struct perf_pmupstate perf_pmu;
|
struct perf_pmupstate perf_pmu;
|
||||||
struct pmgr_pmupstate pmgr_pmu;
|
struct pmgr_pmupstate pmgr_pmu;
|
||||||
|
|||||||
@@ -38,6 +38,7 @@
|
|||||||
#include <nvgpu/mm.h>
|
#include <nvgpu/mm.h>
|
||||||
#include <nvgpu/ctxsw_trace.h>
|
#include <nvgpu/ctxsw_trace.h>
|
||||||
#include <nvgpu/error_notifier.h>
|
#include <nvgpu/error_notifier.h>
|
||||||
|
#include <nvgpu/ecc.h>
|
||||||
|
|
||||||
#include "gk20a.h"
|
#include "gk20a.h"
|
||||||
#include "gr_gk20a.h"
|
#include "gr_gk20a.h"
|
||||||
@@ -3127,6 +3128,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
|
|||||||
gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
|
gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
|
||||||
|
|
||||||
gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
|
gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
|
||||||
|
|
||||||
|
nvgpu_ecc_remove_support(g);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
||||||
@@ -4872,8 +4875,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
|
|||||||
gr->remove_support = gk20a_remove_gr_support;
|
gr->remove_support = gk20a_remove_gr_support;
|
||||||
gr->sw_ready = true;
|
gr->sw_ready = true;
|
||||||
|
|
||||||
if (g->ops.gr.create_gr_sysfs)
|
err = nvgpu_ecc_init_support(g);
|
||||||
g->ops.gr.create_gr_sysfs(g);
|
if (err)
|
||||||
|
goto clean_up;
|
||||||
|
|
||||||
nvgpu_log_fn(g, "done");
|
nvgpu_log_fn(g, "done");
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -382,10 +382,6 @@ static const struct gpu_ops gp106_ops = {
|
|||||||
.update_boosted_ctx = NULL,
|
.update_boosted_ctx = NULL,
|
||||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||||
#ifdef CONFIG_SYSFS
|
|
||||||
.create_gr_sysfs = NULL,
|
|
||||||
.remove_gr_sysfs = NULL,
|
|
||||||
#endif
|
|
||||||
.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
|
.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
|
||||||
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
|
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
|
||||||
.fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,
|
.fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,
|
||||||
|
|||||||
106
drivers/gpu/nvgpu/gp10b/ecc_gp10b.c
Normal file
106
drivers/gpu/nvgpu/gp10b/ecc_gp10b.c
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <nvgpu/ecc.h>
|
||||||
|
|
||||||
|
#include "gk20a/gk20a.h"
|
||||||
|
#include "gp10b/ecc_gp10b.h"
|
||||||
|
|
||||||
|
int gp10b_ecc_init(struct gk20a *g)
|
||||||
|
{
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sec_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sed_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_ded_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe0_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe0_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe0_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe0_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe1_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe1_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe1_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe1_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
if (err != 0) {
|
||||||
|
nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
|
||||||
|
nvgpu_ecc_free(g);
|
||||||
|
}
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
28
drivers/gpu/nvgpu/gp10b/ecc_gp10b.h
Normal file
28
drivers/gpu/nvgpu/gp10b/ecc_gp10b.h
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ECC_GP10B_H__
#define __ECC_GP10B_H__

/*
 * Forward declaration so this header is self-contained. Without it, a
 * translation unit that includes this header before "gk20a/gk20a.h" would
 * give 'struct gk20a' function-prototype scope, making the prototype below
 * incompatible with every real caller.
 */
struct gk20a;

/*
 * Set up the gp10b ECC error counters for @g.
 *
 * Returns 0 on success, or a non-zero error code if any counter could not
 * be initialised.
 */
int gp10b_ecc_init(struct gk20a *g);

#endif
|
||||||
@@ -176,7 +176,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
|
|||||||
lrf_ecc_ded_status,
|
lrf_ecc_ded_status,
|
||||||
&lrf_single_count_delta,
|
&lrf_single_count_delta,
|
||||||
lrf_double_count_delta);
|
lrf_double_count_delta);
|
||||||
g->ecc.gr.sm_lrf_single_err_count.counters[tpc] +=
|
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
|
||||||
lrf_single_count_delta;
|
lrf_single_count_delta;
|
||||||
}
|
}
|
||||||
if (lrf_ecc_ded_status) {
|
if (lrf_ecc_ded_status) {
|
||||||
@@ -188,7 +188,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
|
|||||||
lrf_ecc_ded_status,
|
lrf_ecc_ded_status,
|
||||||
&lrf_double_count_delta,
|
&lrf_double_count_delta,
|
||||||
lrf_single_count_delta);
|
lrf_single_count_delta);
|
||||||
g->ecc.gr.sm_lrf_double_err_count.counters[tpc] +=
|
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
|
||||||
lrf_double_count_delta;
|
lrf_double_count_delta;
|
||||||
}
|
}
|
||||||
gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
|
gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
|
||||||
@@ -213,9 +213,9 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
|
|||||||
ecc_stats_reg_val =
|
ecc_stats_reg_val =
|
||||||
gk20a_readl(g,
|
gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
|
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
|
||||||
g->ecc.gr.sm_shm_sec_count.counters[tpc] +=
|
g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
|
||||||
g->ecc.gr.sm_shm_sed_count.counters[tpc] +=
|
g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
|
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
|
||||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
|
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
|
||||||
@@ -235,7 +235,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
|
|||||||
ecc_stats_reg_val =
|
ecc_stats_reg_val =
|
||||||
gk20a_readl(g,
|
gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
|
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
|
||||||
g->ecc.gr.sm_shm_ded_count.counters[tpc] +=
|
g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
|
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
@@ -276,7 +276,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
|
|
||||||
ecc_stats_reg_val = gk20a_readl(g,
|
ecc_stats_reg_val = gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
||||||
g->ecc.gr.tex_total_sec_pipe0_count.counters[tpc] +=
|
g->ecc.gr.tex_ecc_total_sec_pipe0_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
|
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
@@ -285,7 +285,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
|
|
||||||
ecc_stats_reg_val = gk20a_readl(g,
|
ecc_stats_reg_val = gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
||||||
g->ecc.gr.tex_unique_sec_pipe0_count.counters[tpc] +=
|
g->ecc.gr.tex_unique_ecc_sec_pipe0_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
|
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
@@ -300,7 +300,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
|
|
||||||
ecc_stats_reg_val = gk20a_readl(g,
|
ecc_stats_reg_val = gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
||||||
g->ecc.gr.tex_total_sec_pipe1_count.counters[tpc] +=
|
g->ecc.gr.tex_ecc_total_sec_pipe1_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
|
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
@@ -309,7 +309,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
|
|
||||||
ecc_stats_reg_val = gk20a_readl(g,
|
ecc_stats_reg_val = gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
||||||
g->ecc.gr.tex_unique_sec_pipe1_count.counters[tpc] +=
|
g->ecc.gr.tex_unique_ecc_sec_pipe1_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
|
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
@@ -332,7 +332,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
|
|
||||||
ecc_stats_reg_val = gk20a_readl(g,
|
ecc_stats_reg_val = gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
||||||
g->ecc.gr.tex_total_ded_pipe0_count.counters[tpc] +=
|
g->ecc.gr.tex_ecc_total_ded_pipe0_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
|
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
@@ -341,7 +341,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
|
|
||||||
ecc_stats_reg_val = gk20a_readl(g,
|
ecc_stats_reg_val = gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
||||||
g->ecc.gr.tex_unique_ded_pipe0_count.counters[tpc] +=
|
g->ecc.gr.tex_unique_ecc_ded_pipe0_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
|
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
@@ -356,7 +356,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
|
|
||||||
ecc_stats_reg_val = gk20a_readl(g,
|
ecc_stats_reg_val = gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
|
||||||
g->ecc.gr.tex_total_ded_pipe1_count.counters[tpc] +=
|
g->ecc.gr.tex_ecc_total_ded_pipe1_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
|
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
@@ -365,7 +365,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
|
|
||||||
ecc_stats_reg_val = gk20a_readl(g,
|
ecc_stats_reg_val = gk20a_readl(g,
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
|
||||||
g->ecc.gr.tex_unique_ded_pipe1_count.counters[tpc] +=
|
g->ecc.gr.tex_unique_ecc_ded_pipe1_count[gpc][tpc].counter +=
|
||||||
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
|
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
|
ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
|
|||||||
@@ -56,6 +56,7 @@
|
|||||||
#include "gp10b/regops_gp10b.h"
|
#include "gp10b/regops_gp10b.h"
|
||||||
#include "gp10b/therm_gp10b.h"
|
#include "gp10b/therm_gp10b.h"
|
||||||
#include "gp10b/priv_ring_gp10b.h"
|
#include "gp10b/priv_ring_gp10b.h"
|
||||||
|
#include "gp10b/ecc_gp10b.h"
|
||||||
|
|
||||||
#include "gm20b/ltc_gm20b.h"
|
#include "gm20b/ltc_gm20b.h"
|
||||||
#include "gm20b/gr_gm20b.h"
|
#include "gm20b/gr_gm20b.h"
|
||||||
@@ -339,11 +340,8 @@ static const struct gpu_ops gp10b_ops = {
|
|||||||
.init_preemption_state = gr_gp10b_init_preemption_state,
|
.init_preemption_state = gr_gp10b_init_preemption_state,
|
||||||
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
||||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||||
#ifdef CONFIG_SYSFS
|
|
||||||
.create_gr_sysfs = gr_gp10b_create_sysfs,
|
|
||||||
.remove_gr_sysfs = gr_gp10b_remove_sysfs,
|
|
||||||
#endif
|
|
||||||
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
|
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
|
||||||
|
.init_ecc = gp10b_ecc_init,
|
||||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||||
.init_gfxp_wfi_timeout_count =
|
.init_gfxp_wfi_timeout_count =
|
||||||
gr_gp10b_init_gfxp_wfi_timeout_count,
|
gr_gp10b_init_gfxp_wfi_timeout_count,
|
||||||
|
|||||||
@@ -249,7 +249,7 @@ void gp10b_ltc_isr(struct gk20a *g)
|
|||||||
ecc_stats_reg_val =
|
ecc_stats_reg_val =
|
||||||
gk20a_readl(g,
|
gk20a_readl(g,
|
||||||
ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
|
ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
|
||||||
g->ecc.ltc.l2_sec_count.counters[ltc*g->ltc_count + slice] +=
|
g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
|
||||||
ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
|
ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &=
|
ecc_stats_reg_val &=
|
||||||
~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
|
~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
|
||||||
@@ -268,7 +268,7 @@ void gp10b_ltc_isr(struct gk20a *g)
|
|||||||
ecc_stats_reg_val =
|
ecc_stats_reg_val =
|
||||||
gk20a_readl(g,
|
gk20a_readl(g,
|
||||||
ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
|
ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
|
||||||
g->ecc.ltc.l2_ded_count.counters[ltc*g->ltc_count + slice] +=
|
g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
|
||||||
ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
|
ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
|
||||||
ecc_stats_reg_val &=
|
ecc_stats_reg_val &=
|
||||||
~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
|
~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
|
||||||
|
|||||||
@@ -415,10 +415,6 @@ static const struct gpu_ops gv100_ops = {
|
|||||||
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
||||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||||
#ifdef CONFIG_SYSFS
|
|
||||||
.create_gr_sysfs = gr_gv11b_create_sysfs,
|
|
||||||
.remove_gr_sysfs = gr_gv11b_remove_sysfs,
|
|
||||||
#endif
|
|
||||||
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
|
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
|
||||||
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
||||||
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
||||||
|
|||||||
181
drivers/gpu/nvgpu/gv11b/ecc_gv11b.c
Normal file
181
drivers/gpu/nvgpu/gv11b/ecc_gv11b.c
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <nvgpu/ecc.h>
|
||||||
|
|
||||||
|
#include "gk20a/gk20a.h"
|
||||||
|
#include "gv11b/ecc_gv11b.h"
|
||||||
|
|
||||||
|
int gv11b_ecc_init(struct gk20a *g)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||||
|
sm_l1_tag_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||||
|
sm_l1_tag_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||||
|
sm_cbu_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||||
|
sm_cbu_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||||
|
sm_l1_data_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||||
|
sm_l1_data_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||||
|
sm_icache_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
|
||||||
|
sm_icache_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||||
|
gcc_l15_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||||
|
gcc_l15_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||||
|
gpccs_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||||
|
gpccs_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||||
|
mmu_l1tlb_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
|
||||||
|
mmu_l1tlb_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_FB(
|
||||||
|
mmu_fillunit_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_FB(
|
||||||
|
mmu_fillunit_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count);
|
||||||
|
if (err != 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
if (err != 0) {
|
||||||
|
nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
|
||||||
|
nvgpu_ecc_free(g);
|
||||||
|
}
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
28
drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
Normal file
28
drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ECC_GV11B_H__
#define __ECC_GV11B_H__

/*
 * Forward declaration so this header is self-contained. Without it, a
 * translation unit that includes this header before "gk20a/gk20a.h" would
 * give 'struct gk20a' function-prototype scope, making the prototype below
 * incompatible with every real caller.
 */
struct gk20a;

/*
 * Set up the gv11b ECC error counters for @g.
 *
 * Returns 0 on success, or a non-zero error code if any counter could not
 * be initialised.
 */
int gv11b_ecc_init(struct gk20a *g);

#endif
|
||||||
@@ -198,7 +198,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
l1_tag_corrected_err_count_delta +=
|
l1_tag_corrected_err_count_delta +=
|
||||||
(is_l1_tag_ecc_corrected_total_err_overflow <<
|
(is_l1_tag_ecc_corrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_l1_tag_corrected_err_count.counters[tpc] +=
|
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||||
l1_tag_corrected_err_count_delta;
|
l1_tag_corrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
|
||||||
@@ -213,7 +213,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
l1_tag_uncorrected_err_count_delta +=
|
l1_tag_uncorrected_err_count_delta +=
|
||||||
(is_l1_tag_ecc_uncorrected_total_err_overflow <<
|
(is_l1_tag_ecc_uncorrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_l1_tag_uncorrected_err_count.counters[tpc] +=
|
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||||
l1_tag_uncorrected_err_count_delta;
|
l1_tag_uncorrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
|
||||||
@@ -290,7 +290,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
lrf_corrected_err_count_delta +=
|
lrf_corrected_err_count_delta +=
|
||||||
(is_lrf_ecc_corrected_total_err_overflow <<
|
(is_lrf_ecc_corrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_lrf_single_err_count.counters[tpc] +=
|
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
|
||||||
lrf_corrected_err_count_delta;
|
lrf_corrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset,
|
||||||
@@ -305,7 +305,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
lrf_uncorrected_err_count_delta +=
|
lrf_uncorrected_err_count_delta +=
|
||||||
(is_lrf_ecc_uncorrected_total_err_overflow <<
|
(is_lrf_ecc_uncorrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_lrf_double_err_count.counters[tpc] +=
|
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
|
||||||
lrf_uncorrected_err_count_delta;
|
lrf_uncorrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset,
|
||||||
@@ -449,7 +449,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
cbu_corrected_err_count_delta +=
|
cbu_corrected_err_count_delta +=
|
||||||
(is_cbu_ecc_corrected_total_err_overflow <<
|
(is_cbu_ecc_corrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_cbu_corrected_err_count.counters[tpc] +=
|
g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||||
cbu_corrected_err_count_delta;
|
cbu_corrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset,
|
||||||
@@ -464,7 +464,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
cbu_uncorrected_err_count_delta +=
|
cbu_uncorrected_err_count_delta +=
|
||||||
(is_cbu_ecc_uncorrected_total_err_overflow <<
|
(is_cbu_ecc_uncorrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_cbu_uncorrected_err_count.counters[tpc] +=
|
g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||||
cbu_uncorrected_err_count_delta;
|
cbu_uncorrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset,
|
||||||
@@ -529,7 +529,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
l1_data_corrected_err_count_delta +=
|
l1_data_corrected_err_count_delta +=
|
||||||
(is_l1_data_ecc_corrected_total_err_overflow <<
|
(is_l1_data_ecc_corrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_l1_data_corrected_err_count.counters[tpc] +=
|
g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||||
l1_data_corrected_err_count_delta;
|
l1_data_corrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset,
|
||||||
@@ -544,7 +544,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
l1_data_uncorrected_err_count_delta +=
|
l1_data_uncorrected_err_count_delta +=
|
||||||
(is_l1_data_ecc_uncorrected_total_err_overflow <<
|
(is_l1_data_ecc_uncorrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_l1_data_uncorrected_err_count.counters[tpc] +=
|
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||||
l1_data_uncorrected_err_count_delta;
|
l1_data_uncorrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset,
|
||||||
@@ -613,7 +613,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
icache_corrected_err_count_delta +=
|
icache_corrected_err_count_delta +=
|
||||||
(is_icache_ecc_corrected_total_err_overflow <<
|
(is_icache_ecc_corrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_icache_corrected_err_count.counters[tpc] +=
|
g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||||
icache_corrected_err_count_delta;
|
icache_corrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset,
|
||||||
@@ -628,7 +628,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
icache_uncorrected_err_count_delta +=
|
icache_uncorrected_err_count_delta +=
|
||||||
(is_icache_ecc_uncorrected_total_err_overflow <<
|
(is_icache_ecc_uncorrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s());
|
gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s());
|
||||||
g->ecc.gr.sm_icache_uncorrected_err_count.counters[tpc] +=
|
g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||||
icache_uncorrected_err_count_delta;
|
icache_uncorrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset,
|
gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset,
|
||||||
@@ -717,7 +717,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
gcc_l15_corrected_err_count_delta +=
|
gcc_l15_corrected_err_count_delta +=
|
||||||
(is_gcc_l15_ecc_corrected_total_err_overflow <<
|
(is_gcc_l15_ecc_corrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
|
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
|
||||||
g->ecc.gr.gcc_l15_corrected_err_count.counters[gpc] +=
|
g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter +=
|
||||||
gcc_l15_corrected_err_count_delta;
|
gcc_l15_corrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
|
gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
|
||||||
@@ -732,7 +732,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
|||||||
gcc_l15_uncorrected_err_count_delta +=
|
gcc_l15_uncorrected_err_count_delta +=
|
||||||
(is_gcc_l15_ecc_uncorrected_total_err_overflow <<
|
(is_gcc_l15_ecc_uncorrected_total_err_overflow <<
|
||||||
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
|
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
|
||||||
g->ecc.gr.gcc_l15_uncorrected_err_count.counters[gpc] +=
|
g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter +=
|
||||||
gcc_l15_uncorrected_err_count_delta;
|
gcc_l15_uncorrected_err_count_delta;
|
||||||
gk20a_writel(g,
|
gk20a_writel(g,
|
||||||
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
|
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
|
||||||
@@ -802,9 +802,9 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
|
|||||||
uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s());
|
uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s());
|
||||||
|
|
||||||
|
|
||||||
g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc] +=
|
g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter +=
|
||||||
corrected_delta;
|
corrected_delta;
|
||||||
g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc] +=
|
g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter +=
|
||||||
uncorrected_delta;
|
uncorrected_delta;
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
|
"mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
|
||||||
@@ -824,8 +824,8 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
|
|||||||
"ecc error address: 0x%x", ecc_addr);
|
"ecc error address: 0x%x", ecc_addr);
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"ecc error count corrected: %d, uncorrected %d",
|
"ecc error count corrected: %d, uncorrected %d",
|
||||||
g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc],
|
g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter,
|
||||||
g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc]);
|
g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -880,9 +880,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
|
|||||||
gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset,
|
gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset,
|
||||||
gr_gpc0_gpccs_falcon_ecc_status_reset_task_f());
|
gr_gpc0_gpccs_falcon_ecc_status_reset_task_f());
|
||||||
|
|
||||||
g->ecc.gr.gpccs_corrected_err_count.counters[gpc] +=
|
g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter +=
|
||||||
corrected_delta;
|
corrected_delta;
|
||||||
g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc] +=
|
g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter +=
|
||||||
uncorrected_delta;
|
uncorrected_delta;
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
|
"gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
|
||||||
@@ -907,8 +907,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
|
|||||||
|
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"ecc error count corrected: %d, uncorrected %d",
|
"ecc error count corrected: %d, uncorrected %d",
|
||||||
g->ecc.gr.gpccs_corrected_err_count.counters[gpc],
|
g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter,
|
||||||
g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc]);
|
g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -2419,9 +2419,9 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
|
|||||||
gk20a_writel(g, gr_fecs_falcon_ecc_status_r(),
|
gk20a_writel(g, gr_fecs_falcon_ecc_status_r(),
|
||||||
gr_fecs_falcon_ecc_status_reset_task_f());
|
gr_fecs_falcon_ecc_status_reset_task_f());
|
||||||
|
|
||||||
g->ecc.gr.fecs_corrected_err_count.counters[0] +=
|
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter +=
|
||||||
corrected_delta;
|
corrected_delta;
|
||||||
g->ecc.gr.fecs_uncorrected_err_count.counters[0] +=
|
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter +=
|
||||||
uncorrected_delta;
|
uncorrected_delta;
|
||||||
|
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
@@ -2450,8 +2450,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
|
|||||||
|
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"ecc error count corrected: %d, uncorrected %d",
|
"ecc error count corrected: %d, uncorrected %d",
|
||||||
g->ecc.gr.fecs_corrected_err_count.counters[0],
|
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter,
|
||||||
g->ecc.gr.fecs_uncorrected_err_count.counters[0]);
|
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -84,6 +84,7 @@
|
|||||||
#include "regops_gv11b.h"
|
#include "regops_gv11b.h"
|
||||||
#include "subctx_gv11b.h"
|
#include "subctx_gv11b.h"
|
||||||
#include "therm_gv11b.h"
|
#include "therm_gv11b.h"
|
||||||
|
#include "ecc_gv11b.h"
|
||||||
|
|
||||||
#include <nvgpu/ptimer.h>
|
#include <nvgpu/ptimer.h>
|
||||||
#include <nvgpu/debug.h>
|
#include <nvgpu/debug.h>
|
||||||
@@ -369,10 +370,7 @@ static const struct gpu_ops gv11b_ops = {
|
|||||||
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
|
||||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||||
#ifdef CONFIG_SYSFS
|
.init_ecc = gv11b_ecc_init,
|
||||||
.create_gr_sysfs = gr_gv11b_create_sysfs,
|
|
||||||
.remove_gr_sysfs = gr_gv11b_remove_sysfs,
|
|
||||||
#endif
|
|
||||||
.set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode,
|
.set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode,
|
||||||
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
||||||
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
||||||
|
|||||||
@@ -90,13 +90,11 @@ void gv11b_ltc_isr(struct gk20a *g)
|
|||||||
u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
|
u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
|
||||||
u32 corrected_delta, uncorrected_delta;
|
u32 corrected_delta, uncorrected_delta;
|
||||||
u32 corrected_overflow, uncorrected_overflow;
|
u32 corrected_overflow, uncorrected_overflow;
|
||||||
u32 ltc_corrected, ltc_uncorrected;
|
|
||||||
|
|
||||||
mc_intr = gk20a_readl(g, mc_intr_ltc_r());
|
mc_intr = gk20a_readl(g, mc_intr_ltc_r());
|
||||||
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
||||||
if ((mc_intr & 1U << ltc) == 0)
|
if ((mc_intr & 1U << ltc) == 0)
|
||||||
continue;
|
continue;
|
||||||
ltc_corrected = ltc_uncorrected = 0U;
|
|
||||||
|
|
||||||
for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
|
for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
|
||||||
u32 offset = ltc_stride * ltc + lts_stride * slice;
|
u32 offset = ltc_stride * ltc + lts_stride * slice;
|
||||||
@@ -150,8 +148,8 @@ void gv11b_ltc_isr(struct gk20a *g)
|
|||||||
if (uncorrected_overflow)
|
if (uncorrected_overflow)
|
||||||
uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
|
uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
|
||||||
|
|
||||||
ltc_corrected += corrected_delta;
|
g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
|
||||||
ltc_uncorrected += uncorrected_delta;
|
g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
|
"ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
|
||||||
|
|
||||||
@@ -177,10 +175,6 @@ void gv11b_ltc_isr(struct gk20a *g)
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
g->ecc.ltc.l2_cache_corrected_err_count.counters[ltc] +=
|
|
||||||
ltc_corrected;
|
|
||||||
g->ecc.ltc.l2_cache_uncorrected_err_count.counters[ltc] +=
|
|
||||||
ltc_uncorrected;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -343,8 +343,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
|
|||||||
if (uncorrected_overflow)
|
if (uncorrected_overflow)
|
||||||
uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s());
|
uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s());
|
||||||
|
|
||||||
g->ecc.pmu.pmu_corrected_err_count.counters[0] += corrected_delta;
|
g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter += corrected_delta;
|
||||||
g->ecc.pmu.pmu_uncorrected_err_count.counters[0] += uncorrected_delta;
|
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter += uncorrected_delta;
|
||||||
|
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"pmu ecc interrupt intr1: 0x%x", intr1);
|
"pmu ecc interrupt intr1: 0x%x", intr1);
|
||||||
@@ -371,8 +371,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
|
|||||||
|
|
||||||
nvgpu_log(g, gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_intr,
|
||||||
"ecc error count corrected: %d, uncorrected %d",
|
"ecc error count corrected: %d, uncorrected %d",
|
||||||
g->ecc.pmu.pmu_corrected_err_count.counters[0],
|
g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter,
|
||||||
g->ecc.pmu.pmu_uncorrected_err_count.counters[0]);
|
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
162
drivers/gpu/nvgpu/include/nvgpu/ecc.h
Normal file
162
drivers/gpu/nvgpu/include/nvgpu/ecc.h
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef NVGPU_ECC_H
|
||||||
|
#define NVGPU_ECC_H
|
||||||
|
|
||||||
|
#include <nvgpu/types.h>
|
||||||
|
#include <nvgpu/list.h>
|
||||||
|
|
||||||
|
#define NVGPU_ECC_STAT_NAME_MAX_SIZE 100
|
||||||
|
|
||||||
|
struct gk20a;
|
||||||
|
|
||||||
|
/*
 * One ECC error counter.
 *
 * name    - NUL-terminated identifier held in a fixed buffer of
 *           NVGPU_ECC_STAT_NAME_MAX_SIZE bytes; the Linux sysfs code uses it
 *           as the attribute file name.
 * counter - running 32-bit error count; interrupt handlers add their deltas
 *           to it.
 * node    - list linkage; nvgpu_ecc_stat_from_node() maps a node pointer
 *           back to the enclosing stat.
 */
struct nvgpu_ecc_stat {
|
||||||
|
char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];
|
||||||
|
u32 counter;
|
||||||
|
struct nvgpu_list_node node;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Recover the nvgpu_ecc_stat that embeds the given list node.
 *
 * Subtracts offsetof(struct nvgpu_ecc_stat, node) from the node's address,
 * so the result is only meaningful when @node really is the `node` member of
 * a struct nvgpu_ecc_stat.
 *
 * NOTE(review): presumably this is the accessor nvgpu_list_for_each_entry()
 * resolves to when given (nvgpu_ecc_stat, node) - confirm against
 * <nvgpu/list.h>.
 */
static inline struct nvgpu_ecc_stat *nvgpu_ecc_stat_from_node(
|
||||||
|
struct nvgpu_list_node *node)
|
||||||
|
{
|
||||||
|
return (struct nvgpu_ecc_stat *)(
|
||||||
|
(uintptr_t)node - offsetof(struct nvgpu_ecc_stat, node)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * All ECC error counters of one GPU, grouped by hardware unit (gr, ltc, fb,
 * pmu, fbpa).
 *
 * Members declared `struct nvgpu_ecc_stat **` are two-level tables: the
 * interrupt handlers index the gr ones as [gpc][tpc] and the ltc ones as
 * [ltc][slice]. Members declared `struct nvgpu_ecc_stat *` are flat arrays:
 * per-gpc ones are indexed [gpc], per-device ones are accessed as element [0].
 *
 * stats_list links the individual nvgpu_ecc_stat entries through their `node`
 * member and stats_count is the number of entries the sysfs code expects to
 * find on that list.
 */
struct nvgpu_ecc {
|
||||||
|
struct {
|
||||||
|
/* stats per tpc */
|
||||||
|
|
||||||
|
struct nvgpu_ecc_stat **sm_lrf_ecc_single_err_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_lrf_ecc_double_err_count;
|
||||||
|
|
||||||
|
struct nvgpu_ecc_stat **sm_shm_ecc_sec_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_shm_ecc_sed_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_shm_ecc_ded_count;
|
||||||
|
|
||||||
|
struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe0_count;
|
||||||
|
struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe0_count;
|
||||||
|
struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe0_count;
|
||||||
|
struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe0_count;
|
||||||
|
struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe1_count;
|
||||||
|
struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe1_count;
|
||||||
|
struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe1_count;
|
||||||
|
struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe1_count;
|
||||||
|
|
||||||
|
struct nvgpu_ecc_stat **sm_l1_tag_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_l1_tag_ecc_uncorrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_cbu_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_cbu_ecc_uncorrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_l1_data_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_l1_data_ecc_uncorrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_icache_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat **sm_icache_ecc_uncorrected_err_count;
|
||||||
|
|
||||||
|
/* stats per gpc */
|
||||||
|
|
||||||
|
struct nvgpu_ecc_stat *gcc_l15_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *gcc_l15_ecc_uncorrected_err_count;
|
||||||
|
|
||||||
|
struct nvgpu_ecc_stat *gpccs_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *gpccs_ecc_uncorrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *mmu_l1tlb_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *mmu_l1tlb_ecc_uncorrected_err_count;
|
||||||
|
|
||||||
|
/* stats per device */
|
||||||
|
struct nvgpu_ecc_stat *fecs_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *fecs_ecc_uncorrected_err_count;
|
||||||
|
} gr;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
/* stats per lts */
|
||||||
|
struct nvgpu_ecc_stat **ecc_sec_count;
|
||||||
|
struct nvgpu_ecc_stat **ecc_ded_count;
|
||||||
|
} ltc;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
/* stats per device */
|
||||||
|
struct nvgpu_ecc_stat *mmu_l2tlb_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *mmu_l2tlb_ecc_uncorrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *mmu_hubtlb_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *mmu_hubtlb_ecc_uncorrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *mmu_fillunit_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *mmu_fillunit_ecc_uncorrected_err_count;
|
||||||
|
} fb;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
/* stats per device */
|
||||||
|
struct nvgpu_ecc_stat *pmu_ecc_corrected_err_count;
|
||||||
|
struct nvgpu_ecc_stat *pmu_ecc_uncorrected_err_count;
|
||||||
|
} pmu;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
/* stats per fbpa */
|
||||||
|
struct nvgpu_ecc_stat *fbpa_ecc_sec_err_count;
|
||||||
|
struct nvgpu_ecc_stat *fbpa_ecc_ded_err_count;
|
||||||
|
} fbpa;
|
||||||
|
|
||||||
|
struct nvgpu_list_node stats_list;
|
||||||
|
int stats_count;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Counter-init entry points and their convenience macros.
 *
 * Each NVGPU_ECC_COUNTER_INIT_* macro expands to a call of the matching
 * nvgpu_ecc_counter_init*() function, passing:
 *   - `g`, which must be a `struct gk20a *` visible in the calling scope
 *     (the macros do not take it as an argument);
 *   - the address of the member named `stat` inside the relevant group of
 *     g->ecc (gr, fb, pmu, ltc or fbpa);
 *   - the member name itself, stringified with #stat, as the counter name.
 *
 * The per_tpc and per_lts variants take a `struct nvgpu_ecc_stat ***` because
 * the members they fill are two-level tables; the others take a
 * `struct nvgpu_ecc_stat **`.
 *
 * NOTE(review): the functions are defined outside this header; their
 * allocation and naming behaviour and the meaning of the int return value
 * (presumably 0 / negative errno) are not visible here - confirm in
 * common/ecc.c.
 */
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat ***stat, const char *name);
|
||||||
|
#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \
|
||||||
|
nvgpu_ecc_counter_init_per_tpc(g, &g->ecc.gr.stat, #stat)
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat **stat, const char *name);
|
||||||
|
#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
|
||||||
|
nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat **stat, const char *name);
|
||||||
|
#define NVGPU_ECC_COUNTER_INIT_GR(stat) \
|
||||||
|
nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
|
||||||
|
#define NVGPU_ECC_COUNTER_INIT_FB(stat) \
|
||||||
|
nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat)
|
||||||
|
#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \
|
||||||
|
nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat)
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat ***stat, const char *name);
|
||||||
|
#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
|
||||||
|
nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)
|
||||||
|
|
||||||
|
int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
|
||||||
|
struct nvgpu_ecc_stat **stat, const char *name);
|
||||||
|
#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \
|
||||||
|
nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat)
|
||||||
|
|
||||||
|
void nvgpu_ecc_free(struct gk20a *g);
|
||||||
|
|
||||||
|
int nvgpu_ecc_init_support(struct gk20a *g);
|
||||||
|
void nvgpu_ecc_remove_support(struct gk20a *g);
|
||||||
|
|
||||||
|
/* The following functions are implemented by each OS-specific layer. */
|
||||||
|
|
||||||
|
int nvgpu_ecc_sysfs_init(struct gk20a *g);
|
||||||
|
void nvgpu_ecc_sysfs_remove(struct gk20a *g);
|
||||||
|
|
||||||
|
#endif
|
||||||
80
drivers/gpu/nvgpu/os/linux/ecc_sysfs.c
Normal file
80
drivers/gpu/nvgpu/os/linux/ecc_sysfs.c
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms and conditions of the GNU General Public License,
|
||||||
|
* version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <nvgpu/ecc.h>
|
||||||
|
|
||||||
|
#include "gk20a/gk20a.h"
|
||||||
|
#include "os_linux.h"
|
||||||
|
|
||||||
|
int nvgpu_ecc_sysfs_init(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct device *dev = dev_from_gk20a(g);
|
||||||
|
struct nvgpu_ecc *ecc = &g->ecc;
|
||||||
|
struct dev_ext_attribute *attr;
|
||||||
|
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||||
|
struct nvgpu_ecc_stat *stat;
|
||||||
|
int i = 0, err;
|
||||||
|
|
||||||
|
attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count);
|
||||||
|
if (!attr)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
nvgpu_list_for_each_entry(stat,
|
||||||
|
&ecc->stats_list, nvgpu_ecc_stat, node) {
|
||||||
|
if (i >= ecc->stats_count) {
|
||||||
|
err = -EINVAL;
|
||||||
|
nvgpu_err(g, "stats_list longer than stats_count %d",
|
||||||
|
ecc->stats_count);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sysfs_attr_init(&attr[i].attr);
|
||||||
|
attr[i].attr.attr.name = stat->name;
|
||||||
|
attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
|
||||||
|
attr[i].var = &stat->counter;
|
||||||
|
attr[i].attr.show = device_show_int;
|
||||||
|
err = device_create_file(dev, &attr[i].attr);
|
||||||
|
if (err) {
|
||||||
|
nvgpu_err(g, "sysfs node create failed for %s\n",
|
||||||
|
stat->name);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err) {
|
||||||
|
while (i-- > 0)
|
||||||
|
device_remove_file(dev, &attr[i].attr);
|
||||||
|
nvgpu_kfree(g, attr);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
l->ecc_attrs = attr;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void nvgpu_ecc_sysfs_remove(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct device *dev = dev_from_gk20a(g);
|
||||||
|
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||||
|
struct nvgpu_ecc *ecc = &g->ecc;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < ecc->stats_count; i++)
|
||||||
|
device_remove_file(dev, &l->ecc_attrs[i].attr);
|
||||||
|
nvgpu_kfree(g, l->ecc_attrs);
|
||||||
|
l->ecc_attrs = NULL;
|
||||||
|
}
|
||||||
@@ -141,6 +141,7 @@ struct nvgpu_os_linux {
|
|||||||
struct dentry *debugfs_dump_ctxsw_stats;
|
struct dentry *debugfs_dump_ctxsw_stats;
|
||||||
#endif
|
#endif
|
||||||
DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
|
DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
|
||||||
|
struct dev_ext_attribute *ecc_attrs;
|
||||||
|
|
||||||
struct gk20a_cde_app cde_app;
|
struct gk20a_cde_app cde_app;
|
||||||
|
|
||||||
|
|||||||
@@ -52,11 +52,6 @@ static int nvgpu_pci_tegra_probe(struct device *dev)
|
|||||||
|
|
||||||
static int nvgpu_pci_tegra_remove(struct device *dev)
|
static int nvgpu_pci_tegra_remove(struct device *dev)
|
||||||
{
|
{
|
||||||
struct gk20a *g = get_gk20a(dev);
|
|
||||||
|
|
||||||
if (g->ops.gr.remove_gr_sysfs)
|
|
||||||
g->ops.gr.remove_gr_sysfs(g);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,269 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
|
||||||
* under the terms and conditions of the GNU General Public License,
|
|
||||||
* version 2, as published by the Free Software Foundation.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
||||||
* more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <linux/hashtable.h>
|
|
||||||
|
|
||||||
#include <nvgpu/kmem.h>
|
|
||||||
#include <nvgpu/bug.h>
|
|
||||||
#include <nvgpu/hashtable.h>
|
|
||||||
|
|
||||||
#include "os_linux.h"
|
|
||||||
|
|
||||||
#include "gk20a/gk20a.h"
|
|
||||||
|
|
||||||
#include "platform_gk20a.h"
|
|
||||||
#include "platform_gk20a_tegra.h"
|
|
||||||
#include "platform_gp10b.h"
|
|
||||||
#include "platform_gp10b_tegra.h"
|
|
||||||
#include "platform_ecc_sysfs.h"
|
|
||||||
|
|
||||||
static u32 gen_ecc_hash_key(char *str)
|
|
||||||
{
|
|
||||||
int i = 0;
|
|
||||||
u32 hash_key = 0x811c9dc5;
|
|
||||||
|
|
||||||
while (str[i]) {
|
|
||||||
hash_key *= 0x1000193;
|
|
||||||
hash_key ^= (u32)(str[i]);
|
|
||||||
i++;
|
|
||||||
};
|
|
||||||
|
|
||||||
return hash_key;
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t ecc_stat_show(struct device *dev,
|
|
||||||
struct device_attribute *attr,
|
|
||||||
char *buf)
|
|
||||||
{
|
|
||||||
const char *ecc_stat_full_name = attr->attr.name;
|
|
||||||
const char *ecc_stat_base_name;
|
|
||||||
unsigned int hw_unit;
|
|
||||||
unsigned int subunit;
|
|
||||||
struct gk20a_ecc_stat *ecc_stat;
|
|
||||||
u32 hash_key;
|
|
||||||
struct gk20a *g = get_gk20a(dev);
|
|
||||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
|
||||||
|
|
||||||
if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
|
|
||||||
&subunit) == 2) {
|
|
||||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
|
|
||||||
hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
|
|
||||||
} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
|
|
||||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
|
|
||||||
} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
|
|
||||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
|
|
||||||
} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
|
|
||||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
|
|
||||||
} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
|
|
||||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
|
|
||||||
} else {
|
|
||||||
return snprintf(buf,
|
|
||||||
PAGE_SIZE,
|
|
||||||
"Error: Invalid ECC stat name!\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
|
|
||||||
|
|
||||||
hash_for_each_possible(l->ecc_sysfs_stats_htable,
|
|
||||||
ecc_stat,
|
|
||||||
hash_node,
|
|
||||||
hash_key) {
|
|
||||||
if (hw_unit >= ecc_stat->count)
|
|
||||||
continue;
|
|
||||||
if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
|
|
||||||
return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
int nvgpu_gr_ecc_stat_create(struct device *dev,
|
|
||||||
int is_l2, char *ecc_stat_name,
|
|
||||||
struct gk20a_ecc_stat *ecc_stat)
|
|
||||||
{
|
|
||||||
struct gk20a *g = get_gk20a(dev);
|
|
||||||
char *ltc_unit_name = "ltc";
|
|
||||||
char *gr_unit_name = "gpc0_tpc";
|
|
||||||
char *lts_unit_name = "lts";
|
|
||||||
int num_hw_units = 0;
|
|
||||||
int num_subunits = 0;
|
|
||||||
|
|
||||||
if (is_l2 == 1)
|
|
||||||
num_hw_units = g->ltc_count;
|
|
||||||
else if (is_l2 == 2) {
|
|
||||||
num_hw_units = g->ltc_count;
|
|
||||||
num_subunits = g->gr.slices_per_ltc;
|
|
||||||
} else
|
|
||||||
num_hw_units = g->gr.tpc_count;
|
|
||||||
|
|
||||||
|
|
||||||
return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
|
|
||||||
is_l2 ? ltc_unit_name : gr_unit_name,
|
|
||||||
num_subunits ? lts_unit_name: NULL,
|
|
||||||
ecc_stat_name,
|
|
||||||
ecc_stat);
|
|
||||||
}
|
|
||||||
|
|
||||||
int nvgpu_ecc_stat_create(struct device *dev,
|
|
||||||
int num_hw_units, int num_subunits,
|
|
||||||
char *ecc_unit_name, char *ecc_subunit_name,
|
|
||||||
char *ecc_stat_name,
|
|
||||||
struct gk20a_ecc_stat *ecc_stat)
|
|
||||||
{
|
|
||||||
int error = 0;
|
|
||||||
struct gk20a *g = get_gk20a(dev);
|
|
||||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
|
||||||
int hw_unit = 0;
|
|
||||||
int subunit = 0;
|
|
||||||
int element = 0;
|
|
||||||
u32 hash_key = 0;
|
|
||||||
struct device_attribute *dev_attr_array;
|
|
||||||
|
|
||||||
int num_elements = num_subunits ? num_subunits * num_hw_units :
|
|
||||||
num_hw_units;
|
|
||||||
|
|
||||||
/* Allocate arrays */
|
|
||||||
dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
|
|
||||||
num_elements);
|
|
||||||
ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
|
|
||||||
ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
|
|
||||||
|
|
||||||
for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
|
|
||||||
ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
|
|
||||||
ECC_STAT_NAME_MAX_SIZE);
|
|
||||||
}
|
|
||||||
ecc_stat->count = num_elements;
|
|
||||||
if (num_subunits) {
|
|
||||||
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
|
|
||||||
for (subunit = 0; subunit < num_subunits; subunit++) {
|
|
||||||
element = hw_unit*num_subunits + subunit;
|
|
||||||
|
|
||||||
snprintf(ecc_stat->names[element],
|
|
||||||
ECC_STAT_NAME_MAX_SIZE,
|
|
||||||
"%s%d_%s%d_%s",
|
|
||||||
ecc_unit_name,
|
|
||||||
hw_unit,
|
|
||||||
ecc_subunit_name,
|
|
||||||
subunit,
|
|
||||||
ecc_stat_name);
|
|
||||||
|
|
||||||
sysfs_attr_init(&dev_attr_array[element].attr);
|
|
||||||
dev_attr_array[element].attr.name =
|
|
||||||
ecc_stat->names[element];
|
|
||||||
dev_attr_array[element].attr.mode =
|
|
||||||
VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
|
|
||||||
dev_attr_array[element].show = ecc_stat_show;
|
|
||||||
dev_attr_array[element].store = NULL;
|
|
||||||
|
|
||||||
/* Create sysfs file */
|
|
||||||
error |= device_create_file(dev,
|
|
||||||
&dev_attr_array[element]);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
|
|
||||||
|
|
||||||
/* Fill in struct device_attribute members */
|
|
||||||
snprintf(ecc_stat->names[hw_unit],
|
|
||||||
ECC_STAT_NAME_MAX_SIZE,
|
|
||||||
"%s%d_%s",
|
|
||||||
ecc_unit_name,
|
|
||||||
hw_unit,
|
|
||||||
ecc_stat_name);
|
|
||||||
|
|
||||||
sysfs_attr_init(&dev_attr_array[hw_unit].attr);
|
|
||||||
dev_attr_array[hw_unit].attr.name =
|
|
||||||
ecc_stat->names[hw_unit];
|
|
||||||
dev_attr_array[hw_unit].attr.mode =
|
|
||||||
VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
|
|
||||||
dev_attr_array[hw_unit].show = ecc_stat_show;
|
|
||||||
dev_attr_array[hw_unit].store = NULL;
|
|
||||||
|
|
||||||
/* Create sysfs file */
|
|
||||||
error |= device_create_file(dev,
|
|
||||||
&dev_attr_array[hw_unit]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add hash table entry */
|
|
||||||
hash_key = gen_ecc_hash_key(ecc_stat_name);
|
|
||||||
hash_add(l->ecc_sysfs_stats_htable,
|
|
||||||
&ecc_stat->hash_node,
|
|
||||||
hash_key);
|
|
||||||
|
|
||||||
ecc_stat->attr_array = dev_attr_array;
|
|
||||||
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
void nvgpu_gr_ecc_stat_remove(struct device *dev,
|
|
||||||
int is_l2, struct gk20a_ecc_stat *ecc_stat)
|
|
||||||
{
|
|
||||||
struct gk20a *g = get_gk20a(dev);
|
|
||||||
int num_hw_units = 0;
|
|
||||||
int num_subunits = 0;
|
|
||||||
|
|
||||||
if (is_l2 == 1)
|
|
||||||
num_hw_units = g->ltc_count;
|
|
||||||
else if (is_l2 == 2) {
|
|
||||||
num_hw_units = g->ltc_count;
|
|
||||||
num_subunits = g->gr.slices_per_ltc;
|
|
||||||
} else
|
|
||||||
num_hw_units = g->gr.tpc_count;
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
|
|
||||||
}
|
|
||||||
|
|
||||||
void nvgpu_ecc_stat_remove(struct device *dev,
|
|
||||||
int num_hw_units, int num_subunits,
|
|
||||||
struct gk20a_ecc_stat *ecc_stat)
|
|
||||||
{
|
|
||||||
struct gk20a *g = get_gk20a(dev);
|
|
||||||
struct device_attribute *dev_attr_array = ecc_stat->attr_array;
|
|
||||||
int hw_unit = 0;
|
|
||||||
int subunit = 0;
|
|
||||||
int element = 0;
|
|
||||||
int num_elements = num_subunits ? num_subunits * num_hw_units :
|
|
||||||
num_hw_units;
|
|
||||||
|
|
||||||
/* Remove sysfs files */
|
|
||||||
if (num_subunits) {
|
|
||||||
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
|
|
||||||
for (subunit = 0; subunit < num_subunits; subunit++) {
|
|
||||||
element = hw_unit * num_subunits + subunit;
|
|
||||||
|
|
||||||
device_remove_file(dev,
|
|
||||||
&dev_attr_array[element]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
|
|
||||||
device_remove_file(dev, &dev_attr_array[hw_unit]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Remove hash table entry */
|
|
||||||
hash_del(&ecc_stat->hash_node);
|
|
||||||
|
|
||||||
/* Free arrays */
|
|
||||||
nvgpu_kfree(g, ecc_stat->counters);
|
|
||||||
|
|
||||||
for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
|
|
||||||
nvgpu_kfree(g, ecc_stat->names[hw_unit]);
|
|
||||||
|
|
||||||
nvgpu_kfree(g, ecc_stat->names);
|
|
||||||
nvgpu_kfree(g, dev_attr_array);
|
|
||||||
}
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
|
||||||
* under the terms and conditions of the GNU General Public License,
|
|
||||||
* version 2, as published by the Free Software Foundation.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
||||||
* more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _NVGPU_PLATFORM_SYSFS_H_
|
|
||||||
#define _NVGPU_PLATFORM_SYSFS_H_
|
|
||||||
|
|
||||||
#include "gp10b/gr_gp10b.h"
|
|
||||||
|
|
||||||
#define ECC_STAT_NAME_MAX_SIZE 100
|
|
||||||
|
|
||||||
int nvgpu_gr_ecc_stat_create(struct device *dev,
|
|
||||||
int is_l2, char *ecc_stat_name,
|
|
||||||
struct gk20a_ecc_stat *ecc_stat);
|
|
||||||
int nvgpu_ecc_stat_create(struct device *dev,
|
|
||||||
int num_hw_units, int num_subunits,
|
|
||||||
char *ecc_unit_name, char *ecc_subunit_name,
|
|
||||||
char *ecc_stat_name,
|
|
||||||
struct gk20a_ecc_stat *ecc_stat);
|
|
||||||
void nvgpu_gr_ecc_stat_remove(struct device *dev,
|
|
||||||
int is_l2, struct gk20a_ecc_stat *ecc_stat);
|
|
||||||
void nvgpu_ecc_stat_remove(struct device *dev,
|
|
||||||
int num_hw_units, int num_subunits,
|
|
||||||
struct gk20a_ecc_stat *ecc_stat);
|
|
||||||
#endif
|
|
||||||
@@ -41,7 +41,6 @@
|
|||||||
#include "gk20a/gk20a.h"
|
#include "gk20a/gk20a.h"
|
||||||
|
|
||||||
#include "platform_gk20a.h"
|
#include "platform_gk20a.h"
|
||||||
#include "platform_ecc_sysfs.h"
|
|
||||||
#include "platform_gk20a_tegra.h"
|
#include "platform_gk20a_tegra.h"
|
||||||
#include "platform_gp10b.h"
|
#include "platform_gp10b.h"
|
||||||
#include "platform_gp10b_tegra.h"
|
#include "platform_gp10b_tegra.h"
|
||||||
@@ -177,11 +176,6 @@ static int gp10b_tegra_late_probe(struct device *dev)
|
|||||||
|
|
||||||
static int gp10b_tegra_remove(struct device *dev)
|
static int gp10b_tegra_remove(struct device *dev)
|
||||||
{
|
{
|
||||||
struct gk20a *g = get_gk20a(dev);
|
|
||||||
|
|
||||||
if (g->ops.gr.remove_gr_sysfs)
|
|
||||||
g->ops.gr.remove_gr_sysfs(g);
|
|
||||||
|
|
||||||
/* deinitialise tegra specific scaling quirks */
|
/* deinitialise tegra specific scaling quirks */
|
||||||
gp10b_tegra_scale_exit(dev);
|
gp10b_tegra_scale_exit(dev);
|
||||||
|
|
||||||
@@ -476,162 +470,3 @@ struct gk20a_platform gp10b_tegra_platform = {
|
|||||||
|
|
||||||
.secure_buffer_size = 401408,
|
.secure_buffer_size = 401408,
|
||||||
};
|
};
|
||||||
|
|
||||||
void gr_gp10b_create_sysfs(struct gk20a *g)
|
|
||||||
{
|
|
||||||
int error = 0;
|
|
||||||
struct device *dev = dev_from_gk20a(g);
|
|
||||||
|
|
||||||
/* This stat creation function is called on GR init. GR can get
|
|
||||||
initialized multiple times but we only need to create the ECC
|
|
||||||
stats once. Therefore, add the following check to avoid
|
|
||||||
creating duplicate stat sysfs nodes. */
|
|
||||||
if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
|
|
||||||
return;
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_lrf_ecc_single_err_count",
|
|
||||||
&g->ecc.gr.sm_lrf_single_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_lrf_ecc_double_err_count",
|
|
||||||
&g->ecc.gr.sm_lrf_double_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_shm_ecc_sec_count",
|
|
||||||
&g->ecc.gr.sm_shm_sec_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_shm_ecc_sed_count",
|
|
||||||
&g->ecc.gr.sm_shm_sed_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_shm_ecc_ded_count",
|
|
||||||
&g->ecc.gr.sm_shm_ded_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"tex_ecc_total_sec_pipe0_count",
|
|
||||||
&g->ecc.gr.tex_total_sec_pipe0_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"tex_ecc_total_ded_pipe0_count",
|
|
||||||
&g->ecc.gr.tex_total_ded_pipe0_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"tex_ecc_unique_sec_pipe0_count",
|
|
||||||
&g->ecc.gr.tex_unique_sec_pipe0_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"tex_ecc_unique_ded_pipe0_count",
|
|
||||||
&g->ecc.gr.tex_unique_ded_pipe0_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"tex_ecc_total_sec_pipe1_count",
|
|
||||||
&g->ecc.gr.tex_total_sec_pipe1_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"tex_ecc_total_ded_pipe1_count",
|
|
||||||
&g->ecc.gr.tex_total_ded_pipe1_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"tex_ecc_unique_sec_pipe1_count",
|
|
||||||
&g->ecc.gr.tex_unique_sec_pipe1_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"tex_ecc_unique_ded_pipe1_count",
|
|
||||||
&g->ecc.gr.tex_unique_ded_pipe1_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
2,
|
|
||||||
"ecc_sec_count",
|
|
||||||
&g->ecc.ltc.l2_sec_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
2,
|
|
||||||
"ecc_ded_count",
|
|
||||||
&g->ecc.ltc.l2_ded_count);
|
|
||||||
|
|
||||||
if (error)
|
|
||||||
dev_err(dev, "Failed to create sysfs attributes!\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gp10b_remove_sysfs(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct device *dev = dev_from_gk20a(g);
|
|
||||||
|
|
||||||
if (!g->ecc.gr.sm_lrf_single_err_count.counters)
|
|
||||||
return;
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_lrf_single_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_lrf_double_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_shm_sec_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_shm_sed_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_shm_ded_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.tex_total_sec_pipe0_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.tex_total_ded_pipe0_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.tex_unique_sec_pipe0_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.tex_unique_ded_pipe0_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.tex_total_sec_pipe1_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.tex_total_ded_pipe1_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.tex_unique_sec_pipe1_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.tex_unique_ded_pipe1_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
2,
|
|
||||||
&g->ecc.ltc.l2_sec_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
2,
|
|
||||||
&g->ecc.ltc.l2_ded_count);
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -18,6 +18,5 @@
|
|||||||
#define _PLATFORM_GP10B_TEGRA_H_
|
#define _PLATFORM_GP10B_TEGRA_H_
|
||||||
|
|
||||||
#include "gp10b/gr_gp10b.h"
|
#include "gp10b/gr_gp10b.h"
|
||||||
#include "platform_ecc_sysfs.h"
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -39,7 +39,6 @@
|
|||||||
|
|
||||||
#include "platform_gp10b.h"
|
#include "platform_gp10b.h"
|
||||||
#include "platform_gp10b_tegra.h"
|
#include "platform_gp10b_tegra.h"
|
||||||
#include "platform_ecc_sysfs.h"
|
|
||||||
|
|
||||||
#include "os_linux.h"
|
#include "os_linux.h"
|
||||||
#include "platform_gk20a_tegra.h"
|
#include "platform_gk20a_tegra.h"
|
||||||
@@ -94,11 +93,6 @@ static int gv11b_tegra_late_probe(struct device *dev)
|
|||||||
|
|
||||||
static int gv11b_tegra_remove(struct device *dev)
|
static int gv11b_tegra_remove(struct device *dev)
|
||||||
{
|
{
|
||||||
struct gk20a *g = get_gk20a(dev);
|
|
||||||
|
|
||||||
if (g->ops.gr.remove_gr_sysfs)
|
|
||||||
g->ops.gr.remove_gr_sysfs(g);
|
|
||||||
|
|
||||||
gv11b_tegra_scale_exit(dev);
|
gv11b_tegra_scale_exit(dev);
|
||||||
|
|
||||||
#ifdef CONFIG_TEGRA_GK20A_NVHOST
|
#ifdef CONFIG_TEGRA_GK20A_NVHOST
|
||||||
@@ -261,328 +255,3 @@ struct gk20a_platform gv11b_tegra_platform = {
|
|||||||
|
|
||||||
.secure_buffer_size = 667648,
|
.secure_buffer_size = 667648,
|
||||||
};
|
};
|
||||||
|
|
||||||
void gr_gv11b_create_sysfs(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct device *dev = dev_from_gk20a(g);
|
|
||||||
int error = 0;
|
|
||||||
|
|
||||||
/* This stat creation function is called on GR init. GR can get
|
|
||||||
initialized multiple times but we only need to create the ECC
|
|
||||||
stats once. Therefore, add the following check to avoid
|
|
||||||
creating duplicate stat sysfs nodes. */
|
|
||||||
if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL)
|
|
||||||
return;
|
|
||||||
|
|
||||||
gr_gp10b_create_sysfs(g);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_l1_tag_ecc_corrected_err_count",
|
|
||||||
&g->ecc.gr.sm_l1_tag_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_l1_tag_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_cbu_ecc_corrected_err_count",
|
|
||||||
&g->ecc.gr.sm_cbu_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_cbu_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.gr.sm_cbu_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_l1_data_ecc_corrected_err_count",
|
|
||||||
&g->ecc.gr.sm_l1_data_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_l1_data_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_icache_ecc_corrected_err_count",
|
|
||||||
&g->ecc.gr.sm_icache_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"sm_icache_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.gr.sm_icache_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"gcc_l15_ecc_corrected_err_count",
|
|
||||||
&g->ecc.gr.gcc_l15_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_gr_ecc_stat_create(dev,
|
|
||||||
0,
|
|
||||||
"gcc_l15_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.gr.gcc_l15_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
g->ltc_count,
|
|
||||||
0,
|
|
||||||
"ltc",
|
|
||||||
NULL,
|
|
||||||
"l2_cache_uncorrected_err_count",
|
|
||||||
&g->ecc.ltc.l2_cache_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
g->ltc_count,
|
|
||||||
0,
|
|
||||||
"ltc",
|
|
||||||
NULL,
|
|
||||||
"l2_cache_corrected_err_count",
|
|
||||||
&g->ecc.ltc.l2_cache_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"gpc",
|
|
||||||
NULL,
|
|
||||||
"fecs_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.gr.fecs_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"gpc",
|
|
||||||
NULL,
|
|
||||||
"fecs_ecc_corrected_err_count",
|
|
||||||
&g->ecc.gr.fecs_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
g->gr.gpc_count,
|
|
||||||
0,
|
|
||||||
"gpc",
|
|
||||||
NULL,
|
|
||||||
"gpccs_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.gr.gpccs_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
g->gr.gpc_count,
|
|
||||||
0,
|
|
||||||
"gpc",
|
|
||||||
NULL,
|
|
||||||
"gpccs_ecc_corrected_err_count",
|
|
||||||
&g->ecc.gr.gpccs_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
g->gr.gpc_count,
|
|
||||||
0,
|
|
||||||
"gpc",
|
|
||||||
NULL,
|
|
||||||
"mmu_l1tlb_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
g->gr.gpc_count,
|
|
||||||
0,
|
|
||||||
"gpc",
|
|
||||||
NULL,
|
|
||||||
"mmu_l1tlb_ecc_corrected_err_count",
|
|
||||||
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"eng",
|
|
||||||
NULL,
|
|
||||||
"mmu_l2tlb_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"eng",
|
|
||||||
NULL,
|
|
||||||
"mmu_l2tlb_ecc_corrected_err_count",
|
|
||||||
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"eng",
|
|
||||||
NULL,
|
|
||||||
"mmu_hubtlb_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"eng",
|
|
||||||
NULL,
|
|
||||||
"mmu_hubtlb_ecc_corrected_err_count",
|
|
||||||
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"eng",
|
|
||||||
NULL,
|
|
||||||
"mmu_fillunit_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"eng",
|
|
||||||
NULL,
|
|
||||||
"mmu_fillunit_ecc_corrected_err_count",
|
|
||||||
&g->ecc.fb.mmu_fillunit_corrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"eng",
|
|
||||||
NULL,
|
|
||||||
"pmu_ecc_uncorrected_err_count",
|
|
||||||
&g->ecc.pmu.pmu_uncorrected_err_count);
|
|
||||||
|
|
||||||
error |= nvgpu_ecc_stat_create(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
"eng",
|
|
||||||
NULL,
|
|
||||||
"pmu_ecc_corrected_err_count",
|
|
||||||
&g->ecc.pmu.pmu_corrected_err_count);
|
|
||||||
|
|
||||||
if (error)
|
|
||||||
dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gv11b_remove_sysfs(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct device *dev = dev_from_gk20a(g);
|
|
||||||
|
|
||||||
if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters)
|
|
||||||
return;
|
|
||||||
gr_gp10b_remove_sysfs(g);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_l1_tag_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_cbu_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_cbu_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_l1_data_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_icache_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.sm_icache_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.gcc_l15_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_gr_ecc_stat_remove(dev,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.gcc_l15_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
g->ltc_count,
|
|
||||||
0,
|
|
||||||
&g->ecc.ltc.l2_cache_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
g->ltc_count,
|
|
||||||
0,
|
|
||||||
&g->ecc.ltc.l2_cache_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.fecs_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.fecs_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
g->gr.gpc_count,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.gpccs_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
g->gr.gpc_count,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.gpccs_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
g->gr.gpc_count,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
g->gr.gpc_count,
|
|
||||||
0,
|
|
||||||
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.fb.mmu_fillunit_corrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.pmu.pmu_uncorrected_err_count);
|
|
||||||
|
|
||||||
nvgpu_ecc_stat_remove(dev,
|
|
||||||
1,
|
|
||||||
0,
|
|
||||||
&g->ecc.pmu.pmu_corrected_err_count);
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -215,10 +215,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
|||||||
.update_boosted_ctx = NULL,
|
.update_boosted_ctx = NULL,
|
||||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||||
#ifdef CONFIG_SYSFS
|
|
||||||
.create_gr_sysfs = gr_gp10b_create_sysfs,
|
|
||||||
.remove_gr_sysfs = gr_gp10b_remove_sysfs,
|
|
||||||
#endif
|
|
||||||
.set_ctxsw_preemption_mode =
|
.set_ctxsw_preemption_mode =
|
||||||
vgpu_gr_gp10b_set_ctxsw_preemption_mode,
|
vgpu_gr_gp10b_set_ctxsw_preemption_mode,
|
||||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||||
|
|||||||
@@ -232,10 +232,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
|||||||
.update_boosted_ctx = NULL,
|
.update_boosted_ctx = NULL,
|
||||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||||
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
|
||||||
#ifdef CONFIG_SYSFS
|
|
||||||
.create_gr_sysfs = gr_gv11b_create_sysfs,
|
|
||||||
.remove_gr_sysfs = gr_gv11b_remove_sysfs,
|
|
||||||
#endif
|
|
||||||
.set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode,
|
.set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode,
|
||||||
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
|
||||||
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
|
||||||
|
|||||||
Reference in New Issue
Block a user