Revert "gpu:nvgpu: Expose physical gpc,tpc layout for ecc sysfs nodes."

This reverts commit 2cc098eae7.

Reason for revert: intermittent boot failures on drv-orin-f1 and frspr-f1
on both AV+L and AV+Q.

Bug 3998230

Change-Id: I230ba7ba469fde3f470dab7538cc757c99360d99
Signed-off-by: srajum <srajum@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2863208
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Authored by V M S Seeta Rama Raju Mudundi on 2023-02-24 06:22:54 -08:00
Committed by mobile promotions
parent a4eca46b4b
commit ab46ee3335
5 changed files with 69 additions and 168 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -549,7 +549,7 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
{
struct nvgpu_gr_config *config;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_index, tpc_index, tpc_phys_pos, local_gpc_tpc_mask_phys, i;
u32 gpc_index;
u32 gpc_phys_id;
int err;
@@ -660,52 +660,9 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
gr_config_init_gpc_skip_mask(config, gpc_index);
}
/*
* This structure holds the physical id for a TPC within a
* GPC. The GPC is indexed using physical id and the TPC is indexed using
* logical id.
*/
config->gpc_tpc_physical_id_map = nvgpu_kzalloc(g,
nvgpu_safe_mult_u64((size_t)config->gpc_count,
sizeof(u32 *)));
if (config->gpc_tpc_physical_id_map == NULL) {
nvgpu_err(g, "alloc gpc_tpc_physical_id_map failed");
goto clean_up_gpc_rop_config;
}
//Get Physical layout of tpc per physical gpc
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
cur_gr_instance, (u32)gpc_index);
config->gpc_tpc_physical_id_map[gpc_phys_id] =
nvgpu_kzalloc(g, config->max_tpc_per_gpc_count);
if (config->gpc_tpc_physical_id_map[gpc_phys_id] == NULL) {
nvgpu_err(g, "alloc tpc_physical_id_map(%u) failed",
gpc_phys_id);
goto clean_up_gpc_tpc_physical_id_map_alloc_fail;
}
tpc_phys_pos = 0U;
local_gpc_tpc_mask_phys = config->gpc_tpc_mask_physical[gpc_phys_id];
tpc_index = 0U;
while (tpc_index < config->gpc_tpc_count[gpc_index]) {
while (local_gpc_tpc_mask_phys != 0x0U) {
if ((local_gpc_tpc_mask_phys & 0x1U) != 0x0U) {
config->gpc_tpc_physical_id_map[gpc_phys_id][tpc_index++] =
tpc_phys_pos;
}
local_gpc_tpc_mask_phys >>= 1;
tpc_phys_pos++;
}
}
}
gr_config_log_info(g, config);
return config;
clean_up_gpc_tpc_physical_id_map_alloc_fail:
for (i = 0; i < gpc_index; i++) {
nvgpu_kfree(g, config->gpc_tpc_physical_id_map[i]);
}
nvgpu_kfree(g, config->gpc_tpc_physical_id_map);
clean_up_gpc_rop_config:
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ROP_IN_GPC)) {
gr_config_free_gpc_rop_config(g, config);
@@ -960,7 +917,6 @@ u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
{
u32 i;
if (config == NULL) {
return;
}
@@ -983,10 +939,6 @@ void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
config->sm_to_cluster_redex_config = NULL;
}
#endif
for (i = 0; i < config->gpc_count; i++) {
nvgpu_kfree(g, config->gpc_tpc_physical_id_map[i]);
}
nvgpu_kfree(g, config->gpc_tpc_physical_id_map);
nvgpu_kfree(g, config);
}
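
The hunk above removes the loop that builds the logical-to-physical TPC map from the per-GPC physical TPC mask. A minimal standalone C sketch of the essential walk, with libc types, a fixed-size array, and an illustrative mask value standing in for the nvgpu allocations and config fields:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for config->gpc_tpc_mask_physical[gpc_phys_id]: bit i set means
 * physical TPC i is present in this GPC (example: TPCs 0, 2 and 3). */
static const uint32_t tpc_mask_phys = 0x0DU;

int main(void)
{
	uint32_t map[32];          /* logical TPC index -> physical TPC id */
	uint32_t mask = tpc_mask_phys;
	uint32_t phys = 0U, logical = 0U;

	/* Essential walk of the removed loop: scan the mask from bit 0 upward
	 * and record the physical position of every set bit in logical order. */
	while (mask != 0U) {
		if ((mask & 0x1U) != 0U) {
			map[logical++] = phys;
		}
		mask >>= 1;
		phys++;
	}

	for (uint32_t i = 0U; i < logical; i++) {
		printf("logical tpc%u -> physical tpc%u\n", i, map[i]);
	}
	return 0;
}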

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -143,12 +143,6 @@ struct nvgpu_gr_config {
* Array is indexed by GPC physical-id.
*/
u32 *gpc_tpc_mask_physical;
/**
* 2D array to map TPC physical id to logical id.
* Array is indexed by GPC physical id and TPC is indexed using
* logical id.
*/
u32 **gpc_tpc_physical_id_map;
/**
* 2-D array to hold mask of TPCs attached to a PES unit
* in a GPC.

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,12 +23,10 @@
#include <nvgpu/gr/gr_ecc.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/string.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/kmem.h>
#include <nvgpu/ecc.h>
#include "common/gr/gr_config_priv.h"
int nvgpu_ecc_counter_init_per_gr(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name)
@@ -72,9 +70,8 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
{
struct nvgpu_ecc_stat **stats;
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
u32 gpc, tpc, gpc_phys_id, tpc_phys_id;
u32 gpc, tpc;
char gpc_str[10] = {0}, tpc_str[10] = {0};
int err = 0;
@@ -85,48 +82,46 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
}
for (gpc = 0; gpc < gpc_count; gpc++) {
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
stats[gpc_phys_id] = nvgpu_kzalloc(g,
nvgpu_safe_mult_u64(sizeof(*stats[gpc_phys_id]),
nvgpu_gr_config_get_max_tpc_per_gpc_count(gr_config)));
if (stats[gpc_phys_id] == NULL) {
nvgpu_err(g, "Mem alloc failed for %s\n", name);
stats[gpc] = nvgpu_kzalloc(g,
nvgpu_safe_mult_u64(sizeof(*stats[gpc]),
nvgpu_gr_config_get_gpc_tpc_count(gr_config,
gpc)));
if (stats[gpc] == NULL) {
err = -ENOMEM;
goto fail;
}
}
for (gpc = 0; gpc < gpc_count; gpc++) {
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
// For getting tpc count, gpc id is logical because we read it using gpc_stride.
for (tpc = 0; tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc); tpc++) {
tpc_phys_id = gr_config->gpc_tpc_physical_id_map[gpc_phys_id][tpc];
for (tpc = 0;
tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc);
tpc++) {
/**
* Store stats name as below:
* gpc<gpc_value>_tpc<tpc_value>_<name_string>
*/
(void)strcpy(stats[gpc_phys_id][tpc_phys_id].name, "gpc");
(void)nvgpu_strnadd_u32(gpc_str, gpc_phys_id,
(void)strcpy(stats[gpc][tpc].name, "gpc");
(void)nvgpu_strnadd_u32(gpc_str, gpc,
sizeof(gpc_str), 10U);
(void)strncat(stats[gpc_phys_id][tpc_phys_id].name, gpc_str,
(void)strncat(stats[gpc][tpc].name, gpc_str,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc_phys_id][tpc_phys_id].name));
(void)strncat(stats[gpc_phys_id][tpc_phys_id].name, "_tpc",
strlen(stats[gpc][tpc].name));
(void)strncat(stats[gpc][tpc].name, "_tpc",
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc_phys_id][tpc_phys_id].name));
(void)nvgpu_strnadd_u32(tpc_str, tpc_phys_id,
strlen(stats[gpc][tpc].name));
(void)nvgpu_strnadd_u32(tpc_str, tpc,
sizeof(tpc_str), 10U);
(void)strncat(stats[gpc_phys_id][tpc_phys_id].name, tpc_str,
(void)strncat(stats[gpc][tpc].name, tpc_str,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc_phys_id][tpc_phys_id].name));
(void)strncat(stats[gpc_phys_id][tpc_phys_id].name, "_",
strlen(stats[gpc][tpc].name));
(void)strncat(stats[gpc][tpc].name, "_",
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc_phys_id][tpc_phys_id].name));
(void)strncat(stats[gpc_phys_id][tpc_phys_id].name, name,
strlen(stats[gpc][tpc].name));
(void)strncat(stats[gpc][tpc].name, name,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc_phys_id][tpc_phys_id].name));
strlen(stats[gpc][tpc].name));
nvgpu_ecc_stat_add(g, &stats[gpc_phys_id][tpc_phys_id]);
nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
}
}
@@ -149,9 +144,8 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
{
struct nvgpu_ecc_stat *stats;
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
u32 gpc, gpc_phys_id;
u32 gpc;
char gpc_str[10] = {0};
stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
@@ -161,24 +155,23 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
}
for (gpc = 0; gpc < gpc_count; gpc++) {
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
/**
* Store stats name as below:
* gpc<gpc_value>_<name_string>
*/
(void)strcpy(stats[gpc_phys_id].name, "gpc");
(void)nvgpu_strnadd_u32(gpc_str, gpc_phys_id, sizeof(gpc_str), 10U);
(void)strncat(stats[gpc_phys_id].name, gpc_str,
(void)strcpy(stats[gpc].name, "gpc");
(void)nvgpu_strnadd_u32(gpc_str, gpc, sizeof(gpc_str), 10U);
(void)strncat(stats[gpc].name, gpc_str,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc_phys_id].name));
(void)strncat(stats[gpc_phys_id].name, "_",
strlen(stats[gpc].name));
(void)strncat(stats[gpc].name, "_",
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc_phys_id].name));
(void)strncat(stats[gpc_phys_id].name, name,
strlen(stats[gpc].name));
(void)strncat(stats[gpc].name, name,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc_phys_id].name));
strlen(stats[gpc].name));
nvgpu_ecc_stat_add(g, &stats[gpc_phys_id]);
nvgpu_ecc_stat_add(g, &stats[gpc]);
}
*stat = stats;
@@ -210,28 +203,24 @@ void nvgpu_ecc_counter_deinit_per_tpc(struct gk20a *g,
struct nvgpu_ecc_stat **stats = NULL;
u32 gpc_count;
u32 gpc, tpc;
u32 gpc_phys_id, tpc_phys_id;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
if (*stats_p != NULL) {
gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
stats = *stats_p;
for (gpc = 0; gpc < gpc_count; gpc++) {
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
if (stats[gpc_phys_id] == NULL) {
if (stats[gpc] == NULL) {
continue;
}
for (tpc = 0;
tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc);
tpc++) {
tpc_phys_id = gr_config->gpc_tpc_physical_id_map[gpc_phys_id][tpc];
nvgpu_ecc_stat_del(g, &stats[gpc_phys_id][tpc_phys_id]);
nvgpu_ecc_stat_del(g, &stats[gpc][tpc]);
}
nvgpu_kfree(g, stats[gpc_phys_id]);
stats[gpc_phys_id] = NULL;
nvgpu_kfree(g, stats[gpc]);
stats[gpc] = NULL;
}
nvgpu_kfree(g, stats);
@@ -244,17 +233,15 @@ void nvgpu_ecc_counter_deinit_per_gpc(struct gk20a *g,
{
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
struct nvgpu_ecc_stat *stats = NULL;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_count;
u32 gpc, gpc_phys_id;
u32 gpc;
if (*stats_p != NULL) {
gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
stats = *stats_p;
for (gpc = 0; gpc < gpc_count; gpc++) {
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
nvgpu_ecc_stat_del(g, &stats[gpc_phys_id]);
nvgpu_ecc_stat_del(g, &stats[gpc]);
}
nvgpu_kfree(g, stats);
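
The restored code above composes each counter name as gpc<gpc>_tpc<tpc>_<name> (and gpc<gpc>_<name> for per-GPC counters) using logical ids, bounded by NVGPU_ECC_STAT_NAME_MAX_SIZE. A minimal userspace sketch of that composition, with snprintf standing in for the nvgpu_strnadd_u32/strncat sequence and a stand-in size constant:

#include <stdint.h>
#include <stdio.h>

#define STAT_NAME_MAX 64   /* stand-in for NVGPU_ECC_STAT_NAME_MAX_SIZE */

/* Compose "gpc<gpc>_tpc<tpc>_<name>", truncated to the buffer size, as the
 * restored per-TPC ECC counter init does with logical gpc/tpc indices. */
static void ecc_stat_name(char *buf, uint32_t gpc, uint32_t tpc, const char *name)
{
	(void)snprintf(buf, STAT_NAME_MAX, "gpc%u_tpc%u_%s", gpc, tpc, name);
}

int main(void)
{
	char buf[STAT_NAME_MAX];

	ecc_stat_name(buf, 0U, 3U, "sm_lrf_ecc_double_err_count");
	printf("%s\n", buf);   /* gpc0_tpc3_sm_lrf_ecc_double_err_count */
	return 0;
}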

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,7 +34,6 @@
#include <nvgpu/gr/gr_intr.h>
#include "common/gr/gr_priv.h"
#include "common/gr/gr_config_priv.h"
#include "common/gr/gr_intr_priv.h"
#include "hal/gr/intr/gr_intr_gm20b.h"
#include "hal/gr/intr/gr_intr_gp10b.h"
@@ -811,14 +810,11 @@ static void ga10b_gr_intr_handle_tpc_sm_rams_ecc_exception(struct gk20a *g,
{
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
u32 offset, gpc_phys_id, tpc_phys_id;
u32 offset;
u32 rams_ecc_status;
u32 rams_uncorrected_err_count_delta = 0U;
bool is_rams_ecc_uncorrected_total_err_overflow = false;
struct nvgpu_gr_sm_ecc_status ecc_status;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_gr_config *config = gr->config;
offset = nvgpu_safe_add_u32(
nvgpu_safe_mult_u32(gpc_stride, gpc),
@@ -856,11 +852,9 @@ static void ga10b_gr_intr_handle_tpc_sm_rams_ecc_exception(struct gk20a *g,
rams_uncorrected_err_count_delta,
BIT32(gr_pri_gpc0_tpc0_sm_rams_ecc_uncorrected_err_count_total_s()));
}
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
tpc_phys_id = config->gpc_tpc_physical_id_map[gpc_phys_id][tpc];
g->ecc.gr.sm_rams_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_rams_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter,
g->ecc.gr.sm_rams_ecc_uncorrected_err_count[gpc][tpc].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_rams_ecc_uncorrected_err_count[gpc][tpc].counter,
rams_uncorrected_err_count_delta);
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_rams_ecc_uncorrected_err_count_r(), offset),

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -37,7 +37,6 @@
#include "common/gr/gr_priv.h"
#include "common/gr/gr_config_priv.h"
#include "gr_intr_gp10b.h"
#include "gr_intr_gv11b.h"
@@ -1078,11 +1077,6 @@ static void gv11b_gr_intr_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32
bool is_l1_tag_ecc_corrected_total_err_overflow = false;
bool is_l1_tag_ecc_uncorrected_total_err_overflow = false;
struct nvgpu_gr_sm_ecc_status ecc_status;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_gr_config *config = gr->config;
u32 gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
u32 tpc_phys_id = config->gpc_tpc_physical_id_map[gpc_phys_id][tpc];
offset = nvgpu_safe_add_u32(
nvgpu_safe_mult_u32(gpc_stride, gpc),
@@ -1126,9 +1120,9 @@ static void gv11b_gr_intr_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32
l1_tag_corrected_err_count_delta,
BIT32(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()));
}
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc_phys_id][tpc_phys_id].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc_phys_id][tpc_phys_id].counter,
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter,
l1_tag_corrected_err_count_delta);
gv11b_gr_intr_report_l1_tag_corrected_err(g, &ecc_status, gpc, tpc);
nvgpu_writel(g, nvgpu_safe_add_u32(
@@ -1147,9 +1141,9 @@ static void gv11b_gr_intr_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32
l1_tag_uncorrected_err_count_delta,
BIT32(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()));
}
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter,
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter,
l1_tag_uncorrected_err_count_delta);
gv11b_gr_intr_report_l1_tag_uncorrected_err(g, &ecc_status, gpc, tpc);
nvgpu_writel(g, nvgpu_safe_add_u32(
@@ -1227,11 +1221,6 @@ static void gv11b_gr_intr_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc
bool is_lrf_ecc_corrected_total_err_overflow = false;
bool is_lrf_ecc_uncorrected_total_err_overflow = false;
struct nvgpu_gr_sm_ecc_status ecc_status;
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_gr_config *config = gr->config;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
u32 tpc_phys_id = config->gpc_tpc_physical_id_map[gpc_phys_id][tpc];
offset = nvgpu_safe_add_u32(
nvgpu_safe_mult_u32(gpc_stride, gpc),
@@ -1294,9 +1283,9 @@ static void gv11b_gr_intr_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc
lrf_uncorrected_err_count_delta,
BIT32(gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()));
}
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc_phys_id][tpc_phys_id].counter =
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc_phys_id][tpc_phys_id].counter,
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter,
lrf_uncorrected_err_count_delta);
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r(), offset),
@@ -1365,11 +1354,6 @@ static void gv11b_gr_intr_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc
bool is_cbu_ecc_corrected_total_err_overflow = false;
bool is_cbu_ecc_uncorrected_total_err_overflow = false;
struct nvgpu_gr_sm_ecc_status ecc_status;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_gr_config *config = gr->config;
u32 gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
u32 tpc_phys_id = config->gpc_tpc_physical_id_map[gpc_phys_id][tpc];
offset = nvgpu_safe_add_u32(
nvgpu_safe_mult_u32(gpc_stride, gpc),
@@ -1430,9 +1414,9 @@ static void gv11b_gr_intr_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc
nvgpu_wrapping_add_u32(cbu_uncorrected_err_count_delta,
BIT32(gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()));
}
g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter,
g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter,
cbu_uncorrected_err_count_delta);
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r(), offset),
@@ -1496,11 +1480,6 @@ static void gv11b_gr_intr_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32
bool is_l1_data_ecc_corrected_total_err_overflow = false;
bool is_l1_data_ecc_uncorrected_total_err_overflow = false;
struct nvgpu_gr_sm_ecc_status ecc_status;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_gr_config *config = gr->config;
u32 gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
u32 tpc_phys_id = config->gpc_tpc_physical_id_map[gpc_phys_id][tpc];
offset = nvgpu_safe_add_u32(
nvgpu_safe_mult_u32(gpc_stride, gpc),
@@ -1562,9 +1541,9 @@ static void gv11b_gr_intr_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32
nvgpu_wrapping_add_u32(l1_data_uncorrected_err_count_delta,
BIT32(gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()));
}
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter,
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter,
l1_data_uncorrected_err_count_delta);
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r(), offset),
@@ -1682,11 +1661,6 @@ static void gv11b_gr_intr_handle_icache_exception(struct gk20a *g, u32 gpc, u32
bool is_icache_ecc_corrected_total_err_overflow = false;
bool is_icache_ecc_uncorrected_total_err_overflow = false;
struct nvgpu_gr_sm_ecc_status ecc_status;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_gr_config *config = gr->config;
u32 gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, cur_gr_instance, gpc);
u32 tpc_phys_id = config->gpc_tpc_physical_id_map[gpc_phys_id][tpc];
offset = nvgpu_safe_add_u32(
nvgpu_safe_mult_u32(gpc_stride, gpc),
@@ -1729,9 +1703,9 @@ static void gv11b_gr_intr_handle_icache_exception(struct gk20a *g, u32 gpc, u32
nvgpu_wrapping_add_u32(icache_corrected_err_count_delta,
BIT32(gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()));
}
g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc_phys_id][tpc_phys_id].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc_phys_id][tpc_phys_id].counter,
g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter,
icache_corrected_err_count_delta);
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r(), offset),
@@ -1750,9 +1724,9 @@ static void gv11b_gr_intr_handle_icache_exception(struct gk20a *g, u32 gpc, u32
icache_uncorrected_err_count_delta,
BIT32(gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()));
}
g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc_phys_id][tpc_phys_id].counter,
g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter =
nvgpu_wrapping_add_u32(
g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter,
icache_uncorrected_err_count_delta);
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r(), offset),