gpu: nvgpu: update ecc sysfs node handling

Make the ecc sysfs hash table per GPU by adding it as
part of nvgpu_os_linux. Using a single hash table
might give incorrect results, as GPUs have the same filenames
and a filename is used as the key for a lookup.

Add a device_attribute array as part of struct gk20a_ecc_stat.
Using a single array of device attribute pointers for an
ecc_stat results in a memory leak and incorrect stats if
multiple GPUs are present on the system. This array of pointers
will always hold info for the GPU which created its sysfs nodes
last. Fix this by making the device attribute array per ecc stat
per GPU.

Fix ecc stat removal to handle zero sub-units for a given
number of hwunits. Multiplying by zero results in no sysfs
node being removed at all.

Bug 1987855

Change-Id: Ifcacc5623cede8decfe228c02d72786337cd0876
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1735989
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Nitin Kumbhar
2018-05-31 19:13:43 +05:30
committed by mobile promotions
parent 19e9a79195
commit 8f2cb85983
8 changed files with 287 additions and 369 deletions

View File

@@ -1146,6 +1146,8 @@ static int gk20a_probe(struct platform_device *dev)
return -ENOMEM;
}
hash_init(l->ecc_sysfs_stats_htable);
gk20a = &l->g;
nvgpu_log_fn(gk20a, " ");

View File

@@ -19,6 +19,7 @@
#include <linux/cdev.h>
#include <linux/iommu.h>
#include <linux/hashtable.h>
#include "gk20a/gk20a.h"
#include "cde.h"
@@ -139,6 +140,8 @@ struct nvgpu_os_linux {
struct dentry *debugfs_force_preemption_gfxp;
struct dentry *debugfs_dump_ctxsw_stats;
#endif
DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
struct gk20a_cde_app cde_app;
struct rw_semaphore busy_lock;

View File

@@ -646,6 +646,8 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
return -ENOMEM;
}
hash_init(l->ecc_sysfs_stats_htable);
g = &l->g;
nvgpu_init_gk20a(g);

View File

@@ -14,6 +14,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/hashtable.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
@@ -28,10 +30,6 @@
#include "platform_gp10b_tegra.h"
#include "platform_ecc_sysfs.h"
#define ECC_STAT_NAME_MAX_SIZE 100
static DEFINE_HASHTABLE(ecc_hash_table, 5);
static u32 gen_ecc_hash_key(char *str)
{
int i = 0;
@@ -57,6 +55,7 @@ static ssize_t ecc_stat_show(struct device *dev,
struct gk20a_ecc_stat *ecc_stat;
u32 hash_key;
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
&subunit) == 2) {
@@ -78,7 +77,7 @@ static ssize_t ecc_stat_show(struct device *dev,
hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
hash_for_each_possible(ecc_hash_table,
hash_for_each_possible(l->ecc_sysfs_stats_htable,
ecc_stat,
hash_node,
hash_key) {
@@ -91,11 +90,9 @@ static ssize_t ecc_stat_show(struct device *dev,
return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
}
int gr_gp10b_ecc_stat_create(struct device *dev,
int is_l2,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat,
struct device_attribute **dev_attr_array)
int nvgpu_gr_ecc_stat_create(struct device *dev,
int is_l2, char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
char *ltc_unit_name = "ltc";
@@ -113,32 +110,29 @@ int gr_gp10b_ecc_stat_create(struct device *dev,
num_hw_units = g->gr.tpc_count;
return gp10b_ecc_stat_create(dev, num_hw_units, num_subunits,
return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
is_l2 ? ltc_unit_name : gr_unit_name,
num_subunits ? lts_unit_name: NULL,
ecc_stat_name,
ecc_stat,
dev_attr_array);
ecc_stat);
}
int gp10b_ecc_stat_create(struct device *dev,
int num_hw_units,
int num_subunits,
char *ecc_unit_name,
char *ecc_subunit_name,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat,
struct device_attribute **__dev_attr_array)
int nvgpu_ecc_stat_create(struct device *dev,
int num_hw_units, int num_subunits,
char *ecc_unit_name, char *ecc_subunit_name,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat)
{
int error = 0;
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int hw_unit = 0;
int subunit = 0;
int element = 0;
u32 hash_key = 0;
struct device_attribute *dev_attr_array;
int num_elements = num_subunits ? num_subunits*num_hw_units :
int num_elements = num_subunits ? num_subunits * num_hw_units :
num_hw_units;
/* Allocate arrays */
@@ -146,6 +140,7 @@ int gp10b_ecc_stat_create(struct device *dev,
num_elements);
ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
ECC_STAT_NAME_MAX_SIZE);
@@ -206,44 +201,58 @@ int gp10b_ecc_stat_create(struct device *dev,
/* Add hash table entry */
hash_key = gen_ecc_hash_key(ecc_stat_name);
hash_add(ecc_hash_table,
hash_add(l->ecc_sysfs_stats_htable,
&ecc_stat->hash_node,
hash_key);
*__dev_attr_array = dev_attr_array;
ecc_stat->attr_array = dev_attr_array;
return error;
}
void gr_gp10b_ecc_stat_remove(struct device *dev,
int is_l2,
struct gk20a_ecc_stat *ecc_stat,
struct device_attribute *dev_attr_array)
void nvgpu_gr_ecc_stat_remove(struct device *dev,
int is_l2, struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
int num_hw_units = 0;
int num_subunits = 0;
if (is_l2 == 1)
num_hw_units = g->ltc_count;
else if (is_l2 == 2)
num_hw_units = g->ltc_count * g->gr.slices_per_ltc;
else
else if (is_l2 == 2) {
num_hw_units = g->ltc_count;
num_subunits = g->gr.slices_per_ltc;
} else
num_hw_units = g->gr.tpc_count;
gp10b_ecc_stat_remove(dev, num_hw_units, ecc_stat, dev_attr_array);
nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
}
void gp10b_ecc_stat_remove(struct device *dev,
int num_hw_units,
struct gk20a_ecc_stat *ecc_stat,
struct device_attribute *dev_attr_array)
void nvgpu_ecc_stat_remove(struct device *dev,
int num_hw_units, int num_subunits,
struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
struct device_attribute *dev_attr_array = ecc_stat->attr_array;
int hw_unit = 0;
int subunit = 0;
int element = 0;
int num_elements = num_subunits ? num_subunits * num_hw_units :
num_hw_units;
/* Remove sysfs files */
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
device_remove_file(dev, &dev_attr_array[hw_unit]);
if (num_subunits) {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
for (subunit = 0; subunit < num_subunits; subunit++) {
element = hw_unit * num_subunits + subunit;
device_remove_file(dev,
&dev_attr_array[element]);
}
}
} else {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
device_remove_file(dev, &dev_attr_array[hw_unit]);
}
/* Remove hash table entry */
@@ -251,9 +260,10 @@ void gp10b_ecc_stat_remove(struct device *dev,
/* Free arrays */
nvgpu_kfree(g, ecc_stat->counters);
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
nvgpu_kfree(g, ecc_stat->names[hw_unit]);
}
nvgpu_kfree(g, ecc_stat->names);
nvgpu_kfree(g, dev_attr_array);
}

View File

@@ -19,27 +19,19 @@
#include "gp10b/gr_gp10b.h"
int gr_gp10b_ecc_stat_create(struct device *dev,
int is_l2,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat,
struct device_attribute **dev_attr_array);
int gp10b_ecc_stat_create(struct device *dev,
int num_hw_units,
int num_subunits,
char *ecc_unit_name,
char *ecc_subunit_name,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat,
struct device_attribute **__dev_attr_array);
#define ECC_STAT_NAME_MAX_SIZE 100
void gr_gp10b_ecc_stat_remove(struct device *dev,
int is_l2,
struct gk20a_ecc_stat *ecc_stat,
struct device_attribute *dev_attr_array);
void gp10b_ecc_stat_remove(struct device *dev,
int hw_units,
struct gk20a_ecc_stat *ecc_stat,
struct device_attribute *dev_attr_array);
int nvgpu_gr_ecc_stat_create(struct device *dev,
int is_l2, char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat);
int nvgpu_ecc_stat_create(struct device *dev,
int num_hw_units, int num_subunits,
char *ecc_unit_name, char *ecc_subunit_name,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat);
void nvgpu_gr_ecc_stat_remove(struct device *dev,
int is_l2, struct gk20a_ecc_stat *ecc_stat);
void nvgpu_ecc_stat_remove(struct device *dev,
int num_hw_units, int num_subunits,
struct gk20a_ecc_stat *ecc_stat);
#endif

View File

@@ -41,6 +41,7 @@
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "platform_ecc_sysfs.h"
#include "platform_gk20a_tegra.h"
#include "gp10b/platform_gp10b.h"
#include "platform_gp10b_tegra.h"
@@ -446,25 +447,6 @@ struct gk20a_platform gp10b_tegra_platform = {
.secure_buffer_size = 401408,
};
static struct device_attribute *dev_attr_sm_lrf_ecc_single_err_count_array;
static struct device_attribute *dev_attr_sm_lrf_ecc_double_err_count_array;
static struct device_attribute *dev_attr_sm_shm_ecc_sec_count_array;
static struct device_attribute *dev_attr_sm_shm_ecc_sed_count_array;
static struct device_attribute *dev_attr_sm_shm_ecc_ded_count_array;
static struct device_attribute *dev_attr_tex_ecc_total_sec_pipe0_count_array;
static struct device_attribute *dev_attr_tex_ecc_total_ded_pipe0_count_array;
static struct device_attribute *dev_attr_tex_ecc_unique_sec_pipe0_count_array;
static struct device_attribute *dev_attr_tex_ecc_unique_ded_pipe0_count_array;
static struct device_attribute *dev_attr_tex_ecc_total_sec_pipe1_count_array;
static struct device_attribute *dev_attr_tex_ecc_total_ded_pipe1_count_array;
static struct device_attribute *dev_attr_tex_ecc_unique_sec_pipe1_count_array;
static struct device_attribute *dev_attr_tex_ecc_unique_ded_pipe1_count_array;
static struct device_attribute *dev_attr_l2_ecc_sec_count_array;
static struct device_attribute *dev_attr_l2_ecc_ded_count_array;
void gr_gp10b_create_sysfs(struct gk20a *g)
{
int error = 0;
@@ -477,84 +459,80 @@ void gr_gp10b_create_sysfs(struct gk20a *g)
if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
return;
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_lrf_ecc_single_err_count",
&g->ecc.gr.sm_lrf_single_err_count,
&dev_attr_sm_lrf_ecc_single_err_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.sm_lrf_single_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_lrf_ecc_double_err_count",
&g->ecc.gr.sm_lrf_double_err_count,
&dev_attr_sm_lrf_ecc_double_err_count_array);
&g->ecc.gr.sm_lrf_double_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_sec_count",
&g->ecc.gr.sm_shm_sec_count,
&dev_attr_sm_shm_ecc_sec_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.sm_shm_sec_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_sed_count",
&g->ecc.gr.sm_shm_sed_count,
&dev_attr_sm_shm_ecc_sed_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.sm_shm_sed_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_ded_count",
&g->ecc.gr.sm_shm_ded_count,
&dev_attr_sm_shm_ecc_ded_count_array);
&g->ecc.gr.sm_shm_ded_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_sec_pipe0_count",
&g->ecc.gr.tex_total_sec_pipe0_count,
&dev_attr_tex_ecc_total_sec_pipe0_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.tex_total_sec_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_ded_pipe0_count",
&g->ecc.gr.tex_total_ded_pipe0_count,
&dev_attr_tex_ecc_total_ded_pipe0_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.tex_total_ded_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_sec_pipe0_count",
&g->ecc.gr.tex_unique_sec_pipe0_count,
&dev_attr_tex_ecc_unique_sec_pipe0_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.tex_unique_sec_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_ded_pipe0_count",
&g->ecc.gr.tex_unique_ded_pipe0_count,
&dev_attr_tex_ecc_unique_ded_pipe0_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.tex_unique_ded_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_sec_pipe1_count",
&g->ecc.gr.tex_total_sec_pipe1_count,
&dev_attr_tex_ecc_total_sec_pipe1_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.tex_total_sec_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_ded_pipe1_count",
&g->ecc.gr.tex_total_ded_pipe1_count,
&dev_attr_tex_ecc_total_ded_pipe1_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.tex_total_ded_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_sec_pipe1_count",
&g->ecc.gr.tex_unique_sec_pipe1_count,
&dev_attr_tex_ecc_unique_sec_pipe1_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.gr.tex_unique_sec_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_ded_pipe1_count",
&g->ecc.gr.tex_unique_ded_pipe1_count,
&dev_attr_tex_ecc_unique_ded_pipe1_count_array);
&g->ecc.gr.tex_unique_ded_pipe1_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
2,
"ecc_sec_count",
&g->ecc.ltc.l2_sec_count,
&dev_attr_l2_ecc_sec_count_array);
error |= gr_gp10b_ecc_stat_create(dev,
&g->ecc.ltc.l2_sec_count);
error |= nvgpu_gr_ecc_stat_create(dev,
2,
"ecc_ded_count",
&g->ecc.ltc.l2_ded_count,
&dev_attr_l2_ecc_ded_count_array);
&g->ecc.ltc.l2_ded_count);
if (error)
dev_err(dev, "Failed to create sysfs attributes!\n");
@@ -567,67 +545,63 @@ void gr_gp10b_remove_sysfs(struct gk20a *g)
if (!g->ecc.gr.sm_lrf_single_err_count.counters)
return;
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_lrf_single_err_count,
dev_attr_sm_lrf_ecc_single_err_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_lrf_double_err_count,
dev_attr_sm_lrf_ecc_double_err_count_array);
&g->ecc.gr.sm_lrf_single_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_sec_count,
dev_attr_sm_shm_ecc_sec_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_sed_count,
dev_attr_sm_shm_ecc_sed_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_ded_count,
dev_attr_sm_shm_ecc_ded_count_array);
&g->ecc.gr.sm_lrf_double_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_sec_pipe0_count,
dev_attr_tex_ecc_total_sec_pipe0_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_ded_pipe0_count,
dev_attr_tex_ecc_total_ded_pipe0_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_sec_pipe0_count,
dev_attr_tex_ecc_unique_sec_pipe0_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_ded_pipe0_count,
dev_attr_tex_ecc_unique_ded_pipe0_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_sec_pipe1_count,
dev_attr_tex_ecc_total_sec_pipe1_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_ded_pipe1_count,
dev_attr_tex_ecc_total_ded_pipe1_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_sec_pipe1_count,
dev_attr_tex_ecc_unique_sec_pipe1_count_array);
gr_gp10b_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_ded_pipe1_count,
dev_attr_tex_ecc_unique_ded_pipe1_count_array);
&g->ecc.gr.sm_shm_sec_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_sed_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_ded_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_sec_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_ded_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_sec_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_ded_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_sec_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_ded_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_sec_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_ded_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
2,
&g->ecc.ltc.l2_sec_count,
dev_attr_l2_ecc_sec_count_array);
gr_gp10b_ecc_stat_remove(dev,
&g->ecc.ltc.l2_sec_count);
nvgpu_gr_ecc_stat_remove(dev,
2,
&g->ecc.ltc.l2_ded_count,
dev_attr_l2_ecc_ded_count_array);
&g->ecc.ltc.l2_ded_count);
}

View File

@@ -39,6 +39,7 @@
#include "gp10b/platform_gp10b.h"
#include "platform_gp10b_tegra.h"
#include "platform_ecc_sysfs.h"
#include "os_linux.h"
#include "platform_gk20a_tegra.h"
@@ -261,41 +262,11 @@ struct gk20a_platform gv11b_tegra_platform = {
.secure_buffer_size = 667648,
};
static struct device_attribute *dev_attr_sm_l1_tag_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_sm_l1_tag_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_sm_cbu_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_sm_cbu_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_sm_l1_data_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_gcc_l15_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_gcc_l15_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_mmu_l1tlb_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_mmu_l1tlb_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_fecs_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_fecs_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_gpccs_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_gpccs_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_l2_cache_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_l2_cache_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_mmu_l2tlb_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_mmu_hubtlb_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_mmu_fillunit_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array;
static struct device_attribute *dev_attr_pmu_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_pmu_ecc_uncorrected_err_count_array;
void gr_gv11b_create_sysfs(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
int error = 0;
/* This stat creation function is called on GR init. GR can get
initialized multiple times but we only need to create the ECC
stats once. Therefore, add the following check to avoid
@@ -305,210 +276,183 @@ void gr_gv11b_create_sysfs(struct gk20a *g)
gr_gp10b_create_sysfs(g);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_tag_ecc_corrected_err_count",
&g->ecc.gr.sm_l1_tag_corrected_err_count,
&dev_attr_sm_l1_tag_ecc_corrected_err_count_array);
&g->ecc.gr.sm_l1_tag_corrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_tag_ecc_uncorrected_err_count",
&g->ecc.gr.sm_l1_tag_uncorrected_err_count,
&dev_attr_sm_l1_tag_ecc_uncorrected_err_count_array);
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_cbu_ecc_corrected_err_count",
&g->ecc.gr.sm_cbu_corrected_err_count,
&dev_attr_sm_cbu_ecc_corrected_err_count_array);
&g->ecc.gr.sm_cbu_corrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_cbu_ecc_uncorrected_err_count",
&g->ecc.gr.sm_cbu_uncorrected_err_count,
&dev_attr_sm_cbu_ecc_uncorrected_err_count_array);
&g->ecc.gr.sm_cbu_uncorrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_data_ecc_corrected_err_count",
&g->ecc.gr.sm_l1_data_corrected_err_count,
&dev_attr_sm_l1_data_ecc_corrected_err_count_array);
&g->ecc.gr.sm_l1_data_corrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_data_ecc_uncorrected_err_count",
&g->ecc.gr.sm_l1_data_uncorrected_err_count,
&dev_attr_sm_l1_data_ecc_uncorrected_err_count_array);
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_icache_ecc_corrected_err_count",
&g->ecc.gr.sm_icache_corrected_err_count,
&dev_attr_sm_icache_ecc_corrected_err_count_array);
&g->ecc.gr.sm_icache_corrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_icache_ecc_uncorrected_err_count",
&g->ecc.gr.sm_icache_uncorrected_err_count,
&dev_attr_sm_icache_ecc_uncorrected_err_count_array);
&g->ecc.gr.sm_icache_uncorrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"gcc_l15_ecc_corrected_err_count",
&g->ecc.gr.gcc_l15_corrected_err_count,
&dev_attr_gcc_l15_ecc_corrected_err_count_array);
&g->ecc.gr.gcc_l15_corrected_err_count);
error |= gr_gp10b_ecc_stat_create(dev,
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"gcc_l15_ecc_uncorrected_err_count",
&g->ecc.gr.gcc_l15_uncorrected_err_count,
&dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
&g->ecc.gr.gcc_l15_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
g->ltc_count,
0,
"ltc",
NULL,
"l2_cache_uncorrected_err_count",
&g->ecc.ltc.l2_cache_uncorrected_err_count,
&dev_attr_l2_cache_ecc_uncorrected_err_count_array);
&g->ecc.ltc.l2_cache_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
g->ltc_count,
0,
"ltc",
NULL,
"l2_cache_corrected_err_count",
&g->ecc.ltc.l2_cache_corrected_err_count,
&dev_attr_l2_cache_ecc_corrected_err_count_array);
&g->ecc.ltc.l2_cache_corrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"gpc",
NULL,
"fecs_ecc_uncorrected_err_count",
&g->ecc.gr.fecs_uncorrected_err_count,
&dev_attr_fecs_ecc_uncorrected_err_count_array);
&g->ecc.gr.fecs_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"gpc",
NULL,
"fecs_ecc_corrected_err_count",
&g->ecc.gr.fecs_corrected_err_count,
&dev_attr_fecs_ecc_corrected_err_count_array);
&g->ecc.gr.fecs_corrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"gpccs_ecc_uncorrected_err_count",
&g->ecc.gr.gpccs_uncorrected_err_count,
&dev_attr_gpccs_ecc_uncorrected_err_count_array);
&g->ecc.gr.gpccs_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"gpccs_ecc_corrected_err_count",
&g->ecc.gr.gpccs_corrected_err_count,
&dev_attr_gpccs_ecc_corrected_err_count_array);
&g->ecc.gr.gpccs_corrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"mmu_l1tlb_ecc_uncorrected_err_count",
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count,
&dev_attr_mmu_l1tlb_ecc_uncorrected_err_count_array);
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"mmu_l1tlb_ecc_corrected_err_count",
&g->ecc.gr.mmu_l1tlb_corrected_err_count,
&dev_attr_mmu_l1tlb_ecc_corrected_err_count_array);
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_l2tlb_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count,
&dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array);
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_l2tlb_ecc_corrected_err_count",
&g->ecc.fb.mmu_l2tlb_corrected_err_count,
&dev_attr_mmu_l2tlb_ecc_corrected_err_count_array);
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_hubtlb_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count,
&dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array);
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_hubtlb_ecc_corrected_err_count",
&g->ecc.fb.mmu_hubtlb_corrected_err_count,
&dev_attr_mmu_hubtlb_ecc_corrected_err_count_array);
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_fillunit_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_fillunit_uncorrected_err_count,
&dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array);
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_fillunit_ecc_corrected_err_count",
&g->ecc.fb.mmu_fillunit_corrected_err_count,
&dev_attr_mmu_fillunit_ecc_corrected_err_count_array);
&g->ecc.fb.mmu_fillunit_corrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"pmu_ecc_uncorrected_err_count",
&g->ecc.pmu.pmu_uncorrected_err_count,
&dev_attr_pmu_ecc_uncorrected_err_count_array);
&g->ecc.pmu.pmu_uncorrected_err_count);
error |= gp10b_ecc_stat_create(dev,
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"pmu_ecc_corrected_err_count",
&g->ecc.pmu.pmu_corrected_err_count,
&dev_attr_pmu_ecc_corrected_err_count_array);
&g->ecc.pmu.pmu_corrected_err_count);
if (error)
dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
@@ -522,133 +466,123 @@ void gr_gv11b_remove_sysfs(struct gk20a *g)
return;
gr_gp10b_remove_sysfs(g);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_tag_corrected_err_count,
dev_attr_sm_l1_tag_ecc_corrected_err_count_array);
&g->ecc.gr.sm_l1_tag_corrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_tag_uncorrected_err_count,
dev_attr_sm_l1_tag_ecc_uncorrected_err_count_array);
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_cbu_corrected_err_count,
dev_attr_sm_cbu_ecc_corrected_err_count_array);
&g->ecc.gr.sm_cbu_corrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_cbu_uncorrected_err_count,
dev_attr_sm_cbu_ecc_uncorrected_err_count_array);
&g->ecc.gr.sm_cbu_uncorrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_data_corrected_err_count,
dev_attr_sm_l1_data_ecc_corrected_err_count_array);
&g->ecc.gr.sm_l1_data_corrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_data_uncorrected_err_count,
dev_attr_sm_l1_data_ecc_uncorrected_err_count_array);
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_icache_corrected_err_count,
dev_attr_sm_icache_ecc_corrected_err_count_array);
&g->ecc.gr.sm_icache_corrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_icache_uncorrected_err_count,
dev_attr_sm_icache_ecc_uncorrected_err_count_array);
&g->ecc.gr.sm_icache_uncorrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.gcc_l15_corrected_err_count,
dev_attr_gcc_l15_ecc_corrected_err_count_array);
&g->ecc.gr.gcc_l15_corrected_err_count);
gr_gp10b_ecc_stat_remove(dev,
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.gcc_l15_uncorrected_err_count,
dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
&g->ecc.gr.gcc_l15_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
g->ltc_count,
&g->ecc.ltc.l2_cache_uncorrected_err_count,
dev_attr_l2_cache_ecc_uncorrected_err_count_array);
0,
&g->ecc.ltc.l2_cache_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
g->ltc_count,
&g->ecc.ltc.l2_cache_corrected_err_count,
dev_attr_l2_cache_ecc_corrected_err_count_array);
0,
&g->ecc.ltc.l2_cache_corrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.gr.fecs_uncorrected_err_count,
dev_attr_fecs_ecc_uncorrected_err_count_array);
0,
&g->ecc.gr.fecs_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.gr.fecs_corrected_err_count,
dev_attr_fecs_ecc_corrected_err_count_array);
0,
&g->ecc.gr.fecs_corrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
&g->ecc.gr.gpccs_uncorrected_err_count,
dev_attr_gpccs_ecc_uncorrected_err_count_array);
0,
&g->ecc.gr.gpccs_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
&g->ecc.gr.gpccs_corrected_err_count,
dev_attr_gpccs_ecc_corrected_err_count_array);
0,
&g->ecc.gr.gpccs_corrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count,
dev_attr_mmu_l1tlb_ecc_uncorrected_err_count_array);
0,
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
&g->ecc.gr.mmu_l1tlb_corrected_err_count,
dev_attr_mmu_l1tlb_ecc_corrected_err_count_array);
0,
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count,
dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array);
0,
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.fb.mmu_l2tlb_corrected_err_count,
dev_attr_mmu_l2tlb_ecc_corrected_err_count_array);
0,
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count,
dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array);
0,
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.fb.mmu_hubtlb_corrected_err_count,
dev_attr_mmu_hubtlb_ecc_corrected_err_count_array);
0,
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.fb.mmu_fillunit_uncorrected_err_count,
dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array);
0,
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.fb.mmu_fillunit_corrected_err_count,
dev_attr_mmu_fillunit_ecc_corrected_err_count_array);
0,
&g->ecc.fb.mmu_fillunit_corrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.pmu.pmu_uncorrected_err_count,
dev_attr_pmu_ecc_uncorrected_err_count_array);
0,
&g->ecc.pmu.pmu_uncorrected_err_count);
gp10b_ecc_stat_remove(dev,
nvgpu_ecc_stat_remove(dev,
1,
&g->ecc.pmu.pmu_corrected_err_count,
dev_attr_pmu_ecc_corrected_err_count_array);
0,
&g->ecc.pmu.pmu_corrected_err_count);
}

View File

@@ -30,6 +30,7 @@ struct gk20a_ecc_stat {
u32 count;
#ifdef CONFIG_SYSFS
struct hlist_node hash_node;
struct device_attribute *attr_array;
#endif
};