gpu: nvgpu: initialize gr ecc counters for each instance

Add new API nvgpu_ecc_counter_init_per_gr() to initialize ECC counters
per GR instance.
Switch NVGPU_ECC_COUNTER_INIT_GR macro to use
nvgpu_ecc_counter_init_per_gr() instead of nvgpu_ecc_counter_init().

Fix error handling path in nvgpu_gr_alloc().

Jira NVGPU-5648

Change-Id: I18f1bf8b245956bdb5a3e4bb6b03114282366ce6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2402025
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Deepak Nibade
2020-08-21 11:06:17 +05:30
committed by Alex Waterman
parent f4cc6bf7b9
commit 2427d45102
3 changed files with 61 additions and 4 deletions

View File

@@ -822,12 +822,14 @@ int nvgpu_gr_alloc(struct gk20a *g)
if (gr->falcon == NULL) {
nvgpu_err(g, "failed to init gr falcon");
err = -ENOMEM;
goto fail;
}
gr->intr = nvgpu_gr_intr_init_support(g);
if (gr->intr == NULL) {
nvgpu_err(g, "failed to init gr intr support");
err = -ENOMEM;
goto fail;
}
nvgpu_cond_init(&gr->init_wq);
@@ -845,13 +847,15 @@ int nvgpu_gr_alloc(struct gk20a *g)
err = g->ops.gr.ecc.fecs_ecc_init(g);
if (err != 0) {
nvgpu_err(g, "failed to init gr fecs ecc");
nvgpu_gr_intr_remove_support(g, gr->intr);
gr->intr = NULL;
goto fail;
}
}
return 0;
fail:
nvgpu_gr_free(g);
return err;
}
void nvgpu_gr_free(struct gk20a *g)

View File

@@ -28,6 +28,43 @@
#include <nvgpu/kmem.h>
#include <nvgpu/ecc.h>
/*
 * Allocate one ECC error counter per GR instance, name each counter
 * "gr<gr_index>_<name>" and register it through nvgpu_ecc_stat_add().
 *
 * g    - GPU driver struct; g->num_gr_instances gives the array length.
 * stat - receives the allocated counter array; written only on success.
 * name - base counter name appended after the "gr<gr_index>_" prefix.
 *
 * Returns 0 on success, -ENOMEM if the counter array cannot be allocated.
 */
int nvgpu_ecc_counter_init_per_gr(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name)
{
	struct nvgpu_ecc_stat *stats;
	u32 i;
	char gr_str[10] = {0};

	stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
			      g->num_gr_instances));
	if (stats == NULL) {
		return -ENOMEM;
	}

	for (i = 0; i < g->num_gr_instances; i++) {
		/**
		 * Store stats name as below:
		 * gr<gr_index>_<name_string>
		 *
		 * strncat() appends at most n characters and then writes a
		 * terminating NUL, so every bound below keeps one byte in
		 * reserve for that terminator. Without the "- 1U" a long name
		 * would put the NUL one byte past the end of the buffer.
		 * NOTE(review): assumes stats[i].name holds
		 * NVGPU_ECC_STAT_NAME_MAX_SIZE bytes - confirm against the
		 * struct nvgpu_ecc_stat definition.
		 */
		(void)strcpy(stats[i].name, "gr");
		(void)nvgpu_strnadd_u32(gr_str, i, sizeof(gr_str), 10U);
		(void)strncat(stats[i].name, gr_str,
			      NVGPU_ECC_STAT_NAME_MAX_SIZE -
			      strlen(stats[i].name) - 1U);
		(void)strncat(stats[i].name, "_",
			      NVGPU_ECC_STAT_NAME_MAX_SIZE -
			      strlen(stats[i].name) - 1U);
		(void)strncat(stats[i].name, name,
			      NVGPU_ECC_STAT_NAME_MAX_SIZE -
			      strlen(stats[i].name) - 1U);

		nvgpu_ecc_stat_add(g, &stats[i]);
	}

	*stat = stats;
	return 0;
}
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
struct nvgpu_ecc_stat ***stat, const char *name)
{

View File

@@ -83,6 +83,22 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)
/**
 * @brief Allocate and initialize error counter specified by name for all gr
 *        instances.
 *
 * @param g [in] The GPU driver struct.
 * @param stat [out] Pointer to array of error counters. Written only on
 *                   success.
 * @param name [in] Unique name for error counter.
 *
 * Allocate an array holding one error counter per gr instance
 * (g->num_gr_instances entries). Each counter name is set in the form of
 * gr<gr_index>_<provided_name> and each counter is registered through
 * nvgpu_ecc_stat_add().
 *
 * @return 0 in case of success, less than 0 for failure.
 * @retval -ENOMEM if memory for the counter array cannot be allocated.
 */
int nvgpu_ecc_counter_init_per_gr(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name);
/**
* @brief Allocate and initialize counters for memories shared within GR.
*
@@ -90,7 +106,7 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
*
*/
#define NVGPU_ECC_COUNTER_INIT_GR(stat) \
nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
nvgpu_ecc_counter_init_per_gr(g, &g->ecc.gr.stat, #stat)
/**
* @brief Release all GR ECC stats counters.