From 2427d4510229b74596c4f5fdcf8fce19f682f3a3 Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Fri, 21 Aug 2020 11:06:17 +0530
Subject: [PATCH] gpu: nvgpu: initialize gr ecc counters for each instance

Add new API nvgpu_ecc_counter_init_per_gr() to initialize ECC counters
per GR instance.

Switch NVGPU_ECC_COUNTER_INIT_GR macro to use
nvgpu_ecc_counter_init_per_gr() instead of nvgpu_ecc_counter_init().

Fix error handling path in nvgpu_gr_alloc().

Jira NVGPU-5648

Change-Id: I18f1bf8b245956bdb5a3e4bb6b03114282366ce6
Signed-off-by: Deepak Nibade
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2402025
Reviewed-by: automaticguardword
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-cert
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
Tested-by: mobile promotions
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/common/gr/gr.c            | 10 ++++--
 drivers/gpu/nvgpu/common/gr/gr_ecc.c        | 37 +++++++++++++++++++++
 drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h | 18 +++++++++-
 3 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c
index 3dc08ec13..5b1636630 100644
--- a/drivers/gpu/nvgpu/common/gr/gr.c
+++ b/drivers/gpu/nvgpu/common/gr/gr.c
@@ -822,12 +822,14 @@ int nvgpu_gr_alloc(struct gk20a *g)
 	if (gr->falcon == NULL) {
 		nvgpu_err(g, "failed to init gr falcon");
 		err = -ENOMEM;
+		goto fail;
 	}
 
 	gr->intr = nvgpu_gr_intr_init_support(g);
 	if (gr->intr == NULL) {
 		nvgpu_err(g, "failed to init gr intr support");
 		err = -ENOMEM;
+		goto fail;
 	}
 
 	nvgpu_cond_init(&gr->init_wq);
@@ -845,13 +847,15 @@ int nvgpu_gr_alloc(struct gk20a *g)
 		err = g->ops.gr.ecc.fecs_ecc_init(g);
 		if (err != 0) {
 			nvgpu_err(g, "failed to init gr fecs ecc");
-
-			nvgpu_gr_intr_remove_support(g, gr->intr);
-			gr->intr = NULL;
+			goto fail;
 		}
 	}
 
 	return 0;
+
+fail:
+	nvgpu_gr_free(g);
+	return err;
 }
 
 void nvgpu_gr_free(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/common/gr/gr_ecc.c b/drivers/gpu/nvgpu/common/gr/gr_ecc.c
index de47b9e00..fc25438cb 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_ecc.c
+++ b/drivers/gpu/nvgpu/common/gr/gr_ecc.c
@@ -28,6 +28,43 @@
 #include
 #include
 
+int nvgpu_ecc_counter_init_per_gr(struct gk20a *g,
+		struct nvgpu_ecc_stat **stat, const char *name)
+{
+	struct nvgpu_ecc_stat *stats;
+	u32 i;
+	char gr_str[10] = {0};
+
+	stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
+			      g->num_gr_instances));
+	if (stats == NULL) {
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < g->num_gr_instances; i++) {
+		/**
+		 * Store stats name as gr<gr_index>_<name_string>. Each
+		 * strncat() bound reserves one byte for the NUL it appends.
+		 */
+		(void)strcpy(stats[i].name, "gr");
+		(void)nvgpu_strnadd_u32(gr_str, i, sizeof(gr_str), 10U);
+		(void)strncat(stats[i].name, gr_str,
+			NVGPU_ECC_STAT_NAME_MAX_SIZE -
+			strlen(stats[i].name) - 1U);
+		(void)strncat(stats[i].name, "_",
+			NVGPU_ECC_STAT_NAME_MAX_SIZE -
+			strlen(stats[i].name) - 1U);
+		(void)strncat(stats[i].name, name,
+			NVGPU_ECC_STAT_NAME_MAX_SIZE -
+			strlen(stats[i].name) - 1U);
+
+		nvgpu_ecc_stat_add(g, &stats[i]);
+	}
+
+	*stat = stats;
+	return 0;
+}
+
 int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
 		struct nvgpu_ecc_stat ***stat, const char *name)
 {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h
index f7974b388..f23f9ba9b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_ecc.h
@@ -83,6 +83,22 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
 #define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
 	nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)
 
+/**
+ * @brief Allocate and initialize error counter specified by name for all gr
+ *        instances.
+ *
+ * @param g [in] The GPU driver struct.
+ * @param stat [out] Pointer to array of error counters.
+ * @param name [in] Unique name for error counter.
+ *
+ * Initialize the memory to hold error counters associated with each gr
+ * instance. Counter name is set in the form of gr<gr_index>_<name_string>.
+ *
+ * @return 0 in case of success, less than 0 for failure.
+ */
+int nvgpu_ecc_counter_init_per_gr(struct gk20a *g,
+	struct nvgpu_ecc_stat **stat, const char *name);
+
 /*
  * @brief Allocate and initialize counters for memories shared within GR.
  *
@@ -90,7 +106,7 @@ int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
  *
  */
 #define NVGPU_ECC_COUNTER_INIT_GR(stat) \
-	nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
+	nvgpu_ecc_counter_init_per_gr(g, &g->ecc.gr.stat, #stat)
 
 /**
  * @brief Release all GR ECC stats counters.