gpu: nvgpu: unit: add coverage tests for gops.gr.init.ecc_scrub_reg

Add a new unit test to cover the gops.gr.init.ecc_scrub_reg HAL function.

The gops.gr.init.ecc_scrub_reg HAL can generate TIMEOUT errors which are
currently not returned to the caller. Update this HAL to return an int
value so that such errors can be propagated.
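For reference, a minimal sketch of the new contract and of the intended call
pattern, condensed from the hunks below (the exact error code returned on a
scrub timeout is not spelled out here):

	/* HAL now reports scrub status: 0 on success, a negative error
	 * code (e.g. when polling for scrub completion times out)
	 * otherwise.
	 */
	int (*ecc_scrub_reg)(struct gk20a *g,
			struct nvgpu_gr_config *gr_config);

	/* Caller side (gr_init_setup_hw) propagates the error instead of
	 * ignoring it; err and the "out" cleanup label already exist there.
	 */
	if (g->ops.gr.init.ecc_scrub_reg != NULL) {
		err = g->ops.gr.init.ecc_scrub_reg(g, gr->config);
		if (err != 0) {
			goto out;
		}
	}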

Jira NVGPU-4458

Change-Id: I98f4d5af2ef17cc4301951fec4d660638c8ef72c
Signed-off-by: dnibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2265456
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: dnibade
Authored: 2019-12-17 14:37:53 +05:30
Committed by: Alex Waterman
Parent: 5f030d6c52
Commit: ab76dc1ad5
8 changed files with 209 additions and 19 deletions


@@ -245,7 +245,10 @@ static int gr_init_setup_hw(struct gk20a *g)
 	/* enable ECC for L1/SM */
 	if (g->ops.gr.init.ecc_scrub_reg != NULL) {
-		g->ops.gr.init.ecc_scrub_reg(g, gr->config);
+		err = g->ops.gr.init.ecc_scrub_reg(g, gr->config);
+		if (err != 0) {
+			goto out;
+		}
 	}

 	/** Reset and enable exceptions */


@@ -32,7 +32,7 @@ struct netlist_av_list;
 u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc,
 		struct nvgpu_gr_config *gr_config);
-void gv11b_gr_init_ecc_scrub_reg(struct gk20a *g,
+int gv11b_gr_init_ecc_scrub_reg(struct gk20a *g,
 		struct nvgpu_gr_config *gr_config);
 void gv11b_gr_init_gpc_mmu(struct gk20a *g);
 #ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP


@@ -113,7 +113,7 @@ static int gr_gv11b_ecc_scrub_is_done(struct gk20a *g,
 	return 0;
 }

-static void gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g,
+static int gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g,
 		struct nvgpu_gr_config *gr_config)
 {
 	u32 scrub_mask, scrub_done;
@@ -121,7 +121,7 @@ static void gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g,
 	if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_LRF)) {
 		nvgpu_log_info(g, "ECC SM LRF is disabled");
-		return;
+		return 0;
 	}

 	nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_lrf");
@@ -154,9 +154,11 @@ static void gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g,
 	if (err != 0) {
 		nvgpu_warn(g, "ECC SCRUB SM LRF Failed");
 	}
+
+	return err;
 }

-static void gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g,
+static int gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g,
 		struct nvgpu_gr_config *gr_config)
 {
 	u32 scrub_mask, scrub_done;
@@ -164,7 +166,7 @@ static void gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g,
 	if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_L1_DATA)) {
 		nvgpu_log_info(g, "ECC L1DATA is disabled");
-		return;
+		return 0;
 	}

 	nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_l1_data");
 	scrub_mask =
@@ -184,9 +186,11 @@ static void gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g,
 	if (err != 0) {
 		nvgpu_warn(g, "ECC SCRUB SM L1 DATA Failed");
 	}
+
+	return err;
 }

-static void gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g,
+static int gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g,
 		struct nvgpu_gr_config *gr_config)
 {
 	u32 scrub_mask, scrub_done;
@@ -194,7 +198,7 @@ static void gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g,
 	if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_L1_TAG)) {
 		nvgpu_log_info(g, "ECC L1TAG is disabled");
-		return;
+		return 0;
 	}

 	nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_l1_tag");
 	scrub_mask =
@@ -217,9 +221,11 @@ static void gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g,
 	if (err != 0) {
 		nvgpu_warn(g, "ECC SCRUB SM L1 TAG Failed");
 	}
+
+	return err;
 }

-static void gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g,
+static int gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g,
 		struct nvgpu_gr_config *gr_config)
 {
 	u32 scrub_mask, scrub_done;
@@ -227,7 +233,7 @@ static void gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g,
 	if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_CBU)) {
 		nvgpu_log_info(g, "ECC CBU is disabled");
-		return;
+		return 0;
 	}

 	nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_cbu");
 	scrub_mask =
@@ -249,9 +255,11 @@ static void gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g,
 	if (err != 0) {
 		nvgpu_warn(g, "ECC SCRUB SM CBU Failed");
 	}
+
+	return err;
 }

-static void gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g,
+static int gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g,
 		struct nvgpu_gr_config *gr_config)
 {
 	u32 scrub_mask, scrub_done;
@@ -259,7 +267,7 @@ static void gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g,
 	if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_ICACHE)) {
 		nvgpu_log_info(g, "ECC ICAHE is disabled");
-		return;
+		return 0;
 	}

 	nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_icahe");
 	scrub_mask =
@@ -282,22 +290,43 @@ static void gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g,
 	if (err != 0) {
 		nvgpu_warn(g, "ECC SCRUB SM ICACHE Failed");
 	}
+
+	return err;
 }

-void gv11b_gr_init_ecc_scrub_reg(struct gk20a *g,
+int gv11b_gr_init_ecc_scrub_reg(struct gk20a *g,
 		struct nvgpu_gr_config *gr_config)
 {
+	int err;
+
 	nvgpu_log_fn(g, "ecc srub start");

-	gr_gv11b_ecc_scrub_sm_lrf(g, gr_config);
+	err = gr_gv11b_ecc_scrub_sm_lrf(g, gr_config);
+	if (err != 0) {
+		return err;
+	}
+
-	gr_gv11b_ecc_scrub_sm_l1_data(g, gr_config);
+	err = gr_gv11b_ecc_scrub_sm_l1_data(g, gr_config);
+	if (err != 0) {
+		return err;
+	}
+
-	gr_gv11b_ecc_scrub_sm_l1_tag(g, gr_config);
+	err = gr_gv11b_ecc_scrub_sm_l1_tag(g, gr_config);
+	if (err != 0) {
+		return err;
+	}
+
-	gr_gv11b_ecc_scrub_sm_cbu(g, gr_config);
+	err = gr_gv11b_ecc_scrub_sm_cbu(g, gr_config);
+	if (err != 0) {
+		return err;
+	}
+
-	gr_gv11b_ecc_scrub_sm_icahe(g, gr_config);
+	err = gr_gv11b_ecc_scrub_sm_icahe(g, gr_config);
+	if (err != 0) {
+		return err;
+	}
+
+	return err;
 }

 u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc,


@@ -613,7 +613,7 @@ struct gops_gr_init {
 			u32 *default_compute_preempt_mode);
 	/** @cond DOXYGEN_SHOULD_SKIP_THIS */
-	void (*ecc_scrub_reg)(struct gk20a *g,
+	int (*ecc_scrub_reg)(struct gk20a *g,
 			struct nvgpu_gr_config *gr_config);
 	void (*lg_coalesce)(struct gk20a *g, u32 data);
 	void (*su_coalesce)(struct gk20a *g, u32 data);


@@ -2501,6 +2501,12 @@
 		"unit": "nvgpu_gr_init",
 		"test_level": 0
 	},
+	{
+		"test": "test_gr_init_hal_ecc_scrub_reg",
+		"case": "gr_init_hal_ecc_scrub_reg",
+		"unit": "nvgpu_gr_init",
+		"test_level": 0
+	},
 	{
 		"test": "test_gr_remove_setup",
 		"case": "gr_remove_setup",


@@ -33,6 +33,8 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/config.h>
+#include <nvgpu/gr/gr_utils.h>

 #include "common/gr/gr_priv.h"
 #include "../nvgpu-gr.h"
@@ -47,6 +49,125 @@ static int dummy_l2_flush(struct gk20a *g, bool invalidate)
 	return 0;
 }

+struct gr_ecc_scrub_reg_rec {
+	u32 addr;
+	u32 scrub_done;
+};
+
+struct gr_ecc_scrub_reg_rec ecc_scrub_data[] = {
+	{
+		.addr = gr_pri_gpc0_tpc0_sm_lrf_ecc_control_r(),
+		.scrub_done =
+			(gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp0_init_f() |
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp1_init_f() |
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp2_init_f() |
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp3_init_f() |
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp4_init_f() |
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp5_init_f() |
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp6_init_f() |
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp7_init_f()),
+	},
+	{
+		.addr = gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_r(),
+		.scrub_done =
+			(gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_scrub_el1_0_init_f() |
+			gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_scrub_el1_1_init_f()),
+	},
+	{
+		.addr = gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_r(),
+		.scrub_done =
+			(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_el1_0_init_f() |
+			gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_el1_1_init_f() |
+			gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_pixprf_init_f() |
+			gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_miss_fifo_init_f()),
+	},
+	{
+		.addr = gr_pri_gpc0_tpc0_sm_cbu_ecc_control_r(),
+		.scrub_done =
+			(gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_warp_sm0_init_f() |
+			gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_warp_sm1_init_f() |
+			gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_barrier_sm0_init_f() |
+			gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_barrier_sm1_init_f()),
+	},
+	{
+		.addr = gr_pri_gpc0_tpc0_sm_icache_ecc_control_r(),
+		.scrub_done =
+			(gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l0_data_init_f() |
+			gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l0_predecode_init_f() |
+			gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l1_data_init_f() |
+			gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l1_predecode_init_f()),
+	},
+};
+
+int test_gr_init_hal_ecc_scrub_reg(struct unit_module *m,
+		struct gk20a *g, void *args)
+{
+	u32 i;
+	int err;
+	struct nvgpu_gr_config *config = nvgpu_gr_get_config_ptr(g);
+	struct nvgpu_posix_fault_inj *timer_fi =
+			nvgpu_timers_get_fault_injection();
+
+	/* Code coverage */
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_ICACHE, false);
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_CBU, false);
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_L1_TAG, false);
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_L1_DATA, false);
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_LRF, false);
+
+	err = g->ops.gr.init.ecc_scrub_reg(g, config);
+	if (err != 0) {
+		unit_return_fail(m, "ECC scrub failed");
+	}
+
+	/* Re-enable the features */
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_ICACHE, true);
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_CBU, true);
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_L1_TAG, true);
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_L1_DATA, true);
+	nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_LRF, true);
+
+	/* Trigger timeout initialization failure */
+	for (i = 0;
+	     i < (sizeof(ecc_scrub_data) / sizeof(struct gr_ecc_scrub_reg_rec));
+	     i++) {
+		nvgpu_posix_enable_fault_injection(timer_fi, true, i);
+
+		err = g->ops.gr.init.ecc_scrub_reg(g, config);
+		if (err == 0) {
+			unit_return_fail(m, "Timeout was expected");
+		}
+	}
+
+	nvgpu_posix_enable_fault_injection(timer_fi, false, 0);
+
+	for (i = 0;
+	     i < (sizeof(ecc_scrub_data) / sizeof(struct gr_ecc_scrub_reg_rec));
+	     i++) {
+		/* Set incorrect values of scrub_done so that scrub wait times out */
+		nvgpu_writel(g,
+			ecc_scrub_data[i].addr,
+			~(ecc_scrub_data[i].scrub_done));
+
+		err = g->ops.gr.init.ecc_scrub_reg(g, config);
+		if (err == 0) {
+			unit_return_fail(m, "Timeout was expected");
+		}
+
+		/* Set correct values of scrub_done so that scrub wait is successful */
+		nvgpu_writel(g,
+			ecc_scrub_data[i].addr,
+			ecc_scrub_data[i].scrub_done);
+	}
+
+	/* No error injection, should be successful */
+	err = g->ops.gr.init.ecc_scrub_reg(g, config);
+	if (err != 0) {
+		unit_return_fail(m, "ECC scrub failed");
+	}
+
+	return UNIT_SUCCESS;
+}
+
 int test_gr_init_hal_wait_empty(struct unit_module *m,
 		struct gk20a *g, void *args)
 {


@@ -58,6 +58,36 @@ struct unit_module;
 int test_gr_init_hal_wait_empty(struct unit_module *m,
 		struct gk20a *g, void *args);

+/**
+ * Test specification for: test_gr_init_hal_ecc_scrub_reg.
+ *
+ * Description: Verify error handling in the gops.gr.init.ecc_scrub_reg
+ * function.
+ *
+ * Test Type: Feature, Error guessing.
+ *
+ * Targets: g->ops.gr.init.ecc_scrub_reg.
+ *
+ * Input: gr_init_setup, gr_init_prepare and gr_init_support must have
+ * been executed successfully.
+ *
+ * Steps:
+ * - Disable the common.gr ECC feature flags for code coverage and call
+ *   g->ops.gr.init.ecc_scrub_reg.
+ * - Re-enable all the feature flags.
+ * - Inject timeout initialization failures and call
+ *   g->ops.gr.init.ecc_scrub_reg (a condensed sketch of this mechanism
+ *   follows the declaration below).
+ * - Set incorrect values of scrub_done for each scrub unit so that the
+ *   scrub wait times out.
+ * - Ensure that g->ops.gr.init.ecc_scrub_reg returns an error.
+ * - Set correct values of scrub_done for each scrub unit so that the
+ *   scrub wait succeeds again.
+ *
+ * Output: Returns PASS if the steps above were executed successfully. FAIL
+ * otherwise.
+ */
+int test_gr_init_hal_ecc_scrub_reg(struct unit_module *m,
+		struct gk20a *g, void *args);
+
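As an aside, a condensed sketch of the two timeout paths this test drives,
simplified from the unit-test source above (timer_fi, config, i and the
ecc_scrub_data[] table are all set up there; the exact meaning of the fault
injection counter argument is an assumption based on the test's loop):

	/* Trigger a timeout initialization failure inside the HAL; the
	 * counter argument steps the injected failure through the scrub
	 * units one by one.
	 */
	nvgpu_posix_enable_fault_injection(timer_fi, true, i);
	err = g->ops.gr.init.ecc_scrub_reg(g, config);	/* expect err != 0 */
	nvgpu_posix_enable_fault_injection(timer_fi, false, 0);

	/* Or make the scrub-done poll time out by programming the inverse
	 * of the expected scrub_done bits into the ECC control register.
	 */
	nvgpu_writel(g, ecc_scrub_data[i].addr, ~(ecc_scrub_data[i].scrub_done));
	err = g->ops.gr.init.ecc_scrub_reg(g, config);	/* expect err != 0 */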
 /**
  * Test specification for: test_gr_init_hal_error_injection.
  *


@@ -185,6 +185,7 @@ struct unit_module_test nvgpu_gr_init_tests[] = {
 	UNIT_TEST(gr_init_support, test_gr_init_support, NULL, 0),
 	UNIT_TEST(gr_init_hal_error_injection, test_gr_init_hal_error_injection, NULL, 0),
 	UNIT_TEST(gr_init_hal_wait_empty, test_gr_init_hal_wait_empty, NULL, 0),
+	UNIT_TEST(gr_init_hal_ecc_scrub_reg, test_gr_init_hal_ecc_scrub_reg, NULL, 0),
 	UNIT_TEST(gr_suspend, test_gr_suspend, NULL, 0),
 	UNIT_TEST(gr_ecc_features, test_gr_init_ecc_features, NULL, 0),
 	UNIT_TEST(gr_remove_support, test_gr_remove_support, NULL, 0),