From ab76dc1ad5d1e1e80867180b7a67282cdfd2e562 Mon Sep 17 00:00:00 2001 From: dnibade Date: Tue, 17 Dec 2019 14:37:53 +0530 Subject: [PATCH] gpu: nvgpu: unit: add coverage tests for gops.gr.init.ecc_scrub_reg Add a new unit test to cover the gops.gr.init.ecc_scrub_reg HAL function. The gops.gr.init.ecc_scrub_reg HAL can generate TIMEOUT errors which are currently not returned to the caller. Update this HAL to return an int value for error propagation. Jira NVGPU-4458 Change-Id: I98f4d5af2ef17cc4301951fec4d660638c8ef72c Signed-off-by: dnibade Reviewed-on: https://git-master.nvidia.com/r/2265456 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/gr.c | 5 +- drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h | 2 +- .../nvgpu/hal/gr/init/gr_init_gv11b_fusa.c | 61 ++++++--- drivers/gpu/nvgpu/include/nvgpu/gops_gr.h | 2 +- userspace/required_tests.json | 6 + .../units/gr/init/nvgpu-gr-init-hal-gv11b.c | 121 ++++++++++++++++++ .../units/gr/init/nvgpu-gr-init-hal-gv11b.h | 30 +++++ userspace/units/gr/init/nvgpu-gr-init.c | 1 + 8 files changed, 209 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index 7303b4143..ed0a23fa8 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -245,7 +245,10 @@ static int gr_init_setup_hw(struct gk20a *g) /* enable ECC for L1/SM */ if (g->ops.gr.init.ecc_scrub_reg != NULL) { - g->ops.gr.init.ecc_scrub_reg(g, gr->config); + err = g->ops.gr.init.ecc_scrub_reg(g, gr->config); + if (err != 0) { + goto out; + } } /** Reset and enable exceptions */ diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h index 6dbe2ffa2..ad97b82f3 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h @@ -32,7 +32,7 @@ struct netlist_av_list; u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc, struct nvgpu_gr_config
*gr_config); -void gv11b_gr_init_ecc_scrub_reg(struct gk20a *g, +int gv11b_gr_init_ecc_scrub_reg(struct gk20a *g, struct nvgpu_gr_config *gr_config); void gv11b_gr_init_gpc_mmu(struct gk20a *g); #ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c index 21a473784..c35a053d8 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c @@ -113,7 +113,7 @@ static int gr_gv11b_ecc_scrub_is_done(struct gk20a *g, return 0; } -static void gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g, +static int gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g, struct nvgpu_gr_config *gr_config) { u32 scrub_mask, scrub_done; @@ -121,7 +121,7 @@ static void gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g, if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_LRF)) { nvgpu_log_info(g, "ECC SM LRF is disabled"); - return; + return 0; } nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_lrf"); @@ -154,9 +154,11 @@ static void gr_gv11b_ecc_scrub_sm_lrf(struct gk20a *g, if (err != 0) { nvgpu_warn(g, "ECC SCRUB SM LRF Failed"); } + + return err; } -static void gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g, +static int gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g, struct nvgpu_gr_config *gr_config) { u32 scrub_mask, scrub_done; @@ -164,7 +166,7 @@ static void gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g, if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_L1_DATA)) { nvgpu_log_info(g, "ECC L1DATA is disabled"); - return; + return 0; } nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_l1_data"); scrub_mask = @@ -184,9 +186,11 @@ static void gr_gv11b_ecc_scrub_sm_l1_data(struct gk20a *g, if (err != 0) { nvgpu_warn(g, "ECC SCRUB SM L1 DATA Failed"); } + + return err; } -static void gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g, +static int gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g, struct nvgpu_gr_config *gr_config) { u32 scrub_mask, scrub_done; @@ -194,7 +198,7 @@ static void 
gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g, if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_L1_TAG)) { nvgpu_log_info(g, "ECC L1TAG is disabled"); - return; + return 0; } nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_l1_tag"); scrub_mask = @@ -217,9 +221,11 @@ static void gr_gv11b_ecc_scrub_sm_l1_tag(struct gk20a *g, if (err != 0) { nvgpu_warn(g, "ECC SCRUB SM L1 TAG Failed"); } + + return err; } -static void gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g, +static int gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g, struct nvgpu_gr_config *gr_config) { u32 scrub_mask, scrub_done; @@ -227,7 +233,7 @@ static void gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g, if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_CBU)) { nvgpu_log_info(g, "ECC CBU is disabled"); - return; + return 0; } nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_cbu"); scrub_mask = @@ -249,9 +255,11 @@ static void gr_gv11b_ecc_scrub_sm_cbu(struct gk20a *g, if (err != 0) { nvgpu_warn(g, "ECC SCRUB SM CBU Failed"); } + + return err; } -static void gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g, +static int gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g, struct nvgpu_gr_config *gr_config) { u32 scrub_mask, scrub_done; @@ -259,7 +267,7 @@ static void gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g, if (!nvgpu_is_enabled(g, NVGPU_ECC_ENABLED_SM_ICACHE)) { nvgpu_log_info(g, "ECC ICAHE is disabled"); - return; + return 0; } nvgpu_log_info(g, "gr_gv11b_ecc_scrub_sm_icahe"); scrub_mask = @@ -282,22 +290,43 @@ static void gr_gv11b_ecc_scrub_sm_icahe(struct gk20a *g, if (err != 0) { nvgpu_warn(g, "ECC SCRUB SM ICACHE Failed"); } + + return err; } -void gv11b_gr_init_ecc_scrub_reg(struct gk20a *g, +int gv11b_gr_init_ecc_scrub_reg(struct gk20a *g, struct nvgpu_gr_config *gr_config) { + int err; + nvgpu_log_fn(g, "ecc srub start"); - gr_gv11b_ecc_scrub_sm_lrf(g, gr_config); + err = gr_gv11b_ecc_scrub_sm_lrf(g, gr_config); + if (err != 0) { + return err; + } - gr_gv11b_ecc_scrub_sm_l1_data(g, gr_config); + err = gr_gv11b_ecc_scrub_sm_l1_data(g, gr_config); 
+ if (err != 0) { + return err; + } - gr_gv11b_ecc_scrub_sm_l1_tag(g, gr_config); + err = gr_gv11b_ecc_scrub_sm_l1_tag(g, gr_config); + if (err != 0) { + return err; + } - gr_gv11b_ecc_scrub_sm_cbu(g, gr_config); + err = gr_gv11b_ecc_scrub_sm_cbu(g, gr_config); + if (err != 0) { + return err; + } - gr_gv11b_ecc_scrub_sm_icahe(g, gr_config); + err = gr_gv11b_ecc_scrub_sm_icahe(g, gr_config); + if (err != 0) { + return err; + } + + return err; } u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h index 6d807e625..2683ad5cc 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h @@ -613,7 +613,7 @@ struct gops_gr_init { u32 *default_compute_preempt_mode); /** @cond DOXYGEN_SHOULD_SKIP_THIS */ - void (*ecc_scrub_reg)(struct gk20a *g, + int (*ecc_scrub_reg)(struct gk20a *g, struct nvgpu_gr_config *gr_config); void (*lg_coalesce)(struct gk20a *g, u32 data); void (*su_coalesce)(struct gk20a *g, u32 data); diff --git a/userspace/required_tests.json b/userspace/required_tests.json index b72a49769..7588148a3 100644 --- a/userspace/required_tests.json +++ b/userspace/required_tests.json @@ -2501,6 +2501,12 @@ "unit": "nvgpu_gr_init", "test_level": 0 }, + { + "test": "test_gr_init_hal_ecc_scrub_reg", + "case": "gr_init_hal_ecc_scrub_reg", + "unit": "nvgpu_gr_init", + "test_level": 0 + }, { "test": "test_gr_remove_setup", "case": "gr_remove_setup", diff --git a/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c b/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c index d16fe1022..06c2458bf 100644 --- a/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c +++ b/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "common/gr/gr_priv.h" #include "../nvgpu-gr.h" @@ -47,6 +49,125 @@ static int dummy_l2_flush(struct gk20a *g, bool invalidate) return 0; } 
+struct gr_ecc_scrub_reg_rec { + u32 addr; + u32 scrub_done; +}; + +struct gr_ecc_scrub_reg_rec ecc_scrub_data[] = { + { + .addr = gr_pri_gpc0_tpc0_sm_lrf_ecc_control_r(), + .scrub_done = + (gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp0_init_f() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp1_init_f() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp2_init_f() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp3_init_f() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp4_init_f() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp5_init_f() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp6_init_f() | + gr_pri_gpc0_tpc0_sm_lrf_ecc_control_scrub_qrfdp7_init_f()), + }, + { + .addr = gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_r(), + .scrub_done = + (gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_scrub_el1_0_init_f() | + gr_pri_gpc0_tpc0_sm_l1_data_ecc_control_scrub_el1_1_init_f()), + }, + { + .addr = gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_r(), + .scrub_done = + (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_el1_0_init_f() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_el1_1_init_f() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_pixprf_init_f() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_control_scrub_miss_fifo_init_f()), + }, + { + .addr = gr_pri_gpc0_tpc0_sm_cbu_ecc_control_r(), + .scrub_done = + (gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_warp_sm0_init_f() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_warp_sm1_init_f() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_barrier_sm0_init_f() | + gr_pri_gpc0_tpc0_sm_cbu_ecc_control_scrub_barrier_sm1_init_f()), + }, + { + .addr = gr_pri_gpc0_tpc0_sm_icache_ecc_control_r(), + .scrub_done = + (gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l0_data_init_f() | + gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l0_predecode_init_f() | + gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l1_data_init_f() | + gr_pri_gpc0_tpc0_sm_icache_ecc_control_scrub_l1_predecode_init_f()), + }, +}; + +int test_gr_init_hal_ecc_scrub_reg(struct unit_module *m, 
+ struct gk20a *g, void *args) +{ + u32 i; + int err; + struct nvgpu_gr_config *config = nvgpu_gr_get_config_ptr(g); + struct nvgpu_posix_fault_inj *timer_fi = + nvgpu_timers_get_fault_injection(); + + /* Code coverage */ + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_ICACHE, false); + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_CBU, false); + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_L1_TAG, false); + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_L1_DATA, false); + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_LRF, false); + + err = g->ops.gr.init.ecc_scrub_reg(g, config); + if (err != 0) { + unit_return_fail(m, "ECC scrub failed"); + } + + /* Re-enable the features */ + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_ICACHE, true); + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_CBU, true); + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_L1_TAG, true); + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_L1_DATA, true); + nvgpu_set_enabled(g, NVGPU_ECC_ENABLED_SM_LRF, true); + + /* Trigger timeout initialization failure */ + for (i = 0; + i < (sizeof(ecc_scrub_data) / sizeof(struct gr_ecc_scrub_reg_rec)); + i++) { + nvgpu_posix_enable_fault_injection(timer_fi, true, i); + err = g->ops.gr.init.ecc_scrub_reg(g, config); + if (err == 0) { + unit_return_fail(m, "Timeout was expected"); + } + } + + nvgpu_posix_enable_fault_injection(timer_fi, false, 0); + + for (i = 0; + i < (sizeof(ecc_scrub_data) / sizeof(struct gr_ecc_scrub_reg_rec)); + i++) { + /* Set incorrect values of scrub_done so that scrub wait times out */ + nvgpu_writel(g, + ecc_scrub_data[i].addr, + ~(ecc_scrub_data[i].scrub_done)); + + err = g->ops.gr.init.ecc_scrub_reg(g, config); + if (err == 0) { + unit_return_fail(m, "Timeout was expected"); + } + + /* Set correct values of scrub_done so that scrub wait is successful */ + nvgpu_writel(g, + ecc_scrub_data[i].addr, + ecc_scrub_data[i].scrub_done); + } + + /* No error injection, should be successful */ + err = g->ops.gr.init.ecc_scrub_reg(g, config); + if (err != 0) { + 
unit_return_fail(m, "ECC scrub failed"); + } + + return UNIT_SUCCESS; +} + int test_gr_init_hal_wait_empty(struct unit_module *m, struct gk20a *g, void *args) { diff --git a/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.h b/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.h index 3a2307081..8993d6dca 100644 --- a/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.h +++ b/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.h @@ -58,6 +58,36 @@ struct unit_module; int test_gr_init_hal_wait_empty(struct unit_module *m, struct gk20a *g, void *args); +/** + * Test specification for: test_gr_init_hal_ecc_scrub_reg. + * + * Description: Verify error handling in gops.gr.init.ecc_scrub_reg function. + * + * Test Type: Feature, Error guessing. + * + * Targets: g->ops.gr.init.ecc_scrub_reg. + * + * Input: gr_init_setup, gr_init_prepare, gr_init_support must have + * been executed successfully. + * + * Steps: + * - Disable feature flags for common.gr ECC handling for code coverage + * and call g->ops.gr.init.ecc_scrub_reg. + * - Re-enable all the feature flags. + * - Inject timeout initialization failures and call + * g->ops.gr.init.ecc_scrub_reg. + * - Set incorrect values of scrub_done for each error type so that scrub + * wait times out. + * - Ensure that g->ops.gr.init.ecc_scrub_reg returns error. + * - Set correct values of scrub_done for each error so that scrub wait + * is successful again. + * + * Output: Returns PASS if the steps above were executed successfully. FAIL + * otherwise. + */ +int test_gr_init_hal_ecc_scrub_reg(struct unit_module *m, + struct gk20a *g, void *args); + /** * Test specification for: test_gr_init_hal_error_injection. 
* diff --git a/userspace/units/gr/init/nvgpu-gr-init.c b/userspace/units/gr/init/nvgpu-gr-init.c index aebea35e6..7c5dea291 100644 --- a/userspace/units/gr/init/nvgpu-gr-init.c +++ b/userspace/units/gr/init/nvgpu-gr-init.c @@ -185,6 +185,7 @@ struct unit_module_test nvgpu_gr_init_tests[] = { UNIT_TEST(gr_init_support, test_gr_init_support, NULL, 0), UNIT_TEST(gr_init_hal_error_injection, test_gr_init_hal_error_injection, NULL, 0), UNIT_TEST(gr_init_hal_wait_empty, test_gr_init_hal_wait_empty, NULL, 0), + UNIT_TEST(gr_init_hal_ecc_scrub_reg, test_gr_init_hal_ecc_scrub_reg, NULL, 0), UNIT_TEST(gr_suspend, test_gr_suspend, NULL, 0), UNIT_TEST(gr_ecc_features, test_gr_init_ecc_features, NULL, 0), UNIT_TEST(gr_remove_support, test_gr_remove_support, NULL, 0),