From bcbccbe083adddd01a15604b59532021508a41e3 Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Wed, 30 Jun 2021 01:53:11 +0530 Subject: [PATCH] gpu: nvgpu: add BVEC test for nvgpu_rc_mmu_fault Update nvgpu_rc_mmu_fault to return error on invalid params and add BVEC test for it. JIRA NVGPU-6772 Change-Id: If44d80888c665ca3b528c9937de8a66ccce29f57 Signed-off-by: Sagar Kamble Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2551618 (cherry picked from commit 229727512a1facc33ef9f16cc1831405e960ab2a) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2623626 Reviewed-by: Vaibhav Kachore GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/rc/rc.c | 66 +++++++++++++++++-- .../hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c | 8 ++- drivers/gpu/nvgpu/include/nvgpu/rc.h | 7 +- userspace/units/rc/nvgpu-rc.c | 56 ++++++++++++++++ userspace/units/rc/nvgpu-rc.h | 36 ++++++++++ 5 files changed, 165 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/nvgpu/common/rc/rc.c b/drivers/gpu/nvgpu/common/rc/rc.c index 23023f9b3..57129080a 100644 --- a/drivers/gpu/nvgpu/common/rc/rc.c +++ b/drivers/gpu/nvgpu/common/rc/rc.c @@ -367,17 +367,63 @@ void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct nvgpu_tsg *tsg, #endif } -void nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask, - u32 id, unsigned int id_type, unsigned int rc_type, - struct mmu_fault_info *mmufault) +#ifdef CONFIG_NVGPU_RECOVERY +static int nvgpu_rc_mmu_fault_recovery(struct gk20a *g, u32 act_eng_bitmask, + u32 id, unsigned int id_type, + unsigned int rc_type, + struct mmu_fault_info *mmufault) { + int err = 0; + + if (id >= g->fifo.num_channels && id != INVAL_ID) { + nvgpu_err(g, "invalid id %u", id); + err = -EINVAL; + goto out; + } + + if (id_type > ID_TYPE_TSG && id_type != ID_TYPE_UNKNOWN) { + nvgpu_err(g, "invalid id type %u", id_type); + err = -EINVAL; + goto out; + } + nvgpu_err(g, "mmu fault id=%u id_type=%u act_eng_bitmask=%08x", id, id_type, act_eng_bitmask); -#ifdef 
CONFIG_NVGPU_RECOVERY g->ops.fifo.recover(g, act_eng_bitmask, id, id_type, rc_type, mmufault); -#else + +out: + if (err != 0) { + nvgpu_sw_quiesce(g); + } + + return err; +} +#endif + +int nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask, + u32 id, unsigned int id_type, unsigned int rc_type, + struct mmu_fault_info *mmufault) +{ +#ifndef CONFIG_NVGPU_RECOVERY + int err = 0; + + if (id >= g->fifo.num_channels) { + nvgpu_err(g, "invalid id %u", id); + err = -EINVAL; + goto out; + } + + if (id_type > ID_TYPE_TSG) { + nvgpu_err(g, "invalid id type %u", id_type); + err = -EINVAL; + goto out; + } + + nvgpu_err(g, "mmu fault id=%u id_type=%u act_eng_bitmask=%08x", + id, id_type, act_eng_bitmask); + if ((id != INVAL_ID) && (id_type == ID_TYPE_TSG)) { struct nvgpu_tsg *tsg = &g->fifo.tsg[id]; nvgpu_tsg_set_ctx_mmu_error(g, tsg); @@ -387,5 +433,15 @@ void nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask, WARN_ON(!g->sw_quiesce_pending); (void)rc_type; (void)mmufault; + +out: + if (err != 0) { + nvgpu_sw_quiesce(g); + } + + return err; +#else + return nvgpu_rc_mmu_fault_recovery(g, act_eng_bitmask, id, id_type, + rc_type, mmufault); #endif } diff --git a/drivers/gpu/nvgpu/hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c index 079b3d202..a8b58b691 100644 --- a/drivers/gpu/nvgpu/hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c @@ -403,6 +403,7 @@ static bool gv11b_mm_mmu_fault_handle_non_replayable(struct gk20a *g, u32 id = NVGPU_INVALID_TSG_ID; unsigned int rc_type = RC_TYPE_NO_RC; bool ret = false; + int err; if (mmufault->fault_type == gmmu_fault_type_unbound_inst_block_v()) { @@ -439,8 +440,11 @@ static bool gv11b_mm_mmu_fault_handle_non_replayable(struct gk20a *g, } if (rc_type != RC_TYPE_NO_RC) { - nvgpu_rc_mmu_fault(g, act_eng_bitmask, - id, id_type, rc_type, mmufault); + err = nvgpu_rc_mmu_fault(g, act_eng_bitmask, + id, id_type, rc_type, mmufault); + 
if (err != 0) { + nvgpu_err(g, "recovery failed"); + } } return ret; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/rc.h b/drivers/gpu/nvgpu/include/nvgpu/rc.h index a75ed9f13..69d205970 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/rc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/rc.h @@ -255,6 +255,8 @@ void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct nvgpu_tsg *tsg, * @param rc_type [in] Recovery type. * @param mmufault [in] Mmu fault info * + * Validate the id. Valid range is [0, g->fifo.num_channels). + * Validate the id type parameter. Valid range is [ID_TYPE_CHANNEL, ID_TYPE_TSG]. * Do mmu fault recovery dependending on the \a rc_type, \a act_eng_bitmask, * \a hw_id and \a id_type. * For safety, @@ -262,8 +264,11 @@ void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct nvgpu_tsg *tsg, * when \a id_type is TSG. * - Mark the channels of that TSG as unserviceable when \a id_type is TSG * - print warning if quiesce is not triggered already. + * + * @return 0 in case of success, < 0 in case of failure. + * @retval -EINVAL in case the ID or the ID type is invalid. 
*/ -void nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask, +int nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask, u32 id, unsigned int id_type, unsigned int rc_type, struct mmu_fault_info *mmufault); diff --git a/userspace/units/rc/nvgpu-rc.c b/userspace/units/rc/nvgpu-rc.c index cb59283ca..84de04bc4 100644 --- a/userspace/units/rc/nvgpu-rc.c +++ b/userspace/units/rc/nvgpu-rc.c @@ -298,6 +298,61 @@ int test_rc_mmu_fault(struct unit_module *m, struct gk20a *g, void *args) return UNIT_SUCCESS; } +int test_rc_mmu_fault_bvec(struct unit_module *m, struct gk20a *g, void *args) +{ + u32 valid_id[] = {0, 1 + get_random_u32(2, g->fifo.num_channels), g->fifo.num_channels - 1}; + u32 invalid_id[] = {g->fifo.num_channels, g->fifo.num_channels + 1 + get_random_u32(g->fifo.num_channels, INVAL_ID), INVAL_ID}; + u32 valid_id_type[] = {ID_TYPE_CHANNEL, ID_TYPE_TSG}; + u32 invalid_id_type[] = {ID_TYPE_TSG + 1, ID_TYPE_TSG + 2 + get_random_u32(ID_TYPE_TSG + 1, U32_MAX), ID_TYPE_UNKNOWN}; + int err = UNIT_SUCCESS; + u32 i; + + g->sw_quiesce_pending = true; + clear_error_notifier(ch); + + for (i = 0U; i < ARRAY_SIZE(valid_id); i++) { + err = nvgpu_rc_mmu_fault(g, 0U, valid_id[i], ID_TYPE_TSG, RC_TYPE_MMU_FAULT, NULL); + if (err != 0) { + unit_err(m, "mmu fault with valid id not handled"); + err = UNIT_FAIL; + goto out; + } + } + + for (i = 0U; i < ARRAY_SIZE(invalid_id); i++) { + err = nvgpu_rc_mmu_fault(g, 0U, invalid_id[i], ID_TYPE_TSG, RC_TYPE_MMU_FAULT, NULL); + if (err != -EINVAL) { + unit_err(m, "mmu fault with invalid id handled"); + err = UNIT_FAIL; + goto out; + } + } + + for (i = 0U; i < ARRAY_SIZE(valid_id_type); i++) { + err = nvgpu_rc_mmu_fault(g, 0U, 0U, valid_id_type[i], RC_TYPE_MMU_FAULT, NULL); + if (err != 0) { + unit_err(m, "mmu fault with valid id type not handled"); + err = UNIT_FAIL; + goto out; + } + } + + for (i = 0U; i < ARRAY_SIZE(invalid_id_type); i++) { + err = nvgpu_rc_mmu_fault(g, 0U, 0U, invalid_id_type[i], RC_TYPE_MMU_FAULT, NULL); + if 
(err != -EINVAL) { + unit_err(m, "mmu fault with invalid id type handled"); + err = UNIT_FAIL; + goto out; + } + } + + err = UNIT_SUCCESS; + +out: + g->sw_quiesce_pending = false; + return err; +} + #define F_RC_IS_CHSW_VALID_OR_SAVE 0U #define F_RC_IS_CHSW_LOAD_OR_SWITCH 1U #define F_RC_IS_CHSW_INVALID 2U @@ -539,6 +594,7 @@ struct unit_module_test nvgpu_rc_tests[] = { UNIT_TEST(rc_sched_error_bad_tsg, test_rc_sched_error_bad_tsg, NULL, 0), UNIT_TEST(rc_tsg_and_related_engines, test_rc_tsg_and_related_engines, NULL, 0), UNIT_TEST(rc_mmu_fault, test_rc_mmu_fault, NULL, 0), + UNIT_TEST(rc_mmu_fault_bvec, test_rc_mmu_fault_bvec, NULL, 0), UNIT_TEST(rc_pbdma_fault, test_rc_pbdma_fault, NULL, 0), UNIT_TEST(rc_deinit, test_rc_deinit, NULL, 0), }; diff --git a/userspace/units/rc/nvgpu-rc.h b/userspace/units/rc/nvgpu-rc.h index 3b5b10803..bcaea736c 100644 --- a/userspace/units/rc/nvgpu-rc.h +++ b/userspace/units/rc/nvgpu-rc.h @@ -244,6 +244,42 @@ int test_rc_tsg_and_related_engines(struct unit_module *m, struct gk20a *g, void */ int test_rc_mmu_fault(struct unit_module *m, struct gk20a *g, void *args); +/** + * Test specification for: test_rc_mmu_fault_bvec + * + * Description: Validate id and id_type parameters for nvgpu_rc_mmu_fault + * + * Test Type: Boundary Value + * + * Targets: nvgpu_rc_mmu_fault + * + * Input: test_rc_init run for this GPU + * + * Equivalence classes: + * Variable: id + * - Valid: [0, g->fifo.num_channels - 1] + * - Invalid: [g->fifo.num_channels, INVAL_ID] + * Variable: id_type + * - Valid: [ID_TYPE_CHANNEL, ID_TYPE_TSG] + * - Invalid: [ID_TYPE_TSG + 1, ID_TYPE_UNKNOWN] + * + * Steps: + * - initialize Channel error_notifier + * - set g->sw_quiesce_pending = true + * - for all valid ids with ID_TYPE_TSG + * - invoke nvgpu_rc_mmu_fault and verify it succeeds with return value 0. + * - for all invalid ids with ID_TYPE_TSG + * - invoke nvgpu_rc_mmu_fault and verify it fails with -EINVAL. 
+ * - for all valid id types with id 0 + * - invoke nvgpu_rc_mmu_fault and verify it succeeds with return value 0. + * - for all invalid id types with id 0 + * - invoke nvgpu_rc_mmu_fault and verify it fails with -EINVAL. + * + * Output: Returns PASS if nvgpu_rc_mmu_fault succeeds for valid id and id_type + * and fails for invalid id or id_type. Returns FAIL otherwise. + */ +int test_rc_mmu_fault_bvec(struct unit_module *m, struct gk20a *g, void *args); + /** * Test specification for: test_rc_pbdma_fault *