gpu: nvgpu: add BVEC test for nvgpu_rc_mmu_fault

Update nvgpu_rc_mmu_fault to return error on invalid params and
add BVEC test for it.

JIRA NVGPU-6772

Change-Id: If44d80888c665ca3b528c9937de8a66ccce29f57
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2551618
(cherry picked from commit 229727512a1facc33ef9f16cc1831405e960ab2a)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2623626
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Sagar Kamble
2021-06-30 01:53:11 +05:30
committed by mobile promotions
parent 80efe558b1
commit bcbccbe083
5 changed files with 165 additions and 8 deletions

View File

@@ -367,17 +367,63 @@ void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct nvgpu_tsg *tsg,
#endif #endif
} }
void nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask, #ifdef CONFIG_NVGPU_RECOVERY
u32 id, unsigned int id_type, unsigned int rc_type, static int nvgpu_rc_mmu_fault_recovery(struct gk20a *g, u32 act_eng_bitmask,
struct mmu_fault_info *mmufault) u32 id, unsigned int id_type,
unsigned int rc_type,
struct mmu_fault_info *mmufault)
{ {
int err = 0;
if (id >= g->fifo.num_channels && id != INVAL_ID) {
nvgpu_err(g, "invalid id %u", id);
err = -EINVAL;
goto out;
}
if (id_type > ID_TYPE_TSG && id_type != ID_TYPE_UNKNOWN) {
nvgpu_err(g, "invalid id type %u", id_type);
err = -EINVAL;
goto out;
}
nvgpu_err(g, "mmu fault id=%u id_type=%u act_eng_bitmask=%08x", nvgpu_err(g, "mmu fault id=%u id_type=%u act_eng_bitmask=%08x",
id, id_type, act_eng_bitmask); id, id_type, act_eng_bitmask);
#ifdef CONFIG_NVGPU_RECOVERY
g->ops.fifo.recover(g, act_eng_bitmask, g->ops.fifo.recover(g, act_eng_bitmask,
id, id_type, rc_type, mmufault); id, id_type, rc_type, mmufault);
#else
out:
if (err != 0) {
nvgpu_sw_quiesce(g);
}
return err;
}
#endif
int nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask,
u32 id, unsigned int id_type, unsigned int rc_type,
struct mmu_fault_info *mmufault)
{
#ifndef CONFIG_NVGPU_RECOVERY
int err = 0;
if (id >= g->fifo.num_channels) {
nvgpu_err(g, "invalid id %u", id);
err = -EINVAL;
goto out;
}
if (id_type > ID_TYPE_TSG) {
nvgpu_err(g, "invalid id type %u", id_type);
err = -EINVAL;
goto out;
}
nvgpu_err(g, "mmu fault id=%u id_type=%u act_eng_bitmask=%08x",
id, id_type, act_eng_bitmask);
if ((id != INVAL_ID) && (id_type == ID_TYPE_TSG)) { if ((id != INVAL_ID) && (id_type == ID_TYPE_TSG)) {
struct nvgpu_tsg *tsg = &g->fifo.tsg[id]; struct nvgpu_tsg *tsg = &g->fifo.tsg[id];
nvgpu_tsg_set_ctx_mmu_error(g, tsg); nvgpu_tsg_set_ctx_mmu_error(g, tsg);
@@ -387,5 +433,15 @@ void nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask,
WARN_ON(!g->sw_quiesce_pending); WARN_ON(!g->sw_quiesce_pending);
(void)rc_type; (void)rc_type;
(void)mmufault; (void)mmufault;
out:
if (err != 0) {
nvgpu_sw_quiesce(g);
}
return err;
#else
return nvgpu_rc_mmu_fault_recovery(g, act_eng_bitmask, id, id_type,
rc_type, mmufault);
#endif #endif
} }

View File

@@ -403,6 +403,7 @@ static bool gv11b_mm_mmu_fault_handle_non_replayable(struct gk20a *g,
u32 id = NVGPU_INVALID_TSG_ID; u32 id = NVGPU_INVALID_TSG_ID;
unsigned int rc_type = RC_TYPE_NO_RC; unsigned int rc_type = RC_TYPE_NO_RC;
bool ret = false; bool ret = false;
int err;
if (mmufault->fault_type == if (mmufault->fault_type ==
gmmu_fault_type_unbound_inst_block_v()) { gmmu_fault_type_unbound_inst_block_v()) {
@@ -439,8 +440,11 @@ static bool gv11b_mm_mmu_fault_handle_non_replayable(struct gk20a *g,
} }
if (rc_type != RC_TYPE_NO_RC) { if (rc_type != RC_TYPE_NO_RC) {
nvgpu_rc_mmu_fault(g, act_eng_bitmask, err = nvgpu_rc_mmu_fault(g, act_eng_bitmask,
id, id_type, rc_type, mmufault); id, id_type, rc_type, mmufault);
if (err != 0) {
nvgpu_err(g, "recovery failed");
}
} }
return ret; return ret;
} }

View File

@@ -255,6 +255,8 @@ void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct nvgpu_tsg *tsg,
* @param rc_type [in] Recovery type. * @param rc_type [in] Recovery type.
* @param mmufault [in] Mmu fault info * @param mmufault [in] Mmu fault info
* *
* Validate the id. Valid range is [0, g->fifo.num_channels).
* Validate the id type parameter. Valid range is [ID_TYPE_CHANNEL, ID_TYPE_TSG].
* Do mmu fault recovery depending on the \a rc_type, \a act_eng_bitmask, * Do mmu fault recovery depending on the \a rc_type, \a act_eng_bitmask,
* \a hw_id and \a id_type. * \a hw_id and \a id_type.
* For safety, * For safety,
@@ -262,8 +264,11 @@ void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct nvgpu_tsg *tsg,
* when \a id_type is TSG. * when \a id_type is TSG.
* - Mark the channels of that TSG as unserviceable when \a id_type is TSG * - Mark the channels of that TSG as unserviceable when \a id_type is TSG
* - print warning if quiesce is not triggered already. * - print warning if quiesce is not triggered already.
*
* @return 0 in case of success, < 0 in case of failure.
* @retval -EINVAL in case the ID or the ID type is invalid.
*/ */
void nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask, int nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask,
u32 id, unsigned int id_type, unsigned int rc_type, u32 id, unsigned int id_type, unsigned int rc_type,
struct mmu_fault_info *mmufault); struct mmu_fault_info *mmufault);

View File

@@ -298,6 +298,61 @@ int test_rc_mmu_fault(struct unit_module *m, struct gk20a *g, void *args)
return UNIT_SUCCESS; return UNIT_SUCCESS;
} }
/*
 * Boundary-value test for the id and id_type checks in nvgpu_rc_mmu_fault().
 *
 * For each parameter the test feeds the lower boundary, one random interior
 * value and the upper boundary of both the valid and the invalid equivalence
 * class, and checks the return value: 0 for valid inputs, -EINVAL for invalid
 * ones.
 *
 * Returns UNIT_SUCCESS when every call behaves as expected, UNIT_FAIL on the
 * first mismatch.
 */
int test_rc_mmu_fault_bvec(struct unit_module *m, struct gk20a *g, void *args)
{
	/*
	 * Each array is {class minimum, random interior value, class maximum}.
	 *
	 * The interior value is taken directly from get_random_u32() with bounds
	 * that lie strictly inside the class. Adding a fixed offset on top of the
	 * random value (as was done before) can push a "valid" id up to
	 * num_channels or beyond, and can wrap an "invalid" value past U32_MAX
	 * back into the valid range.
	 *
	 * NOTE(review): assumes get_random_u32(min, max) returns a value within
	 * [min, max] and that g->fifo.num_channels >= 3 - confirm against the
	 * helper and the test setup.
	 */
	u32 valid_id[] = {
		0,
		get_random_u32(1, g->fifo.num_channels - 2),
		g->fifo.num_channels - 1
	};
	u32 invalid_id[] = {
		g->fifo.num_channels,
		get_random_u32(g->fifo.num_channels + 1, INVAL_ID - 1),
		INVAL_ID
	};
	u32 valid_id_type[] = {ID_TYPE_CHANNEL, ID_TYPE_TSG};
	u32 invalid_id_type[] = {
		ID_TYPE_TSG + 1,
		get_random_u32(ID_TYPE_TSG + 2, U32_MAX - 1),
		ID_TYPE_UNKNOWN
	};
	int err = UNIT_SUCCESS;
	u32 i;

	/*
	 * Mark quiesce as already pending for the duration of the test;
	 * presumably this keeps nvgpu_rc_mmu_fault() from warning that quiesce
	 * was not triggered - TODO confirm.
	 */
	g->sw_quiesce_pending = true;
	clear_error_notifier(ch);

	/* Valid ids, fixed valid id type: must be accepted. */
	for (i = 0U; i < ARRAY_SIZE(valid_id); i++) {
		err = nvgpu_rc_mmu_fault(g, 0U, valid_id[i], ID_TYPE_TSG,
				RC_TYPE_MMU_FAULT, NULL);
		if (err != 0) {
			unit_err(m, "mmu fault with valid id not handled");
			err = UNIT_FAIL;
			goto out;
		}
	}

	/* Invalid ids, fixed valid id type: must be rejected with -EINVAL. */
	for (i = 0U; i < ARRAY_SIZE(invalid_id); i++) {
		err = nvgpu_rc_mmu_fault(g, 0U, invalid_id[i], ID_TYPE_TSG,
				RC_TYPE_MMU_FAULT, NULL);
		if (err != -EINVAL) {
			unit_err(m, "mmu fault with invalid id handled");
			err = UNIT_FAIL;
			goto out;
		}
	}

	/* Valid id types, fixed valid id 0: must be accepted. */
	for (i = 0U; i < ARRAY_SIZE(valid_id_type); i++) {
		err = nvgpu_rc_mmu_fault(g, 0U, 0U, valid_id_type[i],
				RC_TYPE_MMU_FAULT, NULL);
		if (err != 0) {
			unit_err(m, "mmu fault with valid id type not handled");
			err = UNIT_FAIL;
			goto out;
		}
	}

	/* Invalid id types, fixed valid id 0: must be rejected with -EINVAL. */
	for (i = 0U; i < ARRAY_SIZE(invalid_id_type); i++) {
		err = nvgpu_rc_mmu_fault(g, 0U, 0U, invalid_id_type[i],
				RC_TYPE_MMU_FAULT, NULL);
		if (err != -EINVAL) {
			unit_err(m, "mmu fault with invalid id type handled");
			err = UNIT_FAIL;
			goto out;
		}
	}

	err = UNIT_SUCCESS;

out:
	/* Restore the quiesce flag on every exit path. */
	g->sw_quiesce_pending = false;

	return err;
}
#define F_RC_IS_CHSW_VALID_OR_SAVE 0U #define F_RC_IS_CHSW_VALID_OR_SAVE 0U
#define F_RC_IS_CHSW_LOAD_OR_SWITCH 1U #define F_RC_IS_CHSW_LOAD_OR_SWITCH 1U
#define F_RC_IS_CHSW_INVALID 2U #define F_RC_IS_CHSW_INVALID 2U
@@ -539,6 +594,7 @@ struct unit_module_test nvgpu_rc_tests[] = {
UNIT_TEST(rc_sched_error_bad_tsg, test_rc_sched_error_bad_tsg, NULL, 0), UNIT_TEST(rc_sched_error_bad_tsg, test_rc_sched_error_bad_tsg, NULL, 0),
UNIT_TEST(rc_tsg_and_related_engines, test_rc_tsg_and_related_engines, NULL, 0), UNIT_TEST(rc_tsg_and_related_engines, test_rc_tsg_and_related_engines, NULL, 0),
UNIT_TEST(rc_mmu_fault, test_rc_mmu_fault, NULL, 0), UNIT_TEST(rc_mmu_fault, test_rc_mmu_fault, NULL, 0),
UNIT_TEST(rc_mmu_fault_bvec, test_rc_mmu_fault_bvec, NULL, 0),
UNIT_TEST(rc_pbdma_fault, test_rc_pbdma_fault, NULL, 0), UNIT_TEST(rc_pbdma_fault, test_rc_pbdma_fault, NULL, 0),
UNIT_TEST(rc_deinit, test_rc_deinit, NULL, 0), UNIT_TEST(rc_deinit, test_rc_deinit, NULL, 0),
}; };

View File

@@ -244,6 +244,42 @@ int test_rc_tsg_and_related_engines(struct unit_module *m, struct gk20a *g, void
*/ */
int test_rc_mmu_fault(struct unit_module *m, struct gk20a *g, void *args); int test_rc_mmu_fault(struct unit_module *m, struct gk20a *g, void *args);
/**
* Test specification for: test_rc_mmu_fault_bvec
*
* Description: Validate id and id_type parameters for nvgpu_rc_mmu_fault
*
* Test Type: Boundary Value
*
* Targets: nvgpu_rc_mmu_fault
*
* Input: test_rc_init run for this GPU
*
* Equivalence classes:
* Variable: id
* - Valid: [0, g->fifo.num_channels - 1]
* - Invalid: [g->fifo.num_channels, INVAL_ID]
* Variable: id_type
* - Valid: [ID_TYPE_CHANNEL, ID_TYPE_TSG]
* - Invalid: [ID_TYPE_TSG + 1, ID_TYPE_UNKNOWN]
*
* Steps:
* - initialize Channel error_notifier
* - set g->sw_quiesce_pending = true
* - for all valid ids with ID_TYPE_TSG
* - invoke nvgpu_rc_mmu_fault and verify it succeeds with return value 0.
* - for all invalid ids with ID_TYPE_TSG
* - invoke nvgpu_rc_mmu_fault and verify it fails with -EINVAL.
* - for all valid id types with id 0
* - invoke nvgpu_rc_mmu_fault and verify it succeeds with return value 0.
* - for all invalid id types with id 0
* - invoke nvgpu_rc_mmu_fault and verify it fails with -EINVAL.
*
* Output: Returns PASS if nvgpu_rc_mmu_fault succeeds for valid id and id_type
* and fails for invalid id and id_type. Returns FAIL otherwise.
*/
int test_rc_mmu_fault_bvec(struct unit_module *m, struct gk20a *g, void *args);
/** /**
* Test specification for: test_rc_pbdma_fault * Test specification for: test_rc_pbdma_fault
* *