Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)
gpu: nvgpu: mm: code complexity cleanup mmu_fault
This patch divides complex code segments into smaller functions to
reduce code complexity in the hal mm mmu_fault gv11b_fusa code.

Jira NVGPU-4065

Change-Id: I360939dd8fd2673159a515f535edbaf400930c77
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2205949
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: Alex Waterman
Parent: 266c2b819f
Commit: bf4311a0e3
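For orientation before reading the diff: the patch moves the CE, non-replayable, and replayable fault handling out of gv11b_mm_mmu_fault_handle_mmu_fault_common() into static helpers. The sketch below is a simplified reading of the resulting control flow, reconstructed from the diff that follows (the diff is authoritative; logging, TSG lookup, and channel reference handling live inside the helpers):

/*
 * Simplified sketch of the refactored control flow; see the diff below
 * for the authoritative code.
 */
void gv11b_mm_mmu_fault_handle_mmu_fault_common(struct gk20a *g,
		struct mmu_fault_info *mmufault, u32 *invalidate_replay_val)
{
	u32 num_lce;
	bool ret = false;

	if (!mmufault->valid) {
		return;
	}

	num_lce = g->ops.top.get_num_lce(g);
	if (mmufault->mmu_engine_id >= gmmu_fault_mmu_eng_id_ce0_v()) {
		/* CE faults: try to fix the page fault, return early if fixed */
		ret = gv11b_mm_mmu_fault_handle_mmu_fault_ce(g, mmufault,
				invalidate_replay_val, num_lce);
		if (ret) {
			return;
		}
	}

	if (!mmufault->replayable_fault) {
		/* Non-replayable: identify TSG/channel and trigger recovery */
		ret = gv11b_mm_mmu_fault_handle_non_replayable(g, mmufault);
		if (ret) {
			return;
		}
	} else {
		/* Replayable: fix the PTE or request a replay cancel */
		gv11b_mm_mmu_fault_handle_replayable(g, mmufault,
				invalidate_replay_val);
	}
}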
@@ -287,15 +287,192 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
	gv11b_mm_mmu_fault_parse_mmu_fault_info(mmufault);
}

static bool gv11b_mm_mmu_fault_handle_mmu_fault_ce(struct gk20a *g,
		struct mmu_fault_info *mmufault, u32 *invalidate_replay_val,
		u32 num_lce)
{
	int err = 0;
	struct nvgpu_tsg *tsg = NULL;

	if (mmufault->mmu_engine_id <
			nvgpu_safe_add_u32(gmmu_fault_mmu_eng_id_ce0_v(),
			num_lce)) {
		/* CE page faults are not reported as replayable */
		nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
		err = gv11b_fb_fix_page_fault(g, mmufault);
#else
		err = -EINVAL;
#endif

		if (mmufault->refch != NULL) {
			tsg = nvgpu_tsg_from_ch(mmufault->refch);
			nvgpu_tsg_reset_faulted_eng_pbdma(g, tsg,
					true, true);
		}
		if (err == 0) {
			nvgpu_log(g, gpu_dbg_intr,
					"CE Page Fault Fixed");
			*invalidate_replay_val = 0;
			if (mmufault->refch != NULL) {
				nvgpu_channel_put(mmufault->refch);
				mmufault->refch = NULL;
			}
			return true;
		}
		/* Do recovery */
		nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed");
	}
	return false;
}

static bool gv11b_mm_mmu_fault_handle_mmu_fault_refch(struct gk20a *g,
		struct mmu_fault_info *mmufault, u32 *id_ptr,
		unsigned int *id_type_ptr, unsigned int *rc_type_ptr)
{
	struct nvgpu_tsg *tsg = NULL;

	if (mmufault->refch->mmu_nack_handled) {
		/*
		 * We have already recovered for the same
		 * context, skip doing another recovery.
		 */
		mmufault->refch->mmu_nack_handled = false;
		/*
		 * Recovery path can be entered twice for the
		 * same error in case of mmu nack. If mmu
		 * nack interrupt is handled before mmu fault
		 * then channel reference is increased to avoid
		 * closing the channel by userspace. Decrement
		 * channel reference.
		 */
		nvgpu_channel_put(mmufault->refch);
		/*
		 * refch in mmufault is assigned at the time
		 * of copying fault info from snap reg or bar2
		 * fault buf.
		 */
		nvgpu_channel_put(mmufault->refch);
		return true;
	} else {
		/*
		 * Indicate recovery is handled if mmu fault is
		 * a result of mmu nack.
		 */
		mmufault->refch->mmu_nack_handled = true;
	}

	tsg = nvgpu_tsg_from_ch(mmufault->refch);
	if (tsg != NULL) {
		*id_ptr = mmufault->refch->tsgid;
		*id_type_ptr = ID_TYPE_TSG;
		*rc_type_ptr = RC_TYPE_MMU_FAULT;
	} else {
		nvgpu_err(g, "chid: %d is referenceable but "
				"not bound to tsg",
				mmufault->refch->chid);
		*id_type_ptr = ID_TYPE_CHANNEL;
		*rc_type_ptr = RC_TYPE_NO_RC;
	}
	return false;
}

static bool gv11b_mm_mmu_fault_handle_non_replayable(struct gk20a *g,
		struct mmu_fault_info *mmufault)
{
	unsigned int id_type = ID_TYPE_UNKNOWN;
	u32 act_eng_bitmask = 0U;
	u32 id = NVGPU_INVALID_TSG_ID;
	unsigned int rc_type = RC_TYPE_NO_RC;
	bool ret = false;

	if (mmufault->fault_type ==
			gmmu_fault_type_unbound_inst_block_v()) {
		/*
		 * Bug 1847172: When an engine faults due to an unbound
		 * instance block, the fault cannot be isolated to a
		 * single context so we need to reset the entire runlist
		 */
		rc_type = RC_TYPE_MMU_FAULT;

	} else if (mmufault->refch != NULL) {
		ret = gv11b_mm_mmu_fault_handle_mmu_fault_refch(g, mmufault,
				&id, &id_type, &rc_type);
		if (ret) {
			return ret;
		}
	} else {
		/* Nothing to do here */
	}

	/* engine is faulted */
	if (mmufault->faulted_engine != NVGPU_INVALID_ENG_ID) {
		act_eng_bitmask = BIT32(mmufault->faulted_engine);
		rc_type = RC_TYPE_MMU_FAULT;
	}

	/*
	 * refch in mmufault is assigned at the time of copying
	 * fault info from snap reg or bar2 fault buf
	 */
	if (mmufault->refch != NULL) {
		nvgpu_channel_put(mmufault->refch);
		mmufault->refch = NULL;
	}

	if (rc_type != RC_TYPE_NO_RC) {
#ifdef CONFIG_NVGPU_RECOVERY
		g->ops.fifo.recover(g, act_eng_bitmask,
				id, id_type, rc_type, mmufault);
#else
		nvgpu_err(g, "mmu fault id=%u id_type=%u act_eng_bitmask=%08x",
				id, id_type, act_eng_bitmask);
#endif
	}
	return ret;
}

static void gv11b_mm_mmu_fault_handle_replayable(struct gk20a *g,
		struct mmu_fault_info *mmufault, u32 *invalidate_replay_val)
{
	int err = 0;

	if (mmufault->fault_type == gmmu_fault_type_pte_v()) {
		nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix");
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
		err = gv11b_fb_fix_page_fault(g, mmufault);
#else
		err = -EINVAL;
#endif
		if (err != 0) {
			*invalidate_replay_val |=
				gv11b_fb_get_replay_cancel_global_val();
		} else {
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
			*invalidate_replay_val |=
				gv11b_fb_get_replay_start_ack_all();
#endif
		}
	} else {
		/* cancel faults other than invalid pte */
		*invalidate_replay_val |=
			gv11b_fb_get_replay_cancel_global_val();
	}
	/*
	 * refch in mmufault is assigned at the time of copying
	 * fault info from snap reg or bar2 fault buf
	 */
	if (mmufault->refch != NULL) {
		nvgpu_channel_put(mmufault->refch);
		mmufault->refch = NULL;
	}
}

void gv11b_mm_mmu_fault_handle_mmu_fault_common(struct gk20a *g,
		struct mmu_fault_info *mmufault, u32 *invalidate_replay_val)
{
	unsigned int id_type = ID_TYPE_UNKNOWN;
	u32 num_lce, act_eng_bitmask = 0U;
	int err = 0;
	u32 id = NVGPU_INVALID_TSG_ID;
	unsigned int rc_type = RC_TYPE_NO_RC;
	struct nvgpu_tsg *tsg = NULL;
	u32 num_lce;
	bool ret = false;

	if (!mmufault->valid) {
		return;
@@ -306,148 +483,21 @@ void gv11b_mm_mmu_fault_handle_mmu_fault_common(struct gk20a *g,
	num_lce = g->ops.top.get_num_lce(g);
	if (mmufault->mmu_engine_id >=
			gmmu_fault_mmu_eng_id_ce0_v()) {
		if (mmufault->mmu_engine_id <
				nvgpu_safe_add_u32(gmmu_fault_mmu_eng_id_ce0_v(),
				num_lce)) {
			/* CE page faults are not reported as replayable */
			nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
			err = gv11b_fb_fix_page_fault(g, mmufault);
#else
			err = -EINVAL;
#endif

			if (mmufault->refch != NULL) {
				tsg = nvgpu_tsg_from_ch(mmufault->refch);
				nvgpu_tsg_reset_faulted_eng_pbdma(g, tsg,
						true, true);
			}
			if (err == 0) {
				nvgpu_log(g, gpu_dbg_intr,
						"CE Page Fault Fixed");
				*invalidate_replay_val = 0;
				if (mmufault->refch != NULL) {
					nvgpu_channel_put(mmufault->refch);
					mmufault->refch = NULL;
				}
				return;
			}
			/* Do recovery */
			nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed");
		ret = gv11b_mm_mmu_fault_handle_mmu_fault_ce(g, mmufault,
				invalidate_replay_val, num_lce);
		if (ret) {
			return;
		}
	}

	if (!mmufault->replayable_fault) {
		if (mmufault->fault_type ==
				gmmu_fault_type_unbound_inst_block_v()) {
			/*
			 * Bug 1847172: When an engine faults due to an unbound
			 * instance block, the fault cannot be isolated to a
			 * single context so we need to reset the entire runlist
			 */
			rc_type = RC_TYPE_MMU_FAULT;

		} else if (mmufault->refch != NULL) {
			if (mmufault->refch->mmu_nack_handled) {
				/*
				 * We have already recovered for the same
				 * context, skip doing another recovery.
				 */
				mmufault->refch->mmu_nack_handled = false;
				/*
				 * Recovery path can be entered twice for the
				 * same error in case of mmu nack. If mmu
				 * nack interrupt is handled before mmu fault
				 * then channel reference is increased to avoid
				 * closing the channel by userspace. Decrement
				 * channel reference.
				 */
				nvgpu_channel_put(mmufault->refch);
				/*
				 * refch in mmufault is assigned at the time
				 * of copying fault info from snap reg or bar2
				 * fault buf.
				 */
				nvgpu_channel_put(mmufault->refch);
				return;
			} else {
				/*
				 * Indicate recovery is handled if mmu fault is
				 * a result of mmu nack.
				 */
				mmufault->refch->mmu_nack_handled = true;
			}

			tsg = nvgpu_tsg_from_ch(mmufault->refch);
			if (tsg != NULL) {
				id = mmufault->refch->tsgid;
				id_type = ID_TYPE_TSG;
				rc_type = RC_TYPE_MMU_FAULT;
			} else {
				nvgpu_err(g, "chid: %d is referenceable but "
						"not bound to tsg",
						mmufault->refch->chid);
				id_type = ID_TYPE_CHANNEL;
				rc_type = RC_TYPE_NO_RC;
			}
		} else {
			/* Nothing to do here */
		}

		/* engine is faulted */
		if (mmufault->faulted_engine != NVGPU_INVALID_ENG_ID) {
			act_eng_bitmask = BIT32(mmufault->faulted_engine);
			rc_type = RC_TYPE_MMU_FAULT;
		}

		/*
		 * refch in mmufault is assigned at the time of copying
		 * fault info from snap reg or bar2 fault buf
		 */
		if (mmufault->refch != NULL) {
			nvgpu_channel_put(mmufault->refch);
			mmufault->refch = NULL;
		}

		if (rc_type != RC_TYPE_NO_RC) {
#ifdef CONFIG_NVGPU_RECOVERY
			g->ops.fifo.recover(g, act_eng_bitmask,
					id, id_type, rc_type, mmufault);
#else
			nvgpu_err(g, "mmu fault id=%u id_type=%u act_eng_bitmask=%08x",
					id, id_type, act_eng_bitmask);
#endif
		ret = gv11b_mm_mmu_fault_handle_non_replayable(g, mmufault);
		if (ret) {
			return;
		}
	} else {
		if (mmufault->fault_type == gmmu_fault_type_pte_v()) {
			nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix");
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
			err = gv11b_fb_fix_page_fault(g, mmufault);
#else
			err = -EINVAL;
#endif
			if (err != 0) {
				*invalidate_replay_val |=
					gv11b_fb_get_replay_cancel_global_val();
			} else {
#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT
				*invalidate_replay_val |=
					gv11b_fb_get_replay_start_ack_all();
#endif
			}
		} else {
			/* cancel faults other than invalid pte */
			*invalidate_replay_val |=
				gv11b_fb_get_replay_cancel_global_val();
		}
		/*
		 * refch in mmufault is assigned at the time of copying
		 * fault info from snap reg or bar2 fault buf
		 */
		if (mmufault->refch != NULL) {
			nvgpu_channel_put(mmufault->refch);
			mmufault->refch = NULL;
		}
		gv11b_mm_mmu_fault_handle_replayable(g, mmufault,
				invalidate_replay_val);
	}
}