gpu: nvgpu: check and handle all bits set in fecs_host_intr_status

Check and handle every bit set in the fecs_host_intr_status h/w register,
instead of handling only the first matching bit.
Read fecs_host_intr_status before calling handle_fecs_error
and store both the raw value and the decoded status in isr_data.

JIRA NVGPU-5502

Change-Id: I198b11aa62e394706007d6dc034fe0ac8da2bcb5
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2343684
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Seema Khowala
2020-06-26 21:34:50 -07:00
committed by Alex Waterman
parent fe6bf2c241
commit b91b1f06e1
5 changed files with 82 additions and 32 deletions

View File

@@ -518,18 +518,18 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
{
u32 gr_fecs_intr, mailbox_value;
int ret = 0;
struct nvgpu_fecs_host_intr_status fecs_host_intr;
u32 chid = (isr_data->ch != NULL) ?
isr_data->ch->chid : NVGPU_INVALID_CHANNEL_ID;
u32 mailbox_id = NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6;
struct nvgpu_fecs_host_intr_status *fecs_host_intr;
gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
&fecs_host_intr);
gr_fecs_intr = isr_data->fecs_intr;
if (gr_fecs_intr == 0U) {
return 0;
}
fecs_host_intr = &isr_data->fecs_host_intr_status;
if (fecs_host_intr.unimp_fw_method_active) {
if (fecs_host_intr->unimp_fw_method_active) {
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
mailbox_id);
nvgpu_gr_intr_set_error_notifier(g, isr_data,
@@ -542,15 +542,9 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
isr_data->offset << 2U, isr_data->class_num,
isr_data->data_lo);
ret = -1;
} else if (fecs_host_intr.watchdog_active) {
gr_intr_report_ctxsw_error(g,
GPU_FECS_CTXSW_WATCHDOG_TIMEOUT,
chid, 0);
/* currently, recovery is not initiated */
nvgpu_err(g, "fecs watchdog triggered for channel %u, "
"cannot ctxsw anymore !!", chid);
g->ops.gr.falcon.dump_stats(g);
} else if (fecs_host_intr.ctxsw_intr0 != 0U) {
}
if (fecs_host_intr->ctxsw_intr0 != 0U) {
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
mailbox_id);
#ifdef CONFIG_NVGPU_FECS_TRACE
@@ -589,19 +583,30 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
mailbox_value);
ret = -1;
}
} else if (fecs_host_intr.fault_during_ctxsw_active) {
}
if (fecs_host_intr->fault_during_ctxsw_active) {
gr_intr_report_ctxsw_error(g,
GPU_FECS_FAULT_DURING_CTXSW,
chid, 0);
nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid);
ret = -1;
} else {
nvgpu_err(g,
"unhandled fecs error interrupt 0x%08x for channel %u",
gr_fecs_intr, chid);
}
if (fecs_host_intr->watchdog_active) {
gr_intr_report_ctxsw_error(g,
GPU_FECS_CTXSW_WATCHDOG_TIMEOUT,
chid, 0);
/* currently, recovery is not initiated */
nvgpu_err(g, "fecs watchdog triggered for channel %u, "
"cannot ctxsw anymore !!", chid);
g->ops.gr.falcon.dump_stats(g);
}
/*
* un-supported interrupts will be flagged in
* g->ops.gr.falcon.fecs_host_intr_status.
*/
g->ops.gr.falcon.fecs_host_clear_intr(g, gr_fecs_intr);
return ret;
@@ -899,6 +904,8 @@ static u32 gr_intr_handle_error_interrupts(struct gk20a *g,
u32 do_reset = 0U;
if (intr_info->fecs_error != 0U) {
isr_data->fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
&(isr_data->fecs_host_intr_status));
if (g->ops.gr.intr.handle_fecs_error(g,
isr_data->ch, isr_data) != 0) {
do_reset = 1U;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,6 +25,7 @@
#include <nvgpu/types.h>
#include <nvgpu/lock.h>
#include <include/nvgpu/gr/gr_falcon.h>
struct nvgpu_channel;
@@ -154,6 +155,14 @@ struct nvgpu_gr_isr_data {
* Class ID corresponding to above subchannel.
*/
u32 class_num;
/**
* Value read from fecs_host_int_status h/w reg.
*/
u32 fecs_intr;
/**
* S/W defined status for fecs_host_int_status.
*/
struct nvgpu_fecs_host_intr_status fecs_host_intr_status;
};
/**

View File

@@ -887,28 +887,52 @@ u32 gm20b_gr_falcon_fecs_host_intr_status(struct gk20a *g,
struct nvgpu_fecs_host_intr_status *fecs_host_intr)
{
u32 gr_fecs_intr = nvgpu_readl(g, gr_fecs_host_int_status_r());
u32 host_int_status = 0U;
(void) memset(fecs_host_intr, 0,
sizeof(struct nvgpu_fecs_host_intr_status));
if ((gr_fecs_intr &
gr_fecs_host_int_status_umimp_firmware_method_f(1)) != 0U) {
fecs_host_intr->unimp_fw_method_active = true;
} else if ((gr_fecs_intr &
host_int_status |=
gr_fecs_host_int_status_umimp_firmware_method_f(1);
}
if ((gr_fecs_intr &
gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
fecs_host_intr->watchdog_active = true;
} else if ((gr_fecs_intr &
host_int_status |= gr_fecs_host_int_status_watchdog_active_f();
}
if ((gr_fecs_intr &
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
fecs_host_intr->ctxsw_intr0 =
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0);
} else if ((gr_fecs_intr &
host_int_status |=
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0);
}
if ((gr_fecs_intr &
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1)) != 0U) {
fecs_host_intr->ctxsw_intr1 =
gr_fecs_host_int_clear_ctxsw_intr1_clear_f();
} else if ((gr_fecs_intr &
host_int_status |=
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1);
}
if ((gr_fecs_intr &
gr_fecs_host_int_status_fault_during_ctxsw_f(1)) != 0U) {
fecs_host_intr->fault_during_ctxsw_active = true;
} else {
nvgpu_log_info(g, "un-handled fecs intr: 0x%x", gr_fecs_intr);
host_int_status |=
gr_fecs_host_int_status_fault_during_ctxsw_f(1);
}
if (gr_fecs_intr != host_int_status) {
nvgpu_err(g, "un-supported fecs_host_int_status. "
"fecs_host_int_status: 0x%x "
"handled host_int_status: 0x%x",
gr_fecs_intr, host_int_status);
}
return gr_fecs_intr;

View File

@@ -150,13 +150,12 @@ int gp10b_gr_intr_handle_fecs_error(struct gk20a *g,
struct nvgpu_channel *ch;
u32 chid = NVGPU_INVALID_CHANNEL_ID;
int ret = 0;
struct nvgpu_fecs_host_intr_status *fecs_host_intr;
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
struct nvgpu_tsg *tsg;
#endif
#endif
struct nvgpu_fecs_host_intr_status fecs_host_intr;
u32 gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
&fecs_host_intr);
u32 gr_fecs_intr = isr_data->fecs_intr;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
@@ -164,18 +163,27 @@ int gp10b_gr_intr_handle_fecs_error(struct gk20a *g,
return 0;
}
#ifdef CONFIG_NVGPU_CILP
fecs_host_intr = &isr_data->fecs_host_intr_status;
/*
* INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
* indicates that a CILP ctxsw save has finished
*/
if (fecs_host_intr.ctxsw_intr1 != 0U) {
if (fecs_host_intr->ctxsw_intr1 != 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP: ctxsw save completed!\n");
/* now clear the interrupt */
g->ops.gr.falcon.fecs_host_clear_intr(g,
fecs_host_intr.ctxsw_intr1);
fecs_host_intr->ctxsw_intr1);
/**
* clear the interrupt from isr_data too. This is
* for nvgpu_gr_intr_handle_fecs_error to not handle
* already handled interrupt.
*/
isr_data->fecs_intr &= ~(fecs_host_intr->ctxsw_intr1);
fecs_host_intr->ctxsw_intr1 = 0U;
ret = gp10b_gr_intr_get_cilp_preempt_pending_chid(g, &chid);
if ((ret != 0) || (chid == NVGPU_INVALID_CHANNEL_ID)) {

View File

@@ -422,10 +422,12 @@ static int test_gr_intr_error_injections(struct unit_module *m,
/* Call fecs_interrupt handler with fecs error set */
isr_data.ch = NULL;
nvgpu_posix_io_writel_reg_space(g, gr_fecs_host_int_status_r(), 0);
err = nvgpu_gr_intr_handle_fecs_error(g, NULL, &isr_data);
isr_data.fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
&(isr_data.fecs_host_intr_status));
err = g->ops.gr.intr.handle_fecs_error(g, NULL, &isr_data);
if (err != 0) {
unit_return_fail(m,
"nvgpu_gr_intr_handle_fecs_error failed\n");
"gr.intr.handle_fecs_error failed\n");
}
/* Fault injection - gpc exception with reset */