mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: check and handle all bits set in fecs_host_intr_status
Check all the bits set in the fecs_host_intr_status h/w register. Read fecs_host_intr_status before calling handle_fecs_error and store this info in isr_data. JIRA NVGPU-5502 Change-Id: I198b11aa62e394706007d6dc034fe0ac8da2bcb5 Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2343684 Reviewed-by: automaticguardword <automaticguardword@nvidia.com> Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Alex Waterman <alexw@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
This commit is contained in:
committed by
Alex Waterman
parent
fe6bf2c241
commit
b91b1f06e1
@@ -518,18 +518,18 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
|
|||||||
{
|
{
|
||||||
u32 gr_fecs_intr, mailbox_value;
|
u32 gr_fecs_intr, mailbox_value;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
struct nvgpu_fecs_host_intr_status fecs_host_intr;
|
|
||||||
u32 chid = (isr_data->ch != NULL) ?
|
u32 chid = (isr_data->ch != NULL) ?
|
||||||
isr_data->ch->chid : NVGPU_INVALID_CHANNEL_ID;
|
isr_data->ch->chid : NVGPU_INVALID_CHANNEL_ID;
|
||||||
u32 mailbox_id = NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6;
|
u32 mailbox_id = NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6;
|
||||||
|
struct nvgpu_fecs_host_intr_status *fecs_host_intr;
|
||||||
|
|
||||||
gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
|
gr_fecs_intr = isr_data->fecs_intr;
|
||||||
&fecs_host_intr);
|
|
||||||
if (gr_fecs_intr == 0U) {
|
if (gr_fecs_intr == 0U) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
fecs_host_intr = &isr_data->fecs_host_intr_status;
|
||||||
|
|
||||||
if (fecs_host_intr.unimp_fw_method_active) {
|
if (fecs_host_intr->unimp_fw_method_active) {
|
||||||
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
|
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
|
||||||
mailbox_id);
|
mailbox_id);
|
||||||
nvgpu_gr_intr_set_error_notifier(g, isr_data,
|
nvgpu_gr_intr_set_error_notifier(g, isr_data,
|
||||||
@@ -542,15 +542,9 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
|
|||||||
isr_data->offset << 2U, isr_data->class_num,
|
isr_data->offset << 2U, isr_data->class_num,
|
||||||
isr_data->data_lo);
|
isr_data->data_lo);
|
||||||
ret = -1;
|
ret = -1;
|
||||||
} else if (fecs_host_intr.watchdog_active) {
|
}
|
||||||
gr_intr_report_ctxsw_error(g,
|
|
||||||
GPU_FECS_CTXSW_WATCHDOG_TIMEOUT,
|
if (fecs_host_intr->ctxsw_intr0 != 0U) {
|
||||||
chid, 0);
|
|
||||||
/* currently, recovery is not initiated */
|
|
||||||
nvgpu_err(g, "fecs watchdog triggered for channel %u, "
|
|
||||||
"cannot ctxsw anymore !!", chid);
|
|
||||||
g->ops.gr.falcon.dump_stats(g);
|
|
||||||
} else if (fecs_host_intr.ctxsw_intr0 != 0U) {
|
|
||||||
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
|
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
|
||||||
mailbox_id);
|
mailbox_id);
|
||||||
#ifdef CONFIG_NVGPU_FECS_TRACE
|
#ifdef CONFIG_NVGPU_FECS_TRACE
|
||||||
@@ -589,19 +583,30 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
|
|||||||
mailbox_value);
|
mailbox_value);
|
||||||
ret = -1;
|
ret = -1;
|
||||||
}
|
}
|
||||||
} else if (fecs_host_intr.fault_during_ctxsw_active) {
|
}
|
||||||
|
|
||||||
|
if (fecs_host_intr->fault_during_ctxsw_active) {
|
||||||
gr_intr_report_ctxsw_error(g,
|
gr_intr_report_ctxsw_error(g,
|
||||||
GPU_FECS_FAULT_DURING_CTXSW,
|
GPU_FECS_FAULT_DURING_CTXSW,
|
||||||
chid, 0);
|
chid, 0);
|
||||||
nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid);
|
nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid);
|
||||||
ret = -1;
|
ret = -1;
|
||||||
} else {
|
}
|
||||||
nvgpu_err(g,
|
|
||||||
"unhandled fecs error interrupt 0x%08x for channel %u",
|
if (fecs_host_intr->watchdog_active) {
|
||||||
gr_fecs_intr, chid);
|
gr_intr_report_ctxsw_error(g,
|
||||||
|
GPU_FECS_CTXSW_WATCHDOG_TIMEOUT,
|
||||||
|
chid, 0);
|
||||||
|
/* currently, recovery is not initiated */
|
||||||
|
nvgpu_err(g, "fecs watchdog triggered for channel %u, "
|
||||||
|
"cannot ctxsw anymore !!", chid);
|
||||||
g->ops.gr.falcon.dump_stats(g);
|
g->ops.gr.falcon.dump_stats(g);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* un-supported interrupts will be flagged in
|
||||||
|
* g->ops.gr.falcon.fecs_host_intr_status.
|
||||||
|
*/
|
||||||
g->ops.gr.falcon.fecs_host_clear_intr(g, gr_fecs_intr);
|
g->ops.gr.falcon.fecs_host_clear_intr(g, gr_fecs_intr);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@@ -899,6 +904,8 @@ static u32 gr_intr_handle_error_interrupts(struct gk20a *g,
|
|||||||
u32 do_reset = 0U;
|
u32 do_reset = 0U;
|
||||||
|
|
||||||
if (intr_info->fecs_error != 0U) {
|
if (intr_info->fecs_error != 0U) {
|
||||||
|
isr_data->fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
|
||||||
|
&(isr_data->fecs_host_intr_status));
|
||||||
if (g->ops.gr.intr.handle_fecs_error(g,
|
if (g->ops.gr.intr.handle_fecs_error(g,
|
||||||
isr_data->ch, isr_data) != 0) {
|
isr_data->ch, isr_data) != 0) {
|
||||||
do_reset = 1U;
|
do_reset = 1U;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
@@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
#include <nvgpu/types.h>
|
#include <nvgpu/types.h>
|
||||||
#include <nvgpu/lock.h>
|
#include <nvgpu/lock.h>
|
||||||
|
#include <include/nvgpu/gr/gr_falcon.h>
|
||||||
|
|
||||||
struct nvgpu_channel;
|
struct nvgpu_channel;
|
||||||
|
|
||||||
@@ -154,6 +155,14 @@ struct nvgpu_gr_isr_data {
|
|||||||
* Class ID corresponding to above subchannel.
|
* Class ID corresponding to above subchannel.
|
||||||
*/
|
*/
|
||||||
u32 class_num;
|
u32 class_num;
|
||||||
|
/**
|
||||||
|
* Value read from fecs_host_int_status h/w reg.
|
||||||
|
*/
|
||||||
|
u32 fecs_intr;
|
||||||
|
/**
|
||||||
|
* S/W defined status for fecs_host_int_status.
|
||||||
|
*/
|
||||||
|
struct nvgpu_fecs_host_intr_status fecs_host_intr_status;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -887,28 +887,52 @@ u32 gm20b_gr_falcon_fecs_host_intr_status(struct gk20a *g,
|
|||||||
struct nvgpu_fecs_host_intr_status *fecs_host_intr)
|
struct nvgpu_fecs_host_intr_status *fecs_host_intr)
|
||||||
{
|
{
|
||||||
u32 gr_fecs_intr = nvgpu_readl(g, gr_fecs_host_int_status_r());
|
u32 gr_fecs_intr = nvgpu_readl(g, gr_fecs_host_int_status_r());
|
||||||
|
u32 host_int_status = 0U;
|
||||||
|
|
||||||
(void) memset(fecs_host_intr, 0,
|
(void) memset(fecs_host_intr, 0,
|
||||||
sizeof(struct nvgpu_fecs_host_intr_status));
|
sizeof(struct nvgpu_fecs_host_intr_status));
|
||||||
|
|
||||||
if ((gr_fecs_intr &
|
if ((gr_fecs_intr &
|
||||||
gr_fecs_host_int_status_umimp_firmware_method_f(1)) != 0U) {
|
gr_fecs_host_int_status_umimp_firmware_method_f(1)) != 0U) {
|
||||||
fecs_host_intr->unimp_fw_method_active = true;
|
fecs_host_intr->unimp_fw_method_active = true;
|
||||||
} else if ((gr_fecs_intr &
|
host_int_status |=
|
||||||
|
gr_fecs_host_int_status_umimp_firmware_method_f(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((gr_fecs_intr &
|
||||||
gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
|
gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
|
||||||
fecs_host_intr->watchdog_active = true;
|
fecs_host_intr->watchdog_active = true;
|
||||||
} else if ((gr_fecs_intr &
|
host_int_status |= gr_fecs_host_int_status_watchdog_active_f();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((gr_fecs_intr &
|
||||||
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
|
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
|
||||||
fecs_host_intr->ctxsw_intr0 =
|
fecs_host_intr->ctxsw_intr0 =
|
||||||
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0);
|
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0);
|
||||||
} else if ((gr_fecs_intr &
|
host_int_status |=
|
||||||
|
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((gr_fecs_intr &
|
||||||
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1)) != 0U) {
|
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1)) != 0U) {
|
||||||
fecs_host_intr->ctxsw_intr1 =
|
fecs_host_intr->ctxsw_intr1 =
|
||||||
gr_fecs_host_int_clear_ctxsw_intr1_clear_f();
|
gr_fecs_host_int_clear_ctxsw_intr1_clear_f();
|
||||||
} else if ((gr_fecs_intr &
|
host_int_status |=
|
||||||
|
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((gr_fecs_intr &
|
||||||
gr_fecs_host_int_status_fault_during_ctxsw_f(1)) != 0U) {
|
gr_fecs_host_int_status_fault_during_ctxsw_f(1)) != 0U) {
|
||||||
fecs_host_intr->fault_during_ctxsw_active = true;
|
fecs_host_intr->fault_during_ctxsw_active = true;
|
||||||
} else {
|
host_int_status |=
|
||||||
nvgpu_log_info(g, "un-handled fecs intr: 0x%x", gr_fecs_intr);
|
gr_fecs_host_int_status_fault_during_ctxsw_f(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gr_fecs_intr != host_int_status) {
|
||||||
|
nvgpu_err(g, "un-supported fecs_host_int_status. "
|
||||||
|
"fecs_host_int_status: 0x%x "
|
||||||
|
"handled host_int_status: 0x%x",
|
||||||
|
gr_fecs_intr, host_int_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
return gr_fecs_intr;
|
return gr_fecs_intr;
|
||||||
|
|||||||
@@ -150,13 +150,12 @@ int gp10b_gr_intr_handle_fecs_error(struct gk20a *g,
|
|||||||
struct nvgpu_channel *ch;
|
struct nvgpu_channel *ch;
|
||||||
u32 chid = NVGPU_INVALID_CHANNEL_ID;
|
u32 chid = NVGPU_INVALID_CHANNEL_ID;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
struct nvgpu_fecs_host_intr_status *fecs_host_intr;
|
||||||
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
|
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
|
||||||
struct nvgpu_tsg *tsg;
|
struct nvgpu_tsg *tsg;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
struct nvgpu_fecs_host_intr_status fecs_host_intr;
|
u32 gr_fecs_intr = isr_data->fecs_intr;
|
||||||
u32 gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
|
|
||||||
&fecs_host_intr);
|
|
||||||
|
|
||||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
|
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
|
||||||
|
|
||||||
@@ -164,18 +163,27 @@ int gp10b_gr_intr_handle_fecs_error(struct gk20a *g,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_NVGPU_CILP
|
#ifdef CONFIG_NVGPU_CILP
|
||||||
|
fecs_host_intr = &isr_data->fecs_host_intr_status;
|
||||||
/*
|
/*
|
||||||
* INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
|
* INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
|
||||||
* indicates that a CILP ctxsw save has finished
|
* indicates that a CILP ctxsw save has finished
|
||||||
*/
|
*/
|
||||||
if (fecs_host_intr.ctxsw_intr1 != 0U) {
|
if (fecs_host_intr->ctxsw_intr1 != 0U) {
|
||||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
|
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
|
||||||
"CILP: ctxsw save completed!\n");
|
"CILP: ctxsw save completed!\n");
|
||||||
|
|
||||||
/* now clear the interrupt */
|
/* now clear the interrupt */
|
||||||
g->ops.gr.falcon.fecs_host_clear_intr(g,
|
g->ops.gr.falcon.fecs_host_clear_intr(g,
|
||||||
fecs_host_intr.ctxsw_intr1);
|
fecs_host_intr->ctxsw_intr1);
|
||||||
|
/**
|
||||||
|
* Clear the interrupt from isr_data too, so that
|
||||||
|
* nvgpu_gr_intr_handle_fecs_error does not handle an
|
||||||
|
* already-handled interrupt again.
|
||||||
|
*/
|
||||||
|
isr_data->fecs_intr &= ~(fecs_host_intr->ctxsw_intr1);
|
||||||
|
fecs_host_intr->ctxsw_intr1 = 0U;
|
||||||
|
|
||||||
ret = gp10b_gr_intr_get_cilp_preempt_pending_chid(g, &chid);
|
ret = gp10b_gr_intr_get_cilp_preempt_pending_chid(g, &chid);
|
||||||
if ((ret != 0) || (chid == NVGPU_INVALID_CHANNEL_ID)) {
|
if ((ret != 0) || (chid == NVGPU_INVALID_CHANNEL_ID)) {
|
||||||
|
|||||||
@@ -422,10 +422,12 @@ static int test_gr_intr_error_injections(struct unit_module *m,
|
|||||||
/* Call fecs_interrupt handler with fecs error set */
|
/* Call fecs_interrupt handler with fecs error set */
|
||||||
isr_data.ch = NULL;
|
isr_data.ch = NULL;
|
||||||
nvgpu_posix_io_writel_reg_space(g, gr_fecs_host_int_status_r(), 0);
|
nvgpu_posix_io_writel_reg_space(g, gr_fecs_host_int_status_r(), 0);
|
||||||
err = nvgpu_gr_intr_handle_fecs_error(g, NULL, &isr_data);
|
isr_data.fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
|
||||||
|
&(isr_data.fecs_host_intr_status));
|
||||||
|
err = g->ops.gr.intr.handle_fecs_error(g, NULL, &isr_data);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
unit_return_fail(m,
|
unit_return_fail(m,
|
||||||
"nvgpu_gr_intr_handle_fecs_error failed\n");
|
"gr.intr.handle_fecs_error failed\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fault injection - gpc exception with reset */
|
/* Fault injection - gpc exception with reset */
|
||||||
|
|||||||
Reference in New Issue
Block a user