diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr.c b/drivers/gpu/nvgpu/common/gr/gr_intr.c index c7fcd773d..3cab8df44 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_intr.c +++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c @@ -518,18 +518,18 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch, { u32 gr_fecs_intr, mailbox_value; int ret = 0; - struct nvgpu_fecs_host_intr_status fecs_host_intr; u32 chid = (isr_data->ch != NULL) ? isr_data->ch->chid : NVGPU_INVALID_CHANNEL_ID; u32 mailbox_id = NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6; + struct nvgpu_fecs_host_intr_status *fecs_host_intr; - gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g, - &fecs_host_intr); + gr_fecs_intr = isr_data->fecs_intr; if (gr_fecs_intr == 0U) { return 0; } + fecs_host_intr = &isr_data->fecs_host_intr_status; - if (fecs_host_intr.unimp_fw_method_active) { + if (fecs_host_intr->unimp_fw_method_active) { mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g, mailbox_id); nvgpu_gr_intr_set_error_notifier(g, isr_data, @@ -542,15 +542,9 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch, isr_data->offset << 2U, isr_data->class_num, isr_data->data_lo); ret = -1; - } else if (fecs_host_intr.watchdog_active) { - gr_intr_report_ctxsw_error(g, - GPU_FECS_CTXSW_WATCHDOG_TIMEOUT, - chid, 0); - /* currently, recovery is not initiated */ - nvgpu_err(g, "fecs watchdog triggered for channel %u, " - "cannot ctxsw anymore !!", chid); - g->ops.gr.falcon.dump_stats(g); - } else if (fecs_host_intr.ctxsw_intr0 != 0U) { + } + + if (fecs_host_intr->ctxsw_intr0 != 0U) { mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g, mailbox_id); #ifdef CONFIG_NVGPU_FECS_TRACE @@ -589,19 +583,30 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch, mailbox_value); ret = -1; } - } else if (fecs_host_intr.fault_during_ctxsw_active) { + } + + if (fecs_host_intr->fault_during_ctxsw_active) { gr_intr_report_ctxsw_error(g, 
GPU_FECS_FAULT_DURING_CTXSW, chid, 0); nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid); ret = -1; - } else { - nvgpu_err(g, - "unhandled fecs error interrupt 0x%08x for channel %u", - gr_fecs_intr, chid); + } + + if (fecs_host_intr->watchdog_active) { + gr_intr_report_ctxsw_error(g, + GPU_FECS_CTXSW_WATCHDOG_TIMEOUT, + chid, 0); + /* currently, recovery is not initiated */ + nvgpu_err(g, "fecs watchdog triggered for channel %u, " + "cannot ctxsw anymore !!", chid); g->ops.gr.falcon.dump_stats(g); } + /* + * Unsupported interrupt bits are already reported by + * g->ops.gr.falcon.fecs_host_intr_status(). + */ g->ops.gr.falcon.fecs_host_clear_intr(g, gr_fecs_intr); return ret; @@ -899,6 +904,8 @@ static u32 gr_intr_handle_error_interrupts(struct gk20a *g, u32 do_reset = 0U; if (intr_info->fecs_error != 0U) { + isr_data->fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g, + &(isr_data->fecs_host_intr_status)); if (g->ops.gr.intr.handle_fecs_error(g, isr_data->ch, isr_data) != 0) { do_reset = 1U; diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h b/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h index c75a987c7..e587d07ab 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h +++ b/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,6 +25,7 @@ #include #include +#include struct nvgpu_channel; @@ -154,6 +155,14 @@ struct nvgpu_gr_isr_data { * Class ID corresponding to above subchannel. */ u32 class_num; + /** + * Value read from the fecs_host_int_status h/w register. + */ + u32 fecs_intr; + /** + * S/W decoded status for fecs_host_int_status. 
+ */ + struct nvgpu_fecs_host_intr_status fecs_host_intr_status; }; /** diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b_fusa.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b_fusa.c index d4cb3d80c..353cea5f6 100644 --- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b_fusa.c @@ -887,28 +887,52 @@ u32 gm20b_gr_falcon_fecs_host_intr_status(struct gk20a *g, struct nvgpu_fecs_host_intr_status *fecs_host_intr) { u32 gr_fecs_intr = nvgpu_readl(g, gr_fecs_host_int_status_r()); + u32 host_int_status = 0U; (void) memset(fecs_host_intr, 0, sizeof(struct nvgpu_fecs_host_intr_status)); + if ((gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) != 0U) { fecs_host_intr->unimp_fw_method_active = true; - } else if ((gr_fecs_intr & + host_int_status |= + gr_fecs_host_int_status_umimp_firmware_method_f(1); + } + + if ((gr_fecs_intr & gr_fecs_host_int_status_watchdog_active_f()) != 0U) { fecs_host_intr->watchdog_active = true; - } else if ((gr_fecs_intr & + host_int_status |= gr_fecs_host_int_status_watchdog_active_f(); + } + + if ((gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) { fecs_host_intr->ctxsw_intr0 = gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0); - } else if ((gr_fecs_intr & + host_int_status |= + gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0); + } + + if ((gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1)) != 0U) { fecs_host_intr->ctxsw_intr1 = gr_fecs_host_int_clear_ctxsw_intr1_clear_f(); - } else if ((gr_fecs_intr & + host_int_status |= + gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1); + } + + if ((gr_fecs_intr & gr_fecs_host_int_status_fault_during_ctxsw_f(1)) != 0U) { fecs_host_intr->fault_during_ctxsw_active = true; - } else { - nvgpu_log_info(g, "un-handled fecs intr: 0x%x", gr_fecs_intr); + host_int_status |= + gr_fecs_host_int_status_fault_during_ctxsw_f(1); + } + + if (gr_fecs_intr != host_int_status) { + nvgpu_err(g, 
"un-supported fecs_host_int_status. " + "fecs_host_int_status: 0x%x " + "handled host_int_status: 0x%x", + gr_fecs_intr, host_int_status); } return gr_fecs_intr; diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b_fusa.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b_fusa.c index 0b7788170..8f4890fe7 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b_fusa.c @@ -150,13 +150,12 @@ int gp10b_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch; u32 chid = NVGPU_INVALID_CHANNEL_ID; int ret = 0; + struct nvgpu_fecs_host_intr_status *fecs_host_intr; #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL struct nvgpu_tsg *tsg; #endif #endif - struct nvgpu_fecs_host_intr_status fecs_host_intr; - u32 gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g, - &fecs_host_intr); + u32 gr_fecs_intr = isr_data->fecs_intr; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " "); @@ -164,18 +163,27 @@ int gp10b_gr_intr_handle_fecs_error(struct gk20a *g, return 0; } + #ifdef CONFIG_NVGPU_CILP + fecs_host_intr = &isr_data->fecs_host_intr_status; /* * INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR) * indicates that a CILP ctxsw save has finished */ - if (fecs_host_intr.ctxsw_intr1 != 0U) { + if (fecs_host_intr->ctxsw_intr1 != 0U) { nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: ctxsw save completed!\n"); /* now clear the interrupt */ g->ops.gr.falcon.fecs_host_clear_intr(g, - fecs_host_intr.ctxsw_intr1); + fecs_host_intr->ctxsw_intr1); + /** + * Clear the interrupt from isr_data too, so that + * nvgpu_gr_intr_handle_fecs_error does not handle an + * already handled interrupt. 
+ */ + isr_data->fecs_intr &= ~(fecs_host_intr->ctxsw_intr1); + fecs_host_intr->ctxsw_intr1 = 0U; ret = gp10b_gr_intr_get_cilp_preempt_pending_chid(g, &chid); if ((ret != 0) || (chid == NVGPU_INVALID_CHANNEL_ID)) { diff --git a/userspace/units/gr/intr/nvgpu-gr-intr.c b/userspace/units/gr/intr/nvgpu-gr-intr.c index 6fded638f..313bec4aa 100644 --- a/userspace/units/gr/intr/nvgpu-gr-intr.c +++ b/userspace/units/gr/intr/nvgpu-gr-intr.c @@ -422,10 +422,12 @@ static int test_gr_intr_error_injections(struct unit_module *m, /* Call fecs_interrupt handler with fecs error set */ isr_data.ch = NULL; nvgpu_posix_io_writel_reg_space(g, gr_fecs_host_int_status_r(), 0); - err = nvgpu_gr_intr_handle_fecs_error(g, NULL, &isr_data); + isr_data.fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g, + &(isr_data.fecs_host_intr_status)); + err = g->ops.gr.intr.handle_fecs_error(g, NULL, &isr_data); if (err != 0) { unit_return_fail(m, - "nvgpu_gr_intr_handle_fecs_error failed\n"); + "gr.intr.handle_fecs_error failed\n"); } /* Fault injection - gpc exception with reset */