gpu: nvgpu: add fecs_host_intr hals

Add three hals in gr.falcon
- fecs_host_intr_status
 This reads the fecs_host_intr_status register, sets the variables in the
 nvgpu_fecs_host_intr_status struct, and reports them back so that gr can
 handle the interrupts properly
- fecs_host_clear_intr
 This clears the required bits in fecs_host_intr.
- read_fecs_ctxsw_mailbox
 This reads the ctxsw_mailbox register based on register index.

Use these hals in gk20a_gr_handle_fecs_error and
gp10b_gr_handle_fecs_error functions.

JIRA NVGPU-1881

Change-Id: Ia02a254acc38e7e25c7c3605e9f1dda4da898543
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2093917
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Vinod G
2019-04-09 19:47:37 -07:00
committed by mobile promotions
parent 815c102e5d
commit 3ef8e6b099
12 changed files with 128 additions and 25 deletions

View File

@@ -379,39 +379,42 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g,
int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
struct nvgpu_gr_isr_data *isr_data)
{
u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
u32 gr_fecs_intr, mailbox_value;
int ret = 0;
struct nvgpu_fecs_host_intr_status fecs_host_intr;
u32 chid = isr_data->ch != NULL ?
isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
u32 mailbox_id = NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6;
gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
&fecs_host_intr);
if (gr_fecs_intr == 0U) {
return 0;
}
if ((gr_fecs_intr &
gr_fecs_host_int_status_umimp_firmware_method_f(1)) != 0U) {
if (fecs_host_intr.unimp_fw_method_active) {
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
mailbox_id);
gk20a_gr_set_error_notifier(g, isr_data,
NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD);
nvgpu_err(g,
"firmware method error 0x%08x for offset 0x%04x",
gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
mailbox_value,
isr_data->data_lo);
ret = -1;
} else if ((gr_fecs_intr &
gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
} else if (fecs_host_intr.watchdog_active) {
gr_report_ctxsw_error(g, GPU_FECS_CTXSW_WATCHDOG_TIMEOUT,
chid, 0);
/* currently, recovery is not initiated */
nvgpu_err(g, "fecs watchdog triggered for channel %u, "
"cannot ctxsw anymore !!", chid);
g->ops.gr.falcon.dump_stats(g);
} else if ((gr_fecs_intr &
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
u32 mailbox_value = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6));
} else if (fecs_host_intr.ctxsw_intr0 != 0U) {
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
mailbox_id);
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (mailbox_value ==
g->ops.gr.fecs_trace.get_buffer_full_mailbox_val()) {
g->ops.gr.fecs_trace.get_buffer_full_mailbox_val()) {
nvgpu_info(g, "ctxsw intr0 set by ucode, "
"timestamp buffer full");
nvgpu_gr_fecs_trace_reset_buffer(g);
@@ -421,9 +424,9 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
* The mailbox values may vary across chips hence keeping it
* as a HAL.
*/
if (g->ops.gr.get_ctxsw_checksum_mismatch_mailbox_val
!= NULL && mailbox_value ==
g->ops.gr.get_ctxsw_checksum_mismatch_mailbox_val()) {
if ((g->ops.gr.get_ctxsw_checksum_mismatch_mailbox_val != NULL)
&& (mailbox_value ==
g->ops.gr.get_ctxsw_checksum_mismatch_mailbox_val())) {
gr_report_ctxsw_error(g, GPU_FECS_CTXSW_CRC_MISMATCH,
chid, mailbox_value);
@@ -443,8 +446,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
mailbox_value);
ret = -1;
}
} else if ((gr_fecs_intr &
gr_fecs_host_int_status_fault_during_ctxsw_f(1)) != 0U) {
} else if (fecs_host_intr.fault_during_ctxsw_active) {
gr_report_ctxsw_error(g, GPU_FECS_FAULT_DURING_CTXSW,
chid, 0);
nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid);
@@ -456,7 +458,8 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
g->ops.gr.falcon.dump_stats(g);
}
gk20a_writel(g, gr_fecs_host_int_clear_r(), gr_fecs_intr);
g->ops.gr.falcon.fecs_host_clear_intr(g, gr_fecs_intr);
return ret;
}

View File

@@ -44,9 +44,6 @@
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP BIT32(1)
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP BIT32(2)
#define CTXSW_INTR0 BIT32(0)
#define CTXSW_INTR1 BIT32(1)
struct tsg_gk20a;
struct nvgpu_gr_ctx;
struct channel_gk20a;

View File

@@ -507,6 +507,12 @@ static const struct gpu_ops gm20b_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =

View File

@@ -903,26 +903,32 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g,
struct channel_gk20a *__ch,
struct nvgpu_gr_isr_data *isr_data)
{
u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
struct channel_gk20a *ch;
u32 chid = FIFO_INVAL_CHANNEL_ID;
int ret = 0;
struct tsg_gk20a *tsg;
struct nvgpu_fecs_host_intr_status fecs_host_intr;
u32 gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
&fecs_host_intr);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
if (gr_fecs_intr == 0U) {
return 0;
}
/*
* INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
* indicates that a CILP ctxsw save has finished
*/
if ((gr_fecs_intr &
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1)) != 0U) {
if (fecs_host_intr.ctxsw_intr1 != 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP: ctxsw save completed!\n");
/* now clear the interrupt */
gk20a_writel(g, gr_fecs_host_int_clear_r(),
gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
g->ops.gr.falcon.fecs_host_clear_intr(g,
fecs_host_intr.ctxsw_intr1);
ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
if ((ret != 0) || (chid == FIFO_INVAL_CHANNEL_ID)) {

View File

@@ -602,6 +602,12 @@ static const struct gpu_ops gp10b_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =

View File

@@ -740,6 +740,12 @@ static const struct gpu_ops gv100_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =

View File

@@ -715,6 +715,12 @@ static const struct gpu_ops gv11b_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =

View File

@@ -39,6 +39,8 @@
#define CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT_US 10U
#define CTXSW_WDT_DEFAULT_VALUE 0x7FFFFFFFU
#define CTXSW_INTR0 BIT32(0)
#define CTXSW_INTR1 BIT32(1)
void gm20b_gr_falcon_load_gpccs_dmem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size)
@@ -957,3 +959,42 @@ void gm20b_gr_falcon_fecs_host_int_enable(struct gk20a *g)
gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
gr_fecs_host_int_enable_watchdog_enable_f());
}
/*
 * Read one FECS CTXSW mailbox register.
 *
 * @g:         GPU device pointer.
 * @reg_index: mailbox index (NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX*).
 *
 * Returns the raw 32-bit contents of gr_fecs_ctxsw_mailbox(reg_index).
 */
u32 gm20b_gr_falcon_read_fecs_ctxsw_mailbox(struct gk20a *g, u32 reg_index)
{
	u32 mailbox_val;

	mailbox_val = nvgpu_readl(g, gr_fecs_ctxsw_mailbox_r(reg_index));

	return mailbox_val;
}
/*
 * Acknowledge FECS-to-host interrupts.
 *
 * @g:         GPU device pointer.
 * @fecs_intr: mask of interrupt bits to clear; written verbatim to the
 *             gr_fecs_host_int_clear register.
 */
void gm20b_gr_falcon_fecs_host_clear_intr(struct gk20a *g, u32 fecs_intr)
{
	u32 clear_mask = fecs_intr;

	nvgpu_writel(g, gr_fecs_host_int_clear_r(), clear_mask);
}
/*
 * Decode the pending FECS-to-host interrupt status.
 *
 * Reads gr_fecs_host_int_status and fills in @fecs_host_intr so callers
 * can dispatch handling without touching the raw register layout.
 *
 * Only the highest-priority pending condition is flagged: the checks form
 * an else-if chain (unimplemented fw method, watchdog, ctxsw_intr0,
 * ctxsw_intr1, fault during ctxsw), mirroring the handling order in the
 * gr ISR paths.
 *
 * Returns the raw interrupt status word (0 when nothing is pending).
 */
u32 gm20b_gr_falcon_fecs_host_intr_status(struct gk20a *g,
		struct nvgpu_fecs_host_intr_status *fecs_host_intr)
{
	u32 intr = nvgpu_readl(g, gr_fecs_host_int_status_r());

	(void) memset(fecs_host_intr, 0, sizeof(*fecs_host_intr));

	if ((intr &
	     gr_fecs_host_int_status_umimp_firmware_method_f(1)) != 0U) {
		fecs_host_intr->unimp_fw_method_active = true;
	} else if ((intr &
		    gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
		fecs_host_intr->watchdog_active = true;
	} else if ((intr &
		    gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
		fecs_host_intr->ctxsw_intr0 =
			gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0);
	} else if ((intr &
		    gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1)) != 0U) {
		/*
		 * Report the *clear*-register encoding for intr1 so the
		 * caller can pass it straight to fecs_host_clear_intr.
		 */
		fecs_host_intr->ctxsw_intr1 =
			gr_fecs_host_int_clear_ctxsw_intr1_clear_f();
	} else if ((intr &
		    gr_fecs_host_int_status_fault_during_ctxsw_f(1)) != 0U) {
		fecs_host_intr->fault_during_ctxsw_active = true;
	}

	return intr;
}

View File

@@ -27,7 +27,12 @@
struct gk20a;
struct nvgpu_fecs_method_op;
struct nvgpu_fecs_host_intr_status;
u32 gm20b_gr_falcon_read_fecs_ctxsw_mailbox(struct gk20a *g, u32 reg_index);
void gm20b_gr_falcon_fecs_host_clear_intr(struct gk20a *g, u32 fecs_intr);
u32 gm20b_gr_falcon_fecs_host_intr_status(struct gk20a *g,
struct nvgpu_fecs_host_intr_status *fecs_host_intr);
u32 gm20b_gr_falcon_fecs_base_addr(void);
u32 gm20b_gr_falcon_gpccs_base_addr(void);
void gm20b_gr_falcon_fecs_dump_stats(struct gk20a *g);

View File

@@ -83,6 +83,7 @@ struct nvgpu_dbg_reg_op;
struct gk20a_cs_snapshot;
struct nvgpu_preemption_modes_rec;
struct nvgpu_gr_ctx;
struct nvgpu_fecs_host_intr_status;
typedef void (*global_ctx_mem_destroy_fn)(struct gk20a *g,
struct nvgpu_mem *mem);
@@ -537,6 +538,12 @@ struct gpu_ops {
} config;
struct {
u32 (*read_fecs_ctxsw_mailbox)(struct gk20a *g,
u32 reg_index);
void (*fecs_host_clear_intr)(struct gk20a *g,
u32 fecs_intr);
u32 (*fecs_host_intr_status)(struct gk20a *g,
struct nvgpu_fecs_host_intr_status *fecs_host_intr);
u32 (*fecs_base_addr)(void);
u32 (*gpccs_base_addr)(void);
void (*set_current_ctx_invalid)(struct gk20a *g);

View File

@@ -42,6 +42,20 @@ struct gk20a;
#define NVGPU_GR_FALCON_METHOD_PREEMPT_IMAGE_SIZE 12
#define NVGPU_GR_FALCON_METHOD_CONFIGURE_CTXSW_INTR 13
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0 0U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX1 1U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX2 2U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX4 4U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6 6U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX7 7U
/*
 * Decoded FECS-to-host interrupt state, filled in by the
 * gr.falcon.fecs_host_intr_status HAL and consumed by the gr ISR
 * handlers (gk20a/gp10b gr_handle_fecs_error).
 */
struct nvgpu_fecs_host_intr_status {
	/* Non-zero when CTXSW interrupt 0 is pending; holds the
	 * status-register field value for that bit. */
	u32 ctxsw_intr0;
	/* Non-zero when CTXSW interrupt 1 is pending; holds the
	 * clear-register field value, ready to pass to
	 * fecs_host_clear_intr. */
	u32 ctxsw_intr1;
	/* True when a fault occurred during context switch. */
	bool fault_during_ctxsw_active;
	/* True when ucode reported an unimplemented firmware method. */
	bool unimp_fw_method_active;
	/* True when the FECS watchdog interrupt is active. */
	bool watchdog_active;
};
int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g);
int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g);

View File

@@ -773,6 +773,12 @@ static const struct gpu_ops tu104_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =