gpu: nvgpu: Add CE interrupt handling

a. LAUNCH_ERR
    - Userspace error.
    - Triggered by a faulty launch.
    - Handle via recovery, i.e. reset the CE engine and tear down
      the faulty channel.

b. INVALID_CONFIG
    - Triggered when an LCE is mapped to a floorswept PCE.
    - On iGPU, we use the default PCE-to-LCE HW mapping.
      The default mapping can be read from the NV_CE_PCE2LCE_CONFIG
      INIT value in the CE reference manual.
    - The NvGPU driver configures the mapping on dGPUs (currently
      only on Turing).
    - So, this interrupt can only be triggered by a kernel or HW
      error.
    - Recovery (i.e. killing the context + engine reset) will not
      help resolve this error.
    - Trigger SW quiesce as part of handling.

c. MTHD_BUFFER_FAULT
    - The NvGPU driver allocates method buffers for all TSGs or
      contexts, maps them in the BAR2 VA space and writes the VA
      into the channel instance block.
    - Can only be triggered by a kernel bug.
    - Recovery will not help; quiesce is needed.

d. FBUF_CRC_FAIL
    - Triggered when the CRC entry read from the method fault buffer
      does not match the CRC computed from the methods contained in
      the buffer.
    - This indicates memory corruption and is a fatal interrupt which
      at least requires the LCE to be reset before operations can
      start again, if not the entire GPU.
    - Better to quiesce on memory corruption; a CE engine reset (via
      recovery) will not help.

e. FBUF_MAGIC_CHK_FAIL
    - Triggered when the MAGIC_NUM entry read from the method fault
      buffer does not match NV_CE_MTHD_BUFFER_GLOBAL_HDR_MAGIC_NUM_VAL.
    - This indicates memory corruption and is a fatal interrupt.
    - Better to quiesce on memory corruption.

f. STALLING_DEBUG
    - Only triggered by a SW write, for debug purposes.
    - Debug interrupt, currently ignored.
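
In the new scheme, the policy above boils down to two flags that each
chip-specific ISR reports back to common code, which then picks between
SW quiesce and recovery. A condensed sketch of the common wrapper added
by this change (names as in the diff below; comments added here for
illustration only):

void nvgpu_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
{
    bool needs_rc = false;
    bool needs_quiesce = false;

    /* The chip HAL only classifies the pending CE interrupts. */
    if (g->ops.ce.isr_stall != NULL) {
        g->ops.ce.isr_stall(g, inst_id, pri_base,
                &needs_rc, &needs_quiesce);
    }

    /* Fatal cases (b. - e. above): kernel/HW bug or memory corruption. */
    if (needs_quiesce) {
        nvgpu_sw_quiesce(g);
    }

    /* Userspace launch error (a. above): engine reset + channel teardown. */
    if (needs_rc) {
        nvgpu_rc_ce_fault(g, inst_id);
    }
}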

Move launch error handling from the GP10b to the GV11b HAL because:
1. The LAUNCHERR_REPORT errcode METHOD_BUFFER_ACCESS_FAULT is not
   defined on Pascal.
2. We do not support GP10b on dev-main ToT.
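
In the GV11b HAL, INVALID_CONFIG and METHOD_BUFFER_FAULT may also be
reported through the LAUNCHERR error code, so the launch-error path has
to triage the errcode before choosing between quiesce and recovery. A
trimmed sketch of that triage (full version in the gv11b_ce_stall_isr()
hunk below):

    if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) {
        u32 err_code = ce_lce_launcherr_report_v(
                nvgpu_readl(g, ce_lce_launcherr_r(inst_id)));

        nvgpu_err(g, "ce launch error interrupt with errcode:0x%x",
                err_code);
        if ((err_code == ce_lce_launcherr_report_method_buffer_access_fault_v()) ||
                (err_code == ce_lce_launcherr_report_invalid_config_v())) {
            /* Kernel/HW error: recovery will not help, quiesce. */
            *needs_quiesce |= true;
        } else {
            /* Faulty userspace launch: recover channel + engine. */
            *needs_rc |= true;
        }
        clear_intr |= ce_intr_status_launcherr_pending_f();
    }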

JIRA NVGPU-8102

Change-Id: Idc84119bc23b5e85f3479fe62cc8720e98b627a5
Signed-off-by: Tejal Kudav <tkudav@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2678893
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
35 changed files with 246 additions and 144 deletions


@@ -28,6 +28,8 @@
#include <nvgpu/power_features/cg.h> #include <nvgpu/power_features/cg.h>
#include <nvgpu/cic_mon.h> #include <nvgpu/cic_mon.h>
#include <nvgpu/mc.h> #include <nvgpu/mc.h>
#include <nvgpu/rc.h>
#include <nvgpu/nvgpu_init.h>
int nvgpu_ce_init_support(struct gk20a *g) int nvgpu_ce_init_support(struct gk20a *g)
{ {
@@ -83,3 +85,24 @@ int nvgpu_ce_init_support(struct gk20a *g)
return 0; return 0;
} }
void nvgpu_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
{
bool needs_rc = false;
bool needs_quiesce = false;
if (g->ops.ce.isr_stall != NULL) {
g->ops.ce.isr_stall(g, inst_id, pri_base, &needs_rc,
&needs_quiesce);
}
if (needs_quiesce) {
nvgpu_sw_quiesce(g);
}
if (needs_rc) {
nvgpu_log(g, gpu_dbg_intr,
"Recovery needed to handle CE interrupt.");
nvgpu_rc_ce_fault(g, inst_id);
}
}


@@ -224,6 +224,42 @@ void nvgpu_rc_gr_fault(struct gk20a *g, struct nvgpu_tsg *tsg,
nvgpu_log(g, gpu_dbg_gr, "done"); nvgpu_log(g, gpu_dbg_gr, "done");
} }
void nvgpu_rc_ce_fault(struct gk20a *g, u32 inst_id)
{
struct nvgpu_channel *ch = NULL;
struct nvgpu_tsg *tsg = NULL;
u32 chid = NVGPU_INVALID_CHANNEL_ID;
u64 inst_ptr = 0U;
if (g->ops.ce.get_inst_ptr_from_lce != NULL) {
inst_ptr = g->ops.ce.get_inst_ptr_from_lce(g,
inst_id);
}
/* refch will be put back before recovery */
ch = nvgpu_channel_refch_from_inst_ptr(g, inst_ptr);
if (ch == NULL) {
return;
} else {
chid = ch->chid;
nvgpu_channel_put(ch);
tsg = nvgpu_tsg_from_ch(ch);
if (tsg == NULL) {
nvgpu_err(g, "channel_id: %d not bound to tsg",
chid);
/* ToDo: Trigger Quiesce? */
return;
}
nvgpu_tsg_set_error_notifier(g, tsg, NVGPU_ERR_NOTIFIER_CE_ERROR);
}
#ifdef CONFIG_NVGPU_RECOVERY
nvgpu_rc_tsg_and_related_engines(g, tsg, true,
RC_TYPE_CE_FAULT);
#else
WARN_ON(!g->sw_quiesce_pending);
(void)tsg;
#endif
}
void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g) void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g)
{ {
#ifdef CONFIG_NVGPU_RECOVERY #ifdef CONFIG_NVGPU_RECOVERY


@@ -38,7 +38,8 @@
#include <nvgpu/hw/gk20a/hw_ce2_gk20a.h> #include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
void gk20a_ce2_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) void gk20a_ce2_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce)
{ {
u32 ce2_intr = nvgpu_readl(g, ce2_intr_status_r()); u32 ce2_intr = nvgpu_readl(g, ce2_intr_status_r());
u32 clear_intr = 0U; u32 clear_intr = 0U;
@@ -55,9 +56,11 @@ void gk20a_ce2_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
} }
if ((ce2_intr & ce2_intr_status_launcherr_pending_f()) != 0U) { if ((ce2_intr & ce2_intr_status_launcherr_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr, "ce2 launch error interrupt"); nvgpu_log(g, gpu_dbg_intr, "ce2 launch error interrupt");
*needs_rc |= true;
clear_intr |= ce2_intr_status_launcherr_pending_f(); clear_intr |= ce2_intr_status_launcherr_pending_f();
} }
*needs_quiesce |= false;
nvgpu_writel(g, ce2_intr_status_r(), clear_intr); nvgpu_writel(g, ce2_intr_status_r(), clear_intr);
} }


@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -26,7 +26,8 @@
struct gk20a; struct gk20a;
void gk20a_ce2_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); void gk20a_ce2_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce);
u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
#endif /*NVGPU_CE2_GK20A_H*/ #endif /*NVGPU_CE2_GK20A_H*/


@@ -29,7 +29,8 @@ struct gk20a;
void ga10b_ce_init_hw(struct gk20a *g); void ga10b_ce_init_hw(struct gk20a *g);
#endif #endif
void ga10b_ce_intr_enable(struct gk20a *g, bool enable); void ga10b_ce_intr_enable(struct gk20a *g, bool enable);
void ga10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); void ga10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce);
void ga10b_ce_intr_retrigger(struct gk20a *g, u32 inst_id); void ga10b_ce_intr_retrigger(struct gk20a *g, u32 inst_id);
void ga10b_ce_request_idle(struct gk20a *g); void ga10b_ce_request_idle(struct gk20a *g);


@@ -184,7 +184,8 @@ void ga10b_ce_intr_enable(struct gk20a *g, bool enable)
} }
} }
void ga10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) void ga10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce)
{ {
u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id)); u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id));
u32 clear_intr = 0U; u32 clear_intr = 0U;
@@ -199,6 +200,7 @@ void ga10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
*/ */
if ((ce_intr & ce_intr_status_fbuf_crc_fail_pending_f()) != 0U) { if ((ce_intr & ce_intr_status_fbuf_crc_fail_pending_f()) != 0U) {
nvgpu_err(g, "ce: inst %d, fault buffer crc mismatch", inst_id); nvgpu_err(g, "ce: inst %d, fault buffer crc mismatch", inst_id);
*needs_quiesce |= true;
clear_intr |= ce_intr_status_fbuf_crc_fail_reset_f(); clear_intr |= ce_intr_status_fbuf_crc_fail_reset_f();
} }
@@ -210,6 +212,7 @@ void ga10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
if ((ce_intr & ce_intr_status_fbuf_magic_chk_fail_pending_f()) != 0U) { if ((ce_intr & ce_intr_status_fbuf_magic_chk_fail_pending_f()) != 0U) {
nvgpu_err(g, "ce: inst %d, fault buffer magic check fail", nvgpu_err(g, "ce: inst %d, fault buffer magic check fail",
inst_id); inst_id);
*needs_quiesce |= true;
clear_intr |= ce_intr_status_fbuf_magic_chk_fail_reset_f(); clear_intr |= ce_intr_status_fbuf_magic_chk_fail_reset_f();
} }
@@ -229,7 +232,7 @@ void ga10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
* The remaining legacy interrupts are handled by legacy interrupt * The remaining legacy interrupts are handled by legacy interrupt
* handler. * handler.
*/ */
gv11b_ce_stall_isr(g, inst_id, pri_base); gv11b_ce_stall_isr(g, inst_id, pri_base, needs_rc, needs_quiesce);
} }
void ga10b_ce_intr_retrigger(struct gk20a *g, u32 inst_id) void ga10b_ce_intr_retrigger(struct gk20a *g, u32 inst_id)


@@ -28,7 +28,8 @@
struct gk20a; struct gk20a;
void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce);
#ifdef CONFIG_NVGPU_NONSTALL_INTR #ifdef CONFIG_NVGPU_NONSTALL_INTR
u32 gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); u32 gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
#endif /* NVGPU_HAL_NON_FUSA */ #endif /* NVGPU_HAL_NON_FUSA */


@@ -32,7 +32,8 @@
#include <nvgpu/hw/gp10b/hw_ce_gp10b.h> #include <nvgpu/hw/gp10b/hw_ce_gp10b.h>
void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce)
{ {
u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id)); u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id));
u32 clear_intr = 0U; u32 clear_intr = 0U;
@@ -47,13 +48,8 @@ void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
clear_intr |= ce_intr_status_blockpipe_pending_f(); clear_intr |= ce_intr_status_blockpipe_pending_f();
} }
if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) { *needs_quiesce |= false;
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE, *needs_rc |= false;
GPU_CE_LAUNCH_ERROR);
nvgpu_err(g, "ce launch error interrupt");
clear_intr |= ce_intr_status_launcherr_pending_f();
}
nvgpu_writel(g, ce_intr_status_r(inst_id), clear_intr); nvgpu_writel(g, ce_intr_status_r(inst_id), clear_intr);
return; return;
} }


@@ -28,9 +28,13 @@
struct gk20a; struct gk20a;
struct nvgpu_device; struct nvgpu_device;
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
void gv11b_ce_mthd_buffer_fault_in_bar2_fault(struct gk20a *g); void gv11b_ce_mthd_buffer_fault_in_bar2_fault(struct gk20a *g);
#endif
u32 gv11b_ce_get_num_pce(struct gk20a *g); u32 gv11b_ce_get_num_pce(struct gk20a *g);
void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce);
void gv11b_ce_init_prod_values(struct gk20a *g); void gv11b_ce_init_prod_values(struct gk20a *g);
void gv11b_ce_halt_engine(struct gk20a *g, const struct nvgpu_device *dev); void gv11b_ce_halt_engine(struct gk20a *g, const struct nvgpu_device *dev);
u64 gv11b_ce_get_inst_ptr_from_lce(struct gk20a *g, u32 inst_id);
#endif /* NVGPU_CE_GV11B_H */ #endif /* NVGPU_CE_GV11B_H */


@@ -28,6 +28,7 @@
#include <nvgpu/device.h> #include <nvgpu/device.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_err.h> #include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_init.h>
#include "ce_gp10b.h" #include "ce_gp10b.h"
#include "ce_gv11b.h" #include "ce_gv11b.h"
@@ -48,13 +49,40 @@ u32 gv11b_ce_get_num_pce(struct gk20a *g)
return num_pce; return num_pce;
} }
void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce)
{ {
u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id)); u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id));
u32 clear_intr = 0U; u32 clear_intr = 0U;
u32 reg_val;
u32 err_code;
nvgpu_log(g, gpu_dbg_intr, "ce isr 0x%08x 0x%08x", ce_intr, inst_id); nvgpu_log(g, gpu_dbg_intr, "ce isr 0x%08x 0x%08x", ce_intr, inst_id);
if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) {
nvgpu_err(g, "ce launch error interrupt");
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE,
GPU_CE_LAUNCH_ERROR);
/* INVALID_CONFIG and METHOD_BUFFER_FAULT may still be
* reported via LAUNCHERR bit, but using different
* error code. Check the LAUNCHERR errorcode to
* check if above two interrupts are routed to
* LAUNCHERR bit and handle as per error handling
* policy.
*/
reg_val = nvgpu_readl(g, ce_lce_launcherr_r(inst_id));
err_code = ce_lce_launcherr_report_v(reg_val);
nvgpu_err(g, "ce launch error interrupt with errcode:0x%x", err_code);
if ((err_code == ce_lce_launcherr_report_method_buffer_access_fault_v()) ||
(err_code == ce_lce_launcherr_report_invalid_config_v())) {
*needs_quiesce |= true;
} else {
*needs_rc |= true;
}
clear_intr |= ce_intr_status_launcherr_pending_f();
}
#ifdef CONFIG_NVGPU_HAL_NON_FUSA #ifdef CONFIG_NVGPU_HAL_NON_FUSA
/* /*
* An INVALID_CONFIG interrupt will be generated if a floorswept * An INVALID_CONFIG interrupt will be generated if a floorswept
@@ -66,6 +94,7 @@ void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE, nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE,
GPU_CE_INVALID_CONFIG); GPU_CE_INVALID_CONFIG);
nvgpu_err(g, "ce: inst %d: invalid config", inst_id); nvgpu_err(g, "ce: inst %d: invalid config", inst_id);
*needs_quiesce |= true;
clear_intr |= ce_intr_status_invalid_config_reset_f(); clear_intr |= ce_intr_status_invalid_config_reset_f();
} }
@@ -79,15 +108,17 @@ void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE, nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE,
GPU_CE_METHOD_BUFFER_FAULT); GPU_CE_METHOD_BUFFER_FAULT);
nvgpu_err(g, "ce: inst %d: mthd buffer fault", inst_id); nvgpu_err(g, "ce: inst %d: mthd buffer fault", inst_id);
*needs_quiesce |= true;
clear_intr |= ce_intr_status_mthd_buffer_fault_reset_f(); clear_intr |= ce_intr_status_mthd_buffer_fault_reset_f();
} }
#endif #endif
nvgpu_writel(g, ce_intr_status_r(inst_id), clear_intr); nvgpu_writel(g, ce_intr_status_r(inst_id), clear_intr);
gp10b_ce_stall_isr(g, inst_id, pri_base); gp10b_ce_stall_isr(g, inst_id, pri_base, needs_rc, needs_quiesce);
} }
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
void gv11b_ce_mthd_buffer_fault_in_bar2_fault(struct gk20a *g) void gv11b_ce_mthd_buffer_fault_in_bar2_fault(struct gk20a *g)
{ {
u32 reg_val, num_lce, lce, clear_intr; u32 reg_val, num_lce, lce, clear_intr;
@@ -98,13 +129,19 @@ void gv11b_ce_mthd_buffer_fault_in_bar2_fault(struct gk20a *g)
reg_val = nvgpu_readl(g, ce_intr_status_r(lce)); reg_val = nvgpu_readl(g, ce_intr_status_r(lce));
if ((reg_val & if ((reg_val &
ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) { ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr, nvgpu_err(g, "ce: lce %d: mthd buffer fault", lce);
"ce: lce %d: mthd buffer fault", lce); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE,
GPU_CE_METHOD_BUFFER_FAULT);
/* This is a fatal interrupt as it implies a kernel bug.
* Needs quiesce.
*/
nvgpu_sw_quiesce(g);
clear_intr = ce_intr_status_mthd_buffer_fault_reset_f(); clear_intr = ce_intr_status_mthd_buffer_fault_reset_f();
nvgpu_writel(g, ce_intr_status_r(lce), clear_intr); nvgpu_writel(g, ce_intr_status_r(lce), clear_intr);
} }
} }
} }
#endif
void gv11b_ce_init_prod_values(struct gk20a *g) void gv11b_ce_init_prod_values(struct gk20a *g)
{ {
@@ -133,4 +170,20 @@ void gv11b_ce_halt_engine(struct gk20a *g, const struct nvgpu_device *dev)
nvgpu_err(g, "The CE engine %u is not idle" nvgpu_err(g, "The CE engine %u is not idle"
"while reset", dev->inst_id); "while reset", dev->inst_id);
} }
}
u64 gv11b_ce_get_inst_ptr_from_lce(struct gk20a *g, u32 inst_id)
{
u32 reg_val;
reg_val = nvgpu_readl(g, ce_lce_bind_status_r(inst_id));
if (ce_lce_bind_status_bound_v(reg_val) ==
ce_lce_bind_status_bound_false_v()) {
/* CE appears to have never been bound -- ignore */
return 0U;
}
return (((u64)(ce_lce_bind_status_ctx_ptr_v(reg_val))) <<
g->ops.ramin.base_shift());
} }


@@ -469,8 +469,9 @@ void gv11b_fb_handle_bar2_fault(struct gk20a *g,
} }
} }
#endif #endif
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
g->ops.ce.mthd_buffer_fault_in_bar2_fault(g); g->ops.ce.mthd_buffer_fault_in_bar2_fault(g);
#endif
err = g->ops.bus.bar2_bind(g, &g->mm.bar2.inst_block); err = g->ops.bus.bar2_bind(g, &g->mm.bar2.inst_block);
if (err != 0) { if (err != 0) {
nvgpu_err(g, "bar2_bind failed!"); nvgpu_err(g, "bar2_bind failed!");


@@ -427,10 +427,13 @@ static const struct gops_ce ga100_ops_ce = {
.init_hw = NULL, .init_hw = NULL,
#endif #endif
.get_num_pce = gv11b_ce_get_num_pce, .get_num_pce = gv11b_ce_get_num_pce,
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
.mthd_buffer_fault_in_bar2_fault = gv11b_ce_mthd_buffer_fault_in_bar2_fault, .mthd_buffer_fault_in_bar2_fault = gv11b_ce_mthd_buffer_fault_in_bar2_fault,
#endif
.init_prod_values = gv11b_ce_init_prod_values, .init_prod_values = gv11b_ce_init_prod_values,
.halt_engine = gv11b_ce_halt_engine, .halt_engine = gv11b_ce_halt_engine,
.request_idle = NULL, .request_idle = NULL,
.get_inst_ptr_from_lce = gv11b_ce_get_inst_ptr_from_lce,
}; };
static const struct gops_gr_ecc ga100_ops_gr_ecc = { static const struct gops_gr_ecc ga100_ops_gr_ecc = {


@@ -401,10 +401,13 @@ static const struct gops_ce ga10b_ops_ce = {
.init_hw = ga10b_ce_init_hw, .init_hw = ga10b_ce_init_hw,
#endif #endif
.get_num_pce = gv11b_ce_get_num_pce, .get_num_pce = gv11b_ce_get_num_pce,
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
.mthd_buffer_fault_in_bar2_fault = gv11b_ce_mthd_buffer_fault_in_bar2_fault, .mthd_buffer_fault_in_bar2_fault = gv11b_ce_mthd_buffer_fault_in_bar2_fault,
#endif
.init_prod_values = gv11b_ce_init_prod_values, .init_prod_values = gv11b_ce_init_prod_values,
.halt_engine = gv11b_ce_halt_engine, .halt_engine = gv11b_ce_halt_engine,
.request_idle = ga10b_ce_request_idle, .request_idle = ga10b_ce_request_idle,
.get_inst_ptr_from_lce = gv11b_ce_get_inst_ptr_from_lce,
}; };
static const struct gops_gr_ecc ga10b_ops_gr_ecc = { static const struct gops_gr_ecc ga10b_ops_gr_ecc = {


@@ -314,10 +314,13 @@ static const struct gops_ce gv11b_ops_ce = {
.isr_nonstall = gp10b_ce_nonstall_isr, .isr_nonstall = gp10b_ce_nonstall_isr,
#endif #endif
.get_num_pce = gv11b_ce_get_num_pce, .get_num_pce = gv11b_ce_get_num_pce,
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
.mthd_buffer_fault_in_bar2_fault = gv11b_ce_mthd_buffer_fault_in_bar2_fault, .mthd_buffer_fault_in_bar2_fault = gv11b_ce_mthd_buffer_fault_in_bar2_fault,
#endif
.init_prod_values = gv11b_ce_init_prod_values, .init_prod_values = gv11b_ce_init_prod_values,
.halt_engine = gv11b_ce_halt_engine, .halt_engine = gv11b_ce_halt_engine,
.request_idle = NULL, .request_idle = NULL,
.get_inst_ptr_from_lce = gv11b_ce_get_inst_ptr_from_lce,
}; };
static const struct gops_gr_ecc gv11b_ops_gr_ecc = { static const struct gops_gr_ecc gv11b_ops_gr_ecc = {


@@ -363,10 +363,13 @@ static const struct gops_ce tu104_ops_ce = {
.isr_nonstall = NULL, .isr_nonstall = NULL,
#endif #endif
.get_num_pce = gv11b_ce_get_num_pce, .get_num_pce = gv11b_ce_get_num_pce,
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
.mthd_buffer_fault_in_bar2_fault = gv11b_ce_mthd_buffer_fault_in_bar2_fault, .mthd_buffer_fault_in_bar2_fault = gv11b_ce_mthd_buffer_fault_in_bar2_fault,
#endif
.init_prod_values = gv11b_ce_init_prod_values, .init_prod_values = gv11b_ce_init_prod_values,
.halt_engine = gv11b_ce_halt_engine, .halt_engine = gv11b_ce_halt_engine,
.request_idle = NULL, .request_idle = NULL,
.get_inst_ptr_from_lce = gv11b_ce_get_inst_ptr_from_lce,
}; };
static const struct gops_gr_ecc tu104_ops_gr_ecc = { static const struct gops_gr_ecc tu104_ops_gr_ecc = {


@@ -1,7 +1,7 @@
/* /*
* GM20B Master Control * GM20B Master Control
* *
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2014-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -33,6 +33,7 @@
#include <nvgpu/engines.h> #include <nvgpu/engines.h>
#include <nvgpu/device.h> #include <nvgpu/device.h>
#include <nvgpu/power_features/pg.h> #include <nvgpu/power_features/pg.h>
#include <nvgpu/ce.h>
#include "mc_gm20b.h" #include "mc_gm20b.h"
@@ -62,9 +63,8 @@ void gm20b_mc_isr_stall(struct gk20a *g)
} }
/* CE Engine */ /* CE Engine */
if (nvgpu_device_is_ce(g, dev) && if (nvgpu_device_is_ce(g, dev)) {
(g->ops.ce.isr_stall != NULL)) { nvgpu_ce_stall_isr(g, dev->inst_id, dev->pri_base);
g->ops.ce.isr_stall(g, dev->inst_id, dev->pri_base);
} }
} }


@@ -29,6 +29,7 @@
#include <nvgpu/engines.h> #include <nvgpu/engines.h>
#include <nvgpu/device.h> #include <nvgpu/device.h>
#include <nvgpu/power_features/pg.h> #include <nvgpu/power_features/pg.h>
#include <nvgpu/ce.h>
#include "mc_gp10b.h" #include "mc_gp10b.h"
@@ -135,9 +136,8 @@ void mc_gp10b_isr_stall_engine(struct gk20a *g,
} }
/* CE Engine */ /* CE Engine */
if (nvgpu_device_is_ce(g, dev) && if (nvgpu_device_is_ce(g, dev)) {
(g->ops.ce.isr_stall != NULL)) { nvgpu_ce_stall_isr(g, dev->inst_id, dev->pri_base);
g->ops.ce.isr_stall(g, dev->inst_id, dev->pri_base);
} }
} }


@@ -30,6 +30,7 @@
#include <nvgpu/cic_mon.h> #include <nvgpu/cic_mon.h>
#include <nvgpu/power_features/pg.h> #include <nvgpu/power_features/pg.h>
#include <nvgpu/gr/gr_instances.h> #include <nvgpu/gr/gr_instances.h>
#include <nvgpu/ce.h>
#ifdef CONFIG_NVGPU_GSP_SCHEDULER #ifdef CONFIG_NVGPU_GSP_SCHEDULER
#include <nvgpu/gsp.h> #include <nvgpu/gsp.h>
#endif #endif
@@ -869,13 +870,8 @@ static void ga10b_intr_isr_stall_host2soc_3(struct gk20a *g)
if ((unit_subtree_mask & engine_intr_mask) == 0ULL) { if ((unit_subtree_mask & engine_intr_mask) == 0ULL) {
continue; continue;
} }
if (g->ops.ce.isr_stall != NULL) {
g->ops.ce.isr_stall(g, nvgpu_ce_stall_isr(g, dev->inst_id, dev->pri_base);
dev->inst_id,
dev->pri_base);
} else {
nvgpu_err(g, "unhandled intr_unit_ce_stall");
}
g->ops.ce.intr_retrigger(g, dev->inst_id); g->ops.ce.intr_retrigger(g, dev->inst_id);
} }


@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -83,4 +83,5 @@ struct gk20a;
*/ */
int nvgpu_ce_init_support(struct gk20a *g); int nvgpu_ce_init_support(struct gk20a *g);
void nvgpu_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
#endif /*NVGPU_CE_H*/ #endif /*NVGPU_CE_H*/


@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -39,6 +39,7 @@ enum {
NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD, NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD,
NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR,
NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH, NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH,
NVGPU_ERR_NOTIFIER_CE_ERROR,
}; };
void nvgpu_set_err_notifier_locked(struct nvgpu_channel *ch, u32 error); void nvgpu_set_err_notifier_locked(struct nvgpu_channel *ch, u32 error);


@@ -43,6 +43,10 @@ struct gops_ce {
* @param g [in] The GPU driver struct. * @param g [in] The GPU driver struct.
* @param inst_id [in] Copy engine instance id. * @param inst_id [in] Copy engine instance id.
* @param pri_base [in] Start of h/w register address space. * @param pri_base [in] Start of h/w register address space.
* @param needs_rc [out] Flag indicating if recovery should be
* triggered as part of CE error handling.
* @param needs_quiesce [out] Flag indicating if SW quiesce should be
* triggered as part of CE error handling.
* *
* This function is invoked by MC stalling isr handler to handle * This function is invoked by MC stalling isr handler to handle
* the CE stalling interrupt. * the CE stalling interrupt.
@@ -56,9 +60,11 @@ struct gops_ce {
* - Method buffer fault interrupt. * - Method buffer fault interrupt.
* - Blocking pipe interrupt. * - Blocking pipe interrupt.
* - Launch error interrupt. * - Launch error interrupt.
* - Sets needs_rc / needs_quiesce based on error handling policy.
* - Clear the handled interrupts by writing to ce_intr_status_r. * - Clear the handled interrupts by writing to ce_intr_status_r.
*/ */
void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base); void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce);
#ifdef CONFIG_NVGPU_NONSTALL_INTR #ifdef CONFIG_NVGPU_NONSTALL_INTR
/** /**
@@ -113,6 +119,7 @@ struct gops_ce {
*/ */
u32 (*get_num_pce)(struct gk20a *g); u32 (*get_num_pce)(struct gk20a *g);
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
/** /**
* @brief Handler for method buffer fault in BAR2. * @brief Handler for method buffer fault in BAR2.
* *
@@ -126,6 +133,7 @@ struct gops_ce {
* clear if pending. * clear if pending.
*/ */
void (*mthd_buffer_fault_in_bar2_fault)(struct gk20a *g); void (*mthd_buffer_fault_in_bar2_fault)(struct gk20a *g);
#endif
/** @cond DOXYGEN_SHOULD_SKIP_THIS */ /** @cond DOXYGEN_SHOULD_SKIP_THIS */
@@ -147,6 +155,7 @@ struct gops_ce {
void (*intr_retrigger)(struct gk20a *g, u32 inst_id); void (*intr_retrigger)(struct gk20a *g, u32 inst_id);
u64 (*get_inst_ptr_from_lce)(struct gk20a *g, u32 inst_id);
#ifdef CONFIG_NVGPU_DGPU #ifdef CONFIG_NVGPU_DGPU
int (*ce_app_init_support)(struct gk20a *g); int (*ce_app_init_support)(struct gk20a *g);
void (*ce_app_suspend)(struct gk20a *g); void (*ce_app_suspend)(struct gk20a *g);


@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -98,7 +98,7 @@ struct gops_mc {
* - For the FIFO engines with pending interrupt invoke corresponding * - For the FIFO engines with pending interrupt invoke corresponding
* handlers. * handlers.
* - Invoke g->ops.gr.intr.stall_isr if GR interrupt is pending. * - Invoke g->ops.gr.intr.stall_isr if GR interrupt is pending.
* - Invoke g->ops.ce.isr_stall if CE interrupt is pending. * - Invoke nvgpu_ce_stall_isr if CE interrupt is pending.
* - For other units with pending interrupt invoke corresponding * - For other units with pending interrupt invoke corresponding
* handlers. * handlers.
* - Invoke g->ops.fb.intr.isr if HUB interrupt is pending, determined * - Invoke g->ops.fb.intr.isr if HUB interrupt is pending, determined


@@ -72,6 +72,16 @@
(nvgpu_safe_add_u32(0x00104434U, nvgpu_safe_mult_u32((i), 128U))) (nvgpu_safe_add_u32(0x00104434U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_intr_retrigger_trigger_true_f() (0x1U) #define ce_intr_retrigger_trigger_true_f() (0x1U)
#define ce_pce_map_r() (0x00104028U) #define ce_pce_map_r() (0x00104028U)
#define ce_lce_bind_status_r(i)\
(nvgpu_safe_add_u32(0x00104404U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_bind_status_bound_v(r) (((r) >> 0U) & 0x1U)
#define ce_lce_bind_status_bound_false_v() (0x00000000U)
#define ce_lce_bind_status_ctx_ptr_v(r) (((r) >> 1U) & 0xfffffffU)
#define ce_lce_launcherr_r(i)\
(nvgpu_safe_add_u32(0x00104418U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_launcherr_report_v(r) (((r) >> 0U) & 0xfU)
#define ce_lce_launcherr_report_invalid_config_v() (0x0000000dU)
#define ce_lce_launcherr_report_method_buffer_access_fault_v() (0x0000000eU)
#define ce_lce_opt_r(i)\ #define ce_lce_opt_r(i)\
(nvgpu_safe_add_u32(0x00104414U, nvgpu_safe_mult_u32((i), 128U))) (nvgpu_safe_add_u32(0x00104414U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_opt_force_barriers_npl__prod_f() (0x8U) #define ce_lce_opt_force_barriers_npl__prod_f() (0x8U)


@@ -101,6 +101,16 @@
#define ce_lce_intr_notify_ctrl_cpu_m() (U32(0x1U) << 31U) #define ce_lce_intr_notify_ctrl_cpu_m() (U32(0x1U) << 31U)
#define ce_lce_intr_notify_ctrl_cpu_enable_f() (0x80000000U) #define ce_lce_intr_notify_ctrl_cpu_enable_f() (0x80000000U)
#define ce_lce_intr_notify_ctrl_cpu_disable_f() (0x0U) #define ce_lce_intr_notify_ctrl_cpu_disable_f() (0x0U)
#define ce_lce_bind_status_r(i)\
(nvgpu_safe_add_u32(0x00104404U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_bind_status_bound_v(r) (((r) >> 0U) & 0x1U)
#define ce_lce_bind_status_bound_false_v() (0x00000000U)
#define ce_lce_bind_status_ctx_ptr_v(r) (((r) >> 1U) & 0xfffffffU)
#define ce_lce_launcherr_r(i)\
(nvgpu_safe_add_u32(0x00104418U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_launcherr_report_v(r) (((r) >> 0U) & 0xfU)
#define ce_lce_launcherr_report_invalid_config_v() (0x0000000dU)
#define ce_lce_launcherr_report_method_buffer_access_fault_v() (0x0000000eU)
#define ce_lce_intr_ctrl_r(i)\ #define ce_lce_intr_ctrl_r(i)\
(nvgpu_safe_add_u32(0x0010442cU, nvgpu_safe_mult_u32((i), 128U))) (nvgpu_safe_add_u32(0x0010442cU, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_intr_ctrl_gsp_m() (U32(0x1U) << 30U) #define ce_lce_intr_ctrl_gsp_m() (U32(0x1U) << 30U)


@@ -72,6 +72,16 @@
#define ce_intr_status_mthd_buffer_fault_pending_f() (0x10U) #define ce_intr_status_mthd_buffer_fault_pending_f() (0x10U)
#define ce_intr_status_mthd_buffer_fault_reset_f() (0x10U) #define ce_intr_status_mthd_buffer_fault_reset_f() (0x10U)
#define ce_pce_map_r() (0x00104028U) #define ce_pce_map_r() (0x00104028U)
#define ce_lce_bind_status_r(i)\
(nvgpu_safe_add_u32(0x00104404U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_bind_status_bound_v(r) (((r) >> 0U) & 0x1U)
#define ce_lce_bind_status_bound_false_v() (0x00000000U)
#define ce_lce_bind_status_ctx_ptr_v(r) (((r) >> 1U) & 0xfffffffU)
#define ce_lce_launcherr_r(i)\
(nvgpu_safe_add_u32(0x00104418U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_launcherr_report_v(r) (((r) >> 0U) & 0xfU)
#define ce_lce_launcherr_report_invalid_config_v() (0x0000000dU)
#define ce_lce_launcherr_report_method_buffer_access_fault_v() (0x0000000eU)
#define ce_lce_opt_r(i)\ #define ce_lce_opt_r(i)\
(nvgpu_safe_add_u32(0x00104414U, nvgpu_safe_mult_u32((i), 128U))) (nvgpu_safe_add_u32(0x00104414U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_opt_force_barriers_npl__prod_f() (0x8U) #define ce_lce_opt_force_barriers_npl__prod_f() (0x8U)


@@ -87,6 +87,16 @@
#define ce_grce_config_shared_f(v) ((U32(v) & 0x1U) << 30U) #define ce_grce_config_shared_f(v) ((U32(v) & 0x1U) << 30U)
#define ce_grce_config_timeslice_short_f() (0x0U) #define ce_grce_config_timeslice_short_f() (0x0U)
#define ce_grce_config_timeslice_long_f() (0x80000000U) #define ce_grce_config_timeslice_long_f() (0x80000000U)
#define ce_lce_bind_status_r(i)\
(nvgpu_safe_add_u32(0x00104404U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_bind_status_bound_v(r) (((r) >> 0U) & 0x1U)
#define ce_lce_bind_status_bound_false_v() (0x00000000U)
#define ce_lce_bind_status_ctx_ptr_v(r) (((r) >> 1U) & 0xfffffffU)
#define ce_lce_launcherr_r(i)\
(nvgpu_safe_add_u32(0x00104418U, nvgpu_safe_mult_u32((i), 128U)))
#define ce_lce_launcherr_report_v(r) (((r) >> 0U) & 0xfU)
#define ce_lce_launcherr_report_invalid_config_v() (0x0000000dU)
#define ce_lce_launcherr_report_method_buffer_access_fault_v() (0x0000000eU)
#define ce_pce2lce_config_r(i)\ #define ce_pce2lce_config_r(i)\
(nvgpu_safe_add_u32(0x00104040U, nvgpu_safe_mult_u32((i), 4U))) (nvgpu_safe_add_u32(0x00104040U, nvgpu_safe_mult_u32((i), 4U)))
#define ce_pce2lce_config__size_1_v() (0x00000004U) #define ce_pce2lce_config__size_1_v() (0x00000004U)


@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -76,6 +76,10 @@
* Scheduler error recovery. * Scheduler error recovery.
*/ */
#define RC_TYPE_SCHED_ERR 8U #define RC_TYPE_SCHED_ERR 8U
/**
* Copy-engine error recovery.
*/
#define RC_TYPE_CE_FAULT 9U
/** /**
* Invalid recovery id. * Invalid recovery id.
@@ -132,6 +136,9 @@ static inline const char *nvgpu_rc_type_to_str(unsigned int rc_type)
case RC_TYPE_SCHED_ERR: case RC_TYPE_SCHED_ERR:
str = "Sched err"; str = "Sched err";
break; break;
case RC_TYPE_CE_FAULT:
str = "Copy engine err";
break;
default: default:
str = "Unknown"; str = "Unknown";
break; break;
@@ -272,4 +279,6 @@ void nvgpu_rc_fifo_recover(struct gk20a *g,
bool id_is_tsg, /* ignored if hw_id == ~0 */ bool id_is_tsg, /* ignored if hw_id == ~0 */
bool id_is_known, bool debug_dump, u32 rc_type); bool id_is_known, bool debug_dump, u32 rc_type);
void nvgpu_rc_ce_fault(struct gk20a *g, u32 inst_id);
#endif /* NVGPU_RC_H */ #endif /* NVGPU_RC_H */


@@ -98,8 +98,8 @@ gv100_dump_engine_status
gv100_read_engine_status_info gv100_read_engine_status_info
gv11b_ce_get_num_pce gv11b_ce_get_num_pce
gv11b_ce_init_prod_values gv11b_ce_init_prod_values
gv11b_ce_mthd_buffer_fault_in_bar2_fault
gv11b_ce_stall_isr gv11b_ce_stall_isr
gv11b_ce_get_inst_ptr_from_lce
gv11b_channel_count gv11b_channel_count
gv11b_channel_read_state gv11b_channel_read_state
gv11b_channel_reset_faulted gv11b_channel_reset_faulted
@@ -275,6 +275,7 @@ nvgpu_bug_unregister_cb
nvgpu_can_busy nvgpu_can_busy
nvgpu_ce_engine_interrupt_mask nvgpu_ce_engine_interrupt_mask
nvgpu_ce_init_support nvgpu_ce_init_support
nvgpu_ce_stall_isr
nvgpu_cg_blcg_fb_load_enable nvgpu_cg_blcg_fb_load_enable
nvgpu_cg_blcg_ltc_load_enable nvgpu_cg_blcg_ltc_load_enable
nvgpu_cg_blcg_fifo_load_enable nvgpu_cg_blcg_fifo_load_enable
@@ -792,6 +793,7 @@ nvgpu_rc_gr_fault
nvgpu_rc_sched_error_bad_tsg nvgpu_rc_sched_error_bad_tsg
nvgpu_rc_tsg_and_related_engines nvgpu_rc_tsg_and_related_engines
nvgpu_rc_mmu_fault nvgpu_rc_mmu_fault
nvgpu_rc_ce_fault
nvgpu_init_pramin nvgpu_init_pramin
gk20a_bus_set_bar0_window gk20a_bus_set_bar0_window
nvgpu_pramin_ops_init nvgpu_pramin_ops_init


@@ -98,8 +98,8 @@ gv100_dump_engine_status
gv100_read_engine_status_info gv100_read_engine_status_info
gv11b_ce_get_num_pce gv11b_ce_get_num_pce
gv11b_ce_init_prod_values gv11b_ce_init_prod_values
gv11b_ce_mthd_buffer_fault_in_bar2_fault
gv11b_ce_stall_isr gv11b_ce_stall_isr
gv11b_ce_get_inst_ptr_from_lce
gv11b_channel_count gv11b_channel_count
gv11b_channel_read_state gv11b_channel_read_state
gv11b_channel_reset_faulted gv11b_channel_reset_faulted
@@ -283,6 +283,7 @@ nvgpu_bug_unregister_cb
nvgpu_can_busy nvgpu_can_busy
nvgpu_ce_engine_interrupt_mask nvgpu_ce_engine_interrupt_mask
nvgpu_ce_init_support nvgpu_ce_init_support
nvgpu_ce_stall_isr
nvgpu_cg_blcg_fb_load_enable nvgpu_cg_blcg_fb_load_enable
nvgpu_cg_blcg_ltc_load_enable nvgpu_cg_blcg_ltc_load_enable
nvgpu_cg_blcg_fifo_load_enable nvgpu_cg_blcg_fifo_load_enable
@@ -811,6 +812,7 @@ nvgpu_rc_gr_fault
nvgpu_rc_sched_error_bad_tsg nvgpu_rc_sched_error_bad_tsg
nvgpu_rc_tsg_and_related_engines nvgpu_rc_tsg_and_related_engines
nvgpu_rc_mmu_fault nvgpu_rc_mmu_fault
nvgpu_rc_ce_fault
gp10b_priv_ring_isr_handle_0 gp10b_priv_ring_isr_handle_0
gp10b_priv_ring_isr_handle_1 gp10b_priv_ring_isr_handle_1
nvgpu_cic_mon_setup nvgpu_cic_mon_setup


@@ -102,7 +102,6 @@ test_ce_setup_env.ce_setup_env=0
test_ce_stall_isr.ce_stall_isr=0 test_ce_stall_isr.ce_stall_isr=0
test_get_num_pce.ce_get_num_pce=0 test_get_num_pce.ce_get_num_pce=0
test_init_prod_values.ce_init_prod_values=0 test_init_prod_values.ce_init_prod_values=0
test_mthd_buffer_fault_in_bar2_fault.mthd_buffer_fault_in_bar2_fault=0
[cg] [cg]
init_test_env.init=0 init_test_env.init=0


@@ -129,6 +129,7 @@ int test_ce_setup_env(struct unit_module *m,
nvgpu_spinlock_init(&g->mc.intr_lock); nvgpu_spinlock_init(&g->mc.intr_lock);
g->ops.cic_mon.init = ga10b_cic_mon_init; g->ops.cic_mon.init = ga10b_cic_mon_init;
g->ops.ce.get_inst_ptr_from_lce = gv11b_ce_get_inst_ptr_from_lce;
if (nvgpu_cic_mon_setup(g) != 0) { if (nvgpu_cic_mon_setup(g) != 0) {
unit_err(m, "%s: failed to initialize CIC\n", unit_err(m, "%s: failed to initialize CIC\n",
@@ -211,7 +212,7 @@ int test_ce_stall_isr(struct unit_module *m, struct gk20a *g, void *args)
intr_val = 0x4; intr_val = 0x4;
nvgpu_posix_io_writel_reg_space(g, ce_intr_status_r(inst_id), nvgpu_posix_io_writel_reg_space(g, ce_intr_status_r(inst_id),
intr_val); intr_val);
g->ops.ce.isr_stall(g, inst_id, 0); nvgpu_ce_stall_isr(g, inst_id, 0);
if (intr_status_written[inst_id] != (intr_val & if (intr_status_written[inst_id] != (intr_val &
~ce_intr_status_nonblockpipe_pending_f())) { ~ce_intr_status_nonblockpipe_pending_f())) {
ret = UNIT_FAIL; ret = UNIT_FAIL;
@@ -224,7 +225,7 @@ int test_ce_stall_isr(struct unit_module *m, struct gk20a *g, void *args)
intr_val = 0x0; intr_val = 0x0;
nvgpu_posix_io_writel_reg_space(g, ce_intr_status_r(inst_id), nvgpu_posix_io_writel_reg_space(g, ce_intr_status_r(inst_id),
intr_val); intr_val);
g->ops.ce.isr_stall(g, inst_id, 0); nvgpu_ce_stall_isr(g, inst_id, 0);
if (intr_status_written[inst_id] != intr_val) { if (intr_status_written[inst_id] != intr_val) {
ret = UNIT_FAIL; ret = UNIT_FAIL;
unit_err(m, "intr_status not cleared, only 0x%08x\n", unit_err(m, "intr_status not cleared, only 0x%08x\n",
@@ -237,59 +238,6 @@ done:
return ret; return ret;
} }
static u32 mock_get_num_lce(struct gk20a *g)
{
return NUM_INST;
}
int test_mthd_buffer_fault_in_bar2_fault(struct unit_module *m, struct gk20a *g,
void *args)
{
int ret = UNIT_SUCCESS;
int inst_id;
u32 intr_val;
g->ops.ce.mthd_buffer_fault_in_bar2_fault =
gv11b_ce_mthd_buffer_fault_in_bar2_fault;
g->ops.top.get_num_lce = mock_get_num_lce;
intr_val = 0x1f; /* all intr sources */
for (inst_id = 0; inst_id < NUM_INST; inst_id++) {
intr_status_written[inst_id] = 0;
nvgpu_posix_io_writel_reg_space(g, ce_intr_status_r(inst_id),
intr_val);
}
g->ops.ce.mthd_buffer_fault_in_bar2_fault(g);
for (inst_id = 0; inst_id < NUM_INST; inst_id++) {
if (intr_status_written[inst_id] !=
ce_intr_status_mthd_buffer_fault_pending_f()) {
ret = UNIT_FAIL;
unit_err(m, "intr_status not cleared properly, only 0x%08x\n",
intr_status_written[inst_id]);
goto done;
}
}
intr_val = 0x0;
for (inst_id = 0; inst_id < NUM_INST; inst_id++) {
intr_status_written[inst_id] = 0;
nvgpu_posix_io_writel_reg_space(g, ce_intr_status_r(inst_id),
intr_val);
}
g->ops.ce.mthd_buffer_fault_in_bar2_fault(g);
for (inst_id = 0; inst_id < NUM_INST; inst_id++) {
if (intr_status_written[inst_id] != 0) {
ret = UNIT_FAIL;
unit_err(m, "intr_status not cleared properly, only 0x%08x\n",
intr_status_written[inst_id]);
goto done;
}
}
done:
return ret;
}
int test_get_num_pce(struct unit_module *m, struct gk20a *g, void *args) int test_get_num_pce(struct unit_module *m, struct gk20a *g, void *args)
{ {
u32 pce_map_val; /* 16 bit bitmap */ u32 pce_map_val; /* 16 bit bitmap */
@@ -334,7 +282,6 @@ struct unit_module_test ce_tests[] = {
UNIT_TEST(ce_setup_env, test_ce_setup_env, NULL, 0), UNIT_TEST(ce_setup_env, test_ce_setup_env, NULL, 0),
UNIT_TEST(ce_init_support, test_ce_init_support, NULL, 0), UNIT_TEST(ce_init_support, test_ce_init_support, NULL, 0),
UNIT_TEST(ce_stall_isr, test_ce_stall_isr, NULL, 0), UNIT_TEST(ce_stall_isr, test_ce_stall_isr, NULL, 0),
UNIT_TEST(mthd_buffer_fault_in_bar2_fault, test_mthd_buffer_fault_in_bar2_fault, NULL, 0),
UNIT_TEST(ce_get_num_pce, test_get_num_pce, NULL, 0), UNIT_TEST(ce_get_num_pce, test_get_num_pce, NULL, 0),
UNIT_TEST(ce_init_prod_values, test_init_prod_values, NULL, 0), UNIT_TEST(ce_init_prod_values, test_init_prod_values, NULL, 0),
UNIT_TEST(ce_free_env, test_ce_free_env, NULL, 0), UNIT_TEST(ce_free_env, test_ce_free_env, NULL, 0),


@@ -115,33 +115,6 @@ int test_ce_init_support(struct unit_module *m, struct gk20a *g, void *args);
*/ */
int test_ce_stall_isr(struct unit_module *m, struct gk20a *g, void *args); int test_ce_stall_isr(struct unit_module *m, struct gk20a *g, void *args);
/**
* Test specification for: test_mthd_buffer_fault_in_bar2_fault
*
* Description: Validate method buffer interrupt functionality.
*
* Test Type: Feature
*
* Targets: gops_ce.mthd_buffer_fault_in_bar2_fault,
* gv11b_ce_mthd_buffer_fault_in_bar2_fault
*
* Input: test_ce_setup_env must have been run.
*
* Steps:
* - Set all CE interrupt sources pending in the interrupt status reg for each
* instance.
* - Call gops_ce.mthd_buffer_fault_in_bar2_fault.
* - Verify only the correct interrupt is cleared.
* - Set no CE interrupt sources pending in the interrupt status reg for each
* instance.
* - Call gops_ce.mthd_buffer_fault_in_bar2_fault.
* - Verify no interrupts are cleared.
*
* Output: Returns PASS if expected result is met, FAIL otherwise.
*/
int test_mthd_buffer_fault_in_bar2_fault(struct unit_module *m, struct gk20a *g,
void *args);
/** /**
* Test specification for: test_get_num_pce * Test specification for: test_get_num_pce
* *


@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -49,11 +49,6 @@ static u32 hal_channel_count(struct gk20a *g)
return 0x00000200U; return 0x00000200U;
} }
static void hal_bar2_fault_nop(struct gk20a *g)
{
/* no-op */
}
static int hal_bar2_bind_nop(struct gk20a *g, struct nvgpu_mem *bar2_inst) static int hal_bar2_bind_nop(struct gk20a *g, struct nvgpu_mem *bar2_inst)
{ {
/* no-op */ /* no-op */
@@ -99,7 +94,6 @@ int fb_mmu_fault_gv11b_init_test(struct unit_module *m, struct gk20a *g,
/* Other HALs that are needed */ /* Other HALs that are needed */
g->ops.channel.count = hal_channel_count; g->ops.channel.count = hal_channel_count;
g->ops.ce.mthd_buffer_fault_in_bar2_fault = hal_bar2_fault_nop;
g->ops.bus.bar2_bind = hal_bar2_bind_nop; g->ops.bus.bar2_bind = hal_bar2_bind_nop;
g->ops.fifo.mmu_fault_id_to_pbdma_id = g->ops.fifo.mmu_fault_id_to_pbdma_id =
hal_fifo_mmu_fault_id_to_pbdma_id; hal_fifo_mmu_fault_id_to_pbdma_id;


@@ -171,7 +171,8 @@ static void mock_bus_isr(struct gk20a *g)
u.bus_isr = true; u.bus_isr = true;
} }
static void mock_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) static void mock_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base,
bool *needs_rc, bool *needs_quiesce)
{ {
u.ce_isr = true; u.ce_isr = true;
} }


@@ -398,10 +398,6 @@ static const char *f_mmu_fault_notify[] = {
"mmu_fault_notify_eng_id_physical", "mmu_fault_notify_eng_id_physical",
}; };
static void stub_ce_mthd_buffer_fault_in_bar2_fault(struct gk20a *g)
{
}
static int stub_bus_bar2_bind(struct gk20a *g, struct nvgpu_mem *bar2_inst) static int stub_bus_bar2_bind(struct gk20a *g, struct nvgpu_mem *bar2_inst)
{ {
return 0; return 0;
@@ -427,8 +423,6 @@ int test_gv11b_mm_mmu_fault_handle_other_fault_notify(struct unit_module *m,
gv11b_fb_read_mmu_fault_addr_lo_hi; gv11b_fb_read_mmu_fault_addr_lo_hi;
g->ops.fb.read_mmu_fault_info = gv11b_fb_read_mmu_fault_info; g->ops.fb.read_mmu_fault_info = gv11b_fb_read_mmu_fault_info;
g->ops.fb.write_mmu_fault_status = gv11b_fb_write_mmu_fault_status; g->ops.fb.write_mmu_fault_status = gv11b_fb_write_mmu_fault_status;
g->ops.ce.mthd_buffer_fault_in_bar2_fault =
stub_ce_mthd_buffer_fault_in_bar2_fault;
g->ops.bus.bar2_bind = stub_bus_bar2_bind; g->ops.bus.bar2_bind = stub_bus_bar2_bind;
g->ops.fifo.mmu_fault_id_to_pbdma_id = g->ops.fifo.mmu_fault_id_to_pbdma_id =
stub_fifo_mmu_fault_id_to_pbdma_id; stub_fifo_mmu_fault_id_to_pbdma_id;