Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)
gpu: nvgpu: gv11b: Handle all SM errors
Add the missing register bits to identify the SM errors. Except for the mmu_nack error, all other errors are handled by a single function. That function sets the error notifier with GR_EXCEPTION, clears the interrupt, and triggers the recovery process.

bug 200402677
JIRA NVGPU-573

Change-Id: Icfaff1f20f1f35adb4cd35ce288ce694845aed3c
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1730963
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
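The commit message describes a two-way split: MMU_NACK keeps its dedicated handler, while every other recognized warp ESR error code funnels into one common handler whose return value tells the caller whether to proceed with CILP preemption or bail out into recovery. The standalone C sketch below models only that dispatch shape; every name in it is a hypothetical stand-in, not the nvgpu functions shown in the diff that follows.

/*
 * Illustrative sketch only -- not code from this commit. It models the
 * dispatch order described in the commit message: MMU_NACK is handled on
 * its own, every other recognized ESR error goes through one common
 * handler, and that handler's return value decides between recovery and
 * falling through to CILP. All identifiers are hypothetical stand-ins.
 */
#include <stdio.h>

#define ESR_ERROR_NONE     0x00u
#define ESR_ERROR_MMU_NACK 0x20u  /* matches ..._esr_error_mmu_nack_f() in the diff */

static int handle_mmu_nack(unsigned int err)
{
        /* The real handler clears the warp ESR register and returns 0. */
        printf("ESR MMU NACK ERROR(0x%x): clear warp ESR register\n", err);
        return 0;
}

static int handle_all_esr_errors(unsigned int err)
{
        if (err == ESR_ERROR_NONE) {
                printf("No ESR error, skip RC recovery and trigger CILP\n");
                return 0;   /* zero: caller proceeds with CILP preemption */
        }
        printf("ESR error 0x%x: set GR_EXCEPTION notifier, trigger recovery\n", err);
        return -1;          /* non-zero: caller returns early, skips CILP */
}

static int pre_process_sm_exception(unsigned int warp_esr_error)
{
        if (warp_esr_error == ESR_ERROR_MMU_NACK)
                return handle_mmu_nack(warp_esr_error);
        return handle_all_esr_errors(warp_esr_error);
}

int main(void)
{
        pre_process_sm_exception(0x20u); /* MMU NACK path */
        pre_process_sm_exception(0x0fu); /* MISALIGNED ADDR, per the hw headers below */
        pre_process_sm_exception(0x00u); /* no error: CILP path */
        return 0;
}

Compiled with a plain C compiler, the three calls in main() exercise the MMU NACK path, a recognized ESR error, and the no-error path that falls through to CILP.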
@@ -2089,7 +2089,7 @@ void gr_gv11b_get_access_map(struct gk20a *g,
static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
                u32 gpc, u32 tpc, u32 sm,
                u32 warp_esr,
                u32 warp_esr_error,
                struct channel_gk20a *fault_ch)
{
        struct tsg_gk20a *tsg;
@@ -2117,17 +2117,92 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
        nvgpu_writel(g,
                gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);

        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
                "ESR %s(0x%x)",
                "MMU NACK ERROR",
                warp_esr_error);
        return 0;
}

static int gr_gv11b_handle_warp_esr_error_misaligned_addr(struct gk20a *g,
                u32 gpc, u32 tpc, u32 sm,
                u32 warp_esr,
                struct channel_gk20a *fault_ch)
static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error)
{
        u32 index = 0U;
        u32 esr_err = gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f();

        struct warp_esr_error_table_s {
                u32 error_value;
                const char *error_name;
        };

        struct warp_esr_error_table_s warp_esr_error_table[] = {
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_error_f(),
                        "STACK ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_api_stack_error_f(),
                        "API STACK ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_wrap_f(),
                        "PC WRAP ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_pc_f(),
                        "MISALIGNED PC ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_overflow_f(),
                        "PC OVERFLOW ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_reg_f(),
                        "MISALIGNED REG ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_encoding_f(),
                        "ILLEGAL INSTRUCTION ENCODING ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_param_f(),
                        "ILLEGAL INSTRUCTION PARAM ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_reg_f(),
                        "OOR REG ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_addr_f(),
                        "OOR ADDR ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(),
                        "MISALIGNED ADDR ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_addr_space_f(),
                        "INVALID ADDR SPACE ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_const_addr_ldc_f(),
                        "INVALID ADDR LDC ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_overflow_f(),
                        "STACK OVERFLOW ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_fault_f(),
                        "MMU FAULT ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_format_f(),
                        "TEX FORMAT ERROR"},
                { gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_layout_f(),
                        "TEX LAYOUT ERROR"},
        };

        for (index = 0; index < ARRAY_SIZE(warp_esr_error_table); index++) {
                if (warp_esr_error_table[index].error_value == warp_esr_error) {
                        esr_err = warp_esr_error_table[index].error_value;
                        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
                                "ESR %s(0x%x)",
                                warp_esr_error_table[index].error_name,
                                esr_err);
                        break;
                }
        }

        return (esr_err == 0U) ? false : true;
}
static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g,
                u32 gpc, u32 tpc, u32 sm,
                u32 warp_esr_error,
                struct channel_gk20a *fault_ch)
{
        struct tsg_gk20a *tsg;
        u32 offset;
        struct channel_gk20a *ch_tsg;
        u32 offset = 0U;
        bool is_esr_error = false;

        /*
         * Check for an esr error
         */
        is_esr_error = gr_gv11b_check_warp_esr_error(g, warp_esr_error);
        if (!is_esr_error) {
                nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
                        "No ESR error, Skip RC recovery and Trigeer CILP");
                return 0;
        }

        if (fault_ch) {
                tsg = &g->fifo.tsg[fault_ch->tsgid];
@@ -2170,8 +2245,10 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
        u32 offset = gk20a_gr_gpc_offset(g, gpc) +
                        gk20a_gr_tpc_offset(g, tpc) +
                        gv11b_gr_sm_offset(g, sm);
        u32 warp_esr_error = gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(warp_esr);
        struct tsg_gk20a *tsg;

        *early_exit = false;
        *ignore_debugger = false;
@@ -2179,13 +2256,19 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
         * We don't need to trigger CILP in case of MMU_NACK
         * So just handle MMU_NACK and return
         */
        if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f())
        if (warp_esr_error == gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f())
                return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
                                warp_esr, fault_ch);
                                warp_esr_error, fault_ch);

        if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f())
                return gr_gv11b_handle_warp_esr_error_misaligned_addr(g, gpc, tpc, sm,
                                warp_esr, fault_ch);
        /*
         * Proceed to trigger CILP preemption if the return value
         * from this function is zero, else proceed to recovery
         */
        ret = gr_gv11b_handle_all_warp_esr_errors(g, gpc, tpc, sm,
                        warp_esr_error, fault_ch);
        if (ret) {
                return ret;
        }

        if (fault_ch) {
                tsg = tsg_gk20a_from_ch(fault_ch);
@@ -3632,18 +3632,82 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v(void)
{
        return 0x00000000U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void)
{
        return 0x0U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_error_f(void)
{
        return 0x1U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_api_stack_error_f(void)
{
        return 0x2U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_wrap_f(void)
{
        return 0x4U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_pc_f(void)
{
        return 0x5U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_overflow_f(void)
{
        return 0x6U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_reg_f(void)
{
        return 0x8U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_encoding_f(void)
{
        return 0x9U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_param_f(void)
{
        return 0xbU;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_reg_f(void)
{
        return 0xdU;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_addr_f(void)
{
        return 0xeU;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(void)
{
        return 0xfU;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_addr_space_f(void)
{
        return 0x10U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_const_addr_ldc_f(void)
{
        return 0x12U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_overflow_f(void)
{
        return 0x16U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_fault_f(void)
{
        return 0x17U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_format_f(void)
{
        return 0x18U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_layout_f(void)
{
        return 0x19U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
{
        return 0x20U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void)
{
        return 0x0U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_wrap_id_m(void)
{
        return 0xffU << 16U;
@@ -3672,6 +3736,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r(void)
{
        return 0x00504738U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_hi_r(void)
{
        return 0x0050473cU;
}
static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
{
        return 0x005043a0U;
@@ -4392,10 +4392,74 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void)
{
        return 0x0U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_error_f(void)
{
        return 0x1U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_api_stack_error_f(void)
{
        return 0x2U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_wrap_f(void)
{
        return 0x4U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_pc_f(void)
{
        return 0x5U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_overflow_f(void)
{
        return 0x6U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_reg_f(void)
{
        return 0x8U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_encoding_f(void)
{
        return 0x9U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_param_f(void)
{
        return 0xbU;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_reg_f(void)
{
        return 0xdU;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_addr_f(void)
{
        return 0xeU;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(void)
{
        return 0xfU;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_addr_space_f(void)
{
        return 0x10U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_const_addr_ldc_f(void)
{
        return 0x12U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_overflow_f(void)
{
        return 0x16U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_fault_f(void)
{
        return 0x17U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_format_f(void)
{
        return 0x18U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_layout_f(void)
{
        return 0x19U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
{
        return 0x20U;
@@ -4428,6 +4492,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r(void)
{
        return 0x00504738U;
}
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_hi_r(void)
{
        return 0x0050473cU;
}
static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
{
        return 0x005043a0U;
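For reference, here is a standalone rendering of the code-to-name table that the new gr_gv11b_check_warp_esr_error() helper builds from the gr_gpc0_tpc0_sm0_hww_warp_esr_error_*_f() accessors above. The numeric codes and names are taken from the diff; the surrounding scaffolding (lookup_esr_name(), main()) is hypothetical and only meant to show the lookup in isolation.

/*
 * Illustrative sketch only: the warp ESR error codes and names below
 * mirror the hw header accessors and the table in the diff; the helper
 * and main() are hypothetical scaffolding for a standalone demo.
 */
#include <stdio.h>
#include <stddef.h>

struct esr_error_entry {
        unsigned int code;
        const char *name;
};

static const struct esr_error_entry esr_error_table[] = {
        { 0x01u, "STACK ERROR" },
        { 0x02u, "API STACK ERROR" },
        { 0x04u, "PC WRAP ERROR" },
        { 0x05u, "MISALIGNED PC ERROR" },
        { 0x06u, "PC OVERFLOW ERROR" },
        { 0x08u, "MISALIGNED REG ERROR" },
        { 0x09u, "ILLEGAL INSTRUCTION ENCODING ERROR" },
        { 0x0bu, "ILLEGAL INSTRUCTION PARAM ERROR" },
        { 0x0du, "OOR REG ERROR" },
        { 0x0eu, "OOR ADDR ERROR" },
        { 0x0fu, "MISALIGNED ADDR ERROR" },
        { 0x10u, "INVALID ADDR SPACE ERROR" },
        { 0x12u, "INVALID ADDR LDC ERROR" },
        { 0x16u, "STACK OVERFLOW ERROR" },
        { 0x17u, "MMU FAULT ERROR" },
        { 0x18u, "TEX FORMAT ERROR" },
        { 0x19u, "TEX LAYOUT ERROR" },
};

/* Return the name for a known ESR error code, or NULL if unrecognized. */
static const char *lookup_esr_name(unsigned int code)
{
        size_t i;

        for (i = 0; i < sizeof(esr_error_table) / sizeof(esr_error_table[0]); i++) {
                if (esr_error_table[i].code == code)
                        return esr_error_table[i].name;
        }
        return NULL;
}

int main(void)
{
        const char *name = lookup_esr_name(0x0fu);

        printf("ESR %s(0x%x)\n", name ? name : "UNKNOWN", 0x0fu);
        return 0;
}

A real caller would first extract the error field from the warp ESR register value, as pre_process_sm_exception does in the diff via gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(), before comparing it against these codes.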