Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: move handle_sm_exception to gr.intr
Move gr_gp10b_handle_sm_exception from gr_gp10b to
gp10b_gr_intr_handle_sm_exception in the hal.gr.intr unit.
Move gr_gk20a_handle_sm_exception from gr_gk20a to
nvgpu_gr_intr_handle_sm_exception in common.gr.intr.
Move nvgpu_report_gr_sm_exception to common.gr.intr.

JIRA NVGPU-3016

Change-Id: I545ddca052122f87685f35f515831841a246dab3
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2103736
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: 3bd35af767
Commit: 490ea365d2
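
In effect, the SM exception handler is rewired from the top-level gr HAL struct into its intr sub-unit: chips either point gr.intr.handle_sm_exception at the common nvgpu_gr_intr_handle_sm_exception or at the gp10b override that layers ECC handling on top of it. Below is a minimal sketch of the relocation; gpu_ops_sketch is a hypothetical name and the struct is trimmed to the one op in question, not the full nvgpu definition.

/* Sketch only: forward declarations plus a gpu_ops cut down to the single
 * op this change relocates; the real nvgpu structs carry many more members. */
#include <stdbool.h>

struct gk20a;
struct channel_gk20a;
typedef unsigned int u32;

struct gpu_ops_sketch {
	struct {
		/* Before this change the op lived at this level:
		 *   int (*handle_sm_exception)(struct gk20a *g, ...);
		 */
		struct {
			/* After this change it lives in the gr.intr unit. */
			int (*handle_sm_exception)(struct gk20a *g,
					u32 gpc, u32 tpc, u32 sm,
					bool *post_event,
					struct channel_gk20a *fault_ch,
					u32 *hww_global_esr);
		} intr;
	} gr;
};

/*
 * Call sites therefore change from
 *     g->ops.gr.handle_sm_exception(g, gpc, tpc, sm, ...);
 * to
 *     g->ops.gr.intr.handle_sm_exception(g, gpc, tpc, sm, ...);
 * as the gr_intr_handle_tpc_exception hunk below shows.
 */
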
@@ -26,6 +26,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/rc.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/power_features/pg.h>

#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_intr.h>

@@ -71,7 +72,7 @@ static int gr_intr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
"GPC%d TPC%d: SM%d exception pending",
gpc, tpc, sm);

tmp_ret = g->ops.gr.handle_sm_exception(g,
tmp_ret = g->ops.gr.intr.handle_sm_exception(g,
gpc, tpc, sm, post_event, fault_ch,
hww_global_esr);
ret = (ret != 0) ? ret : tmp_ret;

@@ -153,6 +154,48 @@ static int gr_intr_handle_class_error(struct gk20a *g,
return -EINVAL;
}

static void gr_intr_report_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
u32 sm, u32 hww_warp_esr_status, u64 hww_warp_esr_pc)
{
int ret;
struct gr_sm_mcerr_info err_info;
struct channel_gk20a *ch;
struct gr_err_info info;
u32 tsgid, chid, curr_ctx, inst = 0;

if (g->ops.gr.err_ops.report_gr_err == NULL) {
return;
}

tsgid = NVGPU_INVALID_TSG_ID;
curr_ctx = g->ops.gr.falcon.get_current_ctx(g);
ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid);
chid = ch != NULL ? ch->chid : FIFO_INVAL_CHANNEL_ID;
if (ch != NULL) {
gk20a_channel_put(ch);
}

(void) memset(&err_info, 0, sizeof(err_info));
(void) memset(&info, 0, sizeof(info));
err_info.curr_ctx = curr_ctx;
err_info.chid = chid;
err_info.tsgid = tsgid;
err_info.hww_warp_esr_pc = hww_warp_esr_pc;
err_info.hww_warp_esr_status = hww_warp_esr_status;
err_info.gpc = gpc;
err_info.tpc = tpc;
err_info.sm = sm;
info.sm_mcerr_info = &err_info;
ret = g->ops.gr.err_ops.report_gr_err(g,
NVGPU_ERR_MODULE_SM, inst, GPU_SM_MACHINE_CHECK_ERROR,
&info);
if (ret != 0) {
nvgpu_err(g, "failed to report SM_EXCEPTION "
"gpc=%u, tpc=%u, sm=%u, esr_status=%x",
gpc, tpc, sm, hww_warp_esr_status);
}
}

/* Used by sw interrupt thread to translate current ctx to chid.
* Also used by regops to translate current ctx to chid and tsgid.
* For performance, we don't want to go through 128 channels every time.

@@ -295,6 +338,112 @@ void nvgpu_gr_intr_set_error_notifier(struct gk20a *g,
}
}

int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr)
{
int ret = 0;
bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
bool disable_sm_exceptions = true;
u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
bool sm_debugger_attached;
u32 global_esr, warp_esr, global_mask;
u64 hww_warp_esr_pc = 0;

nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");

sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);

global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm);
*hww_global_esr = global_esr;
warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
global_mask = g->ops.gr.get_sm_no_lock_down_hww_global_esr_mask(g);

if (!sm_debugger_attached) {
nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x",
global_esr, warp_esr);
return -EFAULT;
}

nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"sm hww global 0x%08x warp 0x%08x", global_esr, warp_esr);

/*
* Check and report any fatal wrap errors.
*/
if ((global_esr & ~global_mask) != 0U) {
if (g->ops.gr.get_sm_hww_warp_esr_pc != NULL) {
hww_warp_esr_pc = g->ops.gr.get_sm_hww_warp_esr_pc(g,
offset);
}
gr_intr_report_sm_exception(g, gpc, tpc, sm, warp_esr,
hww_warp_esr_pc);
}
nvgpu_pg_elpg_protected_call(g,
g->ops.gr.record_sm_error_state(g, gpc, tpc, sm, fault_ch));

if (g->ops.gr.pre_process_sm_exception != NULL) {
ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, sm,
global_esr, warp_esr,
sm_debugger_attached,
fault_ch,
&early_exit,
&ignore_debugger);
if (ret != 0) {
nvgpu_err(g, "could not pre-process sm error!");
return ret;
}
}

if (early_exit) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"returning early");
return ret;
}

/*
* Disable forwarding of tpc exceptions,
* the debugger will reenable exceptions after servicing them.
*
* Do not disable exceptions if the only SM exception is BPT_INT
*/
if ((g->ops.gr.esr_bpt_pending_events(global_esr,
NVGPU_EVENT_ID_BPT_INT)) && (warp_esr == 0U)) {
disable_sm_exceptions = false;
}

if (!ignore_debugger && disable_sm_exceptions) {
g->ops.gr.intr.tpc_exception_sm_disable(g, offset);
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"SM Exceptions disabled");
}

/* if a debugger is present and an error has occurred, do a warp sync */
if (!ignore_debugger &&
((warp_esr != 0U) || ((global_esr & ~global_mask) != 0U))) {
nvgpu_log(g, gpu_dbg_intr, "warp sync needed");
do_warp_sync = true;
}

if (do_warp_sync) {
ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
global_mask, true);
if (ret != 0) {
nvgpu_err(g, "sm did not lock down!");
return ret;
}
}

if (ignore_debugger) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"ignore_debugger set, skipping event posting");
} else {
*post_event = true;
}

return ret;
}

int nvgpu_gr_intr_handle_gpc_exception(struct gk20a *g, bool *post_event,
struct nvgpu_gr_config *gr_config, struct channel_gk20a *fault_ch,
u32 *hww_global_esr)

@@ -139,7 +139,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
.bpt_reg_info = NULL,
.handle_fecs_error = NULL,
.handle_sm_exception = NULL,
.get_lrf_tex_ltc_dram_override = NULL,
.update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode,
.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode,

@@ -165,7 +165,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
.bpt_reg_info = NULL,
.handle_fecs_error = NULL,
.handle_sm_exception = NULL,
.get_lrf_tex_ltc_dram_override = NULL,
.update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode,
.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode,

@@ -59,7 +59,6 @@
#include <nvgpu/engine_status.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/power_features/cg.h>
#include <nvgpu/power_features/pg.h>
#include <nvgpu/preempt.h>

#include "gr_gk20a.h"

@@ -67,51 +66,8 @@

#include "common/gr/gr_priv.h"

#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

static void nvgpu_report_gr_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
u32 sm, u32 hww_warp_esr_status, u64 hww_warp_esr_pc)
{
int ret;
struct gr_sm_mcerr_info err_info;
struct channel_gk20a *ch;
struct gr_err_info info;
u32 tsgid, chid, curr_ctx, inst = 0;

if (g->ops.gr.err_ops.report_gr_err == NULL) {
return;
}

tsgid = NVGPU_INVALID_TSG_ID;
curr_ctx = g->ops.gr.falcon.get_current_ctx(g);
ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid);
chid = ch != NULL ? ch->chid : FIFO_INVAL_CHANNEL_ID;
if (ch != NULL) {
gk20a_channel_put(ch);
}

(void) memset(&err_info, 0, sizeof(err_info));
(void) memset(&info, 0, sizeof(info));
err_info.curr_ctx = curr_ctx;
err_info.chid = chid;
err_info.tsgid = tsgid;
err_info.hww_warp_esr_pc = hww_warp_esr_pc;
err_info.hww_warp_esr_status = hww_warp_esr_status;
err_info.gpc = gpc;
err_info.tpc = tpc;
err_info.sm = sm;
info.sm_mcerr_info = &err_info;
ret = g->ops.gr.err_ops.report_gr_err(g,
NVGPU_ERR_MODULE_SM, inst, GPU_SM_MACHINE_CHECK_ERROR,
&info);
if (ret != 0) {
nvgpu_err(g, "failed to report SM_EXCEPTION "
"gpc=%u, tpc=%u, sm=%u, esr_status=%x",
gpc, tpc, sm, hww_warp_esr_status);
}
}

static void gr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid,
u32 mailbox_value)
{

@@ -373,112 +329,6 @@ bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
return false;
}

int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr)
{
int ret = 0;
bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
bool disable_sm_exceptions = true;
u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
bool sm_debugger_attached;
u32 global_esr, warp_esr, global_mask;
u64 hww_warp_esr_pc = 0;

nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");

sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);

global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm);
*hww_global_esr = global_esr;
warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
global_mask = g->ops.gr.get_sm_no_lock_down_hww_global_esr_mask(g);

if (!sm_debugger_attached) {
nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x",
global_esr, warp_esr);
return -EFAULT;
}

nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"sm hww global 0x%08x warp 0x%08x", global_esr, warp_esr);

/*
* Check and report any fatal wrap errors.
*/
if ((global_esr & ~global_mask) != 0U) {
if (g->ops.gr.get_sm_hww_warp_esr_pc != NULL) {
hww_warp_esr_pc = g->ops.gr.get_sm_hww_warp_esr_pc(g,
offset);
}
nvgpu_report_gr_sm_exception(g, gpc, tpc, sm, warp_esr,
hww_warp_esr_pc);
}
nvgpu_pg_elpg_protected_call(g,
g->ops.gr.record_sm_error_state(g, gpc, tpc, sm, fault_ch));

if (g->ops.gr.pre_process_sm_exception != NULL) {
ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, sm,
global_esr, warp_esr,
sm_debugger_attached,
fault_ch,
&early_exit,
&ignore_debugger);
if (ret != 0) {
nvgpu_err(g, "could not pre-process sm error!");
return ret;
}
}

if (early_exit) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"returning early");
return ret;
}

/*
* Disable forwarding of tpc exceptions,
* the debugger will reenable exceptions after servicing them.
*
* Do not disable exceptions if the only SM exception is BPT_INT
*/
if ((g->ops.gr.esr_bpt_pending_events(global_esr,
NVGPU_EVENT_ID_BPT_INT)) && (warp_esr == 0U)) {
disable_sm_exceptions = false;
}

if (!ignore_debugger && disable_sm_exceptions) {
g->ops.gr.intr.tpc_exception_sm_disable(g, offset);
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"SM Exceptions disabled");
}

/* if a debugger is present and an error has occurred, do a warp sync */
if (!ignore_debugger &&
((warp_esr != 0U) || ((global_esr & ~global_mask) != 0U))) {
nvgpu_log(g, gpu_dbg_intr, "warp sync needed");
do_warp_sync = true;
}

if (do_warp_sync) {
ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
global_mask, true);
if (ret != 0) {
nvgpu_err(g, "sm did not lock down!");
return ret;
}
}

if (ignore_debugger) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"ignore_debugger set, skipping event posting");
} else {
*post_event = true;
}

return ret;
}

void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
u32 *esr_sm_sel)
{

@@ -96,9 +96,6 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
struct channel_gk20a *ch, u64 sms, bool enable);
bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr);

#if defined(CONFIG_GK20A_CYCLE_STATS)
int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */

@@ -57,164 +57,6 @@
#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
#include <nvgpu/hw/gp10b/hw_fifo_gp10b.h>

static void gr_gp10b_sm_lrf_ecc_overcount_war(bool single_err,
u32 sed_status,
u32 ded_status,
u32 *count_to_adjust,
u32 opposite_count)
{
u32 over_count = 0;

sed_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_b();
ded_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_b();

/* One overcount for each partition on which a SBE occurred but not a
DBE (or vice-versa) */
if (single_err) {
over_count = (u32)hweight32(sed_status & ~ded_status);
} else {
over_count = (u32)hweight32(ded_status & ~sed_status);
}

/* If both a SBE and a DBE occur on the same partition, then we have an
overcount for the subpartition if the opposite error counts are
zero. */
if (((sed_status & ded_status) != 0U) && (opposite_count == 0U)) {
over_count += (u32)hweight32(sed_status & ded_status);
}

if (*count_to_adjust > over_count) {
*count_to_adjust -= over_count;
} else {
*count_to_adjust = 0;
}
}

int gr_gp10b_handle_sm_exception(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr)
{
int ret = 0;
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
u32 lrf_ecc_status, lrf_ecc_sed_status, lrf_ecc_ded_status;
u32 lrf_single_count_delta, lrf_double_count_delta;
u32 shm_ecc_status;

ret = gr_gk20a_handle_sm_exception(g,
gpc, tpc, sm, post_event, fault_ch, hww_global_esr);

/* Check for LRF ECC errors. */
lrf_ecc_status = gk20a_readl(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
lrf_ecc_sed_status = lrf_ecc_status &
(gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f());
lrf_ecc_ded_status = lrf_ecc_status &
(gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f());
lrf_single_count_delta =
gk20a_readl(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() +
offset);
lrf_double_count_delta =
gk20a_readl(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() +
offset);
gk20a_writel(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset,
0);
gk20a_writel(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset,
0);
if (lrf_ecc_sed_status != 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Single bit error detected in SM LRF!");

gr_gp10b_sm_lrf_ecc_overcount_war(true,
lrf_ecc_sed_status,
lrf_ecc_ded_status,
&lrf_single_count_delta,
lrf_double_count_delta);
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
lrf_single_count_delta;
}
if (lrf_ecc_ded_status != 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Double bit error detected in SM LRF!");

gr_gp10b_sm_lrf_ecc_overcount_war(false,
lrf_ecc_sed_status,
lrf_ecc_ded_status,
&lrf_double_count_delta,
lrf_single_count_delta);
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
lrf_double_count_delta;
}
gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
lrf_ecc_status);

/* Check for SHM ECC errors. */
shm_ecc_status = gk20a_readl(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset);
if ((shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) != 0U ||
(shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) != 0U ||
(shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) != 0U ||
(shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) != 0U ) {
u32 ecc_stats_reg_val;

nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Single bit error detected in SM SHM!");

ecc_stats_reg_val =
gk20a_readl(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter +=
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter +=
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
gk20a_writel(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
ecc_stats_reg_val);
}
if ((shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) != 0U ||
(shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) != 0U) {
u32 ecc_stats_reg_val;

nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Double bit error detected in SM SHM!");

ecc_stats_reg_val =
gk20a_readl(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter +=
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
gk20a_writel(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
ecc_stats_reg_val);
}
gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
shm_ecc_status);

return ret;
}

void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
{
u32 val;

@@ -43,11 +43,6 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g,
struct nvgpu_gr_isr_data *isr_data);
int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
struct channel_gk20a *fault_ch);

int gr_gp10b_handle_sm_exception(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr);
int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch);
void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data);

@@ -25,6 +25,8 @@
#include <nvgpu/class.h>

#include <nvgpu/gr/config.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_intr.h>

#include "gr_intr_gp10b.h"

@@ -103,12 +105,165 @@ fail:
return -EINVAL;
}

static void gr_gp10b_sm_lrf_ecc_overcount_war(bool single_err,
u32 sed_status,
u32 ded_status,
u32 *count_to_adjust,
u32 opposite_count)
{
u32 over_count = 0;

sed_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_b();
ded_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_b();

/* One overcount for each partition on which a SBE occurred but not a
DBE (or vice-versa) */
if (single_err) {
over_count = (u32)hweight32(sed_status & ~ded_status);
} else {
over_count = (u32)hweight32(ded_status & ~sed_status);
}

/* If both a SBE and a DBE occur on the same partition, then we have an
overcount for the subpartition if the opposite error counts are
zero. */
if (((sed_status & ded_status) != 0U) && (opposite_count == 0U)) {
over_count += (u32)hweight32(sed_status & ded_status);
}

if (*count_to_adjust > over_count) {
*count_to_adjust -= over_count;
} else {
*count_to_adjust = 0;
}
}

int gp10b_gr_intr_handle_sm_exception(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr)
{
int ret = 0;
u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 lrf_ecc_status, lrf_ecc_sed_status, lrf_ecc_ded_status;
u32 lrf_single_count_delta, lrf_double_count_delta;
u32 shm_ecc_status;

ret = nvgpu_gr_intr_handle_sm_exception(g,
gpc, tpc, sm, post_event, fault_ch, hww_global_esr);

/* Check for LRF ECC errors. */
lrf_ecc_status = nvgpu_readl(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
lrf_ecc_sed_status =
lrf_ecc_status &
(gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f());
lrf_ecc_ded_status =
lrf_ecc_status &
(gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f() |
gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f());
lrf_single_count_delta =
nvgpu_readl(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() +
offset);
lrf_double_count_delta =
nvgpu_readl(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() +
offset);
nvgpu_writel(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset, 0);
nvgpu_writel(g,
gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, 0);
if (lrf_ecc_sed_status != 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Single bit error detected in SM LRF!");

gr_gp10b_sm_lrf_ecc_overcount_war(true,
lrf_ecc_sed_status,
lrf_ecc_ded_status,
&lrf_single_count_delta,
lrf_double_count_delta);
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
lrf_single_count_delta;
}
if (lrf_ecc_ded_status != 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Double bit error detected in SM LRF!");

gr_gp10b_sm_lrf_ecc_overcount_war(false,
lrf_ecc_sed_status,
lrf_ecc_ded_status,
&lrf_double_count_delta,
lrf_single_count_delta);
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
lrf_double_count_delta;
}
nvgpu_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
lrf_ecc_status);

/* Check for SHM ECC errors. */
shm_ecc_status = nvgpu_readl(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset);
if ((shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) != 0U ||
(shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) != 0U ||
(shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) != 0U ||
(shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) != 0U ) {
u32 ecc_stats_reg_val;

nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Single bit error detected in SM SHM!");

ecc_stats_reg_val =
nvgpu_readl(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter +=
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter +=
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
nvgpu_writel(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
ecc_stats_reg_val);
}
if ((shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) != 0U ||
(shm_ecc_status &
gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) != 0U) {
u32 ecc_stats_reg_val;

nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Double bit error detected in SM SHM!");

ecc_stats_reg_val =
nvgpu_readl(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter +=
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
nvgpu_writel(g,
gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
ecc_stats_reg_val);
}
nvgpu_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
shm_ecc_status);

return ret;
}

void gp10b_gr_intr_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc)
{
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
GPU_LIT_TPC_IN_GPC_STRIDE);
u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc);
u32 esr;
u32 ecc_stats_reg_val;

@@ -26,6 +26,7 @@
#include <nvgpu/types.h>

struct gk20a;
struct channel_gk20a;

#define NVC097_SET_GO_IDLE_TIMEOUT 0x022cU
#define NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dcU

@@ -43,4 +44,8 @@ void gp10b_gr_intr_set_go_idle_timeout(struct gk20a *g, u32 data);
void gp10b_gr_intr_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc);
int gp10b_gr_intr_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data);
int gp10b_gr_intr_handle_sm_exception(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr);
#endif /* NVGPU_GR_INTR_GP10B_H */

@@ -265,7 +265,6 @@ static const struct gpu_ops gm20b_ops = {
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
.bpt_reg_info = gr_gm20b_bpt_reg_info,
.handle_fecs_error = gk20a_gr_handle_fecs_error,
.handle_sm_exception = gr_gk20a_handle_sm_exception,
.get_lrf_tex_ltc_dram_override = NULL,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,

@@ -532,6 +531,8 @@ static const struct gpu_ops gm20b_ops = {
gm20ab_gr_intr_tpc_exception_sm_enable,
.tpc_exception_sm_disable =
gm20ab_gr_intr_tpc_exception_sm_disable,
.handle_sm_exception =
nvgpu_gr_intr_handle_sm_exception,
.stall_isr = nvgpu_gr_intr_stall_isr,
},
.falcon = {

@@ -296,7 +296,6 @@ static const struct gpu_ops gp10b_ops = {
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
.bpt_reg_info = gr_gm20b_bpt_reg_info,
.handle_fecs_error = gr_gp10b_handle_fecs_error,
.handle_sm_exception = gr_gp10b_handle_sm_exception,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,

@@ -597,6 +596,8 @@ static const struct gpu_ops gp10b_ops = {
gm20ab_gr_intr_tpc_exception_sm_enable,
.tpc_exception_sm_disable =
gm20ab_gr_intr_tpc_exception_sm_disable,
.handle_sm_exception =
gp10b_gr_intr_handle_sm_exception,
.stall_isr = nvgpu_gr_intr_stall_isr,
},
.falcon = {

@@ -398,7 +398,6 @@ static const struct gpu_ops gv100_ops = {
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
.bpt_reg_info = gv11b_gr_bpt_reg_info,
.handle_fecs_error = gr_gv11b_handle_fecs_error,
.handle_sm_exception = gr_gk20a_handle_sm_exception,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,

@@ -739,6 +738,8 @@ static const struct gpu_ops gv100_ops = {
gm20ab_gr_intr_tpc_exception_sm_enable,
.tpc_exception_sm_disable =
gm20ab_gr_intr_tpc_exception_sm_disable,
.handle_sm_exception =
nvgpu_gr_intr_handle_sm_exception,
.stall_isr = nvgpu_gr_intr_stall_isr,
},
.falcon = {

@@ -369,7 +369,6 @@ static const struct gpu_ops gv11b_ops = {
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
.bpt_reg_info = gv11b_gr_bpt_reg_info,
.handle_fecs_error = gr_gv11b_handle_fecs_error,
.handle_sm_exception = gr_gk20a_handle_sm_exception,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,

@@ -716,6 +715,8 @@ static const struct gpu_ops gv11b_ops = {
gm20ab_gr_intr_tpc_exception_sm_enable,
.tpc_exception_sm_disable =
gm20ab_gr_intr_tpc_exception_sm_disable,
.handle_sm_exception =
nvgpu_gr_intr_handle_sm_exception,
.stall_isr = nvgpu_gr_intr_stall_isr,
},
.falcon = {

@@ -418,7 +418,6 @@ static const struct gpu_ops tu104_ops = {
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
.bpt_reg_info = gv11b_gr_bpt_reg_info,
.handle_fecs_error = gr_gv11b_handle_fecs_error,
.handle_sm_exception = gr_gk20a_handle_sm_exception,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,

@@ -769,6 +768,8 @@ static const struct gpu_ops tu104_ops = {
gm20ab_gr_intr_tpc_exception_sm_enable,
.tpc_exception_sm_disable =
gm20ab_gr_intr_tpc_exception_sm_disable,
.handle_sm_exception =
nvgpu_gr_intr_handle_sm_exception,
.stall_isr = nvgpu_gr_intr_stall_isr,
},
.falcon = {

@@ -365,10 +365,6 @@ struct gpu_ops {
u32 gpc, u32 tpc,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr);
int (*handle_sm_exception)(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr);
u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc,
u32 sm, struct channel_gk20a *fault_ch);

@@ -845,6 +841,10 @@ struct gpu_ops {
void (*tpc_exception_sm_disable)(struct gk20a *g,
u32 offset);
void (*tpc_exception_sm_enable)(struct gk20a *g);
int (*handle_sm_exception)(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr);
int (*stall_isr)(struct gk20a *g);
} intr;

@@ -69,5 +69,8 @@ struct channel_gk20a *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g,
u32 curr_ctx, u32 *curr_tsgid);
void nvgpu_gr_intr_set_error_notifier(struct gk20a *g,
struct nvgpu_gr_isr_data *isr_data, u32 error_notifier);
int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr);
int nvgpu_gr_intr_stall_isr(struct gk20a *g);
#endif /* NVGPU_GR_INTR_H */