gpu: nvgpu: add sm lock_down gr ops

Add lock_down_sm and wait_for_sm_lock_down gr ops.
These are required to support multiple SMs per TPC and the t19x
SM register address changes.

JIRA GPUT19X-75

Change-Id: I529babde51d9b2143fe3740a4f67c582b7eb404b
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master/r/1514042
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Author:    Seema Khowala
Date:      2017-06-22 11:53:04 -07:00
Committer: mobile promotions
Parent:    4728761b6c
Commit:    0852c9f1ab

4 changed files with 37 additions and 32 deletions
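The new ops take an explicit SM index because t19x parts carry more than one SM per TPC, so the SM debug registers need a per-SM stride on top of the existing GPC/TPC offset. A minimal sketch of what a chip-specific offset helper could look like, assuming a GPU_LIT_SM_PRI_STRIDE litter value; neither the helper nor the literal name is part of this change:

/*
 * Illustrative only: add a per-SM stride to the usual GPC/TPC offset.
 * The GPU_LIT_SM_PRI_STRIDE litter value and the helper name are assumptions.
 */
static u32 example_gr_sm_offset(struct gk20a *g, u32 sm)
{
	u32 sm_pri_stride = nvgpu_get_litter_value(g, GPU_LIT_SM_PRI_STRIDE);

	return sm * sm_pri_stride;
}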

drivers/gpu/nvgpu/gk20a/gk20a.h

@@ -296,6 +296,10 @@ struct gpu_ops {
 	u32 (*get_sm_hww_global_esr)(struct gk20a *g,
 			u32 gpc, u32 tpc, u32 sm);
 	u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g);
+	int (*lock_down_sm)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
+			u32 global_esr_mask, bool check_errors);
+	int (*wait_for_sm_lock_down)(struct gk20a *g, u32 gpc, u32 tpc,
+			u32 sm, u32 global_esr_mask, bool check_errors);
 	void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc,
 			u32 *esr_sm_sel);
 	int (*handle_sm_exception)(struct gk20a *g,
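With these entries in struct gpu_ops, common code is expected to dispatch through the HAL rather than calling the gk20a functions directly, so a chip with a per-SM register layout can substitute its own handlers. A condensed usage sketch, not code from this patch:

	/* Sketch: lock down one SM through the HAL and report failure. */
	err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm, global_esr_mask, true);
	if (err != 0)
		nvgpu_err(g, "GPC%d TPC%d SM%d: lock down failed",
				gpc, tpc, sm);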

drivers/gpu/nvgpu/gk20a/gr_gk20a.c

@@ -5467,16 +5467,14 @@ unlock:
 }
 int gk20a_gr_lock_down_sm(struct gk20a *g,
-			u32 gpc, u32 tpc, u32 global_esr_mask,
-			bool check_errors)
+		u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
+		bool check_errors)
 {
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
 	u32 dbgr_control0;
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-		"GPC%d TPC%d: locking down SM", gpc, tpc);
+		"GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm);
 	/* assert stop trigger */
 	dbgr_control0 =
@@ -5485,7 +5483,7 @@ int gk20a_gr_lock_down_sm(struct gk20a *g,
 	gk20a_writel(g,
 		gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
-	return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask,
+	return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask,
 			check_errors);
 }
@@ -5598,7 +5596,8 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	}
 	if (do_warp_sync) {
-		ret = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, true);
+		ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
+				global_mask, true);
 		if (ret) {
 			nvgpu_err(g, "sm did not lock down!");
 			return ret;
@@ -7906,25 +7905,21 @@ void gk20a_init_gr(struct gk20a *g)
 	nvgpu_cond_init(&g->gr.init_wq);
 }
-int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		u32 global_esr_mask, bool check_errors)
 {
 	bool locked_down;
 	bool no_error_pending;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset =
-		gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
 	u32 dbgr_status0 = 0, dbgr_control0 = 0;
 	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
 	struct nvgpu_timeout timeout;
 	u32 warp_esr;
-	u32 sm = 0;
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-		"GPC%d TPC%d: locking down SM", gpc, tpc);
+		"GPC%d TPC%d SM%d: locking down SM", gpc, tpc, sm);
 	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
 		NVGPU_TIMER_CPU_TIMER);
@@ -7949,7 +7944,8 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 		if (locked_down || no_error_pending) {
 			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-				"GPC%d TPC%d: locked down SM", gpc, tpc);
+				"GPC%d TPC%d SM%d: locked down SM",
+				gpc, tpc, sm);
 			return 0;
 		}
@@ -7959,7 +7955,7 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 		    (g->ops.mm.mmu_fault_pending(g))) {
 			nvgpu_err(g,
 				"GPC%d TPC%d: mmu fault pending,"
-				" sm will never lock down!", gpc, tpc);
+				" SM%d will never lock down!", gpc, tpc, sm);
 			return -EFAULT;
 		}
@@ -8017,7 +8013,7 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
 	gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
 			dbgr_control0);
-	err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc,
+	err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm,
 			global_esr_mask, check_errors);
 	if (err) {
 		nvgpu_err(g,
@@ -8030,9 +8026,10 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
 		u32 global_esr_mask, bool check_errors)
 {
 	struct gr_gk20a *gr = &g->gr;
-	u32 gpc, tpc;
+	u32 gpc, tpc, sm;
 	int err;
 	u32 dbgr_control0;
+	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 	/* if an SM debugger isn't attached, skip suspend */
 	if (!g->ops.gr.sm_debugger_attached(g)) {
@@ -8055,13 +8052,14 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
 	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
 		for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
-			err =
-			 gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc,
+			for (sm = 0; sm < sm_per_tpc; sm++) {
+				err = g->ops.gr.wait_for_sm_lock_down(g,
+						gpc, tpc, sm,
 					global_esr_mask, check_errors);
-			if (err) {
-				nvgpu_err(g,
-					"SuspendAllSms failed");
-				return;
+				if (err) {
+					nvgpu_err(g, "SuspendAllSms failed");
+					return;
+				}
 			}
 		}
 	}
@@ -8371,7 +8369,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
 {
 	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
-	u32 gpc, tpc, sm_id;
+	u32 gpc, tpc, sm, sm_id;
 	u32 global_mask;
 	/* Wait for the SMs to reach full stop. This condition is:
@@ -8386,9 +8384,10 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
 		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
 		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+		sm = g->gr.sm_to_cluster[sm_id].sm_index;
-		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
+		err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
+				global_mask, false);
 		if (err) {
 			nvgpu_err(g, "sm did not lock down!");
 			return err;

drivers/gpu/nvgpu/gk20a/gr_gk20a.h

@@ -622,9 +622,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
 void gk20a_gr_suspend_all_sms(struct gk20a *g,
 		u32 global_esr_mask, bool check_errors);
 u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
-int gk20a_gr_lock_down_sm(struct gk20a *g,
-		u32 gpc, u32 tpc, u32 global_esr_mask,
-		bool check_errors);
 int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 		struct channel_gk20a *ch, u64 sms, bool enable);
 bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
@@ -681,7 +678,10 @@ static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
 int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
 		struct gr_gk20a_isr_data *isr_data);
-int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+int gk20a_gr_lock_down_sm(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
+		bool check_errors);
+int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		u32 global_esr_mask, bool check_errors);
 void gk20a_gr_clear_sm_hww(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 global_esr);

drivers/gpu/nvgpu/gm20b/gr_gm20b.c

@@ -1638,4 +1638,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
 	gops->gr.get_sm_no_lock_down_hww_global_esr_mask =
 		gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask;
+	gops->gr.lock_down_sm = gk20a_gr_lock_down_sm;
+	gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down;
 }
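For comparison, a t19x chip would be expected to install its own SM-aware implementations here instead of the gk20a defaults, along these lines; the gv11b function names are assumptions for illustration, not part of this change:

	/* Hypothetical t19x hook-up; function names are illustrative only. */
	gops->gr.lock_down_sm = gv11b_gr_lock_down_sm;
	gops->gr.wait_for_sm_lock_down = gv11b_gr_wait_for_sm_lock_down;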