diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 9f2e0017a..f554cf77c 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -396,6 +396,14 @@ static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
 	return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
 }
 
+void channel_gk20a_enable(struct channel_gk20a *ch)
+{
+	/* enable channel */
+	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+		gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
+		ccsr_channel_enable_set_true_f());
+}
+
 void channel_gk20a_disable(struct channel_gk20a *ch)
 {
 	/* disable channel */
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index aa87464b9..a028b6f3d 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -223,5 +223,5 @@ int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch);
 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch);
 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 			u64 gpfifo_base, u32 gpfifo_entries);
-
+void channel_gk20a_enable(struct channel_gk20a *ch);
 #endif /* CHANNEL_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 944860643..39941aae0 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -28,6 +28,7 @@
 #include "dbg_gpu_gk20a.h"
 #include "regops_gk20a.h"
 #include "hw_therm_gk20a.h"
+#include "hw_gr_gk20a.h"
 
 struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
 	.exec_reg_ops = exec_regops_gk20a,
@@ -359,6 +360,11 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
 static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 			struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args);
 
+static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
+		struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
+
+
 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -418,8 +424,13 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS:
+		err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s,
+			(struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
+		break;
+
 	default:
-		dev_dbg(dev_from_gk20a(g),
+		gk20a_err(dev_from_gk20a(g),
 			"unrecognized dbg gpu ioctl cmd: 0x%x",
 			cmd);
 		err = -ENOTTY;
@@ -693,3 +704,63 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	mutex_unlock(&g->dbg_sessions_lock);
 	return err;
 }
+
+static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
+		struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args)
+{
+	struct gk20a *g = get_gk20a(dbg_s->pdev);
+	struct channel_gk20a *ch = dbg_s->ch;
+	bool ch_is_curr_ctx;
+	int err = 0, action = args->mode;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	/*
+	 * Suspend GPU context switching: once channel switching is
+	 * disabled, the hardware state can be inspected to determine
+	 * whether the context we're interested in is current.
+	 */
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw");
+		/* this should probably be ctx-fatal... */
+		goto clean_up;
+	}
+
+	/* find out whether the current channel is resident */
+	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
+
+	if (ch_is_curr_ctx) {
+		switch (action) {
+		case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
+			gk20a_suspend_all_sms(g);
+			break;
+
+		case NVGPU_DBG_GPU_RESUME_ALL_SMS:
+			gk20a_resume_all_sms(g);
+			break;
+		}
+	} else {
+		switch (action) {
+		case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
+			/* Disable the channel */
+			channel_gk20a_disable(ch);
+			break;
+
+		case NVGPU_DBG_GPU_RESUME_ALL_SMS:
+			/* Enable the channel */
+			channel_gk20a_enable(ch);
+			break;
+		}
+	}
+
+	/* Resume GPU context switching */
+	err = gr_gk20a_enable_ctxsw(g);
+	if (err)
+		gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n");
+
+clean_up:
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 452560d88..b3fc8ae1c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -79,6 +79,10 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);
 static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 					struct channel_gk20a *c);
 
+/* sm lock down */
+static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+		u32 global_esr_mask, bool check_errors);
+
 void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
 {
 	int i;
@@ -5365,13 +5369,9 @@ unlock:
 	return chid;
 }
 
-static int gk20a_gr_lock_down_sm(struct gk20a *g,
+int gk20a_gr_lock_down_sm(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 global_esr_mask)
 {
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
-	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
 	u32 offset =
 		proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc;
 	u32 dbgr_control0;
@@ -5386,55 +5386,8 @@ static int gk20a_gr_lock_down_sm(struct gk20a *g,
 	gk20a_writel(g,
 		gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
 
-	/* wait for the sm to lock down */
-	do {
-		u32 global_esr = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
-		u32 warp_esr = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
-		u32 dbgr_status0 = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
-		bool locked_down =
-			(gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
-			 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
-		bool error_pending =
-			(gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) !=
-			 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) ||
-			((global_esr & ~global_esr_mask) != 0);
-
-		if (locked_down || !error_pending) {
-			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-				"GPC%d TPC%d: locked down SM", gpc, tpc);
-
-			/* de-assert stop trigger */
-			dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
-			gk20a_writel(g,
-				gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
-				dbgr_control0);
-
-			return 0;
-		}
-
-		/* if an mmu fault is pending and mmu debug mode is not
-		 * enabled, the sm will never lock down. */
-		if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) {
-			gk20a_err(dev_from_gk20a(g),
-				"GPC%d TPC%d: mmu fault pending,"
-				" sm will never lock down!", gpc, tpc);
-			return -EFAULT;
-		}
-
-		usleep_range(delay, delay * 2);
-		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-
-	} while (time_before(jiffies, end_jiffies)
-			|| !tegra_platform_is_silicon());
-
-	gk20a_err(dev_from_gk20a(g),
-		"GPC%d TPC%d: timed out while trying to lock down SM",
-		gpc, tpc);
-
-	return -EAGAIN;
+	return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask,
+			true);
 }
 
 bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
@@ -7198,6 +7151,131 @@ static u32 gr_gk20a_get_tpc_num(u32 addr)
 	return 0;
 }
 
+static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+		u32 global_esr_mask, bool check_errors)
+{
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+	u32 delay = GR_IDLE_CHECK_DEFAULT;
+	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
+	u32 offset =
+		proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc;
+
+	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+		"GPC%d TPC%d: locking down SM", gpc, tpc);
+
+	/* wait for the sm to lock down */
+	do {
+		u32 global_esr = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+		u32 warp_esr = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+		u32 dbgr_status0 = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
+		bool locked_down =
+			(gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
+			 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
+		bool no_error_pending =
+			check_errors &&
+			(gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) ==
+			 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) &&
+			((global_esr & ~global_esr_mask) == 0);
+
+		if (locked_down || no_error_pending) {
+			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+				"GPC%d TPC%d: locked down SM", gpc, tpc);
+			return 0;
+		}
+
+		/* if an mmu fault is pending and mmu debug mode is not
+		 * enabled, the sm will never lock down. */
+		if (!mmu_debug_mode_enabled &&
+		    gk20a_fifo_mmu_fault_pending(g)) {
+			gk20a_err(dev_from_gk20a(g),
+				"GPC%d TPC%d: mmu fault pending,"
+				" sm will never lock down!", gpc, tpc);
+			return -EFAULT;
+		}
+
+		usleep_range(delay, delay * 2);
+		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+
+	} while (time_before(jiffies, end_jiffies)
+			|| !tegra_platform_is_silicon());
+
+	gk20a_err(dev_from_gk20a(g),
+		"GPC%d TPC%d: timed out while trying to lock down SM",
+		gpc, tpc);
+
+	return -EAGAIN;
+}
+
+void gk20a_suspend_all_sms(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc, tpc;
+	int err;
+	u32 dbgr_control0;
+
+	/* if an SM debugger isn't attached, skip suspend */
+	if (!gk20a_gr_sm_debugger_attached(g)) {
+		gk20a_err(dev_from_gk20a(g),
+			"SM debugger not attached, skipping suspend!\n");
+		return;
+	}
+
+	/* assert stop trigger. uniformity assumption: all SMs will have
+	 * the same state in dbg_control0. */
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
+	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
+
+	/* broadcast write */
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+
+	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
+			err = gk20a_gr_wait_for_sm_lock_down(g, gpc,
+					tpc, 0, false);
+			if (err) {
+				gk20a_err(dev_from_gk20a(g),
+					"SuspendAllSms failed\n");
+				return;
+			}
+		}
+	}
+}
+
+void gk20a_resume_all_sms(struct gk20a *g)
+{
+	u32 dbgr_control0;
+	/*
+	 * The following requires some clarification. Despite the fact that
+	 * both RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
+	 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
+	 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
+	 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
+	 * (_DISABLE) as well.
+	 *
+	 * Advice from the arch group: Disable the stop trigger first, as a
+	 * separate operation, in order to ensure that the trigger has taken
+	 * effect, before enabling the run trigger.
+	 */
+
+	/* De-assert stop trigger */
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpcs_tpcs_sm_dbgr_control0_r());
+	dbgr_control0 &= ~gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+
+	/* Run trigger */
+	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_enable_f();
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+}
+
 void gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -7232,3 +7310,4 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
 }
+
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 4b1f6de2d..72642a41f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -449,4 +449,11 @@ void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
 
 void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
 
-#endif /* GR_GK20A_H */
+int gr_gk20a_disable_ctxsw(struct gk20a *g);
+int gr_gk20a_enable_ctxsw(struct gk20a *g);
+void gk20a_resume_all_sms(struct gk20a *g);
+void gk20a_suspend_all_sms(struct gk20a *g);
+int gk20a_gr_lock_down_sm(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 global_esr_mask);
+bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
+#endif /* __GR_GK20A_H__ */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 463443d6c..65a3072c0 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -2810,6 +2810,14 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
 {
 	return 0x80000000;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_disable_f(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
+{
+	return 0x40000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
 {
 	return 0x0050460c;
 }
@@ -2822,6 +2830,22 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_r(void)
+{
+	return 0x00419e50;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_int_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_single_step_complete_pending_f(void)
+{
+	return 0x40;
+}
 static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
 {
 	return 0x00504650;
 }
@@ -3206,4 +3230,42 @@ static inline u32 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r(void)
+{
+	return 0x00419e10;
+}
+
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r_debugger_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_m(void)
+{
+	return 0x1 << 31;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void)
+{
+	return 0x1 << 30;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_enable_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_debugger_mode_on_f(void)
+{
+	return 0x1;
+}
 #endif
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 719dc2552..b7f65d75f 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -365,9 +365,19 @@ struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args {
 #define NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 5, struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args)
 
+/* Suspend/Resume SM control */
+#define NVGPU_DBG_GPU_SUSPEND_ALL_SMS 1
+#define NVGPU_DBG_GPU_RESUME_ALL_SMS 2
+
+struct nvgpu_dbg_gpu_suspend_resume_all_sms_args {
+	__u32 mode;
+} __packed;
+
+#define NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 6, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args)
 
 #define NVGPU_DBG_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_dbg_gpu_exec_reg_ops_args)
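
For reviewers, a minimal userspace sketch of how the new ioctl might be driven. This is illustrative only and not part of the patch: the device path (/dev/nvhost-dbg-gpu), the set_all_sms() helper, and the error handling are assumptions, and the debug session is assumed to be already bound to the channel of interest via the existing bind-channel ioctl (the handler acts on dbg_s->ch).

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>

	/* Ask the driver to suspend or resume all SMs for the bound channel.
	 * mode is NVGPU_DBG_GPU_SUSPEND_ALL_SMS or NVGPU_DBG_GPU_RESUME_ALL_SMS. */
	static int set_all_sms(int dbg_fd, __u32 mode)
	{
		struct nvgpu_dbg_gpu_suspend_resume_all_sms_args args = {
			.mode = mode,
		};

		return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS, &args);
	}

	int main(void)
	{
		/* hypothetical device node; the path varies by platform */
		int fd = open("/dev/nvhost-dbg-gpu", O_RDWR);

		if (fd < 0)
			return 1;

		if (set_all_sms(fd, NVGPU_DBG_GPU_SUSPEND_ALL_SMS))
			perror("suspend all SMs");

		/* ... inspect SM state (e.g. via the reg-ops ioctl) while the
		 * SMs are locked down or the channel is held disabled ... */

		if (set_all_sms(fd, NVGPU_DBG_GPU_RESUME_ALL_SMS))
			perror("resume all SMs");

		return 0;
	}

Note the asymmetry the handler implements under the ctxsw-disabled window: if the bound channel is resident, suspend locks down the SMs themselves; if it is not resident, suspend merely disables the channel so it cannot become resident, and resume re-enables it.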