diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index d0d33d110..3556d3ea5 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -4160,6 +4160,21 @@ long gk20a_channel_ioctl(struct file *filp,
 		err = gk20a_fifo_preempt(ch->g, ch);
 		gk20a_idle(g);
 		break;
+	case NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT:
+		if (!capable(CAP_SYS_NICE))
+			return -EPERM;
+		if (!ch->g->ops.fifo.reschedule_preempt_next)
+			return -ENOSYS;
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = ch->g->ops.fifo.reschedule_preempt_next(ch);
+		gk20a_idle(ch->g);
+		break;
 	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
 		err = gk20a_busy(g);
 		if (err) {
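Reviewer note: the new ioctl is argument-less (`_IO`), gated on `CAP_SYS_NICE`, and returns -ENOSYS where the `reschedule_preempt_next` op is not wired up (e.g. the vgpu config later in this patch). Below is a minimal userspace sketch of calling it; how the channel fd is obtained (the nvgpu channel-open flow) is an assumption for illustration, not part of this patch.

```c
/* Sketch: driving NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT from userspace.
 * Assumes an already-open nvgpu channel fd; obtaining that fd is
 * outside the scope of this patch.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int channel_preempt_next(int channel_fd)
{
	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT) == 0)
		return 0;
	/*
	 * EPERM:  caller lacks CAP_SYS_NICE
	 * ENOSYS: chip config has no reschedule_preempt_next op (e.g. vgpu)
	 * EBUSY:  runlist mutex contended; an update is already in flight
	 */
	fprintf(stderr, "PREEMPT_NEXT failed: %s\n", strerror(errno));
	return -errno;
}
```

Treating -EBUSY as benign is reasonable here: per the driver comment, it means another thread is already rewriting the runlist registers.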
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index d738e6051..22c05c007 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -41,11 +41,13 @@
 #include
 
 #define FECS_METHOD_WFI_RESTORE 0x80000
+#define FECS_MAILBOX_0_ACK_RESTORE 0x4
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    u32 hw_chid, bool add,
 					    bool wait_for_finish);
 static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
+static int gk20a_fifo_is_preempt_pending(struct gk20a *g);
 
 #ifdef CONFIG_DEBUG_FS
 static void __gk20a_fifo_profile_free(struct kref *ref);
@@ -2405,10 +2407,30 @@ void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
 			fifo_preempt_type_channel_f());
 }
 
-static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
+static int gk20a_fifo_is_preempt_pending(struct gk20a *g)
 {
 	struct nvgpu_timeout timeout;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
+	int ret = -EBUSY;
+
+	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
+			   NVGPU_TIMER_CPU_TIMER);
+	do {
+		if (!(gk20a_readl(g, fifo_preempt_r()) &
+				fifo_preempt_pending_true_f())) {
+			ret = 0;
+			break;
+		}
+
+		usleep_range(delay, delay * 2);
+		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+	} while (!nvgpu_timeout_expired_msg(&timeout, "preempt timeout"));
+
+	return ret;
+}
+
+static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
+{
 	u32 ret = 0;
 
 	gk20a_dbg_fn("%d", id);
@@ -2418,19 +2440,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
 	gk20a_dbg_fn("%d", id);
 
 	/* wait for preempt */
-	ret = -EBUSY;
-	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
-			   NVGPU_TIMER_CPU_TIMER);
-	do {
-		if (!(gk20a_readl(g, fifo_preempt_r()) &
-				fifo_preempt_pending_true_f())) {
-			ret = 0;
-			break;
-		}
-
-		usleep_range(delay, delay * 2);
-		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-	} while (!nvgpu_timeout_expired(&timeout));
+	ret = gk20a_fifo_is_preempt_pending(g);
 
 	gk20a_dbg_fn("%d", id);
 	if (ret) {
@@ -3011,16 +3021,18 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 		count = 0;
 
 	if (count != 0) {
-		gk20a_writel(g, fifo_runlist_base_r(),
+		runlist->runlist_base_r =
 			fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
 			gk20a_aperture_mask(g, &runlist->mem[new_buf],
 				fifo_runlist_base_target_sys_mem_ncoh_f(),
-				fifo_runlist_base_target_vid_mem_f()));
+				fifo_runlist_base_target_vid_mem_f());
+		gk20a_writel(g, fifo_runlist_base_r(), runlist->runlist_base_r);
 	}
 
-	gk20a_writel(g, fifo_runlist_r(),
+	runlist->runlist_r =
 		fifo_runlist_engine_f(runlist_id) |
-		fifo_eng_runlist_length_f(count));
+		fifo_eng_runlist_length_f(count);
+	gk20a_writel(g, fifo_runlist_r(), runlist->runlist_r);
 
 	if (wait_for_finish) {
 		ret = gk20a_fifo_runlist_wait_pending(g, runlist_id);
@@ -3089,8 +3101,8 @@ int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
 	mutex_ret = pmu_mutex_acquire(
 		&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
-	gk20a_writel(g, fifo_runlist_r(),
-		gk20a_readl(g, fifo_runlist_r()));
+	gk20a_writel(g, fifo_runlist_base_r(), runlist->runlist_base_r);
+	gk20a_writel(g, fifo_runlist_r(), runlist->runlist_r);
 	gk20a_fifo_runlist_wait_pending(g, runlist_id);
 
 	if (!mutex_ret)
@@ -3104,6 +3116,79 @@ int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
 	return ret;
 }
 
+/* trigger host preempt of the GR engine's pending (next) context if it is not ch's */
+int gk20a_fifo_reschedule_preempt_next(struct channel_gk20a *ch)
+{
+	struct gk20a *g = ch->g;
+	struct fifo_runlist_info_gk20a *runlist;
+	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+	u32 mutex_ret;
+	int ret = 0;
+	u32 gr_eng_id = 0;
+
+	runlist = &g->fifo.runlist_info[ch->runlist_id];
+	if (1 != gk20a_fifo_get_engine_ids(g, &gr_eng_id, 1, ENGINE_GR_GK20A))
+		return 0;
+	if (!(runlist->eng_bitmask & (1 << gr_eng_id)))
+		return 0;
+
+	if (!nvgpu_mutex_tryacquire(&runlist->mutex))
+		return -EBUSY; /* someone else is updating the runlist, so no preempt is needed here */
+	mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+	do {
+		u32 engstat, ctxstat, fecsstat0, fecsstat1;
+		s32 preempt_id = -1;
+		u32 preempt_type = 0;
+		bool same_ctx;
+
+		if (gk20a_readl(g, fifo_preempt_r()) &
+				fifo_preempt_pending_true_f())
+			break;
+
+		fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+		engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
+		ctxstat = fifo_engine_status_ctx_status_v(engstat);
+		if (ctxstat ==
+				fifo_engine_status_ctx_status_ctxsw_switch_v()) {
+			/* host is switching to a new context; preempt it if needed */
+			preempt_id = fifo_engine_status_next_id_v(engstat);
+			preempt_type = fifo_engine_status_next_id_type_v(engstat);
+		} else {
+			break;
+		}
+		if (gk20a_is_channel_marked_as_tsg(ch))
+			same_ctx = (preempt_id == ch->tsgid && preempt_type);
+		else
+			same_ctx = (preempt_id == ch->hw_chid &&
+				!preempt_type);
+		if (same_ctx)
+			break;
+		fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+		if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
+				fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE)
+			break; /* preempt is pointless once FECS has acked the save and started the restore */
+
+		gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
+
+		trace_gk20a_reschedule_preempt_next(ch->hw_chid, fecsstat0,
+			engstat, fecsstat1,
+			gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
+			gk20a_readl(g, fifo_preempt_r()));
+
+		gk20a_fifo_is_preempt_pending(g);
+
+		trace_gk20a_reschedule_preempted_next(ch->hw_chid);
+	} while (false);
+
+	if (!mutex_ret)
+		pmu_mutex_release(
+			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	nvgpu_mutex_release(&runlist->mutex);
+
+	return ret;
+}
+
 /* add/remove a channel from runlist
    special cases below: runlist->active_channels will NOT be changed.
    (hw_chid == ~0 && !add) means remove all active channels from runlist.
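Reviewer note: two things happen in this file. First, the preempt-completion poll is factored out of `__locked_fifo_preempt()` into `gk20a_fifo_is_preempt_pending()` so the new reschedule path can reuse it; the helper polls `fifo_preempt_r()` with exponential backoff (`delay` doubles up to GR_IDLE_CHECK_MAX) under a CPU timer. Second, the last-written `fifo_runlist_base_r`/`fifo_runlist_r` values are cached in the runlist struct so `gk20a_fifo_reschedule_runlist()` can resubmit them rather than relying on a register read-back. A standalone sketch of the poll-with-backoff shape follows; every name in it is an illustrative stand-in, not nvgpu API:

```c
/* Sketch of the poll-with-exponential-backoff pattern used by
 * gk20a_fifo_is_preempt_pending(): re-check a "pending" bit, sleeping a
 * little longer each round, until it clears or a deadline passes.
 * poll_pending(), sleep_us() and now_ms() are hypothetical stand-ins.
 */
#include <stdbool.h>

#define DELAY_MIN_US 10U   /* plays the role of GR_IDLE_CHECK_DEFAULT */
#define DELAY_MAX_US 200U  /* plays the role of GR_IDLE_CHECK_MAX */

bool poll_pending(void);         /* e.g. fifo_preempt_r() pending bit */
void sleep_us(unsigned int us);  /* e.g. usleep_range(d, d * 2) */
unsigned long now_ms(void);

int wait_preempt_done(unsigned long timeout_ms)
{
	unsigned long deadline = now_ms() + timeout_ms;
	unsigned int delay = DELAY_MIN_US;

	do {
		if (!poll_pending())
			return 0;        /* preempt completed */
		sleep_us(delay);
		delay = (delay * 2 > DELAY_MAX_US) ? DELAY_MAX_US : delay * 2;
	} while (now_ms() < deadline);

	return -1;                       /* timed out, still pending */
}
```

The backoff keeps the common fast case cheap (first read usually succeeds) while bounding register traffic when the preempt drags on.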
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 03c93a352..e0e57645a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -52,6 +52,8 @@ struct fifo_runlist_info_gk20a {
 	u32 pbdma_bitmask;      /* pbdmas supported for this runlist*/
 	u32 eng_bitmask;        /* engines using this runlist */
 	u32 reset_eng_bitmask;  /* engines to be reset during recovery */
+	u32 runlist_base_r;     /* cached runlist_base_r */
+	u32 runlist_r;          /* cached runlist_r */
 	bool stopped;
 	bool support_tsg;
 	struct nvgpu_mutex mutex; /* protect channel preempt and runlist update */
@@ -219,6 +221,7 @@ int gk20a_fifo_nonstall_isr(struct gk20a *g);
 int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid);
 int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid);
 int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch);
+int gk20a_fifo_preempt_next(struct gk20a *g, struct channel_gk20a *ch);
 
 int gk20a_fifo_enable_engine_activity(struct gk20a *g,
 			struct fifo_engine_info_gk20a *eng_info);
@@ -231,6 +234,7 @@ int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
 u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 hw_chid);
 
 int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id);
+int gk20a_fifo_reschedule_preempt_next(struct channel_gk20a *ch);
 
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid,
 			      bool add, bool wait_for_finish);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 50180551d..33b7b10f4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -405,6 +405,7 @@ struct gpu_ops {
 		int (*preempt_channel)(struct gk20a *g, u32 hw_chid);
 		int (*preempt_tsg)(struct gk20a *g, u32 tsgid);
 		int (*reschedule_runlist)(struct gk20a *g, u32 runlist_id);
+		int (*reschedule_preempt_next)(struct channel_gk20a *ch);
 		int (*update_runlist)(struct gk20a *g, u32 runlist_id,
 			u32 hw_chid, bool add,
 			bool wait_for_finish);
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 2880244a5..1e0fcc9b6 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -241,4 +241,5 @@ void gp10b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v;
 	gops->fifo.device_info_fault_id = top_device_info_data_fault_id_enum_v;
 	gops->fifo.reschedule_runlist = gk20a_fifo_reschedule_runlist;
+	gops->fifo.reschedule_preempt_next = gk20a_fifo_reschedule_preempt_next;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
index 7053739cc..6cf4286d9 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
@@ -46,4 +46,5 @@ void vgpu_gp10b_init_fifo_ops(struct gpu_ops *gops)
 	gops->fifo.init_engine_info = vgpu_gp10b_fifo_init_engine_info;
 	gops->fifo.resetup_ramfc = NULL;
 	gops->fifo.reschedule_runlist = NULL;
+	gops->fifo.reschedule_preempt_next = NULL;
 }
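Reviewer note: the entry point is published through the per-chip `gpu_ops` HAL table: gp10b points it at the gk20a implementation, while the virtualized gp10b config leaves it NULL, which is what the ioctl's -ENOSYS check keys off. A minimal sketch of that dispatch pattern, with simplified stand-in types rather than the driver's real structs:

```c
/* Sketch of the gpu_ops function-pointer dispatch used above, with
 * stand-in types. "struct channel" is hypothetical shorthand for
 * struct channel_gk20a.
 */
#include <errno.h>
#include <stddef.h>

struct channel;

struct fifo_ops {
	int (*reschedule_preempt_next)(struct channel *ch);
};

/* Per-chip init fills the pointer (gp10b) or leaves it NULL (vgpu),
 * so callers probe before dispatching. */
static int do_preempt_next(const struct fifo_ops *ops, struct channel *ch)
{
	if (!ops->reschedule_preempt_next)
		return -ENOSYS;
	return ops->reschedule_preempt_next(ch);
}
```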
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index a6939833a..d02ac22af 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -354,6 +354,51 @@ TRACE_EVENT(gk20a_channel_submitted_gpfifo,
 		  __entry->flags, __entry->incr_id, __entry->incr_value)
 );
 
+TRACE_EVENT(gk20a_reschedule_preempt_next,
+	TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2,
+		u32 preempt),
+
+	TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt),
+
+	TP_STRUCT__entry(
+		__field(u32, chid)
+		__field(u32, fecs0)
+		__field(u32, engstat)
+		__field(u32, fecs1)
+		__field(u32, fecs2)
+		__field(u32, preempt)
+	),
+
+	TP_fast_assign(
+		__entry->chid = chid;
+		__entry->fecs0 = fecs0;
+		__entry->engstat = engstat;
+		__entry->fecs1 = fecs1;
+		__entry->fecs2 = fecs2;
+		__entry->preempt = preempt;
+	),
+
+	TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x,"
+		" preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat,
+		__entry->fecs1, __entry->fecs2, __entry->preempt)
+);
+
+TRACE_EVENT(gk20a_reschedule_preempted_next,
+	TP_PROTO(u32 chid),
+
+	TP_ARGS(chid),
+
+	TP_STRUCT__entry(
+		__field(u32, chid)
+	),
+
+	TP_fast_assign(
+		__entry->chid = chid;
+	),
+
+	TP_printk("chid=%d", __entry->chid)
+);
+
 TRACE_EVENT(gk20a_channel_reset,
 	TP_PROTO(u32 hw_chid, u32 tsgid),
 
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index fd3c1a578..5f02a3189 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1637,9 +1637,11 @@ struct nvgpu_boosted_ctx_args {
 	_IOW(NVGPU_IOCTL_MAGIC, 123, struct nvgpu_alloc_gpfifo_ex_args)
 #define NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX \
 	_IOW(NVGPU_IOCTL_MAGIC, 124, struct nvgpu_boosted_ctx_args)
+#define NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT \
+	_IO(NVGPU_IOCTL_MAGIC, 126)
 
 #define NVGPU_IOCTL_CHANNEL_LAST \
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT)
 
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)
 
 /*
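Reviewer note: bumping `NVGPU_IOCTL_CHANNEL_LAST` is not cosmetic; the channel ioctl handler range-checks incoming command numbers, so without it the new ioctl would be rejected before ever reaching its `case`. A sketch of that validation, mirroring the usual nvgpu pattern (the actual check sits at the top of `gk20a_channel_ioctl()`):

```c
/* Sketch: why NVGPU_IOCTL_CHANNEL_LAST must track the highest command.
 * _IOC_NR(NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT) is 126; if LAST still named
 * SET_BOOSTED_CTX (124), this filter would return -EINVAL for the new cmd.
 */
#include <errno.h>
#include <linux/ioctl.h>
#include <linux/nvgpu.h>

static int channel_cmd_valid(unsigned int cmd)
{
	if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
	    (_IOC_NR(cmd) == 0) ||
	    (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
	    (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
		return -EINVAL;
	return 0;
}
```

Since `_IO()` encodes a zero-size argument, the new command also passes the `_IOC_SIZE` bound without touching `NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE`.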