diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index 1e170b300..f189d3ed6 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -64,9 +64,6 @@ u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
 	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING)
 		flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING;
 
-	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST)
-		flags |= NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST;
-
 	return flags;
 }
 
@@ -1008,10 +1005,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	g->ops.fifo.userd_gp_put(g, c);
 
-	if ((NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST & flags) &&
-			g->ops.fifo.reschedule_runlist)
-		g->ops.fifo.reschedule_runlist(g, c->runlist_id);
-
 	/* No hw access beyond this point */
 	if (c->deterministic)
 		nvgpu_rwsem_up_read(&g->deterministic_busy);
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 606c52518..c1492cad9 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -789,10 +789,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	if (ch->has_timedout)
 		return -ETIMEDOUT;
 
-	if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & args->flags) &&
-			!capable(CAP_SYS_NICE))
-		return -EPERM;
-
 	nvgpu_get_fence_args(&args->fence, &fence);
 	submit_flags =
 		nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
@@ -1291,6 +1287,27 @@ long gk20a_channel_ioctl(struct file *filp,
 		err = gk20a_fifo_preempt(ch->g, ch);
 		gk20a_idle(ch->g);
 		break;
+	case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST:
+		if (!capable(CAP_SYS_NICE)) {
+			err = -EPERM;
+			break;
+		}
+		if (!ch->g->ops.fifo.reschedule_runlist) {
+			err = -ENOSYS;
+			break;
+		}
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = ch->g->ops.fifo.reschedule_runlist(ch,
+			NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT &
+			((struct nvgpu_reschedule_runlist_args *)buf)->flags);
+		gk20a_idle(ch->g);
+		break;
 	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
 		err = gk20a_busy(ch->g);
 		if (err) {
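Note for userspace: with NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST removed from the submit path above, the reschedule becomes a separate, CAP_SYS_NICE-gated ioctl on the same channel fd. A minimal migration sketch; ch_fd and submit_args are assumed to be set up by the caller:

```c
/* Migration sketch: the reschedule used to ride along on submit via a
 * flags bit (removed by this patch); it is now an explicit ioctl.
 * Error handling elided for brevity.
 */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static void submit_and_reschedule(int ch_fd,
				  struct nvgpu_submit_gpfifo_args *submit_args)
{
	struct nvgpu_reschedule_runlist_args resched = { .flags = 0 };

	/* old: submit_args->flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST; */
	ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO, submit_args);

	/* new: explicitly ask host to expire the timeslice and restart
	 * the runlist from the front */
	ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST, &resched);
}
```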
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 5e8cab0d7..f95184beb 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -54,7 +54,6 @@ struct fifo_profile_gk20a;
 #define NVGPU_SUBMIT_FLAGS_SYNC_FENCE		(1 << 3)
 #define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI		(1 << 4)
 #define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING	(1 << 5)
-#define NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST	(1 << 6)
 
 /*
  * The binary format of 'struct nvgpu_channel_fence' introduced here
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 22dc1d60a..c94fc5369 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -53,6 +53,7 @@
 #include
 
 #define FECS_METHOD_WFI_RESTORE	0x80000
+#define FECS_MAILBOX_0_ACK_RESTORE 0x4
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    u32 chid, bool add,
@@ -3282,7 +3283,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	u32 new_buf;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
-	u32 count = 0;
 	u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
 
 	runlist = &f->runlist_info[runlist_id];
@@ -3345,12 +3345,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			ret = -E2BIG;
 			goto clean_up;
 		}
-		count = (runlist_end - runlist_entry_base) / runlist_entry_words;
-		WARN_ON(count > f->num_runlist_entries);
+		runlist->count = (runlist_end - runlist_entry_base) /
+			runlist_entry_words;
+		WARN_ON(runlist->count > f->num_runlist_entries);
 	} else	/* suspend to remove all channels */
-		count = 0;
+		runlist->count = 0;
 
-	g->ops.fifo.runlist_hw_submit(g, runlist_id, count, new_buf);
+	g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
 
 	if (wait_for_finish) {
 		ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
@@ -3406,31 +3407,98 @@ end:
 	return ret;
 }
 
-/* trigger host to expire current timeslice and reschedule runlist from front */
-int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
+/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
+static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
+		bool wait_preempt)
 {
+	struct gk20a *g = ch->g;
+	struct fifo_runlist_info_gk20a *runlist =
+		&g->fifo.runlist_info[ch->runlist_id];
+	int ret = 0;
+	u32 gr_eng_id = 0;
+	u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
+	s32 preempt_id = -1;
+	u32 preempt_type = 0;
+
+	if (1 != gk20a_fifo_get_engine_ids(
+			g, &gr_eng_id, 1, ENGINE_GR_GK20A))
+		return ret;
+	if (!(runlist->eng_bitmask & (1 << gr_eng_id)))
+		return ret;
+
+	if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
+			fifo_preempt_pending_true_f())
+		return ret;
+
+	fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
+	ctxstat = fifo_engine_status_ctx_status_v(engstat);
+	if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
+		/* host switching to next context, preempt that if needed */
+		preempt_id = fifo_engine_status_next_id_v(engstat);
+		preempt_type = fifo_engine_status_next_id_type_v(engstat);
+	} else
+		return ret;
+	if (preempt_id == ch->tsgid && preempt_type)
+		return ret;
+	fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
+			fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
+		/* preempt useless if FECS acked save and started restore */
+		return ret;
+	}
+
+	gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
+		fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
+		gk20a_readl(g, fifo_preempt_r()));
+#endif
+	if (wait_preempt) {
+		g->ops.fifo.is_preempt_pending(
+			g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC);
+	}
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempted_next(ch->chid);
+#endif
+	return ret;
+}
+
+int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
+{
+	return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
+}
+
+/* trigger host to expire current timeslice and reschedule runlist from front */
+int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
+		bool wait_preempt)
+{
+	struct gk20a *g = ch->g;
 	struct fifo_runlist_info_gk20a *runlist;
 	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 	u32 mutex_ret;
 	int ret = 0;
 
-	runlist = &g->fifo.runlist_info[runlist_id];
-	if (nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
-		mutex_ret = nvgpu_pmu_mutex_acquire(
+	runlist = &g->fifo.runlist_info[ch->runlist_id];
+	if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock))
+		return -EBUSY;
+
+	mutex_ret = nvgpu_pmu_mutex_acquire(
+			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+	g->ops.fifo.runlist_hw_submit(
+		g, ch->runlist_id, runlist->count, runlist->cur_buffer);
+
+	if (preempt_next)
+		__locked_fifo_reschedule_preempt_next(ch, wait_preempt);
+
+	gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
+
+	if (!mutex_ret)
+		nvgpu_pmu_mutex_release(
 			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	nvgpu_mutex_release(&runlist->runlist_lock);
 
-		gk20a_writel(g, fifo_runlist_r(),
-			gk20a_readl(g, fifo_runlist_r()));
-		gk20a_fifo_runlist_wait_pending(g, runlist_id);
-
-		if (!mutex_ret)
-			nvgpu_pmu_mutex_release(
-				&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-		nvgpu_mutex_release(&runlist->runlist_lock);
-	} else {
-		/* someone else is writing fifo_runlist_r so not needed here */
-		ret = -EBUSY;
-	}
 	return ret;
 }
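The common helper now takes the channel rather than a raw runlist id, resubmits the cached runlist->count/cur_buffer pair instead of rewriting fifo_runlist_r(), and returns -EBUSY when the runlist lock is contended (someone else is already resubmitting). A sketch of an in-kernel caller under the new signature; example_reschedule is a hypothetical function, not part of this patch:

```c
/* Hypothetical in-kernel caller: request a reschedule of ch's runlist
 * without the preempt-next optimization. -EBUSY means another thread
 * holds the runlist lock and a resubmit is already in flight.
 */
static int example_reschedule(struct gk20a *g, struct channel_gk20a *ch)
{
	if (!g->ops.fifo.reschedule_runlist)
		return -ENOSYS;

	/* false: do not preempt the context pending load */
	return g->ops.fifo.reschedule_runlist(ch, false);
}
```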
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 5866dd1b5..576a4ac8a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -95,6 +95,7 @@ struct fifo_runlist_info_gk20a {
 	u32  pbdma_bitmask;      /* pbdmas supported for this runlist*/
 	u32  eng_bitmask;        /* engines using this runlist */
 	u32  reset_eng_bitmask;  /* engines to be reset during recovery */
+	u32  count;              /* cached runlist_hw_submit parameter */
 	bool stopped;
 	bool support_tsg;
 	/* protect ch/tsg/runlist preempt & runlist update */
@@ -249,7 +250,9 @@ void gk20a_fifo_disable_tsg_sched(struct gk20a *g,
 		struct tsg_gk20a *tsg);
 u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 chid);
 
-int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id);
+int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
+int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
+		bool wait_preempt);
 
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
 			      bool add, bool wait_for_finish);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 17f662df4..45fa58f19 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -566,7 +566,8 @@ struct gpu_ops {
 		int (*tsg_verify_channel_status)(struct channel_gk20a *ch);
 		void (*tsg_verify_status_ctx_reload)(struct channel_gk20a *ch);
 		void (*tsg_verify_status_faulted)(struct channel_gk20a *ch);
-		int (*reschedule_runlist)(struct gk20a *g, u32 runlist_id);
+		int (*reschedule_runlist)(struct channel_gk20a *ch,
+				bool preempt_next);
 		int (*update_runlist)(struct gk20a *g, u32 runlist_id,
 				u32 chid, bool add,
 				bool wait_for_finish);
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 30e030929..f00e806f5 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -664,6 +664,13 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
 	return runlists_mask;
 }
 
+int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
+{
+	/* gv11b allows multiple outstanding preempts,
+	   so always preempt next for best reschedule effect */
+	return nvgpu_fifo_reschedule_runlist(ch, true, false);
+}
+
 static void gv11b_fifo_issue_runlist_preempt(struct gk20a *g,
 					u32 runlists_mask)
 {
@@ -842,7 +849,6 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
 	return ret;
 }
 
-
 static int gv11b_fifo_preempt_runlists(struct gk20a *g, u32 runlists_mask)
 {
 	int ret = 0;
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
index 3f58f9279..1ae3c93e3 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
@@ -76,6 +76,7 @@ void gv11b_dump_channel_status_ramfc(struct gk20a *g,
 void gv11b_dump_eng_status(struct gk20a *g, struct gk20a_debug_output *o);
 u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g);
 
+int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
 int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 		unsigned int id_type, unsigned int timeout_rc_type);
 int gv11b_fifo_preempt_channel(struct gk20a *g, u32 chid);
diff --git a/drivers/gpu/nvgpu/gv11b/gv11b.c b/drivers/gpu/nvgpu/gv11b/gv11b.c
index c1ad7944a..09fe9a451 100644
--- a/drivers/gpu/nvgpu/gv11b/gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gv11b.c
@@ -154,6 +154,7 @@ int gv11b_init_gpu_characteristics(struct gk20a *g)
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_SCG, true);
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true);
 
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 290a94528..ff7790752 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -503,6 +503,7 @@ static const struct gpu_ops gv11b_ops = {
 		.tsg_verify_channel_status = gk20a_fifo_tsg_unbind_channel_verify_status,
 		.tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
 		.tsg_verify_status_faulted = gv11b_fifo_tsg_verify_status_faulted,
+		.reschedule_runlist = gv11b_fifo_reschedule_runlist,
 		.update_runlist = gk20a_fifo_update_runlist,
 		.trigger_mmu_fault = NULL,
 		.get_mmu_fault_info = NULL,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 24748a194..9ae249a2f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -96,7 +96,7 @@ struct gk20a;
 #define NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING	35
 /* Deterministic submits are supported even with job tracking */
 #define NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL		36
-/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
+/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
 #define NVGPU_SUPPORT_RESCHEDULE_RUNLIST			37
 
 /* NVGPU_GPU_IOCTL_GET_EVENT_FD is available */
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index 99726e4c6..ef51451ac 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -1,7 +1,7 @@
 /*
  * gk20a event logging to ftrace.
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -346,6 +346,51 @@ TRACE_EVENT(gk20a_channel_submitted_gpfifo,
 		__entry->flags, __entry->incr_id, __entry->incr_value)
 );
 
+TRACE_EVENT(gk20a_reschedule_preempt_next,
+	TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2,
+		u32 preempt),
+
+	TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt),
+
+	TP_STRUCT__entry(
+		__field(u32, chid)
+		__field(u32, fecs0)
+		__field(u32, engstat)
+		__field(u32, fecs1)
+		__field(u32, fecs2)
+		__field(u32, preempt)
+	),
+
+	TP_fast_assign(
+		__entry->chid = chid;
+		__entry->fecs0 = fecs0;
+		__entry->engstat = engstat;
+		__entry->fecs1 = fecs1;
+		__entry->fecs2 = fecs2;
+		__entry->preempt = preempt;
+	),
+
+	TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x,"
+		" preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat,
+		__entry->fecs1, __entry->fecs2, __entry->preempt)
+);
+
+TRACE_EVENT(gk20a_reschedule_preempted_next,
+	TP_PROTO(u32 chid),
+
+	TP_ARGS(chid),
+
+	TP_STRUCT__entry(
+		__field(u32, chid)
+	),
+
+	TP_fast_assign(
+		__entry->chid = chid;
+	),
+
+	TP_printk("chid=%d", __entry->chid)
+);
+
 TRACE_EVENT(gk20a_channel_reset,
 	TP_PROTO(u32 chid, u32 tsgid),
 
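Since only gv11b wires up the HAL op and advertises NVGPU_SUPPORT_RESCHEDULE_RUNLIST, userspace should probe the characteristics flag before relying on the new ioctl. A probe sketch, assuming an already-open fd for the nvgpu ctrl device node and the existing NVGPU_GPU_IOCTL_GET_CHARACTERISTICS request layout:

```c
/* Probe sketch: returns nonzero if the kernel advertises
 * NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST. ctrl_fd is an open fd for
 * the nvgpu ctrl node (device path varies by platform).
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int supports_reschedule_runlist(int ctrl_fd)
{
	struct nvgpu_gpu_characteristics chars;
	struct nvgpu_gpu_get_characteristics req;

	memset(&chars, 0, sizeof(chars));
	memset(&req, 0, sizeof(req));
	req.gpu_characteristics_buf_size = sizeof(chars);
	req.gpu_characteristics_buf_addr = (uintptr_t)&chars;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) < 0)
		return 0;

	return !!(chars.flags & NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST);
}
```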
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index d97f8fb63..b14610bd3 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL	(1ULL << 19)
 /* IO coherence support is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE		(1ULL << 20)
-/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
+/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST	(1ULL << 21)
 /* subcontexts are available */
 #define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS		(1ULL << 22)
@@ -1477,8 +1477,6 @@ struct nvgpu_fence {
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI		(1 << 4)
 /* skip buffer refcounting during submit */
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING	(1 << 5)
-/* expire current timeslice and reschedule runlist from front */
-#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST	(1 << 6)
 
 struct nvgpu_submit_gpfifo_args {
 	__u64 gpfifo;
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args {
 	__u32 syncpoint_max; /* out */
 };
 
+struct nvgpu_reschedule_runlist_args {
+#define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT		(1 << 0)
+	__u32 flags;
+};
+
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD	\
 	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT	\
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args {
 	_IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
 #define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT	\
 	_IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
+#define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST	\
+	_IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args)
 
 #define NVGPU_IOCTL_CHANNEL_LAST	\
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST)
 
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE	sizeof(struct nvgpu_alloc_gpfifo_ex_args)
 /*
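End to end, the new uapi can be exercised as below; EPERM mirrors the missing CAP_SYS_NICE in the handler above, and ENOSYS the absent HAL op. Sketch only; channel fd setup is not shown:

```c
/* Usage sketch for the new ioctl: request a runlist reschedule on an open
 * channel fd, optionally asking host to also preempt the context pending
 * load (NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT).
 */
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int channel_reschedule_runlist(int ch_fd, int preempt_next)
{
	struct nvgpu_reschedule_runlist_args args = {
		.flags = preempt_next ?
			NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT : 0,
	};

	if (ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST, &args) < 0)
		return -errno;	/* -EPERM, -ENOSYS, -EBUSY, ... */

	return 0;
}
```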