/*
 * Patch summary (this text sits ahead of the first file header and is not
 * part of any hunk):
 *  - Makefile: move runlist_fifo_gk20a.o and runlist_ram_gk20a.o from the
 *    nvgpu-y list to the nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) list, and move
 *    the matching .c entries between srcs lists in Makefile.sources.
 *  - Move gk20a_fifo_reschedule_preempt_next() from runlist_fifo_gk20a.c to
 *    runlist_fifo_gk20a_fusa.c; the only code-text change is that one long
 *    call is wrapped onto two lines.
 *  - libnvgpu-drv_safe.export: drop gk20a_runlist_get_ch_entry and
 *    gk20a_runlist_get_tsg_entry.
 *  - nvgpu-runlist.c unit test: add local generic_runlist_get_tsg_entry and
 *    generic_runlist_get_ch_entry helpers and install them in setup_fifo.
 */
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 4abf089ae..7852a66c0 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -272,9 +272,7 @@ nvgpu-y += \
 	hal/fifo/ramfc_tu104.o \
 	hal/fifo/ramin_gk20a.o \
 	hal/fifo/ramin_tu104.o \
-	hal/fifo/runlist_ram_gk20a.o \
 	hal/fifo/runlist_ram_tu104.o \
-	hal/fifo/runlist_fifo_gk20a.o \
 	hal/fifo/runlist_fifo_gv11b.o \
 	hal/fifo/runlist_fifo_gv100.o \
 	hal/fifo/runlist_fifo_tu104.o \
@@ -673,6 +671,8 @@ nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \
 	hal/fifo/mmu_fault_gk20a.o \
 	hal/fifo/mmu_fault_gm20b.o \
 	hal/fifo/mmu_fault_gp10b.o \
+	hal/fifo/runlist_fifo_gk20a.o \
+	hal/fifo/runlist_ram_gk20a.o \
 	hal/gr/config/gr_config_gm20b.o \
 	hal/gr/ctxsw_prog/ctxsw_prog_gm20b.o \
 	hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 80f7044c8..09d6311ae 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -145,9 +145,7 @@ srcs += common/utils/assert.c \
 	hal/init/hal_gv11b_litter.c \
 	hal/init/hal_init.c \
 	hal/power_features/cg/gv11b_gating_reglist.c \
-	hal/fifo/runlist_fifo_gk20a.c \
 	hal/fifo/runlist_fifo_gv11b.c \
-	hal/fifo/runlist_ram_gk20a.c \
 	hal/fifo/userd_gk20a.c \
 	hal/tpc/tpc_gv11b.c \
 	hal/sync/syncpt_cmdbuf_gv11b.c
@@ -300,6 +298,8 @@ srcs += hal/init/hal_gp10b.c \
 	hal/fifo/mmu_fault_gm20b.c \
 	hal/fifo/mmu_fault_gp10b.c \
 	hal/fifo/ctxsw_timeout_gk20a.c \
+	hal/fifo/runlist_fifo_gk20a.c \
+	hal/fifo/runlist_ram_gk20a.c \
 	hal/netlist/netlist_gm20b.c \
 	hal/netlist/netlist_gp10b.c \
 	hal/sync/syncpt_cmdbuf_gk20a.c \
diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.c
index de9afb1c5..47957158e 100644
--- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.c
@@ -46,81 +46,3 @@ u32 gk20a_runlist_count_max(void)
 {
 	return fifo_eng_runlist_base__size_1_v();
 }
-
-#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
-/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
-int gk20a_fifo_reschedule_preempt_next(struct nvgpu_channel *ch,
-		bool wait_preempt)
-{
-	struct gk20a *g = ch->g;
-	struct nvgpu_runlist_info *runlist =
-		g->fifo.runlist_info[ch->runlist_id];
-	int ret = 0;
-	u32 gr_eng_id = 0;
-	u32 fecsstat0 = 0, fecsstat1 = 0;
-	u32 preempt_id;
-	u32 preempt_type = 0;
-	struct nvgpu_engine_status_info engine_status;
-
-	if (1U != nvgpu_engine_get_ids(
-			g, &gr_eng_id, 1, NVGPU_ENGINE_GR)) {
-		return ret;
-	}
-	if ((runlist->eng_bitmask & BIT32(gr_eng_id)) == 0U) {
-		return ret;
-	}
-
-	if (wait_preempt) {
-		u32 val = nvgpu_readl(g, fifo_preempt_r());
-
-		if ((val & fifo_preempt_pending_true_f()) != 0U) {
-			return ret;
-		}
-	}
-
-	fecsstat0 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
-			NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0);
-	g->ops.engine_status.read_engine_status_info(g, gr_eng_id, &engine_status);
-	if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
-		nvgpu_engine_status_get_next_ctx_id_type(&engine_status,
-			&preempt_id, &preempt_type);
-	} else {
-		return ret;
-	}
-	if ((preempt_id == ch->tsgid) && (preempt_type != 0U)) {
-		return ret;
-	}
-	fecsstat1 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
-			NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0);
-	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
-		fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
-		/* preempt useless if FECS acked save and started restore */
-		return ret;
-	}
-
-	g->ops.fifo.preempt_trigger(g, preempt_id, preempt_type != 0U);
-#ifdef TRACEPOINTS_ENABLED
-	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0,
-			engine_status.reg_data, fecsstat1,
-			g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
-				NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0),
-			nvgpu_readl(g, fifo_preempt_r()));
-#endif
-	if (wait_preempt) {
-		if (g->ops.fifo.is_preempt_pending(g, preempt_id,
-			preempt_type) != 0) {
-			nvgpu_err(g, "fifo preempt timed out");
-			/*
-			 * This function does not care if preempt
-			 * times out since it is here only to improve
-			 * latency. If a timeout happens, it will be
-			 * handled by other fifo handling code.
-			 */
-		}
-	}
-#ifdef TRACEPOINTS_ENABLED
-	trace_gk20a_reschedule_preempted_next(ch->chid);
-#endif
-	return ret;
-}
-#endif
diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a_fusa.c b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a_fusa.c
index 17d22fd43..4935a43a2 100644
--- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a_fusa.c
+++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a_fusa.c
@@ -126,4 +126,81 @@ void gk20a_runlist_write_state(struct gk20a *g, u32 runlists_mask,
 	nvgpu_writel(g, fifo_sched_disable_r(), reg_val);
 }
 
+#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
 /* trigger host preempt of GR pending load ctx if that ctx is not for ch */
+int gk20a_fifo_reschedule_preempt_next(struct nvgpu_channel *ch,
+		bool wait_preempt)
+{
+	struct gk20a *g = ch->g;
+	struct nvgpu_runlist_info *runlist =
+		g->fifo.runlist_info[ch->runlist_id];
+	int ret = 0;	/* never reassigned; every exit returns 0 */
+	u32 gr_eng_id = 0;
+	u32 fecsstat0 = 0, fecsstat1 = 0; /* MAILBOX0 pre/post status read */
+	u32 preempt_id;	/* written only on the ctxsw-switch path */
+	u32 preempt_type = 0;
+	struct nvgpu_engine_status_info engine_status;
+
+	if (1U != nvgpu_engine_get_ids(
+			g, &gr_eng_id, 1, NVGPU_ENGINE_GR)) {
+		return ret;
+	}
+	if ((runlist->eng_bitmask & BIT32(gr_eng_id)) == 0U) {
+		return ret;	/* GR bit not set in this runlist mask */
+	}
+
+	if (wait_preempt) {
+		u32 val = nvgpu_readl(g, fifo_preempt_r());
+
+		if ((val & fifo_preempt_pending_true_f()) != 0U) {
+			return ret;	/* pending bit already set */
+		}
+	}
+
+	fecsstat0 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
+			NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0);
+	g->ops.engine_status.read_engine_status_info(g, gr_eng_id,
+		&engine_status);
+	if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
+		nvgpu_engine_status_get_next_ctx_id_type(&engine_status,
+			&preempt_id, &preempt_type);
+	} else {
+		return ret;	/* engine is not in a ctxsw switch */
+	}
+	if ((preempt_id == ch->tsgid) && (preempt_type != 0U)) {
+		return ret;	/* next ctx id matches ch->tsgid */
+	}
+	fecsstat1 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
+			NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0);
+	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
+		fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
+		/* preempt useless if FECS acked save and started restore */
+		return ret;
+	}
+
+	g->ops.fifo.preempt_trigger(g, preempt_id, preempt_type != 0U);
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0,
+			engine_status.reg_data, fecsstat1,
+			g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
+				NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0),
+			nvgpu_readl(g, fifo_preempt_r()));
+#endif
+	if (wait_preempt) {
+		if (g->ops.fifo.is_preempt_pending(g, preempt_id,
+			preempt_type) != 0) {
+			nvgpu_err(g, "fifo preempt timed out");
+			/*
+			 * This function does not care if preempt
+			 * times out since it is here only to improve
+			 * latency. If a timeout happens, it will be
+			 * handled by other fifo handling code.
+			 */
+		}
+	}
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempted_next(ch->chid);
+#endif
+	return ret;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/libnvgpu-drv_safe.export b/drivers/gpu/nvgpu/libnvgpu-drv_safe.export
index 7c8de5f45..02cb79d58 100644
--- a/drivers/gpu/nvgpu/libnvgpu-drv_safe.export
+++ b/drivers/gpu/nvgpu/libnvgpu-drv_safe.export
@@ -10,8 +10,6 @@ gv11b_fb_write_mmu_fault_buffer_size
 find_first_bit
 find_first_zero_bit
 find_next_bit
-gk20a_runlist_get_ch_entry
-gk20a_runlist_get_tsg_entry
 gk20a_mm_fb_flush
 gk20a_ramin_alloc_size
 gk20a_ramin_base_shift
diff --git a/userspace/units/fifo/runlist/nvgpu-runlist.c b/userspace/units/fifo/runlist/nvgpu-runlist.c
index 3ecd468e1..1060cf585 100644
--- a/userspace/units/fifo/runlist/nvgpu-runlist.c
+++ b/userspace/units/fifo/runlist/nvgpu-runlist.c
@@ -31,6 +31,50 @@
 #include "hal/fifo/runlist_ram_gk20a.h"
 #include "hal/fifo/tsg_gk20a.h"
 
+#include "nvgpu/hw/gk20a/hw_ram_gk20a.h"
+
+#define RL_MAX_TIMESLICE_TIMEOUT ram_rl_entry_timeslice_timeout_v(U32_MAX)
+#define RL_MAX_TIMESLICE_SCALE ram_rl_entry_timeslice_scale_v(U32_MAX)
+
+/*
+ * Mimics the non-FUSA gk20a_runlist_get_tsg_entry, which has simpler logic
+ * than other chips but is sufficient for runlist tests: halve the timeslice
+ * until it is within RL_MAX_TIMESLICE_TIMEOUT, clamping on scale overflow.
+ */
+static void generic_runlist_get_tsg_entry(struct nvgpu_tsg *tsg,
+		u32 *runlist, u32 timeslice)
+{
+	u32 timeout = timeslice;
+	u32 scale = 0U;
+
+	while (timeout > RL_MAX_TIMESLICE_TIMEOUT) {
+		timeout >>= 1U;
+		scale++;	/* one scale step per halving */
+	}
+
+	if (scale > RL_MAX_TIMESLICE_SCALE) {
+		timeout = RL_MAX_TIMESLICE_TIMEOUT;
+		scale = RL_MAX_TIMESLICE_SCALE;
+	}
+
+	runlist[0] = ram_rl_entry_id_f(tsg->tsgid) |
+		ram_rl_entry_type_tsg_f() |
+		ram_rl_entry_tsg_length_f(tsg->num_active_channels) |
+		ram_rl_entry_timeslice_scale_f(scale) |
+		ram_rl_entry_timeslice_timeout_f(timeout);
+	runlist[1] = 0;	/* second entry word is always zero */
+}
+
+/*
+ * Mimics the non-FUSA gk20a_runlist_get_ch_entry, which has simpler logic
+ * than other chips but is sufficient for runlist tests: word 0 holds the
+ * ram_rl_entry_chid_f encoding of ch->chid and word 1 is zero.
+ */
+static void generic_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist)
+{
+	runlist[0] = ram_rl_entry_chid_f(ch->chid);
+	runlist[1] = 0;
+}
 
 static void setup_fifo(struct gk20a *g, unsigned long *tsg_map,
 		unsigned long *ch_map, struct nvgpu_tsg *tsgs,
@@ -68,8 +112,8 @@ static void setup_fifo(struct gk20a *g, unsigned long *tsg_map,
 	 * entries are enough. The logic is same across chips.
 	 */
 	f->runlist_entry_size = 2 * sizeof(u32);
-	g->ops.runlist.get_tsg_entry = gk20a_runlist_get_tsg_entry;
-	g->ops.runlist.get_ch_entry = gk20a_runlist_get_ch_entry;
+	g->ops.runlist.get_tsg_entry = generic_runlist_get_tsg_entry;
+	g->ops.runlist.get_ch_entry = generic_runlist_get_ch_entry;
 	g->ops.tsg.default_timeslice_us = nvgpu_tsg_default_timeslice_us;
 
 	g->runlist_interleave = interleave;