gpu: nvgpu: hal: remove non-FUSA runlist HALs from FUSA build

A number of gk20a runlist HALs are not used in FUSA builds; this patch
moves them under CONFIG_NVGPU_HAL_NON_FUSA so they are compiled out of
FUSA builds. It also removes the runlist unit test's dependencies on
those HALs by replacing them with local helper functions.

JIRA NVGPU-3690

Change-Id: If00bdedd59cf12e91609dd075c9732c6e80a05ff
Signed-off-by: Nicolas Benech <nbenech@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2174743
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Nicolas Benech
Date: 2019-08-13 18:15:39 -04:00
Committed-by: mobile promotions
Parent: d0e7ada592
Commit: 3bc55a1bf2
6 changed files with 127 additions and 86 deletions


@@ -272,9 +272,7 @@ nvgpu-y += \
 	hal/fifo/ramfc_tu104.o \
 	hal/fifo/ramin_gk20a.o \
 	hal/fifo/ramin_tu104.o \
-	hal/fifo/runlist_ram_gk20a.o \
 	hal/fifo/runlist_ram_tu104.o \
-	hal/fifo/runlist_fifo_gk20a.o \
 	hal/fifo/runlist_fifo_gv11b.o \
 	hal/fifo/runlist_fifo_gv100.o \
 	hal/fifo/runlist_fifo_tu104.o \
@@ -673,6 +671,8 @@ nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \
 	hal/fifo/mmu_fault_gk20a.o \
 	hal/fifo/mmu_fault_gm20b.o \
 	hal/fifo/mmu_fault_gp10b.o \
+	hal/fifo/runlist_fifo_gk20a.o \
+	hal/fifo/runlist_ram_gk20a.o \
 	hal/gr/config/gr_config_gm20b.o \
 	hal/gr/ctxsw_prog/ctxsw_prog_gm20b.o \
 	hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.o \


@@ -145,9 +145,7 @@ srcs += common/utils/assert.c \
 	hal/init/hal_gv11b_litter.c \
 	hal/init/hal_init.c \
 	hal/power_features/cg/gv11b_gating_reglist.c \
-	hal/fifo/runlist_fifo_gk20a.c \
 	hal/fifo/runlist_fifo_gv11b.c \
-	hal/fifo/runlist_ram_gk20a.c \
 	hal/fifo/userd_gk20a.c \
 	hal/tpc/tpc_gv11b.c \
 	hal/sync/syncpt_cmdbuf_gv11b.c
@@ -300,6 +298,8 @@ srcs += hal/init/hal_gp10b.c \
 	hal/fifo/mmu_fault_gm20b.c \
 	hal/fifo/mmu_fault_gp10b.c \
 	hal/fifo/ctxsw_timeout_gk20a.c \
+	hal/fifo/runlist_fifo_gk20a.c \
+	hal/fifo/runlist_ram_gk20a.c \
 	hal/netlist/netlist_gm20b.c \
 	hal/netlist/netlist_gp10b.c \
 	hal/sync/syncpt_cmdbuf_gk20a.c \
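
Both the kernel Makefile hunks and the unit-test srcs hunks above apply the same pattern: the gk20a runlist HAL sources are dropped from the unconditional object/source lists and re-added under the CONFIG_NVGPU_HAL_NON_FUSA guard, so a FUSA build never compiles them. The standalone C sketch below (illustrative only, not nvgpu code; all names are hypothetical) shows the runtime consequence of that gating: an op that exists only in non-FUSA builds must be assigned, and null-checked, under the same guard. Build with -DCONFIG_NVGPU_HAL_NON_FUSA to get the non-FUSA behaviour.

/* gating_demo.c -- standalone illustration of conditional HAL wiring. */
#include <stddef.h>
#include <stdio.h>

struct runlist_ops {
	void (*get_ch_entry)(unsigned int chid);
};

#ifdef CONFIG_NVGPU_HAL_NON_FUSA
/* Stands in for a gk20a HAL that now lives in the non-FUSA object list. */
static void gk20a_get_ch_entry_stub(unsigned int chid)
{
	printf("gk20a runlist entry for channel %u\n", chid);
}
#endif

int main(void)
{
	struct runlist_ops ops = { NULL };

#ifdef CONFIG_NVGPU_HAL_NON_FUSA
	ops.get_ch_entry = gk20a_get_ch_entry_stub;
#endif

	if (ops.get_ch_entry != NULL) {
		ops.get_ch_entry(7U);
	} else {
		printf("FUSA build: gk20a runlist HAL compiled out\n");
	}
	return 0;
}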


@@ -46,81 +46,3 @@ u32 gk20a_runlist_count_max(void)
 {
 	return fifo_eng_runlist_base__size_1_v();
 }
-
-#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
-/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
-int gk20a_fifo_reschedule_preempt_next(struct nvgpu_channel *ch,
-		bool wait_preempt)
-{
-	struct gk20a *g = ch->g;
-	struct nvgpu_runlist_info *runlist =
-		g->fifo.runlist_info[ch->runlist_id];
-	int ret = 0;
-	u32 gr_eng_id = 0;
-	u32 fecsstat0 = 0, fecsstat1 = 0;
-	u32 preempt_id;
-	u32 preempt_type = 0;
-	struct nvgpu_engine_status_info engine_status;
-
-	if (1U != nvgpu_engine_get_ids(
-			g, &gr_eng_id, 1, NVGPU_ENGINE_GR)) {
-		return ret;
-	}
-	if ((runlist->eng_bitmask & BIT32(gr_eng_id)) == 0U) {
-		return ret;
-	}
-	if (wait_preempt) {
-		u32 val = nvgpu_readl(g, fifo_preempt_r());
-
-		if ((val & fifo_preempt_pending_true_f()) != 0U) {
-			return ret;
-		}
-	}
-	fecsstat0 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
-		NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0);
-	g->ops.engine_status.read_engine_status_info(g, gr_eng_id, &engine_status);
-	if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
-		nvgpu_engine_status_get_next_ctx_id_type(&engine_status,
-			&preempt_id, &preempt_type);
-	} else {
-		return ret;
-	}
-	if ((preempt_id == ch->tsgid) && (preempt_type != 0U)) {
-		return ret;
-	}
-	fecsstat1 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
-		NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0);
-	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
-			fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
-		/* preempt useless if FECS acked save and started restore */
-		return ret;
-	}
-
-	g->ops.fifo.preempt_trigger(g, preempt_id, preempt_type != 0U);
-#ifdef TRACEPOINTS_ENABLED
-	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0,
-		engine_status.reg_data, fecsstat1,
-		g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
-			NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0),
-		nvgpu_readl(g, fifo_preempt_r()));
-#endif
-	if (wait_preempt) {
-		if (g->ops.fifo.is_preempt_pending(g, preempt_id,
-				preempt_type) != 0) {
-			nvgpu_err(g, "fifo preempt timed out");
-			/*
-			 * This function does not care if preempt
-			 * times out since it is here only to improve
-			 * latency. If a timeout happens, it will be
-			 * handled by other fifo handling code.
-			 */
-		}
-	}
-#ifdef TRACEPOINTS_ENABLED
-	trace_gk20a_reschedule_preempted_next(ch->chid);
-#endif
-	return ret;
-}
-#endif


@@ -126,4 +126,81 @@ void gk20a_runlist_write_state(struct gk20a *g, u32 runlists_mask,
 	nvgpu_writel(g, fifo_sched_disable_r(), reg_val);
 }
+
+#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
+/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
+int gk20a_fifo_reschedule_preempt_next(struct nvgpu_channel *ch,
+		bool wait_preempt)
+{
+	struct gk20a *g = ch->g;
+	struct nvgpu_runlist_info *runlist =
+		g->fifo.runlist_info[ch->runlist_id];
+	int ret = 0;
+	u32 gr_eng_id = 0;
+	u32 fecsstat0 = 0, fecsstat1 = 0;
+	u32 preempt_id;
+	u32 preempt_type = 0;
+	struct nvgpu_engine_status_info engine_status;
+
+	if (1U != nvgpu_engine_get_ids(
+			g, &gr_eng_id, 1, NVGPU_ENGINE_GR)) {
+		return ret;
+	}
+	if ((runlist->eng_bitmask & BIT32(gr_eng_id)) == 0U) {
+		return ret;
+	}
+	if (wait_preempt) {
+		u32 val = nvgpu_readl(g, fifo_preempt_r());
+
+		if ((val & fifo_preempt_pending_true_f()) != 0U) {
+			return ret;
+		}
+	}
+	fecsstat0 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
+		NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0);
+	g->ops.engine_status.read_engine_status_info(g, gr_eng_id,
+		&engine_status);
+	if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
+		nvgpu_engine_status_get_next_ctx_id_type(&engine_status,
+			&preempt_id, &preempt_type);
+	} else {
+		return ret;
+	}
+	if ((preempt_id == ch->tsgid) && (preempt_type != 0U)) {
+		return ret;
+	}
+	fecsstat1 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
+		NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0);
+	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
+			fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
+		/* preempt useless if FECS acked save and started restore */
+		return ret;
+	}
+
+	g->ops.fifo.preempt_trigger(g, preempt_id, preempt_type != 0U);
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0,
+		engine_status.reg_data, fecsstat1,
+		g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
+			NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0),
+		nvgpu_readl(g, fifo_preempt_r()));
+#endif
+	if (wait_preempt) {
+		if (g->ops.fifo.is_preempt_pending(g, preempt_id,
+				preempt_type) != 0) {
+			nvgpu_err(g, "fifo preempt timed out");
+			/*
+			 * This function does not care if preempt
+			 * times out since it is here only to improve
+			 * latency. If a timeout happens, it will be
+			 * handled by other fifo handling code.
+			 */
+		}
+	}
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempted_next(ch->chid);
+#endif
+	return ret;
+}
+#endif
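
The function above, moved verbatim between files by this patch, bails out early unless exactly one GR engine exists and the channel's runlist actually serves it; the `eng_bitmask` test relies on nvgpu's BIT32 helper. The self-contained sketch below isolates that membership check; BIT32 is re-defined locally here with the same shift-left semantics as nvgpu's macro, and the function and bitmask values are illustrative only.

/* eng_bitmask_demo.c -- standalone illustration of the runlist engine check. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT32(n) (1U << (n))

static bool runlist_serves_engine(uint32_t eng_bitmask, uint32_t eng_id)
{
	/* Each set bit marks an engine scheduled from this runlist. */
	return (eng_bitmask & BIT32(eng_id)) != 0U;
}

int main(void)
{
	uint32_t eng_bitmask = BIT32(0U) | BIT32(2U); /* engines 0 and 2 */

	printf("engine 2 on runlist: %d\n", runlist_serves_engine(eng_bitmask, 2U));
	printf("engine 1 on runlist: %d\n", runlist_serves_engine(eng_bitmask, 1U));
	return 0;
}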


@@ -10,8 +10,6 @@ gv11b_fb_write_mmu_fault_buffer_size
 find_first_bit
 find_first_zero_bit
 find_next_bit
-gk20a_runlist_get_ch_entry
-gk20a_runlist_get_tsg_entry
 gk20a_mm_fb_flush
 gk20a_ramin_alloc_size
 gk20a_ramin_base_shift


@@ -31,6 +31,50 @@
 #include "hal/fifo/runlist_ram_gk20a.h"
 #include "hal/fifo/tsg_gk20a.h"
 #include "nvgpu/hw/gk20a/hw_ram_gk20a.h"
+
+#define RL_MAX_TIMESLICE_TIMEOUT ram_rl_entry_timeslice_timeout_v(U32_MAX)
+#define RL_MAX_TIMESLICE_SCALE ram_rl_entry_timeslice_scale_v(U32_MAX)
+
+/*
+ * This helper function mimics the non-FUSA gk20a_runlist_get_tsg_entry
+ * function, whose logic is simpler than that of other chips but is
+ * sufficient for runlist test purposes.
+ */
+static void generic_runlist_get_tsg_entry(struct nvgpu_tsg *tsg,
+		u32 *runlist, u32 timeslice)
+{
+	u32 timeout = timeslice;
+	u32 scale = 0U;
+
+	while (timeout > RL_MAX_TIMESLICE_TIMEOUT) {
+		timeout >>= 1U;
+		scale++;
+	}
+
+	if (scale > RL_MAX_TIMESLICE_SCALE) {
+		timeout = RL_MAX_TIMESLICE_TIMEOUT;
+		scale = RL_MAX_TIMESLICE_SCALE;
+	}
+
+	runlist[0] = ram_rl_entry_id_f(tsg->tsgid) |
+			ram_rl_entry_type_tsg_f() |
+			ram_rl_entry_tsg_length_f(tsg->num_active_channels) |
+			ram_rl_entry_timeslice_scale_f(scale) |
+			ram_rl_entry_timeslice_timeout_f(timeout);
+	runlist[1] = 0;
+}
+
+/*
+ * This helper function mimics the non-FUSA gk20a_runlist_get_ch_entry
+ * function, whose logic is simpler than that of other chips but is
+ * sufficient for runlist test purposes.
+ */
+static void generic_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist)
+{
+	runlist[0] = ram_rl_entry_chid_f(ch->chid);
+	runlist[1] = 0;
+}
+
 static void setup_fifo(struct gk20a *g, unsigned long *tsg_map,
 		unsigned long *ch_map, struct nvgpu_tsg *tsgs,
@@ -68,8 +112,8 @@ static void setup_fifo(struct gk20a *g, unsigned long *tsg_map,
 	 * entries are enough. The logic is same across chips.
 	 */
 	f->runlist_entry_size = 2 * sizeof(u32);
-	g->ops.runlist.get_tsg_entry = gk20a_runlist_get_tsg_entry;
-	g->ops.runlist.get_ch_entry = gk20a_runlist_get_ch_entry;
+	g->ops.runlist.get_tsg_entry = generic_runlist_get_tsg_entry;
+	g->ops.runlist.get_ch_entry = generic_runlist_get_ch_entry;
 	g->ops.tsg.default_timeslice_us = nvgpu_tsg_default_timeslice_us;
 	g->runlist_interleave = interleave;
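
The non-obvious part of the new test helper is the timeslice encoding: the timeout value is halved, and the scale exponent bumped, until it fits the hardware field, then both are clamped at their maxima (the effective timeslice is roughly timeout << scale). The standalone demo below reproduces that loop; the 255/15 field limits are assumptions for illustration, since the real limits come from ram_rl_entry_timeslice_timeout_v(U32_MAX) and ram_rl_entry_timeslice_scale_v(U32_MAX) in hw_ram_gk20a.h.

/* timeslice_demo.c -- standalone sketch of the runlist timeslice encoding. */
#include <stdio.h>

#define RL_MAX_TIMESLICE_TIMEOUT 255U /* assumed 8-bit timeout field */
#define RL_MAX_TIMESLICE_SCALE 15U    /* assumed 4-bit scale field */

static void encode_timeslice(unsigned int timeslice,
			     unsigned int *timeout, unsigned int *scale)
{
	*timeout = timeslice;
	*scale = 0U;

	/* Halve the timeout (and bump the exponent) until it fits the field. */
	while (*timeout > RL_MAX_TIMESLICE_TIMEOUT) {
		*timeout >>= 1U;
		(*scale)++;
	}

	/* Clamp to the largest representable timeslice. */
	if (*scale > RL_MAX_TIMESLICE_SCALE) {
		*timeout = RL_MAX_TIMESLICE_TIMEOUT;
		*scale = RL_MAX_TIMESLICE_SCALE;
	}
}

int main(void)
{
	unsigned int t, s;

	encode_timeslice(1024U, &t, &s);
	printf("timeslice 1024 -> timeout %u, scale %u\n", t, s); /* 128, 3 */
	return 0;
}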