gpu: nvgpu: build flag for deterministic channel

Add CONFIG_NVGPU_DETERMINISTIC_CHANNELS and fix
preprocessor #ifdefs to allow compiling kernel mode
submit without deterministic feature enabled.

Jira NVGPU-4661

Change-Id: I4aa678715824e8981d39bd8db0c5ae61ef3a675c
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2310325
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Thomas Fleury
2020-03-10 15:23:40 -04:00
committed by Alex Waterman
parent cc043e1506
commit 8ec4395e82
10 changed files with 67 additions and 47 deletions

View File

@@ -22,6 +22,7 @@ ccflags-y += -DCONFIG_NVGPU_DEBUGGER
ccflags-y += -DCONFIG_NVGPU_ENGINE_RESET
endif
ccflags-y += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS
ccflags-y += -DCONFIG_NVGPU_TPC_POWERGATE
ccflags-y += -DCONFIG_NVGPU_ACR_LEGACY
ccflags-y += -DCONFIG_NVGPU_ENGINE_QUEUE

View File

@@ -189,6 +189,7 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CLK_ARB
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_NON_FUSA
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_IOCTL_NON_FUSA
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS
CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT

View File

@@ -521,15 +521,19 @@ bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c)
bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c)
{
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
nvgpu_smp_rmb();
return pre_alloc_enabled;
#else
return false;
#endif
}
static int channel_prealloc_resources(struct nvgpu_channel *ch,
u32 num_jobs)
static int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs)
{
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
unsigned int i;
int err;
size_t size;
@@ -603,10 +607,14 @@ clean_up_joblist:
clean_up:
(void) memset(&ch->joblist.pre_alloc, 0, sizeof(ch->joblist.pre_alloc));
return err;
#else
return -ENOSYS;
#endif
}
static void channel_free_prealloc_resources(struct nvgpu_channel *c)
{
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd);
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
nvgpu_fence_pool_free(c);
@@ -618,6 +626,7 @@ static void channel_free_prealloc_resources(struct nvgpu_channel *c)
*/
nvgpu_smp_wmb();
c->joblist.pre_alloc.enabled = false;
#endif
}
int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch)
@@ -742,7 +751,8 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
goto clean_up_sync;
}
if (c->deterministic && args->num_inflight_jobs != 0U) {
if (nvgpu_channel_is_deterministic(c) &&
args->num_inflight_jobs != 0U) {
err = channel_prealloc_resources(c,
args->num_inflight_jobs);
if (err != 0) {
@@ -765,7 +775,8 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
clean_up_priv_cmd:
channel_free_priv_cmd_q(c);
clean_up_prealloc:
if (c->deterministic && args->num_inflight_jobs != 0U) {
if (nvgpu_channel_is_deterministic(c) &&
args->num_inflight_jobs != 0U) {
channel_free_prealloc_resources(c);
}
clean_up_sync:
@@ -1433,7 +1444,7 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
* Deterministic channels have a channel-wide power reference;
* for others, there's one per submit.
*/
if (!c->deterministic) {
if (!nvgpu_channel_is_deterministic(c)) {
gk20a_idle(g);
}
@@ -1696,7 +1707,7 @@ static void channel_free_wait_for_refs(struct nvgpu_channel *ch,
}
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static void channel_free_put_deterministic_ref_from_init(
struct nvgpu_channel *ch)
{
@@ -1834,7 +1845,7 @@ unbind:
g->ops.channel.unbind(ch);
g->ops.channel.free_inst(g, ch);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
channel_free_put_deterministic_ref_from_init(ch);
#endif
@@ -2276,7 +2287,7 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c,
c->vpr = false;
#endif
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) {
nvgpu_rwsem_down_read(&g->deterministic_busy);
/*
@@ -2322,8 +2333,8 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c,
return 0;
clean_up_idle:
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
if (c->deterministic) {
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (nvgpu_channel_is_deterministic(c)) {
nvgpu_rwsem_down_read(&g->deterministic_busy);
gk20a_idle(g);
c->deterministic = false;
@@ -2410,7 +2421,7 @@ void nvgpu_channel_sw_quiesce(struct gk20a *g)
}
#endif
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/*
* Stop deterministic channel activity for do_idle() when power needs to go off
* momentarily but deterministic channels keep power refs for potentially a
@@ -2769,7 +2780,7 @@ void nvgpu_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
* user-space managed
* semaphore.
*/
if (!c->deterministic) {
if (!nvgpu_channel_is_deterministic(c)) {
nvgpu_channel_update(c);
}
#endif
@@ -2881,9 +2892,7 @@ void nvgpu_channel_debug_dump_all(struct gk20a *g,
info->tsgid = ch->tsgid;
info->pid = ch->pid;
info->refs = nvgpu_atomic_read(&ch->ref_count);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
info->deterministic = ch->deterministic;
#endif
info->deterministic = nvgpu_channel_is_deterministic(ch);
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
if (hw_sema != NULL) {

View File

@@ -83,7 +83,8 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
* submission when user requested and the wait hasn't expired.
*/
if (flag_fence_wait) {
u32 max_wait_cmds = c->deterministic ? 1U : 0U;
u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ?
1U : 0U;
if (!pre_alloc_enabled) {
job->wait_cmd = nvgpu_kzalloc(g,
@@ -419,7 +420,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
flag_fence_get ||
((nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
nvgpu_is_vpr_resize_enabled()) &&
!c->deterministic) ||
!nvgpu_channel_is_deterministic(c)) ||
!skip_buffer_refcounting);
#ifdef CONFIG_NVGPU_CHANNEL_WDT
@@ -434,7 +435,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
* job tracking is required, the channel must have
* pre-allocated resources. Otherwise, we fail the submit here
*/
if (c->deterministic && !nvgpu_channel_is_prealloc_enabled(c)) {
if (nvgpu_channel_is_deterministic(c) &&
!nvgpu_channel_is_prealloc_enabled(c)) {
return -EINVAL;
}
@@ -456,7 +458,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
* is not required, and we clean-up one job-tracking
* resource in the submit path.
*/
need_deferred_cleanup = !c->deterministic ||
need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) ||
need_sync_framework ||
!skip_buffer_refcounting;
@@ -468,11 +470,11 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
* For deterministic channels, we don't allow deferred clean_up
* processing to occur. In cases we hit this, we fail the submit
*/
if (c->deterministic && need_deferred_cleanup) {
if (nvgpu_channel_is_deterministic(c) && need_deferred_cleanup) {
return -EINVAL;
}
if (!c->deterministic) {
if (!nvgpu_channel_is_deterministic(c)) {
/*
* Get a power ref unless this is a deterministic
* channel that holds them during the channel lifetime.
@@ -495,6 +497,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
}
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/* Grab access to HW to deal with do_idle */
if (c->deterministic) {
nvgpu_rwsem_down_read(&g->deterministic_busy);
@@ -510,6 +513,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
err = -EINVAL;
goto clean_up;
}
#endif
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_channel_submit_gpfifo(g->name,
@@ -592,10 +596,12 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
g->ops.userd.gp_put(g, c);
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/* No hw access beyond this point */
if (c->deterministic) {
nvgpu_rwsem_up_read(&g->deterministic_busy);
}
#endif
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_channel_submitted_gpfifo(g->name,
@@ -619,12 +625,14 @@ clean_up_job:
clean_up:
nvgpu_log_fn(g, "fail");
nvgpu_fence_put(post_fence);
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (c->deterministic) {
nvgpu_rwsem_up_read(&g->deterministic_busy);
} else {
if (need_deferred_cleanup) {
gk20a_idle(g);
}
return err;
}
#endif
if (need_deferred_cleanup) {
gk20a_idle(g);
}
return err;

View File

@@ -85,21 +85,13 @@ void gv11b_channel_debug_dump(struct gk20a *g,
struct nvgpu_debug_context *o,
struct nvgpu_channel_dump_info *info)
{
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d%s: ",
#else
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d: ",
#endif
info->chid,
g->name,
info->tsgid,
info->pid,
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
info->refs,
info->deterministic ? ", deterministic" : "");
#else
info->refs);
#endif
gk20a_debug_output(o, "channel status: %s in use %s %s\n",
info->hw_state.enabled ? "" : "not",
info->hw_state.status_string,

View File

@@ -183,10 +183,8 @@ struct nvgpu_channel_dump_info {
int pid;
/** Number of references to this channel. */
int refs;
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
/** Channel uses deterministic submit (kernel submit only). */
bool deterministic;
#endif
/** Channel H/W state */
struct nvgpu_channel_hw_state hw_state;
/** Snapshot of channel instance fields. */
@@ -562,7 +560,7 @@ struct nvgpu_channel {
bool referenceable;
/** True if VPR support was requested during setup bind */
bool vpr;
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/**
* Channel shall exhibit deterministic behavior in the submit path.
* Submit latency shall be consistent (and low). Submits that may cause
@@ -570,9 +568,9 @@ struct nvgpu_channel {
* sync fds or mapped buffer refcounting are not deterministic).
*/
bool deterministic;
#endif
/** Deterministic, but explicitly idle and submits disallowed. */
bool deterministic_railgate_allowed;
#endif
/** Channel uses Color Decompression Engine. */
bool cde;
/**
@@ -667,6 +665,15 @@ void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
static inline bool nvgpu_channel_is_deterministic(struct nvgpu_channel *c)
{
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
return c->deterministic;
#else
return false;
#endif
}
/**
* @brief Get channel pointer from its node in free channels list.
*

View File

@@ -710,12 +710,14 @@ struct gk20a {
/** Stored HW version info */
struct nvgpu_gpu_params params;
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/**
* Guards access to hardware when usual gk20a_{busy,idle} are skipped
* for submits and held for channel lifetime but dropped for an ongoing
* gk20a_do_idle().
*/
struct nvgpu_rwsem deterministic_busy;
#endif
struct nvgpu_netlist_vars *netlist_vars;
bool netlist_valid;

View File

@@ -1617,6 +1617,7 @@ static int nvgpu_gpu_set_deterministic_ch_railgate(struct nvgpu_channel *ch,
return err;
}
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static int nvgpu_gpu_set_deterministic_ch(struct nvgpu_channel *ch, u32 flags)
{
if (!ch->deterministic)
@@ -1688,6 +1689,7 @@ out:
args->num_channels = i;
return err;
}
#endif
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{

View File

@@ -284,9 +284,7 @@ int test_gv11b_channel_debug_dump(struct unit_module *m,
info->tsgid = ch->tsgid;
info->pid = ch->pid;
info->refs = nvgpu_atomic_read(&ch->ref_count);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
info->deterministic = (branches & F_CHANNEL_DUMP_DETERMINISTIC) != 0;
#endif
info->hw_state.enabled = (branches & F_CHANNEL_DUMP_ENABLED) != 0;
info->hw_state.busy = (branches & F_CHANNEL_DUMP_BUSY) != 0;
info->hw_state.status_string = "fake";

View File

@@ -497,7 +497,7 @@ int test_channel_close(struct unit_module *m, struct gk20a *g, void *vargs)
ch->vm = NULL;
}
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (branches & F_CHANNEL_CLOSE_DETERMINISTIC) {
/* Compensate for atomic dec in gk20a_idle() */
nvgpu_atomic_set(&g->usage_count, 1);
@@ -603,10 +603,10 @@ int test_channel_close(struct unit_module *m, struct gk20a *g, void *vargs)
ch->subctx = NULL;
}
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
ch->deterministic = false;
#endif
ch->deterministic_railgate_allowed = false;
#endif
unit_assert(ch->usermode_submit_enabled == false, goto done);
/* we took an extra reference to avoid nvgpu_vm_remove_ref */
@@ -901,7 +901,7 @@ int test_channel_setup_bind(struct unit_module *m, struct gk20a *g, void *vargs)
nvgpu_dma_free(g, &ch->usermode_userd);
nvgpu_dma_free(g, &ch->usermode_gpfifo);
ch->userd_iova = 0U;
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
ch->deterministic = false;
#endif
nvgpu_atomic_set(&ch->bound, false);
@@ -1315,7 +1315,7 @@ done:
#define F_CHANNEL_DETERMINISTIC_UNIDLE_GK20ABUSY_FAIL BIT(2)
#define F_CHANNEL_DETERMINISTIC_IDLE_LAST BIT(3)
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static const char *f_channel_deterministic_idle_unidle[] = {
"deterministic_channel",
"determinstic_railgate_allowed",
@@ -1758,7 +1758,7 @@ int test_channel_semaphore_wakeup(struct unit_module *m,
unit_verbose(m, "%s branches=%s\n", __func__,
branches_str(branches, f_channel_semaphore_wakeup));
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (branches & F_CHANNEL_SEMAPHORRE_WAKEUP_DETERMINISTIC_CH) {
ch->deterministic = true;
}
@@ -1777,7 +1777,7 @@ int test_channel_semaphore_wakeup(struct unit_module *m,
nvgpu_channel_semaphore_wakeup(g, false);
unit_assert(stub[0].count == (global_count - 1U), goto done);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
ch->deterministic = false;
#endif
}
@@ -2007,7 +2007,7 @@ struct unit_module_test nvgpu_channel_tests[] = {
UNIT_TEST(ch_abort, test_channel_abort, &unit_ctx, 0),
UNIT_TEST(mark_error, test_channel_mark_error, &unit_ctx, 0),
UNIT_TEST(sw_quiesce, test_channel_sw_quiesce, &unit_ctx, 0),
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
UNIT_TEST(idle_unidle, test_channel_deterministic_idle_unidle, &unit_ctx, 0),
#endif
UNIT_TEST(suspend_resume, test_channel_suspend_resume_serviceable_chs, &unit_ctx, 0),