Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: hold power ref for deterministic channels
To support deterministic channels even on platforms where railgating is supported, have each deterministic-marked channel hold a power reference for its lifetime, and skip taking per-job power refs in the submit path for those channels.

Previously, railgating blocked deterministic submits in general because the gk20a_busy()/gk20a_idle() calls in the submit path can take time and, more significantly, because the GPU may need to be turned on, which takes a long and nondeterministic amount of time.

As an exception, gk20a_do_idle() can still block deterministic submits until gk20a_do_unidle() is called. Add an rwsem to guard this. VPR resize needs do_idle, which conflicts with deterministic channels' requirement to keep the GPU on; this is now documented in the ioctl header.

Make NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING always set in the gpu characteristics now that it is supported. The only thing still blocking NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is the sync framework.

Make the channel debug dump show which channels are deterministic.

Bug 200291300
Jira NVGPU-70

Change-Id: I47b6f3a8517cd6e4255f6ca2855e3dd912e4f5f3
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1483038
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: 3c3c39dfe0
Commit: 7680fd689e
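
For context on how the feature is meant to be used from userspace: a channel is marked deterministic when its gpfifo is allocated, and from this change on the driver takes a gk20a_busy() power reference at that point and holds it until the channel is freed, so later submits never have to wait for the GPU to power up. A minimal sketch under stated assumptions: NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX and struct nvgpu_alloc_gpfifo_ex_args are taken from the nvgpu uapi header touched below, ch_fd and the entry count are illustrative, and error handling plus the actual submit ioctl are elided.

/* Hypothetical userspace sketch: ask for a deterministic channel. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int alloc_deterministic_gpfifo(int ch_fd)
{
	struct nvgpu_alloc_gpfifo_ex_args args;

	memset(&args, 0, sizeof(args));
	args.num_entries = 1024;	/* illustrative gpfifo depth */
	args.num_inflight_jobs = 0;	/* no in-kernel job tracking */
	args.flags = NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC;

	/*
	 * After this patch the driver holds a power ref for the channel's
	 * lifetime, so submits on this channel skip gk20a_busy()/gk20a_idle()
	 * and cannot stall on a railgated GPU (except across gk20a_do_idle()).
	 */
	return ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX, &args);
}
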
@@ -39,6 +39,7 @@ static void nvgpu_init_vars(struct gk20a *g)
 	gk20a_init_gr(g);
 
 	init_rwsem(&g->busy_lock);
+	init_rwsem(&g->deterministic_busy);
 
 	nvgpu_spinlock_init(&g->mc_enable_lock);
 
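
The new rwsem initialized above carries the whole scheme: paths that rely on the GPU staying powered for a deterministic channel (submit, gpfifo allocation, channel free) take g->deterministic_busy for reading, while gk20a_do_idle()/gk20a_do_unidle() take it for writing so they can momentarily drop those channels' power refs. A condensed sketch of that pairing, distilled from the hunks in this patch; the two wrapper function names are illustrative, not driver code.

/* Reader side: the deterministic submit fast path. */
static void deterministic_submit_sketch(struct gk20a *g, struct channel_gk20a *c)
{
	down_read(&g->deterministic_busy);	/* wait out any ongoing do_idle() */
	/* ... copy gpfifo entries and bump gp_put; no gk20a_busy() needed ... */
	up_read(&g->deterministic_busy);
}

/* Writer side: what __gk20a_do_idle()/__gk20a_do_unidle() do around a reset. */
static void do_idle_sequence_sketch(struct gk20a *g)
{
	gk20a_channel_deterministic_idle(g);	/* down_write() + drop channel power refs */
	/* ... railgate or reset the GPU, e.g. for VPR resize ... */
	gk20a_channel_deterministic_unidle(g);	/* re-take power refs + up_write() */
}
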
@@ -298,6 +298,12 @@ int __gk20a_do_idle(struct device *dev, bool force_reset)
 	bool is_railgated;
 	int err = 0;
 
+	/*
+	 * Hold back deterministic submits and changes to deterministic
+	 * channels - this must be outside the power busy locks.
+	 */
+	gk20a_channel_deterministic_idle(g);
+
 	/* acquire busy lock to block other busy() calls */
 	down_write(&g->busy_lock);
 
@@ -403,6 +409,7 @@ fail_drop_usage_count:
 fail_timeout:
 	nvgpu_mutex_release(&platform->railgate_lock);
 	up_write(&g->busy_lock);
+	gk20a_channel_deterministic_unidle(g);
 	return -EBUSY;
 }
 
@@ -456,6 +463,8 @@ int __gk20a_do_unidle(struct device *dev)
 	nvgpu_mutex_release(&platform->railgate_lock);
 	up_write(&g->busy_lock);
 
+	gk20a_channel_deterministic_unidle(g);
+
 	return 0;
 }
 
@@ -575,8 +575,15 @@ unbind:
 	g->ops.fifo.unbind_channel(ch);
 	g->ops.fifo.free_inst(g, ch);
 
+	/* put back the channel-wide submit ref from init */
+	if (ch->deterministic) {
+		down_read(&g->deterministic_busy);
+		ch->deterministic = false;
+		gk20a_idle(g);
+		up_read(&g->deterministic_busy);
+	}
+
 	ch->vpr = false;
-	ch->deterministic = false;
 	ch->vm = NULL;
 
 	WARN_ON(ch->sync);
@@ -1228,22 +1235,42 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
 	if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED)
 		c->vpr = true;
 
-	if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC)
+	if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) {
+		down_read(&g->deterministic_busy);
+		/*
+		 * Railgating isn't deterministic; instead of disallowing
+		 * railgating globally, take a power refcount for this
+		 * channel's lifetime. The gk20a_idle() pair for this happens
+		 * when the channel gets freed.
+		 *
+		 * Deterministic flag and this busy must be atomic within the
+		 * busy lock.
+		 */
+		err = gk20a_busy(g);
+		if (err) {
+			up_read(&g->deterministic_busy);
+			return err;
+		}
+
 		c->deterministic = true;
+		up_read(&g->deterministic_busy);
+	}
 
 	/* an address space needs to have been bound at this point. */
 	if (!gk20a_channel_as_bound(c)) {
 		nvgpu_err(g,
 			"not bound to an address space at time of gpfifo"
 			" allocation.");
-		return -EINVAL;
+		err = -EINVAL;
+		goto clean_up_idle;
 	}
 	ch_vm = c->vm;
 
 	if (c->gpfifo.mem.size) {
 		nvgpu_err(g, "channel %d :"
 			"gpfifo already allocated", c->hw_chid);
-		return -EEXIST;
+		err = -EEXIST;
+		goto clean_up_idle;
 	}
 
 	err = nvgpu_dma_alloc_map_sys(ch_vm,
@@ -1336,6 +1363,13 @@ clean_up_unmap:
 	nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem);
 clean_up:
 	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
+clean_up_idle:
+	if (c->deterministic) {
+		down_read(&g->deterministic_busy);
+		gk20a_idle(g);
+		c->deterministic = false;
+		up_read(&g->deterministic_busy);
+	}
 	nvgpu_err(g, "fail");
 	return err;
 }
@@ -2089,7 +2123,13 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 		channel_gk20a_free_job(c, job);
 		job_finished = 1;
-		gk20a_idle(g);
+
+		/*
+		 * Deterministic channels have a channel-wide power reference;
+		 * for others, there's one per submit.
+		 */
+		if (!c->deterministic)
+			gk20a_idle(g);
 
 		if (!clean_all) {
 			/* Timeout isn't supported here so don't touch it. */
@@ -2457,7 +2497,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	 * Job tracking is necessary for any of the following conditions:
 	 *  - pre- or post-fence functionality
 	 *  - channel wdt
-	 *  - GPU rail-gating
+	 *  - GPU rail-gating with non-deterministic channels
 	 *  - buffer refcounting
 	 *
 	 * If none of the conditions are met, then job tracking is not
@@ -2467,7 +2507,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) ||
 			(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) ||
 			c->wdt_enabled ||
-			g->can_railgate ||
+			(g->can_railgate && !c->deterministic) ||
 			!skip_buffer_refcounting;
 
 	if (need_job_tracking) {
@@ -2495,7 +2535,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		 *   behavior of the clean-up operation non-deterministic
 		 *   (should not be performed in the submit path)
 		 * - channel wdt
-		 * - GPU rail-gating
+		 * - GPU rail-gating with non-deterministic channels
 		 * - buffer refcounting
 		 *
 		 * If none of the conditions are met, then deferred clean-up
@@ -2505,7 +2545,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		need_deferred_cleanup = !c->deterministic ||
 					need_sync_framework ||
 					c->wdt_enabled ||
-					g->can_railgate ||
+					(g->can_railgate &&
+					 !c->deterministic) ||
 					!skip_buffer_refcounting;
 
 		/*
@@ -2515,12 +2556,20 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (c->deterministic && need_deferred_cleanup)
 		return -EINVAL;
 
-	/* released by job cleanup via syncpt or sema interrupt */
-	err = gk20a_busy(g);
-	if (err) {
-		nvgpu_err(g, "failed to host gk20a to submit gpfifo, process %s",
-			current->comm);
-		return err;
+	if (!c->deterministic) {
+		/*
+		 * Get a power ref unless this is a deterministic
+		 * channel that holds them during the channel lifetime.
+		 * This one is released by gk20a_channel_clean_up_jobs,
+		 * via syncpt or sema interrupt, whichever is used.
+		 */
+		err = gk20a_busy(g);
+		if (err) {
+			nvgpu_err(g,
+				"failed to host gk20a to submit gpfifo, process %s",
+				current->comm);
+			return err;
+		}
 	}
 
 	if (!need_deferred_cleanup) {
@@ -2529,6 +2578,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		}
 	}
 
+
+	/* Grab access to HW to deal with do_idle */
+	if (c->deterministic)
+		down_read(&g->deterministic_busy);
+
 	trace_gk20a_channel_submit_gpfifo(g->name,
 					  c->hw_chid,
 					  num_entries,
@@ -2601,6 +2655,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	g->ops.fifo.userd_gp_put(g, c);
 
+	/* No hw access beyond this point */
+	if (c->deterministic)
+		up_read(&g->deterministic_busy);
+
 	trace_gk20a_channel_submitted_gpfifo(g->name,
 					     c->hw_chid,
 					     num_entries,
@@ -2622,11 +2680,90 @@ clean_up:
 	gk20a_dbg_fn("fail");
 	gk20a_fence_put(pre_fence);
 	gk20a_fence_put(post_fence);
-	if (need_deferred_cleanup)
+	if (c->deterministic)
+		up_read(&g->deterministic_busy);
+	else if (need_deferred_cleanup)
 		gk20a_idle(g);
 
 	return err;
 }
 
+/*
+ * Stop deterministic channel activity for do_idle() when power needs to go off
+ * momentarily but deterministic channels keep power refs for potentially a
+ * long time.
+ *
+ * Takes write access on g->deterministic_busy.
+ *
+ * Must be paired with gk20a_channel_deterministic_unidle().
+ */
+void gk20a_channel_deterministic_idle(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
+
+	/* Grab exclusive access to the hw to block new submits */
+	down_write(&g->deterministic_busy);
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+
+		if (!gk20a_channel_get(ch))
+			continue;
+
+		if (ch->deterministic) {
+			/*
+			 * Drop the power ref taken when setting deterministic
+			 * flag. deterministic_unidle will put this and the
+			 * channel ref back.
+			 *
+			 * Hold the channel ref: it must not get freed in
+			 * between. A race could otherwise result in lost
+			 * gk20a_busy() via unidle, and in unbalanced
+			 * gk20a_idle() via closing the channel.
+			 */
+			gk20a_idle(g);
+		} else {
+			/* Not interesting, carry on. */
+			gk20a_channel_put(ch);
+		}
+	}
+}
+
+/*
+ * Allow deterministic channel activity again for do_unidle().
+ *
+ * This releases write access on g->deterministic_busy.
+ */
+void gk20a_channel_deterministic_unidle(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+
+		if (!gk20a_channel_get(ch))
+			continue;
+
+		/*
+		 * Deterministic state changes inside deterministic_busy lock,
+		 * which we took in deterministic_idle.
+		 */
+		if (ch->deterministic) {
+			if (gk20a_busy(g))
+				nvgpu_err(g, "cannot busy() again!");
+			/* Took this in idle() */
+			gk20a_channel_put(ch);
+		}
+
+		gk20a_channel_put(ch);
+	}
+
+	/* Release submits, new deterministic channels and frees */
+	up_write(&g->deterministic_busy);
+}
+
 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 {
 	struct channel_gk20a *c = g->fifo.channel+chid;
@@ -328,6 +328,9 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
 int gk20a_channel_suspend(struct gk20a *g);
 int gk20a_channel_resume(struct gk20a *g);
 
+void gk20a_channel_deterministic_idle(struct gk20a *g);
+void gk20a_channel_deterministic_unidle(struct gk20a *g);
+
 int nvgpu_channel_worker_init(struct gk20a *g);
 void nvgpu_channel_worker_deinit(struct gk20a *g);
 
@@ -3494,10 +3494,11 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
 	syncpointa = inst_mem[ram_fc_syncpointa_w()];
 	syncpointb = inst_mem[ram_fc_syncpointb_w()];
 
-	gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid,
+	gk20a_debug_output(o, "%d-%s, pid %d, refs %d%s: ", hw_chid,
 			g->name,
 			ch_state->pid,
-			ch_state->refs);
+			ch_state->refs,
+			ch_state->deterministic ? ", deterministic" : "");
 	gk20a_debug_output(o, "channel status: %s in use %s %s\n",
 		ccsr_channel_enable_v(channel) ? "" : "not",
 		gk20a_decode_ccsr_chan_status(status),
@@ -3576,6 +3577,7 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
 
 		ch_state[chid]->pid = ch->pid;
 		ch_state[chid]->refs = atomic_read(&ch->ref_count);
+		ch_state[chid]->deterministic = ch->deterministic;
 		nvgpu_mem_rd_n(g, &ch->inst_block, 0,
 				&ch_state[chid]->inst_block[0],
 				ram_in_alloc_size_v());
@@ -216,6 +216,7 @@ static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
 struct ch_state {
 	int pid;
 	int refs;
+	bool deterministic;
 	u32 inst_block[0];
 };
 
@@ -456,22 +456,19 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 		gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
 
 	/*
-	 * Railgating needs job tracking which prevents fast submits. They're
-	 * supported otherwise, provided that the user doesn't request anything
-	 * that depends on job tracking. (Here, fast means strictly no
+	 * Fast submits are supported as long as the user doesn't request
+	 * anything that depends on job tracking. (Here, fast means strictly no
 	 * metadata, just the gpfifo contents are copied and gp_put updated).
 	 */
-	if (!g->can_railgate)
-		gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
+	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
 
 	/*
-	 * Railgating and sync framework require deferred job cleanup which
-	 * prevents deterministic submits. They're supported otherwise,
-	 * provided that the user doesn't request anything that depends on
-	 * deferred cleanup.
+	 * Sync framework requires deferred job cleanup, wrapping syncs in FDs,
+	 * and other heavy stuff, which prevents deterministic submits. This is
+	 * supported otherwise, provided that the user doesn't request anything
+	 * that depends on deferred cleanup.
 	 */
-	if (!g->can_railgate
-	    && !gk20a_channel_sync_needs_sync_framework(g))
+	if (!gk20a_channel_sync_needs_sync_framework(g))
 		gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL;
 
 	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
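
From a user's point of view, the change above means NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING is now always advertised, and the FULL flag only still depends on the sync framework. A minimal sketch of checking this before requesting deterministic behavior; it assumes the characteristics structure has already been fetched from the ctrl device (that ioctl plumbing is elided), and the helper name is illustrative.

#include <stdbool.h>
#include <linux/nvgpu.h>

/*
 * Decide whether deterministic submits with in-kernel job tracking may be
 * requested, given already-fetched gpu characteristics.
 */
static bool deterministic_full_supported(const struct nvgpu_gpu_characteristics *c)
{
	/*
	 * NO_JOBTRACKING is unconditional after this patch; FULL is set unless
	 * the sync framework is required, since railgating no longer blocks it.
	 */
	return (c->flags & NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL) != 0;
}
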
@@ -1025,6 +1025,12 @@ struct gk20a {
 	u32 log_trace;
 
 	struct rw_semaphore busy_lock;
+	/*
+	 * Guards access to hardware when usual gk20a_{busy,idle} are skipped
+	 * for submits and held for channel lifetime but dropped for an ongoing
+	 * gk20a_do_idle().
+	 */
+	struct rw_semaphore deterministic_busy;
 
 	struct nvgpu_falcon pmu_flcn;
 	struct nvgpu_falcon sec2_flcn;
@@ -1359,6 +1359,10 @@ struct nvgpu_alloc_gpfifo_ex_args {
 /*
  * Channel shall exhibit deterministic behavior in the submit path.
  *
+ * NOTE: as an exception, VPR resize may still cause the GPU to reset at any
+ * time, which is not deterministic behavior. If this is not acceptable, the
+ * user has to make sure that VPR resize does not occur.
+ *
  * With this flag, any submits with in-kernel job tracking also require that
  * num_inflight_jobs is nonzero, and additionally that
  * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is found in gpu