diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index ded3bcd86..5947a628d 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -202,11 +202,21 @@ void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) nvgpu_mutex_release(&ch->joblist.cleanup_lock); - /* - * When closing the channel, this scheduled update holds one ref which - * is waited for before advancing with freeing. + /* The update to flush the job queue is only needed to process + * nondeterministic resources and ch wdt timeouts. Any others are + * either nonexistent or preallocated from pools that can be killed in + * one go on deterministic channels; take a look at what would happen + * in nvgpu_channel_clean_up_deterministic_job() and what + * nvgpu_submit_deterministic() requires. */ - nvgpu_channel_update(ch); + if (!nvgpu_channel_is_deterministic(ch)) { + /* + * When closing the channel, this scheduled update holds one + * channel ref which is waited for before advancing with + * freeing. + */ + nvgpu_channel_update(ch); + } } static void channel_kernelmode_deinit(struct nvgpu_channel *ch) @@ -529,7 +539,7 @@ static void nvgpu_channel_worker_poll_wakeup_process_item( nvgpu_log_fn(ch->g, " "); - nvgpu_channel_clean_up_jobs(ch, true); + nvgpu_channel_clean_up_jobs(ch); /* ref taken when enqueued */ nvgpu_channel_put(ch); @@ -645,16 +655,14 @@ err_put_buffers: /** * Clean up job resources for further jobs to use. - * @clean_all: If true, process as many jobs as possible, otherwise just one. * - * Loop all jobs from the joblist until a pending job is found, or just one if - * clean_all is not set. Pending jobs are detected from the job's post fence, - * so this is only done for jobs that have job tracking resources. Free all - * per-job memory for completed jobs; in case of preallocated resources, this - * opens up slots for new jobs to be submitted. 
+ * Loop all jobs from the joblist until a pending job is found. Pending jobs + * are detected from the job's post fence, so this is only done for jobs that + * have job tracking resources. Free all per-job memory for completed jobs; in + * case of preallocated resources, this opens up slots for new jobs to be + * submitted. */ -void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, - bool clean_all) +void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c) { struct vm_gk20a *vm; struct nvgpu_channel_job *job; @@ -669,13 +677,9 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, vm = c->vm; g = c->g; - /* - * If !clean_all, we're in a condition where watchdog isn't supported - * anyway (this would be a no-op). - */ - if (clean_all) { - watchdog_on = nvgpu_channel_wdt_stop(c->wdt); - } + nvgpu_assert(!nvgpu_channel_is_deterministic(c)); + + watchdog_on = nvgpu_channel_wdt_stop(c->wdt); /* Synchronize with abort cleanup that needs the jobs. */ nvgpu_mutex_acquire(&c->joblist.cleanup_lock); @@ -704,7 +708,7 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, * this - in that case, this is a no-op and the new * later timeout is still used. */ - if (clean_all && watchdog_on) { + if (watchdog_on) { nvgpu_channel_wdt_continue(c->wdt); } break; @@ -738,8 +742,7 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, nvgpu_fence_put(&job->post_fence); /* - * Free the private command buffers (wait_cmd first and - * then incr_cmd i.e. order of allocation) + * Free the private command buffers (in order of allocation) */ if (job->wait_cmd != NULL) { nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->wait_cmd); @@ -754,18 +757,8 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, job_finished = true; - /* - * Deterministic channels have a channel-wide power reference; - * for others, there's one per submit. - */ - if (!nvgpu_channel_is_deterministic(c)) { - gk20a_idle(g); - } - - if (!clean_all) { - /* Timeout isn't supported here so don't touch it. 
*/ - break; - } + /* taken in nvgpu_submit_nondeterministic() */ + gk20a_idle(g); } nvgpu_mutex_release(&c->joblist.cleanup_lock); @@ -776,6 +769,61 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, } } +/** + * Clean up at most one completed job to provide space for a new submit. + * + * Only the job returned by channel_joblist_peek() is considered; nothing is + * freed if the joblist is empty or that job's post fence has not expired. + * Deterministic submits set up very little, so little is undone here; the + * channel is assumed to lack timeouts and mapped buffers (latter asserted). + */ +void nvgpu_channel_clean_up_deterministic_job(struct nvgpu_channel *c) +{ + struct nvgpu_channel_job *job; + + nvgpu_assert(nvgpu_channel_is_deterministic(c)); + + /* Synchronize with abort cleanup that needs the jobs. */ + nvgpu_mutex_acquire(&c->joblist.cleanup_lock); + + nvgpu_channel_joblist_lock(c); + if (nvgpu_channel_joblist_is_empty(c)) { + nvgpu_channel_joblist_unlock(c); + goto out_unlock; + } + job = channel_joblist_peek(c); + nvgpu_channel_joblist_unlock(c); + + nvgpu_assert(job->num_mapped_buffers == 0U); + + if (!nvgpu_fence_is_expired(&job->post_fence)) { + goto out_unlock; + } + + /* + * This fence is syncpoint-based, so cleanup doesn't do anything. Put + * the ref back for consistency though. + */ + nvgpu_fence_put(&job->post_fence); + + /* + * Free the private command buffers (in order of allocation) + */ + if (job->wait_cmd != NULL) { + nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->wait_cmd); + } + nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->incr_cmd); + + nvgpu_channel_free_job(c, job); + + nvgpu_channel_joblist_lock(c); + channel_joblist_delete(c, job); + nvgpu_channel_joblist_unlock(c); + +out_unlock: + nvgpu_mutex_release(&c->joblist.cleanup_lock); +} + /** * Schedule a job cleanup work on this channel to free resources and to signal * about completion. 
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 9f17f5f8c..a1d08da8d 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -584,7 +584,7 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c, * though - the hw GP_GET pointer can be much further away than * our metadata pointers; gpfifo space is "freed" by the HW. */ - nvgpu_channel_clean_up_jobs(c, true); + nvgpu_channel_clean_up_deterministic_job(c); } /* Grab access to HW to deal with do_idle */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 631ba2110..27fee066e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -563,8 +563,8 @@ u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch); int nvgpu_channel_add_job(struct nvgpu_channel *c, struct nvgpu_channel_job *job, bool skip_buffer_refcounting); -void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, - bool clean_all); +void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c); +void nvgpu_channel_clean_up_deterministic_job(struct nvgpu_channel *c); int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, struct nvgpu_gpfifo_userdata userdata, u32 num_entries, diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index 05e1c7ed6..1c10ce73d 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -1376,6 +1376,10 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) goto err_setup_bind; } + /* + * Note that this cannot be deterministic because of the job completion + * callbacks that aren't delivered for deterministic channels. 
+ */ setup_bind_args.num_gpfifo_entries = 1024; setup_bind_args.num_inflight_jobs = 0; setup_bind_args.flags = 0; diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h index b9d27abc9..1603f8403 100644 --- a/drivers/gpu/nvgpu/os/linux/channel.h +++ b/drivers/gpu/nvgpu/os/linux/channel.h @@ -98,6 +98,7 @@ u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags); int nvgpu_channel_init_support_linux(struct nvgpu_os_linux *l); void nvgpu_channel_remove_support_linux(struct nvgpu_os_linux *l); +/* Deprecated. Use fences in new code. */ struct nvgpu_channel *gk20a_open_new_channel_with_cb(struct gk20a *g, void (*update_fn)(struct nvgpu_channel *, void *), void *update_fn_data,