diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 7954a26cd..9d86adee6 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -38,6 +38,12 @@
 #include
 #include
 
+/*
+ * We might need two extra gpfifo entries per submit - one for the pre-fence
+ * and one for the post-fence.
+ */
+#define EXTRA_GPFIFO_ENTRIES 2U
+
 /*
  * Handle the submit synchronization - pre-fences and post-fences.
  */
@@ -423,6 +429,259 @@ static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c,
 	return 0;
 }
 
+static int check_gpfifo_capacity(struct nvgpu_channel *c, u32 required)
+{
+	/*
+	 * Make sure we have enough space for gpfifo entries. Check the
+	 * cached values first and then read from HW. If there is still no
+	 * space, return -EAGAIN and let userspace decide whether to retry.
+	 */
+	if (nvgpu_channel_get_gpfifo_free_count(c) < required) {
+		if (nvgpu_channel_update_gpfifo_get_and_get_free_count(c) <
+				required) {
+			return -EAGAIN;
+		}
+	}
+
+	return 0;
+}
+
+static int nvgpu_do_submit(struct nvgpu_channel *c,
+		struct nvgpu_gpfifo_entry *gpfifo,
+		struct nvgpu_gpfifo_userdata userdata,
+		u32 num_entries,
+		u32 flags,
+		struct nvgpu_channel_fence *fence,
+		struct nvgpu_fence_type **fence_out,
+		struct nvgpu_profile *profile,
+		bool need_job_tracking,
+		bool need_deferred_cleanup)
+{
+	struct gk20a *g = c->g;
+	int err;
+
+#ifdef CONFIG_NVGPU_TRACE
+	trace_gk20a_channel_submit_gpfifo(g->name,
+			c->chid,
+			num_entries,
+			flags,
+			fence ? fence->id : 0,
+			fence ? fence->value : 0);
+#endif
+
+	nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
+			c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+	err = check_gpfifo_capacity(c, num_entries + EXTRA_GPFIFO_ENTRIES);
+	if (err != 0) {
+		return err;
+	}
+
+	if (need_job_tracking) {
+		err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo,
+				userdata, num_entries, flags, fence,
+				fence_out, profile, need_deferred_cleanup);
+	} else {
+		err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
+				userdata, num_entries, fence_out, profile);
+	}
+
+	if (err != 0) {
+		return err;
+	}
+
+	nvgpu_profile_snapshot(profile, PROFILE_APPEND);
+
+	g->ops.userd.gp_put(g, c);
+
+	return 0;
+}
+
+#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
+static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
+		struct nvgpu_gpfifo_entry *gpfifo,
+		struct nvgpu_gpfifo_userdata userdata,
+		u32 num_entries,
+		u32 flags,
+		struct nvgpu_channel_fence *fence,
+		struct nvgpu_fence_type **fence_out,
+		struct nvgpu_profile *profile)
+{
+	bool skip_buffer_refcounting = (flags &
+			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
+	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
+	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
+	bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
+	struct gk20a *g = c->g;
+	bool need_job_tracking;
+	int err = 0;
+
+	nvgpu_assert(nvgpu_channel_is_deterministic(c));
+
+	/* sync framework on post fences would not be deterministic */
+	if (flag_fence_get && flag_sync_fence) {
+		return -EINVAL;
+	}
+
+	/* buffer refcounting would be O(n) */
+	if (!skip_buffer_refcounting) {
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_NVGPU_CHANNEL_WDT
+	/* the watchdog needs periodic job cleanup */
+	if (c->wdt.enabled) {
+		return -EINVAL;
+	}
+#endif
+
+	/*
+	 * Job tracking is necessary on deterministic channels if and only if
+	 * pre- or post-fence functionality is needed. If not, a fast submit
+	 * can be done (i.e. only need to write out userspace GPFIFO entries and
+	 * update GP_PUT).
+	 */
+	need_job_tracking = flag_fence_wait || flag_fence_get;
+
+	if (need_job_tracking) {
+		/* nvgpu_semaphore is dynamically allocated, not pooled */
+		if (!nvgpu_has_syncpoints(g)) {
+			return -EINVAL;
+		}
+
+		/* dynamic job allocation wouldn't be deterministic */
+		if (!nvgpu_channel_is_prealloc_enabled(c)) {
+			return -EINVAL;
+		}
+
+		/* dynamic sync allocation wouldn't be deterministic */
+		if (g->aggressive_sync_destroy_thresh != 0U) {
+			return -EINVAL;
+		}
+
+		/*
+		 * (Try to) clean up a single job, if available. Each job
+		 * requires the same amount of metadata, so this is enough for
+		 * the job list, fence pool, and private command buffers that
+		 * this submit will need.
+		 *
+		 * This submit might still need more gpfifo space than the
+		 * previous one used. The job metadata doesn't track that,
+		 * though - the HW GP_GET pointer can be much further along
+		 * than our metadata pointers; gpfifo space is "freed" by HW.
+		 */
+		nvgpu_channel_clean_up_jobs(c, true);
+	}
+
+	/* Grab access to HW to deal with do_idle */
+	nvgpu_rwsem_down_read(&g->deterministic_busy);
+
+	if (c->deterministic_railgate_allowed) {
+		/*
+		 * Nope - this channel has dropped its own power ref. As
+		 * deterministic submits don't hold a power ref per submitted
+		 * job like normal ones do, the GPU might railgate any time
+		 * now and thus submit is disallowed.
+		 */
+		err = -EINVAL;
+		goto clean_up;
+	}
+
+	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
+			fence_out, profile, need_job_tracking, false);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	/* No hw access beyond this point */
+	nvgpu_rwsem_up_read(&g->deterministic_busy);
+
+	return 0;
+
+clean_up:
+	nvgpu_log_fn(g, "fail %d", err);
+	nvgpu_rwsem_up_read(&g->deterministic_busy);
+
+	return err;
+}
+#endif
+
+static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
+		struct nvgpu_gpfifo_entry *gpfifo,
+		struct nvgpu_gpfifo_userdata userdata,
+		u32 num_entries,
+		u32 flags,
+		struct nvgpu_channel_fence *fence,
+		struct nvgpu_fence_type **fence_out,
+		struct nvgpu_profile *profile)
+{
+	bool skip_buffer_refcounting = (flags &
+			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
+	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
+	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
+	struct gk20a *g = c->g;
+	bool need_job_tracking;
+	int err = 0;
+
+	nvgpu_assert(!nvgpu_channel_is_deterministic(c));
+
+	/*
+	 * Job tracking is necessary for any of the following conditions on
+	 * non-deterministic channels:
+	 * - pre- or post-fence functionality
+	 * - GPU rail-gating
+	 * - VPR resize enabled
+	 * - buffer refcounting
+	 * - channel watchdog
+	 *
+	 * If none of the conditions are met, then job tracking is not
+	 * required and a fast submit can be done (i.e. only need to write
+	 * out userspace GPFIFO entries and update GP_PUT).
+	 */
+	need_job_tracking = (flag_fence_wait ||
+			flag_fence_get ||
+			nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
+			nvgpu_is_vpr_resize_enabled() ||
+			!skip_buffer_refcounting);
+
+#ifdef CONFIG_NVGPU_CHANNEL_WDT
+	need_job_tracking = need_job_tracking || c->wdt.enabled;
+#endif
+
+	if (need_job_tracking) {
+		/*
+		 * Get a power ref because this isn't a deterministic
+		 * channel, which would hold one for the channel lifetime.
+		 * This one is released by nvgpu_channel_clean_up_jobs,
+		 * via syncpt or sema interrupt, whichever is used.
+		 */
+		err = gk20a_busy(g);
+		if (err != 0) {
+			nvgpu_err(g,
+				"failed to host gk20a to submit gpfifo");
+			nvgpu_print_current(g, NULL, NVGPU_ERROR);
+			return err;
+		}
+	}
+
+	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
+			fence_out, profile, need_job_tracking, true);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	return 0;
+
+clean_up:
+	nvgpu_log_fn(g, "fail %d", err);
+	/* the power ref was taken only for the job tracking case */
+	if (need_job_tracking) {
+		gk20a_idle(g);
+	}
+
+	return err;
+}
+
 static int check_submit_allowed(struct nvgpu_channel *c)
 {
 	struct gk20a *g = c->g;
@@ -464,36 +723,30 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 		struct nvgpu_profile *profile)
 {
 	struct gk20a *g = c->g;
-	/* we might need two extra gpfifo entries - one for pre fence
-	 * and one for post fence. */
-	const u32 extra_entries = 2U;
-	bool skip_buffer_refcounting = (flags &
-		NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
-	int err = 0;
-	bool need_job_tracking;
-	bool need_deferred_cleanup = false;
-	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
-	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
-	bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
+	int err;
 
 	err = check_submit_allowed(c);
 	if (err != 0) {
 		return err;
 	}
 
-	/* fifo not large enough for request. Return error immediately.
-	 * Kernel can insert gpfifo entries before and after user gpfifos.
-	 * So, add extra_entries in user request. Also, HW with fifo size N
-	 * can accept only N-1 entreis and so the below condition */
-	if (c->gpfifo.entry_num - 1U < num_entries + extra_entries) {
+	/*
+	 * Fifo not large enough for request. Return error immediately.
+	 * Kernel can insert gpfifo entries before and after user gpfifos.
+	 * So, allow for the extra entries on top of the user request. Also,
+	 * HW with fifo size N can accept only N-1 entries.
+	 */
+	if (c->gpfifo.entry_num - 1U < num_entries + EXTRA_GPFIFO_ENTRIES) {
 		nvgpu_err(g, "not enough gpfifo space allocated");
 		return -ENOMEM;
 	}
 
-	if ((flag_fence_wait || flag_fence_get) && (fence == NULL)) {
-		return -EINVAL;
-	}
-
+	if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
+			NVGPU_SUBMIT_FLAGS_FENCE_GET)) != 0U &&
+			(fence == NULL)) {
+		return -EINVAL;
+	}
+
 	nvgpu_profile_snapshot(profile, PROFILE_ENTRY);
 
 	/* update debug settings */
@@ -501,168 +754,21 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 
 	nvgpu_log_info(g, "channel %d", c->chid);
 
-	/*
-	 * Job tracking is necessary for any of the following conditions:
-	 * - pre- or post-fence functionality
-	 * - channel wdt
-	 * - GPU rail-gating with non-deterministic channels
-	 * - VPR resize enabled with non-deterministic channels
-	 * - buffer refcounting
-	 *
-	 * If none of the conditions are met, then job tracking is not
-	 * required and a fast submit can be done (ie. only need to write
-	 * out userspace GPFIFO entries and update GP_PUT).
-	 */
-	need_job_tracking = (flag_fence_wait ||
-			flag_fence_get ||
-			((nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
-			nvgpu_is_vpr_resize_enabled()) &&
-			!nvgpu_channel_is_deterministic(c)) ||
-			!skip_buffer_refcounting);
-
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
-	need_job_tracking = need_job_tracking || c->wdt.enabled;
-#endif
-
-	if (need_job_tracking) {
-		/*
-		 * If the channel is to have deterministic latency and
-		 * job tracking is required, the channel must have
-		 * pre-allocated resources. Otherwise, we fail the submit here
-		 */
-		if (nvgpu_channel_is_deterministic(c) &&
-			!nvgpu_channel_is_prealloc_enabled(c)) {
-			return -EINVAL;
-		}
-
-		/*
-		 * Deferred clean-up is necessary for any of the following
-		 * conditions that could make clean-up behaviour
-		 * non-deterministic and as such not suitable for the submit
-		 * path:
-		 * - channel's deterministic flag is not set (job tracking is
-		 *   dynamically allocated)
-		 * - no syncpt support (struct nvgpu_semaphore is dynamically
-		 *   allocated, not pooled)
-		 * - dependency on sync framework for post fences
-		 * - buffer refcounting, which is O(n)
-		 * - aggressive sync destroy is enabled (sync objects are not
-		 *   held during channel lifetime but refcounted for submits)
-		 * - channel wdt, which needs periodic async cleanup
-		 *
-		 * If none of the conditions are met, then deferred clean-up
-		 * is not required, and we clean-up one job-tracking
-		 * resource in the submit path.
-		 */
-		need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) ||
-					!nvgpu_has_syncpoints(g) ||
-					(flag_sync_fence && flag_fence_get) ||
-					!skip_buffer_refcounting ||
-					g->aggressive_sync_destroy_thresh != 0U;
-
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
-		need_deferred_cleanup = need_deferred_cleanup || c->wdt.enabled;
-#endif
-
-		/*
-		 * For deterministic channels, we don't allow deferred clean_up
-		 * processing to occur. In cases we hit this, we fail the submit
-		 */
-		if (nvgpu_channel_is_deterministic(c) && need_deferred_cleanup) {
-			return -EINVAL;
-		}
-
-		if (!nvgpu_channel_is_deterministic(c)) {
-			/*
-			 * Get a power ref unless this is a deterministic
-			 * channel that holds them during the channel lifetime.
-			 * This one is released by nvgpu_channel_clean_up_jobs,
-			 * via syncpt or sema interrupt, whichever is used.
-			 */
-			err = gk20a_busy(g);
-			if (err != 0) {
-				nvgpu_err(g,
-					"failed to host gk20a to submit gpfifo");
-				nvgpu_print_current(g, NULL, NVGPU_ERROR);
-				return err;
-			}
-		}
-
-		if (!need_deferred_cleanup) {
-			/* clean up a single job */
-			nvgpu_channel_clean_up_jobs(c, false);
-		}
-	}
-
 #ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
-	/* Grab access to HW to deal with do_idle */
 	if (c->deterministic) {
-		nvgpu_rwsem_down_read(&g->deterministic_busy);
-	}
-
-	if (c->deterministic && c->deterministic_railgate_allowed) {
-		/*
-		 * Nope - this channel has dropped its own power ref. As
-		 * deterministic submits don't hold power on per each submitted
-		 * job like normal ones do, the GPU might railgate any time now
-		 * and thus submit is disallowed.
-		 */
-		err = -EINVAL;
-		goto clean_up;
-	}
+		err = nvgpu_submit_deterministic(c, gpfifo, userdata,
+				num_entries, flags, fence, fence_out, profile);
+	} else
 #endif
-
-#ifdef CONFIG_NVGPU_TRACE
-	trace_gk20a_channel_submit_gpfifo(g->name,
-			c->chid,
-			num_entries,
-			flags,
-			fence ? fence->id : 0,
-			fence ? fence->value : 0);
-#endif
-
-	nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
-		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
-
-	/*
-	 * Make sure we have enough space for gpfifo entries. Check cached
-	 * values first and then read from HW. If no space, return EAGAIN
-	 * and let userpace decide to re-try request or not.
-	 */
-	if (nvgpu_channel_get_gpfifo_free_count(c) <
-			num_entries + extra_entries) {
-		if (nvgpu_channel_update_gpfifo_get_and_get_free_count(c) <
-				num_entries + extra_entries) {
-			err = -EAGAIN;
-			goto clean_up;
-		}
+	{
+		err = nvgpu_submit_nondeterministic(c, gpfifo, userdata,
+				num_entries, flags, fence, fence_out, profile);
 	}
 
-	if (need_job_tracking) {
-		err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo,
-				userdata, num_entries, flags, fence,
-				fence_out, profile, need_deferred_cleanup);
-	} else {
-		err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
-				userdata, num_entries, fence_out, profile);
+	if (err != 0) {
+		return err;
 	}
 
-	if (err) {
-		goto clean_up;
-	}
-
-	nvgpu_profile_snapshot(profile, PROFILE_APPEND);
-
-	g->ops.userd.gp_put(g, c);
-
-#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
-	/* No hw access beyond this point */
-	if (c->deterministic) {
-		nvgpu_rwsem_up_read(&g->deterministic_busy);
-	}
-#endif
-
 #ifdef CONFIG_NVGPU_TRACE
 	if (fence_out != NULL && *fence_out != NULL) {
 		trace_gk20a_channel_submitted_gpfifo(g->name,
@@ -683,20 +789,6 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 	nvgpu_log_fn(g, "done");
 
 	return err;
-
-clean_up:
-	nvgpu_log_fn(g, "fail");
-#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
-	if (c->deterministic) {
-		nvgpu_rwsem_up_read(&g->deterministic_busy);
-		return err;
-	}
-#endif
-	if (need_deferred_cleanup) {
-		gk20a_idle(g);
-	}
-
-	return err;
 }
 
 int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
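
Note on the capacity math used by this patch: both gpfifo checks (the entry_num - 1U test and check_gpfifo_capacity()) follow the usual ring-buffer convention that GET == PUT means "empty", so one of the N slots always stays unused and only N-1 entries ever fit. The standalone sketch below demonstrates that rule and the EXTRA_GPFIFO_ENTRIES headroom; it is illustrative only - the names, the ring size, and the free-count helper are local to the sketch, not taken from the driver.

#include <stdio.h>

#define ENTRY_NUM	8U	/* ring size N; gpfifo sizes are powers of two */
#define EXTRA_ENTRIES	2U	/* headroom for pre- and post-fence entries */

/*
 * One common way to compute the free count of a GET/PUT ring that
 * reserves one slot to distinguish "empty" from "full":
 * free = (get - put - 1) mod N.
 */
static unsigned int free_count(unsigned int get, unsigned int put)
{
	return (get - put - 1U) & (ENTRY_NUM - 1U);
}

int main(void)
{
	unsigned int get = 0U, put = 0U, n;

	/* Fill the ring until the free count drops to zero. */
	for (n = 0U; free_count(get, put) > 0U; n++) {
		put = (put + 1U) & (ENTRY_NUM - 1U);
	}

	/* Prints 7: a ring of size 8 accepts only N-1 entries. */
	printf("accepted %u of %u slots\n", n, ENTRY_NUM);

	/*
	 * A submit of num_entries user entries must additionally leave
	 * room for the kernel's fence entries, which is the intent of
	 * the num_entries + EXTRA_GPFIFO_ENTRIES checks in the patch.
	 */
	printf("max user entries per submit: %u\n",
			ENTRY_NUM - 1U - EXTRA_ENTRIES);
	return 0;
}

The same reasoning explains why the submit path rejects an oversized request with -ENOMEM up front, while a merely transient shortage (entries still draining through GP_GET) is reported as -EAGAIN so the caller can retry.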