diff --git a/drivers/gpu/nvgpu/common/fifo/runlist.c b/drivers/gpu/nvgpu/common/fifo/runlist.c index 59f2b4b6b..a5a95526b 100644 --- a/drivers/gpu/nvgpu/common/fifo/runlist.c +++ b/drivers/gpu/nvgpu/common/fifo/runlist.c @@ -432,7 +432,7 @@ static int nvgpu_runlist_reconstruct_locked(struct gk20a *g, return 0; } -static void nvgpu_runlist_swap_mem(struct nvgpu_runlist_domain *domain) +void nvgpu_runlist_swap_mem(struct gk20a *g, struct nvgpu_runlist_domain *domain) { struct nvgpu_runlist_mem *mem_tmp; @@ -440,43 +440,36 @@ static void nvgpu_runlist_swap_mem(struct nvgpu_runlist_domain *domain) * mem becomes the previously scheduled buffer and it can be modified once * the runlist lock is released. */ + rl_dbg(g, "Swapping mem for rl domain[%s]", domain->name); mem_tmp = domain->mem; domain->mem = domain->mem_hw; domain->mem_hw = mem_tmp; } -static int nvgpu_runlist_submit_locked(struct gk20a *g, struct nvgpu_runlist *rl, - struct nvgpu_runlist_domain *domain, bool wait_for_finish) +static int nvgpu_runlist_domain_actual_submit(struct gk20a *g, struct nvgpu_runlist *rl, + bool swap_buffer, bool wait_for_finish) { int ret = 0; - /* - * hw_submit updates mem_hw to hardware; swap the buffers now. - */ - nvgpu_runlist_swap_mem(domain); + rl_dbg(g, "Runlist[%u]: submitting domain %s", + rl->id, rl->domain->name); + + if (swap_buffer) { + nvgpu_runlist_swap_mem(g, rl->domain); + } + + nvgpu_atomic_set(&rl->domain->pending_update, 0); - /* - * A non-active domain may be updated, but submit still the currently - * active one just for simplicity. - * - * TODO: Later on, updates and submits will need to be totally - * decoupled so that submits are done only in the domain scheduler. - */ g->ops.runlist.hw_submit(g, rl); if (wait_for_finish) { ret = g->ops.runlist.wait_pending(g, rl); - if (ret == -ETIMEDOUT) { nvgpu_err(g, "runlist %d update timeout", rl->id); /* trigger runlist update timeout recovery */ return ret; - } else { - if (ret == -EINTR) { - nvgpu_err(g, "runlist update interrupted"); - } } } @@ -515,6 +508,8 @@ static int nvgpu_runlist_update_mem_locked(struct gk20a *g, struct nvgpu_runlist return ret; } + nvgpu_atomic_set(&domain->pending_update, 1); + return ret; } @@ -524,7 +519,7 @@ int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl, bool wait_for_finish) { int ret = 0; - + (void)wait_for_finish; /* * Certain use-cases might not have existing user rl domains, fall * back to shadow domain. @@ -539,16 +534,6 @@ int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl, if (ret != 0) { return ret; } - - /* - * A submit assumes domain->mem_hw to be the active buffer, - * and the reconstruction above updates domain->mem, and the swap happens - * in nvgpu_runlist_submit_locked which is done below for only the user - * domain so calling swap_mem here is "equivalent" to nvgpu_runlist_submit_locked - * to keep the ordering for any shadow rl domain submits that may happen in the - * future without going via this nvgpu_runlist_update_locked path. 
- */ - nvgpu_runlist_swap_mem(rl->shadow_rl_domain); } ret = nvgpu_runlist_update_mem_locked(g, rl, domain, ch, add, true); @@ -556,8 +541,6 @@ int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl, return ret; } - ret = nvgpu_runlist_submit_locked(g, rl, domain, wait_for_finish); - return ret; } @@ -578,18 +561,29 @@ int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next, if (nvgpu_mutex_tryacquire(&runlist->runlist_lock) == 0) { return -EBUSY; } + #ifdef CONFIG_NVGPU_LS_PMU mutex_ret = nvgpu_pmu_lock_acquire( g, g->pmu, PMU_MUTEX_ID_FIFO, &token); #endif - /* - * Note that the runlist memory is not rewritten; the currently active - * buffer is just resubmitted so that scheduling begins from the first - * entry in it. - */ - g->ops.runlist.hw_submit(g, runlist); + nvgpu_atomic_set(&runlist->domain->pending_update, 1); +#ifdef CONFIG_NVS_PRESENT + ret = g->nvs_worker_submit(g, runlist, runlist->domain, false, wait_preempt); +#else + ret = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, false, wait_preempt); +#endif + if (ret != 0) { + if (ret == 1) { + ret = 0; + } + goto done; + } + + /* Acquiring runlist lock above guarantees that the current + * domain won't be switched. + */ if (preempt_next) { if (g->ops.runlist.reschedule_preempt_next_locked(ch, wait_preempt) != 0) { @@ -601,6 +595,7 @@ int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next, nvgpu_err(g, "wait pending failed for runlist %u", runlist->id); } +done: #ifdef CONFIG_NVGPU_LS_PMU if (mutex_ret == 0) { if (nvgpu_pmu_lock_release(g, g->pmu, @@ -624,11 +619,12 @@ static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl, struct nvgpu_channel *ch, bool add, bool wait_for_finish) { + int ret = 0; + #ifdef CONFIG_NVGPU_LS_PMU u32 token = PMU_INVALID_MUTEX_OWNER_ID; int mutex_ret = 0; #endif - int ret = 0; nvgpu_log_fn(g, " "); @@ -638,6 +634,17 @@ static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl, PMU_MUTEX_ID_FIFO, &token); #endif ret = nvgpu_runlist_update_locked(g, rl, domain, ch, add, wait_for_finish); + if (ret == 0) { + #ifdef CONFIG_NVS_PRESENT + ret = g->nvs_worker_submit(g, rl, domain, true, wait_for_finish); + #else + ret = nvgpu_rl_domain_sync_submit(g, rl, domain, true, wait_for_finish); + #endif + /* Deferred Update */ + if (ret == 1) { + ret = 0; + } + } #ifdef CONFIG_NVGPU_LS_PMU if (mutex_ret == 0) { if (nvgpu_pmu_lock_release(g, g->pmu, @@ -655,8 +662,27 @@ static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl, return ret; } -static void runlist_select_locked(struct gk20a *g, struct nvgpu_runlist *runlist, - struct nvgpu_runlist_domain *next_domain) +/* + * This is expected to be called only when device is powered on. 
+ */ +static int runlist_submit_powered(struct gk20a *g, struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *next_domain, bool swap_buffer, + bool wait_for_finish) +{ + int err; + + runlist->domain = next_domain; + + rl_dbg(g, "Runlist[%u]: switching to domain %s", + runlist->id, next_domain->name); + + err = nvgpu_runlist_domain_actual_submit(g, runlist, swap_buffer, wait_for_finish); + + return err; +} + +static int runlist_select_and_submit(struct gk20a *g, struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *next_domain, bool wait_for_finish) { int err; @@ -670,7 +696,7 @@ static void runlist_select_locked(struct gk20a *g, struct nvgpu_runlist *runlist rl_dbg(g, "Runlist[%u]: power is off, skip submit", runlist->id); gk20a_idle_nosuspend(g); - return; + return 0; } err = gk20a_busy(g); @@ -683,67 +709,71 @@ static void runlist_select_locked(struct gk20a *g, struct nvgpu_runlist *runlist * the error. Power is already on when the domain scheduler is * actually in use. */ - return; + return err; } - /* - * Just submit the previously built mem (in nvgpu_runlist_update_locked) - * of the active domain to hardware. In the future, the main scheduling - * loop will get signaled when the RL mem is modified and the same domain - * with new data needs to be submitted (typically triggered by a channel - * getting opened or closed). For now, that code path executes separately. - */ - g->ops.runlist.hw_submit(g, runlist); + err = runlist_submit_powered(g, runlist, next_domain, false, wait_for_finish); gk20a_idle(g); + + return err; } -static void runlist_switch_domain_locked(struct gk20a *g, - struct nvgpu_runlist *runlist) +int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *next_domain, bool swap_buffers, + bool wait_for_finish) { - struct nvgpu_runlist_domain *domain; - struct nvgpu_runlist_domain *last; + int err = 0; - /* - * When the last of user created rl domains is removed, - * driver switches to the default domain. Hence, exit. + if (next_domain == NULL) { + next_domain = runlist->shadow_rl_domain; + } + + if (nvgpu_atomic_read(&next_domain->pending_update) == 1) { + err = runlist_submit_powered(g, runlist, next_domain, swap_buffers, + wait_for_finish); + } + + return err; +} + +static int runlist_switch_domain_and_submit(struct gk20a *g, + struct nvgpu_runlist *runlist, struct nvgpu_runlist_domain *rl_domain) +{ + int ret = 0; + struct nvgpu_runlist_domain *prev_rl_domain = runlist->domain; + + /* If no user domains exist, submit the shadow_rl_domain if + * pending is set to true. When the last user domain is removed, + * shadow_rl_domain will have pending_update set to true. + * Eventually, this logic will change. For manual mode, this needs + * to be submitted irrespective of the status of pending_update. */ if (nvgpu_list_empty(&runlist->user_rl_domains)) { - return; - } - - /* - * If there are user created rl domains available, - * runlist->domain always points to one of them. - */ - domain = runlist->domain; - last = nvgpu_list_last_entry(&runlist->user_rl_domains, - nvgpu_runlist_domain, domains_list); - - if (domain == last) { - domain = nvgpu_list_first_entry(&runlist->user_rl_domains, - nvgpu_runlist_domain, domains_list); + if (nvgpu_atomic_read(&rl_domain->pending_update) == 0) { + return 0; + } } else { - domain = nvgpu_list_next_entry(domain, - nvgpu_runlist_domain, domains_list); + /* If only one user domain exists, return if no pending + * update exists. 
+ */ + if (prev_rl_domain == rl_domain) { + if (nvgpu_atomic_read(&prev_rl_domain->pending_update) == 0) { + return 0; + } + } } - if (domain != runlist->domain) { - runlist_select_locked(g, runlist, domain); - } + ret = runlist_select_and_submit(g, runlist, rl_domain, false); + + return ret; } -static void runlist_switch_domain(struct gk20a *g, struct nvgpu_runlist *runlist) -{ - nvgpu_mutex_acquire(&runlist->runlist_lock); - runlist_switch_domain_locked(g, runlist); - nvgpu_mutex_release(&runlist->runlist_lock); -} - -void nvgpu_runlist_tick(struct gk20a *g) +void nvgpu_runlist_tick(struct gk20a *g, struct nvgpu_runlist_domain **rl_domain) { struct nvgpu_fifo *f = &g->fifo; u32 i; + int err = 0; rl_dbg(g, "domain tick"); @@ -751,7 +781,10 @@ void nvgpu_runlist_tick(struct gk20a *g) struct nvgpu_runlist *runlist; runlist = &f->active_runlists[i]; - runlist_switch_domain(g, runlist); + err = runlist_switch_domain_and_submit(g, runlist, rl_domain[i]); + if (err != 0) { + nvgpu_err(g, "Failed to schedule domain [%s]", rl_domain[i]->name); + } } } @@ -864,13 +897,13 @@ static void free_rl_mem(struct gk20a *g, struct nvgpu_runlist_mem *mem) nvgpu_kfree(g, mem); } -static void nvgpu_runlist_domain_unlink(struct nvgpu_runlist_domain *domain) +static void nvgpu_runlist_domain_unlink_locked(struct nvgpu_runlist_domain *domain) { /* added in nvgpu_runlist_domain_alloc() */ nvgpu_list_del(&domain->domains_list); } -static void nvgpu_runlist_domain_free(struct gk20a *g, +void nvgpu_runlist_domain_free(struct gk20a *g, struct nvgpu_runlist_domain *domain) { free_rl_mem(g, domain->mem); @@ -885,51 +918,12 @@ static void nvgpu_runlist_domain_free(struct gk20a *g, nvgpu_kfree(g, domain); } -static void nvgpu_runlist_domain_unlink_and_free(struct gk20a *g, +void nvgpu_runlist_unlink_domain(struct nvgpu_runlist *runlist, struct nvgpu_runlist_domain *domain) { - nvgpu_runlist_domain_unlink(domain); - nvgpu_runlist_domain_free(g, domain); -} - -int nvgpu_rl_domain_delete(struct gk20a *g, const char *name) -{ - struct nvgpu_fifo *f = &g->fifo; - u32 i; - - for (i = 0; i < f->num_runlists; i++) { - struct nvgpu_runlist *runlist; - struct nvgpu_runlist_domain *domain; - - runlist = &f->active_runlists[i]; - - nvgpu_mutex_acquire(&runlist->runlist_lock); - domain = nvgpu_rl_domain_get(g, runlist->id, name); - if (domain != NULL) { - struct nvgpu_runlist_domain *first; - struct nvgpu_runlist_domain *last; - - rl_dbg(g, "deleting rl domain [%s]", domain->name); - - first = nvgpu_list_first_entry(&runlist->user_rl_domains, - nvgpu_runlist_domain, domains_list); - - last = nvgpu_list_last_entry(&runlist->user_rl_domains, - nvgpu_runlist_domain, domains_list); - - if (first == last) { - /* Last of the user created rl domains, switch to default rl domain */ - runlist_select_locked(g, runlist, runlist->shadow_rl_domain); - } else if (domain == runlist->domain) { - /* Don't let the HW access this anymore, switch to another rl domain */ - runlist_switch_domain_locked(g, runlist); - } - nvgpu_runlist_domain_unlink_and_free(g, domain); - } - nvgpu_mutex_release(&runlist->runlist_lock); - } - - return 0; + nvgpu_mutex_acquire(&runlist->runlist_lock); + nvgpu_runlist_domain_unlink_locked(domain); + nvgpu_mutex_release(&runlist->runlist_lock); } void nvgpu_runlist_cleanup_sw(struct gk20a *g) @@ -954,7 +948,7 @@ void nvgpu_runlist_cleanup_sw(struct gk20a *g) nvgpu_runlist_domain, domains_list); - nvgpu_runlist_domain_unlink_and_free(g, domain); + nvgpu_runlist_domain_unlink_locked(domain); } /* this isn't an owning pointer, 
just reset */ runlist->domain = NULL; @@ -1110,19 +1104,16 @@ static struct nvgpu_runlist_mem *init_rl_mem(struct gk20a *g, u32 runlist_size) return mem; } -static void nvgpu_runlist_link_domain(struct nvgpu_runlist *runlist, +void nvgpu_runlist_link_domain(struct nvgpu_runlist *runlist, struct nvgpu_runlist_domain *domain) { - /* deleted in nvgpu_runlist_domain_unlink() */ + nvgpu_mutex_acquire(&runlist->runlist_lock); + /* deleted in nvgpu_runlist_domain_unlink_locked() */ nvgpu_list_add_tail(&domain->domains_list, &runlist->user_rl_domains); - - /* Select the first created domain as the boot-time default */ - if (runlist->domain == runlist->shadow_rl_domain) { - runlist->domain = domain; - } + nvgpu_mutex_release(&runlist->runlist_lock); } -static struct nvgpu_runlist_domain *nvgpu_runlist_domain_alloc(struct gk20a *g, +struct nvgpu_runlist_domain *nvgpu_runlist_domain_alloc(struct gk20a *g, const char *name) { struct nvgpu_runlist_domain *domain = nvgpu_kzalloc(g, sizeof(*domain)); @@ -1160,6 +1151,8 @@ static struct nvgpu_runlist_domain *nvgpu_runlist_domain_alloc(struct gk20a *g, goto free_active_channels; } + nvgpu_atomic_set(&domain->pending_update, 0); + return domain; free_active_channels: nvgpu_kfree(g, domain->active_channels); @@ -1189,43 +1182,6 @@ struct nvgpu_runlist_domain *nvgpu_rl_domain_get(struct gk20a *g, u32 runlist_id return NULL; } -int nvgpu_rl_domain_alloc(struct gk20a *g, const char *name) -{ - struct nvgpu_fifo *f = &g->fifo; - int err; - u32 i; - - for (i = 0U; i < f->num_runlists; i++) { - struct nvgpu_runlist *runlist; - struct nvgpu_runlist_domain *domain; - - runlist = &f->active_runlists[i]; - - nvgpu_mutex_acquire(&runlist->runlist_lock); - /* this may only happen on the very first runlist */ - if (nvgpu_rl_domain_get(g, runlist->id, name) != NULL) { - nvgpu_mutex_release(&runlist->runlist_lock); - return -EEXIST; - } - - domain = nvgpu_runlist_domain_alloc(g, name); - if (domain == NULL) { - nvgpu_mutex_release(&runlist->runlist_lock); - err = -ENOMEM; - goto clear; - } - - nvgpu_runlist_link_domain(runlist, domain); - nvgpu_mutex_release(&runlist->runlist_lock); - } - - return 0; -clear: - /* deletion skips runlists where the domain isn't found */ - (void)nvgpu_rl_domain_delete(g, name); - return err; -} - static void nvgpu_init_active_runlist_mapping(struct gk20a *g) { struct nvgpu_fifo *f = &g->fifo; diff --git a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c index e11ad8668..131e21312 100644 --- a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c +++ b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c @@ -43,8 +43,18 @@ static struct nvs_sched_ops nvgpu_nvs_ops = { * - currently it just locks all affected runlists * - consider pausing the scheduler logic and signaling users */ + struct nvgpu_nvs_worker_item { + struct gk20a *g; + struct nvgpu_runlist *rl; + struct nvgpu_runlist_domain *rl_domain; + struct nvgpu_cond cond; + bool swap_buffer; + bool wait_for_finish; + bool locked; + int status; struct nvgpu_list_node list; + nvgpu_atomic_t state; }; static inline struct nvgpu_nvs_worker_item * @@ -70,6 +80,9 @@ static void nvgpu_nvs_worker_poll_init(struct nvgpu_worker *worker) nvs_worker->current_timeout = 100; nvgpu_timeout_init_cpu_timer_sw(worker->g, &nvs_worker->timeout, nvs_worker->current_timeout); + + nvgpu_atomic_set(&nvs_worker->nvs_sched_init, 1); + nvgpu_cond_signal(&nvs_worker->worker.wq); } static u32 nvgpu_nvs_worker_wakeup_timeout(struct nvgpu_worker *worker) @@ -80,20 +93,12 @@ static u32 
nvgpu_nvs_worker_wakeup_timeout(struct nvgpu_worker *worker) return nvs_worker->current_timeout; } -static void nvgpu_nvs_worker_wakeup_process_item( - struct nvgpu_list_node *work_item) -{ - struct nvgpu_nvs_worker_item *item = - nvgpu_nvs_worker_item_from_worker_item(work_item); - (void)item; - /* placeholder; never called yet */ -} - static u64 nvgpu_nvs_tick(struct gk20a *g) { struct nvgpu_nvs_scheduler *sched = g->scheduler; struct nvgpu_nvs_domain *domain; struct nvs_domain *nvs_next; + struct nvgpu_nvs_domain *nvgpu_domain_next; u64 timeslice; nvs_dbg(g, "nvs tick"); @@ -109,8 +114,9 @@ static u64 nvgpu_nvs_tick(struct gk20a *g) } timeslice = nvs_next->timeslice_ns; + nvgpu_domain_next = nvs_next->priv; - nvgpu_runlist_tick(g); + nvgpu_runlist_tick(g, nvgpu_domain_next->rl_domains); sched->active_domain = nvs_next->priv; nvgpu_mutex_release(&g->sched_mutex); @@ -118,6 +124,113 @@ static u64 nvgpu_nvs_tick(struct gk20a *g) return timeslice; } +static void nvgpu_nvs_worker_wakeup_process_item(struct nvgpu_list_node *work_item) +{ + struct nvgpu_nvs_worker_item *work = + nvgpu_nvs_worker_item_from_worker_item(work_item); + struct gk20a *g = work->g; + int ret = 0; + struct nvgpu_nvs_scheduler *sched = g->scheduler; + struct nvs_domain *nvs_domain; + struct nvgpu_runlist *runlist = work->rl; + struct nvgpu_runlist_domain *rl_domain = work->rl_domain; + + nvgpu_mutex_acquire(&g->sched_mutex); + + if (rl_domain == NULL) { + nvs_domain = sched->shadow_domain->parent; + rl_domain = runlist->shadow_rl_domain; + } else if (strcmp(rl_domain->name, SHADOW_DOMAIN_NAME) == 0) { + nvs_domain = sched->shadow_domain->parent; + } else { + nvs_domain = nvs_domain_by_name(sched->sched, rl_domain->name); + if (nvs_domain == NULL) { + nvgpu_err(g, "Unable to find domain[%s]", rl_domain->name); + ret = -EINVAL; + goto done; + } + } + + if (sched->active_domain == nvs_domain->priv) { + /* Instantly switch domain and force runlist updates */ + ret = nvgpu_rl_domain_sync_submit(g, runlist, rl_domain, work->swap_buffer, work->wait_for_finish); + } else { + /* Swap buffers here even if its deferred for correctness */ + if (work->swap_buffer) { + nvgpu_runlist_swap_mem(g, rl_domain); + } + ret = 1; + } + + nvs_dbg(g, " "); + +done: + nvgpu_mutex_release(&g->sched_mutex); + work->status = ret; + nvgpu_atomic_set(&work->state, 1); + /* Wakeup threads waiting on runlist submit */ + nvgpu_cond_signal(&work->cond); +} + +static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_runlist_domain *rl_domain, bool swap_buffer, + bool wait_for_finish) +{ + struct nvgpu_nvs_scheduler *sched = g->scheduler; + struct nvgpu_nvs_worker *worker = &sched->worker; + struct nvgpu_nvs_worker_item *work; + int ret = 0; + + if (sched == NULL) { + return -ENODEV; + } + + nvs_dbg(g, " "); + + work = nvgpu_kzalloc(g, sizeof(*work)); + if (work == NULL) { + nvgpu_err(g, "Unable to allocate memory for runlist job"); + ret = -ENOMEM; + goto free_domain; + } + + work->g = g; + work->rl = rl; + work->rl_domain = rl_domain; + nvgpu_cond_init(&work->cond); + nvgpu_init_list_node(&work->list); + work->swap_buffer = swap_buffer; + work->wait_for_finish = wait_for_finish; + nvgpu_atomic_set(&work->state, 0); + + nvs_dbg(g, " enqueueing runlist submit"); + + ret = nvgpu_worker_enqueue(&worker->worker, &work->list); + if (ret != 0) { + goto fail; + } + + nvs_dbg(g, " "); + + ret = NVGPU_COND_WAIT(&work->cond, nvgpu_atomic_read(&work->state) == 1, 0U); + if (ret != 0) { + nvgpu_err(g, "Runlist submit interrupted 
while waiting for submit"); + goto fail; + } + + nvs_dbg(g, " "); + + ret = work->status; + +fail: + nvgpu_cond_destroy(&work->cond); + nvgpu_kfree(g, work); + +free_domain: + + return ret; +} + static void nvgpu_nvs_worker_wakeup_post_process(struct nvgpu_worker *worker) { struct gk20a *g = worker->g; @@ -146,19 +259,40 @@ static const struct nvgpu_worker_ops nvs_worker_ops = { static int nvgpu_nvs_worker_init(struct gk20a *g) { + int err = 0; struct nvgpu_worker *worker = &g->scheduler->worker.worker; + struct nvgpu_nvs_worker *nvs_worker = &g->scheduler->worker; + + nvgpu_cond_init(&nvs_worker->wq_init); + nvgpu_atomic_set(&nvs_worker->nvs_sched_init, 0); nvgpu_worker_init_name(worker, "nvgpu_nvs", g->name); - return nvgpu_worker_init(g, worker, &nvs_worker_ops); + err = nvgpu_worker_init(g, worker, &nvs_worker_ops); + if (err == 0) { + /* Ensure that the scheduler thread is started as soon as possible + * to keep application startup latency minimal. + */ + err = NVGPU_COND_WAIT(&nvs_worker->worker.wq, + nvgpu_atomic_read(&nvs_worker->nvs_sched_init) == 1, 0); + if (err != 0) { + nvgpu_err(g, "Interrupted while waiting for scheduler thread"); + } + } + + return err; } static void nvgpu_nvs_worker_deinit(struct gk20a *g) { struct nvgpu_worker *worker = &g->scheduler->worker.worker; + struct nvgpu_nvs_worker *nvs_worker = &g->scheduler->worker; nvgpu_worker_deinit(worker); + nvgpu_atomic_set(&nvs_worker->nvs_sched_init, 0); + nvgpu_cond_destroy(&nvs_worker->wq_init); + nvs_dbg(g, "NVS worker suspended"); } @@ -166,8 +300,10 @@ static struct nvgpu_nvs_domain * nvgpu_nvs_gen_domain(struct gk20a *g, const char *name, u64 id, u64 timeslice, u64 preempt_grace) { + struct nvgpu_fifo *f = &g->fifo; struct nvs_domain *nvs_dom = NULL; struct nvgpu_nvs_domain *nvgpu_dom = NULL; + u32 num_runlists = f->num_runlists; nvs_dbg(g, "Adding new domain: %s", name); @@ -177,6 +313,14 @@ static struct nvgpu_nvs_domain * return nvgpu_dom; } + nvgpu_dom->rl_domains = nvgpu_kzalloc(g, sizeof(*nvgpu_dom->rl_domains) * num_runlists); + if (nvgpu_dom->rl_domains == NULL) { + nvs_dbg(g, "failed to allocate memory for domain->rl_domains"); + nvgpu_kfree(g, nvgpu_dom); + nvgpu_dom = NULL; + return nvgpu_dom; + } + nvgpu_dom->id = id; nvgpu_dom->ref = 1U; @@ -185,6 +329,7 @@ static struct nvgpu_nvs_domain * if (nvs_dom == NULL) { nvs_dbg(g, "failed to create nvs domain for %s", name); + nvgpu_kfree(g, nvgpu_dom->rl_domains); nvgpu_kfree(g, nvgpu_dom); nvgpu_dom = NULL; return nvgpu_dom; @@ -195,6 +340,19 @@ static struct nvgpu_nvs_domain * return nvgpu_dom; } +static void nvgpu_nvs_link_shadow_rl_domains(struct gk20a *g, + struct nvgpu_nvs_domain *nvgpu_dom) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 num_runlists = f->num_runlists; + u32 i; + + for (i = 0U; i < num_runlists; i++) { + struct nvgpu_runlist *runlist = &f->active_runlists[i]; + nvgpu_dom->rl_domains[i] = runlist->shadow_rl_domain; + } +} + static int nvgpu_nvs_gen_shadow_domain(struct gk20a *g) { int err = 0; @@ -211,6 +369,8 @@ static int nvgpu_nvs_gen_shadow_domain(struct gk20a *g) goto error; } + nvgpu_nvs_link_shadow_rl_domains(g, nvgpu_dom); + g->scheduler->shadow_domain = nvgpu_dom; /* Set active_domain to shadow_domain during Init */ @@ -243,6 +403,8 @@ static void nvgpu_nvs_remove_shadow_domain(struct gk20a *g) nvs_dom = sched->shadow_domain->parent; nvs_domain_destroy(sched->sched, nvs_dom); + nvgpu_kfree(g, sched->shadow_domain->rl_domains); + sched->shadow_domain->rl_domains = NULL; nvgpu_kfree(g, sched->shadow_domain); sched->shadow_domain = NULL;
} @@ -339,6 +501,7 @@ int nvgpu_nvs_open(struct gk20a *g) goto unlock; } + g->nvs_worker_submit = nvgpu_nvs_worker_submit; unlock: if (err) { nvs_dbg(g, " Failed! Error code: %d", err); @@ -362,12 +525,52 @@ static u64 nvgpu_nvs_new_id(struct gk20a *g) return nvgpu_atomic64_inc_return(&g->scheduler->id_counter); } +static int nvgpu_nvs_create_rl_domain_mem(struct gk20a *g, + struct nvgpu_nvs_domain *domain, const char *name) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 i, j; + int err = 0; + + for (i = 0U; i < f->num_runlists; i++) { + domain->rl_domains[i] = nvgpu_runlist_domain_alloc(g, name); + if (domain->rl_domains[i] == NULL) { + err = -ENOMEM; + break; + } + } + + if (err != 0) { + for (j = 0; j != i; j++) { + nvgpu_runlist_domain_free(g, domain->rl_domains[j]); + domain->rl_domains[j] = NULL; + } + } + + return err; +} + +static void nvgpu_nvs_link_rl_domains(struct gk20a *g, + struct nvgpu_nvs_domain *domain) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 i; + + for (i = 0U; i < f->num_runlists; i++) { + struct nvgpu_runlist *runlist; + + runlist = &f->active_runlists[i]; + nvgpu_runlist_link_domain(runlist, domain->rl_domains[i]); + } +} + int nvgpu_nvs_add_domain(struct gk20a *g, const char *name, u64 timeslice, u64 preempt_grace, struct nvgpu_nvs_domain **pdomain) { int err = 0; struct nvs_domain *nvs_dom; struct nvgpu_nvs_domain *nvgpu_dom; + struct nvgpu_nvs_scheduler *sched = g->scheduler; nvgpu_mutex_acquire(&g->sched_mutex); @@ -383,28 +586,26 @@ int nvgpu_nvs_add_domain(struct gk20a *g, const char *name, u64 timeslice, goto unlock; } - nvs_dom = nvgpu_dom->parent; - - nvs_domain_scheduler_attach(g->scheduler->sched, nvs_dom); - - err = nvgpu_rl_domain_alloc(g, name); + err = nvgpu_nvs_create_rl_domain_mem(g, nvgpu_dom, name); if (err != 0) { - nvs_dbg(g, "failed to alloc rl domain for %s", name); - nvs_domain_unlink_and_destroy(g->scheduler->sched, nvs_dom); + nvs_domain_destroy(sched->sched, nvgpu_dom->parent); + nvgpu_kfree(g, nvgpu_dom->rl_domains); nvgpu_kfree(g, nvgpu_dom); goto unlock; } - nvgpu_dom->parent = nvs_dom; + nvgpu_nvs_link_rl_domains(g, nvgpu_dom); - /* Set the first user created domain as active domain */ - if (g->scheduler->active_domain == g->scheduler->shadow_domain) { - g->scheduler->active_domain = nvgpu_dom; - } + nvs_dom = nvgpu_dom->parent; + + nvs_domain_scheduler_attach(g->scheduler->sched, nvs_dom); + + nvgpu_dom->parent = nvs_dom; *pdomain = nvgpu_dom; unlock: nvgpu_mutex_release(&g->sched_mutex); + return err; } @@ -492,6 +693,30 @@ void nvgpu_nvs_domain_put(struct gk20a *g, struct nvgpu_nvs_domain *dom) nvgpu_mutex_release(&g->sched_mutex); } +static void nvgpu_nvs_delete_rl_domain_mem(struct gk20a *g, struct nvgpu_nvs_domain *dom) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 i; + + for (i = 0U; i < f->num_runlists; i++) { + nvgpu_runlist_domain_free(g, dom->rl_domains[i]); + dom->rl_domains[i] = NULL; + } +} + +static void nvgpu_nvs_unlink_rl_domains(struct gk20a *g, struct nvgpu_nvs_domain *domain) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 i; + + for (i = 0; i < f->num_runlists; i++) { + struct nvgpu_runlist *runlist; + runlist = &f->active_runlists[i]; + + nvgpu_runlist_unlink_domain(runlist, domain->rl_domains[i]); + } +} + int nvgpu_nvs_del_domain(struct gk20a *g, u64 dom_id) { struct nvgpu_nvs_scheduler *s = g->scheduler; @@ -519,23 +744,13 @@ int nvgpu_nvs_del_domain(struct gk20a *g, u64 dom_id) nvs_dom = nvgpu_dom->parent; - err = nvgpu_rl_domain_delete(g, nvs_dom->name); - if (err != 0) { - nvs_dbg(g, "failed to delete RL domains on 
%llu!", dom_id); - /* - * The RL domains require the existence of at least one domain; - * this path inherits that logic until it's been made more - * flexible. - */ - goto unlock; - } - + nvgpu_nvs_unlink_rl_domains(g, nvgpu_dom); + nvgpu_nvs_delete_rl_domain_mem(g, nvgpu_dom); nvgpu_dom->ref = 0U; - /* note: same wraparound logic as in RL domains to keep in sync */ if (s->active_domain == nvgpu_dom) { nvs_next = nvs_domain_get_next_domain(s->sched, nvs_dom); - /* Its the only entry in the list. Set the default domain as the active domain */ + /* It's the only entry in the list. Set the shadow domain as the active domain. */ if (nvs_next == nvs_dom) { nvs_next = s->shadow_domain->parent; } @@ -543,6 +758,8 @@ int nvgpu_nvs_del_domain(struct gk20a *g, u64 dom_id) } nvs_domain_unlink_and_destroy(s->sched, nvs_dom); + + nvgpu_kfree(g, nvgpu_dom->rl_domains); nvgpu_kfree(g, nvgpu_dom); unlock: diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c index 45728cb52..f36c8bd95 100644 --- a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c +++ b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c @@ -123,6 +123,16 @@ static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g, nvgpu_err(g, "runlist id %d is not cleaned up", runlist->id); } +#ifdef CONFIG_NVS_PRESENT + /* Special case. Submit the recovery runlist now */ + err = g->nvs_worker_submit(g, runlist, runlist->domain, true, false); +#else + err = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, true, false); +#endif + if (err != 0 && err != 1) { + nvgpu_err(g, "runlist id %d is not cleaned up", + runlist->id); + } nvgpu_tsg_abort(g, tsg, false); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index c1313a493..8928ff797 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -74,6 +74,8 @@ struct nvgpu_fifo; struct nvgpu_channel; struct nvgpu_gr; struct nvgpu_fbp; +struct nvgpu_runlist; +struct nvgpu_runlist_domain; #ifdef CONFIG_NVGPU_SIM struct sim_nvgpu; #endif @@ -900,6 +902,20 @@ struct gk20a { #ifdef CONFIG_NVS_PRESENT struct nvgpu_nvs_scheduler *scheduler; struct nvgpu_mutex sched_mutex; + + /** + * A global interface to notify the NVS thread about a domain + * modification. Wakes up the worker thread to process the domain + * submission synchronously. If the submitted rl_domain is currently + * active, it is updated immediately; otherwise the call returns and + * the NVS thread eventually schedules the domain update. + * + * The NVS lock must not be held while invoking this interface. + * The runlist lock must be held while invoking this interface. + */ + int (*nvs_worker_submit)(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_runlist_domain *rl_domain, bool swap_buffer, + bool wait_for_finish); #endif #ifdef CONFIG_NVGPU_ENABLE_MISC_EC diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvs.h b/drivers/gpu/nvgpu/include/nvgpu/nvs.h index 1ba508d6e..2c2ac81a3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvs.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvs.h @@ -39,6 +39,8 @@ struct gk20a; struct nvgpu_nvs_domain_ioctl; +struct nvgpu_runlist; +struct nvgpu_runlist_domain; /* * NvGPU KMD domain implementation details for nvsched. @@ -71,9 +73,16 @@ struct nvgpu_nvs_domain { * Userspace API on the device nodes.
*/ struct nvgpu_nvs_domain_ioctl *ioctl; + + /* + * One rl domain corresponding to every runlist. + */ + struct nvgpu_runlist_domain **rl_domains; }; struct nvgpu_nvs_worker { + nvgpu_atomic_t nvs_sched_init; + struct nvgpu_cond wq_init; struct nvgpu_worker worker; struct nvgpu_timeout timeout; u32 current_timeout; diff --git a/drivers/gpu/nvgpu/include/nvgpu/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/runlist.h index 6a2da5d2d..a90192f45 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/runlist.h +++ b/drivers/gpu/nvgpu/include/nvgpu/runlist.h @@ -26,6 +26,7 @@ #include #include #include +#include /** * @file */ @@ -139,6 +140,14 @@ struct nvgpu_runlist_domain { /** Currently active buffer submitted for hardware. */ struct nvgpu_runlist_mem *mem_hw; + + /** + * When a channel is added or removed, this value is set to true. + * When this rl domain is scheduled for submission to the h/w, + * mem and mem_hw are swapped, mem_hw is submitted, and the value + * is set back to false. + */ + nvgpu_atomic_t pending_update; }; struct nvgpu_runlist { @@ -190,11 +199,34 @@ struct nvgpu_runlist { /** @endcond DOXYGEN_SHOULD_SKIP_THIS */ }; -int nvgpu_rl_domain_alloc(struct gk20a *g, const char *name); -int nvgpu_rl_domain_delete(struct gk20a *g, const char *name); +bool nvgpu_rl_domain_exists(struct gk20a *g, const char *name); +struct nvgpu_runlist_domain *nvgpu_runlist_domain_alloc(struct gk20a *g, + const char *name); +void nvgpu_runlist_domain_free(struct gk20a *g, + struct nvgpu_runlist_domain *domain); +void nvgpu_runlist_swap_mem(struct gk20a *g, struct nvgpu_runlist_domain *domain); +void nvgpu_runlist_link_domain(struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *domain); +void nvgpu_runlist_unlink_domain(struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *domain); struct nvgpu_runlist_domain *nvgpu_rl_domain_get(struct gk20a *g, u32 runlist_id, const char *name); +/** + * @brief Schedule a runlist domain + * + * @param g Global gk20a struct + * @param runlist Runlist context + * @param next_domain Domain to be scheduled; if NULL, the shadow domain is used + * @param swap_buffers Swap the domain's mem and mem_hw buffers before the submit + * @param wait_for_finish Wait for the runlist update to complete + * @return 0 in case of success, less than 0 otherwise. + * + * Submit next_domain if there is a pending update.
+ */ +int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *next_domain, bool swap_buffers, + bool wait_for_finish); + static inline struct nvgpu_runlist_domain * nvgpu_runlist_domain_from_domains_list(struct nvgpu_list_node *node) { @@ -202,7 +234,7 @@ nvgpu_runlist_domain_from_domains_list(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct nvgpu_runlist_domain, domains_list)); } -void nvgpu_runlist_tick(struct gk20a *g); +void nvgpu_runlist_tick(struct gk20a *g, struct nvgpu_runlist_domain **rl_domain); /** * @brief Rebuild runlist diff --git a/libs/dgpu/libnvgpu-drv-dgpu_safe.export b/libs/dgpu/libnvgpu-drv-dgpu_safe.export index 3dd8481c0..0be8774aa 100644 --- a/libs/dgpu/libnvgpu-drv-dgpu_safe.export +++ b/libs/dgpu/libnvgpu-drv-dgpu_safe.export @@ -673,7 +673,6 @@ nvgpu_runlist_unlock_active_runlists nvgpu_runlist_unlock_runlists nvgpu_runlist_update nvgpu_runlist_update_locked -nvgpu_rl_domain_alloc nvgpu_rwsem_init nvgpu_rwsem_down_read nvgpu_rwsem_down_write diff --git a/libs/igpu/libnvgpu-drv-igpu_safe.export b/libs/igpu/libnvgpu-drv-igpu_safe.export index 8adf0a58a..041be4cde 100644 --- a/libs/igpu/libnvgpu-drv-igpu_safe.export +++ b/libs/igpu/libnvgpu-drv-igpu_safe.export @@ -691,7 +691,6 @@ nvgpu_runlist_unlock_active_runlists nvgpu_runlist_unlock_runlists nvgpu_runlist_update nvgpu_runlist_update_locked -nvgpu_rl_domain_alloc nvgpu_rwsem_init nvgpu_rwsem_down_read nvgpu_rwsem_down_write diff --git a/userspace/units/fifo/runlist/gk20a/nvgpu-runlist-gk20a.c b/userspace/units/fifo/runlist/gk20a/nvgpu-runlist-gk20a.c index 4de5b083b..7435cee70 100644 --- a/userspace/units/fifo/runlist/gk20a/nvgpu-runlist-gk20a.c +++ b/userspace/units/fifo/runlist/gk20a/nvgpu-runlist-gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -81,8 +81,6 @@ int test_gk20a_runlist_hw_submit(struct unit_module *m, struct nvgpu_runlist *runlist = g->fifo.runlists[runlist_id]; u32 count; - nvgpu_rl_domain_alloc(g, "(default)"); - for (count = 0; count < 2; count++) { nvgpu_writel(g, fifo_runlist_r(), 0);
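The hunks above change the runlist update contract: nvgpu_runlist_update_locked() now only rebuilds domain->mem and sets pending_update, while the actual hardware submit is routed through g->nvs_worker_submit (or nvgpu_rl_domain_sync_submit when CONFIG_NVS_PRESENT is not set), which returns 1 when the submit is deferred to the NVS worker. The sketch below is a minimal illustration of that calling convention, closely following nvgpu_runlist_do_update() in this patch; it is not part of the patch, and the wrapper name example_update_and_submit() is hypothetical.

/*
 * Illustrative sketch only (not part of the patch): how a caller that
 * already holds runlist->runlist_lock is expected to combine the rebuild
 * step with the new submit hook. The wrapper name is hypothetical.
 */
static int example_update_and_submit(struct gk20a *g, struct nvgpu_runlist *rl,
		struct nvgpu_runlist_domain *domain, struct nvgpu_channel *ch,
		bool add, bool wait)
{
	int ret;

	/* Rebuild domain->mem and mark the domain as having a pending update. */
	ret = nvgpu_runlist_update_locked(g, rl, domain, ch, add, wait);
	if (ret != 0) {
		return ret;
	}

#ifdef CONFIG_NVS_PRESENT
	/* Wake the NVS worker; it swaps mem/mem_hw and submits immediately
	 * only if the domain is currently active.
	 */
	ret = g->nvs_worker_submit(g, rl, domain, true, wait);
#else
	/* No scheduler thread: submit synchronously when pending_update is set. */
	ret = nvgpu_rl_domain_sync_submit(g, rl, domain, true, wait);
#endif
	if (ret == 1) {
		/* Deferred: the NVS worker will submit the domain later. */
		ret = 0;
	}

	return ret;
}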