diff --git a/drivers/gpu/nvgpu/common/fifo/runlist.c b/drivers/gpu/nvgpu/common/fifo/runlist.c
index dab8be749..41bee11ec 100644
--- a/drivers/gpu/nvgpu/common/fifo/runlist.c
+++ b/drivers/gpu/nvgpu/common/fifo/runlist.c
@@ -461,15 +461,18 @@ static int nvgpu_runlist_domain_actual_submit(struct gk20a *g, struct nvgpu_runl
 	nvgpu_atomic_set(&rl->domain->pending_update, 0);
 
-	g->ops.runlist.hw_submit(g, rl);
+	/* No submit exists for VGPU */
+	if (g->ops.runlist.hw_submit != NULL) {
+		g->ops.runlist.hw_submit(g, rl);
 
-	if (wait_for_finish) {
-		ret = g->ops.runlist.wait_pending(g, rl);
-		if (ret == -ETIMEDOUT) {
-			nvgpu_err(g, "runlist %d update timeout", rl->id);
-			/* trigger runlist update timeout recovery */
-			return ret;
+		if (wait_for_finish) {
+			ret = g->ops.runlist.wait_pending(g, rl);
+			if (ret == -ETIMEDOUT) {
+				nvgpu_err(g, "runlist %d update timeout", rl->id);
+				/* trigger runlist update timeout recovery */
+				return ret;
+			}
 		}
 	}
 
@@ -681,44 +684,6 @@ static int runlist_submit_powered(struct gk20a *g, struct nvgpu_runlist *runlist
 	return err;
 }
 
-static int runlist_select_and_submit(struct gk20a *g, struct nvgpu_runlist *runlist,
-		struct nvgpu_runlist_domain *next_domain, bool wait_for_finish)
-{
-	int err;
-
-	rl_dbg(g, "Runlist[%u]: switching to domain %llu",
-		runlist->id, next_domain->domain_id);
-
-	runlist->domain = next_domain;
-
-	gk20a_busy_noresume(g);
-	if (nvgpu_is_powered_off(g)) {
-		rl_dbg(g, "Runlist[%u]: power is off, skip submit",
-			runlist->id);
-		gk20a_idle_nosuspend(g);
-		return 0;
-	}
-
-	err = gk20a_busy(g);
-	gk20a_idle_nosuspend(g);
-
-	if (err != 0) {
-		nvgpu_err(g, "failed to hold power for runlist submit");
-		/*
-		 * probably shutting down though, so don't bother propagating
-		 * the error. Power is already on when the domain scheduler is
-		 * actually in use.
-		 */
-		return err;
-	}
-
-	err = runlist_submit_powered(g, runlist, next_domain, false, wait_for_finish);
-
-	gk20a_idle(g);
-
-	return err;
-}
-
 int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist,
 		struct nvgpu_runlist_domain *next_domain, bool swap_buffers,
 		bool wait_for_finish)
@@ -764,7 +729,7 @@ static int runlist_switch_domain_and_submit(struct gk20a *g,
 		}
 	}
 
-	ret = runlist_select_and_submit(g, runlist, rl_domain, false);
+	ret = runlist_submit_powered(g, runlist, rl_domain, false, false);
 
 	return ret;
 }
diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index cbb099d2a..c07446195 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -313,6 +313,9 @@ int nvgpu_prepare_poweroff(struct gk20a *g)
 		}
 	}
 
+	/* Ensure that the worker thread is paused before the engines are suspended below */
+	nvgpu_nvs_worker_pause(g);
+
 #ifdef CONFIG_NVGPU_LS_PMU
 	/* disable elpg before gr or fifo suspend */
 	if (g->support_ls_pmu) {
diff --git a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
index 60e340938..2ef9d85ef 100644
--- a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
+++ b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
@@ -85,8 +85,11 @@ static void nvgpu_nvs_worker_poll_init(struct nvgpu_worker *worker)
 	nvgpu_timeout_init_cpu_timer_sw(worker->g, &nvs_worker->timeout,
 			nvs_worker->current_timeout);
 
-	nvgpu_atomic_set(&nvs_worker->nvs_sched_init, 1);
-	nvgpu_cond_signal(&nvs_worker->worker.wq);
+	nvgpu_atomic_set(&nvs_worker->nvs_sched_state, NVS_WORKER_STATE_RUNNING);
+
+	/* Atomic set() and read() operations do not have implicit barriers */
+	nvgpu_wmb();
+	nvgpu_cond_signal(&nvs_worker->wq_request);
 }
 
 static u32 nvgpu_nvs_worker_wakeup_timeout(struct nvgpu_worker *worker)
@@ -117,7 +120,8 @@ static u64 nvgpu_nvs_tick(struct gk20a *g)
 		nvs_next = sched->shadow_domain->parent;
 	}
 
-	if (nvs_next->priv == domain) {
+
+	if (nvs_next->priv == sched->shadow_domain) {
 		/*
 		 * This entire thread is going to be changed soon.
 		 * The above check ensures that there are no other domain,
@@ -236,7 +240,10 @@ static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
 		goto fail;
 	}
 
-	nvs_dbg(g, " ");
+	/* Add a barrier here to ensure that the worker thread is interrupted
+	 * before waiting on the condition below.
+	 */
+	nvgpu_mb();
 	ret = NVGPU_COND_WAIT(&work->cond,
 			nvgpu_atomic_read(&work->state) == 1, 0U);
 	if (ret != 0) {
@@ -257,6 +264,59 @@ free_domain:
 	return ret;
 }
 
+static bool nvgpu_nvs_worker_wakeup_condition(struct nvgpu_worker *worker)
+{
+	struct nvgpu_nvs_worker *nvs_worker =
+		nvgpu_nvs_worker_from_worker(worker);
+	struct gk20a *g = worker->g;
+	int nvs_worker_state;
+
+	nvs_worker_state = nvgpu_atomic_read(&nvs_worker->nvs_sched_state);
+
+	if (nvs_worker_state == NVS_WORKER_STATE_SHOULD_RESUME) {
+		/* Set the state to running. The worker will automatically update the timeout
+		 * in the subsequent if block as the previous timeout is 0.
+		 */
+		nvgpu_atomic_set(&nvs_worker->nvs_sched_state, NVS_WORKER_STATE_RUNNING);
+
+		/* Atomic set does not have an implicit barrier.
+		 * Ensure that the value is updated before invoking the signal below.
+ */ + nvgpu_wmb(); + /* Signal waiting threads about resume */ + nvgpu_cond_signal(&nvs_worker->wq_request); + + nvs_dbg(g, "nvs set for resume"); + } else if (nvs_worker_state == NVS_WORKER_STATE_SHOULD_PAUSE) { + return true; + } + + return false; +} + +static void nvgpu_nvs_handle_pause_requests(struct nvgpu_worker *worker) +{ + struct gk20a *g = worker->g; + struct nvgpu_nvs_worker *nvs_worker = + nvgpu_nvs_worker_from_worker(worker); + int nvs_worker_state = nvgpu_atomic_read(&nvs_worker->nvs_sched_state); + + if (nvs_worker_state == NVS_WORKER_STATE_SHOULD_PAUSE) { + nvgpu_atomic_set(&nvs_worker->nvs_sched_state, NVS_WORKER_STATE_PAUSED); + /* Set the worker->timeout to 0, to allow the worker thread to sleep infinitely. */ + nvgpu_timeout_init_cpu_timer_sw(g, &nvs_worker->timeout, 0); + + /* Atomic_Set doesn't have an implicit barrier. + * Ensure, that value is updated before invoking signal below. + */ + nvgpu_wmb(); + /* Wakeup user threads waiting for pause state */ + nvgpu_cond_signal(&nvs_worker->wq_request); + + nvs_dbg(g, "nvs set for pause"); + } +} + static void nvgpu_nvs_worker_wakeup_post_process(struct nvgpu_worker *worker) { struct gk20a *g = worker->g; @@ -274,23 +334,92 @@ static void nvgpu_nvs_worker_wakeup_post_process(struct nvgpu_worker *worker) nvgpu_timeout_init_cpu_timer_sw(g, &nvs_worker->timeout, nvs_worker->current_timeout); } + + nvgpu_nvs_handle_pause_requests(worker); } static const struct nvgpu_worker_ops nvs_worker_ops = { .pre_process = nvgpu_nvs_worker_poll_init, + .wakeup_condition = nvgpu_nvs_worker_wakeup_condition, .wakeup_timeout = nvgpu_nvs_worker_wakeup_timeout, .wakeup_process_item = nvgpu_nvs_worker_wakeup_process_item, .wakeup_post_process = nvgpu_nvs_worker_wakeup_post_process, }; +void nvgpu_nvs_worker_pause(struct gk20a *g) +{ + struct nvgpu_worker *worker = &g->scheduler->worker.worker; + struct nvgpu_nvs_worker *nvs_worker = &g->scheduler->worker; + int nvs_worker_state; + + if (g->is_virtual) { + return; + } + + nvs_worker_state = nvgpu_atomic_cmpxchg(&nvs_worker->nvs_sched_state, + NVS_WORKER_STATE_RUNNING, NVS_WORKER_STATE_SHOULD_PAUSE); + + if (nvs_worker_state == NVS_WORKER_STATE_RUNNING) { + nvs_dbg(g, "Setting thread state to sleep."); + /* wakeup worker forcibly. */ + nvgpu_cond_signal_interruptible(&worker->wq); + + /* Ensure signal has happened before waiting */ + nvgpu_mb(); + + NVGPU_COND_WAIT(&nvs_worker->wq_request, + nvgpu_atomic_read( + &nvs_worker->nvs_sched_state) == NVS_WORKER_STATE_PAUSED, 0); + + nvs_dbg(g, "Thread is paused"); + } else { + nvs_dbg(g, "Thread state is not running."); + } +} + +void nvgpu_nvs_worker_resume(struct gk20a *g) +{ + struct nvgpu_worker *worker = &g->scheduler->worker.worker; + struct nvgpu_nvs_worker *nvs_worker = &g->scheduler->worker; + int nvs_worker_state; + + if (g->is_virtual) { + return; + } + + nvs_worker_state = nvgpu_atomic_cmpxchg(&nvs_worker->nvs_sched_state, + NVS_WORKER_STATE_PAUSED, NVS_WORKER_STATE_SHOULD_RESUME); + + if (nvs_worker_state == NVS_WORKER_STATE_PAUSED) { + nvs_dbg(g, "Waiting for nvs thread to be resumed"); + /* wakeup worker forcibly. 
*/ + nvgpu_cond_signal_interruptible(&worker->wq); + + /* Ensure signal has happened before waiting */ + nvgpu_mb(); + + NVGPU_COND_WAIT(&nvs_worker->wq_request, + nvgpu_atomic_read( + &nvs_worker->nvs_sched_state) == NVS_WORKER_STATE_RUNNING, 0); + + nvs_dbg(g, "Thread resumed"); + } else { + nvs_dbg(g, "Thread not paused"); + } +} + static int nvgpu_nvs_worker_init(struct gk20a *g) { int err = 0; struct nvgpu_worker *worker = &g->scheduler->worker.worker; struct nvgpu_nvs_worker *nvs_worker = &g->scheduler->worker; - nvgpu_cond_init(&nvs_worker->wq_init); - nvgpu_atomic_set(&nvs_worker->nvs_sched_init, 0); + if (g->is_virtual) { + return 0; + } + + nvgpu_cond_init(&nvs_worker->wq_request); + (void)nvgpu_atomic_xchg(&nvs_worker->nvs_sched_state, NVS_WORKER_STATE_STOPPED); nvgpu_worker_init_name(worker, "nvgpu_nvs", g->name); @@ -299,11 +428,15 @@ static int nvgpu_nvs_worker_init(struct gk20a *g) /* Ensure that scheduler thread is started as soon as possible to handle * minimal uptime for applications. */ - err = NVGPU_COND_WAIT(&nvs_worker->worker.wq, - nvgpu_atomic_read(&nvs_worker->nvs_sched_init) == 1, 0); + err = NVGPU_COND_WAIT(&nvs_worker->wq_request, + nvgpu_atomic_read( + &nvs_worker->nvs_sched_state) == NVS_WORKER_STATE_RUNNING, + 0); if (err != 0) { nvgpu_err(g, "Interrupted while waiting for scheduler thread"); } + + nvs_dbg(g, "Thread started"); } return err; @@ -314,10 +447,14 @@ static void nvgpu_nvs_worker_deinit(struct gk20a *g) struct nvgpu_worker *worker = &g->scheduler->worker.worker; struct nvgpu_nvs_worker *nvs_worker = &g->scheduler->worker; + if (g->is_virtual) { + return; + } + nvgpu_worker_deinit(worker); - nvgpu_atomic_set(&nvs_worker->nvs_sched_init, 0); - nvgpu_cond_destroy(&nvs_worker->wq_init); + nvgpu_atomic_set(&nvs_worker->nvs_sched_state, NVS_WORKER_STATE_STOPPED); + nvgpu_cond_destroy(&nvs_worker->wq_request); nvs_dbg(g, "NVS worker suspended"); } @@ -495,7 +632,9 @@ int nvgpu_nvs_open(struct gk20a *g) if (g->scheduler != NULL) { /* resuming from railgate */ - goto unlock; + nvgpu_mutex_release(&g->sched_mutex); + nvgpu_nvs_worker_resume(g); + return err; } g->scheduler = nvgpu_kzalloc(g, sizeof(*g->scheduler)); @@ -525,6 +664,9 @@ int nvgpu_nvs_open(struct gk20a *g) goto unlock; } + /* Ensure all the previous writes are seen */ + nvgpu_wmb(); + err = nvgpu_nvs_gen_shadow_domain(g); if (err != 0) { goto unlock; diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvs.h b/drivers/gpu/nvgpu/include/nvgpu/nvs.h index 2426a1908..c0937d2b5 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvs.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvs.h @@ -125,9 +125,15 @@ struct nvgpu_nvs_domain { struct nvgpu_runlist_domain **rl_domains; }; +#define NVS_WORKER_STATE_STOPPED 0 +#define NVS_WORKER_STATE_RUNNING 1 +#define NVS_WORKER_STATE_SHOULD_PAUSE 2 +#define NVS_WORKER_STATE_PAUSED 3 +#define NVS_WORKER_STATE_SHOULD_RESUME 4 + struct nvgpu_nvs_worker { - nvgpu_atomic_t nvs_sched_init; - struct nvgpu_cond wq_init; + nvgpu_atomic_t nvs_sched_state; + struct nvgpu_cond wq_request; struct nvgpu_worker worker; struct nvgpu_timeout timeout; u32 current_timeout; @@ -248,6 +254,8 @@ const char *nvgpu_nvs_domain_get_name(struct nvgpu_nvs_domain *dom); void nvgpu_nvs_ctrl_fifo_lock_queues(struct gk20a *g); void nvgpu_nvs_ctrl_fifo_unlock_queues(struct gk20a *g); +void nvgpu_nvs_worker_pause(struct gk20a *g); +void nvgpu_nvs_worker_resume(struct gk20a *g); struct nvgpu_nvs_domain_ctrl_fifo *nvgpu_nvs_ctrl_fifo_create(struct gk20a *g); bool nvgpu_nvs_ctrl_fifo_user_exists(struct 
nvgpu_nvs_domain_ctrl_fifo *sched_ctrl, int pid, bool rw); @@ -288,6 +296,18 @@ void nvgpu_nvs_ctrl_fifo_erase_queue(struct gk20a *g, struct nvgpu_nvs_ctrl_queu void nvgpu_nvs_ctrl_fifo_erase_all_queues(struct gk20a *g); #else + + +static inline void nvgpu_nvs_worker_pause(struct gk20a *g) +{ + (void)g; +} + +static inline void nvgpu_nvs_worker_resume(struct gk20a *g) +{ + (void)g; +} + static inline int nvgpu_nvs_init(struct gk20a *g) { (void)g;