diff --git a/drivers/gpu/nvgpu/common/fifo/runlist.c b/drivers/gpu/nvgpu/common/fifo/runlist.c
index 41bee11ec..07922ae83 100644
--- a/drivers/gpu/nvgpu/common/fifo/runlist.c
+++ b/drivers/gpu/nvgpu/common/fifo/runlist.c
@@ -31,7 +31,9 @@
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #ifdef CONFIG_NVGPU_LS_PMU
 #include
@@ -442,37 +444,37 @@ void nvgpu_runlist_swap_mem(struct gk20a *g, struct nvgpu_runlist_domain *domain
     */
    rl_dbg(g, "Swapping mem for rl domain[%llu]", domain->domain_id);
 
+   nvgpu_spinlock_acquire(&domain->lock);
+
    mem_tmp = domain->mem;
    domain->mem = domain->mem_hw;
    domain->mem_hw = mem_tmp;
+
+   nvgpu_spinlock_release(&domain->lock);
 }
 
 static int nvgpu_runlist_domain_actual_submit(struct gk20a *g, struct nvgpu_runlist *rl,
-       bool swap_buffer, bool wait_for_finish)
+       bool wait_for_finish)
 {
    int ret = 0;
 
    rl_dbg(g, "Runlist[%u]: submitting domain[%llu]",
        rl->id, rl->domain->domain_id);
 
-   if (swap_buffer) {
-       nvgpu_runlist_swap_mem(g, rl->domain);
-   }
+   /* Here, it's essential to synchronize between hw_submit
+    * and domain mem swaps.
+    */
+   nvgpu_spinlock_acquire(&rl->domain->lock);
+   g->ops.runlist.hw_submit(g, rl);
+   nvgpu_spinlock_release(&rl->domain->lock);
 
-   nvgpu_atomic_set(&rl->domain->pending_update, 0);
+   if (wait_for_finish) {
+       ret = g->ops.runlist.wait_pending(g, rl);
+       if (ret == -ETIMEDOUT) {
+           nvgpu_err(g, "runlist %d update timeout", rl->id);
+           /* trigger runlist update timeout recovery */
+           return ret;
 
-   /* No submit exists for VGPU */
-   if (g->ops.runlist.hw_submit != NULL) {
-       g->ops.runlist.hw_submit(g, rl);
-
-       if (wait_for_finish) {
-           ret = g->ops.runlist.wait_pending(g, rl);
-           if (ret == -ETIMEDOUT) {
-               nvgpu_err(g, "runlist %d update timeout", rl->id);
-               /* trigger runlist update timeout recovery */
-               return ret;
-
-           }
        }
    }
@@ -511,8 +513,6 @@ static int nvgpu_runlist_update_mem_locked(struct gk20a *g, struct nvgpu_runlist
        return ret;
    }
 
-   nvgpu_atomic_set(&domain->pending_update, 1);
-
    return ret;
 }
@@ -544,6 +544,8 @@ int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl,
        return ret;
    }
 
+   nvgpu_runlist_swap_mem(g, domain);
+
    return ret;
 }
@@ -570,12 +572,10 @@ int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next,
            g, g->pmu, PMU_MUTEX_ID_FIFO, &token);
 #endif
 
-   nvgpu_atomic_set(&runlist->domain->pending_update, 1);
-
 #ifdef CONFIG_NVS_PRESENT
-   ret = g->nvs_worker_submit(g, runlist, runlist->domain, false, wait_preempt);
+   ret = g->nvs_worker_submit(g, runlist, runlist->domain, wait_preempt);
 #else
-   ret = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, false, wait_preempt);
+   ret = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, wait_preempt);
 #endif
    if (ret != 0) {
        if (ret == 1) {
@@ -639,9 +639,9 @@ static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
    ret = nvgpu_runlist_update_locked(g, rl, domain, ch, add, wait_for_finish);
    if (ret == 0) {
 #ifdef CONFIG_NVS_PRESENT
-       ret = g->nvs_worker_submit(g, rl, domain, true, wait_for_finish);
+       ret = g->nvs_worker_submit(g, rl, domain, wait_for_finish);
 #else
-       ret = nvgpu_rl_domain_sync_submit(g, rl, domain, true, wait_for_finish);
+       ret = nvgpu_rl_domain_sync_submit(g, rl, domain, wait_for_finish);
 #endif
        /* Deferred Update */
        if (ret == 1) {
@@ -669,8 +669,7 @@ static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
 * This is expected to be called only when device is powered on.
 */
 static int runlist_submit_powered(struct gk20a *g, struct nvgpu_runlist *runlist,
-       struct nvgpu_runlist_domain *next_domain, bool swap_buffer,
-       bool wait_for_finish)
+       struct nvgpu_runlist_domain *next_domain, bool wait_for_finish)
 {
    int err;
@@ -679,14 +678,13 @@ static int runlist_submit_powered(struct gk20a *g, struct nvgpu_runlist *runlist
    rl_dbg(g, "Runlist[%u]: switching to domain %llu",
        runlist->id, next_domain->domain_id);
 
-   err = nvgpu_runlist_domain_actual_submit(g, runlist, swap_buffer, wait_for_finish);
+   err = nvgpu_runlist_domain_actual_submit(g, runlist, wait_for_finish);
 
    return err;
 }
 
 int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist,
-       struct nvgpu_runlist_domain *next_domain, bool swap_buffers,
-       bool wait_for_finish)
+       struct nvgpu_runlist_domain *next_domain, bool wait_for_finish)
 {
    int err = 0;
@@ -694,10 +692,7 @@ int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist,
        next_domain = runlist->shadow_rl_domain;
    }
 
-   if (nvgpu_atomic_read(&next_domain->pending_update) == 1) {
-       err = runlist_submit_powered(g, runlist, next_domain, swap_buffers,
-               wait_for_finish);
-   }
+   err = runlist_submit_powered(g, runlist, next_domain, wait_for_finish);
 
    return err;
 }
@@ -706,30 +701,8 @@ static int runlist_switch_domain_and_submit(struct gk20a *g,
        struct nvgpu_runlist *runlist, struct nvgpu_runlist_domain *rl_domain)
 {
    int ret = 0;
-   struct nvgpu_runlist_domain *prev_rl_domain = runlist->domain;
 
-   /* If no user domains exist, submit the shadow_rl_domain if
-    * pending is set to true. When the last user domain is removed,
-    * shadow_rl_domain will have pending_update set to true.
-    * Eventually, this logic will change. For manual mode, this needs
-    * to be submitted irrespective of the status of pending_update.
-    */
-   if (nvgpu_list_empty(&runlist->user_rl_domains)) {
-       if (nvgpu_atomic_read(&rl_domain->pending_update) == 0) {
-           return 0;
-       }
-   } else {
-       /* If only one user domain exists, return if no pending
-        * update exists.
-        */
-       if (prev_rl_domain == rl_domain) {
-           if (nvgpu_atomic_read(&prev_rl_domain->pending_update) == 0) {
-               return 0;
-           }
-       }
-   }
-
-   ret = runlist_submit_powered(g, runlist, rl_domain, false, false);
+   ret = runlist_submit_powered(g, runlist, rl_domain, false);
 
    return ret;
 }
@@ -1116,7 +1089,7 @@ struct nvgpu_runlist_domain *nvgpu_runlist_domain_alloc(struct gk20a *g,
        goto free_active_channels;
    }
 
-   nvgpu_atomic_set(&domain->pending_update, 0);
+   nvgpu_spinlock_init(&domain->lock);
 
    return domain;
 free_active_channels:
diff --git a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
index d73a10e2f..960d0705e 100644
--- a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
+++ b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
@@ -49,7 +49,6 @@ struct nvgpu_nvs_worker_item {
    struct nvgpu_runlist *rl;
    struct nvgpu_runlist_domain *rl_domain;
    struct nvgpu_cond cond;
-   bool swap_buffer;
    bool wait_for_finish;
    bool locked;
    int status;
@@ -83,7 +82,7 @@ static void nvgpu_nvs_worker_poll_init(struct nvgpu_worker *worker)
    /* 100 ms is a nice arbitrary timeout for default status */
    nvs_worker->current_timeout = 100;
    nvgpu_timeout_init_cpu_timer_sw(worker->g, &nvs_worker->timeout,
-       nvs_worker->current_timeout);
+           nvs_worker->current_timeout);
 
    nvgpu_atomic_set(&nvs_worker->nvs_sched_state, NVS_WORKER_STATE_RUNNING);
@@ -120,13 +119,12 @@ static u64 nvgpu_nvs_tick(struct gk20a *g)
        nvs_next = sched->shadow_domain->parent;
    }
-
    if (nvs_next->priv == sched->shadow_domain) {
        /*
         * This entire thread is going to be changed soon.
         * The above check ensures that there are no other domain,
-        * besides the active domain. So, its safe to simply return here.
-        * Any active domain updates shall are taken care of during
+        * besides the shadow domain. So, it's safe to simply return here.
+        * Any shadow domain updates are taken care of during
         * nvgpu_nvs_worker_wakeup_process_item().
         *
         * This is a temporary hack for legacy cases where we donot have
@@ -181,30 +179,26 @@ static void nvgpu_nvs_worker_wakeup_process_item(struct nvgpu_list_node *work_it
        }
    }
 
+   nvs_dbg(g, "Thread sync started");
+
    if (sched->active_domain == nvs_domain->priv) {
        /* Instantly switch domain and force runlist updates */
-       ret = nvgpu_rl_domain_sync_submit(g, runlist, rl_domain, work->swap_buffer, work->wait_for_finish);
+       ret = nvgpu_rl_domain_sync_submit(g, runlist, rl_domain, work->wait_for_finish);
+       nvs_dbg(g, "Active thread updated");
    } else {
-       /* Swap buffers here even if its deferred for correctness */
-       if (work->swap_buffer) {
-           nvgpu_runlist_swap_mem(g, rl_domain);
-       }
        ret = 1;
    }
 
-   nvs_dbg(g, " ");
-
 done:
    nvgpu_mutex_release(&g->sched_mutex);
 
    work->status = ret;
-   nvgpu_atomic_set(&work->state, 1);
+   (void)nvgpu_atomic_xchg(&work->state, 1);
 
    /* Wakeup threads waiting on runlist submit */
    nvgpu_cond_signal(&work->cond);
 }
 
 static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
-       struct nvgpu_runlist_domain *rl_domain, bool swap_buffer,
-       bool wait_for_finish)
+       struct nvgpu_runlist_domain *rl_domain, bool wait_for_finish)
 {
    struct nvgpu_nvs_scheduler *sched = g->scheduler;
    struct nvgpu_nvs_worker *worker = &sched->worker;
@@ -229,7 +223,6 @@ static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
    work->rl_domain = rl_domain;
    nvgpu_cond_init(&work->cond);
    nvgpu_init_list_node(&work->list);
-   work->swap_buffer = swap_buffer;
    work->wait_for_finish = wait_for_finish;
    nvgpu_atomic_set(&work->state, 0);
@@ -392,6 +385,7 @@ void nvgpu_nvs_worker_resume(struct gk20a *g)
    if (nvs_worker_state == NVS_WORKER_STATE_PAUSED) {
        nvs_dbg(g, "Waiting for nvs thread to be resumed");
 
+       /* wakeup worker forcibly. */
        nvgpu_cond_signal_interruptible(&worker->wq);
@@ -684,6 +678,9 @@ int nvgpu_nvs_open(struct gk20a *g)
        goto unlock;
    }
 
+   /* Ensure all the previous writes are seen */
+   nvgpu_wmb();
+
    err = nvgpu_nvs_worker_init(g);
    if (err != 0) {
        nvgpu_nvs_remove_shadow_domain(g);
diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c
index f36c8bd95..b7c99896b 100644
--- a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c
@@ -125,9 +125,9 @@ static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g,
    }
 #ifdef CONFIG_NVS_PRESENT
    /* Special case. Submit the recovery runlist now */
-   err = g->nvs_worker_submit(g, runlist, runlist->domain, true, false);
+   err = g->nvs_worker_submit(g, runlist, runlist->domain, false);
 #else
-   err = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, true, false);
+   err = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, false);
 #endif
    if (err != 0 && err != 1) {
        nvgpu_err(g, "runlist id %d is not cleaned up",
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index f24f430ce..0b339b8ca 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -912,8 +912,7 @@ struct gk20a {
     * Must hold runlist lock while invoking this interface.
     */
    int (*nvs_worker_submit)(struct gk20a *g, struct nvgpu_runlist *rl,
-           struct nvgpu_runlist_domain *rl_domain, bool swap_buffer,
-           bool wait_for_finish);
+           struct nvgpu_runlist_domain *rl_domain, bool wait_for_finish);
 #endif
 
 #ifdef CONFIG_NVGPU_ENABLE_MISC_EC
diff --git a/drivers/gpu/nvgpu/include/nvgpu/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/runlist.h
index 9007cb70f..7a6f79bd3 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/runlist.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/runlist.h
@@ -136,19 +136,18 @@ struct nvgpu_runlist_domain {
    /** Bitmap of active TSGs in the runlist domain. One bit per tsgid. */
    unsigned long *active_tsgs;
 
+   /* This lock is used to explicitly protect mem and mem_hw.
+    * There is a need to control access to the two buffers as long as the
+    * NVS thread is available. This can be removed once KMD has completely
+    * switched to GSP.
+    */
+   struct nvgpu_spinlock lock;
+
    /** Runlist buffer free to use in sw. Swapped with another mem on next load. */
    struct nvgpu_runlist_mem *mem;
 
    /** Currently active buffer submitted for hardware. */
    struct nvgpu_runlist_mem *mem_hw;
-
-   /**
-    * When a channel is removed or added, this value is set to true.
-    * When this rl domain is scheduled to be submitted to the h/w,
-    * swap mem and mem_hw and submit mem_hw and then its value is
-    * set to false.
-    */
-   nvgpu_atomic_t pending_update;
 };
 
 struct nvgpu_runlist {
@@ -225,8 +224,7 @@ struct nvgpu_runlist_domain *nvgpu_rl_domain_get(struct gk20a *g, u32 runlist_id
 * Submit next_domain if there is a pending update.
 */
 int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist,
-       struct nvgpu_runlist_domain *next_domain, bool swap_buffers,
-       bool wait_for_finish);
+       struct nvgpu_runlist_domain *next_domain, bool wait_for_finish);
 
 static inline struct nvgpu_runlist_domain *
 nvgpu_runlist_domain_from_domains_list(struct nvgpu_list_node *node)
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index b55cbaa72..98bb1e546 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -134,6 +134,7 @@ static void nvgpu_init_vars(struct gk20a *g)
    nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints);
 
    nvgpu_set_enabled(g, NVGPU_SUPPORT_NVS, true);
+   nvgpu_set_enabled(g, NVGPU_SUPPORT_NVS_CTRL_FIFO, true);
 }
 
 static void nvgpu_init_max_comptag(struct gk20a *g)
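For reference, below is a minimal standalone sketch of the locking scheme this patch moves to: the per-domain pending_update atomic is dropped, the runlist buffers are swapped at update time, and both the mem/mem_hw swap and the hardware submit are serialized with a per-domain lock. The sketch is illustrative only, not driver code: it uses pthread spinlocks in place of nvgpu_spinlock, and the names rl_domain, rl_domain_swap_mem() and rl_domain_submit() are hypothetical stand-ins for nvgpu_runlist_domain, nvgpu_runlist_swap_mem() and nvgpu_runlist_domain_actual_submit().

/* Standalone sketch of the swap/submit locking introduced by this patch. */
#include <pthread.h>
#include <stdio.h>

struct rl_mem { int id; };

struct rl_domain {
	pthread_spinlock_t lock;   /* models the new per-domain 'lock' */
	struct rl_mem *mem;        /* buffer free for SW updates */
	struct rl_mem *mem_hw;     /* buffer currently submitted to HW */
};

/* Swap mem and mem_hw under the lock, as nvgpu_runlist_swap_mem() now does. */
static void rl_domain_swap_mem(struct rl_domain *d)
{
	struct rl_mem *tmp;

	pthread_spin_lock(&d->lock);
	tmp = d->mem;
	d->mem = d->mem_hw;
	d->mem_hw = tmp;
	pthread_spin_unlock(&d->lock);
}

/* Submit under the same lock so a concurrent swap cannot race with it. */
static void rl_domain_submit(struct rl_domain *d)
{
	pthread_spin_lock(&d->lock);
	printf("submitting buffer %d to hw\n", d->mem_hw->id);
	pthread_spin_unlock(&d->lock);
}

int main(void)
{
	struct rl_mem a = { .id = 0 }, b = { .id = 1 };
	struct rl_domain d = { .mem = &a, .mem_hw = &b };

	pthread_spin_init(&d.lock, PTHREAD_PROCESS_PRIVATE);

	rl_domain_swap_mem(&d);  /* swap after a runlist update, as in update_locked() */
	rl_domain_submit(&d);    /* submit unconditionally; no pending_update gate */

	pthread_spin_destroy(&d.lock);
	return 0;
}

Build with: gcc -pthread sketch.c. The design point mirrored here is that the submit path no longer checks a pending flag; correctness instead comes from the swap and the hardware submit taking the same per-domain lock, so a submit always sees a consistent mem/mem_hw pair.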