gpu: nvgpu: simplify the runlist update sequence

The following changes simplify the overall runlist update
sequence:

1) Remove the deferred update for runlists. The NVS worker thread
now submits the updated runlist itself.

2) Move the runlist mem swap inside the update itself. Protect the
swap() and hw_submit() paths with a spinlock (see the sketch after
this list). This is temporary until KMD switches over to GSP.

3) Enable Control-Fifo mode from the nvgpu driver.
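
For reference, below is a minimal, self-contained sketch of the
locking pattern described in item 2: the buffer swap and the
hardware submit take the same per-domain lock, so one can never
interleave with the other. The names here (runlist_domain,
domain_swap_mem, domain_hw_submit) are simplified stand-ins, and a
pthread spinlock stands in for nvgpu_spinlock; the actual driver
change is in the runlist.c hunks below.

#include <pthread.h>
#include <stdio.h>

/* Simplified stand-in for a runlist buffer. */
struct runlist_mem {
	int entries;
};

/* Simplified stand-in for nvgpu_runlist_domain and its double buffer. */
struct runlist_domain {
	pthread_spinlock_t lock;    /* protects mem/mem_hw against concurrent swap and submit */
	struct runlist_mem *mem;    /* SW buffer, free to rebuild */
	struct runlist_mem *mem_hw; /* buffer currently handed to the hardware */
};

/* Update path: publish the freshly built buffer by swapping it with mem_hw. */
static void domain_swap_mem(struct runlist_domain *d)
{
	struct runlist_mem *tmp;

	pthread_spin_lock(&d->lock);
	tmp = d->mem;
	d->mem = d->mem_hw;
	d->mem_hw = tmp;
	pthread_spin_unlock(&d->lock);
}

/* Submit path (worker thread): hand mem_hw to the hardware under the same
 * lock, so a swap cannot run in the middle of a submit. */
static void domain_hw_submit(struct runlist_domain *d)
{
	pthread_spin_lock(&d->lock);
	printf("submitting runlist buffer %p\n", (void *)d->mem_hw);
	pthread_spin_unlock(&d->lock);
}

int main(void)
{
	struct runlist_mem a = { 0 }, b = { 0 };
	struct runlist_domain d = { .mem = &a, .mem_hw = &b };

	pthread_spin_init(&d.lock, PTHREAD_PROCESS_PRIVATE);

	domain_swap_mem(&d);  /* runlist update done; publish the new buffer */
	domain_hw_submit(&d); /* worker submits the published buffer */

	pthread_spin_destroy(&d.lock);
	return 0;
}

The real driver takes the same lock with nvgpu_spinlock_acquire()/
release() around both the swap and hw_submit(), and documents it as
temporary until KMD has completely switched to GSP.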

Jira NVGPU-8609

Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Change-Id: Icc52e5d8ccec9d3653c9bc1cf40400fc01a08fde
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2757406
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Debarshi Dutta
Date: 2022-08-08 10:11:56 +05:30
Committed by: mobile promotions
Parent: 1d4b7b1c5d
Commit: 42beb7f4db
6 changed files with 56 additions and 88 deletions


@@ -31,7 +31,9 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/rc.h>
+#include <nvgpu/barrier.h>
 #include <nvgpu/string.h>
+#include <nvgpu/lock.h>
 #include <nvgpu/static_analysis.h>
 #ifdef CONFIG_NVGPU_LS_PMU
 #include <nvgpu/pmu/mutex.h>
@@ -442,37 +444,37 @@ void nvgpu_runlist_swap_mem(struct gk20a *g, struct nvgpu_runlist_domain *domain
 	 */
 	rl_dbg(g, "Swapping mem for rl domain[%llu]", domain->domain_id);
+	nvgpu_spinlock_acquire(&domain->lock);
 	mem_tmp = domain->mem;
 	domain->mem = domain->mem_hw;
 	domain->mem_hw = mem_tmp;
+	nvgpu_spinlock_release(&domain->lock);
 }
 
 static int nvgpu_runlist_domain_actual_submit(struct gk20a *g, struct nvgpu_runlist *rl,
-		bool swap_buffer, bool wait_for_finish)
+		bool wait_for_finish)
 {
 	int ret = 0;
 
 	rl_dbg(g, "Runlist[%u]: submitting domain[%llu]",
 		rl->id, rl->domain->domain_id);
 
-	if (swap_buffer) {
-		nvgpu_runlist_swap_mem(g, rl->domain);
-	}
-
-	nvgpu_atomic_set(&rl->domain->pending_update, 0);
-
-	/* No submit exists for VGPU */
-	if (g->ops.runlist.hw_submit != NULL) {
-		g->ops.runlist.hw_submit(g, rl);
-
-		if (wait_for_finish) {
-			ret = g->ops.runlist.wait_pending(g, rl);
-			if (ret == -ETIMEDOUT) {
-				nvgpu_err(g, "runlist %d update timeout", rl->id);
-				/* trigger runlist update timeout recovery */
-				return ret;
-			}
-		}
-	}
+	/* Here, its essential to synchronize between hw_submit
+	 * and domain mem swaps.
+	 */
+	nvgpu_spinlock_acquire(&rl->domain->lock);
+	g->ops.runlist.hw_submit(g, rl);
+	nvgpu_spinlock_release(&rl->domain->lock);
+
+	if (wait_for_finish) {
+		ret = g->ops.runlist.wait_pending(g, rl);
+		if (ret == -ETIMEDOUT) {
+			nvgpu_err(g, "runlist %d update timeout", rl->id);
+			/* trigger runlist update timeout recovery */
+			return ret;
+		}
+	}
@@ -511,8 +513,6 @@ static int nvgpu_runlist_update_mem_locked(struct gk20a *g, struct nvgpu_runlist
 		return ret;
 	}
 
-	nvgpu_atomic_set(&domain->pending_update, 1);
-
 	return ret;
 }
@@ -544,6 +544,8 @@ int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl,
 		return ret;
 	}
 
+	nvgpu_runlist_swap_mem(g, domain);
+
 	return ret;
 }
@@ -570,12 +572,10 @@ int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next,
 			g, g->pmu, PMU_MUTEX_ID_FIFO, &token);
 #endif
 
-	nvgpu_atomic_set(&runlist->domain->pending_update, 1);
-
 #ifdef CONFIG_NVS_PRESENT
-	ret = g->nvs_worker_submit(g, runlist, runlist->domain, false, wait_preempt);
+	ret = g->nvs_worker_submit(g, runlist, runlist->domain, wait_preempt);
 #else
-	ret = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, false, wait_preempt);
+	ret = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, wait_preempt);
 #endif
 	if (ret != 0) {
 		if (ret == 1) {
@@ -639,9 +639,9 @@ static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
 	ret = nvgpu_runlist_update_locked(g, rl, domain, ch, add, wait_for_finish);
 	if (ret == 0) {
 #ifdef CONFIG_NVS_PRESENT
-		ret = g->nvs_worker_submit(g, rl, domain, true, wait_for_finish);
+		ret = g->nvs_worker_submit(g, rl, domain, wait_for_finish);
 #else
-		ret = nvgpu_rl_domain_sync_submit(g, rl, domain, true, wait_for_finish);
+		ret = nvgpu_rl_domain_sync_submit(g, rl, domain, wait_for_finish);
 #endif
 		/* Deferred Update */
 		if (ret == 1) {
@@ -669,8 +669,7 @@ static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
  * This is expected to be called only when device is powered on.
  */
 static int runlist_submit_powered(struct gk20a *g, struct nvgpu_runlist *runlist,
-		struct nvgpu_runlist_domain *next_domain, bool swap_buffer,
-		bool wait_for_finish)
+		struct nvgpu_runlist_domain *next_domain, bool wait_for_finish)
 {
 	int err;
@@ -679,14 +678,13 @@ static int runlist_submit_powered(struct gk20a *g, struct nvgpu_runlist *runlist
rl_dbg(g, "Runlist[%u]: switching to domain %llu", rl_dbg(g, "Runlist[%u]: switching to domain %llu",
runlist->id, next_domain->domain_id); runlist->id, next_domain->domain_id);
err = nvgpu_runlist_domain_actual_submit(g, runlist, swap_buffer, wait_for_finish); err = nvgpu_runlist_domain_actual_submit(g, runlist, wait_for_finish);
return err; return err;
} }
int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist, int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist,
struct nvgpu_runlist_domain *next_domain, bool swap_buffers, struct nvgpu_runlist_domain *next_domain, bool wait_for_finish)
bool wait_for_finish)
{ {
int err = 0; int err = 0;
@@ -694,10 +692,7 @@ int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist,
 		next_domain = runlist->shadow_rl_domain;
 	}
 
-	if (nvgpu_atomic_read(&next_domain->pending_update) == 1) {
-		err = runlist_submit_powered(g, runlist, next_domain, swap_buffers,
-				wait_for_finish);
-	}
+	err = runlist_submit_powered(g, runlist, next_domain, wait_for_finish);
 
 	return err;
 }
@@ -706,30 +701,8 @@ static int runlist_switch_domain_and_submit(struct gk20a *g,
 		struct nvgpu_runlist *runlist, struct nvgpu_runlist_domain *rl_domain)
 {
 	int ret = 0;
-	struct nvgpu_runlist_domain *prev_rl_domain = runlist->domain;
 
-	/* If no user domains exist, submit the shadow_rl_domain if
-	 * pending is set to true. When the last user domain is removed,
-	 * shadow_rl_domain will have pending_update set to true.
-	 * Eventually, this logic will change. For manual mode, this needs
-	 * to be submitted irrespective of the status of pending_update.
-	 */
-	if (nvgpu_list_empty(&runlist->user_rl_domains)) {
-		if (nvgpu_atomic_read(&rl_domain->pending_update) == 0) {
-			return 0;
-		}
-	} else {
-		/* If only one user domain exists, return if no pending
-		 * update exists.
-		 */
-		if (prev_rl_domain == rl_domain) {
-			if (nvgpu_atomic_read(&prev_rl_domain->pending_update) == 0) {
-				return 0;
-			}
-		}
-	}
-
-	ret = runlist_submit_powered(g, runlist, rl_domain, false, false);
+	ret = runlist_submit_powered(g, runlist, rl_domain, false);
 
 	return ret;
 }
@@ -1116,7 +1089,7 @@ struct nvgpu_runlist_domain *nvgpu_runlist_domain_alloc(struct gk20a *g,
 		goto free_active_channels;
 	}
 
-	nvgpu_atomic_set(&domain->pending_update, 0);
+	nvgpu_spinlock_init(&domain->lock);
 
 	return domain;
 
 free_active_channels:


@@ -49,7 +49,6 @@ struct nvgpu_nvs_worker_item {
 	struct nvgpu_runlist *rl;
 	struct nvgpu_runlist_domain *rl_domain;
 	struct nvgpu_cond cond;
-	bool swap_buffer;
 	bool wait_for_finish;
 	bool locked;
 	int status;
@@ -83,7 +82,7 @@ static void nvgpu_nvs_worker_poll_init(struct nvgpu_worker *worker)
 	/* 100 ms is a nice arbitrary timeout for default status */
 	nvs_worker->current_timeout = 100;
 	nvgpu_timeout_init_cpu_timer_sw(worker->g, &nvs_worker->timeout,
 			nvs_worker->current_timeout);
 
 	nvgpu_atomic_set(&nvs_worker->nvs_sched_state, NVS_WORKER_STATE_RUNNING);
@@ -120,13 +119,12 @@ static u64 nvgpu_nvs_tick(struct gk20a *g)
 		nvs_next = sched->shadow_domain->parent;
 	}
 
 	if (nvs_next->priv == sched->shadow_domain) {
 		/*
 		 * This entire thread is going to be changed soon.
 		 * The above check ensures that there are no other domain,
-		 * besides the active domain. So, its safe to simply return here.
-		 * Any active domain updates shall are taken care of during
+		 * besides the shadow domain. So, its safe to simply return here.
+		 * Any shadow domain updates shall are taken care of during
 		 * nvgpu_nvs_worker_wakeup_process_item().
 		 *
 		 * This is a temporary hack for legacy cases where we donot have
@@ -181,30 +179,26 @@ static void nvgpu_nvs_worker_wakeup_process_item(struct nvgpu_list_node *work_it
 		}
 	}
 
-	nvs_dbg(g, "Thread sync started");
-
 	if (sched->active_domain == nvs_domain->priv) {
 		/* Instantly switch domain and force runlist updates */
-		ret = nvgpu_rl_domain_sync_submit(g, runlist, rl_domain, work->swap_buffer, work->wait_for_finish);
-		nvs_dbg(g, "Active thread updated");
+		ret = nvgpu_rl_domain_sync_submit(g, runlist, rl_domain, work->wait_for_finish);
 	} else {
-		/* Swap buffers here even if its deferred for correctness */
-		if (work->swap_buffer) {
-			nvgpu_runlist_swap_mem(g, rl_domain);
-		}
 		ret = 1;
 	}
 
-	nvs_dbg(g, " ");
-
 done:
 	nvgpu_mutex_release(&g->sched_mutex);
 
 	work->status = ret;
-	nvgpu_atomic_set(&work->state, 1);
+	(void)nvgpu_atomic_xchg(&work->state, 1);
 
 	/* Wakeup threads waiting on runlist submit */
 	nvgpu_cond_signal(&work->cond);
 }
 
 static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
-		struct nvgpu_runlist_domain *rl_domain, bool swap_buffer,
-		bool wait_for_finish)
+		struct nvgpu_runlist_domain *rl_domain, bool wait_for_finish)
 {
 	struct nvgpu_nvs_scheduler *sched = g->scheduler;
 	struct nvgpu_nvs_worker *worker = &sched->worker;
@@ -229,7 +223,6 @@ static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
 	work->rl_domain = rl_domain;
 	nvgpu_cond_init(&work->cond);
 	nvgpu_init_list_node(&work->list);
-	work->swap_buffer = swap_buffer;
 	work->wait_for_finish = wait_for_finish;
 	nvgpu_atomic_set(&work->state, 0);
@@ -392,6 +385,7 @@ void nvgpu_nvs_worker_resume(struct gk20a *g)
 	if (nvs_worker_state == NVS_WORKER_STATE_PAUSED) {
 		nvs_dbg(g, "Waiting for nvs thread to be resumed");
 		/* wakeup worker forcibly. */
 		nvgpu_cond_signal_interruptible(&worker->wq);
@@ -684,6 +678,9 @@ int nvgpu_nvs_open(struct gk20a *g)
 		goto unlock;
 	}
 
+	/* Ensure all the previous writes are seen */
+	nvgpu_wmb();
+
 	err = nvgpu_nvs_worker_init(g);
 	if (err != 0) {
 		nvgpu_nvs_remove_shadow_domain(g);


@@ -125,9 +125,9 @@ static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g,
 	}
 
 #ifdef CONFIG_NVS_PRESENT
 	/* Special case. Submit the recovery runlist now */
-	err = g->nvs_worker_submit(g, runlist, runlist->domain, true, false);
+	err = g->nvs_worker_submit(g, runlist, runlist->domain, false);
 #else
-	err = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, true, false);
+	err = nvgpu_rl_domain_sync_submit(g, runlist, runlist->domain, false);
 #endif
 	if (err != 0 && err != 1) {
 		nvgpu_err(g, "runlist id %d is not cleaned up",


@@ -912,8 +912,7 @@ struct gk20a {
 	 * Must hold runlist lock while invoking this interface.
 	 */
 	int (*nvs_worker_submit)(struct gk20a *g, struct nvgpu_runlist *rl,
-			struct nvgpu_runlist_domain *rl_domain, bool swap_buffer,
-			bool wait_for_finish);
+			struct nvgpu_runlist_domain *rl_domain, bool wait_for_finish);
 #endif
 
 #ifdef CONFIG_NVGPU_ENABLE_MISC_EC


@@ -136,19 +136,18 @@ struct nvgpu_runlist_domain {
 	/** Bitmap of active TSGs in the runlist domain. One bit per tsgid. */
 	unsigned long *active_tsgs;
 
+	/* This lock is used to explicitely protect mem and mem_hw.
+	 * There is a need to access control the two buffers as long as NVS
+	 * thread is available. This can be removed once KMD has completely
+	 * switched to GSP.
+	 */
+	struct nvgpu_spinlock lock;
 	/** Runlist buffer free to use in sw. Swapped with another mem on next load. */
 	struct nvgpu_runlist_mem *mem;
 	/** Currently active buffer submitted for hardware. */
 	struct nvgpu_runlist_mem *mem_hw;
-	/**
-	 * When a channel is removed or added, this value is set to true.
-	 * When this rl domain is scheduled to be submitted to the h/w,
-	 * swap mem and mem_hw and submit mem_hw and then its value is
-	 * set to false.
-	 */
-	nvgpu_atomic_t pending_update;
 };
 
 struct nvgpu_runlist {
@@ -225,8 +224,7 @@ struct nvgpu_runlist_domain *nvgpu_rl_domain_get(struct gk20a *g, u32 runlist_id
  * Submit next_domain if there is a pending update.
  */
 int nvgpu_rl_domain_sync_submit(struct gk20a *g, struct nvgpu_runlist *runlist,
-		struct nvgpu_runlist_domain *next_domain, bool swap_buffers,
-		bool wait_for_finish);
+		struct nvgpu_runlist_domain *next_domain, bool wait_for_finish);
 
 static inline struct nvgpu_runlist_domain *
 nvgpu_runlist_domain_from_domains_list(struct nvgpu_list_node *node)


@@ -134,6 +134,7 @@ static void nvgpu_init_vars(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints);
 
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_NVS, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_NVS_CTRL_FIFO, true);
 }
 
 static void nvgpu_init_max_comptag(struct gk20a *g)