mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: add sw quiesce
For safety builds, the nvgpu driver should enter the SW quiesce state when an uncorrectable error has occurred. In this state, any activity on the GPU should be prevented, without powering off the GPU. Also, only a minimal set of operations should be used to enter the SW quiesce state.

Entering the SW quiesce state does the following:
- set sw_quiesce_pending: when this flag is set, interrupt handlers exit after masking interrupts. This should help mitigate an interrupt storm.
- wake up the thread that completes quiescing. The thread performs the following:
  - set NVGPU_DRIVER_IS_DYING to prevent allocation of new resources
  - disable interrupts
  - disable fifo scheduling
  - preempt all runlists
  - set error notifier for all active channels

Note: for channels with usermode submit enabled, userspace can still ring the doorbell, but this will not trigger any work on engines since fifo scheduling is disabled.

Jira NVGPU-3493

Change-Id: I639a32da754d8833f54dcec1fa23135721d8d89a
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2172391
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
e60b7d773b
commit
95bb19827e
@@ -20,6 +20,9 @@
|
||||
|
||||
#include <nvgpu/atomic.h>
|
||||
#include <nvgpu/unit.h>
|
||||
#ifndef CONFIG_NVGPU_RECOVERY
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#endif
|
||||
#include "os_linux.h"
|
||||
|
||||
irqreturn_t nvgpu_intr_stall(struct gk20a *g)
|
||||
@@ -39,6 +42,11 @@ irqreturn_t nvgpu_intr_stall(struct gk20a *g)
|
||||
return IRQ_NONE;
|
||||
|
||||
g->ops.mc.intr_stall_pause(g);
|
||||
#ifndef CONFIG_NVGPU_RECOVERY
|
||||
if (g->sw_quiesce_pending) {
|
||||
return IRQ_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
nvgpu_atomic_inc(&g->hw_irq_stall_count);
|
||||
|
||||
@@ -90,6 +98,11 @@ irqreturn_t nvgpu_intr_nonstall(struct gk20a *g)
|
||||
return IRQ_NONE;
|
||||
|
||||
g->ops.mc.intr_nonstall_pause(g);
|
||||
#ifndef CONFIG_NVGPU_RECOVERY
|
||||
if (g->sw_quiesce_pending) {
|
||||
return IRQ_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
ops = g->ops.mc.isr_nonstall(g);
|
||||
if (ops) {
|
||||
|
||||
Reference in New Issue
Block a user