Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
Make recovery and quiesce co-exist to support the quiesce state on
unrecoverable errors.

Currently, the quiesce code is wrapped under ifndef
CONFIG_NVGPU_RECOVERY. Isolate the quiesce code from the recovery
config, thereby enabling it on all builds.

On Linux, the hung-task checker (check_hung_uninterruptible_tasks()
in kernel/hung_task.c) complains that the quiesce thread is stuck for
more than 120 seconds:

  INFO: task sw-quiesce:1068 blocked for more than 120 seconds.

A wait of more than 120 seconds is expected here, since the quiesce
thread waits until quiesce is triggered on a fatal unrecoverable
error. However, the INFO print upsets the kernel_warning_test (KWT)
on Linux builds. To fix the failing KWT, change the quiesce task to
an interruptible wait instead of an uninterruptible one, as the
checker only looks at uninterruptible tasks.

Bug 2919899
JIRA NVGPU-5479

Change-Id: Ibd1023506859d8371998b785e881ace52cb5f030
Signed-off-by: tkudav <tkudav@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2342774
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
146 lines · 3.5 KiB · C
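The quiesce thread itself is not in this file; the following is a minimal sketch, in plain Linux kernel terms, of the interruptible-wait pattern the commit message describes. The names (struct quiesce_ctx, quiesce_thread_fn) and the wait-queue plumbing are hypothetical illustrations, not nvgpu's actual quiesce code. The point is that wait_event_interruptible() sleeps in TASK_INTERRUPTIBLE, which check_hung_uninterruptible_tasks() skips, while wait_event() sleeps in TASK_UNINTERRUPTIBLE and gets flagged after 120 seconds.

#include <linux/wait.h>
#include <linux/kthread.h>

/* Hypothetical context for illustration only; not nvgpu's quiesce state. */
struct quiesce_ctx {
	wait_queue_head_t wq;
	bool pending;		/* set when a fatal unrecoverable error hits */
};

static int quiesce_thread_fn(void *data)
{
	struct quiesce_ctx *q = data;

	/*
	 * wait_event() here would park the task in TASK_UNINTERRUPTIBLE,
	 * and the hung-task checker reports any such task blocked for
	 * more than 120 seconds, even though a long wait is expected.
	 * wait_event_interruptible() parks it in TASK_INTERRUPTIBLE,
	 * which the checker ignores.
	 */
	while (!kthread_should_stop()) {
		if (wait_event_interruptible(q->wq,
				q->pending || kthread_should_stop()))
			continue;	/* interrupted by a signal: wait again */
		if (q->pending) {
			/* enter the SW quiesce state here */
			break;
		}
	}

	return 0;
}

A signal only interrupts the wait; the loop re-arms it, so the behaviour is unchanged apart from the task state the hung-task checker sees.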
/*
 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <nvgpu/trace.h>
#include <linux/irqreturn.h>

#include <nvgpu/gk20a.h>
#include <nvgpu/mc.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/gops_mc.h>

#include <nvgpu/atomic.h>
#include "os_linux.h"

/*
 * Primary (hard-IRQ) handler for stalling interrupts: pause stalling
 * interrupts and wake the threaded handler.
 */
irqreturn_t nvgpu_intr_stall(struct gk20a *g)
{
	u32 mc_intr_0;

#ifdef CONFIG_NVGPU_TRACE
	trace_mc_gk20a_intr_stall(g->name);
#endif

	if (nvgpu_is_powered_off(g))
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	mc_intr_0 = g->ops.mc.intr_stall(g);
	if (unlikely(!mc_intr_0))
		return IRQ_NONE;

	nvgpu_mc_intr_stall_pause(g);
	if (g->sw_quiesce_pending) {
		/* keep stalling interrupts paused while SW quiesce is pending */
		return IRQ_NONE;
	}

	nvgpu_atomic_set(&g->mc.sw_irq_stall_pending, 1);

#ifdef CONFIG_NVGPU_TRACE
	trace_mc_gk20a_intr_stall_done(g->name);
#endif

	return IRQ_WAKE_THREAD;
}

/*
 * Threaded handler for stalling interrupts: service them, then re-enable
 * stalling interrupts and wake anyone waiting for the interrupt to be
 * handled.
 */
irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g)
{
	nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched");

#ifdef CONFIG_NVGPU_TRACE
	trace_mc_gk20a_intr_thread_stall(g->name);
#endif

	g->ops.mc.isr_stall(g);
	/* sync handled irq counter before re-enabling interrupts */
	nvgpu_atomic_set(&g->mc.sw_irq_stall_pending, 0);
	nvgpu_mc_intr_stall_resume(g);

	nvgpu_cond_broadcast(&g->mc.sw_irq_stall_last_handled_cond);

#ifdef CONFIG_NVGPU_TRACE
	trace_mc_gk20a_intr_thread_stall_done(g->name);
#endif

	return IRQ_HANDLED;
}

/*
 * Non-stalling interrupt handler: service the interrupt in place and defer
 * semaphore wakeup / event posting to the nonstall workqueue.
 */
irqreturn_t nvgpu_intr_nonstall(struct gk20a *g)
{
	u32 non_stall_intr_val;
	int ops_old, ops_new, ops = 0;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (nvgpu_is_powered_off(g))
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	non_stall_intr_val = g->ops.mc.intr_nonstall(g);
	if (unlikely(!non_stall_intr_val))
		return IRQ_NONE;

	nvgpu_mc_intr_nonstall_pause(g);
	if (g->sw_quiesce_pending) {
		/* keep non-stalling interrupts paused while SW quiesce is pending */
		return IRQ_NONE;
	}

	nvgpu_atomic_set(&g->mc.sw_irq_nonstall_pending, 1);
	ops = g->ops.mc.isr_nonstall(g);
	if (ops) {
		/* accumulate the requested ops atomically for the work item */
		do {
			ops_old = atomic_read(&l->nonstall_ops);
			ops_new = ops_old | ops;
		} while (ops_old != atomic_cmpxchg(&l->nonstall_ops,
						ops_old, ops_new));

		queue_work(l->nonstall_work_queue, &l->nonstall_fn_work);
	}

	/* sync handled irq counter before re-enabling interrupts */
	nvgpu_atomic_set(&g->mc.sw_irq_nonstall_pending, 0);

	nvgpu_mc_intr_nonstall_resume(g);

	nvgpu_cond_broadcast(&g->mc.sw_irq_nonstall_last_handled_cond);

	return IRQ_HANDLED;
}

/* Deferred nonstall work: wake semaphore waiters and optionally post events. */
static void mc_gk20a_handle_intr_nonstall(struct gk20a *g, u32 ops)
{
	bool semaphore_wakeup, post_events;

	semaphore_wakeup =
		(((ops & NVGPU_NONSTALL_OPS_WAKEUP_SEMAPHORE) != 0U) ?
		 true : false);
	post_events = (((ops & NVGPU_NONSTALL_OPS_POST_EVENTS) != 0U) ?
		       true : false);

	if (semaphore_wakeup) {
		g->ops.semaphore_wakeup(g, post_events);
	}
}

/* Workqueue callback: drain accumulated nonstall ops until none remain. */
void nvgpu_intr_nonstall_cb(struct work_struct *work)
{
	struct nvgpu_os_linux *l =
		container_of(work, struct nvgpu_os_linux, nonstall_fn_work);
	struct gk20a *g = &l->g;

	do {
		u32 ops;

		ops = atomic_xchg(&l->nonstall_ops, 0);
		mc_gk20a_handle_intr_nonstall(g, ops);
	} while (atomic_read(&l->nonstall_ops) != 0);
}
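nvgpu_intr_stall() returning IRQ_WAKE_THREAD only makes sense when it is registered as the primary handler of a threaded IRQ, with nvgpu_intr_thread_stall() as the thread function. The registration itself is not in this file; the sketch below shows, under that assumption, how such a pair is typically wired up with request_threaded_irq(). The wrapper names, the irq parameter, and the IRQF_SHARED flag are illustrative, not the actual nvgpu Linux module code.

#include <linux/interrupt.h>

/* Primary handler: runs in hard-IRQ context, may return IRQ_WAKE_THREAD. */
static irqreturn_t example_stall_isr(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;

	return nvgpu_intr_stall(g);
}

/* Thread function: runs when the primary handler returns IRQ_WAKE_THREAD. */
static irqreturn_t example_stall_isr_thread(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;

	return nvgpu_intr_thread_stall(g);
}

static int example_request_stall_irq(struct gk20a *g, unsigned int irq)
{
	/* IRQF_SHARED matches the "sharing irq with others" comments above. */
	return request_threaded_irq(irq, example_stall_isr,
				    example_stall_isr_thread,
				    IRQF_SHARED, "nvgpu-stall", g);
}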