From 4e78d478c30225d691c457a2a7ab25968ed935d6 Mon Sep 17 00:00:00 2001
From: Dinesh T
Date: Thu, 4 Aug 2022 10:23:20 +0000
Subject: [PATCH] gpu: nvgpu: Add mutex lock to synchronise isr threads
Add a mutex lock to synchronise the stall ISR
threads with each other.
Orin (t234) has three interrupt lines and three ISR
threads to handle the bottom half of the ISR. The threads
share the same data without proper synchronization.
When multiple interrupts trigger simultaneously, the
threads run in parallel, as in the trace below:
#0 nvgpu_cic_mon_intr_stall_isr (g=g@entry=0x5ed62a9318)
at /home/dt/automotive-dev-main-20220802T015100095/kernel/nvgpu/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c:158
#1 0x00000013758cae30 in nvgpu_intr_stall (arg=0x5ed62a9120)
at /home/dt/automotive-dev-main-20220802T015100095/qnx/src/resmgrs/nvrm/nvgpu_rmos/os/intr.c:140
#2 0x00000013758ec090 in nvgpu_posix_thread_wrapper (data=)
at /home/dt/automotive-dev-main-20220802T015100095/kernel/nvgpu/drivers/gpu/nvgpu/os/posix/thread.c:77
#3 0x0000001375b01000 in pthread_attr_setdetachstate ()
from /home/dt/automotive-dev-main-20220802T015100095/out/embedded-qnx-t186ref-debug-none/target_rootfs/lib/libc.so.5
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
This causes races in shared data access, which lead to
multiple issues.
Bug 3647988
Change-Id: If40e581635b52cce288d8f4b00af6a040f7f9a6e
Signed-off-by: Dinesh T
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2755874
Reviewed-by: Tejal Kudav
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-cert
Reviewed-by: svc-mobile-misra
Reviewed-by: Ankur Kishore
GVS: Gerrit_Virtual_Submit
---
drivers/gpu/nvgpu/common/cic/mon/mon_intr.c | 2 ++
drivers/gpu/nvgpu/include/nvgpu/mc.h | 2 ++
drivers/gpu/nvgpu/os/linux/driver_common.c | 2 +-
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c b/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c
index d920b2539..79f24381c 100644
--- a/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c
+++ b/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c
@@ -185,12 +185,14 @@ u32 nvgpu_cic_mon_intr_stall_isr(struct gk20a *g)
void nvgpu_cic_mon_intr_stall_handle(struct gk20a *g)
{
+ nvgpu_mutex_acquire(&g->mc.intr_thread_mutex);
g->ops.mc.isr_stall(g);
/* sync handled irq counter before re-enabling interrupts */
nvgpu_cic_rm_set_irq_stall(g, 0);
nvgpu_cic_mon_intr_stall_resume(g);
+ nvgpu_mutex_release(&g->mc.intr_thread_mutex);
(void)nvgpu_cic_rm_broadcast_last_irq_stall(g);
}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mc.h b/drivers/gpu/nvgpu/include/nvgpu/mc.h
index e7a723555..cfa54975d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/mc.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/mc.h
@@ -188,6 +188,8 @@ struct nvgpu_intr_unit_info {
struct nvgpu_mc {
/** Lock to access the MC interrupt registers. */
struct nvgpu_spinlock intr_lock;
+ /** Lock to synchronise between stall interrupt threads. */
+ struct nvgpu_mutex intr_thread_mutex;
/** Lock to access the MC unit registers. */
struct nvgpu_spinlock enable_lock;
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index bb862c0a2..c6de85e45 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -86,7 +86,7 @@ static void nvgpu_init_vars(struct gk20a *g)
nvgpu_spinlock_init(&g->power_spinlock);
nvgpu_spinlock_init(&g->mc.intr_lock);
-
+ nvgpu_mutex_init(&g->mc.intr_thread_mutex);
nvgpu_mutex_init(&platform->railgate_lock);
nvgpu_mutex_init(&g->dbg_sessions_lock);
nvgpu_mutex_init(&g->power_lock);