gpu: nvgpu: Add mutex lock to synchronise isr threads

This adds a mutex lock to synchronise the stall ISR threads. Orin (t234) has three interrupt lines and three ISR threads to handle the bottom half of the ISR. The threads share the same data without proper synchronization. When multiple interrupts trigger simultaneously, the threads run in parallel, as in the trace below: #0 nvgpu_cic_mon_intr_stall_isr (g=g@entry=0x5ed62a9318) at /home/dt/automotive-dev-main-20220802T015100095/kernel/nvgpu/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c:158 #1 0x00000013758cae30 in nvgpu_intr_stall (arg=0x5ed62a9120) at /home/dt/automotive-dev-main-20220802T015100095/qnx/src/resmgrs/nvrm/nvgpu_rmos/os/intr.c:140 #2 0x00000013758ec090 in nvgpu_posix_thread_wrapper (data=<optimized out>) at /home/dt/automotive-dev-main-20220802T015100095/kernel/nvgpu/drivers/gpu/nvgpu/os/posix/thread.c:77 #3 0x0000001375b01000 in pthread_attr_setdetachstate () from /home/dt/automotive-dev-main-20220802T015100095/out/embedded-qnx-t186ref-debug-none/target_rootfs/lib/libc.so.5 Backtrace stopped: previous frame identical to this frame (corrupt stack?) This causes races in shared data access, which lead to multiple issues. Bug 3647988 Change-Id: If40e581635b52cce288d8f4b00af6a040f7f9a6e Signed-off-by: Dinesh T <dt@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2755874 Reviewed-by: Tejal Kudav <tkudav@nvidia.com> Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com> Reviewed-by: Ankur Kishore <ankkishore@nvidia.com> GVS: Gerrit_Virtual_Submit
2025-12-22 09:12:24 +03:00 · 2022-08-04 10:23:20 +00:00
parent 59f7a9e318
commit 4e78d478c3
3 changed files with 5 additions and 1 deletions
--- a/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c
+++ b/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c
@@ -185,12 +185,14 @@ u32 nvgpu_cic_mon_intr_stall_isr(struct gk20a *g)
 /**
  * Handle a pending stall interrupt for GPU @g.
  *
  * While holding g->mc.intr_thread_mutex (the lock that synchronises the
  * stall interrupt threads) this calls the chip-specific g->ops.mc.isr_stall()
  * hook, then nvgpu_cic_rm_set_irq_stall(g, 0), then
  * nvgpu_cic_mon_intr_stall_resume(). After the mutex is released it calls
  * nvgpu_cic_rm_broadcast_last_irq_stall().
  *
  * @param g  GPU driver context.
  */
 void nvgpu_cic_mon_intr_stall_handle(struct gk20a *g)
 {
 	/* Only one stall interrupt thread may run the sequence below at a time. */
 	nvgpu_mutex_acquire(&g->mc.intr_thread_mutex);
 	g->ops.mc.isr_stall(g);
 	/* sync handled irq counter before re-enabling interrupts */
 	nvgpu_cic_rm_set_irq_stall(g, 0);
 	nvgpu_cic_mon_intr_stall_resume(g);
 	nvgpu_mutex_release(&g->mc.intr_thread_mutex);
 	/*
 	 * Called outside the mutex; the return value is deliberately discarded.
 	 * NOTE(review): presumably this wakes waiters on the last-handled stall
 	 * irq - confirm against nvgpu_cic_rm_broadcast_last_irq_stall().
 	 */
 	(void)nvgpu_cic_rm_broadcast_last_irq_stall(g);
 }
--- a/drivers/gpu/nvgpu/include/nvgpu/mc.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/mc.h
@@ -188,6 +188,8 @@ struct nvgpu_intr_unit_info {
 struct nvgpu_mc {
 	/** Lock to access the MC interrupt registers. */
 	struct nvgpu_spinlock intr_lock;
 	/** Lock to synchronise between stall interrupt threads. */
 	struct nvgpu_mutex intr_thread_mutex;
 	/** Lock to access the MC unit registers. */
 	struct nvgpu_spinlock enable_lock;
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -86,7 +86,7 @@ static void nvgpu_init_vars(struct gk20a *g)
 	nvgpu_spinlock_init(&g->power_spinlock);
 	nvgpu_spinlock_init(&g->mc.intr_lock);
-
+	nvgpu_mutex_init(&g->mc.intr_thread_mutex);
 	nvgpu_mutex_init(&platform->railgate_lock);
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->power_lock);