gpu: nvgpu: Add mutex lock to synchronise isr threads

Add a mutex lock to synchronise the stall ISR threads.
Orin (t234) has three interrupt lines and three ISR
threads to handle the bottom half of the ISR. These threads
share the same data without proper synchronization.
When multiple interrupts trigger simultaneously, the
threads run in parallel, as shown in the trace below:

#0  nvgpu_cic_mon_intr_stall_isr (g=g@entry=0x5ed62a9318)
    at /home/dt/automotive-dev-main-20220802T015100095/kernel/nvgpu/drivers/gpu/nvgpu/common/cic/mon/mon_intr.c:158
#1  0x00000013758cae30 in nvgpu_intr_stall (arg=0x5ed62a9120)
    at /home/dt/automotive-dev-main-20220802T015100095/qnx/src/resmgrs/nvrm/nvgpu_rmos/os/intr.c:140
#2  0x00000013758ec090 in nvgpu_posix_thread_wrapper (data=<optimized out>)
    at /home/dt/automotive-dev-main-20220802T015100095/kernel/nvgpu/drivers/gpu/nvgpu/os/posix/thread.c:77
#3  0x0000001375b01000 in pthread_attr_setdetachstate ()
   from /home/dt/automotive-dev-main-20220802T015100095/out/embedded-qnx-t186ref-debug-none/target_rootfs/lib/libc.so.5
Backtrace stopped: previous frame identical to this frame (corrupt stack?)

This causes races in shared data access, which lead to
multiple issues.

Bug 3647988

Change-Id: If40e581635b52cce288d8f4b00af6a040f7f9a6e
Signed-off-by: Dinesh T <dt@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2755874
Reviewed-by: Tejal Kudav <tkudav@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Dinesh T
2022-08-04 10:23:20 +00:00
committed by mobile promotions
parent 59f7a9e318
commit 4e78d478c3
3 changed files with 5 additions and 1 deletions

View File

@@ -185,12 +185,14 @@ u32 nvgpu_cic_mon_intr_stall_isr(struct gk20a *g)
void nvgpu_cic_mon_intr_stall_handle(struct gk20a *g) void nvgpu_cic_mon_intr_stall_handle(struct gk20a *g)
{ {
nvgpu_mutex_acquire(&g->mc.intr_thread_mutex);
g->ops.mc.isr_stall(g); g->ops.mc.isr_stall(g);
/* sync handled irq counter before re-enabling interrupts */ /* sync handled irq counter before re-enabling interrupts */
nvgpu_cic_rm_set_irq_stall(g, 0); nvgpu_cic_rm_set_irq_stall(g, 0);
nvgpu_cic_mon_intr_stall_resume(g); nvgpu_cic_mon_intr_stall_resume(g);
nvgpu_mutex_release(&g->mc.intr_thread_mutex);
(void)nvgpu_cic_rm_broadcast_last_irq_stall(g); (void)nvgpu_cic_rm_broadcast_last_irq_stall(g);
} }

View File

@@ -188,6 +188,8 @@ struct nvgpu_intr_unit_info {
struct nvgpu_mc { struct nvgpu_mc {
/** Lock to access the MC interrupt registers. */ /** Lock to access the MC interrupt registers. */
struct nvgpu_spinlock intr_lock; struct nvgpu_spinlock intr_lock;
/** Lock to synchronise between stall interrupt threads. */
struct nvgpu_mutex intr_thread_mutex;
/** Lock to access the MC unit registers. */ /** Lock to access the MC unit registers. */
struct nvgpu_spinlock enable_lock; struct nvgpu_spinlock enable_lock;

View File

@@ -86,7 +86,7 @@ static void nvgpu_init_vars(struct gk20a *g)
nvgpu_spinlock_init(&g->power_spinlock); nvgpu_spinlock_init(&g->power_spinlock);
nvgpu_spinlock_init(&g->mc.intr_lock); nvgpu_spinlock_init(&g->mc.intr_lock);
nvgpu_mutex_init(&g->mc.intr_thread_mutex);
nvgpu_mutex_init(&platform->railgate_lock); nvgpu_mutex_init(&platform->railgate_lock);
nvgpu_mutex_init(&g->dbg_sessions_lock); nvgpu_mutex_init(&g->dbg_sessions_lock);
nvgpu_mutex_init(&g->power_lock); nvgpu_mutex_init(&g->power_lock);