gpu: nvgpu: fix the usermode mappings deadlock during railgate and munmap

The following locking sequences lead to a deadlock:

1. gk20a_pm_prepare_poweroff (alter_usermode_mappings):
   ctrl_privs_lock -> mmap_lock
2. __do_munmap (usermode_vma_close):
   mmap_lock -> ctrl_privs_lock

This lock inversion can be resolved by releasing ctrl_privs_lock when
mmap_lock is contended, so that munmap can proceed, and then retrying
the usermode mapping alteration after a short delay.

Below is the kernel panic log showing the deadlock.

[] INFO: task kworker/1:1:116 blocked for more than 120 seconds.
[]       Tainted: G        W         5.10.17-tegra #1
[] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[] task:kworker/1:1     state:D stack:    0 pid:  116 ppid:     2 flags:0x00000028
[] Workqueue: pm pm_runtime_work
[] Call trace:
[]  __switch_to+0x104/0x160
[]  __schedule+0x3d4/0x900
[]  schedule+0x74/0x100
[]  rwsem_down_write_slowpath+0x250/0x4b0
[]  down_write+0x6c/0x80
[]  alter_usermode_mappings+0xb4/0x160 [nvgpu]
[]  nvgpu_hide_usermode_for_poweroff+0x24/0x30 [nvgpu]
[]  gk20a_pm_prepare_poweroff+0xe8/0x140 [nvgpu]
[]  gk20a_pm_runtime_suspend+0x78/0xf0 [nvgpu]
[]  pm_generic_runtime_suspend+0x3c/0x60
[]  genpd_runtime_suspend+0xb0/0x2c0
[]  __rpm_callback+0x90/0x150
[]  rpm_callback+0x34/0xa0
[]  rpm_suspend+0xe0/0x5e0
[]  pm_runtime_work+0xbc/0xc0
[]  process_one_work+0x1c0/0x4a0
[]  worker_thread+0x11c/0x430
[]  kthread+0x148/0x170
[]  ret_from_fork+0x10/0x18

[] INFO: task nvrm_gpu_tests:1273 blocked for more than 121 seconds.
[]       Tainted: G        W         5.10.17-tegra #1
[] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[] task:nvrm_gpu_tests  state:D stack:    0 pid: 1273 ppid:  1245 flags:0x00000000
[] Call trace:
[]  __switch_to+0x104/0x160
[]  __schedule+0x3d4/0x900
[]  schedule+0x74/0x100
[]  schedule_preempt_disabled+0x28/0x40
[]  __mutex_lock.isra.0+0x184/0x5c0
[]  __mutex_lock_slowpath+0x24/0x30
[]  mutex_lock+0x5c/0x70
[]  usermode_vma_close+0x30/0x50 [nvgpu]
[]  remove_vma+0x34/0x60
[]  __do_munmap+0x1f4/0x4a0
[]  __vm_munmap+0x74/0xd0
[]  __arm64_sys_munmap+0x3c/0x50
[]  el0_svc_common.constprop.0+0x7c/0x1a0
[]  do_el0_svc+0x34/0xa0
[]  el0_svc+0x1c/0x30
[]  el0_sync_handler+0xa8/0xb0
[]  el0_sync+0x160/0x180
[] ---[ end Kernel panic - not syncing: hung_task: blocked tasks ]---

Bug 200703921

Change-Id: Ie7f017c92f20061d3bf891079f7fc7fe390f7cf7
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2533853
(cherry picked from commit 1dd3e0761c)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2540111
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Sagar Kamble
2021-05-24 14:36:01 +05:30
committed by mobile promotions
parent cbad9503a7
commit 12e89c21de

View File

@@ -2051,7 +2051,7 @@ int gk20a_ctrl_dev_mmap(struct file *filp, struct vm_area_struct *vma)
return err; return err;
} }
static void alter_usermode_mapping(struct gk20a *g, static int alter_usermode_mapping(struct gk20a *g,
struct gk20a_ctrl_priv *priv, struct gk20a_ctrl_priv *priv,
bool poweroff) bool poweroff)
{ {
@@ -2059,23 +2059,34 @@ static void alter_usermode_mapping(struct gk20a *g,
struct vm_area_struct *vma = priv->usermode_vma.vma; struct vm_area_struct *vma = priv->usermode_vma.vma;
bool vma_mapped = priv->usermode_vma.vma_mapped; bool vma_mapped = priv->usermode_vma.vma_mapped;
u64 addr; u64 addr;
int err; int err = 0;
if (!vma) { if (!vma) {
/* Nothing to do - no mmap called */ /* Nothing to do - no mmap called */
return; return 0;
} }
addr = l->regs_bus_addr + g->ops.fifo.usermode_base(g); addr = l->regs_bus_addr + g->ops.fifo.usermode_base(g);
down_write(&vma->vm_mm->mmap_sem);
/* /*
* This is a no-op for the below cases * This is a no-op for the below cases
* a) poweroff and !vma_mapped - > do nothing as no map exists * a) poweroff and !vma_mapped - > do nothing as no map exists
* b) !poweroff and vmap_mapped -> do nothing as already mapped * b) !poweroff and vmap_mapped -> do nothing as already mapped
*/ */
if (poweroff && vma_mapped) { if (poweroff != vma_mapped) {
return 0;
}
/*
* We use trylock due to lock inversion: we need to acquire
* mmap_lock while holding ctrl_privs_lock. usermode_vma_close
* does it in reverse order. Trylock is a way to avoid deadlock.
*/
if (!down_write_trylock(&vma->vm_mm->mmap_sem)) {
return -EBUSY;
}
if (poweroff) {
err = zap_vma_ptes(vma, vma->vm_start, SZ_4K); err = zap_vma_ptes(vma, vma->vm_start, SZ_4K);
if (err == 0) { if (err == 0) {
vma->vm_flags = VM_NONE; vma->vm_flags = VM_NONE;
@@ -2083,7 +2094,7 @@ static void alter_usermode_mapping(struct gk20a *g,
} else { } else {
nvgpu_err(g, "can't remove usermode mapping"); nvgpu_err(g, "can't remove usermode mapping");
} }
} else if (!poweroff && !vma_mapped) { } else {
vma->vm_flags = priv->usermode_vma.flags; vma->vm_flags = priv->usermode_vma.flags;
err = io_remap_pfn_range(vma, vma->vm_start, err = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT, addr >> PAGE_SHIFT,
@@ -2097,19 +2108,34 @@ static void alter_usermode_mapping(struct gk20a *g,
} }
up_write(&vma->vm_mm->mmap_sem); up_write(&vma->vm_mm->mmap_sem);
return err;
} }
static void alter_usermode_mappings(struct gk20a *g, bool poweroff) static void alter_usermode_mappings(struct gk20a *g, bool poweroff)
{ {
struct gk20a_ctrl_priv *priv; struct gk20a_ctrl_priv *priv;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int err = 0;
nvgpu_mutex_acquire(&l->ctrl.privs_lock); do {
nvgpu_list_for_each_entry(priv, &l->ctrl.privs, nvgpu_mutex_acquire(&l->ctrl.privs_lock);
gk20a_ctrl_priv, list) { nvgpu_list_for_each_entry(priv, &l->ctrl.privs,
alter_usermode_mapping(g, priv, poweroff); gk20a_ctrl_priv, list) {
} err = alter_usermode_mapping(g, priv, poweroff);
nvgpu_mutex_release(&l->ctrl.privs_lock); if (err != 0) {
break;
}
}
nvgpu_mutex_release(&l->ctrl.privs_lock);
if (err == -EBUSY) {
nvgpu_log_info(g, "ctrl_privs_lock lock contended. retry altering usermode mappings");
nvgpu_udelay(10);
} else if (err != 0) {
nvgpu_err(g, "can't alter usermode mapping. err = %d", err);
}
} while (err == -EBUSY);
} }
void nvgpu_hide_usermode_for_poweroff(struct gk20a *g) void nvgpu_hide_usermode_for_poweroff(struct gk20a *g)