mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: fix use-after-free use case of CE APP.
The following issue is reported when running "sudo modprobe -r nvgpu":

[ 134.066392] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
[ 134.066428] Mem abort info:
[ 134.066431] ESR = 0x96000004
[ 134.066434] EC = 0x25: DABT (current EL), IL = 32 bit
[ 134.066450] [0000000000000058] pgd=0000000000000000, p4d=0000000000000000
[ 134.066459] Internal error: Oops: 96000004 [#1] PREEMPT_RT SMP
[ 134.066639] pc : nvgpu_cic_rm_wait_for_stall_interrupts+0x78/0xd0 [nvgpu]
[ 134.066847] lr : nvgpu_cic_rm_wait_for_stall_interrupts+0x74/0xd0 [nvgpu]
[ 134.067043] sp : ffff80001971ba80
[ 134.067046] x29: ffff80001971ba80 x28: ffff000093b0da00
[ 134.067054] x27: 0000000000000000 x26: ffff80001c28b990
[ 134.067061] x25: ffff00008cd01000 x24: 0000000000000bb8
[ 134.067067] x23: 0000000000000000 x22: ffff0000915b0000
[ 134.067073] x21: ffff000093b0da00 x20: ffff0000915b0000
[ 134.067079] x19: ffff0000915b0000 x18: 0000000000000036
[ 134.067085] x17: 0000000000000000 x16: 0000000000000000
[ 134.067091] x15: ffff8000126b5fd8 x14: 7373616c633d4d45
[ 134.067097] x13: ffff8000098abef0 x12: 0000000000000000
[ 134.067102] x11: ffff8000098ab5a0 x10: ffff8000098abef8
[ 134.067108] x9 : ffff80001010e844 x8 : ffff80001971ba48
[ 134.067115] x7 : 2222222222222222 x6 : ffff000093b0da00
[ 134.067122] x5 : ffff8000098b1fd8 x4 : 0000000000000000
[ 134.067127] x3 : 0000000000000000 x2 : 0000000000000000
[ 134.067133] x1 : 0000000000000000 x0 : 0000000000000000
[ 134.067138] Call trace:
[ 134.067140] nvgpu_cic_rm_wait_for_stall_interrupts+0x78/0xd0 [nvgpu]
[ 134.067328] nvgpu_cic_rm_wait_for_deferred_interrupts+0x20/0xb0 [nvgpu]
[ 134.067517] nvgpu_channel_deferred_reset_engines+0x29c/0x920 [nvgpu]
[ 134.067714] nvgpu_channel_close+0x18/0x20 [nvgpu]
[ 134.067904] nvgpu_init_pramin+0x2ac/0x350 [nvgpu]
[ 134.068092] nvgpu_ce_app_destroy+0x94/0xe0 [nvgpu]
[ 134.068279] nvgpu_put+0x90/0x120 [nvgpu]
[ 134.068465] nvgpu_pci_shutdown+0x29c/0x18a0 [nvgpu]
[ 134.068655] pci_device_remove+0x44/0xe0
[ 134.068665] device_release_driver_internal+0x114/0x1f0
[ 134.068701] driver_detach+0x54/0xe0
[ 134.068709] bus_remove_driver+0x70/0x120
[ 134.068733] driver_unregister+0x34/0x60

The above issue occurs because CIC resources are freed before the dependent users of interrupts (e.g. CDE, CE) have been cleaned up. As a solution, move the CIC deinit sequence to the end of nvgpu_put. This handles deinit properly for VGPU/IGPU/DGPU.

Bug 200763510

Change-Id: I696e31d5e03a9468cccfe710048000dbf7cf0269
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2592063
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
aa08389240
commit
9328f057a7
@@ -1102,6 +1102,10 @@ static void gk20a_free_cb(struct nvgpu_ref *refcount)
|
|||||||
g->ops.ltc.ltc_remove_support(g);
|
g->ops.ltc.ltc_remove_support(g);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(void)nvgpu_cic_rm_deinit_vars(g);
|
||||||
|
(void)nvgpu_cic_mon_remove(g);
|
||||||
|
(void)nvgpu_cic_rm_remove(g);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Free the device list once the gk20a struct is removed. We don't want
|
* Free the device list once the gk20a struct is removed. We don't want
|
||||||
* to do this during the railgate poweroff sequence since that means
|
* to do this during the railgate poweroff sequence since that means
|
||||||
|
|||||||
@@ -1919,12 +1919,6 @@ int nvgpu_remove(struct device *dev)
|
|||||||
|
|
||||||
nvgpu_mutex_destroy(&g->clk_arb_enable_lock);
|
nvgpu_mutex_destroy(&g->clk_arb_enable_lock);
|
||||||
|
|
||||||
err = nvgpu_cic_rm_deinit_vars(g);
|
|
||||||
if (err != 0) {
|
|
||||||
nvgpu_err(g, "CIC-RM deinit vars failed.");
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
nvgpu_log_fn(g, "removed");
|
nvgpu_log_fn(g, "removed");
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
@@ -1942,18 +1936,6 @@ static int __exit gk20a_remove(struct platform_device *pdev)
|
|||||||
|
|
||||||
err = nvgpu_remove(dev);
|
err = nvgpu_remove(dev);
|
||||||
|
|
||||||
err = nvgpu_cic_mon_remove(g);
|
|
||||||
if (err != 0) {
|
|
||||||
nvgpu_err(g, "CIC-MON remove failed");
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = nvgpu_cic_rm_remove(g);
|
|
||||||
if (err != 0) {
|
|
||||||
nvgpu_err(g, "CIC-RM remove failed.");
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
gk20a_dma_buf_priv_list_clear(l);
|
gk20a_dma_buf_priv_list_clear(l);
|
||||||
nvgpu_mutex_destroy(&l->dmabuf_priv_list_lock);
|
nvgpu_mutex_destroy(&l->dmabuf_priv_list_lock);
|
||||||
|
|
||||||
|
|||||||
@@ -780,8 +780,6 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
|
|||||||
nvgpu_enable_irqs(g);
|
nvgpu_enable_irqs(g);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
(void)nvgpu_cic_mon_remove(g);
|
|
||||||
(void)nvgpu_cic_rm_remove(g);
|
|
||||||
|
|
||||||
nvgpu_pci_pm_deinit(&pdev->dev);
|
nvgpu_pci_pm_deinit(&pdev->dev);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user