gpu: nvgpu: fix clk_arb completion file private data access race

The clk_arb completion file descriptor can be closed immediately after
poll finishes in the work item gp10b_clk_arb_run_arbiter_cb. In
that case, the refcount for nvgpu_clk_dev can drop to zero in
the work item, which can lead to an invalid access while removing
nvgpu_clk_dev from the lists.

Remove nvgpu_clk_dev from the list before dropping the reference to
it.

Also, remove nvgpu_clk_dev from its list in the completion file release
handler while holding the session and requests spinlocks, to avoid a
race with gp10b_clk_arb_run_arbiter_cb using it.

bug 200757277

Change-Id: I054eee547f2a6fa633d7ef55df216ec36647a826
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2569522
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Sagar Kamble
2021-08-03 09:11:33 +05:30
committed by mobile promotions
parent 2c441a83d4
commit ce8548ec05
4 changed files with 20 additions and 11 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -886,8 +886,8 @@ void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
nvgpu_spinlock_acquire(&session->session_lock);
nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
nvgpu_clk_dev, node) {
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
nvgpu_list_del(&dev->node);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
}
nvgpu_spinlock_release(&session->session_lock);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -730,8 +730,8 @@ exit_arb:
nvgpu_atomic_set(&dev->poll_mask,
NVGPU_POLLIN | NVGPU_POLLRDNORM);
nvgpu_clk_arb_event_post_event(dev);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
nvgpu_list_del(&dev->node);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
}
nvgpu_spinlock_release(&arb->requests_lock);
@@ -768,4 +768,4 @@ void gp106_clk_arb_cleanup(struct nvgpu_clk_arb *arb)
nvgpu_kfree(g, g->clk_arb);
g->clk_arb = NULL;
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -393,8 +393,8 @@ exit_arb:
nvgpu_clk_dev, node) {
nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM);
nvgpu_clk_arb_event_post_event(dev);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
nvgpu_list_del(&dev->node);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
}
nvgpu_spinlock_release(&arb->requests_lock);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -51,19 +51,28 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct nvgpu_clk_session *session = dev->session;
struct gk20a *g = session->g;
struct nvgpu_clk_arb *arb = g->clk_arb;
clk_arb_dbg(g, " ");
clk_arb_dbg(session->g, " ");
nvgpu_spinlock_acquire(&session->session_lock);
nvgpu_spinlock_acquire(&arb->requests_lock);
nvgpu_list_del(&dev->node);
nvgpu_spinlock_release(&arb->requests_lock);
nvgpu_spinlock_release(&session->session_lock);
/* This is done to account for the extra refcount taken in
* nvgpu_clk_arb_commit_request_fd without events support in iGPU
*/
if (!session->g->clk_arb->clk_arb_events_supported) {
if (!arb->clk_arb_events_supported) {
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
}
nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
return 0;
}