mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: Guard profiler_objects list operations with a lock
Both profiler and debugger device nodes access and update the list
g->profiler_objects. List operations were previously not guarded by a
lock, leading to synchronisation issues. The stress-ng test attempts
to trigger repeated random open/close sessions on all the device nodes
exposed by the GPU. This results in a kernel panic at random stages of
the test.
Failure signature - The profiler node receives a release call and, as
part of it, nvgpu_profiler_free attempts to delete the prof_obj_entry
and free the prof memory. Simultaneously, the debugger node also
receives a release call and, as part of gk20a_dbg_gpu_dev_release, nvgpu
attempts to access g->profiler_objects to check for any profiling
sessions associated with the debugger node. There is a race to access
the list, which results in a kernel panic for address 0x8 because nvgpu
tries to access prof_obj->session_id, which is at offset 0x8.
As part of this change, g->profiler_objects list access/update is
guarded with a mutex lock.
Bug 4858627
Change-Id: I1e2cf8d27d195bbc9c012cf511029de9eaadb038
Signed-off-by: Kishan Palankar <kpalankar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/3239897
(cherry picked from commit 2eabcdb8a4)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/3262771
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Amulya Yarlagadda <ayarlagadda@nvidia.com>
Tested-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
This commit is contained in:
committed by
Amulya Yarlagadda
parent
e2d19ad097
commit
8a0a534570
@@ -1,5 +1,5 @@
|
|||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
/* SPDX-FileCopyrightText: Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES.
|
/* SPDX-FileCopyrightText: Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
@@ -72,7 +72,9 @@ int nvgpu_profiler_alloc(struct gk20a *g,
|
|||||||
|
|
||||||
nvgpu_mutex_init(&prof->ioctl_lock);
|
nvgpu_mutex_init(&prof->ioctl_lock);
|
||||||
nvgpu_init_list_node(&prof->prof_obj_entry);
|
nvgpu_init_list_node(&prof->prof_obj_entry);
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_list_add(&prof->prof_obj_entry, &g->profiler_objects);
|
nvgpu_list_add(&prof->prof_obj_entry, &g->profiler_objects);
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
|
|
||||||
nvgpu_log(g, gpu_dbg_prof, "Allocated profiler handle %u",
|
nvgpu_log(g, gpu_dbg_prof, "Allocated profiler handle %u",
|
||||||
prof->prof_handle);
|
prof->prof_handle);
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2011-2024, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* GK20A Graphics
|
* GK20A Graphics
|
||||||
*
|
*
|
||||||
@@ -702,6 +702,8 @@ struct gk20a {
|
|||||||
#endif /* CONFIG_NVGPU_DEBUGGER */
|
#endif /* CONFIG_NVGPU_DEBUGGER */
|
||||||
|
|
||||||
#ifdef CONFIG_NVGPU_PROFILER
|
#ifdef CONFIG_NVGPU_PROFILER
|
||||||
|
/* Held while accessing/manipulating profiler_objects */
|
||||||
|
struct nvgpu_mutex prof_obj_lock;
|
||||||
struct nvgpu_list_node profiler_objects;
|
struct nvgpu_list_node profiler_objects;
|
||||||
struct nvgpu_pm_resource_reservations *pm_reservations;
|
struct nvgpu_pm_resource_reservations *pm_reservations;
|
||||||
nvgpu_atomic_t hwpm_refcount;
|
nvgpu_atomic_t hwpm_refcount;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2016-2024, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -165,6 +165,9 @@ static void nvgpu_init_vars(struct gk20a *g)
|
|||||||
#ifdef CONFIG_NVGPU_TSG_SHARING
|
#ifdef CONFIG_NVGPU_TSG_SHARING
|
||||||
nvgpu_mutex_init(&g->ctrl_dev_id_lock);
|
nvgpu_mutex_init(&g->ctrl_dev_id_lock);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_NVGPU_PROFILER
|
||||||
|
nvgpu_mutex_init(&g->prof_obj_lock);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Init the clock req count to 0 */
|
/* Init the clock req count to 0 */
|
||||||
nvgpu_atomic_set(&g->clk_arb_global_nr, 0);
|
nvgpu_atomic_set(&g->clk_arb_global_nr, 0);
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Tegra GK20A GPU Debugger/Profiler Driver
|
* Tegra GK20A GPU Debugger/Profiler Driver
|
||||||
*
|
*
|
||||||
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -223,6 +223,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
|
|||||||
/* Per-context profiler objects were released when we called
|
/* Per-context profiler objects were released when we called
|
||||||
* dbg_unbind_all_channels. We could still have global ones.
|
* dbg_unbind_all_channels. We could still have global ones.
|
||||||
*/
|
*/
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
|
nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
|
||||||
nvgpu_profiler_object, prof_obj_entry) {
|
nvgpu_profiler_object, prof_obj_entry) {
|
||||||
if (prof_obj->session_id == dbg_s->id) {
|
if (prof_obj->session_id == dbg_s->id) {
|
||||||
@@ -231,6 +232,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
|
|||||||
nvgpu_profiler_free(prof_obj);
|
nvgpu_profiler_free(prof_obj);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
dbg_s->gpu_instance_id = 0U;
|
dbg_s->gpu_instance_id = 0U;
|
||||||
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
||||||
|
|
||||||
@@ -463,6 +465,7 @@ static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
|
|||||||
/* If there's a profiler ctx reservation record associated with this
|
/* If there's a profiler ctx reservation record associated with this
|
||||||
* session/channel pair, release it.
|
* session/channel pair, release it.
|
||||||
*/
|
*/
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
|
nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
|
||||||
nvgpu_profiler_object, prof_obj_entry) {
|
nvgpu_profiler_object, prof_obj_entry) {
|
||||||
if ((prof_obj->session_id == dbg_s->id) &&
|
if ((prof_obj->session_id == dbg_s->id) &&
|
||||||
@@ -472,6 +475,7 @@ static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
|
|||||||
nvgpu_profiler_free(prof_obj);
|
nvgpu_profiler_free(prof_obj);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
|
|
||||||
nvgpu_list_del(&ch_data->ch_entry);
|
nvgpu_list_del(&ch_data->ch_entry);
|
||||||
|
|
||||||
@@ -1129,6 +1133,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
|
|||||||
* return an error, at the point where all client sw has been
|
* return an error, at the point where all client sw has been
|
||||||
* cleaned up.
|
* cleaned up.
|
||||||
*/
|
*/
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
|
nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
|
||||||
nvgpu_profiler_object, prof_obj_entry) {
|
nvgpu_profiler_object, prof_obj_entry) {
|
||||||
if (prof_obj->session_id == dbg_s->id) {
|
if (prof_obj->session_id == dbg_s->id) {
|
||||||
@@ -1137,6 +1142,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
|
|
||||||
if (!reserved) {
|
if (!reserved) {
|
||||||
nvgpu_err(g, "session doesn't have a valid reservation");
|
nvgpu_err(g, "session doesn't have a valid reservation");
|
||||||
@@ -1371,7 +1377,9 @@ static int nvgpu_ioctl_allocate_profiler_object(
|
|||||||
if (ch != NULL) {
|
if (ch != NULL) {
|
||||||
tsg = nvgpu_tsg_from_ch(ch);
|
tsg = nvgpu_tsg_from_ch(ch);
|
||||||
if (tsg == NULL) {
|
if (tsg == NULL) {
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_profiler_free(prof_obj);
|
nvgpu_profiler_free(prof_obj);
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
goto clean_up;
|
goto clean_up;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1404,6 +1412,7 @@ static int nvgpu_ioctl_free_profiler_object(
|
|||||||
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
|
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
|
||||||
|
|
||||||
/* Remove profiler object from the list, if a match is found */
|
/* Remove profiler object from the list, if a match is found */
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
|
nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
|
||||||
nvgpu_profiler_object, prof_obj_entry) {
|
nvgpu_profiler_object, prof_obj_entry) {
|
||||||
if (prof_obj->prof_handle == args->profiler_handle) {
|
if (prof_obj->prof_handle == args->profiler_handle) {
|
||||||
@@ -1421,6 +1430,7 @@ static int nvgpu_ioctl_free_profiler_object(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
if (!obj_found) {
|
if (!obj_found) {
|
||||||
nvgpu_err(g, "profiler %x not found",
|
nvgpu_err(g, "profiler %x not found",
|
||||||
args->profiler_handle);
|
args->profiler_handle);
|
||||||
@@ -1436,8 +1446,9 @@ static struct nvgpu_profiler_object *find_matching_prof_obj(
|
|||||||
u32 profiler_handle)
|
u32 profiler_handle)
|
||||||
{
|
{
|
||||||
struct gk20a *g = dbg_s->g;
|
struct gk20a *g = dbg_s->g;
|
||||||
struct nvgpu_profiler_object *prof_obj;
|
struct nvgpu_profiler_object *prof_obj, *ret_obj;
|
||||||
|
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
|
nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
|
||||||
nvgpu_profiler_object, prof_obj_entry) {
|
nvgpu_profiler_object, prof_obj_entry) {
|
||||||
if (prof_obj->prof_handle == profiler_handle) {
|
if (prof_obj->prof_handle == profiler_handle) {
|
||||||
@@ -1445,12 +1456,18 @@ static struct nvgpu_profiler_object *find_matching_prof_obj(
|
|||||||
nvgpu_err(g,
|
nvgpu_err(g,
|
||||||
"invalid handle %x",
|
"invalid handle %x",
|
||||||
profiler_handle);
|
profiler_handle);
|
||||||
return NULL;
|
ret_obj = NULL;
|
||||||
|
goto ret;
|
||||||
}
|
}
|
||||||
return prof_obj;
|
ret_obj = prof_obj;
|
||||||
|
goto ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return NULL;
|
ret_obj = NULL;
|
||||||
|
|
||||||
|
ret:
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
|
return ret_obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* used in scenarios where the debugger session can take just the inter-session
|
/* used in scenarios where the debugger session can take just the inter-session
|
||||||
@@ -1577,7 +1594,9 @@ static int nvgpu_perfbuf_reserve_pma(struct dbg_session_gk20a *dbg_s)
|
|||||||
NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
|
NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
nvgpu_err(g, "Failed to reserve PMA stream");
|
nvgpu_err(g, "Failed to reserve PMA stream");
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_profiler_free(dbg_s->prof);
|
nvgpu_profiler_free(dbg_s->prof);
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2131,6 +2150,8 @@ static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
|
|||||||
/* Find matching object. */
|
/* Find matching object. */
|
||||||
prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
|
prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
|
||||||
|
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
|
|
||||||
if (!prof_obj) {
|
if (!prof_obj) {
|
||||||
nvgpu_err(g, "object not found");
|
nvgpu_err(g, "object not found");
|
||||||
err = -EINVAL;
|
err = -EINVAL;
|
||||||
@@ -2158,6 +2179,7 @@ static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
|
|||||||
NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);
|
NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -130,7 +130,9 @@ static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
|
|||||||
free_umd_buf:
|
free_umd_buf:
|
||||||
nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
|
nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
|
||||||
free_prof:
|
free_prof:
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_profiler_free(prof);
|
nvgpu_profiler_free(prof);
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
free_priv:
|
free_priv:
|
||||||
nvgpu_kfree(g, prof_priv);
|
nvgpu_kfree(g, prof_priv);
|
||||||
return err;
|
return err;
|
||||||
@@ -211,7 +213,9 @@ int nvgpu_prof_fops_release(struct inode *inode, struct file *filp)
|
|||||||
|
|
||||||
nvgpu_prof_free_pma_stream_priv_data(prof_priv);
|
nvgpu_prof_free_pma_stream_priv_data(prof_priv);
|
||||||
|
|
||||||
|
nvgpu_mutex_acquire(&g->prof_obj_lock);
|
||||||
nvgpu_profiler_free(prof);
|
nvgpu_profiler_free(prof);
|
||||||
|
nvgpu_mutex_release(&g->prof_obj_lock);
|
||||||
|
|
||||||
nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
|
nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
|
||||||
nvgpu_kfree(g, prof_priv->regops_staging_buf);
|
nvgpu_kfree(g, prof_priv->regops_staging_buf);
|
||||||
|
|||||||
Reference in New Issue
Block a user