gpu: nvgpu: fix ecc counter free

ECC counter structures are freed without removing the node from the
stats_list. This can lead to invalid access due to dangling pointers.

Update the ECC counter free logic to remove the counters from the
stats_list, free them only after validation, and set them to NULL
upon free.

Also update some of the ECC init paths where the error was not
propagated to callers and full ECC counter deallocation was not done.

Now, calling unit ecc_free from any context (with counters allocated
or not) is harmless, as the requisite checks are in place.

bug 3326612
bug 3345977

Change-Id: I05eb6ed226cff9197ad37776912da9dcb7e0716d
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2565264
Tested-by: Ashish Mhetre <amhetre@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Sagar Kamble
2021-06-17 11:34:36 +05:30
committed by mobile promotions
parent 2887d06e3b
commit 40064ef1ec
33 changed files with 546 additions and 218 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -28,9 +28,13 @@ int nvgpu_ecc_sysfs_init(struct gk20a *g)
struct nvgpu_ecc_stat *stat;
int i = 0, err;
nvgpu_mutex_acquire(&ecc->stats_lock);
attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count);
if (!attr)
if (!attr) {
nvgpu_mutex_release(&ecc->stats_lock);
return -ENOMEM;
}
nvgpu_list_for_each_entry(stat,
&ecc->stats_list, nvgpu_ecc_stat, node) {
@@ -54,6 +58,8 @@ int nvgpu_ecc_sysfs_init(struct gk20a *g)
i++;
}
nvgpu_mutex_release(&ecc->stats_lock);
if (err) {
while (i-- > 0)
device_remove_file(dev, &attr[i].attr);
@@ -73,8 +79,13 @@ void nvgpu_ecc_sysfs_remove(struct gk20a *g)
struct nvgpu_ecc *ecc = &g->ecc;
int i;
nvgpu_mutex_acquire(&ecc->stats_lock);
for (i = 0; i < ecc->stats_count; i++)
device_remove_file(dev, &l->ecc_attrs[i].attr);
nvgpu_mutex_release(&ecc->stats_lock);
nvgpu_kfree(g, l->ecc_attrs);
l->ecc_attrs = NULL;
}