gpu: nvgpu: fix ecc counter free

ECC counter structures are freed without removing the node from the
stats_list. This can lead to invalid access due to dangling pointers.

Update the ECC counter free logic to validate each counter before freeing
it, remove it from the stats_list, and set it to NULL once freed.

Also update some of the ECC init paths where errors were not propagated
to callers and full deallocation of the ECC counters was not done.

Now, calling a unit's ecc_free from any context (with counters allocated
or not) is harmless, as the requisite checks are in place.

bug 3326612
bug 3345977

Change-Id: I05eb6ed226cff9197ad37776912da9dcb7e0716d
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2565264
Tested-by: Ashish Mhetre <amhetre@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Sagar Kamble
2021-06-17 11:34:36 +05:30
committed by mobile promotions
parent 2887d06e3b
commit 40064ef1ec
33 changed files with 546 additions and 218 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -121,7 +121,7 @@ int test_ecc_counter_init(struct unit_module *m, struct gk20a *g,
ret = UNIT_FAIL;
goto cleanup;
}
nvgpu_kfree(g, stat);
nvgpu_ecc_counter_deinit(g, &stat);
/*
* Case #2:
@@ -147,12 +147,17 @@ int test_ecc_counter_init(struct unit_module *m, struct gk20a *g,
ret = UNIT_FAIL;
goto cleanup;
}
nvgpu_kfree(g, stat);
stat = NULL;
nvgpu_ecc_counter_deinit(g, &stat);
if (!nvgpu_list_empty(&g->ecc.stats_list)) {
ret = UNIT_FAIL;
goto cleanup;
}
cleanup:
if (stat != NULL) {
nvgpu_kfree(g, stat);
nvgpu_ecc_counter_deinit(g, &stat);
}
nvgpu_kfree(g, name);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -110,6 +110,8 @@ int test_ecc_finalize_support(struct unit_module *m,
* - Set counter name to string with invalid length equal to
* NVGPU_ECC_STAT_NAME_MAX_SIZE.
* - "nvgpu_ecc_counter_init" will truncate the counter name and return 0.
* - Test case #4
* - Verify that the g->ecc.stats_list is empty.
*
* Output:
* - UNIT_FAIL under the following conditions:

View File

@@ -120,6 +120,8 @@ int fb_gv11b_init_test(struct unit_module *m, struct gk20a *g, void *args)
if (err != -ENOMEM) {
unit_return_fail(m, "gv11b_fb_ecc_init did not fail as expected (%d)\n", i);
}
g->ops.ecc.ecc_init_support(g);
}
err = g->ops.fb.ecc.init(g);

View File

@@ -440,7 +440,6 @@ static int mock_l2_flush(struct gk20a *g, bool inv)
int test_ltc_intr(struct unit_module *m, struct gk20a *g, void *args)
{
int err = UNIT_SUCCESS;
u32 i;
const u32 offset1 = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) *
nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
int (*save_func)(struct gk20a *g, bool inv);
@@ -560,15 +559,7 @@ int test_ltc_intr(struct unit_module *m, struct gk20a *g, void *args)
g->ops.mm.cache.l2_flush = save_func;
done:
for (i = 0; i < nvgpu_ltc_get_ltc_count(g); i++) {
if (g->ecc.ltc.ecc_sec_count != NULL) {
nvgpu_kfree(g, g->ecc.ltc.ecc_sec_count[i]);
}
if (g->ecc.ltc.ecc_ded_count != NULL) {
nvgpu_kfree(g, g->ecc.ltc.ecc_ded_count[i]);
}
}
nvgpu_ltc_ecc_free(g);
return err;
}