mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: Add ECC Support for GV11B in Linux
Implement nvgpu plumbing to allow reporting ECC errors (corrected and uncorrected) to an L1SS service (if one exists). This patch includes the following: 1) Added code that submits ECC error reports from interrupt context directly to an L1SS service on Linux. 2) Added support for enabling/disabling the error reports via L1SS's registration/deregistration API. Nvgpu simply invokes an empty function until the registration is successful. 3) Added a spinlock to correctly handle concurrent access to the ops used for submitting requests. 4) Added error reporting for a subset of interrupts that can be verified via external ECC injection logic. A subsequent patch will add the API for the rest of the interrupts. 5) In case of critical (uncorrected) errors, change nvgpu's state to the quiesce state. Jira L4T-1187 Bug 200700400 Change-Id: Id31f70531fba355e94e72c4f9762593e7667a11c Signed-off-by: Debarshi Dutta <ddutta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2530411 Tested-by: Bibek Basu <bbasu@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: Bibek Basu <bbasu@nvidia.com> Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
This commit is contained in:
committed by
mobile promotions
parent
5f88598b9e
commit
34993e4f7b
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* GK20A Graphics
|
||||
*
|
||||
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -49,6 +49,7 @@
|
||||
#include <nvgpu/clk_arb.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include "platform_gk20a.h"
|
||||
#include "sysfs.h"
|
||||
@@ -355,6 +356,10 @@ int gk20a_pm_finalize_poweron(struct device *dev)
|
||||
gk20a_init_cde_support(l);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
|
||||
nvgpu_enable_ecc_reporting(g);
|
||||
#endif
|
||||
|
||||
err = gk20a_sched_ctrl_init(g);
|
||||
if (err) {
|
||||
nvgpu_err(g, "failed to init sched control");
|
||||
@@ -364,9 +369,14 @@ int gk20a_pm_finalize_poweron(struct device *dev)
|
||||
g->sw_ready = true;
|
||||
|
||||
done:
|
||||
if (err)
|
||||
if (err) {
|
||||
g->power_on = false;
|
||||
|
||||
#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
|
||||
nvgpu_disable_ecc_reporting(g);
|
||||
#endif
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&g->power_lock);
|
||||
return err;
|
||||
}
|
||||
@@ -433,6 +443,10 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
|
||||
/* Stop CPU from accessing the GPU registers. */
|
||||
gk20a_lockout_registers(g);
|
||||
|
||||
#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
|
||||
nvgpu_disable_ecc_reporting(g);
|
||||
#endif
|
||||
|
||||
nvgpu_hide_usermode_for_poweroff(g);
|
||||
nvgpu_mutex_release(&g->power_lock);
|
||||
return 0;
|
||||
@@ -1382,6 +1396,10 @@ static int gk20a_probe(struct platform_device *dev)
|
||||
goto return_err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
|
||||
nvgpu_init_ecc_reporting(gk20a);
|
||||
#endif
|
||||
|
||||
gk20a->nvgpu_reboot_nb.notifier_call =
|
||||
nvgpu_kernel_shutdown_notification;
|
||||
err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb);
|
||||
|
||||
Reference in New Issue
Block a user