gpu: nvgpu: add polling for back-to-back error reporting in av+l

When an error is reported to Safety_Services, it will be cleared at FSI and reported to SEH (System Error Handler). Since the MISC_EC interface provides only one register for error reporting, the status of the previously reported error must be polled before the next error is reported. For this purpose, this patch adds logic to perform that polling using epl_get_misc_ec_err_status() in AV+L. JIRA NVGPU-8094 Bug 200729736 Change-Id: Ia01a2fc42a7ce586b7965a82c90027a9a2dd252b Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2684141 Reviewed-by: Dinesh T <dt@nvidia.com> Reviewed-by: Ankur Kishore <ankkishore@nvidia.com> GVS: Gerrit_Virtual_Submit
2025-12-22 17:36:20 +03:00 · 2022-03-19 18:13:02 +00:00
parent 9edbac4494
commit 4652f96a6f
2 changed files with 55 additions and 1 deletions
--- a/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c
+++ b/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c
@@ -19,6 +19,7 @@
 #ifdef CONFIG_NVGPU_ENABLE_MISC_EC
 #include <linux/tegra-epl.h>
 #include <nvgpu/timers.h>
 #include "os/linux/os_linux.h"
 #endif
@@ -26,11 +27,60 @@ struct gk20a;
 int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id)
 {
-	int ret = 0U;
+	int ret = 0;
 #ifdef CONFIG_NVGPU_ENABLE_MISC_EC
 	u32 ss_retries = 0U;
 	bool ss_status;
 	struct device *dev = dev_from_gk20a(g);
 	if (g->enable_polling == true) {
 		/**
 		 * Poll only if the error to be reported is a corrected one.
 		 */
 		if ((err_id & ERR_TYPE_MASK) == 0U) {
 			for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U;
 					ss_retries--) {
 				ret = epl_get_misc_ec_err_status(dev,
 						MISC_EC_SW_ERR_CODE_0,
 						&ss_status);
 				if (ret == 0) {
 					if (ss_status == true) {
 						/**
 						 * Previously reported error is
 						 * cleared at Safety_Services.
 						 */
 						break;
 					} else {
 						nvgpu_udelay(SS_WAIT_DURATION_US);
 						continue;
 					}
 				} else if (ret == -ENODEV) {
 					nvgpu_err(g, "Error reporting is not "
 						"supported in this platform");
 					ret = 0;
 					return ret;
 				} else {
 					nvgpu_err(g, "Error reporting failed");
 					return ret;
 				}
 			}
 			if (ss_retries == 0U) {
 				nvgpu_err(g, "Error reporting failed: previous"
 						"error is not cleared");
 				ret = -1;
 				return ret;
 			}
 		}
 	}
 	/**
 	 * Enable polling immediately after reporting of first error from boot.
 	 */
 	if (g->enable_polling == false) {
 		g->enable_polling = true;
 	}
 	/**
 	 * MISC_EC_SW_ERR_CODE_0 register has been allocated for NvGPU
 	 * to report GPU HW errors to Safety_Services via MISC_EC interface.
--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -452,6 +452,10 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 		}
 	}
 #ifdef CONFIG_NVGPU_ENABLE_MISC_EC
 	g->enable_polling = false;
 #endif
 	err = gk20a_restore_registers(g);
 	if (err)
 		goto done;