From fac998940c0d6147d0da41ccd4c5e8e060e2f0a9 Mon Sep 17 00:00:00 2001
From: Rajesh Devaraj <rdevaraj@nvidia.com>
Date: Wed, 4 May 2022 11:47:24 +0000
Subject: [PATCH] gpu: nvgpu: enable polling support for error reporting in
 AV+L

As per Safety_Services, a client must poll to ensure that previously
reported errors have been cleared at FSI before reporting back-to-back
errors. Until now, to minimize polling overhead, the NvGPU driver polled
only when the error to be reported was a corrected error, to ensure that
it did not overwrite a previously reported uncorrected/corrected error.
Uncorrected errors were reported without polling. As a result, error
reporting fails when uncorrected errors are reported back-to-back. This
is acceptable for safety builds, where SW quiesce is triggered
immediately after the first uncorrected error is reported. In other
build configurations, MCU/SEH decides what action to take on
encountering uncorrected errors. To handle such build configurations,
polling is now enabled for all types of errors, in all build
configurations.

This patch also removes the macro "ERR_TYPE_MASK", which becomes unused
once the corrected-error check is dropped.

Bug 3622420

Change-Id: I750b0406faec9b229d8d0c74e986807234362cb9
Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2707105
Reviewed-by: Tejal Kudav <tkudav@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/include/nvgpu/cic_mon.h     |  1 -
 .../gpu/nvgpu/os/linux/cic/cic_report_err.c   | 53 +++++++++----------
 2 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/nvgpu/include/nvgpu/cic_mon.h b/drivers/gpu/nvgpu/include/nvgpu/cic_mon.h
index 14555342c..7fcbacaa9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/cic_mon.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/cic_mon.h
@@ -35,7 +35,6 @@
 #define ERR_REPORT_TIMEOUT_US		(5000U)
 #define SS_WAIT_DURATION_US		(500U)
 #define MAX_SS_RETRIES (ERR_REPORT_TIMEOUT_US / SS_WAIT_DURATION_US)
-#define ERR_TYPE_MASK		((1U) << (CORRECTED_BIT_FIELD_SHIFT))
 
 #define U32_BITS		32U
 #define DIV_BY_U32_BITS(x)	((x) / U32_BITS)
diff --git a/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c b/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c
index 4f874947c..1a186d622 100644
--- a/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c
+++ b/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c
@@ -35,43 +35,38 @@ int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id)
 	struct device *dev = dev_from_gk20a(g);
 
 	if (g->enable_polling == true) {
-		/**
-		 * Do polling, if the error to be reported is corrected one.
-		 */
-		if ((err_id & ERR_TYPE_MASK) == 0U) {
-			for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U;
+		for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U;
 					ss_retries--) {
-				ret = epl_get_misc_ec_err_status(dev,
+			ret = epl_get_misc_ec_err_status(dev,
 						MISC_EC_SW_ERR_CODE_0,
 						&ss_status);
-				if (ret == 0) {
-					if (ss_status == true) {
-						/**
-						 * Previously reported error is
-						 * cleared at Safety_Services.
-						 */
-						break;
-					} else {
-						nvgpu_udelay(SS_WAIT_DURATION_US);
-						continue;
-					}
-				} else if (ret == -ENODEV) {
-					nvgpu_err(g, "Error reporting is not "
-						"supported in this platform");
-					ret = 0;
-					return ret;
+			if (ret == 0) {
+				if (ss_status == true) {
+					/**
+					 * Previously reported error is cleared.
+					 */
+					break;
 				} else {
-					nvgpu_err(g, "Error reporting failed");
-					return ret;
+					nvgpu_info(g, "Polling is in progress");
+					nvgpu_udelay(SS_WAIT_DURATION_US);
+					continue;
 				}
-			}
-			if (ss_retries == 0U) {
-				nvgpu_err(g, "Error reporting failed: previous"
-						"error is not cleared");
-				ret = -1;
+			} else if (ret == -ENODEV) {
+				nvgpu_err(g, "Error reporting is not "
+						"supported in this platform");
+				ret = 0;
+				return ret;
+			} else {
+				nvgpu_err(g, "Error reporting failed");
 				return ret;
 			}
 		}
+		if (ss_retries == 0U) {
+			nvgpu_err(g, "Error reporting failed: previous"
+					"error is not cleared after retries");
+			ret = -1;
+			return ret;
+		}
 	}
 
 	/**