gpu: nvgpu: gp10b: enhance priv error reporting

-Append 0x for info dumped in hex format -Dump subid and priv_level for ERROR_INFO -Decode ERROR_CODE for supported error types Bug 2072157 Bug 200392445 Bug 2055510 Bug 200379815 Change-Id: I78df8ca15421ee37631157082648e9b545367c95 Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1672292 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 18:16:01 +03:00 · 2018-03-09 13:18:03 -08:00
parent 6e0de56121
commit f94c9d19c1
3 changed files with 111 additions and 12 deletions
--- a/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c
@@ -1,7 +1,7 @@
 /*
 * GP10B priv ring
 *
- * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -33,6 +33,69 @@
 #include <nvgpu/hw/gp10b/hw_pri_ringstation_sys_gp10b.h>
 #include <nvgpu/hw/gp10b/hw_pri_ringstation_gpc_gp10b.h>
 static const char * const invalid_str = "invalid";
 static const char *const error_type_badf1xyy[] = {
 	"client timeout",
 	"decode error",
 	"client in reset",
 	"client floorswept",
 	"client stuck ack",
 	"client expected ack",
 	"fence error",
 	"subid error",
 	"byte access unsupported",
 };
 static const char *const error_type_badf2xyy[] = {
 	"orphan gpc/fbp"
 };
 static const char *const error_type_badf3xyy[] = {
 	"priv ring dead"
 };
 static const char *const error_type_badf5xyy[] = {
 	"client error",
 	"priv level violation",
 	"indirect priv level violation",
 	"local local ring error",
 	"falcon mem access priv level violation",
 	"pri route error"
 };
 static void gp10b_priv_ring_decode_error_code(struct gk20a *g,
 			u32 error_code)
 {
 	u32 error_type, error_type_index;
 	error_type = (error_code & 0x0000f000) >> 24;
 	error_type_index = (error_code & 0x00000f00) >> 16;
 	error_code = error_code & 0xBADFf000;
 	if (error_code == 0xBADF1000) {
 		if (error_type_index <
 				ARRAY_SIZE(error_type_badf1xyy))
 			nvgpu_err(g, "%s",
 				error_type_badf1xyy[error_type_index]);
 	} else if (error_code == 0xBADF2000) {
 		if (error_type_index <
 				ARRAY_SIZE(error_type_badf2xyy))
 			nvgpu_err(g, "%s",
 				error_type_badf2xyy[error_type_index]);
 	} else if (error_code == 0xBADF3000) {
 		if (error_type_index <
 				ARRAY_SIZE(error_type_badf3xyy))
 			nvgpu_err(g, "%s",
 				error_type_badf3xyy[error_type_index]);
 	} else if (error_code == 0xBADF5000) {
 		if (error_type_index <
 				ARRAY_SIZE(error_type_badf5xyy))
 			nvgpu_err(g, "%s",
 				error_type_badf5xyy[error_type_index]);
 	}
 }
 void gp10b_priv_ring_isr(struct gk20a *g)
 {
 	u32 status0, status1;
@@ -40,6 +103,8 @@ void gp10b_priv_ring_isr(struct gk20a *g)
 	s32 retry = 100;
 	u32 gpc;
 	u32 gpc_stride, offset;
 	u32 error_info;
 	u32 error_code;
 	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
 		nvgpu_info(g, "unhandled priv ring intr");
@@ -63,11 +128,20 @@ void gp10b_priv_ring_isr(struct gk20a *g)
 		nvgpu_err(g, "ring overflowed");
 	if (pri_ringmaster_intr_status0_gbl_write_error_sys_v(status0) != 0) {
-		nvgpu_err(g, "SYS write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x",
+		error_info =
 			gk20a_readl(g, pri_ringstation_sys_priv_error_info_r());
 		error_code =
 			gk20a_readl(g, pri_ringstation_sys_priv_error_code_r());
 		nvgpu_err(g, "SYS write error. ADR 0x%08x WRDAT 0x%08x "
 				"INFO 0x%08x (subid 0x%08x priv level %d), "
 				"CODE 0x%08x",
 			gk20a_readl(g, pri_ringstation_sys_priv_error_adr_r()),
 			gk20a_readl(g, pri_ringstation_sys_priv_error_wrdat_r()),
-			gk20a_readl(g, pri_ringstation_sys_priv_error_info_r()),
+			error_info,
-			gk20a_readl(g, pri_ringstation_sys_priv_error_code_r()));
+			pri_ringstation_sys_priv_error_info_subid_v(error_info),
 			pri_ringstation_sys_priv_error_info_priv_level_v(error_info),
 			error_code);
 		gp10b_priv_ring_decode_error_code(g, error_code);
 	}
 	if (status1) {
@@ -75,16 +149,25 @@ void gp10b_priv_ring_isr(struct gk20a *g)
 		for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
 			offset = gpc * gpc_stride;
 			if (status1 & BIT(gpc)) {
-				nvgpu_err(g, "GPC%u write error. ADR %08x "
+				error_info = gk20a_readl(g,
-					"WRDAT %08x INFO %08x, CODE %08x", gpc,
+					pri_ringstation_gpc_gpc0_priv_error_info_r() + offset);
 				error_code = gk20a_readl(g,
 					pri_ringstation_gpc_gpc0_priv_error_code_r() + offset);
 				nvgpu_err(g, "GPC%u write error. ADR 0x%08x "
 					"WRDAT 0x%08x "
 					"INFO 0x%08x (subid 0x%08x priv level %d), "
 					"CODE 0x%08x", gpc,
 					gk20a_readl(g,
 					pri_ringstation_gpc_gpc0_priv_error_adr_r() + offset),
 					gk20a_readl(g,
 					pri_ringstation_gpc_gpc0_priv_error_wrdat_r() + offset),
-					gk20a_readl(g,
+					error_info,
-					pri_ringstation_gpc_gpc0_priv_error_info_r() + offset),
+					pri_ringstation_gpc_gpc0_priv_error_info_subid_v(error_info),
-					gk20a_readl(g,
+					pri_ringstation_gpc_gpc0_priv_error_info_priv_level_v(error_info),
-					pri_ringstation_gpc_gpc0_priv_error_code_r() + offset));
+					error_code);
 				gp10b_priv_ring_decode_error_code(g, error_code);
 				status1 = status1 & (~(BIT(gpc)));
 				if (!status1)
 					break;
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pri_ringstation_gpc_gp10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pri_ringstation_gpc_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -72,6 +72,14 @@ static inline u32 pri_ringstation_gpc_gpc0_priv_error_info_r(void)
 {
 	return 0x00128128U;
 }
 static inline u32 pri_ringstation_gpc_gpc0_priv_error_info_subid_v(u32 r)
 {
 	return (r >> 24U) & 0x3fU;
 }
 static inline u32 pri_ringstation_gpc_gpc0_priv_error_info_priv_level_v(u32 r)
 {
 	return (r >> 20U) & 0x3U;
 }
 static inline u32 pri_ringstation_gpc_gpc0_priv_error_code_r(void)
 {
 	return 0x0012812cU;
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pri_ringstation_sys_gp10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pri_ringstation_sys_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -84,6 +84,14 @@ static inline u32 pri_ringstation_sys_priv_error_info_r(void)
 {
 	return 0x00122128U;
 }
 static inline u32 pri_ringstation_sys_priv_error_info_subid_v(u32 r)
 {
 	return (r >> 24U) & 0x3fU;
 }
 static inline u32 pri_ringstation_sys_priv_error_info_priv_level_v(u32 r)
 {
 	return (r >> 20U) & 0x3U;
 }
 static inline u32 pri_ringstation_sys_priv_error_code_r(void)
 {
 	return 0x0012212cU;