gpu: nvgpu: update reporting of errors to sdl

In Drive 6.0, error reporting is supported only for Orin (ga10b)
in dev-main. For this purpose, this patch does the following:

- Removes the redundant reporting of the following IDs from gv11b:
  - GPU_HOST_PFIFO_SCHED_ERROR
  - GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR
  - GPU_HOST_PBDMA_HCE_ERROR
  - GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED
  - GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED
  - GPU_LTC_CACHE_DSTG_ECC_CORRECTED
  - GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED

- Migrates the reporting of the following IDs from gv11b to ga10b:
  - GPU_SM_L1_TAG_ECC_CORRECTED
  - GPU_SM_L1_TAG_ECC_UNCORRECTED
  - GPU_SM_CBU_ECC_UNCORRECTED
  - GPU_SM_LRF_ECC_UNCORRECTED
  - GPU_SM_L1_DATA_ECC_UNCORRECTED
  - GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED
  - GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED
  - GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED
  - GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED

- Removes the unused ID that doesn't have any HSI related to it:
  - GPU_HOST_PBDMA_PREEMPT_ERROR

In addition to the above, this patch does the following:
- Updates the error IDs related to page fault errors.
- Updates the look-up table to remove unused error IDs.

JIRA NVGPU-8094
Bug 200729736

Change-Id: Ifea76d38ba609c894560e61ff5a6e406290f919e
Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2685249
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Dinesh T <dt@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Rajesh Devaraj
2022-03-22 17:11:29 +05:30
committed by mobile promotions
parent 7ff977063b
commit 37c6b8b1c3
10 changed files with 83 additions and 163 deletions

View File

@@ -78,10 +78,9 @@ struct mmu_fault_info;
#define GPU_HOST_PBDMA_METHOD_ERROR (11U)
#define GPU_HOST_PBDMA_SIGNATURE_ERROR (12U)
#define GPU_HOST_PBDMA_HCE_ERROR (13U)
-#define GPU_HOST_PBDMA_PREEMPT_ERROR (14U)
-#define GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR (15U)
-#define GPU_HOST_PFIFO_FB_FLUSH_TIMEOUT_ERROR (16U)
-#define GPU_HOST_INVALID_ERROR (17U)
+#define GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR (14U)
+#define GPU_HOST_PFIFO_FB_FLUSH_TIMEOUT_ERROR (15U)
+#define GPU_HOST_INVALID_ERROR (16U)
/**
* @}
*/
@@ -116,11 +115,11 @@ struct mmu_fault_info;
*/
#define GPU_FECS_FALCON_IMEM_ECC_CORRECTED (0U)
#define GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED (1U)
-#define GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED (3U)
-#define GPU_FECS_CTXSW_WATCHDOG_TIMEOUT (4U)
-#define GPU_FECS_CTXSW_CRC_MISMATCH (5U)
-#define GPU_FECS_FAULT_DURING_CTXSW (6U)
-#define GPU_FECS_CTXSW_INIT_ERROR (7U)
+#define GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED (2U)
+#define GPU_FECS_CTXSW_WATCHDOG_TIMEOUT (3U)
+#define GPU_FECS_CTXSW_CRC_MISMATCH (4U)
+#define GPU_FECS_FAULT_DURING_CTXSW (5U)
+#define GPU_FECS_CTXSW_INIT_ERROR (6U)
/**
* @}
*/
@@ -132,7 +131,7 @@ struct mmu_fault_info;
*/
#define GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED (0U)
#define GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED (1U)
-#define GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED (3U)
+#define GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED (2U)
/**
* @}
*/
@@ -153,7 +152,7 @@ struct mmu_fault_info;
* Macros used to assign unique index to errors reported from the GCC unit.
* @{
*/
-#define GPU_GCC_L15_ECC_UNCORRECTED (1U)
+#define GPU_GCC_L15_ECC_UNCORRECTED (0U)
/**
* @}
*/
@@ -264,25 +263,19 @@ struct mmu_fault_info;
* Macros used to assign unique index to errors reported from the HUBMMU unit.
* @{
*/
-#define GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED (0U)
-#define GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED (1U)
-#define GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED (2U)
-#define GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED (3U)
-#define GPU_HUBMMU_PAGE_FAULT_ERROR (4U)
+#define GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED (0U)
+#define GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED (1U)
+#define GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED (2U)
+#define GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED (3U)
+#define GPU_HUBMMU_PAGE_FAULT_OTHER_FAULT_NOTIFY_ERROR (4U)
+#define GPU_HUBMMU_PAGE_FAULT_NONREPLAYABLE_FAULT_OVERFLOW_ERROR (5U)
+#define GPU_HUBMMU_PAGE_FAULT_REPLAYABLE_FAULT_OVERFLOW_ERROR (6U)
+#define GPU_HUBMMU_PAGE_FAULT_REPLAYABLE_FAULT_NOTIFY_ERROR (7U)
+#define GPU_HUBMMU_PAGE_FAULT_NONREPLAYABLE_FAULT_NOTIFY_ERROR (8U)
/**
* @}
*/
-/**
-* This assigns an unique index for sub-errors
-* in GPU_HUBMMU_PAGE_FAULT_ERROR.
-*/
-#define GPU_HUBMMU_REPLAYABLE_FAULT_OVERFLOW (0U)
-#define GPU_HUBMMU_REPLAYABLE_FAULT_NOTIFY (1U)
-#define GPU_HUBMMU_NONREPLAYABLE_FAULT_OVERFLOW (2U)
-#define GPU_HUBMMU_NONREPLAYABLE_FAULT_NOTIFY (3U)
-#define GPU_HUBMMU_OTHER_FAULT_NOTIFY (4U)
/**
* @defgroup LIST_OF_ERRORS_REPORTED_FROM_PRI
* Macros used to assign unique index to errors reported from the PRI unit.