mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: compile-out unused apis from safety build
This patch makes the following changes:
- Compiles out unused error reporting APIs and the related data structures from the safety build. For this purpose, it introduces a new flag: CONFIG_NVGPU_INTR_DEBUG.
- Updates the nvgpu_report_err_to_sdl() API with one more argument, hw_unit_id. This aids in determining from the LUT whether an error to be reported is corrected or uncorrected.
- Triggers SW quiesce in the safety build if an uncorrected error is reported to Safety_Services.
- Renames files in the cic folder by replacing gv11b with ga10b, since error reporting for gv11b is not supported in dev-main.

JIRA NVGPU-8002
Change-Id: Ic01e73b0208252abba1f615a2c98d770cdf41ca4
Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2668466
Reviewed-by: Tejal Kudav <tkudav@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
committed by
mobile promotions
parent
81c220b95b
commit
0699220b85
@@ -128,6 +128,7 @@ struct gops_ltc_intr {
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
|
||||
* "nvgpu_report_err_to_sdl" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC
|
||||
* -# \ref GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED
|
||||
* "GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED"
|
||||
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m() is
|
||||
@@ -142,6 +143,7 @@ struct gops_ltc_intr {
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
|
||||
* "nvgpu_report_err_to_sdl" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC
|
||||
* -# \ref GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED
|
||||
* "GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED"
|
||||
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m() is
|
||||
@@ -157,6 +159,7 @@ struct gops_ltc_intr {
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
|
||||
* "nvgpu_report_err_to_sdl" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC
|
||||
* -# \ref GPU_LTC_CACHE_DSTG_ECC_CORRECTED
|
||||
* "GPU_LTC_CACHE_DSTG_ECC_CORRECTED"
|
||||
* -# Flush the L2 cache by calling
|
||||
@@ -173,6 +176,7 @@ struct gops_ltc_intr {
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
|
||||
* "nvgpu_report_err_to_sdl" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC
|
||||
* -# \ref GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED
|
||||
* "GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED"
|
||||
* -# Else if the ECC address corresponds to DSTG BE RAM:
|
||||
@@ -182,6 +186,7 @@ struct gops_ltc_intr {
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
|
||||
* "nvgpu_report_err_to_sdl" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC
|
||||
* -# \ref GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED
|
||||
* "GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED"
|
||||
* -# Else call \ref BUG "BUG()" as this type of ECC error is not supported.
|
||||
|
||||
@@ -108,28 +108,6 @@ struct mmu_fault_info;
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* This structure is used to store SM machine check related information.
|
||||
*/
|
||||
struct gr_sm_mcerr_info {
|
||||
/** PC which triggered the machine check error. */
|
||||
u64 hww_warp_esr_pc;
|
||||
|
||||
/** Error status register. */
|
||||
u32 hww_warp_esr_status;
|
||||
|
||||
/** GR engine context of the faulted channel. */
|
||||
u32 curr_ctx;
|
||||
|
||||
/** Channel to which the context belongs. */
|
||||
u32 chid;
|
||||
|
||||
/** TSG to which the channel is bound. */
|
||||
u32 tsgid;
|
||||
|
||||
/** IDs of TPC, GPC, and SM. */
|
||||
u32 tpc, gpc, sm;
|
||||
};
|
||||
|
||||
/**
|
||||
* @defgroup LIST_OF_ERRORS_REPORTED_FROM_FECS
|
||||
@@ -147,32 +125,6 @@ struct gr_sm_mcerr_info {
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* This structure is used to store CTXSW error related information.
|
||||
*/
|
||||
struct ctxsw_err_info {
|
||||
|
||||
/** GR engine context of the faulted channel. */
|
||||
u32 curr_ctx;
|
||||
|
||||
/** Context-switch status register-0. */
|
||||
u32 ctxsw_status0;
|
||||
|
||||
/** Context-switch status register-1. */
|
||||
u32 ctxsw_status1;
|
||||
|
||||
/** Channel to which the context belongs. */
|
||||
u32 chid;
|
||||
|
||||
/**
|
||||
* In case of any fault during context-switch transaction,
|
||||
* context-switch error interrupt is set and the FECS firmware
|
||||
* writes error code into FECS mailbox 6. This exception
|
||||
* is handled at GR unit.
|
||||
*/
|
||||
u32 mailbox_value;
|
||||
};
|
||||
|
||||
/**
|
||||
* @defgroup LIST_OF_ERRORS_REPORTED_FROM_GPCCS
|
||||
* Macros used to assign unique index to errors reported from the GPCCS unit.
|
||||
@@ -268,23 +220,6 @@ struct ctxsw_err_info {
|
||||
#define GPU_PGRAPH_ILLEGAL_CLASS (2U)
|
||||
#define GPU_PGRAPH_CLASS_ERROR (3U)
|
||||
|
||||
/**
|
||||
* This structure is used to store GR exception related information.
|
||||
*/
|
||||
struct gr_exception_info {
|
||||
/** GR engine context of the faulted channel. */
|
||||
u32 curr_ctx;
|
||||
|
||||
/** Channel bound to the context. */
|
||||
u32 chid;
|
||||
|
||||
/** TSG to which the channel is bound. */
|
||||
u32 tsgid;
|
||||
|
||||
/** GR interrupt status. */
|
||||
u32 status;
|
||||
};
|
||||
|
||||
/**
|
||||
* @defgroup LIST_OF_ERRORS_REPORTED_FROM_LTC
|
||||
* Macros used to assign unique index to errors reported from the LTC unit.
|
||||
@@ -347,17 +282,6 @@ struct gr_exception_info {
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* This structure is used to store GR error related information.
|
||||
*/
|
||||
struct gr_err_info {
|
||||
/** SM machine check error information. */
|
||||
struct gr_sm_mcerr_info *sm_mcerr_info;
|
||||
|
||||
/** GR exception related information. */
|
||||
struct gr_exception_info *exception_info;
|
||||
};
|
||||
|
||||
/**
|
||||
* This macro is used to initialize the members of nvgpu_hw_err_inject_info
|
||||
* struct.
|
||||
@@ -392,6 +316,85 @@ struct nvgpu_hw_err_inject_info_desc {
|
||||
u32 info_size;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_NVGPU_INTR_DEBUG
|
||||
|
||||
/**
|
||||
* This structure is used to store SM machine check related information.
|
||||
*/
|
||||
struct gr_sm_mcerr_info {
|
||||
/** PC which triggered the machine check error. */
|
||||
u64 hww_warp_esr_pc;
|
||||
|
||||
/** Error status register. */
|
||||
u32 hww_warp_esr_status;
|
||||
|
||||
/** GR engine context of the faulted channel. */
|
||||
u32 curr_ctx;
|
||||
|
||||
/** Channel to which the context belongs. */
|
||||
u32 chid;
|
||||
|
||||
/** TSG to which the channel is bound. */
|
||||
u32 tsgid;
|
||||
|
||||
/** IDs of TPC, GPC, and SM. */
|
||||
u32 tpc, gpc, sm;
|
||||
};
|
||||
|
||||
/**
|
||||
* This structure is used to store CTXSW error related information.
|
||||
*/
|
||||
struct ctxsw_err_info {
|
||||
|
||||
/** GR engine context of the faulted channel. */
|
||||
u32 curr_ctx;
|
||||
|
||||
/** Context-switch status register-0. */
|
||||
u32 ctxsw_status0;
|
||||
|
||||
/** Context-switch status register-1. */
|
||||
u32 ctxsw_status1;
|
||||
|
||||
/** Channel to which the context belongs. */
|
||||
u32 chid;
|
||||
|
||||
/**
|
||||
* In case of any fault during context-switch transaction,
|
||||
* context-switch error interrupt is set and the FECS firmware
|
||||
* writes error code into FECS mailbox 6. This exception
|
||||
* is handled at GR unit.
|
||||
*/
|
||||
u32 mailbox_value;
|
||||
};
|
||||
|
||||
/**
|
||||
* This structure is used to store GR exception related information.
|
||||
*/
|
||||
struct gr_exception_info {
|
||||
/** GR engine context of the faulted channel. */
|
||||
u32 curr_ctx;
|
||||
|
||||
/** Channel bound to the context. */
|
||||
u32 chid;
|
||||
|
||||
/** TSG to which the channel is bound. */
|
||||
u32 tsgid;
|
||||
|
||||
/** GR interrupt status. */
|
||||
u32 status;
|
||||
};
|
||||
|
||||
/**
|
||||
* This structure is used to store GR error related information.
|
||||
*/
|
||||
struct gr_err_info {
|
||||
/** SM machine check error information. */
|
||||
struct gr_sm_mcerr_info *sm_mcerr_info;
|
||||
|
||||
/** GR exception related information. */
|
||||
struct gr_exception_info *exception_info;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This function provides an interface to report errors from HOST
|
||||
* (PFIFO/PBDMA/PBUS) unit to SDL unit.
|
||||
@@ -1194,17 +1197,19 @@ void nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit,
|
||||
*/
|
||||
void gr_intr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid,
|
||||
u32 mailbox_value);
|
||||
#endif /* CONFIG_NVGPU_INTR_DEBUG */
|
||||
|
||||
/**
|
||||
* @brief This is a wrapper function to report errors from a GPU HW unit to SDL.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit_id [in] - HW Unit ID.
|
||||
* @param err_id [in] - Error ID.
|
||||
*
|
||||
* Calls nvgpu_report_err_to_ss to report errors to Safety_Services.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_report_err_to_sdl(struct gk20a *g, u32 err_id);
|
||||
void nvgpu_report_err_to_sdl(struct gk20a *g, u32 hw_unit_id, u32 err_id);
|
||||
|
||||
#endif /* NVGPU_NVGPU_ERR_H */
|
||||
|
||||
Reference in New Issue
Block a user