Files
linux-nvgpu/drivers/gpu/nvgpu/common/cic/cic_priv.h
2025-02-05 08:41:59 -08:00

292 lines
7.5 KiB
C

/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef CIC_PRIV_H
#define CIC_PRIV_H
#include <nvgpu/types.h>
struct gk20a;
struct nvgpu_err_hw_module;
struct nvgpu_err_msg;
struct gpu_err_header;
/**
* @file
*
* Declares the CIC-private structure that stores the error-policy look-up
* table (LUT), along with other data and operations needed during error
* reporting.
*/
/*
* Byte-sized test pattern (0xA5) associated with error injection.
* NOTE(review): presumably consumed by the nvgpu_inject_*_swerror() helpers
* declared in this header; its users are not visible here - confirm.
*/
#define ERR_INJECT_TEST_PATTERN 0xA5
/**
* @brief CIC private state.
*
* Holds the members related to the error-policy look-up table and the
* number of units reporting errors.
*
* NOTE(review): num_hw_modules is presumably the number of entries reachable
* through err_lut - confirm against the code that populates this struct.
*/
struct nvgpu_cic {
/** Pointer for error look-up table. */
struct nvgpu_err_hw_module *err_lut;
/** Total number of GPU HW modules considered in CIC. */
u32 num_hw_modules;
};
/**
* @brief Inject ECC error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param inst [in] - Instance ID.
*
* Steps performed:
* - Sets values for error address and error count.
* - Invokes error reporting API with the required set of inputs.
*
* NOTE(review): valid ranges for hw_unit, err_index and inst are not stated
* in this header - confirm against the implementation.
*
* @return None
*/
void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 inst);
/**
* @brief Inject HOST error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* Steps performed:
* - Invokes error reporting API with the required set of inputs.
*
* NOTE(review): valid ranges for hw_unit, err_index and sub_err_type are not
* stated in this header - confirm against the implementation.
*
* @return None
*/
void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Inject GR error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* Steps performed:
* - Sets values for GR exception and SM machine check error information.
* - Invokes error reporting API with the required set of inputs.
*
* NOTE(review): valid ranges for hw_unit, err_index and sub_err_type are not
* stated in this header - confirm against the implementation.
*
* @return None
*/
void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Inject CE error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* Steps performed:
* - Invokes error reporting API with the required set of inputs.
*
* NOTE(review): valid ranges for hw_unit, err_index and sub_err_type are not
* stated in this header - confirm against the implementation.
*
* @return None
*/
void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Inject PRI error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param err_code [in] - Error code.
*
* Steps performed:
* - Invokes error reporting API with the required set of inputs.
*
* Unlike the other injection helpers in this header, the last argument is
* an error code rather than a sub error type or instance ID.
*
* @return None
*/
void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 err_code);
/**
* @brief Inject PMU error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* Steps performed:
* - Sets values for error info.
* - Invokes error reporting API with the required set of inputs.
*
* NOTE(review): valid ranges for hw_unit, err_index and sub_err_type are not
* stated in this header - confirm against the implementation.
*
* @return None
*/
void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Inject CTXSW error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param inst [in] - Instance ID.
*
* Steps performed:
* - Sets values for error info.
* - Invokes error reporting API with the required set of inputs.
*
* NOTE(review): valid ranges for hw_unit, err_index and inst are not stated
* in this header - confirm against the implementation.
*
* @return None
*/
void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 inst);
/**
* @brief Inject MMU error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* Steps performed:
* - Sets values for MMU page fault info.
* - Invokes error reporting API with the required set of inputs.
*
* NOTE(review): valid ranges for hw_unit, err_index and sub_err_type are not
* stated in this header - confirm against the implementation.
*
* @return None
*/
void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Initialize error message header.
*
* @param header [in] - Error message header.
*
* This is used to initialize the error message header.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_err_msg_header(struct gpu_err_header *header);
/**
* @brief Initialize error message.
*
* @param msg [in] - Error message.
*
* This is used to initialize the part of the error message that is common
* for all HW units.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for HOST unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize the error message content that is specific to
* the HOST unit.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize ECC error message.
*
* @param msg [in] - Error message.
*
* This is used to initialize the error message content that is specific to
* ECC errors.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for PRI unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize the error message content that is specific to
* the PRI unit.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for CE unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize the error message content that is specific to
* the CE unit.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for PMU unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize the error message content that is specific to
* the PMU unit.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for GR unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize the error message content that is specific to
* the GR unit.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for CTXSW.
*
* @param msg [in] - Error message.
*
* This is used to initialize the error message content that is specific to
* CTXSW.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for MMU unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize the error message content that is specific to
* the MMU unit.
*
* NOTE(review): the parameter is tagged [in], but the pointer is non-const
* and the function initializes the pointed-to object, so the direction is
* likely [out] or [in,out] - confirm against the implementation.
*
* @return None
*/
void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg);
#endif /* CIC_PRIV_H */