gpu: nvgpu: Add Central Intr Controller unit

Add a new Central Interrupt Controller (CIC) unit in common code.
The interrupt handling is done in a distributed manner currently.
The error handling policy for different errors resides in each unit's
ISR code. The goal is to converge this data under one central place -
the CIC unit.

This patch creates framework for CIC unit and moves the gv11b QNX
safety LUT to CIC unit. All the error reporting APIs from different
units are also moved to CIC.

New APIs are exposed by CIC unit to access its internal data like:
  1. Struct err_desc - the static error handling/injection data per
                       error id
  2. Num_hw_modules  - the number of error reporting HW units
                       supported by CIC

Init and deinit of CIC unit:
  1. CIC unit should be initialized early on during boot so that it
     is available for any interrupt handling.
  2. Initialize CIC just before the interrupts are enabled during
     boot.
  3. Similarly, CIC is disabled late during deinit cycle; right
     after the interrupts are masked.

LUT:
  1. LUT is currently used only for reporting error to safety
     services in gv11b QNX safety build.
  2. This error handling policy LUT currently has only two levels
     of handling - correctable and quiesce.
  3. Once the error handling policy decision is moved from leaf
     unit nodes to CIC, LUT will be updated to have additional levels
     like fast recovery and full recovery.
  4. Also, then a separate LUT will be added for each platform/build.
  5. In current framework, the LUT is set to NULL for all
     configurations except gv11b.

A report_err() op is added to report errors to safety services.
This op is only effective for the gv11b QNX build and is set to NULL
for other configurations.

NVGPU-6521
NVGPU-6523
NVGPU-6750
NVGPU-6758
NVGPU-6760
NVGPU-6754

Change-Id: I24be7836a96d787741e37b732e19863ed8014635
Signed-off-by: Tejal Kudav <tkudav@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2518683
Reviewed-by: Ajesh K V <akv@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Tejal Kudav
2021-04-22 06:46:36 +00:00
committed by mobile promotions
parent bced5c5785
commit e0a1fcf5f5
43 changed files with 2584 additions and 147 deletions

View File

@@ -1054,7 +1054,19 @@ grmgr:
cic:
owner: Tejal K
safe: yes
sources: [ include/nvgpu/nvgpu_cic.h ]
sources: [ common/cic/cic.c,
common/cic/ce_cic.c,
common/cic/ctxsw_cic.c,
common/cic/msg_cic.c,
common/cic/ecc_cic.c,
common/cic/host_cic.c,
common/cic/gr_cic.c,
common/cic/pri_cic.c,
common/cic/pmu_cic.c,
common/cic/mmu_cic.c,
common/cic/cic_priv.h,
include/nvgpu/gops/cic.h,
include/nvgpu/cic.h ]
##
## HAL units. Currently they are under common but this needs to change.

View File

@@ -827,3 +827,10 @@ tpc:
owner: Divya S
sources: [ hal/tpc/tpc_gv11b.c,
hal/tpc/tpc_gv11b.h ]
cic:
safe: yes
owner: Tejal K
sources: [ hal/cic/cic_gv11b_fusa.c,
hal/cic/cic_lut_gv11b_fusa.c,
hal/cic/cic_gv11b.h ]

View File

@@ -227,8 +227,8 @@ vgpu:
vm:
sources: [ os/linux/vm.c ]
sdl:
sources: [ os/linux/sdl/sdl_stub.c ]
cic:
sources: [ os/linux/cic/cic_stub.c ]
# Group all the Linux headers for now.
headers:

View File

@@ -308,6 +308,16 @@ nvgpu-y += \
common/clk_arb/clk_arb_gp10b.o \
common/rc/rc.o \
common/grmgr/grmgr.o \
common/cic/cic.o \
common/cic/ce_cic.o \
common/cic/ctxsw_cic.o \
common/cic/ecc_cic.o \
common/cic/host_cic.o \
common/cic/gr_cic.o \
common/cic/pri_cic.o \
common/cic/pmu_cic.o \
common/cic/mmu_cic.o \
common/cic/msg_cic.o \
hal/bus/bus_gk20a.o \
hal/class/class_gm20b.o \
hal/class/class_gp10b.o \
@@ -380,7 +390,9 @@ nvgpu-y += \
hal/top/top_gp106.o \
hal/top/top_gp10b.o \
hal/tpc/tpc_gv11b.o \
hal/priv_ring/priv_ring_gv11b.o
hal/priv_ring/priv_ring_gv11b.o \
hal/cic/cic_gv11b_fusa.o \
hal/cic/cic_lut_gv11b_fusa.o
# Linux specific parts of nvgpu.
nvgpu-y += \
@@ -418,7 +430,7 @@ nvgpu-y += \
os/linux/dt.o \
os/linux/ecc_sysfs.o \
os/linux/bsearch.o \
os/linux/sdl/sdl_stub.o \
os/linux/cic/cic_stub.o \
os/linux/dmabuf_priv.o \
os/linux/power_ops.o

View File

@@ -152,6 +152,16 @@ srcs += common/device.c \
common/rc/rc.c \
common/ce/ce.c \
common/grmgr/grmgr.c \
common/cic/cic.c \
common/cic/ce_cic.c \
common/cic/ctxsw_cic.c \
common/cic/ecc_cic.c \
common/cic/host_cic.c \
common/cic/gr_cic.c \
common/cic/pri_cic.c \
common/cic/pmu_cic.c \
common/cic/mmu_cic.c \
common/cic/msg_cic.c \
hal/init/hal_gv11b.c \
hal/init/hal_gv11b_litter.c \
hal/init/hal_init.c \
@@ -246,7 +256,9 @@ srcs += hal/mm/mm_gv11b_fusa.c \
hal/sync/syncpt_cmdbuf_gv11b_fusa.c \
hal/therm/therm_gv11b_fusa.c \
hal/top/top_gm20b_fusa.c \
hal/top/top_gv11b_fusa.c
hal/top/top_gv11b_fusa.c \
hal/cic/cic_gv11b_fusa.c \
hal/cic/cic_lut_gv11b_fusa.c
# Source files below are not guaranteed to be functionally safe (FuSa) and are
# only included in the normal build.

View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>
#include "cic_priv.h"
/**
 * @brief Report a CE error to the safety services through CIC.
 *
 * @param g [in]		- The GPU driver struct.
 * @param hw_unit [in]		- HW module ID; must be NVGPU_ERR_MODULE_CE.
 * @param inst [in]		- Instance ID, stored as the sub-unit ID.
 * @param err_id [in]		- Error index used for the CIC LUT look-up.
 * @param intr_info [in]	- Interrupt info, stored as the sub error type.
 *
 * - Returns without any action when the report_err() HAL op is not set.
 * - Validates hw_unit and fetches the error descriptor from the CIC LUT.
 * - Fills the CE error packet and hands it to the report_err() HAL op.
 * - Any failure (invalid hw_unit, failed LUT look-up, failed report)
 *   triggers SW quiesce.
 *
 * @return None
 */
void nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	int err = 0;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_msg err_pkt;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
				"to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_CE) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		err = -EINVAL;
		goto handle_report_failure;
	}

	err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
	if (err != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto handle_report_failure;
	}

	nvgpu_init_ce_err_msg(&err_pkt);
	err_pkt.hw_unit_id = hw_unit;
	err_pkt.err_id = err_desc->error_id;
	err_pkt.is_critical = err_desc->is_critical;
	err_pkt.err_info.ce_info.header.sub_unit_id = inst;
	err_pkt.err_desc = err_desc;
	/* sub_err_type can be decoded using intr_info by referring
	 * to the interrupt status register definition corresponding
	 * to the error that is being reported.
	 */
	err_pkt.err_info.ce_info.header.sub_err_type = intr_info;
	err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(err_pkt.err_info.ce_info));

	/*
	 * report_err was already verified to be non-NULL on entry, so it is
	 * called directly instead of being guarded by a second, always-true
	 * check.
	 */
	err = g->ops.cic.report_err(g, (void *)&err_pkt,
			sizeof(err_pkt), err_desc->is_critical);
	if (err != 0) {
		nvgpu_err(g, "Failed to report CE error: "
				"inst=%u err_id=%u intr_info=%u",
				inst, err_id, intr_info);
	}

handle_report_failure:
	if (err != 0) {
		nvgpu_sw_quiesce(g);
	}
}
/*
 * SW error injection entry point for CE: forwards the request to
 * nvgpu_report_ce_err() using instance 0 and passing sub_err_type as the
 * interrupt info.
 */
void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	const u32 ce_inst = 0U;

	nvgpu_report_ce_err(g, hw_unit, ce_inst, err_index, sub_err_type);
}

View File

@@ -0,0 +1,161 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/cic.h>
#include <nvgpu/nvgpu_err_info.h>
#include "cic_priv.h"
/*
 * Allocate the CIC unit and attach it to g.
 *
 * Does nothing (returns 0) when g->cic is already set. When a chip specific
 * init op exists it is run on the new object; otherwise the LUT pointer and
 * module count are cleared. The object is published in g->cic only after
 * everything succeeded.
 *
 * Returns 0 on success, -ENOMEM when the allocation fails, or the error
 * returned by the chip specific init op (the allocation is freed in that
 * case).
 */
int nvgpu_cic_init_common(struct gk20a *g)
{
	struct nvgpu_cic *new_cic;
	int ret;

	if (g->cic != NULL) {
		cic_dbg(g, "CIC unit already initialized");
		return 0;
	}

	new_cic = nvgpu_kzalloc(g, sizeof(*new_cic));
	if (new_cic == NULL) {
		nvgpu_err(g, "Failed to allocate memory for struct nvgpu_cic");
		return -ENOMEM;
	}

	if (g->ops.cic.init == NULL) {
		/* No chip specific setup: leave the unit without a LUT. */
		new_cic->err_lut = NULL;
		new_cic->num_hw_modules = 0;
	} else {
		ret = g->ops.cic.init(g, new_cic);
		if (ret != 0) {
			nvgpu_err(g, "CIC chip specific initialization failed.");
			nvgpu_kfree(g, new_cic);
			return ret;
		}
	}

	g->cic = new_cic;
	cic_dbg(g, "CIC unit initialization done.");

	return 0;
}
/*
 * Tear down the CIC unit: clear its LUT pointer and module count, free the
 * object and reset g->cic to NULL. Always returns 0; calling it when the
 * unit is not allocated only emits a debug message.
 */
int nvgpu_cic_deinit_common(struct gk20a *g)
{
	struct nvgpu_cic *old_cic = g->cic;

	if (old_cic != NULL) {
		old_cic->err_lut = NULL;
		old_cic->num_hw_modules = 0;

		nvgpu_kfree(g, old_cic);
		g->cic = NULL;

		return 0;
	}

	cic_dbg(g, "CIC unit already deinitialized");
	return 0;
}
/*
 * Validate hw_unit_id against the number of HW modules known to CIC.
 *
 * Returns 0 when the ID is in range, -EINVAL when CIC is not allocated,
 * when no modules are registered, or when the ID is out of range.
 */
int nvgpu_cic_check_hw_unit_id(struct gk20a *g, u32 hw_unit_id)
{
	int ret = -EINVAL;

	if (g->cic == NULL) {
		nvgpu_err(g, "CIC is not initialized");
	} else if (g->cic->num_hw_modules == 0U) {
		cic_dbg(g, "LUT not initialized.");
	} else if (hw_unit_id >= g->cic->num_hw_modules) {
		cic_dbg(g, "Invalid input HW unit ID.");
	} else {
		ret = 0;
	}

	return ret;
}
/*
 * Validate that err_id is a valid error index for the HW module hw_unit_id.
 *
 * Returns 0 when both IDs are valid. Returns -EINVAL when CIC or its LUT is
 * missing or when err_id is not below the module's num_errs, and propagates
 * the error from nvgpu_cic_check_hw_unit_id() otherwise.
 */
int nvgpu_cic_check_err_id(struct gk20a *g, u32 hw_unit_id,
		u32 err_id)
{
	int ret;

	if (g->cic == NULL) {
		cic_dbg(g, "CIC/LUT not initialized.");
		return -EINVAL;
	}
	if (g->cic->err_lut == NULL) {
		cic_dbg(g, "CIC/LUT not initialized.");
		return -EINVAL;
	}

	ret = nvgpu_cic_check_hw_unit_id(g, hw_unit_id);
	if (ret != 0) {
		return ret;
	}

	/* hw_unit_id is known to be in range here, so the LUT can be indexed. */
	return (err_id < g->cic->err_lut[hw_unit_id].num_errs) ? 0 : -EINVAL;
}
/*
 * Look up the error descriptor for (hw_unit_id, err_id) in the CIC LUT.
 *
 * On success stores the descriptor address in *err_desc and returns 0. On
 * failure *err_desc is left untouched and the error from
 * nvgpu_cic_check_err_id() is returned. That helper also covers the
 * g->cic / g->cic->err_lut NULL checks, so they are not repeated here.
 */
int nvgpu_cic_get_err_desc(struct gk20a *g, u32 hw_unit_id,
		u32 err_id, struct nvgpu_err_desc **err_desc)
{
	int ret = nvgpu_cic_check_err_id(g, hw_unit_id, err_id);

	if (ret == 0) {
		*err_desc = &(g->cic->err_lut[hw_unit_id].errs[err_id]);
	}

	return ret;
}
/*
 * Return the number of HW modules registered with CIC, or -EINVAL when the
 * CIC unit is not allocated. The u32 count is converted to the int return
 * type.
 */
int nvgpu_cic_get_num_hw_modules(struct gk20a *g)
{
	if (g->cic != NULL) {
		return (int)g->cic->num_hw_modules;
	}

	nvgpu_err(g, "CIC is not initialized");
	return -EINVAL;
}

View File

@@ -0,0 +1,291 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef CIC_PRIV_H
#define CIC_PRIV_H
#include <nvgpu/types.h>
struct gk20a;
struct nvgpu_err_hw_module;
struct nvgpu_err_msg;
struct gpu_err_header;
/*
* @file
*
* Declare CIC's private structure to store error-policy LUT and
* other data and ops needed during error reporting.
*/
#define ERR_INJECT_TEST_PATTERN 0xA5
/*
 * CIC private state, allocated by nvgpu_cic_init_common() and stored in
 * g->cic. It holds the error-policy look-up table (LUT) and the number of
 * HW modules that report errors through CIC.
 */
struct nvgpu_cic {
/**
 * Pointer to the error look-up table. The table is indexed by HW unit ID;
 * each entry provides that module's error descriptors (errs) and their
 * count (num_errs). Set to NULL when no chip specific init op is present.
 */
struct nvgpu_err_hw_module *err_lut;
/**
 * Total number of GPU HW modules considered in CIC. Used as the exclusive
 * upper bound when validating a HW unit ID; a value of 0 is treated as
 * "LUT not initialized".
 */
u32 num_hw_modules;
};
/**
* @brief Inject ECC error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param inst [in] - Instance ID.
*
* - Sets values for error address and error count.
* - Invokes error reporting API with the required set of inputs.
*
* @return None
*/
void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 inst);
/**
* @brief Inject HOST error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* - Invokes error reporting API with the required set of inputs.
*
* @return None
*/
void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Inject GR error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* - Sets values for GR exception and SM machine check error information.
* - Invokes error reporting API with the required set of inputs.
*
* @return None
*/
void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Inject CE error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* - Invokes error reporting API with the required set of inputs.
*
* @return None
*/
void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Inject PRI error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param err_code [in] - Error code.
*
* - Invokes error reporting API with the required set of inputs.
*
* @return None
*/
void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 err_code);
/**
* @brief Inject PMU error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* - Sets values for error info.
* - Invokes error reporting API with the required set of inputs.
*
* @return None
*/
void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Inject CTXSW error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param inst [in] - Instance ID.
*
* - Sets values for error info.
* - Invokes error reporting API with the required set of inputs.
*
* @return None
*/
void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 inst);
/**
* @brief Inject MMU error.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit [in] - Index of HW unit.
* @param err_index [in] - Error index.
* @param sub_err_type [in] - Sub error type.
*
* - Sets values for mmu page fault info.
* - Invokes error reporting API with the required set of inputs.
*
* @return None
*/
void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit,
u32 err_index, u32 sub_err_type);
/**
* @brief Initialize error message header.
*
* @param header [in] - Error message header.
*
* This is used to initialize error message header.
*
* @return None
*/
void nvgpu_init_err_msg_header(struct gpu_err_header *header);
/**
* @brief Initialize error message.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is common
* for all HW units.
*
* @return None
*/
void nvgpu_init_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for HOST unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is specific to HOST unit.
*
* @return None
*/
void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize ECC error message.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is specific to ECC errors.
*
* @return None
*/
void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for PRI unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is specific to PRI unit.
*
* @return None
*/
void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for CE unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is specific to CE unit.
*
* @return None
*/
void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for PMU unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is specific to PMU unit.
*
* @return None
*/
void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for GR unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is specific to GR unit.
*
* @return None
*/
void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for CTXSW.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is specific to CTXSW.
*
* @return None
*/
void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg);
/**
* @brief Initialize error message for MMU unit.
*
* @param msg [in] - Error message.
*
* This is used to initialize error message that is specific to MMU unit.
*
* @return None
*/
void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg);
#endif /* CIC_PRIV_H */

View File

@@ -0,0 +1,97 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>
#include "cic_priv.h"
/**
 * @brief Report a CTXSW error to the safety services through CIC.
 *
 * @param g [in]	- The GPU driver struct.
 * @param hw_unit [in]	- HW module ID; must be NVGPU_ERR_MODULE_FECS.
 * @param err_id [in]	- Error index used for the CIC LUT look-up.
 * @param data [in]	- Pointer to a struct ctxsw_err_info describing the
 *			  error; must not be NULL.
 *
 * - Returns without any action when the report_err() HAL op is not set.
 * - Validates hw_unit and data, then fetches the error descriptor from the
 *   CIC LUT.
 * - Copies the ctxsw error info into the error packet and hands it to the
 *   report_err() HAL op. The sub-unit ID is always reported as 0.
 * - Any failure (invalid input, failed LUT look-up, failed report)
 *   triggers SW quiesce.
 *
 * @return None
 */
void nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		void *data)
{
	int err = 0;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_msg err_pkt;
	u32 inst = 0;
	struct ctxsw_err_info *err_info = (struct ctxsw_err_info *)data;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
				"to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_FECS) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		err = -EINVAL;
		goto handle_report_failure;
	}

	/* data is dereferenced below; reject a NULL payload up front. */
	if (err_info == NULL) {
		nvgpu_err(g, "invalid ctxsw error info");
		err = -EINVAL;
		goto handle_report_failure;
	}

	err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
	if (err != 0) {
		nvgpu_err(g, "Failed to get err_desc for"
				" err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto handle_report_failure;
	}

	nvgpu_init_ctxsw_err_msg(&err_pkt);
	err_pkt.hw_unit_id = hw_unit;
	err_pkt.err_id = err_desc->error_id;
	err_pkt.is_critical = err_desc->is_critical;
	err_pkt.err_info.ctxsw_info.header.sub_unit_id = inst;
	err_pkt.err_info.ctxsw_info.curr_ctx = err_info->curr_ctx;
	err_pkt.err_info.ctxsw_info.chid = err_info->chid;
	err_pkt.err_info.ctxsw_info.ctxsw_status0 = err_info->ctxsw_status0;
	err_pkt.err_info.ctxsw_info.ctxsw_status1 = err_info->ctxsw_status1;
	err_pkt.err_info.ctxsw_info.mailbox_value = err_info->mailbox_value;
	err_pkt.err_desc = err_desc;
	err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(err_pkt.err_info.ctxsw_info));

	/*
	 * report_err was already verified to be non-NULL on entry, so it is
	 * called directly instead of being guarded by a second, always-true
	 * check.
	 */
	err = g->ops.cic.report_err(g, (void *)&err_pkt,
			sizeof(err_pkt), err_desc->is_critical);
	if (err != 0) {
		nvgpu_err(g, "Failed to report CTXSW error: "
				"err_id=%u, mailbox_val=%u",
				err_id, err_info->mailbox_value);
	}

handle_report_failure:
	if (err != 0) {
		nvgpu_sw_quiesce(g);
	}
}
/*
 * SW error injection entry point for CTXSW: builds a ctxsw error info
 * payload filled with the fixed test pattern and reports it through
 * nvgpu_report_ctxsw_err(). The inst argument is not forwarded.
 */
void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 inst)
{
	struct ctxsw_err_info test_payload;

	/* Every byte of the payload carries the injection test pattern. */
	(void)memset(&test_payload, ERR_INJECT_TEST_PATTERN,
			sizeof(test_payload));

	nvgpu_report_ctxsw_err(g, hw_unit, err_index, (void *)&test_payload);
}

View File

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>
#include "cic_priv.h"
/**
 * @brief Report an ECC error to the safety services through CIC.
 *
 * @param g [in]		- The GPU driver struct.
 * @param hw_unit [in]		- HW module ID; validated only through the
 *				  CIC LUT look-up.
 * @param inst [in]		- Instance ID, stored as the sub-unit ID.
 * @param err_id [in]		- Error index used for the CIC LUT look-up.
 * @param err_addr [in]		- Error address, stored in the packet header.
 * @param err_count [in]	- Error count, stored in the packet.
 *
 * - Returns without any action when the report_err() HAL op is not set.
 * - Fetches the error descriptor from the CIC LUT.
 * - Fills the ECC error packet and hands it to the report_err() HAL op.
 * - Any failure (failed LUT look-up, failed report) triggers SW quiesce.
 *
 * @return None
 */
void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u64 err_addr, u64 err_count)
{
	int err = 0;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_msg err_pkt;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
				"to safety services");
		return;
	}

	err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
	if (err != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto handle_report_failure;
	}

	nvgpu_init_ecc_err_msg(&err_pkt);
	err_pkt.hw_unit_id = hw_unit;
	err_pkt.err_id = err_desc->error_id;
	err_pkt.is_critical = err_desc->is_critical;
	err_pkt.err_info.ecc_info.header.sub_unit_id = inst;
	err_pkt.err_info.ecc_info.header.address = err_addr;
	err_pkt.err_info.ecc_info.err_cnt = err_count;
	err_pkt.err_desc = err_desc;
	err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(err_pkt.err_info.ecc_info));

	/*
	 * report_err was already verified to be non-NULL on entry, so it is
	 * called directly instead of being guarded by a second, always-true
	 * check.
	 */
	err = g->ops.cic.report_err(g, (void *)&err_pkt,
			sizeof(err_pkt), err_desc->is_critical);
	if (err != 0) {
		nvgpu_err(g, "Failed to report ECC error: hw_unit=%u, inst=%u, "
				"err_id=%u, err_addr=%llu, err_count=%llu",
				hw_unit, inst, err_id, err_addr, err_count);
	}

handle_report_failure:
	if (err != 0) {
		nvgpu_sw_quiesce(g);
	}
}
/*
 * SW error injection entry point for ECC: reports an ECC error whose
 * address and count are both set to the fixed injection test pattern.
 */
void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit, u32 err_index,
		u32 inst)
{
	const u64 pattern = (u64)ERR_INJECT_TEST_PATTERN;

	nvgpu_report_ecc_err(g, hw_unit, inst, err_index, pattern, pattern);
}

View File

@@ -0,0 +1,169 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>
#include "cic_priv.h"
/*
 * Copy the unit specific error details from err_info into err_pkt.
 *
 * For NVGPU_ERR_MODULE_SM the SM machine check info is copied into
 * err_info.sm_info; for every other hw_unit the GR exception info is copied
 * into err_info.gr_info.
 */
static void nvpgu_report_fill_err_info(u32 hw_unit,
		struct nvgpu_err_msg *err_pkt, struct gr_err_info *err_info)
{
	struct gr_sm_mcerr_info *sm_src;

	if (hw_unit != NVGPU_ERR_MODULE_SM) {
		struct gr_exception_info *gr_src = err_info->exception_info;

		err_pkt->err_info.gr_info.curr_ctx = gr_src->curr_ctx;
		err_pkt->err_info.gr_info.chid = gr_src->chid;
		err_pkt->err_info.gr_info.tsgid = gr_src->tsgid;
		err_pkt->err_info.gr_info.status = gr_src->status;
		return;
	}

	sm_src = err_info->sm_mcerr_info;

	err_pkt->err_info.sm_info.warp_esr_pc = sm_src->hww_warp_esr_pc;
	err_pkt->err_info.sm_info.warp_esr_status = sm_src->hww_warp_esr_status;
	err_pkt->err_info.sm_info.curr_ctx = sm_src->curr_ctx;
	err_pkt->err_info.sm_info.chid = sm_src->chid;
	err_pkt->err_info.sm_info.tsgid = sm_src->tsgid;
	err_pkt->err_info.sm_info.gpc = sm_src->gpc;
	err_pkt->err_info.sm_info.tpc = sm_src->tpc;
	err_pkt->err_info.sm_info.sm = sm_src->sm;
}
/**
 * @brief Report a GR (PGRAPH or SM) error to the safety services through CIC.
 *
 * @param g [in]		- The GPU driver struct.
 * @param hw_unit [in]		- HW module ID; must be NVGPU_ERR_MODULE_SM or
 *				  NVGPU_ERR_MODULE_PGRAPH.
 * @param inst [in]		- Instance ID, stored as the sub-unit ID.
 * @param err_id [in]		- Error index used for the CIC LUT look-up.
 * @param err_info [in]		- Unit specific error details; the member
 *				  matching hw_unit is read.
 * @param sub_err_type [in]	- Sub error type, stored in the packet header.
 *
 * - Returns without any action when the report_err() HAL op is not set.
 * - Validates hw_unit and fetches the error descriptor from the CIC LUT.
 * - Fills the GR error packet and hands it to the report_err() HAL op.
 * - Any failure (invalid hw_unit, failed LUT look-up, failed report)
 *   triggers SW quiesce.
 *
 * @return None
 */
void nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, struct gr_err_info *err_info, u32 sub_err_type)
{
	int err = 0;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_msg err_pkt;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
				"to safety services");
		return;
	}

	if ((hw_unit != NVGPU_ERR_MODULE_SM) &&
			(hw_unit != NVGPU_ERR_MODULE_PGRAPH)) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		err = -EINVAL;
		goto handle_report_failure;
	}

	err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
	if (err != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto handle_report_failure;
	}

	nvgpu_init_gr_err_msg(&err_pkt);
	err_pkt.hw_unit_id = hw_unit;
	err_pkt.err_id = err_desc->error_id;
	err_pkt.is_critical = err_desc->is_critical;
	err_pkt.err_desc = err_desc;
	err_pkt.err_info.gr_info.header.sub_err_type = sub_err_type;
	err_pkt.err_info.gr_info.header.sub_unit_id = inst;
	nvpgu_report_fill_err_info(hw_unit, &err_pkt, err_info);
	err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(sizeof(err_pkt.err_info));

	/*
	 * report_err was already verified to be non-NULL on entry, so it is
	 * called directly instead of being guarded by a second, always-true
	 * check.
	 */
	err = g->ops.cic.report_err(g, (void *)&err_pkt,
			sizeof(err_pkt), err_desc->is_critical);
	if (err != 0) {
		/*
		 * The message pieces are adjacent string literals; each piece
		 * must carry its own separator or the words run together in
		 * the log.
		 */
		if (hw_unit == NVGPU_ERR_MODULE_SM) {
			nvgpu_err(g, "Failed to report SM exception: "
					"gpc=%u, tpc=%u, sm=%u, esr_status=%x",
					err_pkt.err_info.sm_info.gpc,
					err_pkt.err_info.sm_info.tpc,
					err_pkt.err_info.sm_info.sm,
					err_pkt.err_info.sm_info.warp_esr_status);
		}
		if (hw_unit == NVGPU_ERR_MODULE_PGRAPH) {
			nvgpu_err(g, "Failed to report PGRAPH "
					"exception: inst=%u, err_id=%u, "
					"status=%u", inst, err_id,
					err_pkt.err_info.gr_info.status);
		}
	}

handle_report_failure:
	if (err != 0) {
		nvgpu_sw_quiesce(g);
	}
}
/*
 * SW error injection entry point for GR: builds a test-pattern payload for
 * the requested unit (PGRAPH or SM) and reports it through
 * nvgpu_report_gr_err() with instance 0. Any other hw_unit is logged and
 * ignored.
 */
void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	struct gr_err_info payload;
	struct gr_exception_info exception_pattern;
	struct gr_sm_mcerr_info sm_pattern;

	/* Fill fixed test pattern data for the error message payload. */
	(void)memset(&exception_pattern, ERR_INJECT_TEST_PATTERN,
			sizeof(exception_pattern));
	(void)memset(&sm_pattern, ERR_INJECT_TEST_PATTERN,
			sizeof(sm_pattern));

	/* Only the payload member matching hw_unit is populated. */
	if (hw_unit == NVGPU_ERR_MODULE_PGRAPH) {
		payload.exception_info = &exception_pattern;
	} else if (hw_unit == NVGPU_ERR_MODULE_SM) {
		payload.sm_mcerr_info = &sm_pattern;
	} else {
		nvgpu_err(g, "unsupported hw_unit(%u)", hw_unit);
		return;
	}

	nvgpu_report_gr_err(g, hw_unit, 0U, err_index,
			&payload, sub_err_type);
}

View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>
#include "cic_priv.h"
/**
 * @brief Report a HOST error to the safety services through CIC.
 *
 * @param g [in]		- The GPU driver struct.
 * @param hw_unit [in]		- HW module ID; must be NVGPU_ERR_MODULE_HOST.
 * @param inst [in]		- Instance ID, stored as the sub-unit ID.
 * @param err_id [in]		- Error index used for the CIC LUT look-up.
 * @param intr_info [in]	- Interrupt info, stored as the sub error type.
 *
 * - Returns without any action when the report_err() HAL op is not set.
 * - Validates hw_unit and fetches the error descriptor from the CIC LUT.
 * - Fills the HOST error packet and hands it to the report_err() HAL op.
 * - Any failure (invalid hw_unit, failed LUT look-up, failed report)
 *   triggers SW quiesce.
 *
 * @return None
 */
void nvgpu_report_host_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	int err = 0;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_msg err_pkt;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
				"to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_HOST) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		err = -EINVAL;
		goto handle_report_failure;
	}

	err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
	if (err != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto handle_report_failure;
	}

	nvgpu_init_host_err_msg(&err_pkt);
	err_pkt.hw_unit_id = hw_unit;
	err_pkt.err_id = err_desc->error_id;
	err_pkt.is_critical = err_desc->is_critical;
	err_pkt.err_info.host_info.header.sub_unit_id = inst;
	err_pkt.err_desc = err_desc;
	/* sub_err_type can be decoded using intr_info by referring
	 * to the interrupt status register definition corresponding
	 * to the error that is being reported.
	 */
	err_pkt.err_info.host_info.header.sub_err_type = intr_info;
	err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(err_pkt.err_info.host_info));

	/*
	 * report_err was already verified to be non-NULL on entry, so it is
	 * called directly instead of being guarded by a second, always-true
	 * check.
	 */
	err = g->ops.cic.report_err(g, (void *)&err_pkt,
			sizeof(err_pkt), err_desc->is_critical);
	if (err != 0) {
		nvgpu_err(g, "Failed to report HOST error: "
				"inst=%u, err_id=%u, intr_info=%u",
				inst, err_id, intr_info);
	}

handle_report_failure:
	if (err != 0) {
		nvgpu_sw_quiesce(g);
	}
}
/*
 * SW error-injection hook for HOST errors: reports the given error through
 * nvgpu_report_host_err() using instance 0.
 */
void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	const u32 inst = 0U;

	nvgpu_report_host_err(g, hw_unit, inst, err_index, sub_err_type);
}

View File

@@ -0,0 +1,131 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>
#include "cic_priv.h"
/*
 * Report a HUBMMU error to the safety services through the CIC report_err
 * op.
 *
 * When the chip provides no report_err op only a debug message is printed.
 * Otherwise hw_unit is validated, the error descriptor for err_id is looked
 * up, an error packet is filled in and passed to report_err. A failure in
 * any of these steps triggers SW quiesce.
 *
 * g            - GPU driver struct.
 * hw_unit      - must be NVGPU_ERR_MODULE_HUBMMU.
 * err_id       - error index used for the descriptor lookup.
 * fault_info   - optional fault details; copied into the packet when
 *                non-NULL, otherwise the packet keeps its initial values.
 * status       - stored in the MMU section of the packet.
 * sub_err_type - stored in the packet header.
 */
void nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		struct mmu_fault_info *fault_info, u32 status, u32 sub_err_type)
{
	struct nvgpu_err_msg msg;
	struct nvgpu_err_desc *desc = NULL;
	struct gpu_err_header *hdr = &msg.err_info.mmu_info.header;
	int ret;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			"to safety services");
		return;
	}

	if (hw_unit == NVGPU_ERR_MODULE_HUBMMU) {
		ret = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &desc);
		if (ret != 0) {
			nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		}
	} else {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		ret = -EINVAL;
	}

	if (ret == 0) {
		nvgpu_init_mmu_err_msg(&msg);

		msg.err_desc = desc;
		msg.hw_unit_id = hw_unit;
		msg.err_id = desc->error_id;
		msg.is_critical = desc->is_critical;
		msg.err_size = nvgpu_safe_cast_u64_to_u8(
				sizeof(msg.err_info.mmu_info));

		hdr->sub_err_type = sub_err_type;
		msg.err_info.mmu_info.status = status;

		/* Mirror the caller's fault record, field by field. */
		if (fault_info != NULL) {
			msg.err_info.mmu_info.info.inst_ptr =
				fault_info->inst_ptr;
			msg.err_info.mmu_info.info.inst_aperture =
				fault_info->inst_aperture;
			msg.err_info.mmu_info.info.fault_addr =
				fault_info->fault_addr;
			msg.err_info.mmu_info.info.fault_addr_aperture =
				fault_info->fault_addr_aperture;
			msg.err_info.mmu_info.info.timestamp_lo =
				fault_info->timestamp_lo;
			msg.err_info.mmu_info.info.timestamp_hi =
				fault_info->timestamp_hi;
			msg.err_info.mmu_info.info.mmu_engine_id =
				fault_info->mmu_engine_id;
			msg.err_info.mmu_info.info.gpc_id =
				fault_info->gpc_id;
			msg.err_info.mmu_info.info.client_type =
				fault_info->client_type;
			msg.err_info.mmu_info.info.client_id =
				fault_info->client_id;
			msg.err_info.mmu_info.info.fault_type =
				fault_info->fault_type;
			msg.err_info.mmu_info.info.access_type =
				fault_info->access_type;
			msg.err_info.mmu_info.info.protected_mode =
				fault_info->protected_mode;
			msg.err_info.mmu_info.info.replayable_fault =
				fault_info->replayable_fault;
			msg.err_info.mmu_info.info.replay_fault_en =
				fault_info->replay_fault_en;
			msg.err_info.mmu_info.info.valid =
				fault_info->valid;
			msg.err_info.mmu_info.info.faulted_pbdma =
				fault_info->faulted_pbdma;
			msg.err_info.mmu_info.info.faulted_engine =
				fault_info->faulted_engine;
			msg.err_info.mmu_info.info.faulted_subid =
				fault_info->faulted_subid;
			msg.err_info.mmu_info.info.chid =
				fault_info->chid;
		}

		/* report_err was verified to be non-NULL on entry. */
		ret = g->ops.cic.report_err(g, (void *)&msg,
				sizeof(msg), desc->is_critical);
		if (ret != 0) {
			nvgpu_err(g, "Failed to report MMU fault: hw_unit=%u, "
				"err_id=%u, sub_err_type=%u, status=%u",
				hw_unit, err_id, sub_err_type, status);
		}
	}

	/* Any failure above is treated as fatal for error reporting. */
	if (ret != 0) {
		nvgpu_sw_quiesce(g);
	}
}
/*
 * SW error-injection hook for MMU errors: builds a fault record filled with
 * ERR_INJECT_TEST_PATTERN bytes and reports it through
 * nvgpu_report_mmu_err() with a status of 0.
 */
void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit, u32 err_index,
		u32 sub_err_type)
{
	struct mmu_fault_info pattern_fault;

	(void) memset(&pattern_fault, ERR_INJECT_TEST_PATTERN,
			sizeof(pattern_fault));

	nvgpu_report_mmu_err(g, hw_unit, err_index,
			&pattern_fault, 0U, sub_err_type);
}

View File

@@ -0,0 +1,126 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/string.h>
#include "cic_priv.h"
/*
 * Put an error message header into its initial state: version 1.0, with the
 * sub-error type, sub-unit id, address and timestamp all zeroed.
 */
void nvgpu_init_err_msg_header(struct gpu_err_header *header)
{
	/* Payload-describing fields start out cleared. */
	header->timestamp_ns = 0UL;
	header->address = 0UL;
	header->sub_unit_id = 0UL;
	header->sub_err_type = 0U;

	/* Message format version: 1.0 */
	header->version.minor = (u16)0U;
	header->version.major = (u16)1U;
}
/*
 * Clear a whole error message and then explicitly reset its top-level
 * bookkeeping fields (unit id, criticality, error id and payload size).
 */
void nvgpu_init_err_msg(struct nvgpu_err_msg *msg)
{
	(void) memset(msg, 0, sizeof(*msg));

	msg->err_size = (u8)0U;
	msg->err_id = (u8)0U;
	msg->is_critical = false;
	msg->hw_unit_id = 0U;
}
/*
 * Initialize an error message for a HOST error: common fields first, then
 * the header of the host_info payload.
 */
void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg)
{
	struct gpu_err_header *hdr = &msg->err_info.host_info.header;

	nvgpu_init_err_msg(msg);
	nvgpu_init_err_msg_header(hdr);
}
/*
 * Initialize an error message for an ECC error: common fields, the header
 * of the ecc_info payload, and a zero error count.
 */
void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg)
{
	struct gpu_err_header *hdr = &msg->err_info.ecc_info.header;

	nvgpu_init_err_msg(msg);
	nvgpu_init_err_msg_header(hdr);

	msg->err_info.ecc_info.err_cnt = 0UL;
}
/*
 * Initialize an error message for a PRI error: common fields first, then
 * the header of the pri_info payload.
 */
void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg)
{
	struct gpu_err_header *hdr = &msg->err_info.pri_info.header;

	nvgpu_init_err_msg(msg);
	nvgpu_init_err_msg_header(hdr);
}
/*
 * Initialize an error message for a CE error: common fields first, then
 * the header of the ce_info payload.
 */
void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg)
{
	struct gpu_err_header *hdr = &msg->err_info.ce_info.header;

	nvgpu_init_err_msg(msg);
	nvgpu_init_err_msg_header(hdr);
}
/*
 * Initialize an error message for a PMU error: common fields, the header of
 * the pmu_err_info payload, and a zero status.
 */
void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg)
{
	struct gpu_err_header *hdr = &msg->err_info.pmu_err_info.header;

	nvgpu_init_err_msg(msg);
	nvgpu_init_err_msg_header(hdr);

	msg->err_info.pmu_err_info.status = 0U;
}
/*
 * Initialize an error message for a GR error: common fields, the header of
 * the gr_info payload, and zeroed context/channel/TSG/status fields.
 */
void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg)
{
	struct gpu_err_header *hdr = &msg->err_info.gr_info.header;

	nvgpu_init_err_msg(msg);
	nvgpu_init_err_msg_header(hdr);

	msg->err_info.gr_info.status = 0U;
	msg->err_info.gr_info.tsgid = 0U;
	msg->err_info.gr_info.chid = 0U;
	msg->err_info.gr_info.curr_ctx = 0U;
}
/*
 * Initialize an error message for a CTXSW error: common fields, the header
 * of the ctxsw_info payload, and zeroed context, status and mailbox fields.
 */
void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg)
{
	struct gpu_err_header *hdr = &msg->err_info.ctxsw_info.header;

	nvgpu_init_err_msg(msg);
	nvgpu_init_err_msg_header(hdr);

	/* Context identification. */
	msg->err_info.ctxsw_info.chid = 0U;
	msg->err_info.ctxsw_info.tsgid = 0U;
	msg->err_info.ctxsw_info.curr_ctx = 0U;

	/* Status registers and mailbox. */
	msg->err_info.ctxsw_info.mailbox_value = 0U;
	msg->err_info.ctxsw_info.ctxsw_status1 = 0U;
	msg->err_info.ctxsw_info.ctxsw_status0 = 0U;
}
/*
 * Initialize an error message for an MMU error: common fields, the header
 * of the mmu_info payload, the status, and every fault-record field
 * (numeric fields to zero, replayable_fault and valid to false).
 */
void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg)
{
	struct gpu_err_header *hdr = &msg->err_info.mmu_info.header;

	nvgpu_init_err_msg(msg);
	nvgpu_init_err_msg_header(hdr);

	msg->err_info.mmu_info.status = 0U;

	/* Addresses and apertures. */
	msg->err_info.mmu_info.info.inst_ptr = 0UL;
	msg->err_info.mmu_info.info.fault_addr = 0UL;
	msg->err_info.mmu_info.info.inst_aperture = 0U;
	msg->err_info.mmu_info.info.fault_addr_aperture = 0U;

	/* Timestamp. */
	msg->err_info.mmu_info.info.timestamp_hi = 0U;
	msg->err_info.mmu_info.info.timestamp_lo = 0U;

	/* Faulting engine and client. */
	msg->err_info.mmu_info.info.mmu_engine_id = 0U;
	msg->err_info.mmu_info.info.gpc_id = 0U;
	msg->err_info.mmu_info.info.client_id = 0U;
	msg->err_info.mmu_info.info.client_type = 0U;

	/* Fault classification. */
	msg->err_info.mmu_info.info.fault_type = 0U;
	msg->err_info.mmu_info.info.access_type = 0U;
	msg->err_info.mmu_info.info.protected_mode = 0U;
	msg->err_info.mmu_info.info.replay_fault_en = 0U;
	msg->err_info.mmu_info.info.replayable_fault = false;
	msg->err_info.mmu_info.info.valid = false;

	/* Faulted pbdma/engine/channel bookkeeping. */
	msg->err_info.mmu_info.info.faulted_pbdma = 0U;
	msg->err_info.mmu_info.info.faulted_engine = 0U;
	msg->err_info.mmu_info.info.faulted_subid = 0U;
	msg->err_info.mmu_info.info.chid = 0U;
}

View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>
#include "cic_priv.h"
/*
 * Report a PMU error to the safety services through the CIC report_err op.
 *
 * When the chip provides no report_err op only a debug message is printed.
 * Otherwise hw_unit is validated, the error descriptor for err_id is looked
 * up, an error packet is filled in and passed to report_err. A failure in
 * any of these steps triggers SW quiesce.
 *
 * g            - GPU driver struct.
 * hw_unit      - must be NVGPU_ERR_MODULE_PMU.
 * err_id       - error index used for the descriptor lookup.
 * sub_err_type - stored in the packet header.
 * status       - stored in the PMU section of the packet.
 */
void nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		u32 sub_err_type, u32 status)
{
	struct nvgpu_err_msg msg;
	struct nvgpu_err_desc *desc = NULL;
	struct gpu_err_header *hdr = &msg.err_info.pmu_err_info.header;
	int ret;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			"to safety services");
		return;
	}

	if (hw_unit == NVGPU_ERR_MODULE_PMU) {
		ret = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &desc);
		if (ret != 0) {
			nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		}
	} else {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		ret = -EINVAL;
	}

	if (ret == 0) {
		nvgpu_init_pmu_err_msg(&msg);

		msg.err_desc = desc;
		msg.hw_unit_id = hw_unit;
		msg.err_id = desc->error_id;
		msg.is_critical = desc->is_critical;
		msg.err_size = nvgpu_safe_cast_u64_to_u8(
				sizeof(msg.err_info.pmu_err_info));

		hdr->sub_err_type = sub_err_type;
		msg.err_info.pmu_err_info.status = status;

		/* report_err was verified to be non-NULL on entry. */
		ret = g->ops.cic.report_err(g, (void *)&msg,
				sizeof(msg), desc->is_critical);
		if (ret != 0) {
			nvgpu_err(g, "Failed to report PMU error: "
				"err_id=%u, sub_err_type=%u, status=%u",
				err_id, sub_err_type, status);
		}
	}

	/* Any failure above is treated as fatal for error reporting. */
	if (ret != 0) {
		nvgpu_sw_quiesce(g);
	}
}
/*
 * SW error-injection hook for PMU errors: reports the given error through
 * nvgpu_report_pmu_err() with ERR_INJECT_TEST_PATTERN as the status value.
 */
void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	const u32 pattern_status = (u32)ERR_INJECT_TEST_PATTERN;

	nvgpu_report_pmu_err(g, hw_unit, err_index, sub_err_type,
			pattern_status);
}

View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>
#include "cic_priv.h"
/*
 * Report a PRI error to the safety services through the CIC report_err op.
 *
 * When the chip provides no report_err op only a debug message is printed.
 * Otherwise hw_unit is validated, the error descriptor for err_id is looked
 * up, an error packet is filled in and passed to report_err. A failure in
 * any of these steps triggers SW quiesce.
 *
 * g        - GPU driver struct.
 * hw_unit  - must be NVGPU_ERR_MODULE_PRI.
 * inst     - stored as the sub-unit id in the packet header.
 * err_id   - error index used for the descriptor lookup.
 * err_addr - stored (widened to u64) as the address in the packet header.
 * err_code - stored as the sub-error type in the packet header.
 */
void nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u32 err_addr, u32 err_code)
{
	struct nvgpu_err_msg msg;
	struct nvgpu_err_desc *desc = NULL;
	struct gpu_err_header *hdr = &msg.err_info.pri_info.header;
	int ret;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			"to safety services");
		return;
	}

	if (hw_unit == NVGPU_ERR_MODULE_PRI) {
		ret = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &desc);
		if (ret != 0) {
			nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		}
	} else {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		ret = -EINVAL;
	}

	if (ret == 0) {
		nvgpu_init_pri_err_msg(&msg);

		msg.err_desc = desc;
		msg.hw_unit_id = hw_unit;
		msg.err_id = desc->error_id;
		msg.is_critical = desc->is_critical;
		msg.err_size = nvgpu_safe_cast_u64_to_u8(
				sizeof(msg.err_info.pri_info));

		hdr->sub_unit_id = inst;
		hdr->address = (u64) err_addr;
		/*
		 * err_code is forwarded as-is; decode it with the FECS pri
		 * error codes.
		 */
		hdr->sub_err_type = err_code;

		/* report_err was verified to be non-NULL on entry. */
		ret = g->ops.cic.report_err(g, (void *)&msg,
				sizeof(msg), desc->is_critical);
		if (ret != 0) {
			nvgpu_err(g, "Failed to report PRI error: "
				"inst=%u, err_id=%u, err_code=%u",
				inst, err_id, err_code);
		}
	}

	/* Any failure above is treated as fatal for error reporting. */
	if (ret != 0) {
		nvgpu_sw_quiesce(g);
	}
}
/*
 * SW error-injection hook for PRI errors: reports the given error through
 * nvgpu_report_pri_err() using instance 0 and error address 0.
 */
void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 err_code)
{
	const u32 inst = 0U;
	const u32 err_addr = 0U;

	nvgpu_report_pri_err(g, hw_unit, inst, err_index, err_addr, err_code);
}

View File

@@ -46,6 +46,7 @@
#ifdef CONFIG_NVGPU_NON_FUSA
#include <nvgpu/ptimer.h>
#endif
#include <nvgpu/cic.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu/pmu_pstate.h>
@@ -357,6 +358,12 @@ int nvgpu_prepare_poweroff(struct gk20a *g)
#endif
gk20a_mask_interrupts(g);
/* Disable CIC after the interrupts are masked;
* this ensures that CIC will not get probed
* after its deinit.
*/
nvgpu_cic_deinit_common(g);
return ret;
}
@@ -716,6 +723,14 @@ int nvgpu_early_poweron(struct gk20a *g)
goto done;
}
/* Initialize CIC early on before the interrupts are
* enabled.
*/
err = nvgpu_cic_init_common(g);
if (err != 0) {
nvgpu_err(g, "CIC Initialization failed[%d]", err);
goto done;
}
done:
return err;
}

View File

@@ -0,0 +1,36 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* gv11b-specific pieces of the Central Interrupt Controller (CIC) unit. */
#ifndef CIC_GV11B_H
#define CIC_GV11B_H
#include <nvgpu/nvgpu_err_info.h>
/* Only pointers to these are used here, so forward declarations suffice. */
struct gk20a;
struct nvgpu_cic;
/* gv11b error look-up table, one entry per error-reporting hw module. */
extern struct nvgpu_err_hw_module gv11b_err_lut[];
/* Number of entries in gv11b_err_lut. */
extern u32 size_of_gv11b_lut;
/*
 * Point cic->err_lut at gv11b_err_lut and set cic->num_hw_modules to
 * size_of_gv11b_lut. Returns 0 on success, -EINVAL when cic is NULL.
 */
int gv11b_cic_init(struct gk20a *g, struct nvgpu_cic *cic);
#endif /* CIC_GV11B_H */

View File

@@ -20,23 +20,19 @@
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_CIC_H
#define NVGPU_CIC_H
#include <nvgpu/log.h>
/**
* @file
*
* Public structs and APIs exposed by Central Interrupt Controller
* (CIC) unit.
*/
#include "common/cic/cic_priv.h"
#include "cic_gv11b.h"
/*
* Requires a string literal for the format - notice the string
* concatenation.
*/
#define cic_dbg(g, fmt, args...) \
nvgpu_log((g), gpu_dbg_cic, "CIC | " fmt, ##args)
int gv11b_cic_init(struct gk20a *g, struct nvgpu_cic *cic)
{
if (cic == NULL) {
nvgpu_err(g, "Invalid CIC reference pointer.");
return -EINVAL;
}
#endif /* NVGPU_CIC_H */
cic->err_lut = gv11b_err_lut;
cic->num_hw_modules = size_of_gv11b_lut;
return 0;
}

View File

@@ -0,0 +1,599 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/static_analysis.h>
#include "common/cic/cic_priv.h"
#include "cic_gv11b.h"
/*
 * Injection mechanism used by the LUT entries that support hw error
 * injection: INJECT_HW when CONFIG_NVGPU_INJECT_HWERR is defined,
 * INJECT_SW otherwise.
 */
#if defined(CONFIG_NVGPU_INJECT_HWERR)
#define INJECT_TYPE	(INJECT_HW)
#else
#define INJECT_TYPE	(INJECT_SW)
#endif
/*
 * gv11b error look-up table: one entry per error-reporting hw unit.
 *
 * Each entry gives the unit name, its NVGPU_ERR_MODULE_* id, the number of
 * instances and an array of num_errs error descriptors built with
 * GPU_CRITERR()/GPU_NONCRITERR(). Every descriptor names the error id, the
 * error injection mechanism supported (INJECT_SW, INJECT_NONE, or
 * INJECT_TYPE, which resolves to INJECT_HW when CONFIG_NVGPU_INJECT_HWERR
 * is defined) and the sw injection callback, if any.
 *
 * In case of hw error injection support, this initialization will be
 * overridden by the values provided from the hal layers of the
 * corresponding hw units.
 *
 * num_errs is a hand-written literal and must be kept in sync with the
 * number of descriptors in the matching errs array.
 *
 * NOTE(review): descriptors with id 0 / INJECT_NONE look like placeholders
 * that keep the position of every error in the array stable - confirm
 * against nvgpu_cic_get_err_desc() before reordering or removing entries.
 */
struct nvgpu_err_hw_module gv11b_err_lut[] = {
/* HOST: pfifo, pbus and pbdma errors. */
{
.name = "host",
.hw_unit = (u32)NVGPU_ERR_MODULE_HOST,
.num_instances = 1U,
.num_errs = 17U,
.errs = (struct nvgpu_err_desc[]) {
GPU_CRITERR("pfifo_bind_error",
GPU_HOST_PFIFO_BIND_ERROR, INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_sched_error",
GPU_HOST_PFIFO_SCHED_ERROR, INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_chsw_error",
GPU_HOST_PFIFO_CHSW_ERROR, INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_memop_error",
GPU_HOST_PFIFO_MEMOP_TIMEOUT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_lb_error",
GPU_HOST_PFIFO_LB_ERROR, INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbus_squash_error",
GPU_HOST_PBUS_SQUASH_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbus_fecs_error",
GPU_HOST_PBUS_FECS_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbus_timeout_error",
GPU_HOST_PBUS_TIMEOUT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_timeout_error",
GPU_HOST_PBDMA_TIMEOUT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_extra_error",
GPU_HOST_PBDMA_EXTRA_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_gpfifo_pb_error",
GPU_HOST_PBDMA_GPFIFO_PB_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_method_error",
GPU_HOST_PBDMA_METHOD_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_signature_error",
GPU_HOST_PBDMA_SIGNATURE_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_hce_error",
GPU_HOST_PBDMA_HCE_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_preempt_error",
GPU_HOST_PBDMA_PREEMPT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("pfifo_ctxsw_timeout",
GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_fb_flush_timeout",
GPU_HOST_PFIFO_FB_FLUSH_TIMEOUT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_host_swerror,
NULL, NULL, 0, 0),
},
},
/* SM: ECC errors plus the machine check error. */
{
.name = "sm",
.hw_unit = (u32)NVGPU_ERR_MODULE_SM,
.num_instances = 8U,
.num_errs = 21U,
.errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("l1_tag_ecc_corrected",
GPU_SM_L1_TAG_ECC_CORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("l1_tag_ecc_uncorrected",
GPU_SM_L1_TAG_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("cbu_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("cbu_ecc_uncorrected",
GPU_SM_CBU_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("lrf_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("lrf_ecc_uncorrected",
GPU_SM_LRF_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("l1_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("l1_data_ecc_uncorrected",
GPU_SM_L1_DATA_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("icache_l0_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("icache_l0_data_ecc_uncorrected",
GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("icache_l1_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("icache_l1_data_ecc_uncorrected",
GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("icache_l0_predecode_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("icache_l0_predecode_ecc_uncorrected",
GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("l1_tag_miss_fifo_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("l1_tag_miss_fifo_ecc_uncorrected",
GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("l1_tag_s2r_pixprf_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("l1_tag_s2r_pixprf_ecc_uncorrected",
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("machine_check_error",
GPU_SM_MACHINE_CHECK_ERROR,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("icache_l1_predecode_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("icache_l1_predecode_ecc_uncorrected",
GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
},
},
/* FECS: falcon ECC errors and ctxsw errors. */
{
.name = "fecs",
.hw_unit = (u32)NVGPU_ERR_MODULE_FECS,
.num_instances = 1U,
.num_errs = 8U,
.errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("falcon_imem_ecc_corrected",
GPU_FECS_FALCON_IMEM_ECC_CORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("falcon_imem_ecc_uncorrected",
GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("falcon_dmem_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("falcon_dmem_ecc_uncorrected",
GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("ctxsw_watchdog_timeout",
GPU_FECS_CTXSW_WATCHDOG_TIMEOUT,
INJECT_SW,
NULL, nvgpu_inject_ctxsw_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("ctxsw_crc_mismatch",
GPU_FECS_CTXSW_CRC_MISMATCH,
INJECT_SW,
NULL, nvgpu_inject_ctxsw_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("fault_during_ctxsw",
GPU_FECS_FAULT_DURING_CTXSW,
INJECT_SW,
NULL, nvgpu_inject_ctxsw_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("ctxsw_init_error",
GPU_FECS_CTXSW_INIT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_ctxsw_swerror,
NULL, NULL, 0, 0),
},
},
/* GPCCS: falcon ECC errors. */
{
.name = "gpccs",
.hw_unit = (u32)NVGPU_ERR_MODULE_GPCCS,
.num_instances = 1U,
.num_errs = 4U,
.errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("falcon_imem_ecc_corrected",
GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("falcon_imem_ecc_uncorrected",
GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("falcon_dmem_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("falcon_dmem_ecc_uncorrected",
GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
},
},
/* MMU: l1tlb ECC errors. */
{
.name = "mmu",
.hw_unit = (u32)NVGPU_ERR_MODULE_MMU,
.num_instances = 1U,
.num_errs = 4U,
.errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("l1tlb_sa_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("l1tlb_sa_data_ecc_uncorrected",
GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("l1tlb_fa_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("l1tlb_fa_data_ecc_uncorrected",
GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
},
},
/* GCC: l15 ECC errors. */
{
.name = "gcc",
.hw_unit = (u32)NVGPU_ERR_MODULE_GCC,
.num_instances = 1U,
.num_errs = 2U,
.errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("l15_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("l15_ecc_uncorrected",
GPU_GCC_L15_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
},
},
/* PMU: falcon ECC errors and the bar0 timeout error. */
{
.name = "pmu",
.hw_unit = (u32)NVGPU_ERR_MODULE_PMU,
.num_instances = 1U,
.num_errs = 5U,
.errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("falcon_imem_ecc_corrected",
GPU_PMU_FALCON_IMEM_ECC_CORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("falcon_imem_ecc_uncorrected",
GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("falcon_dmem_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("falcon_dmem_ecc_uncorrected",
GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("bar0_error_timeout",
GPU_PMU_BAR0_ERROR_TIMEOUT, INJECT_SW,
NULL, nvgpu_inject_pmu_swerror,
NULL, NULL, 0, 0),
},
},
/* PGRAPH: graphics exceptions. */
{
.name = "pgraph",
.hw_unit = (u32)NVGPU_ERR_MODULE_PGRAPH,
.num_instances = 1U,
.num_errs = 12U,
.errs = (struct nvgpu_err_desc[]) {
GPU_CRITERR("fe_exception",
GPU_PGRAPH_FE_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("memfmt_exception",
GPU_PGRAPH_MEMFMT_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pd_exception",
GPU_PGRAPH_PD_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("scc_exception",
GPU_PGRAPH_SCC_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("ds_exception",
GPU_PGRAPH_DS_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("ssync_exception",
GPU_PGRAPH_SSYNC_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("mme_exception",
GPU_PGRAPH_MME_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("sked_exception",
GPU_PGRAPH_SKED_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("be_exception",
GPU_PGRAPH_BE_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("mpc_exception",
GPU_PGRAPH_MPC_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("illegal_error",
GPU_PGRAPH_ILLEGAL_ERROR,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("gpc_gfx_exception",
GPU_PGRAPH_GPC_GFX_EXCEPTION,
INJECT_SW,
NULL, nvgpu_inject_gr_swerror,
NULL, NULL, 0, 0),
},
},
/* LTC: cache ECC errors. */
{
.name = "ltc",
.hw_unit = (u32)NVGPU_ERR_MODULE_LTC,
.num_instances = 1U,
.num_errs = 8U,
.errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("cache_dstg_ecc_corrected",
GPU_LTC_CACHE_DSTG_ECC_CORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("cache_dstg_ecc_uncorrected",
GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("cache_tstg_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("cache_tstg_ecc_uncorrected",
GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("cache_rstg_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("cache_rstg_ecc_uncorrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_NONCRITERR("cache_dstg_be_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("cache_dstg_be_ecc_uncorrected",
GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
},
},
/* HUBMMU: ECC errors and the page fault error. */
{
.name = "hubmmu",
.hw_unit = (u32)NVGPU_ERR_MODULE_HUBMMU,
.num_instances = 1U,
.num_errs = 9U,
.errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("hubmmu_l2tlb_sa_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("hubmmu_l2tlb_sa_data_ecc_uncorrected",
GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("hubmmu_tlb_sa_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("hubmmu_tlb_sa_data_ecc_uncorrected",
GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("hubmmu_pte_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("hubmmu_pte_data_ecc_uncorrected",
GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED,
INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("hubmmu_pde0_data_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("hubmmu_pde0_data_ecc_uncorrected",
GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED,
INJECT_SW,
NULL, nvgpu_inject_ecc_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("hubmmu_page_fault_error",
GPU_HUBMMU_PAGE_FAULT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_mmu_swerror,
NULL, NULL, 0, 0),
},
},
/* PRI: timeout and access violation errors. */
{
.name = "pri",
.hw_unit = (u32)NVGPU_ERR_MODULE_PRI,
.num_instances = 1U,
.num_errs = 2U,
.errs = (struct nvgpu_err_desc[]) {
GPU_CRITERR("pri_timeout_error",
GPU_PRI_TIMEOUT_ERROR,
INJECT_SW,
NULL, nvgpu_inject_pri_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("pri_access_violation",
GPU_PRI_ACCESS_VIOLATION,
INJECT_SW,
NULL, nvgpu_inject_pri_swerror,
NULL, NULL, 0, 0),
},
},
/* CE: copy engine errors. */
{
.name = "ce",
.hw_unit = (u32)NVGPU_ERR_MODULE_CE,
.num_instances = 1U,
.num_errs = 5U,
.errs = (struct nvgpu_err_desc[]) {
GPU_CRITERR("ce_launch_error",
GPU_CE_LAUNCH_ERROR,
INJECT_SW,
NULL, nvgpu_inject_ce_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("ce_block_pipe",
GPU_CE_BLOCK_PIPE,
INJECT_SW,
NULL, nvgpu_inject_ce_swerror,
NULL, NULL, 0, 0),
GPU_NONCRITERR("ce_nonblock_pipe",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("ce_invalid_config",
GPU_CE_INVALID_CONFIG,
INJECT_SW,
NULL, nvgpu_inject_ce_swerror,
NULL, NULL, 0, 0),
GPU_CRITERR("ce_method_buffer_fault",
GPU_CE_METHOD_BUFFER_FAULT,
INJECT_SW,
NULL, nvgpu_inject_ce_swerror,
NULL, NULL, 0, 0),
},
},
};
/* Number of hw module entries in gv11b_err_lut. */
u32 size_of_gv11b_lut = sizeof(gv11b_err_lut) / sizeof(gv11b_err_lut[0]);

View File

@@ -1060,6 +1060,11 @@ static const struct gops_grmgr gm20b_ops_grmgr = {
.init_gr_manager = nvgpu_init_gr_manager,
};
/* gm20b leaves both CIC ops unset: no chip-specific init, no report_err. */
static const struct gops_cic gm20b_ops_cic = {
.init = NULL,
.report_err = NULL,
};
int gm20b_init_hal(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
@@ -1165,6 +1170,7 @@ int gm20b_init_hal(struct gk20a *g)
gops->tpc = gm20b_ops_tpc;
#endif
gops->grmgr = gm20b_ops_grmgr;
gops->cic = gm20b_ops_cic;
gops->chip_init_gpu_characteristics = nvgpu_init_gpu_characteristics;
gops->get_litter_value = gm20b_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;

View File

@@ -1155,6 +1155,11 @@ static const struct gops_grmgr gp10b_ops_grmgr = {
.init_gr_manager = nvgpu_init_gr_manager,
};
/* gp10b leaves both CIC ops unset: no chip-specific init, no report_err. */
static const struct gops_cic gp10b_ops_cic = {
.init = NULL,
.report_err = NULL,
};
int gp10b_init_hal(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
@@ -1250,6 +1255,7 @@ int gp10b_init_hal(struct gk20a *g)
gops->tpc = gp10b_ops_tpc;
#endif
gops->grmgr = gp10b_ops_grmgr;
gops->cic = gp10b_ops_cic;
gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
gops->get_litter_value = gp10b_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;

View File

@@ -188,6 +188,8 @@
#include "hal/fifo/channel_gm20b.h"
#include "hal/fifo/channel_gv11b.h"
#include "hal/cic/cic_gv11b.h"
#ifdef CONFIG_NVGPU_TPC_POWERGATE
#include "hal/tpc/tpc_gv11b.h"
#endif
@@ -209,6 +211,7 @@
#include <nvgpu/gr/gr_intr.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/grmgr.h>
#include <nvgpu/cic.h>
#include <nvgpu/hw/gv11b/hw_pwr_gv11b.h>
@@ -1421,6 +1424,11 @@ static const struct gops_grmgr gv11b_ops_grmgr = {
.init_gr_manager = nvgpu_init_gr_manager,
};
/*
* gv11b CIC ops: gv11b_cic_init performs the chip-specific CIC init and
* nvgpu_cic_report_err_safety_services is installed as the report_err op.
*/
static const struct gops_cic gv11b_ops_cic = {
.init = gv11b_cic_init,
.report_err = nvgpu_cic_report_err_safety_services,
};
int gv11b_init_hal(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
@@ -1516,6 +1524,7 @@ int gv11b_init_hal(struct gk20a *g)
gops->tpc = gv11b_ops_tpc;
#endif
gops->grmgr = gv11b_ops_grmgr;
gops->cic = gv11b_ops_cic;
gops->chip_init_gpu_characteristics = gv11b_init_gpu_characteristics;
gops->get_litter_value = gv11b_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;

View File

@@ -1612,6 +1612,11 @@ static const struct gops_grmgr tu104_ops_grmgr = {
};
#endif
/* tu104 leaves both CIC ops unset: no chip-specific init, no report_err. */
static const struct gops_cic tu104_ops_cic = {
.init = NULL,
.report_err = NULL,
};
int tu104_init_hal(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
@@ -1720,6 +1725,7 @@ int tu104_init_hal(struct gk20a *g)
gops->gsp = tu104_ops_gsp;
gops->top = tu104_ops_top;
gops->grmgr = tu104_ops_grmgr;
gops->cic = tu104_ops_cic;
gops->chip_init_gpu_characteristics = tu104_init_gpu_characteristics;
gops->get_litter_value = tu104_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;

View File

@@ -0,0 +1,206 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_CIC_H
#define NVGPU_CIC_H
#include <nvgpu/log.h>
/*
 * Forward declarations: this header only passes pointers to these types,
 * so it does not need (and should not rely on another header for) their
 * definitions.
 */
struct gk20a;
struct nvgpu_err_desc;
/**
* @file
*
* Public structs and APIs exposed by Central Interrupt Controller
* (CIC) unit.
*/
/*
* Forward a CIC debug message to nvgpu_log(), tagged with gpu_dbg_cic and
* prefixed with "CIC | ".
*
* Requires a string literal for the format - notice the string
* concatenation.
*/
#define cic_dbg(g, fmt, args...) \
nvgpu_log((g), gpu_dbg_cic, "CIC | " fmt, ##args)
/**
* @brief Initialize the CIC unit's data structures
*
* @param g [in] - The GPU driver struct.
*
* - Check if CIC unit is already initialized by checking its
* reference in struct gk20a (g->cic).
* - If not initialized, allocate memory for CIC's private data
* structure.
* - Initialize the members of this private structure.
* - Store a reference pointer to the CIC struct in struct gk20a.
*
* @return 0 if initialization had already happened or was
* successful in this call.
* < 0 if any steps in initialization fail.
*
* @retval -ENOMEM if sufficient memory is not available for CIC
* struct.
*/
int nvgpu_cic_init_common(struct gk20a *g);
/**
* @brief De-initialize the CIC unit's data structures
*
* @param g [in] - The GPU driver struct.
*
* - Check if CIC unit is already deinitialized by checking its
* reference in struct gk20a (g->cic).
* - If not deinitialized, set the LUT pointer to NULL and set the
* num_hw_modules to 0.
* - Free the memory allocated for CIC's private data structure.
* - Invalidate reference pointer to the CIC struct in struct gk20a.
*
* @return 0 if deinitialization had already happened or was
* successful in this call.
*
* @retval 0 The only documented return value (no failure codes are
* listed for this function).
*/
int nvgpu_cic_deinit_common(struct gk20a *g);
/**
* @brief Check if the input HW unit ID is a valid CIC HW unit.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit_id [in] - HW unit ID to be verified
*
* - Check if the CIC unit is initialized so that the LUT is
* available to verify the hw_unit_id.
* - LUT is an array of nvgpu_err_hw_module struct which contains the
* hw_unit_id for a specific unit.
* - The hw_unit_id starts from 0 and ends at
* (g->cic->num_hw_modules - 1) and hence effectively can serve as
* index into the LUT array.
*
* @return 0 if input hw_unit_id is valid,
* < 0 if input hw_unit_id is invalid
* @retval -EINVAL if CIC is not initialized or
* if input hw_unit_id is invalid.
*/
int nvgpu_cic_check_hw_unit_id(struct gk20a *g, u32 hw_unit_id);
/**
* @brief Check if the input error ID is valid in CIC domain.
*
* @param g [in] - The GPU driver struct.
* @param hw_unit_id [in] - HW unit ID corresponding to err_id
* @param err_id [in] - Error ID to be verified
*
* - Check if the CIC unit is initialized so that the LUT is
* available to verify the hw_unit_id.
* - LUT is an array of nvgpu_err_hw_module struct which contains the
* hw_unit_id for a specific unit and also the number of errors
* reported by the unit.
* - The hw_unit_id starts from 0 and ends at
* (g->cic->num_hw_modules - 1) and hence effectively can serve as
* index into the LUT array.
* - Before using the input hw_unit_id to index into LUT, verify that
* the hw_unit_id is valid.
* - Index using hw_unit_id and derive the num_errs from LUT for the
* given HW unit.
* - Check if the input err_id lies between 0 and (num_errs - 1).
*
* @return 0 if input err_id is valid, < 0 if input err_id is invalid
* @retval -EINVAL if CIC is not initialized or
* if input hw_unit_id or err_id is invalid.
*/
int nvgpu_cic_check_err_id(struct gk20a *g, u32 hw_unit_id,
u32 err_id);
/**
* @brief Get the LUT data for input HW unit ID and error ID
*
* @param g [in] - The GPU driver struct.
* @param hw_unit_id [in] - HW unit ID corresponding to err_id
* @param err_id [in] - Error ID whose LUT data is required.
* @param err_desc [out] - Pointer to store LUT data into.
*
* - LUT is an array of nvgpu_err_hw_module struct which contains
* all the static data for each HW unit reporting error to CIC.
* - nvgpu_err_hw_module struct in turn holds an array of struct
* nvgpu_err_desc (its errs member) which stores static data per
* error ID.
* - Use the nvgpu_cic_check_err_id() API to
* - Check if the CIC unit is initialized so that the LUT is
* available to read the static data for input err_id.
* - Check if input HW unit ID and error ID are valid.
* - The hw_unit_id starts from 0 and ends at
* (g->cic->num_hw_modules - 1) and hence effectively can serve as
* index into the LUT array.
* - The err_id starts from 0 and ends at
* (lut[hw_unit_id].num_errs - 1), and hence effectively can serve
* as index into array of errs[].
* - Index using hw_unit_id and err_id and store the LUT data into
* \a err_desc.
*
* @return 0 if err_desc was successfully filled with LUT data,
* < 0 otherwise.
* @retval -EINVAL if CIC is not initialized or
* if input hw_unit_id or err_id is invalid.
*/
int nvgpu_cic_get_err_desc(struct gk20a *g, u32 hw_unit_id,
u32 err_id, struct nvgpu_err_desc **err_desc);
/**
* @brief GPU HW errors are reported to Safety_Services via SDL unit.
* This function provides an interface between error reporting functions
* used by sub-units in nvgpu-rm and SDL unit.
*
* @param g [in] - The GPU driver struct.
* @param err_info [in] - Error message.
* @param err_size [in] - Size of the error message.
* @param is_critical [in] - Criticality of the error being reported.
*
* On QNX:
* - Checks whether SDL is initialized.
* - Enqueues \a err_info into error message queue.
* - Signals the workqueue condition variable.
* - If the reported error is critical, invokes #nvgpu_sw_quiesce() api.
*
* On Linux:
* - NOP currently, as safety services are absent in Linux.
*
* @return 0 in case of success, < 0 in case of failure.
* @retval -EAGAIN if SDL not initialized.
* @retval -ENOMEM if sufficient memory is not available.
*/
int nvgpu_cic_report_err_safety_services(struct gk20a *g,
void *err_info, size_t err_size, bool is_critical);
/**
* @brief Get the number of HW modules supported by CIC.
*
* @param g [in] - The GPU driver struct.
*
* - Check if the CIC unit is initialized so that num_hw_modules is
* initialized.
* - Return the num_hw_modules variable stored in CIC's private
* struct.
*
* @return >= 0 value of num_hw_modules if successful;
* < 0 otherwise.
* @retval -EINVAL if CIC is not initialized.
*/
int nvgpu_cic_get_num_hw_modules(struct gk20a *g);
#endif /* NVGPU_CIC_H */

View File

@@ -107,6 +107,7 @@ struct nvgpu_gpfifo_entry;
struct vm_gk20a_mapping_batch;
struct pmu_pg_stats_data;
struct clk_domains_mon_status_params;
struct nvgpu_cic;
enum nvgpu_flush_op;
enum gk20a_mem_rw_flag;
@@ -791,6 +792,8 @@ struct gk20a {
/** Multi Instance GPU information. */
struct nvgpu_mig mig;
/** Pointer to struct storing CIC unit's data */
struct nvgpu_cic *cic;
};
/**

View File

@@ -0,0 +1,67 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GOPS_CIC_H
#define NVGPU_GOPS_CIC_H
#include <nvgpu/types.h>
/**
* @file
*
* Central Interrupt Controller unit HAL interface
*
*/
struct gk20a;
struct nvgpu_cic;
/**
* CIC unit HAL operations
*
* Either op may be left NULL by a chip HAL (the gm20b, gp10b and tu104
* ops tables set both to NULL; gv11b populates both).
* NOTE(review): confirm that every caller NULL-checks an op before
* invoking it.
*
* @see gpu_ops
*/
struct gops_cic {
/**
* @brief Chip specific CIC unit initialization.
*
* @param g [in] Pointer to GPU driver struct.
* @param cic [in] Pointer to CIC private struct.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int (*init)(struct gk20a *g, struct nvgpu_cic *cic);
/**
* @brief Report error to safety services.
*
* @param g [in] Pointer to GPU driver struct.
* @param err_pkt [in] Pointer to struct holding err details.
* @param err_size [in] Size of err_pkt.
* @param is_critical [in] Flag indicating criticality of error.
*
* @return 0 in case of success, < 0 in case of failure.
*/
int (*report_err)(struct gk20a *g,
void *err_pkt, size_t err_size,
bool is_critical);
};
#endif/*NVGPU_GOPS_CIC_H*/

View File

@@ -71,6 +71,7 @@
#include <nvgpu/gops/pmu.h>
#include <nvgpu/gops/ecc.h>
#include <nvgpu/gops/grmgr.h>
#include <nvgpu/gops/cic.h>
struct gk20a;
struct nvgpu_debug_context;
@@ -224,6 +225,7 @@ struct gpu_ops {
struct gops_grmgr grmgr;
struct gops_cic cic;
};
#endif /* NVGPU_GOPS_OPS_H */

View File

@@ -0,0 +1,26 @@
/*
* Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/cic.h>
#include <nvgpu/types.h>
struct gk20a;
/*
 * Stub implementation: nothing is reported, every argument is ignored and
 * success (0) is returned unconditionally.
 */
int nvgpu_cic_report_err_safety_services(struct gk20a *g,
		void *err_info, size_t err_size, bool is_critical)
{
	const int status = 0;

	return status;
}

View File

@@ -1,69 +0,0 @@
/*
* Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/nvgpu_err.h>
struct gk20a;
struct mmu_fault_info;
/* No-op stub: ignores every argument and returns without side effects. */
void nvgpu_report_host_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	/* Intentionally empty. */
}
/* No-op stub: ignores every argument and returns without side effects. */
void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u64 err_addr, u64 err_count)
{
	/* Intentionally empty. */
}
/* No-op stub: ignores every argument and returns without side effects. */
void nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, struct gr_err_info *err_info, u32 sub_err_type)
{
	/* Intentionally empty. */
}
/* No-op stub: ignores every argument and returns without side effects. */
void nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		u32 sub_err_type, u32 status)
{
	/* Intentionally empty. */
}
/* No-op stub: ignores every argument and returns without side effects. */
void nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	/* Intentionally empty. */
}
/* No-op stub: ignores every argument and returns without side effects. */
void nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u32 err_addr, u32 err_code)
{
	/* Intentionally empty. */
}
/* No-op stub: ignores every argument and returns without side effects. */
void nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		void *data)
{
	/* Intentionally empty. */
}
/* No-op stub: ignores every argument and returns without side effects. */
void nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit,
		u32 err_id, struct mmu_fault_info *fault_info,
		u32 status, u32 sub_err_type)
{
	/* Intentionally empty. */
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,10 +27,10 @@
#include <nvgpu/ecc.h>
#include <nvgpu/debugger.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/cic.h>
#include <nvgpu/types.h>
struct gk20a;
struct mmu_fault_info;
#ifdef CONFIG_NVGPU_DEBUGGER
void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
@@ -49,51 +49,8 @@ void nvgpu_ecc_sysfs_remove(struct gk20a *g)
}
#endif
void nvgpu_report_host_err(struct gk20a *g, u32 hw_unit,
u32 inst, u32 err_id, u32 intr_info)
int nvgpu_cic_report_err_safety_services(struct gk20a *g,
void *err_info, size_t err_size, bool is_critical)
{
return;
}
void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
u32 err_id, u64 err_addr, u64 err_count)
{
return;
}
void nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst,
u32 err_id, struct gr_err_info *err_info, u32 sub_err_type)
{
return;
}
void nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
u32 sub_err_type, u32 status)
{
return;
}
void nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit,
u32 inst, u32 err_id, u32 intr_info)
{
return;
}
void nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst,
u32 err_id, u32 err_addr, u32 err_code)
{
return;
}
void nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id,
void *data)
{
return;
}
void nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit,
u32 err_id, struct mmu_fault_info *fault_info,
u32 status, u32 sub_err_type)
{
return;
return 0;
}

View File

@@ -795,3 +795,10 @@ nvgpu_init_pramin
gk20a_bus_set_bar0_window
nvgpu_pramin_ops_init
nvgpu_dma_alloc_vid_at
nvgpu_cic_init_common
nvgpu_cic_deinit_common
nvgpu_cic_check_hw_unit_id
nvgpu_cic_check_err_id
nvgpu_cic_get_err_desc
nvgpu_cic_report_err_safety_services
nvgpu_cic_get_num_hw_modules

View File

@@ -248,6 +248,7 @@ gv11b_blcg_hshub_get_gating_prod
gv11b_netlist_is_firmware_defined
gv11b_top_get_num_lce
gv11b_bus_configure_debug_bus
gv11b_cic_init
mc_gp10b_intr_stall_unit_config
mc_gp10b_intr_nonstall_unit_config
nvgpu_acr_bootstrap_hs_acr
@@ -809,3 +810,10 @@ nvgpu_rc_tsg_and_related_engines
nvgpu_rc_mmu_fault
gp10b_priv_ring_isr_handle_0
gp10b_priv_ring_isr_handle_1
nvgpu_cic_init_common
nvgpu_cic_deinit_common
nvgpu_cic_check_hw_unit_id
nvgpu_cic_check_err_id
nvgpu_cic_get_err_desc
nvgpu_cic_report_err_safety_services
nvgpu_cic_get_num_hw_modules

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,6 +35,7 @@
#include <nvgpu/firmware.h>
#include <nvgpu/netlist.h>
#include <nvgpu/fuse.h>
#include <nvgpu/cic.h>
#include <nvgpu/gr/gr.h>
@@ -215,6 +216,11 @@ static int init_acr_falcon_test_env(struct unit_module *m, struct gk20a *g)
return -ENODEV;
}
err = nvgpu_cic_init_common(g);
if (err != 0) {
unit_return_fail(m, "CIC init failed\n");
}
/*
* Register space: FB_MMU
*/

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,6 +25,7 @@
#include <nvgpu/posix/io.h>
#include <unit/core.h>
#include <nvgpu/io.h>
#include <nvgpu/cic.h>
#include <os/posix/os_posix.h>
#include <nvgpu/posix/posix-fault-injection.h>
@@ -35,6 +36,7 @@
#include <hal/bus/bus_gm20b.h>
#include <hal/bus/bus_gp10b.h>
#include <hal/bus/bus_gv11b.h>
#include <hal/cic/cic_gv11b.h>
#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
#include <nvgpu/hw/gv11b/hw_bus_gv11b.h>
@@ -128,6 +130,8 @@ int test_bus_setup(struct unit_module *m, struct gk20a *g, void *args)
g->ops.mc.intr_nonstall_unit_config =
mc_gp10b_intr_nonstall_unit_config;
g->ops.ptimer.isr = gk20a_ptimer_isr;
g->ops.cic.init = gv11b_cic_init;
g->ops.cic.report_err = nvgpu_cic_report_err_safety_services;
/* Map register space NV_PRIV_MASTER */
if (nvgpu_posix_io_add_reg_space(g, NV_PBUS_START, NV_PBUS_SIZE) != 0) {
@@ -154,6 +158,12 @@ int test_bus_setup(struct unit_module *m, struct gk20a *g, void *args)
(void)nvgpu_posix_register_io(g, &test_reg_callbacks);
if (nvgpu_cic_init_common(g) != 0) {
unit_err(m, "%s: Failed to initialize CIC\n",
__func__);
return UNIT_FAIL;
}
return UNIT_SUCCESS;
}

View File

@@ -27,8 +27,10 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/device.h>
#include <nvgpu/ce.h>
#include <nvgpu/cic.h>
#include <hal/ce/ce_gp10b.h>
#include <hal/ce/ce_gv11b.h>
#include <hal/cic/cic_gv11b.h>
#include <nvgpu/hw/gv11b/hw_ce_gv11b.h>
#include "nvgpu-ce.h"
@@ -126,6 +128,15 @@ int test_ce_setup_env(struct unit_module *m,
g->blcg_enabled = false;
nvgpu_spinlock_init(&g->mc.intr_lock);
g->ops.cic.init = gv11b_cic_init;
g->ops.cic.report_err = nvgpu_cic_report_err_safety_services;
if (nvgpu_cic_init_common(g) != 0) {
unit_err(m, "%s: failed to initialize CIC\n",
__func__);
return UNIT_FAIL;
}
return UNIT_SUCCESS;
}

View File

@@ -27,6 +27,7 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/cic.h>
#include <nvgpu/nvgpu_init.h>
#include "hal/mc/mc_gp10b.h"
#include "hal/fb/fb_gm20b.h"
@@ -34,6 +35,7 @@
#include "hal/fb/ecc/fb_ecc_gv11b.h"
#include "hal/fb/intr/fb_intr_gv11b.h"
#include "hal/fb/intr/fb_intr_ecc_gv11b.h"
#include "hal/cic/cic_gv11b.h"
#include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
#include "fb_fusa.h"
@@ -62,6 +64,8 @@ int fb_gv11b_init_test(struct unit_module *m, struct gk20a *g, void *args)
g->ops.mc.intr_nonstall_unit_config =
mc_gp10b_intr_nonstall_unit_config;
g->ops.fb.intr.enable = gv11b_fb_intr_enable;
g->ops.cic.init = gv11b_cic_init;
g->ops.cic.report_err = nvgpu_cic_report_err_safety_services;
/*
* Define some arbitrary addresses for test purposes.
@@ -74,6 +78,10 @@ int fb_gv11b_init_test(struct unit_module *m, struct gk20a *g, void *args)
g->mm.mmu_rd_mem.cpu_va = (void *) 0x30000000;
g->mm.mmu_rd_mem.aperture = APERTURE_SYSMEM;
if (nvgpu_cic_init_common(g) != 0) {
unit_return_fail(m, "CIC init failed\n");
}
g->ops.ecc.ecc_init_support(g);
nvgpu_writel(g, fb_niso_intr_en_set_r(0), 0);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -32,6 +32,7 @@
#include <nvgpu/fifo/userd.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/device.h>
#include <nvgpu/cic.h>
#include <nvgpu/posix/io.h>
@@ -187,6 +188,12 @@ int test_fifo_init_support(struct unit_module *m, struct gk20a *g, void *args)
/* Do not allocate from vidmem */
nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY, true);
err = nvgpu_cic_init_common(g);
if (err != 0) {
unit_err(m, "CIC init failed!\n");
return UNIT_FAIL;
}
return UNIT_SUCCESS;
fail:

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -33,10 +33,12 @@
#include <nvgpu/netlist.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/cic.h>
#include "common/gr/gr_falcon_priv.h"
#include "hal/init/hal_gv11b.h"
#include "hal/cic/cic_gv11b.h"
#include "nvgpu-gr.h"
#include "nvgpu-gr-gv11b.h"
@@ -162,6 +164,14 @@ int test_gr_init_setup_ready(struct unit_module *m,
nvgpu_device_init(g);
nvgpu_fifo_setup_sw(g);
g->ops.cic.init = gv11b_cic_init;
g->ops.cic.report_err = nvgpu_cic_report_err_safety_services;
err = nvgpu_cic_init_common(g);
if (err != 0) {
unit_return_fail(m, "CIC init failed\n");
}
/* Allocate and Initialize GR */
err = test_gr_init_setup(m, g, args);
if (err != 0) {

View File

@@ -37,6 +37,7 @@
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/ecc.h>
#include <nvgpu/netlist.h>
#include <nvgpu/cic.h>
#include <nvgpu/gr/gr.h>
#include <hal/ltc/intr/ltc_intr_gv11b.h>
#include <nvgpu/vm.h>
@@ -136,6 +137,11 @@ int test_ltc_init_support(struct unit_module *m,
unit_return_fail(m, "nvgpu_init_hal failed\n");
}
err = nvgpu_cic_init_common(g);
if (err != 0) {
unit_return_fail(m, "CIC init failed\n");
}
/*
* Init dependent ECC unit
*/

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -37,6 +37,7 @@
#include <nvgpu/tsg.h>
#include <nvgpu/engines.h>
#include <nvgpu/preempt.h>
#include <nvgpu/cic.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
#include <nvgpu/hw/gv11b/hw_gmmu_gv11b.h>
@@ -60,6 +61,7 @@
#include "hal/mm/gmmu/gmmu_gv11b.h"
#include "hal/mm/mm_gp10b.h"
#include "hal/mm/mm_gv11b.h"
#include "hal/cic/cic_gv11b.h"
#include "hal/mm/mmu_fault/mmu_fault_gv11b.h"
#include "mmu-fault-gv11b-fusa.h"
@@ -220,6 +222,13 @@ int test_env_init_mm_mmu_fault_gv11b_fusa(struct unit_module *m,
unit_return_fail(m, "nvgpu_init_mm_support failed\n");
}
g->ops.cic.init = gv11b_cic_init;
g->ops.cic.report_err = nvgpu_cic_report_err_safety_services;
if (nvgpu_cic_init_common(g) != 0) {
unit_return_fail(m, "Failed to initialize CIC\n");
}
return UNIT_SUCCESS;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,12 +23,14 @@
#include <unit/unit.h>
#include <unit/io.h>
#include <nvgpu/posix/io.h>
#include <nvgpu/cic.h>
#include <nvgpu/gk20a.h>
#include <hal/priv_ring/priv_ring_gm20b.h>
#include <hal/priv_ring/priv_ring_gp10b.h>
#include <hal/init/hal_gv11b_litter.h>
#include <hal/mc/mc_gp10b.h>
#include "hal/cic/cic_gv11b.h"
#include <nvgpu/hw/gv11b/hw_pri_ringstation_sys_gv11b.h>
#include <nvgpu/hw/gv11b/hw_pri_ringstation_gpc_gv11b.h>
@@ -123,6 +125,8 @@ int test_priv_ring_setup(struct unit_module *m, struct gk20a *g, void *args)
g->ops.get_litter_value = gv11b_get_litter_value;
g->ops.mc.intr_stall_unit_config =
mc_gp10b_intr_stall_unit_config;
g->ops.cic.init = gv11b_cic_init;
g->ops.cic.report_err = nvgpu_cic_report_err_safety_services;
/* Map register space NV_PRIV_MASTER */
if (nvgpu_posix_io_add_reg_space(g, NV_PRIV_MASTER_START,
@@ -158,6 +162,12 @@ int test_priv_ring_setup(struct unit_module *m, struct gk20a *g, void *args)
(void)nvgpu_posix_register_io(g, &test_reg_callbacks);
if (nvgpu_cic_init_common(g) != 0) {
unit_err(m, "%s: Failed to initialize CIC\n",
__func__);
return UNIT_FAIL;
}
return UNIT_SUCCESS;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,7 +26,9 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/ptimer.h>
#include <nvgpu/cic.h>
#include <hal/ptimer/ptimer_gk20a.h>
#include <hal/cic/cic_gv11b.h>
#include <nvgpu/hw/gk20a/hw_timer_gk20a.h>
#include "nvgpu-ptimer.h"
@@ -88,6 +90,9 @@ int test_setup_env(struct unit_module *m,
g->ops.ptimer.read_ptimer = gk20a_read_ptimer;
g->ops.ptimer.isr = gk20a_ptimer_isr;
g->ops.cic.init = gv11b_cic_init;
g->ops.cic.report_err = nvgpu_cic_report_err_safety_services;
/* Create ptimer register space */
if (nvgpu_posix_io_add_reg_space(g, PTIMER_REG_SPACE_START,
PTIMER_REG_SPACE_SIZE) != 0) {
@@ -97,6 +102,12 @@ int test_setup_env(struct unit_module *m,
}
(void)nvgpu_posix_register_io(g, &test_reg_callbacks);
if (nvgpu_cic_init_common(g) != 0) {
unit_err(m, "%s: failed to initialize CIC\n",
__func__);
return UNIT_FAIL;
}
return UNIT_SUCCESS;
}