mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-25 02:52:51 +03:00
Open source GPL/LGPL release
This commit is contained in:
91
drivers/gpu/nvgpu/common/cic/ce_cic.c
Normal file
91
drivers/gpu/nvgpu/common/cic/ce_cic.c
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
/**
 * @brief Report a CE error through the CIC report_err HAL.
 *
 * @param g [in]         - The GPU driver struct.
 * @param hw_unit [in]   - HW unit index; must be NVGPU_ERR_MODULE_CE.
 * @param inst [in]      - Instance ID, stored as the sub unit ID.
 * @param err_id [in]    - Error index used to look up the error descriptor.
 * @param intr_info [in] - Interrupt info, stored as the sub error type.
 *
 * - Returns silently when g->ops.cic.report_err is not set.
 * - Any failure (invalid hw_unit, descriptor lookup failure, report_err
 *   returning non-zero) ends with a call to nvgpu_sw_quiesce().
 *
 * @return None
 */
void nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	struct nvgpu_err_msg msg;
	struct nvgpu_err_desc *desc = NULL;
	int ret;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			"to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_CE) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		nvgpu_sw_quiesce(g);
		return;
	}

	ret = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &desc);
	if (ret != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
			"err_id (%u) for hw module (%u)",
			err_id, hw_unit);
		nvgpu_sw_quiesce(g);
		return;
	}

	/* Fill the packet: generic fields first, then the CE specific part. */
	nvgpu_init_ce_err_msg(&msg);
	msg.hw_unit_id = hw_unit;
	msg.err_id = desc->error_id;
	msg.is_critical = desc->is_critical;
	msg.err_info.ce_info.header.sub_unit_id = inst;
	msg.err_desc = desc;
	/*
	 * sub_err_type can be decoded using intr_info by referring to the
	 * interrupt status register definition corresponding to the error
	 * that is being reported.
	 */
	msg.err_info.ce_info.header.sub_err_type = intr_info;
	msg.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(msg.err_info.ce_info));

	/* report_err was verified to be non-NULL at function entry. */
	ret = g->ops.cic.report_err(g, (void *)&msg,
			sizeof(msg), desc->is_critical);
	if (ret != 0) {
		nvgpu_err(g, "Failed to report CE error: "
			"inst=%u err_id=%u intr_info=%u",
			inst, err_id, intr_info);
		nvgpu_sw_quiesce(g);
	}
}
|
||||
|
||||
/**
 * Inject a CE software error: forwards to nvgpu_report_ce_err() with the
 * instance ID fixed to 0 and sub_err_type passed as the interrupt info.
 */
void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	const u32 inst = 0U;

	nvgpu_report_ce_err(g, hw_unit, inst, err_index, sub_err_type);
}
|
||||
161
drivers/gpu/nvgpu/common/cic/cic.c
Normal file
161
drivers/gpu/nvgpu/common/cic/cic.c
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
int nvgpu_cic_init_common(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_cic *cic;
|
||||
int err = 0;
|
||||
|
||||
if (g->cic != NULL) {
|
||||
cic_dbg(g, "CIC unit already initialized");
|
||||
return 0;
|
||||
}
|
||||
|
||||
cic = nvgpu_kzalloc(g, sizeof(*cic));
|
||||
if (cic == NULL) {
|
||||
nvgpu_err(g, "Failed to allocate memory "
|
||||
"for struct nvgpu_cic");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (g->ops.cic.init != NULL) {
|
||||
err = g->ops.cic.init(g, cic);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "CIC chip specific "
|
||||
"initialization failed.");
|
||||
goto cleanup;
|
||||
}
|
||||
} else {
|
||||
cic->err_lut = NULL;
|
||||
cic->num_hw_modules = 0;
|
||||
}
|
||||
|
||||
g->cic = cic;
|
||||
cic_dbg(g, "CIC unit initialization done.");
|
||||
return 0;
|
||||
|
||||
cleanup:
|
||||
if (cic != NULL) {
|
||||
nvgpu_kfree(g, cic);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_cic_deinit_common(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_cic *cic;
|
||||
|
||||
cic = g->cic;
|
||||
|
||||
if (cic == NULL) {
|
||||
cic_dbg(g, "CIC unit already deinitialized");
|
||||
return 0;
|
||||
}
|
||||
|
||||
cic->err_lut = NULL;
|
||||
cic->num_hw_modules = 0;
|
||||
|
||||
nvgpu_kfree(g, cic);
|
||||
g->cic = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * @brief Validate a HW unit ID against the CIC module count.
 *
 * @param g [in]          - The GPU driver struct.
 * @param hw_unit_id [in] - HW unit index to validate.
 *
 * @return 0 when hw_unit_id is below g->cic->num_hw_modules; -EINVAL when
 *         CIC is not initialized, the module count is 0, or the ID is out
 *         of range.
 */
int nvgpu_cic_check_hw_unit_id(struct gk20a *g, u32 hw_unit_id)
{
	u32 num_modules;

	if (g->cic == NULL) {
		nvgpu_err(g, "CIC is not initialized");
		return -EINVAL;
	}

	num_modules = g->cic->num_hw_modules;

	if (num_modules == 0U) {
		cic_dbg(g, "LUT not initialized.");
		return -EINVAL;
	}

	if (hw_unit_id >= num_modules) {
		cic_dbg(g, "Invalid input HW unit ID.");
		return -EINVAL;
	}

	return 0;
}
|
||||
|
||||
/**
 * @brief Validate an error ID for a given HW unit.
 *
 * @param g [in]          - The GPU driver struct.
 * @param hw_unit_id [in] - HW unit index.
 * @param err_id [in]     - Error index within that HW unit.
 *
 * @return 0 when err_id is below the number of errors listed in the LUT
 *         entry for hw_unit_id; -EINVAL when CIC or its LUT is missing or
 *         err_id is out of range; otherwise the error returned by
 *         nvgpu_cic_check_hw_unit_id().
 */
int nvgpu_cic_check_err_id(struct gk20a *g, u32 hw_unit_id,
		u32 err_id)
{
	int ret;

	if ((g->cic == NULL) || (g->cic->err_lut == NULL)) {
		cic_dbg(g, "CIC/LUT not initialized.");
		return -EINVAL;
	}

	/* The unit ID must be valid before it is used to index the LUT. */
	ret = nvgpu_cic_check_hw_unit_id(g, hw_unit_id);
	if (ret != 0) {
		return ret;
	}

	if (err_id < g->cic->err_lut[hw_unit_id].num_errs) {
		return 0;
	}

	return -EINVAL;
}
|
||||
|
||||
/**
 * @brief Look up the error descriptor for a (HW unit, error ID) pair.
 *
 * @param g [in]          - The GPU driver struct.
 * @param hw_unit_id [in] - HW unit index.
 * @param err_id [in]     - Error index within that HW unit.
 * @param err_desc [out]  - Receives a pointer to the LUT entry on success;
 *                          left untouched on failure.
 *
 * @return 0 on success, otherwise the error returned by
 *         nvgpu_cic_check_err_id().
 */
int nvgpu_cic_get_err_desc(struct gk20a *g, u32 hw_unit_id,
		u32 err_id, struct nvgpu_err_desc **err_desc)
{
	int ret;

	/*
	 * The (g->cic != NULL) and (g->cic->err_lut != NULL) checks can be
	 * skipped here because they are performed as part of
	 * nvgpu_cic_check_err_id() called below.
	 */
	ret = nvgpu_cic_check_err_id(g, hw_unit_id, err_id);
	if (ret == 0) {
		*err_desc = &(g->cic->err_lut[hw_unit_id].errs[err_id]);
	}

	return ret;
}
|
||||
|
||||
int nvgpu_cic_get_num_hw_modules(struct gk20a *g)
|
||||
{
|
||||
if (g->cic == NULL) {
|
||||
nvgpu_err(g, "CIC is not initialized");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return g->cic->num_hw_modules;
|
||||
}
|
||||
251
drivers/gpu/nvgpu/common/cic/cic_intr.c
Normal file
251
drivers/gpu/nvgpu/common/cic/cic_intr.c
Normal file
@@ -0,0 +1,251 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/trace.h>
|
||||
|
||||
/**
 * @brief Wait until the stalling-interrupt pending flag reads 0.
 *
 * @param g [in]       - The GPU driver struct.
 * @param timeout [in] - Timeout passed through to NVGPU_COND_WAIT.
 *
 * @return The result of NVGPU_COND_WAIT on
 *         g->mc.sw_irq_stall_last_handled_cond.
 */
int nvgpu_cic_wait_for_stall_interrupts(struct gk20a *g, u32 timeout)
{
	int ret;

	/* Block until every stalling irq has been handled. */
	ret = NVGPU_COND_WAIT(&g->mc.sw_irq_stall_last_handled_cond,
			nvgpu_atomic_read(&g->mc.sw_irq_stall_pending) == 0,
			timeout);

	return ret;
}
|
||||
|
||||
/**
 * @brief Wait until the non-stalling-interrupt pending flag reads 0.
 *
 * @param g [in]       - The GPU driver struct.
 * @param timeout [in] - Timeout passed through to NVGPU_COND_WAIT.
 *
 * @return The result of NVGPU_COND_WAIT on
 *         g->mc.sw_irq_nonstall_last_handled_cond.
 */
int nvgpu_cic_wait_for_nonstall_interrupts(struct gk20a *g, u32 timeout)
{
	int ret;

	/* Block until every non-stalling irq has been handled. */
	ret = NVGPU_COND_WAIT(&g->mc.sw_irq_nonstall_last_handled_cond,
			nvgpu_atomic_read(&g->mc.sw_irq_nonstall_pending) == 0,
			timeout);

	return ret;
}
|
||||
|
||||
/**
 * @brief Wait for both stalling and non-stalling interrupts to be handled.
 *
 * @param g [in] - The GPU driver struct.
 *
 * Both waits are issued with a timeout argument of 0. A failing wait is
 * logged and does not prevent the second wait from running.
 *
 * @return None
 */
void nvgpu_cic_wait_for_deferred_interrupts(struct gk20a *g)
{
	int stall_ret;
	int nonstall_ret;

	stall_ret = nvgpu_cic_wait_for_stall_interrupts(g, 0U);
	if (stall_ret != 0) {
		nvgpu_err(g, "wait for stall interrupts failed %d",
				stall_ret);
	}

	nonstall_ret = nvgpu_cic_wait_for_nonstall_interrupts(g, 0U);
	if (nonstall_ret != 0) {
		nvgpu_err(g, "wait for nonstall interrupts failed %d",
				nonstall_ret);
	}
}
|
||||
|
||||
void nvgpu_cic_intr_mask(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
if (g->ops.mc.intr_mask != NULL) {
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_mask(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
void nvgpu_cic_log_pending_intrs(struct gk20a *g)
|
||||
{
|
||||
if (g->ops.mc.log_pending_intrs != NULL) {
|
||||
g->ops.mc.log_pending_intrs(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_enable(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
if (g->ops.mc.intr_enable != NULL) {
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_enable(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Forward (unit, enable) to the g->ops.mc.intr_stall_unit_config() HAL
 * while holding g->mc.intr_lock. The HAL pointer is called without a NULL
 * check.
 */
void nvgpu_cic_intr_stall_unit_config(struct gk20a *g, u32 unit, bool enable)
{
	unsigned long irq_flags = 0;

	nvgpu_spinlock_irqsave(&g->mc.intr_lock, irq_flags);

	g->ops.mc.intr_stall_unit_config(g, unit, enable);

	nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, irq_flags);
}
|
||||
|
||||
/**
 * Forward (unit, enable) to the g->ops.mc.intr_nonstall_unit_config() HAL
 * while holding g->mc.intr_lock. The HAL pointer is called without a NULL
 * check.
 */
void nvgpu_cic_intr_nonstall_unit_config(struct gk20a *g, u32 unit, bool enable)
{
	unsigned long irq_flags = 0;

	nvgpu_spinlock_irqsave(&g->mc.intr_lock, irq_flags);

	g->ops.mc.intr_nonstall_unit_config(g, unit, enable);

	nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, irq_flags);
}
|
||||
|
||||
void nvgpu_cic_intr_stall_pause(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_stall_pause(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_stall_resume(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_stall_resume(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_nonstall_pause(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_nonstall_pause(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_nonstall_resume(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_nonstall_resume(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
/**
 * Act on the non-stall work bits in work_ops.
 *
 * g->ops.semaphore_wakeup() is called only when
 * NVGPU_CIC_NONSTALL_OPS_WAKEUP_SEMAPHORE is set; its second argument
 * reflects whether NVGPU_CIC_NONSTALL_OPS_POST_EVENTS is set.
 */
static void nvgpu_cic_intr_nonstall_work(struct gk20a *g, u32 work_ops)
{
	const bool post_events =
		((work_ops & NVGPU_CIC_NONSTALL_OPS_POST_EVENTS) != 0U);

	if ((work_ops & NVGPU_CIC_NONSTALL_OPS_WAKEUP_SEMAPHORE) == 0U) {
		return;
	}

	g->ops.semaphore_wakeup(g, post_events);
}
|
||||
|
||||
u32 nvgpu_cic_intr_nonstall_isr(struct gk20a *g)
|
||||
{
|
||||
u32 non_stall_intr_val = 0U;
|
||||
|
||||
if (nvgpu_is_powered_off(g)) {
|
||||
return NVGPU_CIC_INTR_UNMASK;
|
||||
}
|
||||
|
||||
/* not from gpu when sharing irq with others */
|
||||
non_stall_intr_val = g->ops.mc.intr_nonstall(g);
|
||||
if (non_stall_intr_val == 0U) {
|
||||
return NVGPU_CIC_INTR_NONE;
|
||||
}
|
||||
|
||||
nvgpu_cic_intr_nonstall_pause(g);
|
||||
if (g->sw_quiesce_pending) {
|
||||
return NVGPU_CIC_INTR_QUIESCE_PENDING;
|
||||
}
|
||||
|
||||
nvgpu_atomic_set(&g->mc.sw_irq_nonstall_pending, 1);
|
||||
return NVGPU_CIC_INTR_HANDLE;
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_nonstall_handle(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
u32 nonstall_ops = 0;
|
||||
|
||||
nonstall_ops = g->ops.mc.isr_nonstall(g);
|
||||
if (nonstall_ops != 0U) {
|
||||
nvgpu_cic_intr_nonstall_work(g, nonstall_ops);
|
||||
}
|
||||
|
||||
/* sync handled irq counter before re-enabling interrupts */
|
||||
nvgpu_atomic_set(&g->mc.sw_irq_nonstall_pending, 0);
|
||||
|
||||
nvgpu_cic_intr_nonstall_resume(g);
|
||||
|
||||
err = nvgpu_cond_broadcast(&g->mc.sw_irq_nonstall_last_handled_cond);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_cond_broadcast failed err=%d", err);
|
||||
}
|
||||
}
|
||||
|
||||
u32 nvgpu_cic_intr_stall_isr(struct gk20a *g)
|
||||
{
|
||||
u32 mc_intr_0 = 0U;
|
||||
|
||||
nvgpu_trace_intr_stall_start(g);
|
||||
|
||||
if (nvgpu_is_powered_off(g)) {
|
||||
return NVGPU_CIC_INTR_UNMASK;
|
||||
}
|
||||
|
||||
/* not from gpu when sharing irq with others */
|
||||
mc_intr_0 = g->ops.mc.intr_stall(g);
|
||||
if (mc_intr_0 == 0U) {
|
||||
return NVGPU_CIC_INTR_NONE;
|
||||
}
|
||||
|
||||
nvgpu_cic_intr_stall_pause(g);
|
||||
|
||||
if (g->sw_quiesce_pending) {
|
||||
return NVGPU_CIC_INTR_QUIESCE_PENDING;
|
||||
}
|
||||
|
||||
nvgpu_atomic_set(&g->mc.sw_irq_stall_pending, 1);
|
||||
|
||||
nvgpu_trace_intr_stall_done(g);
|
||||
|
||||
return NVGPU_CIC_INTR_HANDLE;
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_stall_handle(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
|
||||
nvgpu_trace_intr_thread_stall_start(g);
|
||||
|
||||
g->ops.mc.isr_stall(g);
|
||||
|
||||
nvgpu_trace_intr_thread_stall_done(g);
|
||||
|
||||
/* sync handled irq counter before re-enabling interrupts */
|
||||
nvgpu_atomic_set(&g->mc.sw_irq_stall_pending, 0);
|
||||
nvgpu_cic_intr_stall_resume(g);
|
||||
|
||||
err = nvgpu_cond_broadcast(&g->mc.sw_irq_stall_last_handled_cond);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_cond_broadcast failed err=%d", err);
|
||||
}
|
||||
}
|
||||
291
drivers/gpu/nvgpu/common/cic/cic_priv.h
Normal file
291
drivers/gpu/nvgpu/common/cic/cic_priv.h
Normal file
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CIC_PRIV_H
|
||||
#define CIC_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_err_hw_module;
|
||||
struct nvgpu_err_msg;
|
||||
struct gpu_err_header;
|
||||
|
||||
/*
|
||||
* @file
|
||||
*
|
||||
* Declare CIC's private structure to store error-policy LUT and
|
||||
* other data and ops needed during error reporting.
|
||||
*/
|
||||
|
||||
#define ERR_INJECT_TEST_PATTERN 0xA5
|
||||
|
||||
/*
|
||||
* This struct contains members related to error-policy look-up table,
|
||||
* number of units reporting errors.
|
||||
*/
|
||||
struct nvgpu_cic {
|
||||
/** Pointer for error look-up table. */
|
||||
struct nvgpu_err_hw_module *err_lut;
|
||||
|
||||
/** Total number of GPU HW modules considered in CIC. */
|
||||
u32 num_hw_modules;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Inject ECC error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param inst [in] - Instance ID.
|
||||
*
|
||||
* - Sets values for error address and error count.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 inst);
|
||||
|
||||
/**
|
||||
* @brief Inject HOST error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Inject GR error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Sets values for GR exception and SM machine check error information.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Inject CE error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Inject PRI error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param err_code [in] - Error code.
|
||||
*
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 err_code);
|
||||
|
||||
/**
|
||||
* @brief Inject PMU error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Sets values for error info.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Inject CTXSW error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param inst [in] - Instance ID.
|
||||
*
|
||||
* - Sets values for error info.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 inst);
|
||||
|
||||
/**
|
||||
* @brief Inject MMU error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Sets values for mmu page fault info.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message header.
|
||||
*
|
||||
* @param header [in] - Error message header.
|
||||
*
|
||||
* This is used to initialize error message header.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_err_msg_header(struct gpu_err_header *header);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is common
|
||||
* for all HW units.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for HOST unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to HOST unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize ECC error message.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to ECC errors.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for PRI unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to PRI unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for CE unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to CE unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for PMU unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to PMU unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for GR unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to GR unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for CTXSW.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to CTXSW.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for MMU unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to MMU unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
#endif /* CIC_PRIV_H */
|
||||
97
drivers/gpu/nvgpu/common/cic/ctxsw_cic.c
Normal file
97
drivers/gpu/nvgpu/common/cic/ctxsw_cic.c
Normal file
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
/**
 * @brief Report a CTXSW error through the CIC report_err HAL.
 *
 * @param g [in]       - The GPU driver struct.
 * @param hw_unit [in] - HW unit index; must be NVGPU_ERR_MODULE_FECS.
 * @param err_id [in]  - Error index used to look up the error descriptor.
 * @param data [in]    - Pointer to a struct ctxsw_err_info holding the
 *                       context switch status to report. Must not be NULL.
 *
 * - Returns silently when g->ops.cic.report_err is not set.
 * - Any failure (invalid hw_unit, NULL data, descriptor lookup failure,
 *   report_err returning non-zero) ends with a call to nvgpu_sw_quiesce().
 *
 * @return None
 */
void nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		void *data)
{
	int err = 0;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_msg err_pkt;
	u32 inst = 0;
	struct ctxsw_err_info *err_info = (struct ctxsw_err_info *)data;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			"to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_FECS) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		err = -EINVAL;
		goto handle_report_failure;
	}

	/*
	 * err_info is dereferenced below to fill the packet; treat a missing
	 * payload like any other invalid input instead of faulting.
	 */
	if (err_info == NULL) {
		nvgpu_err(g, "invalid ctxsw error info (NULL)");
		err = -EINVAL;
		goto handle_report_failure;
	}

	err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
	if (err != 0) {
		nvgpu_err(g, "Failed to get err_desc for"
			" err_id (%u) for hw module (%u)",
			err_id, hw_unit);
		goto handle_report_failure;
	}

	nvgpu_init_ctxsw_err_msg(&err_pkt);
	err_pkt.hw_unit_id = hw_unit;
	err_pkt.err_id = err_desc->error_id;
	err_pkt.is_critical = err_desc->is_critical;
	err_pkt.err_info.ctxsw_info.header.sub_unit_id = inst;
	err_pkt.err_info.ctxsw_info.curr_ctx = err_info->curr_ctx;
	err_pkt.err_info.ctxsw_info.chid = err_info->chid;
	err_pkt.err_info.ctxsw_info.ctxsw_status0 = err_info->ctxsw_status0;
	err_pkt.err_info.ctxsw_info.ctxsw_status1 = err_info->ctxsw_status1;
	err_pkt.err_info.ctxsw_info.mailbox_value = err_info->mailbox_value;
	err_pkt.err_desc = err_desc;
	err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(err_pkt.err_info.ctxsw_info));

	/* report_err was verified to be non-NULL at function entry. */
	err = g->ops.cic.report_err(g, (void *)&err_pkt,
			sizeof(err_pkt), err_desc->is_critical);
	if (err != 0) {
		nvgpu_err(g, "Failed to report CTXSW error: "
			"err_id=%u, mailbox_val=%u",
			err_id, err_info->mailbox_value);
	}

handle_report_failure:
	if (err != 0) {
		nvgpu_sw_quiesce(g);
	}
}
|
||||
|
||||
/**
 * Inject a CTXSW software error: builds a struct ctxsw_err_info whose
 * bytes are all ERR_INJECT_TEST_PATTERN and reports it through
 * nvgpu_report_ctxsw_err(). The inst argument is not used by this
 * function.
 */
void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 inst)
{
	struct ctxsw_err_info injected_info;

	(void)memset(&injected_info, ERR_INJECT_TEST_PATTERN,
			sizeof(injected_info));

	nvgpu_report_ctxsw_err(g, hw_unit, err_index,
			(void *)&injected_info);
}
|
||||
87
drivers/gpu/nvgpu/common/cic/ecc_cic.c
Normal file
87
drivers/gpu/nvgpu/common/cic/ecc_cic.c
Normal file
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
/*
 * Report an ECC error through the g->ops.cic.report_err hook.
 *
 * g         - driver struct; its ops.cic.report_err hook delivers the message.
 * hw_unit   - hardware unit id, used for the descriptor lookup and stored
 *             in the message.
 * inst      - stored as the ECC header sub_unit_id.
 * err_id    - error id used for the descriptor lookup.
 * err_addr  - stored as the ECC header address.
 * err_count - stored as the ECC err_cnt.
 *
 * Returns without doing anything when no hook is installed. If the
 * descriptor lookup fails or the hook returns non-zero, the failure is
 * logged and nvgpu_sw_quiesce(g) is called.
 */
void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u64 err_addr, u64 err_count)
{
	struct nvgpu_err_msg msg;
	struct nvgpu_err_desc *desc = NULL;
	int ret = 0;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			   "to safety services");
		return;
	}

	ret = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &desc);
	if (ret != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto report_done;
	}

	/* Start from a cleared ECC message, then fill in this event. */
	nvgpu_init_ecc_err_msg(&msg);
	msg.err_desc = desc;
	msg.hw_unit_id = hw_unit;
	msg.err_id = desc->error_id;
	msg.is_critical = desc->is_critical;
	msg.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(msg.err_info.ecc_info));
	msg.err_info.ecc_info.err_cnt = err_count;
	msg.err_info.ecc_info.header.address = err_addr;
	msg.err_info.ecc_info.header.sub_unit_id = inst;

	/* Hook is re-checked right before the call, as in the first guard. */
	if (g->ops.cic.report_err == NULL) {
		goto report_done;
	}

	ret = g->ops.cic.report_err(g, (void *)&msg,
			sizeof(msg), desc->is_critical);
	if (ret != 0) {
		nvgpu_err(g, "Failed to report ECC error: hw_unit=%u, inst=%u, "
			"err_id=%u, err_addr=%llu, err_count=%llu",
			hw_unit, inst, err_id, err_addr, err_count);
	}

report_done:
	/* Any failure on the way here quiesces the driver. */
	if (ret != 0) {
		nvgpu_sw_quiesce(g);
	}
}
|
||||
|
||||
/*
 * Inject a software-generated ECC error for testing.
 *
 * Both the error address and the error count passed to
 * nvgpu_report_ecc_err() are ERR_INJECT_TEST_PATTERN converted to u64.
 */
void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit, u32 err_index,
		u32 inst)
{
	const u64 test_addr = (u64)ERR_INJECT_TEST_PATTERN;
	const u64 test_count = (u64)ERR_INJECT_TEST_PATTERN;

	nvgpu_report_ecc_err(g, hw_unit, inst, err_index,
			test_addr, test_count);
}
|
||||
169
drivers/gpu/nvgpu/common/cic/gr_cic.c
Normal file
169
drivers/gpu/nvgpu/common/cic/gr_cic.c
Normal file
@@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
/*
 * Copy unit-specific GR error details from err_info into err_pkt.
 *
 * When hw_unit is NVGPU_ERR_MODULE_SM the data comes from
 * err_info->sm_mcerr_info and lands in err_pkt->err_info.sm_info; for any
 * other hw_unit it comes from err_info->exception_info and lands in
 * err_pkt->err_info.gr_info. The selected source pointer is dereferenced
 * without a NULL check.
 */
static void nvpgu_report_fill_err_info(u32 hw_unit,
		struct nvgpu_err_msg *err_pkt, struct gr_err_info *err_info)
{
	if (hw_unit != NVGPU_ERR_MODULE_SM) {
		struct gr_exception_info *exc = err_info->exception_info;

		err_pkt->err_info.gr_info.status = exc->status;
		err_pkt->err_info.gr_info.tsgid = exc->tsgid;
		err_pkt->err_info.gr_info.chid = exc->chid;
		err_pkt->err_info.gr_info.curr_ctx = exc->curr_ctx;
	} else {
		struct gr_sm_mcerr_info *sm_err = err_info->sm_mcerr_info;

		/* Location of the faulting SM. */
		err_pkt->err_info.sm_info.gpc = sm_err->gpc;
		err_pkt->err_info.sm_info.tpc = sm_err->tpc;
		err_pkt->err_info.sm_info.sm = sm_err->sm;

		/* Context/channel identification. */
		err_pkt->err_info.sm_info.curr_ctx = sm_err->curr_ctx;
		err_pkt->err_info.sm_info.chid = sm_err->chid;
		err_pkt->err_info.sm_info.tsgid = sm_err->tsgid;

		/* Warp ESR values. */
		err_pkt->err_info.sm_info.warp_esr_pc =
			sm_err->hww_warp_esr_pc;
		err_pkt->err_info.sm_info.warp_esr_status =
			sm_err->hww_warp_esr_status;
	}
}
|
||||
|
||||
/*
 * Report a GR (SM or PGRAPH) error through the g->ops.cic.report_err hook.
 *
 * g            - driver struct; its ops.cic.report_err hook delivers the
 *                message.
 * hw_unit      - must be NVGPU_ERR_MODULE_SM or NVGPU_ERR_MODULE_PGRAPH;
 *                anything else is treated as -EINVAL.
 * inst         - stored as the header sub_unit_id.
 * err_id       - error id used for the descriptor lookup.
 * err_info     - source of the unit-specific details; it is dereferenced by
 *                nvpgu_report_fill_err_info() without a NULL check.
 * sub_err_type - stored as the header sub_err_type.
 *
 * Returns without doing anything when no hook is installed. An invalid
 * hw_unit, a failed descriptor lookup or a non-zero return from the hook is
 * logged and followed by nvgpu_sw_quiesce(g).
 */
void nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, struct gr_err_info *err_info, u32 sub_err_type)
{
	int err = 0;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_msg err_pkt;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			   "to safety services");
		return;
	}

	if ((hw_unit != NVGPU_ERR_MODULE_SM) &&
			(hw_unit != NVGPU_ERR_MODULE_PGRAPH)) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		err = -EINVAL;
		goto handle_report_failure;
	}

	err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
	if (err != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto handle_report_failure;
	}

	nvgpu_init_gr_err_msg(&err_pkt);
	err_pkt.hw_unit_id = hw_unit;
	err_pkt.err_id = err_desc->error_id;
	err_pkt.is_critical = err_desc->is_critical;
	err_pkt.err_desc = err_desc;
	err_pkt.err_info.gr_info.header.sub_err_type = sub_err_type;
	err_pkt.err_info.gr_info.header.sub_unit_id = inst;
	nvpgu_report_fill_err_info(hw_unit, &err_pkt, err_info);
	/* Size covers the whole err_info member, not one unit-specific part. */
	err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(sizeof(err_pkt.err_info));

	if (g->ops.cic.report_err != NULL) {
		err = g->ops.cic.report_err(g, (void *)&err_pkt,
			sizeof(err_pkt), err_desc->is_critical);
		if (err != 0) {
			/*
			 * Adjacent string literals are concatenated verbatim,
			 * so each fragment must carry its own separator.
			 */
			if (hw_unit == NVGPU_ERR_MODULE_SM) {
				nvgpu_err(g, "Failed to report SM exception: "
					"gpc=%u, tpc=%u, sm=%u, esr_status=%x",
					err_pkt.err_info.sm_info.gpc,
					err_pkt.err_info.sm_info.tpc,
					err_pkt.err_info.sm_info.sm,
					err_pkt.err_info.sm_info.warp_esr_status);
			}
			if (hw_unit == NVGPU_ERR_MODULE_PGRAPH) {
				nvgpu_err(g, "Failed to report PGRAPH "
					"exception: inst=%u, err_id=%u, "
					"status=%u", inst, err_id,
					err_pkt.err_info.gr_info.status);
			}
		}
	}

handle_report_failure:
	/* Any failure on the way here quiesces the driver. */
	if (err != 0) {
		nvgpu_sw_quiesce(g);
	}
}
|
||||
|
||||
/*
 * Inject a software-generated GR error for testing.
 *
 * Local PGRAPH and SM payloads are filled byte-wise with
 * ERR_INJECT_TEST_PATTERN; the one matching hw_unit is attached to a
 * gr_err_info and reported via nvgpu_report_gr_err() with instance 0.
 * An unsupported hw_unit is logged and nothing is reported.
 */
void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	struct gr_sm_mcerr_info sm_pattern;
	struct gr_exception_info exc_pattern;
	struct gr_err_info info;

	/* Fixed test pattern for both possible payloads. */
	(void)memset(&exc_pattern, ERR_INJECT_TEST_PATTERN, sizeof(exc_pattern));
	(void)memset(&sm_pattern, ERR_INJECT_TEST_PATTERN, sizeof(sm_pattern));

	switch (hw_unit) {
	case NVGPU_ERR_MODULE_PGRAPH:
		info.exception_info = &exc_pattern;
		break;

	case NVGPU_ERR_MODULE_SM:
		info.sm_mcerr_info = &sm_pattern;
		break;

	default:
		nvgpu_err(g, "unsupported hw_unit(%u)", hw_unit);
		return;
	}

	nvgpu_report_gr_err(g, hw_unit, 0U, err_index,
			&info, sub_err_type);
}
|
||||
91
drivers/gpu/nvgpu/common/cic/host_cic.c
Normal file
91
drivers/gpu/nvgpu/common/cic/host_cic.c
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
/*
 * Report a HOST error through the g->ops.cic.report_err hook.
 *
 * g         - driver struct; its ops.cic.report_err hook delivers the message.
 * hw_unit   - must be NVGPU_ERR_MODULE_HOST; anything else is treated as
 *             -EINVAL.
 * inst      - stored as the header sub_unit_id.
 * err_id    - error id used for the descriptor lookup.
 * intr_info - stored as the header sub_err_type.
 *
 * Returns without doing anything when no hook is installed. An invalid
 * hw_unit, a failed descriptor lookup or a non-zero return from the hook is
 * logged and followed by nvgpu_sw_quiesce(g).
 */
void nvgpu_report_host_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	struct nvgpu_err_msg msg;
	struct nvgpu_err_desc *desc = NULL;
	int ret = 0;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			   "to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_HOST) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		ret = -EINVAL;
		goto report_done;
	}

	ret = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &desc);
	if (ret != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto report_done;
	}

	nvgpu_init_host_err_msg(&msg);
	msg.err_desc = desc;
	msg.hw_unit_id = hw_unit;
	msg.err_id = desc->error_id;
	msg.is_critical = desc->is_critical;
	msg.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(msg.err_info.host_info));
	msg.err_info.host_info.header.sub_unit_id = inst;
	/*
	 * intr_info is passed through as sub_err_type; it can be decoded
	 * with the interrupt status register definition that corresponds
	 * to the error being reported.
	 */
	msg.err_info.host_info.header.sub_err_type = intr_info;

	/* Hook is re-checked right before the call, as in the first guard. */
	if (g->ops.cic.report_err == NULL) {
		goto report_done;
	}

	ret = g->ops.cic.report_err(g, (void *)&msg,
			sizeof(msg), desc->is_critical);
	if (ret != 0) {
		nvgpu_err(g, "Failed to report HOST error: "
				"inst=%u, err_id=%u, intr_info=%u",
				inst, err_id, intr_info);
	}

report_done:
	/* Any failure on the way here quiesces the driver. */
	if (ret != 0) {
		nvgpu_sw_quiesce(g);
	}
}
|
||||
|
||||
/*
 * Inject a software-generated HOST error for testing.
 *
 * Forwards to nvgpu_report_host_err() with instance 0 and sub_err_type
 * passed in the intr_info position.
 */
void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	const u32 inst = 0U;

	nvgpu_report_host_err(g, hw_unit, inst, err_index, sub_err_type);
}
|
||||
131
drivers/gpu/nvgpu/common/cic/mmu_cic.c
Normal file
131
drivers/gpu/nvgpu/common/cic/mmu_cic.c
Normal file
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
|
||||
struct mmu_fault_info *fault_info, u32 status, u32 sub_err_type)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_err_desc *err_desc = NULL;
|
||||
struct nvgpu_err_msg err_pkt;
|
||||
|
||||
if (g->ops.cic.report_err == NULL) {
|
||||
cic_dbg(g, "CIC does not support reporting error "
|
||||
"to safety services");
|
||||
return;
|
||||
}
|
||||
|
||||
if (hw_unit != NVGPU_ERR_MODULE_HUBMMU) {
|
||||
nvgpu_err(g, "invalid hw module (%u)", hw_unit);
|
||||
err = -EINVAL;
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to get err_desc for "
|
||||
"err_id (%u) for hw module (%u)",
|
||||
err_id, hw_unit);
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
nvgpu_init_mmu_err_msg(&err_pkt);
|
||||
err_pkt.hw_unit_id = hw_unit;
|
||||
err_pkt.err_id = err_desc->error_id;
|
||||
err_pkt.is_critical = err_desc->is_critical;
|
||||
err_pkt.err_info.mmu_info.header.sub_err_type = sub_err_type;
|
||||
err_pkt.err_info.mmu_info.status = status;
|
||||
/* Copy contents of mmu_fault_info */
|
||||
if (fault_info != NULL) {
|
||||
err_pkt.err_info.mmu_info.info.inst_ptr = fault_info->inst_ptr;
|
||||
err_pkt.err_info.mmu_info.info.inst_aperture
|
||||
= fault_info->inst_aperture;
|
||||
err_pkt.err_info.mmu_info.info.fault_addr
|
||||
= fault_info->fault_addr;
|
||||
err_pkt.err_info.mmu_info.info.fault_addr_aperture
|
||||
= fault_info->fault_addr_aperture;
|
||||
err_pkt.err_info.mmu_info.info.timestamp_lo
|
||||
= fault_info->timestamp_lo;
|
||||
err_pkt.err_info.mmu_info.info.timestamp_hi
|
||||
= fault_info->timestamp_hi;
|
||||
err_pkt.err_info.mmu_info.info.mmu_engine_id
|
||||
= fault_info->mmu_engine_id;
|
||||
err_pkt.err_info.mmu_info.info.gpc_id = fault_info->gpc_id;
|
||||
err_pkt.err_info.mmu_info.info.client_type
|
||||
= fault_info->client_type;
|
||||
err_pkt.err_info.mmu_info.info.client_id
|
||||
= fault_info->client_id;
|
||||
err_pkt.err_info.mmu_info.info.fault_type
|
||||
= fault_info->fault_type;
|
||||
err_pkt.err_info.mmu_info.info.access_type
|
||||
= fault_info->access_type;
|
||||
err_pkt.err_info.mmu_info.info.protected_mode
|
||||
= fault_info->protected_mode;
|
||||
err_pkt.err_info.mmu_info.info.replayable_fault
|
||||
= fault_info->replayable_fault;
|
||||
err_pkt.err_info.mmu_info.info.replay_fault_en
|
||||
= fault_info->replay_fault_en;
|
||||
err_pkt.err_info.mmu_info.info.valid = fault_info->valid;
|
||||
err_pkt.err_info.mmu_info.info.faulted_pbdma =
|
||||
fault_info->faulted_pbdma;
|
||||
err_pkt.err_info.mmu_info.info.faulted_engine =
|
||||
fault_info->faulted_engine;
|
||||
err_pkt.err_info.mmu_info.info.faulted_subid =
|
||||
fault_info->faulted_subid;
|
||||
err_pkt.err_info.mmu_info.info.chid = fault_info->chid;
|
||||
}
|
||||
err_pkt.err_desc = err_desc;
|
||||
err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
|
||||
sizeof(err_pkt.err_info.mmu_info));
|
||||
|
||||
if (g->ops.cic.report_err != NULL) {
|
||||
err = g->ops.cic.report_err(g, (void *)&err_pkt,
|
||||
sizeof(err_pkt), err_desc->is_critical);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to report MMU fault: hw_unit=%u, "
|
||||
"err_id=%u, sub_err_type=%u, status=%u",
|
||||
hw_unit, err_id, sub_err_type, status);
|
||||
}
|
||||
}
|
||||
handle_report_failure:
|
||||
if (err != 0) {
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Inject a software-generated MMU error for testing.
 *
 * A local mmu_fault_info is filled byte-wise with ERR_INJECT_TEST_PATTERN
 * and reported via nvgpu_report_mmu_err() with a status of 0.
 */
void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit, u32 err_index,
		u32 sub_err_type)
{
	struct mmu_fault_info pattern_fault;

	(void) memset(&pattern_fault, ERR_INJECT_TEST_PATTERN,
			sizeof(pattern_fault));

	nvgpu_report_mmu_err(g, hw_unit, err_index,
			&pattern_fault, 0U, sub_err_type);
}
|
||||
126
drivers/gpu/nvgpu/common/cic/msg_cic.c
Normal file
126
drivers/gpu/nvgpu/common/cic/msg_cic.c
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/string.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_init_err_msg_header(struct gpu_err_header *header)
|
||||
{
|
||||
header->version.major = (u16)1U;
|
||||
header->version.minor = (u16)0U;
|
||||
header->sub_err_type = 0U;
|
||||
header->sub_unit_id = 0UL;
|
||||
header->address = 0UL;
|
||||
header->timestamp_ns = 0UL;
|
||||
}
|
||||
|
||||
void nvgpu_init_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
(void) memset(msg, 0, sizeof(struct nvgpu_err_msg));
|
||||
msg->hw_unit_id = 0U;
|
||||
msg->is_critical = false;
|
||||
msg->err_id = (u8)0U;
|
||||
msg->err_size = (u8)0U;
|
||||
}
|
||||
|
||||
void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.host_info.header);
|
||||
}
|
||||
|
||||
void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.ecc_info.header);
|
||||
msg->err_info.ecc_info.err_cnt = 0UL;
|
||||
}
|
||||
|
||||
void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.pri_info.header);
|
||||
}
|
||||
|
||||
void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.ce_info.header);
|
||||
}
|
||||
|
||||
void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.pmu_err_info.header);
|
||||
msg->err_info.pmu_err_info.status = 0U;
|
||||
}
|
||||
|
||||
void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.gr_info.header);
|
||||
msg->err_info.gr_info.curr_ctx = 0U;
|
||||
msg->err_info.gr_info.chid = 0U;
|
||||
msg->err_info.gr_info.tsgid = 0U;
|
||||
msg->err_info.gr_info.status = 0U;
|
||||
}
|
||||
|
||||
void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.ctxsw_info.header);
|
||||
msg->err_info.ctxsw_info.curr_ctx = 0U;
|
||||
msg->err_info.ctxsw_info.tsgid = 0U;
|
||||
msg->err_info.ctxsw_info.chid = 0U;
|
||||
msg->err_info.ctxsw_info.ctxsw_status0 = 0U;
|
||||
msg->err_info.ctxsw_info.ctxsw_status1 = 0U;
|
||||
msg->err_info.ctxsw_info.mailbox_value = 0U;
|
||||
}
|
||||
|
||||
void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.mmu_info.header);
|
||||
msg->err_info.mmu_info.info.inst_ptr = 0UL;
|
||||
msg->err_info.mmu_info.info.inst_aperture = 0U;
|
||||
msg->err_info.mmu_info.info.fault_addr = 0UL;
|
||||
msg->err_info.mmu_info.info.fault_addr_aperture = 0U;
|
||||
msg->err_info.mmu_info.info.timestamp_lo = 0U;
|
||||
msg->err_info.mmu_info.info.timestamp_hi = 0U;
|
||||
msg->err_info.mmu_info.info.mmu_engine_id = 0U;
|
||||
msg->err_info.mmu_info.info.gpc_id = 0U;
|
||||
msg->err_info.mmu_info.info.client_type = 0U;
|
||||
msg->err_info.mmu_info.info.client_id = 0U;
|
||||
msg->err_info.mmu_info.info.fault_type = 0U;
|
||||
msg->err_info.mmu_info.info.access_type = 0U;
|
||||
msg->err_info.mmu_info.info.protected_mode = 0U;
|
||||
msg->err_info.mmu_info.info.replayable_fault = false;
|
||||
msg->err_info.mmu_info.info.replay_fault_en = 0U;
|
||||
msg->err_info.mmu_info.info.valid = false;
|
||||
msg->err_info.mmu_info.info.faulted_pbdma = 0U;
|
||||
msg->err_info.mmu_info.info.faulted_engine = 0U;
|
||||
msg->err_info.mmu_info.info.faulted_subid = 0U;
|
||||
msg->err_info.mmu_info.info.chid = 0U;
|
||||
msg->err_info.mmu_info.status = 0U;
|
||||
}
|
||||
91
drivers/gpu/nvgpu/common/cic/pmu_cic.c
Normal file
91
drivers/gpu/nvgpu/common/cic/pmu_cic.c
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
/*
 * Report a PMU error through the g->ops.cic.report_err hook.
 *
 * g            - driver struct; its ops.cic.report_err hook delivers the
 *                message.
 * hw_unit      - must be NVGPU_ERR_MODULE_PMU; anything else is treated as
 *                -EINVAL.
 * err_id       - error id used for the descriptor lookup.
 * sub_err_type - stored as the header sub_err_type.
 * status       - stored as the PMU status.
 *
 * Returns without doing anything when no hook is installed. An invalid
 * hw_unit, a failed descriptor lookup or a non-zero return from the hook is
 * logged and followed by nvgpu_sw_quiesce(g).
 */
void nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
	u32 sub_err_type, u32 status)
{
	struct nvgpu_err_msg msg;
	struct nvgpu_err_desc *desc = NULL;
	int ret = 0;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			   "to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_PMU) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		ret = -EINVAL;
		goto report_done;
	}

	ret = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &desc);
	if (ret != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto report_done;
	}

	nvgpu_init_pmu_err_msg(&msg);
	msg.err_desc = desc;
	msg.hw_unit_id = hw_unit;
	msg.err_id = desc->error_id;
	msg.is_critical = desc->is_critical;
	msg.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(msg.err_info.pmu_err_info));
	msg.err_info.pmu_err_info.header.sub_err_type = sub_err_type;
	msg.err_info.pmu_err_info.status = status;

	/* Hook is re-checked right before the call, as in the first guard. */
	if (g->ops.cic.report_err == NULL) {
		goto report_done;
	}

	ret = g->ops.cic.report_err(g, (void *)&msg,
			sizeof(msg), desc->is_critical);
	if (ret != 0) {
		nvgpu_err(g, "Failed to report PMU error: "
				"err_id=%u, sub_err_type=%u, status=%u",
				err_id, sub_err_type, status);
	}

report_done:
	/* Any failure on the way here quiesces the driver. */
	if (ret != 0) {
		nvgpu_sw_quiesce(g);
	}
}
|
||||
|
||||
/*
 * Inject a software-generated PMU error for testing.
 *
 * The status reported via nvgpu_report_pmu_err() is
 * ERR_INJECT_TEST_PATTERN converted to u32.
 */
void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	const u32 test_status = (u32)ERR_INJECT_TEST_PATTERN;

	nvgpu_report_pmu_err(g, hw_unit, err_index, sub_err_type,
			test_status);
}
|
||||
91
drivers/gpu/nvgpu/common/cic/pri_cic.c
Normal file
91
drivers/gpu/nvgpu/common/cic/pri_cic.c
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
/*
 * Report a PRI error through the g->ops.cic.report_err hook.
 *
 * g        - driver struct; its ops.cic.report_err hook delivers the message.
 * hw_unit  - must be NVGPU_ERR_MODULE_PRI; anything else is treated as
 *            -EINVAL.
 * inst     - stored as the header sub_unit_id.
 * err_id   - error id used for the descriptor lookup.
 * err_addr - widened to u64 and stored as the header address.
 * err_code - stored as the header sub_err_type.
 *
 * Returns without doing anything when no hook is installed. An invalid
 * hw_unit, a failed descriptor lookup or a non-zero return from the hook is
 * logged and followed by nvgpu_sw_quiesce(g).
 */
void nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u32 err_addr, u32 err_code)
{
	struct nvgpu_err_msg msg;
	struct nvgpu_err_desc *desc = NULL;
	int ret = 0;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
			   "to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_PRI) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		ret = -EINVAL;
		goto report_done;
	}

	ret = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &desc);
	if (ret != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto report_done;
	}

	nvgpu_init_pri_err_msg(&msg);
	msg.err_desc = desc;
	msg.hw_unit_id = hw_unit;
	msg.err_id = desc->error_id;
	msg.is_critical = desc->is_critical;
	msg.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(msg.err_info.pri_info));
	msg.err_info.pri_info.header.sub_unit_id = inst;
	msg.err_info.pri_info.header.address = (u64) err_addr;
	/*
	 * err_code is passed through as sub_err_type; it can be decoded
	 * by referring to the FECS pri error codes.
	 */
	msg.err_info.pri_info.header.sub_err_type = err_code;

	/* Hook is re-checked right before the call, as in the first guard. */
	if (g->ops.cic.report_err == NULL) {
		goto report_done;
	}

	ret = g->ops.cic.report_err(g, (void *)&msg,
			sizeof(msg), desc->is_critical);
	if (ret != 0) {
		nvgpu_err(g, "Failed to report PRI error: "
				"inst=%u, err_id=%u, err_code=%u",
				inst, err_id, err_code);
	}

report_done:
	/* Any failure on the way here quiesces the driver. */
	if (ret != 0) {
		nvgpu_sw_quiesce(g);
	}
}
|
||||
|
||||
/*
 * Inject a software-generated PRI error for testing.
 *
 * Forwards to nvgpu_report_pri_err() with instance 0 and error address 0.
 */
void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 err_code)
{
	const u32 inst = 0U;
	const u32 err_addr = 0U;

	nvgpu_report_pri_err(g, hw_unit, inst, err_index, err_addr, err_code);
}
|
||||
Reference in New Issue
Block a user