mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-25 02:52:51 +03:00
gpu: nvgpu: Enable the reporting of errors for CE
Enable error reporting for the Copy Engine (CE) hardware module. These errors are reported to the underlying safety service.

Jira NVGPU-1866

Change-Id: Ie183b01f288653978e156cfcfcf231cfcb5426c3
Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2022766
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
9ff9fec887
commit
9c10f2d595
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Pascal GPU series Copy Engine.
|
||||
*
|
||||
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -24,6 +24,7 @@
|
||||
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include "ce_gp10b.h"
|
||||
|
||||
@@ -52,10 +53,14 @@ void gp10b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
|
||||
|
||||
/* clear blocking interrupts: they exhibit broken behavior */
|
||||
if ((ce_intr & ce_intr_status_blockpipe_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
GPU_CE_BLOCK_PIPE, ce_intr);
|
||||
clear_intr |= ce_blockpipe_isr(g, ce_intr);
|
||||
}
|
||||
|
||||
if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
GPU_CE_LAUNCH_ERROR, ce_intr);
|
||||
clear_intr |= ce_launcherr_isr(g, ce_intr);
|
||||
}
|
||||
|
||||
@@ -71,6 +76,8 @@ u32 gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
|
||||
nvgpu_log(g, gpu_dbg_intr, "ce nonstall isr %08x %08x\n", ce_intr, inst_id);
|
||||
|
||||
if ((ce_intr & ce_intr_status_nonblockpipe_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
GPU_CE_NONBLOCK_PIPE, ce_intr);
|
||||
gk20a_writel(g, ce_intr_status_r(inst_id),
|
||||
ce_intr_status_nonblockpipe_pending_f());
|
||||
ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE |
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Volta GPU series Copy Engine.
|
||||
*
|
||||
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "nvgpu/log.h"
|
||||
#include "nvgpu/bitops.h"
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include "gp10b/ce_gp10b.h"
|
||||
|
||||
@@ -34,6 +35,23 @@
|
||||
#include <nvgpu/hw/gv11b/hw_ce_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_top_gv11b.h>
|
||||
|
||||
/*
 * Report a Copy Engine (CE) error through the optional
 * g->ops.ce2.err_ops.report_ce_err callback.
 *
 * g:        GPU instance whose ce2 error ops are consulted.
 * inst:     forwarded to the callback as the instance argument.
 * err_type: forwarded to the callback as the error id; the CE interrupt
 *           handlers pass GPU_CE_* values here.
 * status:   forwarded to the callback unchanged; the CE interrupt handlers
 *           pass the value they read into ce_intr.
 *
 * Returns nothing. The hardware module id given to the callback is always
 * NVGPU_ERR_MODULE_CE. A non-zero callback result is logged with
 * nvgpu_err() and otherwise ignored.
 */
void nvgpu_report_ce_error(struct gk20a *g, u32 inst,
|
||||
u32 err_type, u32 status)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/* The callback is optional: silently do nothing when it is not set. */
if (g->ops.ce2.err_ops.report_ce_err == NULL) {
|
||||
return;
|
||||
}
|
||||
ret = g->ops.ce2.err_ops.report_ce_err(g,
|
||||
NVGPU_ERR_MODULE_CE, inst, err_type, status);
|
||||
/* Reporting failure is not propagated to the caller; it is only logged. */
if (ret != 0) {
|
||||
nvgpu_err(g, "Failed to report CE error: "
|
||||
"inst=%u, err_type=%u, status=%u",
|
||||
inst, err_type, status);
|
||||
}
|
||||
}
|
||||
|
||||
u32 gv11b_ce_get_num_pce(struct gk20a *g)
|
||||
{
|
||||
/* register contains a bitmask indicating which physical copy
|
||||
@@ -60,6 +78,8 @@ void gv11b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
|
||||
* reset to get back to a working state.
|
||||
*/
|
||||
if ((ce_intr & ce_intr_status_invalid_config_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
GPU_CE_INVALID_CONFIG, ce_intr);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ce: inst %d: invalid config", inst_id);
|
||||
clear_intr |= ce_intr_status_invalid_config_reset_f();
|
||||
@@ -71,6 +91,8 @@ void gv11b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
|
||||
* reset before operations can start again, if not the entire GPU.
|
||||
*/
|
||||
if ((ce_intr & ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
GPU_CE_METHOD_BUFFER_FAULT, ce_intr);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ce: inst %d: mthd buffer fault", inst_id);
|
||||
clear_intr |= ce_intr_status_mthd_buffer_fault_reset_f();
|
||||
|
||||
@@ -245,6 +245,11 @@ struct gpu_ops {
|
||||
void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base);
|
||||
u32 (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base);
|
||||
u32 (*get_num_pce)(struct gk20a *g);
|
||||
struct {
|
||||
int (*report_ce_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst, u32 err_id,
|
||||
u32 status);
|
||||
} err_ops;
|
||||
} ce2;
|
||||
struct {
|
||||
u32 (*get_patch_slots)(struct gk20a *g);
|
||||
@@ -2055,10 +2060,4 @@ void gk20a_put(struct gk20a *g);
|
||||
|
||||
bool nvgpu_has_syncpoints(struct gk20a *g);
|
||||
|
||||
void nvgpu_report_host_error(struct gk20a *g,
|
||||
u32 inst, u32 err_id, u32 intr_info);
|
||||
|
||||
void nvgpu_report_gr_exception(struct gk20a *g, u32 inst,
|
||||
u32 err_type, u32 status);
|
||||
|
||||
#endif /* GK20A_H */
|
||||
|
||||
@@ -33,7 +33,8 @@
|
||||
#define NVGPU_ERR_MODULE_PGRAPH 7U
|
||||
#define NVGPU_ERR_MODULE_LTC 8U
|
||||
#define NVGPU_ERR_MODULE_HUBMMU 9U
|
||||
#define NVGPU_ERR_MODULE_INVALID 10U
|
||||
#define NVGPU_ERR_MODULE_CE 11U
|
||||
#define NVGPU_ERR_MODULE_INVALID 12U
|
||||
|
||||
#define GPU_HOST_PFIFO_BIND_ERROR 0U
|
||||
#define GPU_HOST_PFIFO_SCHED_ERROR 1U
|
||||
@@ -136,4 +137,18 @@
|
||||
#define GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED 7U
|
||||
#define GPU_HUBMMU_INVALID_ERROR 8U
|
||||
|
||||
#define GPU_CE_LAUNCH_ERROR 0U
|
||||
#define GPU_CE_BLOCK_PIPE 1U
|
||||
#define GPU_CE_NONBLOCK_PIPE 2U
|
||||
#define GPU_CE_INVALID_CONFIG 3U
|
||||
#define GPU_CE_METHOD_BUFFER_FAULT 4U
|
||||
|
||||
void nvgpu_report_host_error(struct gk20a *g,
|
||||
u32 inst, u32 err_id, u32 intr_info);
|
||||
|
||||
void nvgpu_report_gr_exception(struct gk20a *g, u32 inst,
|
||||
u32 err_type, u32 status);
|
||||
|
||||
void nvgpu_report_ce_error(struct gk20a *g, u32 inst,
|
||||
u32 err_type, u32 status);
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user