gpu: nvgpu: add CONFIG_NVGPU_RECOVERY

Add CONFIG_NVGPU_RECOVERY in order to conditionally compile
recovery code. This code will be removed from the safety build
once the sw quiesce state is implemented and the negative tests
have been disabled or modified so that they no longer expect
recovery to happen.

Add no-op static inline stubs for the recovery handlers, used when
CONFIG_NVGPU_RECOVERY is not defined. These stubs can later be
wired to the sw quiesce functions.

Also move the gv11b recovery code to non-fusa (rc_gv11b_fusa.c is
renamed to rc_gv11b.c), as it will ultimately be removed from the
safety build.

Jira NVGPU-3871

Change-Id: Ia705b059fab6120899c7e15082f2a0f51ff51dc9
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2166074
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Thomas Fleury
2019-08-01 11:31:29 -04:00
committed by mobile promotions
parent 83e28e54eb
commit c7b41f106d
11 changed files with 88 additions and 8 deletions

View File

@@ -119,7 +119,7 @@ rc:
owner: Seema K owner: Seema K
sources: [ hal/rc/rc_gk20a.c, sources: [ hal/rc/rc_gk20a.c,
hal/rc/rc_gk20a.h, hal/rc/rc_gk20a.h,
hal/rc/rc_gv11b_fusa.c, hal/rc/rc_gv11b.c,
hal/rc/rc_gv11b.h ] hal/rc/rc_gv11b.h ]
fbpa: fbpa:

View File

@@ -187,3 +187,10 @@ config NVGPU_HAL_NON_FUSA
help help
Enable/Disable the support of HALs from chips that do not have functional Enable/Disable the support of HALs from chips that do not have functional
safety certification safety certification
config NVGPU_RECOVERY
bool "Recovery support"
depends on GK20A
default y
help
Support recovery on failure (which may involve engine reset)

View File

@@ -62,6 +62,14 @@ ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y)
ccflags-y += -DCONFIG_NVGPU_HAL_NON_FUSA ccflags-y += -DCONFIG_NVGPU_HAL_NON_FUSA
endif endif
ifeq ($(CONFIG_NVGPU_RECOVERY),y)
ccflags-y += -DCONFIG_NVGPU_RECOVERY
nvgpu-y += \
common/rc/rc.o \
hal/rc/rc_gk20a.o \
hal/rc/rc_gv11b.o
endif
obj-$(CONFIG_GK20A) := nvgpu.o obj-$(CONFIG_GK20A) := nvgpu.o
# OS independent parts of nvgpu. The work to collect files here # OS independent parts of nvgpu. The work to collect files here
@@ -252,7 +260,6 @@ nvgpu-y += \
hal/fuse/fuse_gm20b.o \ hal/fuse/fuse_gm20b.o \
hal/fuse/fuse_gp106.o \ hal/fuse/fuse_gp106.o \
hal/func/func_tu104.o \ hal/func/func_tu104.o \
hal/rc/rc_gk20a.o \
hal/fifo/fifo_gk20a.o \ hal/fifo/fifo_gk20a.o \
hal/fifo/fifo_tu104.o \ hal/fifo/fifo_tu104.o \
hal/fifo/preempt_gk20a.o \ hal/fifo/preempt_gk20a.o \
@@ -490,7 +497,6 @@ nvgpu-y += \
common/sim/sim.o \ common/sim/sim.o \
common/sim/sim_pci.o \ common/sim/sim_pci.o \
common/sim/sim_netlist.o \ common/sim/sim_netlist.o \
common/rc/rc.o \
common/fifo/fifo.o \ common/fifo/fifo.o \
common/fifo/preempt.o \ common/fifo/preempt.o \
common/fifo/channel.o \ common/fifo/channel.o \
@@ -640,7 +646,6 @@ nvgpu-y += \
hal/priv_ring/priv_ring_gm20b_fusa.o \ hal/priv_ring/priv_ring_gm20b_fusa.o \
hal/priv_ring/priv_ring_gp10b_fusa.o \ hal/priv_ring/priv_ring_gp10b_fusa.o \
hal/ptimer/ptimer_gk20a_fusa.o \ hal/ptimer/ptimer_gk20a_fusa.o \
hal/rc/rc_gv11b_fusa.o \
hal/sync/syncpt_cmdbuf_gv11b_fusa.o \ hal/sync/syncpt_cmdbuf_gv11b_fusa.o \
hal/therm/therm_gm20b_fusa.o \ hal/therm/therm_gm20b_fusa.o \
hal/therm/therm_gv11b_fusa.o \ hal/therm/therm_gv11b_fusa.o \

View File

@@ -93,6 +93,10 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL
CONFIG_NVGPU_LOGGING := 1 CONFIG_NVGPU_LOGGING := 1
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LOGGING NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LOGGING
# Enable recovery for safety build until sw quiesce is done
CONFIG_NVGPU_RECOVERY := 1
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_RECOVERY
# #
# Flags enabled only for safety debug and regular build profile. # Flags enabled only for safety debug and regular build profile.
# #

View File

@@ -132,7 +132,6 @@ srcs += common/utils/assert.c \
common/power_features/cg/cg.c \ common/power_features/cg/cg.c \
common/fifo/preempt.c \ common/fifo/preempt.c \
common/fifo/channel.c \ common/fifo/channel.c \
common/rc/rc.c \
common/fifo/fifo.c \ common/fifo/fifo.c \
common/fifo/pbdma.c \ common/fifo/pbdma.c \
common/fifo/tsg.c \ common/fifo/tsg.c \
@@ -233,7 +232,6 @@ srcs += hal/mm/mm_gv11b_fusa.c \
hal/priv_ring/priv_ring_gm20b_fusa.c \ hal/priv_ring/priv_ring_gm20b_fusa.c \
hal/priv_ring/priv_ring_gp10b_fusa.c \ hal/priv_ring/priv_ring_gp10b_fusa.c \
hal/ptimer/ptimer_gk20a_fusa.c \ hal/ptimer/ptimer_gk20a_fusa.c \
hal/rc/rc_gv11b_fusa.c \
hal/sync/syncpt_cmdbuf_gv11b_fusa.c \ hal/sync/syncpt_cmdbuf_gv11b_fusa.c \
hal/therm/therm_gm20b_fusa.c \ hal/therm/therm_gm20b_fusa.c \
hal/therm/therm_gv11b_fusa.c \ hal/therm/therm_gv11b_fusa.c \
@@ -283,7 +281,6 @@ srcs += hal/init/hal_gp10b.c \
hal/fb/fb_gm20b.c \ hal/fb/fb_gm20b.c \
hal/fb/fb_gv11b.c \ hal/fb/fb_gv11b.c \
hal/fuse/fuse_gm20b.c \ hal/fuse/fuse_gm20b.c \
hal/rc/rc_gk20a.c \
hal/fifo/fifo_gk20a.c \ hal/fifo/fifo_gk20a.c \
hal/fifo/preempt_gk20a.c \ hal/fifo/preempt_gk20a.c \
hal/fifo/engines_gm20b.c \ hal/fifo/engines_gm20b.c \
@@ -390,6 +387,14 @@ srcs += common/fifo/userd.c \
hal/fifo/userd_gv11b.c hal/fifo/userd_gv11b.c
endif endif
ifeq ($(CONFIG_NVGPU_RECOVERY),1)
srcs += common/rc/rc.c \
hal/rc/rc_gv11b.c
ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1)
srcs += hal/rc/rc_gk20a.c
endif
endif
ifeq ($(CONFIG_NVGPU_FENCE),1) ifeq ($(CONFIG_NVGPU_FENCE),1)
srcs += common/fence/fence.c srcs += common/fence/fence.c
endif endif

View File

@@ -83,7 +83,9 @@
#include "hal/fifo/ctxsw_timeout_gk20a.h" #include "hal/fifo/ctxsw_timeout_gk20a.h"
#include "hal/fifo/mmu_fault_gk20a.h" #include "hal/fifo/mmu_fault_gk20a.h"
#include "hal/fifo/mmu_fault_gm20b.h" #include "hal/fifo/mmu_fault_gm20b.h"
#ifdef CONFIG_NVGPU_RECOVERY
#include "hal/rc/rc_gk20a.h" #include "hal/rc/rc_gk20a.h"
#endif
#ifdef CONFIG_NVGPU_GRAPHICS #ifdef CONFIG_NVGPU_GRAPHICS
#include "hal/gr/zbc/zbc_gm20b.h" #include "hal/gr/zbc/zbc_gm20b.h"
#include "hal/gr/zcull/zcull_gm20b.h" #include "hal/gr/zcull/zcull_gm20b.h"
@@ -606,7 +608,9 @@ static const struct gpu_ops gm20b_ops = {
.init_pbdma_map = gk20a_fifo_init_pbdma_map, .init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gk20a_fifo_is_preempt_pending, .is_preempt_pending = gk20a_fifo_is_preempt_pending,
.reset_enable_hw = gk20a_init_fifo_reset_enable_hw, .reset_enable_hw = gk20a_init_fifo_reset_enable_hw,
#ifdef CONFIG_NVGPU_RECOVERY
.recover = gk20a_fifo_recover, .recover = gk20a_fifo_recover,
#endif
.intr_set_recover_mask = gk20a_fifo_intr_set_recover_mask, .intr_set_recover_mask = gk20a_fifo_intr_set_recover_mask,
.intr_unset_recover_mask = gk20a_fifo_intr_unset_recover_mask, .intr_unset_recover_mask = gk20a_fifo_intr_unset_recover_mask,
.setup_sw = nvgpu_fifo_setup_sw, .setup_sw = nvgpu_fifo_setup_sw,

View File

@@ -96,7 +96,9 @@
#include "hal/fifo/mmu_fault_gm20b.h" #include "hal/fifo/mmu_fault_gm20b.h"
#include "hal/fifo/mmu_fault_gp10b.h" #include "hal/fifo/mmu_fault_gp10b.h"
#include "hal/fifo/ctxsw_timeout_gk20a.h" #include "hal/fifo/ctxsw_timeout_gk20a.h"
#ifdef CONFIG_NVGPU_RECOVERY
#include "hal/rc/rc_gk20a.h" #include "hal/rc/rc_gk20a.h"
#endif
#include "hal/gr/ecc/ecc_gp10b.h" #include "hal/gr/ecc/ecc_gp10b.h"
#include "hal/gr/fecs_trace/fecs_trace_gm20b.h" #include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
#include "hal/gr/config/gr_config_gm20b.h" #include "hal/gr/config/gr_config_gm20b.h"
@@ -675,7 +677,9 @@ static const struct gpu_ops gp10b_ops = {
.init_pbdma_map = gk20a_fifo_init_pbdma_map, .init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gk20a_fifo_is_preempt_pending, .is_preempt_pending = gk20a_fifo_is_preempt_pending,
.reset_enable_hw = gk20a_init_fifo_reset_enable_hw, .reset_enable_hw = gk20a_init_fifo_reset_enable_hw,
#ifdef CONFIG_NVGPU_RECOVERY
.recover = gk20a_fifo_recover, .recover = gk20a_fifo_recover,
#endif
.intr_set_recover_mask = gk20a_fifo_intr_set_recover_mask, .intr_set_recover_mask = gk20a_fifo_intr_set_recover_mask,
.intr_unset_recover_mask = gk20a_fifo_intr_unset_recover_mask, .intr_unset_recover_mask = gk20a_fifo_intr_unset_recover_mask,
.setup_sw = nvgpu_fifo_setup_sw, .setup_sw = nvgpu_fifo_setup_sw,

View File

@@ -71,7 +71,9 @@
#include "hal/fuse/fuse_gp10b.h" #include "hal/fuse/fuse_gp10b.h"
#include "hal/ptimer/ptimer_gk20a.h" #include "hal/ptimer/ptimer_gk20a.h"
#include "hal/regops/regops_gv11b.h" #include "hal/regops/regops_gv11b.h"
#ifdef CONFIG_NVGPU_RECOVERY
#include "hal/rc/rc_gv11b.h" #include "hal/rc/rc_gv11b.h"
#endif
#include "hal/fifo/fifo_gk20a.h" #include "hal/fifo/fifo_gk20a.h"
#include "hal/fifo/fifo_gv11b.h" #include "hal/fifo/fifo_gv11b.h"
#include "hal/fifo/pbdma_gm20b.h" #include "hal/fifo/pbdma_gm20b.h"
@@ -833,7 +835,9 @@ static const struct gpu_ops gv11b_ops = {
.init_pbdma_map = gk20a_fifo_init_pbdma_map, .init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gv11b_fifo_is_preempt_pending, .is_preempt_pending = gv11b_fifo_is_preempt_pending,
.reset_enable_hw = gv11b_init_fifo_reset_enable_hw, .reset_enable_hw = gv11b_init_fifo_reset_enable_hw,
#ifdef CONFIG_NVGPU_RECOVERY
.recover = gv11b_fifo_recover, .recover = gv11b_fifo_recover,
#endif
.intr_set_recover_mask = gv11b_fifo_intr_set_recover_mask, .intr_set_recover_mask = gv11b_fifo_intr_set_recover_mask,
.intr_unset_recover_mask = gv11b_fifo_intr_unset_recover_mask, .intr_unset_recover_mask = gv11b_fifo_intr_unset_recover_mask,
.setup_sw = nvgpu_fifo_setup_sw, .setup_sw = nvgpu_fifo_setup_sw,

View File

@@ -71,7 +71,9 @@
#include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gm20b.h"
#include "hal/fuse/fuse_gp10b.h" #include "hal/fuse/fuse_gp10b.h"
#include "hal/fuse/fuse_gp106.h" #include "hal/fuse/fuse_gp106.h"
#ifdef CONFIG_NVGPU_RECOVERY
#include "hal/rc/rc_gv11b.h" #include "hal/rc/rc_gv11b.h"
#endif
#include "hal/fifo/fifo_gk20a.h" #include "hal/fifo/fifo_gk20a.h"
#include "hal/fifo/fifo_gv11b.h" #include "hal/fifo/fifo_gv11b.h"
#include "hal/fifo/fifo_tu104.h" #include "hal/fifo/fifo_tu104.h"
@@ -863,7 +865,9 @@ static const struct gpu_ops tu104_ops = {
.init_pbdma_map = gk20a_fifo_init_pbdma_map, .init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gv11b_fifo_is_preempt_pending, .is_preempt_pending = gv11b_fifo_is_preempt_pending,
.reset_enable_hw = gv11b_init_fifo_reset_enable_hw, .reset_enable_hw = gv11b_init_fifo_reset_enable_hw,
#ifdef CONFIG_NVGPU_RECOVERY
.recover = gv11b_fifo_recover, .recover = gv11b_fifo_recover,
#endif
.intr_set_recover_mask = gv11b_fifo_intr_set_recover_mask, .intr_set_recover_mask = gv11b_fifo_intr_set_recover_mask,
.intr_unset_recover_mask = gv11b_fifo_intr_unset_recover_mask, .intr_unset_recover_mask = gv11b_fifo_intr_unset_recover_mask,
.setup_sw = nvgpu_fifo_setup_sw, .setup_sw = nvgpu_fifo_setup_sw,

View File

@@ -43,6 +43,7 @@ struct nvgpu_tsg;
struct nvgpu_channel; struct nvgpu_channel;
struct nvgpu_pbdma_status_info; struct nvgpu_pbdma_status_info;
#ifdef CONFIG_NVGPU_RECOVERY
void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask, void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask,
struct nvgpu_tsg *tsg, bool debug_dump); struct nvgpu_tsg *tsg, bool debug_dump);
@@ -64,4 +65,46 @@ void nvgpu_rc_fifo_recover(struct gk20a *g,
u32 hw_id, /* if ~0, will be queried from HW */ u32 hw_id, /* if ~0, will be queried from HW */
bool id_is_tsg, /* ignored if hw_id == ~0 */ bool id_is_tsg, /* ignored if hw_id == ~0 */
bool id_is_known, bool debug_dump, u32 rc_type); bool id_is_known, bool debug_dump, u32 rc_type);
#else
/*
 * Stub for nvgpu_rc_ctxsw_timeout(), compiled when CONFIG_NVGPU_RECOVERY
 * is not defined. It performs no recovery: every argument is ignored and
 * the call returns immediately.
 */
static inline void nvgpu_rc_ctxsw_timeout(struct gk20a *g,
		u32 eng_bitmask,
		struct nvgpu_tsg *tsg,
		bool debug_dump)
{
	/* Recovery is compiled out; explicitly discard the arguments. */
	(void)g;
	(void)eng_bitmask;
	(void)tsg;
	(void)debug_dump;
}
/*
 * Stub for nvgpu_rc_pbdma_fault(), compiled when CONFIG_NVGPU_RECOVERY
 * is not defined. It performs no recovery: every argument is ignored and
 * the call returns immediately.
 */
static inline void nvgpu_rc_pbdma_fault(struct gk20a *g,
		struct nvgpu_fifo *f,
		u32 pbdma_id,
		u32 error_notifier)
{
	/* Recovery is compiled out; explicitly discard the arguments. */
	(void)g;
	(void)f;
	(void)pbdma_id;
	(void)error_notifier;
}
/*
 * Stub for nvgpu_rc_runlist_update(), compiled when CONFIG_NVGPU_RECOVERY
 * is not defined. It performs no recovery: both arguments are ignored and
 * the call returns immediately.
 */
static inline void nvgpu_rc_runlist_update(struct gk20a *g,
		u32 runlist_id)
{
	/* Recovery is compiled out; explicitly discard the arguments. */
	(void)g;
	(void)runlist_id;
}
/*
 * Stub for nvgpu_rc_preempt_timeout(), compiled when CONFIG_NVGPU_RECOVERY
 * is not defined. It performs no recovery: both arguments are ignored and
 * the call returns immediately.
 */
static inline void nvgpu_rc_preempt_timeout(struct gk20a *g,
		struct nvgpu_tsg *tsg)
{
	/* Recovery is compiled out; explicitly discard the arguments. */
	(void)g;
	(void)tsg;
}
/*
 * Stub for nvgpu_rc_gr_fault(), compiled when CONFIG_NVGPU_RECOVERY
 * is not defined. It performs no recovery: every argument is ignored and
 * the call returns immediately.
 */
static inline void nvgpu_rc_gr_fault(struct gk20a *g,
		struct nvgpu_tsg *tsg,
		struct nvgpu_channel *ch)
{
	/* Recovery is compiled out; explicitly discard the arguments. */
	(void)g;
	(void)tsg;
	(void)ch;
}
/*
 * Stub for nvgpu_rc_sched_error_bad_tsg(), compiled when
 * CONFIG_NVGPU_RECOVERY is not defined. It performs no recovery: the
 * argument is ignored and the call returns immediately.
 */
static inline void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g)
{
	/* Recovery is compiled out; explicitly discard the argument. */
	(void)g;
}
/*
 * Stub for nvgpu_rc_tsg_and_related_engines(), compiled when
 * CONFIG_NVGPU_RECOVERY is not defined. It performs no recovery: every
 * argument is ignored and the call returns immediately.
 */
static inline void nvgpu_rc_tsg_and_related_engines(struct gk20a *g,
		struct nvgpu_tsg *tsg,
		bool debug_dump,
		u32 rc_type)
{
	/* Recovery is compiled out; explicitly discard the arguments. */
	(void)g;
	(void)tsg;
	(void)debug_dump;
	(void)rc_type;
}
/*
 * Stub for nvgpu_rc_fifo_recover(), compiled when CONFIG_NVGPU_RECOVERY
 * is not defined. It performs no recovery: every argument is ignored and
 * the call returns immediately.
 *
 * The signature mirrors the recovery-enabled declaration, whose parameter
 * notes are:
 *   eng_bitmask - if zero, will be queried from HW
 *   hw_id       - if ~0, will be queried from HW
 *   id_is_tsg   - ignored if hw_id == ~0
 */
static inline void nvgpu_rc_fifo_recover(struct gk20a *g,
		u32 eng_bitmask,
		u32 hw_id,
		bool id_is_tsg,
		bool id_is_known,
		bool debug_dump,
		u32 rc_type)
{
	/* Recovery is compiled out; explicitly discard the arguments. */
	(void)g;
	(void)eng_bitmask;
	(void)hw_id;
	(void)id_is_tsg;
	(void)id_is_known;
	(void)debug_dump;
	(void)rc_type;
}
#endif
#endif /* NVGPU_RC_H */ #endif /* NVGPU_RC_H */