diff --git a/arch/nvgpu-hal-new.yaml b/arch/nvgpu-hal-new.yaml index 0a3055fe4..1bd84d950 100644 --- a/arch/nvgpu-hal-new.yaml +++ b/arch/nvgpu-hal-new.yaml @@ -119,7 +119,7 @@ rc: owner: Seema K sources: [ hal/rc/rc_gk20a.c, hal/rc/rc_gk20a.h, - hal/rc/rc_gv11b_fusa.c, + hal/rc/rc_gv11b.c, hal/rc/rc_gv11b.h ] fbpa: diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig index 0fef9750e..42bb21e31 100644 --- a/drivers/gpu/nvgpu/Kconfig +++ b/drivers/gpu/nvgpu/Kconfig @@ -186,4 +186,11 @@ config NVGPU_HAL_NON_FUSA default y help Enable/Disable the support of HALs from chips that do not have functional - safety certification \ No newline at end of file + safety certification + +config NVGPU_RECOVERY + bool "Recovery support" + depends on GK20A + default y + help + Support recovery on failure (which may involve engine reset) diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 6a4d06b28..bde3a3495 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -62,6 +62,14 @@ ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y) ccflags-y += -DCONFIG_NVGPU_HAL_NON_FUSA endif +ifeq ($(CONFIG_NVGPU_RECOVERY),y) +ccflags-y += -DCONFIG_NVGPU_RECOVERY +nvgpu-y += \ + common/rc/rc.o \ + hal/rc/rc_gk20a.o \ + hal/rc/rc_gv11b.o +endif + obj-$(CONFIG_GK20A) := nvgpu.o # OS independent parts of nvgpu. 
The work to collect files here @@ -252,7 +260,6 @@ nvgpu-y += \ hal/fuse/fuse_gm20b.o \ hal/fuse/fuse_gp106.o \ hal/func/func_tu104.o \ - hal/rc/rc_gk20a.o \ hal/fifo/fifo_gk20a.o \ hal/fifo/fifo_tu104.o \ hal/fifo/preempt_gk20a.o \ @@ -490,7 +497,6 @@ nvgpu-y += \ common/sim/sim.o \ common/sim/sim_pci.o \ common/sim/sim_netlist.o \ - common/rc/rc.o \ common/fifo/fifo.o \ common/fifo/preempt.o \ common/fifo/channel.o \ @@ -640,7 +646,6 @@ nvgpu-y += \ hal/priv_ring/priv_ring_gm20b_fusa.o \ hal/priv_ring/priv_ring_gp10b_fusa.o \ hal/ptimer/ptimer_gk20a_fusa.o \ - hal/rc/rc_gv11b_fusa.o \ hal/sync/syncpt_cmdbuf_gv11b_fusa.o \ hal/therm/therm_gm20b_fusa.o \ hal/therm/therm_gv11b_fusa.o \ diff --git a/drivers/gpu/nvgpu/Makefile.shared.configs b/drivers/gpu/nvgpu/Makefile.shared.configs index 2eb2e14ec..51516f957 100644 --- a/drivers/gpu/nvgpu/Makefile.shared.configs +++ b/drivers/gpu/nvgpu/Makefile.shared.configs @@ -93,6 +93,10 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL CONFIG_NVGPU_LOGGING := 1 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LOGGING +# Enable recovery for safety build until sw quiesce is done +CONFIG_NVGPU_RECOVERY := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_RECOVERY + # # Flags enabled only for safety debug and regular build profile. 
# diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 87a29427f..6e1b99787 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -132,7 +132,6 @@ srcs += common/utils/assert.c \ common/power_features/cg/cg.c \ common/fifo/preempt.c \ common/fifo/channel.c \ - common/rc/rc.c \ common/fifo/fifo.c \ common/fifo/pbdma.c \ common/fifo/tsg.c \ @@ -233,7 +232,6 @@ srcs += hal/mm/mm_gv11b_fusa.c \ hal/priv_ring/priv_ring_gm20b_fusa.c \ hal/priv_ring/priv_ring_gp10b_fusa.c \ hal/ptimer/ptimer_gk20a_fusa.c \ - hal/rc/rc_gv11b_fusa.c \ hal/sync/syncpt_cmdbuf_gv11b_fusa.c \ hal/therm/therm_gm20b_fusa.c \ hal/therm/therm_gv11b_fusa.c \ @@ -283,7 +281,6 @@ srcs += hal/init/hal_gp10b.c \ hal/fb/fb_gm20b.c \ hal/fb/fb_gv11b.c \ hal/fuse/fuse_gm20b.c \ - hal/rc/rc_gk20a.c \ hal/fifo/fifo_gk20a.c \ hal/fifo/preempt_gk20a.c \ hal/fifo/engines_gm20b.c \ @@ -390,6 +387,14 @@ srcs += common/fifo/userd.c \ hal/fifo/userd_gv11b.c endif +ifeq ($(CONFIG_NVGPU_RECOVERY),1) +srcs += common/rc/rc.c \ + hal/rc/rc_gv11b.c +ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1) +srcs += hal/rc/rc_gk20a.c +endif +endif + ifeq ($(CONFIG_NVGPU_FENCE),1) srcs += common/fence/fence.c endif diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 5bd1ea101..7d5626a94 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -83,7 +83,9 @@ #include "hal/fifo/ctxsw_timeout_gk20a.h" #include "hal/fifo/mmu_fault_gk20a.h" #include "hal/fifo/mmu_fault_gm20b.h" +#ifdef CONFIG_NVGPU_RECOVERY #include "hal/rc/rc_gk20a.h" +#endif #ifdef CONFIG_NVGPU_GRAPHICS #include "hal/gr/zbc/zbc_gm20b.h" #include "hal/gr/zcull/zcull_gm20b.h" @@ -606,7 +608,9 @@ static const struct gpu_ops gm20b_ops = { .init_pbdma_map = gk20a_fifo_init_pbdma_map, .is_preempt_pending = gk20a_fifo_is_preempt_pending, .reset_enable_hw = gk20a_init_fifo_reset_enable_hw, +#ifdef CONFIG_NVGPU_RECOVERY 
.recover = gk20a_fifo_recover, +#endif .intr_set_recover_mask = gk20a_fifo_intr_set_recover_mask, .intr_unset_recover_mask = gk20a_fifo_intr_unset_recover_mask, .setup_sw = nvgpu_fifo_setup_sw, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index e1dfa2a0d..8050d8894 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -96,7 +96,9 @@ #include "hal/fifo/mmu_fault_gm20b.h" #include "hal/fifo/mmu_fault_gp10b.h" #include "hal/fifo/ctxsw_timeout_gk20a.h" +#ifdef CONFIG_NVGPU_RECOVERY #include "hal/rc/rc_gk20a.h" +#endif #include "hal/gr/ecc/ecc_gp10b.h" #include "hal/gr/fecs_trace/fecs_trace_gm20b.h" #include "hal/gr/config/gr_config_gm20b.h" @@ -675,7 +677,9 @@ static const struct gpu_ops gp10b_ops = { .init_pbdma_map = gk20a_fifo_init_pbdma_map, .is_preempt_pending = gk20a_fifo_is_preempt_pending, .reset_enable_hw = gk20a_init_fifo_reset_enable_hw, +#ifdef CONFIG_NVGPU_RECOVERY .recover = gk20a_fifo_recover, +#endif .intr_set_recover_mask = gk20a_fifo_intr_set_recover_mask, .intr_unset_recover_mask = gk20a_fifo_intr_unset_recover_mask, .setup_sw = nvgpu_fifo_setup_sw, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 67d577b10..e24010376 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -71,7 +71,9 @@ #include "hal/fuse/fuse_gp10b.h" #include "hal/ptimer/ptimer_gk20a.h" #include "hal/regops/regops_gv11b.h" +#ifdef CONFIG_NVGPU_RECOVERY #include "hal/rc/rc_gv11b.h" +#endif #include "hal/fifo/fifo_gk20a.h" #include "hal/fifo/fifo_gv11b.h" #include "hal/fifo/pbdma_gm20b.h" @@ -833,7 +835,9 @@ static const struct gpu_ops gv11b_ops = { .init_pbdma_map = gk20a_fifo_init_pbdma_map, .is_preempt_pending = gv11b_fifo_is_preempt_pending, .reset_enable_hw = gv11b_init_fifo_reset_enable_hw, +#ifdef CONFIG_NVGPU_RECOVERY .recover = gv11b_fifo_recover, +#endif .intr_set_recover_mask = 
gv11b_fifo_intr_set_recover_mask, .intr_unset_recover_mask = gv11b_fifo_intr_unset_recover_mask, .setup_sw = nvgpu_fifo_setup_sw, diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 62b299a8f..bb77d4449 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -71,7 +71,9 @@ #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" #include "hal/fuse/fuse_gp106.h" +#ifdef CONFIG_NVGPU_RECOVERY #include "hal/rc/rc_gv11b.h" +#endif #include "hal/fifo/fifo_gk20a.h" #include "hal/fifo/fifo_gv11b.h" #include "hal/fifo/fifo_tu104.h" @@ -863,7 +865,9 @@ static const struct gpu_ops tu104_ops = { .init_pbdma_map = gk20a_fifo_init_pbdma_map, .is_preempt_pending = gv11b_fifo_is_preempt_pending, .reset_enable_hw = gv11b_init_fifo_reset_enable_hw, +#ifdef CONFIG_NVGPU_RECOVERY .recover = gv11b_fifo_recover, +#endif .intr_set_recover_mask = gv11b_fifo_intr_set_recover_mask, .intr_unset_recover_mask = gv11b_fifo_intr_unset_recover_mask, .setup_sw = nvgpu_fifo_setup_sw, diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c similarity index 100% rename from drivers/gpu/nvgpu/hal/rc/rc_gv11b_fusa.c rename to drivers/gpu/nvgpu/hal/rc/rc_gv11b.c diff --git a/drivers/gpu/nvgpu/include/nvgpu/rc.h b/drivers/gpu/nvgpu/include/nvgpu/rc.h index df7236444..3402b71ef 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/rc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/rc.h @@ -43,6 +43,7 @@ struct nvgpu_tsg; struct nvgpu_channel; struct nvgpu_pbdma_status_info; +#ifdef CONFIG_NVGPU_RECOVERY void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask, struct nvgpu_tsg *tsg, bool debug_dump); @@ -64,4 +65,46 @@ void nvgpu_rc_fifo_recover(struct gk20a *g, u32 hw_id, /* if ~0, will be queried from HW */ bool id_is_tsg, /* ignored if hw_id == ~0 */ bool id_is_known, bool debug_dump, u32 rc_type); +#else /* !CONFIG_NVGPU_RECOVERY: the recovery API reduces to empty inline stubs */ +static inline void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask, 
+ struct nvgpu_tsg *tsg, bool debug_dump) +{ +} + +static inline void nvgpu_rc_pbdma_fault(struct gk20a *g, struct nvgpu_fifo *f, + u32 pbdma_id, u32 error_notifier) +{ +} + +static inline void nvgpu_rc_runlist_update(struct gk20a *g, u32 runlist_id) +{ +} + +static inline void nvgpu_rc_preempt_timeout(struct gk20a *g, struct nvgpu_tsg *tsg) +{ +} + +static inline void nvgpu_rc_gr_fault(struct gk20a *g, + struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) +{ +} + +static inline void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g) +{ +} + +static inline void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct nvgpu_tsg *tsg, + bool debug_dump, u32 rc_type) +{ +} + +static inline void nvgpu_rc_fifo_recover(struct gk20a *g, + u32 eng_bitmask, /* if zero, will be queried from HW */ + u32 hw_id, /* if ~0, will be queried from HW */ + bool id_is_tsg, /* ignored if hw_id == ~0 */ + bool id_is_known, bool debug_dump, u32 rc_type) +{ +} + +#endif /* CONFIG_NVGPU_RECOVERY */ #endif /* NVGPU_RC_H */