diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index 792aaa5eb..f9deb6f98 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -49,13 +49,28 @@
 
 #include "gk20a/ce2_gk20a.h"
 
-void __nvgpu_check_gpu_state(struct gk20a *g)
+/*
+ * Check the boot_0 value returned by nvgpu_mc_boot_0().
+ *
+ * Returns false, after logging an error, when the value reads back as
+ * 0xffffffff; returns true for any other value.
+ */
+bool is_nvgpu_gpu_state_valid(struct gk20a *g)
 {
 	u32 boot_0 = 0xffffffffU;
 
 	boot_0 = nvgpu_mc_boot_0(g, NULL, NULL, NULL);
 	if (boot_0 == 0xffffffffU) {
 		nvgpu_err(g, "GPU has disappeared from bus!!");
+		return false;
+	}
+	return true;
+}
+
+/* Log and call nvgpu_kernel_restart() when the GPU state check fails. */
+void __nvgpu_check_gpu_state(struct gk20a *g)
+{
+	if (!is_nvgpu_gpu_state_valid(g)) {
 		nvgpu_err(g, "Rebooting system!!");
 		nvgpu_kernel_restart(NULL);
 	}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 150897ae9..36ec4bcba 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -2284,6 +2284,8 @@ struct gk20a_cyclestate_buffer_elem {
 void __nvgpu_check_gpu_state(struct gk20a *g);
 void __gk20a_warn_on_no_regs(void);
 
+bool is_nvgpu_gpu_state_valid(struct gk20a *g);
+
 /* classes that the device supports */
 /* TBD: get these from an open-sourced SDK? */
 enum {
diff --git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c
index d7b8c46c4..af9da76f0 100644
--- a/drivers/gpu/nvgpu/os/linux/pci.c
+++ b/drivers/gpu/nvgpu/os/linux/pci.c
@@ -747,8 +747,12 @@ void nvgpu_pci_shutdown(struct pci_dev *pdev)
 	if (gk20a_gpu_is_virtual(dev))
 		return;
 
-	err = nvgpu_nvlink_deinit(g);
-	WARN(err, "gpu failed to remove nvlink");
+	if (is_nvgpu_gpu_state_valid(g)) {
+		err = nvgpu_nvlink_deinit(g);
+		WARN(err, "gpu failed to remove nvlink");
+	} else {
+		nvgpu_err(g, "skipped nvlink deinit");
+	}
 
 	nvgpu_info(g, "shut down complete");
 }