mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-25 02:52:51 +03:00
gpu: nvgpu: Enter Quiesce if GPU drops off the bus
Currently, we reboot the entire system using kernel_restart() if the GPU registers become inaccessible because the GPU has disappeared from the bus. The GPU hitting high temperatures is one of the reasons we might end up in the above scenario. Replace kernel_restart() with a quiesce call as a more graceful way of signalling the GPU's unavailability. While entering the quiesce state, make sure we do not trigger any register accesses, which are bound to fail in this case. Bug 2919899 Change-Id: Ia9d413e04c7d205752414ff3e892f055c4363cce Signed-off-by: Tejal Kudav <tkudav@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2398801 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
Alex Waterman
parent
9963b94b4b
commit
71b005c1ef
@@ -61,8 +61,8 @@ bool is_nvgpu_gpu_state_valid(struct gk20a *g)
|
||||
void nvgpu_check_gpu_state(struct gk20a *g)
|
||||
{
|
||||
if (!is_nvgpu_gpu_state_valid(g)) {
|
||||
nvgpu_err(g, "Rebooting system!!");
|
||||
nvgpu_kernel_restart(NULL);
|
||||
nvgpu_err(g, "Entering SW Quiesce!!");
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -201,9 +201,15 @@ void nvgpu_sw_quiesce(struct gk20a *g)
|
||||
g->sw_quiesce_pending = true;
|
||||
|
||||
nvgpu_cond_signal_interruptible(&g->sw_quiesce_cond);
|
||||
gk20a_mask_interrupts(g);
|
||||
nvgpu_start_gpu_idle(g);
|
||||
nvgpu_fifo_sw_quiesce(g);
|
||||
/*
|
||||
* Avoid register accesses when the GPU has disappeared
|
||||
* from the bus.
|
||||
*/
|
||||
if (is_nvgpu_gpu_state_valid(g)) {
|
||||
gk20a_mask_interrupts(g);
|
||||
nvgpu_fifo_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
/* init interface layer support for all falcons */
|
||||
|
||||
Reference in New Issue
Block a user