diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c index 5750800f5..08d0e679d 100644 --- a/drivers/gpu/nvgpu/common/linux/debug.c +++ b/drivers/gpu/nvgpu/common/linux/debug.c @@ -409,5 +409,5 @@ void gk20a_debug_deinit(struct gk20a *g) gk20a_fifo_debugfs_deinit(g); debugfs_remove_recursive(l->debugfs); - debugfs_remove_recursive(l->debugfs_alias); + debugfs_remove(l->debugfs_alias); } diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c index 91ae05123..d63a90307 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_allocator.c +++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.c @@ -55,8 +55,6 @@ void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) { - if (!IS_ERR_OR_NULL(a->debugfs_entry)) - debugfs_remove(a->debugfs_entry); } void nvgpu_alloc_debugfs_init(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/common/linux/debug_clk.c b/drivers/gpu/nvgpu/common/linux/debug_clk.c index b265ca690..81839de7d 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_clk.c +++ b/drivers/gpu/nvgpu/common/linux/debug_clk.c @@ -267,6 +267,5 @@ int gm20b_clk_init_debugfs(struct gk20a *g) err_out: pr_err("%s: Failed to make debugfs node\n", __func__); - debugfs_remove_recursive(l->debugfs); return -ENOMEM; } diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c index 191fcb0ee..ec997e280 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_pmu.c +++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c @@ -477,6 +477,5 @@ int gk20a_pmu_debugfs_init(struct gk20a *g) return 0; err_out: pr_err("%s: Failed to make debugfs node\n", __func__); - debugfs_remove_recursive(l->debugfs); return -ENOMEM; } diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 46b89ad0d..c474f36a2 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ 
b/drivers/gpu/nvgpu/common/linux/module.c @@ -226,9 +226,12 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) * After this point, gk20a interrupts should not get * serviced. */ - disable_irq(g->irq_stall); - if (g->irq_stall != g->irq_nonstall) - disable_irq(g->irq_nonstall); + if (g->irqs_enabled) { + disable_irq(g->irq_stall); + if (g->irq_stall != g->irq_nonstall) + disable_irq(g->irq_nonstall); + g->irqs_enabled = 0; + } /* Decrement platform power refcount */ if (platform->idle) @@ -640,6 +643,18 @@ static int gk20a_pm_unrailgate(struct device *dev) return ret; } +/* + * Remove association of the driver with OS interrupt handler + */ +void nvgpu_free_irq(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + devm_free_irq(dev, g->irq_stall, g); + if (g->irq_stall != g->irq_nonstall) + devm_free_irq(dev, g->irq_nonstall, g); +} + /* * Idle the GPU in preparation of shutdown/remove. * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW @@ -651,24 +666,27 @@ int nvgpu_quiesce(struct gk20a *g) int err; struct device *dev = dev_from_gk20a(g); - err = gk20a_wait_for_idle(g); - if (err) { - nvgpu_err(g, "failed to idle GPU, err=%d", err); - return err; - } + if (g->power_on) { + err = gk20a_wait_for_idle(g); + if (err) { + nvgpu_err(g, "failed to idle GPU, err=%d", err); + return err; + } - err = gk20a_fifo_disable_all_engine_activity(g, true); - if (err) { - nvgpu_err(g, "failed to disable engine activity, err=%d", - err); + err = gk20a_fifo_disable_all_engine_activity(g, true); + if (err) { + nvgpu_err(g, + "failed to disable engine activity, err=%d", + err); return err; - } + } - err = gk20a_fifo_wait_engine_idle(g); - if (err) { - nvgpu_err(g, "failed to idle engines, err=%d", - err); - return err; + err = gk20a_fifo_wait_engine_idle(g); + if (err) { + nvgpu_err(g, "failed to idle engines, err=%d", + err); + return err; + } } if (gk20a_gpu_is_virtual(dev)) @@ -679,6 +697,7 @@ int nvgpu_quiesce(struct gk20a *g) if 
(err) nvgpu_err(g, "failed to prepare for poweroff, err=%d", err); + return err; } diff --git a/drivers/gpu/nvgpu/common/linux/module.h b/drivers/gpu/nvgpu/common/linux/module.h index def982884..55a3b6925 100644 --- a/drivers/gpu/nvgpu/common/linux/module.h +++ b/drivers/gpu/nvgpu/common/linux/module.h @@ -21,6 +21,7 @@ void gk20a_remove_support(struct gk20a *g); void gk20a_driver_start_unload(struct gk20a *g); int nvgpu_quiesce(struct gk20a *g); int nvgpu_remove(struct device *dev, struct class *class); +void nvgpu_free_irq(struct gk20a *g); extern struct class nvgpu_class; diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index f1d123671..1a7d1842a 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c @@ -521,13 +521,12 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) if (gk20a_gpu_is_virtual(dev)) return; - /* only idle the GPU if the GPU is powered on */ - if (g->power_on) { - gk20a_driver_start_unload(g); - err = nvgpu_quiesce(g); - /* TODO: handle failure to idle */ - WARN(err, "gpu failed to idle during driver removal"); - } + gk20a_driver_start_unload(g); + err = nvgpu_quiesce(g); + /* TODO: handle failure to idle */ + WARN(err, "gpu failed to idle during driver removal"); + + nvgpu_free_irq(g); nvgpu_remove(dev, &nvgpu_pci_class); diff --git a/drivers/gpu/nvgpu/common/linux/thread.c b/drivers/gpu/nvgpu/common/linux/thread.c index fe3906eb3..92c556f21 100644 --- a/drivers/gpu/nvgpu/common/linux/thread.c +++ b/drivers/gpu/nvgpu/common/linux/thread.c @@ -46,8 +46,10 @@ int nvgpu_thread_create(struct nvgpu_thread *thread, void nvgpu_thread_stop(struct nvgpu_thread *thread) { - kthread_stop(thread->task); - thread->task = NULL; + if (thread->task) { + kthread_stop(thread->task); + thread->task = NULL; + } }; bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c index 
077a1bf83..5fd8121dc 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c @@ -2223,7 +2223,8 @@ static void nvgpu_remove_pmu_support(struct nvgpu_pmu *pmu) if (nvgpu_alloc_initialized(&pmu->dmem)) nvgpu_alloc_destroy(&pmu->dmem); - nvgpu_release_firmware(g, pmu->fw); + if (pmu->fw) + nvgpu_release_firmware(g, pmu->fw); nvgpu_mutex_destroy(&pmu->elpg_mutex); nvgpu_mutex_destroy(&pmu->pg_mutex); diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c index ae9c9b1ff..7955e6c2d 100644 --- a/drivers/gpu/nvgpu/common/pramin.c +++ b/drivers/gpu/nvgpu/common/pramin.c @@ -16,6 +16,7 @@ #include #include +#include #include "gk20a/gk20a.h" @@ -88,6 +89,14 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, void *sgl; u32 byteoff, start_reg, until_end, n; + /* + * TODO: Vidmem is not accessible through pramin on shutdown path. + * driver should be refactored to prevent this from happening, but for + * now it is ok just to ignore the writes + */ + if (!g->regs && nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) + return; + alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); sgt = &alloc->sgt; for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) { diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 0b8422a60..ea69d7cb5 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -465,21 +465,30 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) trace_gk20a_free_channel(ch->chid); - /* abort channel and remove from runlist */ - if (gk20a_is_channel_marked_as_tsg(ch)) { - err = g->ops.fifo.tsg_unbind_channel(ch); - if (err) - nvgpu_err(g, "failed to unbind channel %d from TSG", ch->chid); - /* - * Channel is not a part of TSG this point onwards - * So stash its status and use it whenever necessary - * e.g.
while releasing gr_ctx in g->ops.gr.free_channel_ctx() - */ - was_tsg = true; - } else { - gk20a_disable_channel(ch); + /* + * Disable channel/TSG and unbind here. This should not be executed if + * HW access is not available during shutdown/removal path as it will + * trigger a timeout + */ + if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + /* abort channel and remove from runlist */ + if (gk20a_is_channel_marked_as_tsg(ch)) { + err = g->ops.fifo.tsg_unbind_channel(ch); + if (err) + nvgpu_err(g, + "failed to unbind channel %d from TSG", + ch->chid); + /* + * Channel is not a part of TSG this point onwards + * So stash its status and use it whenever necessary + * e.g. while releasing gr_ctx in + * g->ops.gr.free_channel_ctx() + */ + was_tsg = true; + } else { + gk20a_disable_channel(ch); + } } - /* wait until there's only our ref to the channel */ if (!force) gk20a_wait_until_counter_is_N( diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index fea3b0fae..71cba9eca 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -67,6 +67,7 @@ struct gk20a_fecs_trace { struct nvgpu_mutex hash_lock; struct nvgpu_mutex poll_lock; struct nvgpu_thread poll_task; + bool init; }; #ifdef CONFIG_GK20A_CTXSW_TRACE @@ -547,23 +548,12 @@ static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) &gk20a_fecs_trace_debugfs_ring_fops); } -static void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - debugfs_remove_recursive(l->debugfs); -} - #else static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) { } -static inline void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g) -{ -} - #endif /* CONFIG_DEBUG_FS */ int gk20a_fecs_trace_init(struct gk20a *g) @@ -598,6 +588,9 @@ int gk20a_fecs_trace_init(struct gk20a *g) NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE; gk20a_fecs_trace_debugfs_init(g); + + trace->init = true; + 
return 0; clean_hash_lock: @@ -682,15 +675,17 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch) { u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); - gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, + if (g->fecs_trace) { + gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "ch=%p context_ptr=%x", ch, context_ptr); - if (g->ops.fecs_trace.is_enabled(g)) { - if (g->ops.fecs_trace.flush) - g->ops.fecs_trace.flush(g); - gk20a_fecs_trace_poll(g); + if (g->ops.fecs_trace.is_enabled(g)) { + if (g->ops.fecs_trace.flush) + g->ops.fecs_trace.flush(g); + gk20a_fecs_trace_poll(g); + } + gk20a_fecs_trace_hash_del(g, context_ptr); } - gk20a_fecs_trace_hash_del(g, context_ptr); return 0; } @@ -709,7 +704,9 @@ int gk20a_fecs_trace_deinit(struct gk20a *g) { struct gk20a_fecs_trace *trace = g->fecs_trace; - gk20a_fecs_trace_debugfs_cleanup(g); + if (!trace->init) + return 0; + nvgpu_thread_stop(&trace->poll_task); gk20a_fecs_trace_free_ring(g); gk20a_fecs_trace_free_hash_table(g);