diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 14dbf7836..f9b3c7eb9 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -681,6 +681,12 @@ void gk20a_ce_delete_context(struct device *dev, u32 ce_ctx_id) { struct gk20a *g = gk20a_from_dev(dev); + gk20a_ce_delete_context_priv(g, ce_ctx_id); +} + +void gk20a_ce_delete_context_priv(struct gk20a *g, + u32 ce_ctx_id) +{ struct gk20a_ce_app *ce_app = &g->ce_app; struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h index 5cdd233e6..7ecf130f8 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h @@ -144,9 +144,12 @@ int gk20a_ce_execute_ops(struct device *dev, struct gk20a_fence *gk20a_fence_in, u32 submit_flags, struct gk20a_fence **gk20a_fence_out); +void gk20a_ce_delete_context_priv(struct gk20a *g, + u32 ce_ctx_id); void gk20a_ce_delete_context(struct device *dev, u32 ce_ctx_id); + #ifdef CONFIG_DEBUG_FS /* CE app debugfs api */ void gk20a_ce_debugfs_init(struct device *dev); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index d29228c19..c95e83584 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -123,7 +123,8 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *ch) { - struct gk20a_platform *platform = gk20a_get_platform(f->g->dev); + struct gk20a_platform *platform; + struct gk20a *g = f->g; trace_gk20a_release_used_channel(ch->hw_chid); /* refcount is zero here and channel is in a freed/dead state */ @@ -133,10 +134,18 @@ static void free_channel(struct fifo_gk20a *f, f->used_channels--; nvgpu_mutex_release(&f->free_chs_mutex); - if (platform->aggressive_sync_destroy_thresh && + /* + * On teardown it is not possible to dereference 
platform, but ignoring + * this is fine then because no new channels would be created. + */ + if (!g->driver_is_dying) { + platform = gk20a_get_platform(g->dev); + + if (platform->aggressive_sync_destroy_thresh && (f->used_channels < platform->aggressive_sync_destroy_thresh)) - platform->aggressive_sync_destroy = false; + platform->aggressive_sync_destroy = false; + } } int channel_gk20a_commit_va(struct channel_gk20a *c) @@ -3022,7 +3031,12 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, bool need_deferred_cleanup = false; struct nvgpu_gpfifo __user *user_gpfifo = args ? (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL; - struct gk20a_platform *platform = gk20a_get_platform(d); + struct gk20a_platform *platform; + + if (g->driver_is_dying) + return -ENODEV; + + platform = gk20a_get_platform(d); if (c->has_timedout) return -ETIMEDOUT; diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 37077c7d6..afeb37a48 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -1343,8 +1343,7 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct gk20a_ctrl_priv *priv = filp->private_data; - struct device *dev = priv->dev; - struct gk20a *g = get_gk20a(dev); + struct gk20a *g = priv->g; struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args; struct nvgpu_gpu_zcull_get_info_args *get_info_args; struct nvgpu_gpu_zbc_set_table_args *set_table_args; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 507abb47d..1b96d73de 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -654,7 +654,7 @@ static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_timeout_args *args) { int err; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = 
dbg_s->g; gk20a_dbg_fn("powergate mode = %d", args->enable); @@ -669,7 +669,7 @@ static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_timeout_args *args) { int status; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; nvgpu_mutex_acquire(&g->dbg_sessions_lock); status = g->timeouts_enabled; @@ -700,7 +700,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) { - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct gr_gk20a *gr = &g->gr; struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state; u32 sm_id; @@ -739,7 +739,7 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) { - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct gr_gk20a *gr = &g->gr; u32 sm_id; struct channel_gk20a *ch; @@ -770,7 +770,7 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args) { - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct gr_gk20a *gr = &g->gr; u32 sm_id; struct channel_gk20a *ch; @@ -941,7 +941,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct dbg_session_gk20a *dbg_s = filp->private_data; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE]; int err = 0; @@ -1130,7 +1130,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, bool is_pg_disabled = false; struct device *dev = dbg_s->dev; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct channel_gk20a *ch; gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); @@ -1246,7 +1246,7 @@ static int 
nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, u32 powermode) { int err = 0; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; /* This function must be called with g->dbg_sessions_lock held */ @@ -1349,7 +1349,7 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_powergate_args *args) { int err; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; gk20a_dbg_fn("%s powergate mode = %d", dev_name(dbg_s->dev), args->mode); @@ -1363,7 +1363,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) { int err; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct channel_gk20a *ch_gk20a; gk20a_dbg_fn("%s smpc ctxsw mode = %d", @@ -1405,7 +1405,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) { int err; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct channel_gk20a *ch_gk20a; gk20a_dbg_fn("%s pm ctxsw mode = %d", @@ -1457,7 +1457,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) { - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct channel_gk20a *ch; int err = 0, action = args->mode; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 03b4f1a93..861eda5d3 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -697,26 +697,14 @@ static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) return g->ops.mc.isr_thread_stall(g); } -void gk20a_remove_support(struct device *dev) +void gk20a_remove_support(struct gk20a *g) { - struct gk20a *g = get_gk20a(dev); - #ifdef CONFIG_TEGRA_COMMON 
tegra_unregister_idle_unidle(); #endif if (g->dbg_regops_tmp_buf) kfree(g->dbg_regops_tmp_buf); - nvgpu_wait_for_deferred_interrupts(g); - - gk20a_channel_cancel_pending_sema_waits(g); - - if (g->nonstall_work_queue) { - cancel_work_sync(&g->nonstall_fn_work); - destroy_workqueue(g->nonstall_work_queue); - g->nonstall_work_queue = NULL; - } - if (g->pmu.remove_support) g->pmu.remove_support(&g->pmu); @@ -1636,6 +1624,11 @@ static int gk20a_probe(struct platform_device *dev) if (gk20a->irq_stall != gk20a->irq_nonstall) disable_irq(gk20a->irq_nonstall); + /* + * is_fmodel needs to be in gk20a struct for deferred teardown + */ + gk20a->is_fmodel = platform->is_fmodel; + err = gk20a_init_support(dev); if (err) return err; @@ -1682,11 +1675,6 @@ static int __exit gk20a_remove(struct platform_device *pdev) if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) gk20a_scale_exit(dev); - if (g->remove_support) - g->remove_support(dev); - - gk20a_ce_destroy(g); - #ifdef CONFIG_ARCH_TEGRA_18x_SOC nvgpu_clk_arb_cleanup_arbiter(g); #endif @@ -1775,7 +1763,21 @@ void gk20a_busy_noresume(struct device *dev) void gk20a_driver_start_unload(struct gk20a *g) { gk20a_dbg(gpu_dbg_shutdown, "Driver is now going down!\n"); + + down_write(&g->busy_lock); g->driver_is_dying = 1; + up_write(&g->busy_lock); + + gk20a_wait_for_idle(g->dev); + + nvgpu_wait_for_deferred_interrupts(g); + gk20a_channel_cancel_pending_sema_waits(g); + + if (g->nonstall_work_queue) { + cancel_work_sync(&g->nonstall_fn_work); + destroy_workqueue(g->nonstall_work_queue); + g->nonstall_work_queue = NULL; + } } int gk20a_wait_for_idle(struct device *dev) @@ -2283,6 +2285,12 @@ static void gk20a_free_cb(struct kref *refcount) struct gk20a, refcount); gk20a_dbg(gpu_dbg_shutdown, "Freeing GK20A struct!"); + + gk20a_ce_destroy(g); + + if (g->remove_support) + g->remove_support(g); + kfree(g); } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 0cf1f24f6..e969e53a5 100644 --- 
a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -862,6 +862,9 @@ struct gk20a { atomic_t nonstall_ops; struct work_struct nonstall_fn_work; struct workqueue_struct *nonstall_work_queue; + + bool is_fmodel; + struct kref refcount; struct resource *reg_mem; @@ -972,7 +975,7 @@ struct gk20a { bool global_profiler_reservation_held; int profiler_reservation_count; - void (*remove_support)(struct device *); + void (*remove_support)(struct gk20a *); u64 pg_ingating_time_us; u64 pg_ungating_time_us; @@ -1443,7 +1446,7 @@ extern struct class nvgpu_class; int gk20a_pm_init(struct device *dev); int gk20a_pm_finalize_poweron(struct device *dev); -void gk20a_remove_support(struct device *dev); +void gk20a_remove_support(struct gk20a *g); static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch) { diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index fdb452988..98f19165a 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -808,7 +808,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm) struct gk20a *g = gk20a_from_mm(mm); if (mm->vidmem.ce_ctx_id != (u32)~0) - gk20a_ce_delete_context(g->dev, mm->vidmem.ce_ctx_id); + gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); mm->vidmem.ce_ctx_id = (u32)~0; @@ -1237,11 +1237,10 @@ static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, u32 num_pages = 1 << order; u32 len = num_pages * PAGE_SIZE; int err; - struct gk20a_platform *platform = dev_get_drvdata(g->dev); gk20a_dbg_fn(""); - if (platform->is_fmodel) + if (g->is_fmodel) return alloc_gmmu_phys_pages(vm, order, entry); /* @@ -1267,7 +1266,6 @@ void free_gmmu_pages(struct vm_gk20a *vm, struct gk20a_mm_entry *entry) { struct gk20a *g = gk20a_from_vm(vm); - struct gk20a_platform *platform = dev_get_drvdata(g->dev); gk20a_dbg_fn(""); @@ -1277,7 +1275,7 @@ void free_gmmu_pages(struct vm_gk20a *vm, if (entry->woffset) /* fake shadow mem */ return; - 
if (platform->is_fmodel) { + if (g->is_fmodel) { free_gmmu_phys_pages(vm, entry); return; } @@ -1295,11 +1293,9 @@ void free_gmmu_pages(struct vm_gk20a *vm, int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) { - struct gk20a_platform *platform = dev_get_drvdata(g->dev); - gk20a_dbg_fn(""); - if (platform->is_fmodel) + if (g->is_fmodel) return map_gmmu_phys_pages(entry); if (IS_ENABLED(CONFIG_ARM64)) { @@ -1321,11 +1317,9 @@ int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) { - struct gk20a_platform *platform = dev_get_drvdata(g->dev); - gk20a_dbg_fn(""); - if (platform->is_fmodel) { + if (g->is_fmodel) { unmap_gmmu_phys_pages(entry); return; } @@ -4089,6 +4083,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) struct mapped_buffer_node *mapped_buffer; struct vm_reserved_va_node *va_node, *va_node_tmp; struct rb_node *node; + struct gk20a *g = vm->mm->g; gk20a_dbg_fn(""); @@ -4097,7 +4092,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) * pool involves unmapping a GMMU mapping which means acquiring the * update_gmmu_lock. */ - if (!gk20a_platform_has_syncpoints(gk20a_from_vm(vm)->dev)) { + if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) { if (vm->sema_pool) { nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); nvgpu_semaphore_pool_put(vm->sema_pool); @@ -4191,7 +4186,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) /* * Don't waste the memory on semaphores if we don't need them.
*/ - if (gk20a_platform_has_syncpoints(g->dev)) + if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS) return 0; if (vm->sema_pool) diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c index da3fb986a..33625526e 100644 --- a/drivers/gpu/nvgpu/pci.c +++ b/drivers/gpu/nvgpu/pci.c @@ -393,6 +393,11 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, } disable_irq(g->irq_stall); + /* + * is_fmodel needs to be in gk20a struct for deferred teardown + */ + g->is_fmodel = platform->is_fmodel; + err = nvgpu_pci_init_support(pdev); if (err) return err; @@ -430,7 +435,6 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) struct gk20a *g = get_gk20a(&pdev->dev); gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n"); - gk20a_driver_start_unload(g); if (g->irqs_enabled) disable_irq(g->irq_stall); @@ -449,7 +453,7 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) * Wait for the driver to finish up all the IOCTLs it's working on * before cleaning up the driver's data structures. 
*/ - gk20a_wait_for_idle(&pdev->dev); + gk20a_driver_start_unload(g); gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n"); #ifdef CONFIG_ARCH_TEGRA_18x_SOC @@ -459,9 +463,6 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) gk20a_user_deinit(g->dev, &nvgpu_pci_class); gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b"); - if (g->remove_support) - g->remove_support(g->dev); - debugfs_remove_recursive(platform->debugfs); debugfs_remove_recursive(platform->debugfs_alias); diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 44c55361c..db3082fb3 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -192,10 +192,9 @@ static int vgpu_intr_thread(void *dev_id) return 0; } -static void vgpu_remove_support(struct device *dev) +static void vgpu_remove_support(struct gk20a *g) { - struct gk20a *g = get_gk20a(dev); - struct vgpu_priv_data *priv = vgpu_get_priv_data_from_dev(dev); + struct vgpu_priv_data *priv = vgpu_get_priv_data_from_dev(g->dev); struct tegra_vgpu_intr_msg msg; int err; @@ -266,7 +265,7 @@ static int vgpu_init_support(struct platform_device *pdev) return 0; fail: - vgpu_remove_support(&pdev->dev); + vgpu_remove_support(g); return err; } @@ -581,6 +580,8 @@ int vgpu_probe(struct platform_device *pdev) platform->vgpu_priv = priv; gk20a->dev = dev; + gk20a->is_fmodel = platform->is_fmodel; + nvgpu_kmem_init(gk20a); err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); @@ -663,7 +664,7 @@ int vgpu_remove(struct platform_device *pdev) vgpu_pm_qos_remove(dev); if (g->remove_support) - g->remove_support(dev); + g->remove_support(g); vgpu_comm_deinit(); gk20a_sched_ctrl_cleanup(g);