diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index b62c4593f..9e9d10073 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -514,7 +514,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
-	bool was_empty;
 	size_t mem_size = mem->size;
 
 	dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
@@ -523,18 +522,19 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
 	WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
 
 	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
-		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
-		was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
-		nvgpu_list_add_tail(&mem->clear_list_entry,
-				&g->mm.vidmem.clear_list_head);
-		atomic64_add(mem->aligned_size,
-				&g->mm.vidmem.bytes_pending.atomic_var);
-		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+		int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
 
-		if (was_empty) {
-			cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
-			schedule_work(&g->mm.vidmem.clear_mem_worker);
-		}
+		/*
+		 * If there's an error here then that means we can't clear the
+		 * vidmem. That's too bad; however, we still own the nvgpu_mem
+		 * buf so we have to free that.
+		 *
+		 * We don't need to worry about the vidmem allocator itself
+		 * since when that gets cleaned up in the driver shutdown path
+		 * all the outstanding allocs are force freed.
+		 */
+		if (err)
+			nvgpu_kfree(g, mem);
 	} else {
 		nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
 		nvgpu_free(mem->allocator,
diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c
index ea8e552f7..92e7e5043 100644
--- a/drivers/gpu/nvgpu/common/linux/vidmem.c
+++ b/drivers/gpu/nvgpu/common/linux/vidmem.c
@@ -84,6 +84,8 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
 
 	nvgpu_kfree(g, linux_buf);
 	nvgpu_vidmem_buf_free(g, buf);
+
+	gk20a_put(g);
 }
 
 static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
@@ -160,13 +162,21 @@ struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf)
 
 int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
 {
-	struct nvgpu_vidmem_buf *buf;
+	struct nvgpu_vidmem_buf *buf = NULL;
 	struct nvgpu_vidmem_linux *priv;
 	int err, fd;
 
+	/*
+	 * This ref is released when the dma_buf is closed.
+	 */
+	if (!gk20a_get(g))
+		return -ENODEV;
+
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		err = -ENOMEM;
+		goto fail;
+	}
 
 	buf = nvgpu_vidmem_user_alloc(g, bytes);
 	if (!buf) {
@@ -195,8 +205,10 @@ int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
 
 	return fd;
 
 fail:
-	nvgpu_kfree(g, priv);
 	nvgpu_vidmem_buf_free(g, buf);
+	nvgpu_kfree(g, priv);
+	gk20a_put(g);
+
 	return err;
 }
@@ -229,24 +241,9 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 	return err;
 }
 
-void nvgpu_vidmem_clear_mem_worker(struct work_struct *work)
+void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem)
 {
-	struct mm_gk20a *mm = container_of(work, struct mm_gk20a,
-					vidmem.clear_mem_worker);
-	struct gk20a *g = mm->g;
-	struct nvgpu_mem *mem;
-
-	while ((mem = nvgpu_vidmem_get_pending_alloc(mm)) != NULL) {
-		nvgpu_vidmem_clear(g, mem);
-		nvgpu_free(mem->allocator,
-			(u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
-		nvgpu_free_sgtable(g, &mem->priv.sgt);
-
-		WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
-					&g->mm.vidmem.bytes_pending) < 0);
-		mem->size = 0;
-		mem->aperture = APERTURE_INVALID;
-
-		nvgpu_kfree(g, mem);
-	}
+	nvgpu_free(vidmem->allocator,
+		(u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl));
+	nvgpu_free_sgtable(g, &vidmem->priv.sgt);
 }
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
index d1c5a2e80..60b819d78 100644
--- a/drivers/gpu/nvgpu/common/mm/vidmem.c
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -22,15 +22,55 @@
 
 #include
 
+#include
 #include
 #include
 #include
+#include
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+/*
+ * This is expected to be called from the shutdown path (or the error path in
+ * the vidmem init code). As such we do not expect new vidmem frees to be
+ * enqueued.
+ */
 void nvgpu_vidmem_destroy(struct gk20a *g)
 {
+	struct nvgpu_timeout timeout;
+
+	nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
+
+	/*
+	 * Ensure that the thread runs one last time to flush anything in the
+	 * queue.
+	 */
+	nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
+
+	/*
+	 * Wait for at most 1 second before just continuing on. It doesn't make
+	 * sense to hang the system over some potential memory leaks.
+	 */
+	do {
+		bool empty;
+
+		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+		empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
+		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+
+		if (empty)
+			break;
+
+		nvgpu_msleep(10);
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	/*
+	 * Kill the vidmem clearing thread now. This will wake the thread up
+	 * automatically and cause the wait_interruptible condition to trigger.
+	 */
+	nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
+
 	if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
 		nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
 }
@@ -107,6 +147,139 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
 	return 0;
 }
 
+void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
+{
+	/*
+	 * On the first increment of the pause_count (0 -> 1) take the pause
+	 * lock and prevent the vidmem clearing thread from processing work
+	 * items.
+	 *
+	 * Otherwise the increment is all that's needed - it's essentially a
+	 * ref-count for the number of pause() calls.
+	 *
+	 * The sync component is implemented by waiting for the lock to be
+	 * released by the clearing thread in case the thread is currently
+	 * processing work items.
+	 */
+	if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1)
+		nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
+}
+
+void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
+{
+	/*
+	 * And on the last decrement (1 -> 0) release the pause lock and let
+	 * the vidmem clearing thread continue.
+	 */
+	if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0)
+		nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
+}
+
+int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	struct mm_gk20a *mm = &g->mm;
+
+	/*
+	 * Crap. Can't enqueue new vidmem bufs! CE may be gone!
+	 *
+	 * However, an errant app can hold a vidmem dma_buf FD open past when
+	 * the nvgpu driver has exited. Thus when the FD does get closed
+	 * eventually the dma_buf release function will try to call the vidmem
+	 * free function which will attempt to enqueue the vidmem into the
+	 * vidmem clearing thread.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
+		return -ENOSYS;
+
+	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+	nvgpu_list_add_tail(&mem->clear_list_entry,
+			&mm->vidmem.clear_list_head);
+	nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending);
+	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+
+	nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond);
+
+	return 0;
+}
+
+static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
+{
+	struct nvgpu_mem *mem = NULL;
+
+	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
+		mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
+				nvgpu_mem, clear_list_entry);
+		nvgpu_list_del(&mem->clear_list_entry);
+	}
+	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+
+	return mem;
+}
+
+static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
+{
+	struct gk20a *g = mm->g;
+	struct nvgpu_mem *mem;
+
+	while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
+		nvgpu_vidmem_clear(g, mem);
+
+		WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
+					&g->mm.vidmem.bytes_pending) < 0);
+		mem->size = 0;
+		mem->aperture = APERTURE_INVALID;
+
+		__nvgpu_mem_free_vidmem_alloc(g, mem);
+		nvgpu_kfree(g, mem);
+	}
+}
+
+static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
+{
+	struct mm_gk20a *mm = mm_ptr;
+
+	/*
+	 * Simple thread whose sole job is to periodically clear userspace
+	 * vidmem allocations that have been recently freed.
+	 *
+	 * Since it doesn't make sense to run unless there's pending work a
+	 * condition field is used to wait for work. When the DMA API frees a
+	 * userspace vidmem buf it enqueues it into the clear list and alerts us
+	 * that we have some work to do.
+	 */
+
+	while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
+		int ret;
+
+		/*
+		 * Wait for work but also make sure we should not be paused.
+		 */
+		ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
+			&mm->vidmem.clearing_thread_cond,
+			nvgpu_thread_should_stop(
+				&mm->vidmem.clearing_thread) ||
+			!nvgpu_list_empty(&mm->vidmem.clear_list_head),
+			0);
+		if (ret == -ERESTARTSYS)
+			continue;
+
+		/*
+		 * Use this lock to implement a pause mechanism. By taking this
+		 * lock some other code can prevent this thread from processing
+		 * work items.
+		 */
+		if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock))
+			continue;
+
+		nvgpu_vidmem_clear_pending_allocs(mm);
+
+		nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
+	}
+
+	return 0;
+}
+
 int nvgpu_vidmem_init(struct mm_gk20a *mm)
 {
 	struct gk20a *g = mm->g;
@@ -156,16 +329,39 @@ int nvgpu_vidmem_init(struct mm_gk20a *mm)
 	mm->vidmem.bootstrap_base = bootstrap_base;
 	mm->vidmem.bootstrap_size = bootstrap_size;
 
-	nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
+	err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
+	if (err)
+		goto fail;
 
-	INIT_WORK(&mm->vidmem.clear_mem_worker, nvgpu_vidmem_clear_mem_worker);
 	nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
 	nvgpu_init_list_node(&mm->vidmem.clear_list_head);
 	nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
+	nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
+	nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
+
+	/*
+	 * Start the thread off in the paused state. The thread doesn't have to
+	 * be running for this to work. It will be woken up later on in
+	 * finalize_poweron(). We won't necessarily have a CE context yet
+	 * either, so hypothetically one could cause a race where we try to
+	 * clear a vidmem struct before we have a CE context to do so.
+	 */
+	nvgpu_vidmem_thread_pause_sync(mm);
+
+	err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
+			nvgpu_vidmem_clear_pending_allocs_thr,
+			"vidmem-clear");
+	if (err)
+		goto fail;
 
 	gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
 
 	return 0;
+
+fail:
+	nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
+	nvgpu_vidmem_destroy(g);
+	return err;
 }
 
 int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
@@ -244,21 +440,6 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
 	return err;
 }
 
-struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm)
-{
-	struct nvgpu_mem *mem = NULL;
-
-	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
-	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
-		mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
-				nvgpu_mem, clear_list_entry);
-		nvgpu_list_del(&mem->clear_list_entry);
-	}
-	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
-
-	return mem;
-}
-
 static int nvgpu_vidmem_clear_all(struct gk20a *g)
 {
 	int err;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index e1bf2b4b6..02baf6838 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -97,8 +98,6 @@ int gk20a_prepare_poweroff(struct gk20a *g)
 	if (gk20a_fifo_is_engine_busy(g))
 		return -EBUSY;
 
-	gk20a_ce_suspend(g);
-
 	ret = gk20a_channel_suspend(g);
 	if (ret)
 		return ret;
@@ -111,6 +110,8 @@ int gk20a_prepare_poweroff(struct gk20a *g)
 	ret |= gk20a_mm_suspend(g);
 	ret |= gk20a_fifo_suspend(g);
 
+	gk20a_ce_suspend(g);
+
 	/* Disable GPCPLL */
 	if (g->ops.clk.suspend_clk_support)
 		ret |= g->ops.clk.suspend_clk_support(g);
@@ -323,6 +324,8 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		}
 	}
 
+	nvgpu_vidmem_thread_unpause(&g->mm);
+
 #if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
 	if (gk20a_platform_has_syncpoints(g) && g->syncpt_unit_size) {
 		if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 687951a9b..67ab307f1 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -978,9 +978,7 @@ int gk20a_mm_suspend(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
 
-#if defined(CONFIG_GK20A_VIDMEM)
-	cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
-#endif
+	nvgpu_vidmem_thread_pause_sync(&g->mm);
 
 	g->ops.mm.cbc_clean(g);
 	g->ops.mm.l2_flush(g, false);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 556cb234e..13698cd7c 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -36,6 +36,8 @@
 #include
 #include
 #include
+#include
+#include
 
 struct nvgpu_pd_cache;
 
@@ -272,7 +274,11 @@ struct mm_gk20a {
 		struct nvgpu_list_node clear_list_head;
 		struct nvgpu_mutex clear_list_mutex;
 
-		struct work_struct clear_mem_worker;
+		struct nvgpu_cond clearing_thread_cond;
+		struct nvgpu_thread clearing_thread;
+		struct nvgpu_mutex clearing_thread_lock;
+		nvgpu_atomic_t pause_count;
+
 		nvgpu_atomic64_t bytes_pending;
 	} vidmem;
 };
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 537409a82..6feacff72 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -272,6 +272,16 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
 			      struct nvgpu_mem *dest, struct nvgpu_mem *src,
 			      int start_page, int nr_pages);
 
+/*
+ * Really free a vidmem buffer. There's a fair amount of work involved in
+ * freeing vidmem buffers in the DMA API. This handles none of that - it only
+ * frees the underlying vidmem-specific structures used in vidmem buffers.
+ *
+ * This is implemented in the OS-specific code. If it's not necessary it can
+ * be a noop. But the symbol must at least be present.
+ */
+void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem);
+
 /*
  * Buffer accessors - wrap between begin() and end() if there is no permanent
  * kernel mapping for this buffer.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vidmem.h b/drivers/gpu/nvgpu/include/nvgpu/vidmem.h
index 9e9f83013..690f81640 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vidmem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vidmem.h
@@ -73,17 +73,19 @@ struct nvgpu_vidmem_buf *nvgpu_vidmem_user_alloc(struct gk20a *g, size_t bytes);
 
 void nvgpu_vidmem_buf_free(struct gk20a *g, struct nvgpu_vidmem_buf *buf);
 
+int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem);
+
 bool nvgpu_addr_is_vidmem_page_alloc(u64 addr);
 int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space);
 
-struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm);
-
 void nvgpu_vidmem_destroy(struct gk20a *g);
 int nvgpu_vidmem_init(struct mm_gk20a *mm);
 
-void nvgpu_vidmem_clear_mem_worker(struct work_struct *work);
 int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem);
 
+void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm);
+void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm);
+
 #else /* !defined(CONFIG_GK20A_VIDMEM) */
 
 /*
@@ -110,11 +112,6 @@ static inline int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
 	return -ENOSYS;
 }
 
-static inline struct nvgpu_mem *nvgpu_vidmem_get_pending_alloc(struct mm_gk20a *mm)
-{
-	return NULL;
-}
-
 static inline void nvgpu_vidmem_destroy(struct gk20a *g)
 {
 }
@@ -135,6 +132,14 @@ static inline int nvgpu_vidmem_clear(struct gk20a *g,
 	return -ENOSYS;
 }
 
+static inline void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
+{
+}
+
+static inline void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
+{
+}
+
 #endif /* !defined(CONFIG_GK20A_VIDMEM) */
 
 #endif /* __NVGPU_VIDMEM_H__ */
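
Note on the pause/unpause pair introduced above: it is a ref-counted freeze around the clearing thread. Only the 0 -> 1 transition takes the lock and only the 1 -> 0 transition releases it, while the thread uses a trylock before each batch of work. A minimal standalone sketch of that pattern is below; the pthread/stdatomic calls and the worker_* names are illustrative stand-ins for this note only, not the nvgpu_mutex/nvgpu_atomic/nvgpu_thread API used in the patch.

/*
 * Sketch of the pause ref-count used by the vidmem clearing thread.
 * Hypothetical names; nvgpu uses its own mutex/atomic/thread wrappers.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t worker_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int pause_count;

/* First pause (0 -> 1) takes the lock; nested calls only bump the count. */
static void worker_pause_sync(void)
{
	if (atomic_fetch_add(&pause_count, 1) == 0)
		pthread_mutex_lock(&worker_lock);
}

/* Last unpause (1 -> 0) drops the lock so the worker may run again. */
static void worker_unpause(void)
{
	if (atomic_fetch_sub(&pause_count, 1) == 1)
		pthread_mutex_unlock(&worker_lock);
}

/* The worker only processes items when it can take the lock. */
static bool worker_try_process(void)
{
	if (pthread_mutex_trylock(&worker_lock) != 0)
		return false;	/* paused - skip this round */

	/* ... clear pending buffers here ... */

	pthread_mutex_unlock(&worker_lock);
	return true;
}

int main(void)
{
	worker_pause_sync();		/* analogous to gk20a_mm_suspend() */
	printf("paused: %d\n", !worker_try_process());
	worker_unpause();		/* analogous to gk20a_finalize_poweron() */
	printf("paused: %d\n", !worker_try_process());
	return 0;
}

Because only the first pause and the last unpause touch the lock, suspend paths can nest (mm suspend, init-time pause, etc.) without deadlocking, which is the same property the patch relies on when nvgpu_vidmem_init() starts the thread in the paused state.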