gpu: nvgpu: clear vidmem buffers in worker

We clear buffers allocated in vidmem in the buffer free path.
But clearing a buffer requires submitting CE jobs, and this
could cause issues/races if the free is called from a critical
path.

Hence, solve this by moving the buffer clear/free to a worker.

gk20a_gmmu_free_attr_vid() now just puts the mem_desc on a
list and schedules the worker. The worker thread then walks
the list and clears/frees the allocations.
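
As background, a minimal, self-contained sketch of this deferred-free
pattern in plain kernel C (pending_buf, defer_free and clear_with_ce are
illustrative names only; the real implementation is in the diff below):

    #include <linux/list.h>
    #include <linux/mutex.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct pending_buf {
            struct list_head entry;
            /* ... allocation details (sgt, size, ...) ... */
    };

    static LIST_HEAD(pending_list);
    static DEFINE_MUTEX(pending_lock);

    /* Worker: drain the list, clearing and freeing each entry. */
    static void clear_worker_fn(struct work_struct *work)
    {
            struct pending_buf *buf;

            for (;;) {
                    mutex_lock(&pending_lock);
                    buf = list_first_entry_or_null(&pending_list,
                                    struct pending_buf, entry);
                    if (buf)
                            list_del_init(&buf->entry);
                    mutex_unlock(&pending_lock);

                    if (!buf)
                            break;
                    /* clear_with_ce(buf); submit CE clear jobs here */
                    kfree(buf);
            }
    }
    static DECLARE_WORK(clear_worker, clear_worker_fn);

    /* Free path: queue the buffer and kick the worker; no CE jobs here. */
    static void defer_free(struct pending_buf *buf)
    {
            mutex_lock(&pending_lock);
            list_add_tail(&buf->entry, &pending_list);
            mutex_unlock(&pending_lock);
            schedule_work(&clear_worker);
    }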

In struct gk20a_vidmem_buf, the mem field was embedded in the
structure. But since the free of mem is now delayed, convert
this field into a pointer and allocate it dynamically.
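
A short sketch of the lifetime issue behind this change (the struct and
field names are from the patch; the comment is just illustrative):

    struct gk20a_vidmem_buf {
            struct gk20a *g;
            struct mem_desc *mem;   /* was: struct mem_desc mem; */
            /* ... */
    };

    /*
     * On release, the buf itself is freed right away, but *mem must stay
     * alive on the clear list until the worker has cleared the memory;
     * the worker kfree()s it at the end. An embedded mem_desc would die
     * together with the buf, so it has to be a separate allocation.
     */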

Since the free of vidmem memory is delayed, it is now possible
to hit OOM conditions during allocation. Hence, while
allocating, block until sufficient memory is available, with
an upper limit of 1 second.
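
A sketch of that bounded wait, assuming a hypothetical try_alloc()
helper (the 1 second cap and the 100-300 us sleep mirror the values in
the diff below):

    #include <linux/delay.h>
    #include <linux/jiffies.h>
    #include <linux/types.h>

    static u64 try_alloc(size_t size);      /* hypothetical allocator call */

    /* Retry the allocation for at most 1 s, sleeping briefly between
     * attempts so the clear worker gets a chance to return memory. */
    static u64 alloc_with_timeout(size_t size)
    {
            unsigned long end_jiffies = jiffies + msecs_to_jiffies(1000);
            u64 addr = 0;

            do {
                    addr = try_alloc(size);
                    if (addr)
                            break;
                    usleep_range(100, 300); /* possible OOM, wait and retry */
            } while (time_before(jiffies, end_jiffies));

            return addr;
    }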

Jira DNVGPU-84

Change-Id: I7925590644afae50b6fc04c6e1e43bbaa1c220fd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1201346
(cherry picked from commit b4dec4a30de2431369d677acca00e420f8e581a5)
Reviewed-on: http://git-master/r/1210950
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Authored by: Deepak Nibade
Date: 2016-08-10 20:39:47 +05:30
Committed by: mobile promotions
Parent: f79639f618
Commit: 6a48f4b335
2 changed files with 109 additions and 25 deletions

@@ -53,6 +53,10 @@
*/
#define GK20A_FORCE_PRAMIN_DEFAULT false
+ #if defined(CONFIG_GK20A_VIDMEM)
+ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work);
+ #endif
int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
{
void *cpu_va;
@@ -437,7 +441,7 @@ struct gk20a_dmabuf_priv {
struct gk20a_vidmem_buf {
struct gk20a *g;
- struct mem_desc mem;
+ struct mem_desc *mem;
struct dma_buf *dmabuf;
void *dmabuf_priv;
void (*dmabuf_priv_delete)(void *);
@@ -882,6 +886,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
mm->vidmem.bootstrap_base = bootstrap_base;
mm->vidmem.bootstrap_size = bootstrap_size;
+ INIT_WORK(&mm->vidmem_clear_mem_worker, gk20a_vidmem_clear_mem_worker);
+ INIT_LIST_HEAD(&mm->vidmem.clear_list_head);
+ mutex_init(&mm->vidmem.clear_list_mutex);
gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
#endif
@@ -1988,7 +1996,7 @@ static struct sg_table *gk20a_vidbuf_map_dma_buf(
{
struct gk20a_vidmem_buf *buf = attach->dmabuf->priv;
- return buf->mem.sgt;
+ return buf->mem->sgt;
}
static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
@@ -2006,7 +2014,7 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
if (buf->dmabuf_priv)
buf->dmabuf_priv_delete(buf->dmabuf_priv);
- gk20a_gmmu_free(buf->g, &buf->mem);
+ gk20a_gmmu_free(buf->g, buf->mem);
kfree(buf);
}
@@ -2065,12 +2073,12 @@ static struct dma_buf *gk20a_vidbuf_export(struct gk20a_vidmem_buf *buf)
exp_info.priv = buf;
exp_info.ops = &gk20a_vidbuf_ops;
- exp_info.size = buf->mem.size;
+ exp_info.size = buf->mem->size;
exp_info.flags = O_RDWR;
return dma_buf_export(&exp_info);
#else
- return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem.size,
+ return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem->size,
O_RDWR, NULL);
#endif
}
@@ -2112,10 +2120,14 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
}
}
- err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem);
- if (err)
+ buf->mem = kzalloc(sizeof(struct mem_desc), GFP_KERNEL);
+ if (!buf->mem)
goto err_kfree;
+ err = gk20a_gmmu_alloc_vid(g, bytes, buf->mem);
+ if (err)
+ goto err_memfree;
buf->dmabuf = gk20a_vidbuf_export(buf);
if (IS_ERR(buf->dmabuf)) {
err = PTR_ERR(buf->dmabuf);
@@ -2135,7 +2147,9 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
return fd;
err_bfree:
- gk20a_gmmu_free(g, &buf->mem);
+ gk20a_gmmu_free(g, buf->mem);
+ err_memfree:
+ kfree(buf->mem);
err_kfree:
kfree(buf);
return err;
@@ -2831,7 +2845,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem)
return -EINVAL;
alloc = (struct gk20a_page_alloc *)
- g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
+ sg_dma_address(mem->sgt->sgl);
list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
if (gk20a_last_fence)
@@ -2882,12 +2896,28 @@ int gk20a_gmmu_alloc_attr_vid(struct gk20a *g, enum dma_attr attr,
return gk20a_gmmu_alloc_attr_vid_at(g, attr, size, mem, 0);
}
+ #if defined(CONFIG_GK20A_VIDMEM)
+ static u64 __gk20a_gmmu_alloc(struct gk20a_allocator *allocator, dma_addr_t at,
+ size_t size)
+ {
+ u64 addr = 0;
+ if (at)
+ addr = gk20a_alloc_fixed(allocator, at, size);
+ else
+ addr = gk20a_alloc(allocator, size);
+ return addr;
+ }
+ #endif
int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
size_t size, struct mem_desc *mem, dma_addr_t at)
{
#if defined(CONFIG_GK20A_VIDMEM)
u64 addr;
int err;
+ unsigned long end_jiffies = jiffies + msecs_to_jiffies(1000);
struct gk20a_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
&g->mm.vidmem.allocator :
&g->mm.vidmem.bootstrap_allocator;
@@ -2901,19 +2931,21 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
* are not done anyway */
WARN_ON(attr != 0 && attr != DMA_ATTR_NO_KERNEL_MAPPING);
- if (at) {
- addr = gk20a_alloc_fixed(vidmem_alloc, at, size);
- if (!addr)
- return -ENOMEM;
+ do {
+ addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size);
+ if (!addr) /* Possible OOM */
+ usleep_range(100, 300);
+ else
+ break;
+ } while (time_before(jiffies, end_jiffies));
+ if (!addr)
+ return -ENOMEM;
+ if (at)
mem->fixed = true;
- } else {
- addr = gk20a_alloc(vidmem_alloc, size);
- if (!addr)
- return -ENOMEM;
+ else
mem->fixed = false;
- }
mem->sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!mem->sgt) {
@@ -2931,6 +2963,8 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
mem->size = size;
mem->aperture = APERTURE_VIDMEM;
+ INIT_LIST_HEAD(&mem->clear_list_entry);
gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
return 0;
@@ -2949,11 +2983,18 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
struct mem_desc *mem)
{
#if defined(CONFIG_GK20A_VIDMEM)
- gk20a_gmmu_clear_vidmem_mem(g, mem);
- gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl));
- gk20a_free_sgtable(&mem->sgt);
- mem->size = 0;
- mem->aperture = APERTURE_INVALID;
+ bool was_empty;
+ mutex_lock(&g->mm.vidmem.clear_list_mutex);
+ was_empty = list_empty(&g->mm.vidmem.clear_list_head);
+ list_add_tail(&mem->clear_list_entry,
+ &g->mm.vidmem.clear_list_head);
+ mutex_unlock(&g->mm.vidmem.clear_list_mutex);
+ if (was_empty) {
+ cancel_work_sync(&g->mm.vidmem_clear_mem_worker);
+ schedule_work(&g->mm.vidmem_clear_mem_worker);
+ }
#endif
}
@@ -2975,6 +3016,42 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
return gk20a_gmmu_free_attr(g, 0, mem);
}
+ #if defined(CONFIG_GK20A_VIDMEM)
+ static struct mem_desc *get_pending_mem_desc(struct mm_gk20a *mm)
+ {
+ struct mem_desc *mem = NULL;
+ mutex_lock(&mm->vidmem.clear_list_mutex);
+ mem = list_first_entry_or_null(&mm->vidmem.clear_list_head,
+ struct mem_desc, clear_list_entry);
+ if (mem)
+ list_del_init(&mem->clear_list_entry);
+ mutex_unlock(&mm->vidmem.clear_list_mutex);
+ return mem;
+ }
+ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work)
+ {
+ struct mm_gk20a *mm = container_of(work, struct mm_gk20a,
+ vidmem_clear_mem_worker);
+ struct gk20a *g = mm->g;
+ struct mem_desc *mem;
+ while ((mem = get_pending_mem_desc(mm)) != NULL) {
+ gk20a_gmmu_clear_vidmem_mem(g, mem);
+ gk20a_free(&g->mm.vidmem.allocator,
+ sg_dma_address(mem->sgt->sgl));
+ gk20a_free_sgtable(&mem->sgt);
+ mem->size = 0;
+ mem->aperture = APERTURE_INVALID;
+ kfree(mem);
+ }
+ }
+ #endif
u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
u32 sysmem_mask, u32 vidmem_mask)
{
@@ -3544,7 +3621,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
if (sgt) {
alloc = (struct gk20a_page_alloc *)
- g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
+ sg_dma_address(sgt->sgl);
list_for_each_entry(chunk, &alloc->alloc_chunks,
list_entry) {
@@ -4918,6 +4995,8 @@ int gk20a_mm_suspend(struct gk20a *g)
{
gk20a_dbg_fn("");
+ cancel_work_sync(&g->mm.vidmem_clear_mem_worker);
g->ops.mm.cbc_clean(g);
g->ops.mm.l2_flush(g, false);

@@ -71,6 +71,7 @@ struct mem_desc {
size_t size;
u64 gpu_va;
bool fixed; /* vidmem only */
+ struct list_head clear_list_entry; /* vidmem only */
};
struct mem_desc_sub {
@@ -414,7 +415,11 @@ struct mm_gk20a {
u32 ce_ctx_id;
bool cleared;
+ struct list_head clear_list_head;
+ struct mutex clear_list_mutex;
} vidmem;
+ struct work_struct vidmem_clear_mem_worker;
};
int gk20a_mm_init(struct mm_gk20a *mm);