gpu: nvgpu: implement sync refcounting

We currently free the sync object when we find the job list empty.
If aggressive_sync_destroy is set to true, we also try to free the
sync during the channel_unbind() call.

But we rarely end up freeing the sync from channel_unbind(), since
freeing it when the job list is empty is already aggressive enough.

Hence, remove the sync-free code from channel_unbind().

Implement refcounting for the sync object (sketched below):
- get a reference while submitting a job (and
  allocate the sync if it is not allocated already)
- put the reference while freeing the job
- if the refcount drops to zero and
  aggressive_sync_destroy is set, free the sync
- if aggressive_sync_destroy is not set, free the
  sync at channel close time
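
A minimal userspace sketch of this get/put scheme, to make the
intended sync lifetime concrete. The names (sync_obj, job_submit,
job_cleanup, channel_close) are made up for illustration and the
locking the driver takes around these steps is omitted; this is not
the nvgpu implementation itself.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct sync_obj {
	atomic_int refcount;
};

static void sync_destroy(struct sync_obj *s)
{
	free(s);
}

/* submit: allocate the sync lazily, then take a reference */
static void job_submit(struct sync_obj **sync)
{
	if (!*sync) {
		*sync = calloc(1, sizeof(**sync));
		atomic_init(&(*sync)->refcount, 0);
	}
	atomic_fetch_add(&(*sync)->refcount, 1);
}

/* job cleanup: drop the reference; destroy eagerly only if asked to */
static void job_cleanup(struct sync_obj **sync, bool aggressive_destroy)
{
	if (atomic_fetch_sub(&(*sync)->refcount, 1) == 1 &&
	    aggressive_destroy) {
		sync_destroy(*sync);
		*sync = NULL;
	}
}

/* channel close: anything that survived eager destruction is freed here */
static void channel_close(struct sync_obj **sync)
{
	if (*sync) {
		sync_destroy(*sync);
		*sync = NULL;
	}
}

int main(void)
{
	struct sync_obj *sync = NULL;

	job_submit(&sync);
	job_cleanup(&sync, false);	/* refcount hits 0 but sync is kept */
	channel_close(&sync);		/* freed at close time instead */
	return 0;
}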

Bug 200187553

Change-Id: I74e24adb15dc26a375ebca1fdd017b3ad6d57b61
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120410
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Deepak Nibade authored 2016-04-18 15:46:10 +05:30; committed by Terje Bergstrom
parent 1c96bc6942, commit e0c9da1fe9
4 changed files with 23 additions and 45 deletions


@@ -370,7 +370,6 @@ static void channel_gk20a_bind(struct channel_gk20a *c)
void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
{
struct gk20a *g = ch_gk20a->g;
struct gk20a_platform *platform = gk20a_get_platform(g->dev);
gk20a_dbg_fn("");
@@ -380,18 +379,6 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
ccsr_channel_inst_bind_false_f());
ch_gk20a->bound = false;
/*
* if we are agrressive then we can destroy the syncpt
* resource at this point
* if not, then it will be destroyed at channel_free()
*/
mutex_lock(&ch_gk20a->sync_lock);
if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
ch_gk20a->sync->destroy(ch_gk20a->sync);
ch_gk20a->sync = NULL;
}
mutex_unlock(&ch_gk20a->sync_lock);
}
int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
@@ -954,7 +941,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
/* sync must be destroyed before releasing channel vm */
mutex_lock(&ch->sync_lock);
if (ch->sync) {
ch->sync->destroy(ch->sync);
gk20a_channel_sync_destroy(ch->sync);
ch->sync = NULL;
}
mutex_unlock(&ch->sync_lock);
@@ -1922,8 +1909,18 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
gk20a_channel_timeout_stop(c);
if (c->sync)
mutex_lock(&c->sync_lock);
if (c->sync) {
c->sync->signal_timeline(c->sync);
if (atomic_dec_and_test(&c->sync->refcount) &&
platform->aggressive_sync_destroy) {
gk20a_channel_sync_destroy(c->sync);
c->sync = NULL;
}
} else {
WARN_ON(1);
}
mutex_unlock(&c->sync_lock);
if (job->num_mapped_buffers)
gk20a_vm_put_buffers(vm, job->mapped_buffers,
@@ -1950,23 +1947,6 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
gk20a_idle(g->dev);
}
/*
* If job list is empty then channel is idle and we can free
* the syncpt here (given aggressive_destroy flag is set)
* Note: check if last submit is complete before destroying
* the sync resource
*/
if (list_empty(&c->jobs)) {
mutex_lock(&c->sync_lock);
mutex_lock(&c->last_submit.fence_lock);
if (c->sync && platform->aggressive_sync_destroy &&
gk20a_fence_is_expired(c->last_submit.post_fence)) {
c->sync->destroy(c->sync);
c->sync = NULL;
}
mutex_unlock(&c->last_submit.fence_lock);
mutex_unlock(&c->sync_lock);
}
mutex_unlock(&c->jobs_lock);
mutex_unlock(&c->submit_lock);
@@ -2121,6 +2101,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
}
new_sync_created = true;
}
atomic_inc(&c->sync->refcount);
mutex_unlock(&c->sync_lock);
if (g->ops.fifo.resetup_ramfc && new_sync_created) {


@@ -356,6 +356,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
atomic_set(&sp->ops.refcount, 0);
sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
sp->ops.incr = gk20a_channel_syncpt_incr;
@@ -711,6 +712,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
if (!sema->timeline)
goto clean_up;
#endif
atomic_set(&sema->ops.refcount, 0);
sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
sema->ops.incr = gk20a_channel_semaphore_incr;
@@ -727,6 +729,11 @@ clean_up:
return NULL;
}
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
{
sync->destroy(sync);
}
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
{
#ifdef CONFIG_TEGRA_GK20A


@@ -27,6 +27,8 @@ struct gk20a_semaphore;
struct gk20a_fence;
struct gk20a_channel_sync {
atomic_t refcount;
/* Generate a gpu wait cmdbuf from syncpoint.
* Returns
* - a gpu cmdbuf that performs the wait when executed,
@@ -91,13 +93,11 @@ struct gk20a_channel_sync {
/* Returns the sync point id or negative number if no syncpt*/
int (*syncpt_id)(struct gk20a_channel_sync *s);
/* flag to set sync destroy aggressiveness */
bool aggressive_destroy;
/* Free the resources allocated by gk20a_channel_sync_create. */
void (*destroy)(struct gk20a_channel_sync *s);
};
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
#endif


@@ -59,16 +59,6 @@ static void vgpu_channel_unbind(struct channel_gk20a *ch)
}
ch->bound = false;
/*
* if we are agrressive then we can destroy the syncpt
* resource at this point
* if not, then it will be destroyed at channel_free()
*/
if (ch->sync && platform->aggressive_sync_destroy) {
ch->sync->destroy(ch->sync);
ch->sync = NULL;
}
}
static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)