gpu: nvgpu: simplify job semaphore release in abort

Instead of looping over all jobs and releasing their semaphores
separately, do just one semaphore release. All the jobs use the same
sema index, and its final, maximum value is known.

Also move this resetting into ch->sync->set_min_eq_max() to be
consistent with syncpoints.

Change-Id: I03601aae67db0a65750c8df6b43387c042d383bd
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1680362
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Konsta Holtta <kholtta@nvidia.com>
Date:      2018-03-22 15:47:43 +02:00
Committer: mobile promotions
Commit:    d20a501dcb (parent 9b70ad2493)

3 changed files with 30 additions and 55 deletions
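In essence, the per-job semaphore loop collapses into a single
fast-forward of the hardware semaphore to its tracked maximum value. A
minimal pseudo-C sketch of the idea (the for_each_pending_job() helper
is hypothetical; the real changes follow in the diffs below):

	/* before: walk every pending job and reset via its post-fence */
	for_each_pending_job(ch, job) {
		if (job->post_fence->semaphore)
			nvgpu_semaphore_reset(job->post_fence->semaphore,
					      ch->hw_sema);
	}

	/* after: one reset to the known final value, shared by all jobs */
	ch->sync->set_min_eq_max(ch->sync);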


@@ -185,10 +185,6 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
 void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 {
-	struct channel_gk20a_job *job, *n;
-	bool released_job_semaphore = false;
-	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
-
 	/* synchronize with actual job cleanup */
 	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
@@ -200,48 +196,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 		ch->user_sync->set_safe_state(ch->user_sync);
 	nvgpu_mutex_release(&ch->sync_lock);
 
-	/* release all job semaphores (applies only to jobs that use
-	   semaphore synchronization) */
-	channel_gk20a_joblist_lock(ch);
-	if (pre_alloc_enabled) {
-		int tmp_get = ch->joblist.pre_alloc.get;
-		int put = ch->joblist.pre_alloc.put;
-
-		/*
-		 * ensure put is read before any subsequent reads.
-		 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
-		 */
-		nvgpu_smp_rmb();
-
-		while (tmp_get != put) {
-			job = &ch->joblist.pre_alloc.jobs[tmp_get];
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-			tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
-		}
-	} else {
-		nvgpu_list_for_each_entry_safe(job, n,
-				&ch->joblist.dynamic.jobs,
-				channel_gk20a_job, list) {
-			if (job->post_fence->semaphore) {
-				nvgpu_semaphore_reset(
-					job->post_fence->semaphore,
-					ch->hw_sema);
-				released_job_semaphore = true;
-			}
-		}
-	}
-	channel_gk20a_joblist_unlock(ch);
-
 	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
 
-	if (released_job_semaphore)
-		nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
-
 	/*
 	 * When closing the channel, this scheduled update holds one ref which
 	 * is waited for before advancing with freeing.
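For context, the call site that now performs the semaphore fast-forward
is the sync_lock block just above this hunk; a reconstruction from the
visible context lines (the NULL checks are assumptions):

	/* ensure no fences are pending */
	nvgpu_mutex_acquire(&ch->sync_lock);
	if (ch->sync)
		ch->sync->set_min_eq_max(ch->sync);
	if (ch->user_sync)
		ch->user_sync->set_safe_state(ch->user_sync);
	nvgpu_mutex_release(&ch->sync_lock);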


@@ -646,7 +646,18 @@ static int gk20a_channel_semaphore_incr_user(
 
 static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
 {
-	/* Nothing to do. */
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sp->c;
+	bool updated;
+
+	if (!c->hw_sema)
+		return;
+
+	updated = nvgpu_semaphore_reset(c->hw_sema);
+
+	if (updated)
+		nvgpu_cond_broadcast_interruptible(&c->semaphore_wq);
 }
 
 static void gk20a_channel_semaphore_set_safe_state(struct gk20a_channel_sync *s)
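The broadcast on semaphore_wq is what wakes anyone blocked on a job's
post-fence. A hedged sketch of such a waiter, assuming nvgpu's
NVGPU_COND_WAIT_INTERRUPTIBLE() macro and hypothetical fence/timeout_ms
variables; nvgpu_semaphore_is_acquired() is visible in the next hunk's
context:

	/* sleep until something (e.g. the abort path) fast-forwards the
	 * sema past this fence's threshold and broadcasts semaphore_wq */
	ret = NVGPU_COND_WAIT_INTERRUPTIBLE(&c->semaphore_wq,
			!nvgpu_semaphore_is_acquired(fence->semaphore),
			timeout_ms);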


@@ -276,28 +276,36 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 }
 
 /*
- * Fast-forward the hw sema to the threshold represented by sema_thresh.
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
  */
-static inline void nvgpu_semaphore_reset(struct nvgpu_semaphore *sema_thresh,
-		struct nvgpu_semaphore_int *hw_sema)
+static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
 {
-	u32 current_val;
-	u32 threshold = nvgpu_semaphore_get_value(sema_thresh);
-
-	current_val = nvgpu_semaphore_read(sema_thresh);
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
 
 	/*
 	 * If the semaphore has already reached the value we would write then
-	 * this is really just a NO-OP.
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
 	 */
-	if (__nvgpu_semaphore_value_released(threshold, current_val))
-		return;
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+						     current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
 
 	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
 		     hw_sema->location.offset, threshold);
 
 	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
 			     hw_sema->ch->chid, current_val, threshold);
+
+	return true;
 }
 
 /*
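The WARN_ON relies on a wrap-aware value comparison: current_val may
legally trail threshold across a u32 wrap, but must never be ahead of
threshold + 1. A standalone C demo of the half-range comparison that
__nvgpu_semaphore_value_released() is assumed to perform (the
value_released() helper here is a stand-in, not the nvgpu code):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* "new_val" counts as released w.r.t. "max_val" if it lies in the
	 * half-range at or ahead of max_val, even across a u32 wrap. */
	static bool value_released(uint32_t max_val, uint32_t new_val)
	{
		return (uint32_t)(new_val - max_val) < 0x80000000U;
	}

	int main(void)
	{
		uint32_t threshold = 0xfffffffeU; /* tracked max, near wrap */

		/* HW value one step past the max: reset would WARN and bail */
		printf("%d\n", value_released(threshold + 1, 0xffffffffU));
		/* HW value still catching up: reset proceeds and writes */
		printf("%d\n", value_released(threshold + 1, 0xfffffff0U));
		return 0;
	}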