mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: add mutex for runlist submit
We currently submit a new runlist and wait for the submit to complete
in gk20a_fifo_update_runlist_locked().

It is possible for multiple runlists to be updated in parallel by
multiple threads, since the lock taken by the caller of
gk20a_fifo_update_runlist_locked() is per-runlist. The concurrent
threads still construct their runlists into per-runlist buffers, but
there is a race condition when these runlists are submitted to the
hardware.

With an application that creates and destroys multiple contexts in
parallel, this race is hit and the hardware reports the error
interrupt NV_PFIFO_INTR_SCHED_ERROR_CODE_BAD_TSG, which means a bad
TSG was submitted.

Fix this by adding a global lock around the runlist submit-and-wait
sequence. This ensures that concurrent threads do not try to submit
runlists to the hardware at the same time.

Bug 200452543
Bug 2405416

Change-Id: I2660a2e5d9af1da400e7f865361722dc0914f96f
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1851114
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit e8001064ec
parent bbf70e1ce9
committed by Abdul Salam
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -600,6 +600,7 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 	}
 
 	gk20a_fifo_delete_runlist(f);
+	nvgpu_mutex_destroy(&f->runlist_submit_mutex);
 
 	nvgpu_kfree(g, f->pbdma_map);
 	f->pbdma_map = NULL;
@@ -919,6 +920,12 @@ int gk20a_init_fifo_setup_sw_common(struct gk20a *g)
 		return err;
 	}
 
+	err = nvgpu_mutex_init(&f->runlist_submit_mutex);
+	if (err) {
+		nvgpu_err(g, "failed to init runlist_submit_mutex");
+		return err;
+	}
+
 	g->ops.fifo.init_pbdma_intr_descs(f); /* just filling in data/tables */
 
 	f->num_channels = g->ops.fifo.get_num_fifos(g);
@@ -3583,6 +3590,7 @@ int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 		runlist->count = 0;
 	}
 
+	nvgpu_mutex_acquire(&f->runlist_submit_mutex);
 	g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
 
 	if (wait_for_finish) {
@@ -3590,6 +3598,7 @@ int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 
 		if (ret == -ETIMEDOUT) {
 			nvgpu_err(g, "runlist %d update timeout", runlist_id);
+			nvgpu_mutex_release(&f->runlist_submit_mutex);
 			/* trigger runlist update timeout recovery */
 			return ret;
 
@@ -3597,6 +3606,7 @@ int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			nvgpu_err(g, "runlist update interrupted");
 		}
 	}
+	nvgpu_mutex_release(&f->runlist_submit_mutex);
 
 	runlist->cur_buffer = new_buf;
 
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -185,6 +185,7 @@ struct fifo_gk20a {
 	struct nvgpu_list_node free_chs;
 	struct nvgpu_mutex free_chs_mutex;
 	struct nvgpu_mutex gr_reset_mutex;
+	struct nvgpu_mutex runlist_submit_mutex;
 
 	struct tsg_gk20a *tsg;
 	struct nvgpu_mutex tsg_inuse_mutex;
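To make the locking scheme described in the commit message easier to follow, below is a minimal userspace sketch of the pattern: each runlist is still built under its own per-runlist lock, while the hardware submit-and-wait sequence is serialized by a single global mutex shared by all runlists. This is an analogue using pthread mutexes, not nvgpu code; names such as update_runlist, hw_submit and hw_wait_pending are illustrative placeholders.

/*
 * Minimal userspace sketch (not nvgpu code) of the locking scheme:
 * per-runlist locks protect buffer construction, one global mutex
 * serializes the hardware submit + wait-for-finish step.
 */
#include <pthread.h>
#include <stdio.h>

#define NUM_RUNLISTS 2

struct runlist {
	pthread_mutex_t update_lock;	/* per-runlist: protects buffer construction */
	int count;			/* entries in the constructed runlist buffer */
};

static struct runlist runlists[NUM_RUNLISTS];

/* global: only one thread may submit to the hardware and wait at a time */
static pthread_mutex_t runlist_submit_mutex = PTHREAD_MUTEX_INITIALIZER;

/* stand-ins for the hardware submit and wait-for-pending steps */
static void hw_submit(int runlist_id, int count)
{
	printf("submit runlist %d with %d entries\n", runlist_id, count);
}

static void hw_wait_pending(int runlist_id)
{
	/* poll/wait until the hardware has consumed the new runlist */
}

static void update_runlist(int runlist_id, int new_count)
{
	struct runlist *rl = &runlists[runlist_id];

	pthread_mutex_lock(&rl->update_lock);
	rl->count = new_count;		/* build this runlist's buffer */

	/*
	 * Serialize the submit + wait sequence globally, even when the
	 * threads are updating different runlists.
	 */
	pthread_mutex_lock(&runlist_submit_mutex);
	hw_submit(runlist_id, rl->count);
	hw_wait_pending(runlist_id);
	pthread_mutex_unlock(&runlist_submit_mutex);

	pthread_mutex_unlock(&rl->update_lock);
}

static void *worker(void *arg)
{
	int id = *(int *)arg;

	update_runlist(id, id + 1);
	return NULL;
}

int main(void)
{
	pthread_t threads[NUM_RUNLISTS];
	int ids[NUM_RUNLISTS];

	for (int i = 0; i < NUM_RUNLISTS; i++)
		pthread_mutex_init(&runlists[i].update_lock, NULL);

	/* two threads updating different runlists concurrently */
	for (int i = 0; i < NUM_RUNLISTS; i++) {
		ids[i] = i;
		pthread_create(&threads[i], NULL, worker, &ids[i]);
	}
	for (int i = 0; i < NUM_RUNLISTS; i++)
		pthread_join(threads[i], NULL);

	return 0;
}

Without the global runlist_submit_mutex, two threads updating different runlists could interleave their submit and wait steps at the hardware, which is the race the commit message describes (observed as NV_PFIFO_INTR_SCHED_ERROR_CODE_BAD_TSG).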