From e8001064ec1b02e1ce687f47e93998a444e7700c Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 28 Sep 2018 18:19:11 +0530
Subject: [PATCH] gpu: nvgpu: add mutex for runlist submit

We currently submit the new runlist and wait for the submit to complete
in gk20a_fifo_update_runlist_locked().

It is possible for multiple runlists to be updated in parallel by
multiple threads, since the lock taken by the caller of
gk20a_fifo_update_runlist_locked() is per-runlist.

Note that the concurrent threads still construct their runlists in
per-runlist buffers. However, we still have a race condition while
submitting these runlists to the hardware.

With an application that creates and destroys multiple contexts in
parallel, this race condition is triggered and the h/w reports the
error interrupt NV_PFIFO_INTR_SCHED_ERROR_CODE_BAD_TSG, which means
that a bad TSG was submitted.

Fix this by adding a global lock around the runlist submit-and-wait
sequence. This ensures that concurrent threads do not try to submit
runlists to the hardware at the same time.

Bug 200452543
Bug 2405416

Change-Id: I2660a2e5d9af1da400e7f865361722dc0914f96f
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1851114
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 10 ++++++++++
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 3632963ac..03bebfa07 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -600,6 +600,7 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 	}
 
 	gk20a_fifo_delete_runlist(f);
+	nvgpu_mutex_destroy(&f->runlist_submit_mutex);
 
 	nvgpu_kfree(g, f->pbdma_map);
 	f->pbdma_map = NULL;
@@ -919,6 +920,12 @@ int gk20a_init_fifo_setup_sw_common(struct gk20a *g)
 		return err;
 	}
 
+	err = nvgpu_mutex_init(&f->runlist_submit_mutex);
+	if (err) {
+		nvgpu_err(g, "failed to init runlist_submit_mutex");
+		return err;
+	}
+
 	g->ops.fifo.init_pbdma_intr_descs(f); /* just filling in data/tables */
 
 	f->num_channels = g->ops.fifo.get_num_fifos(g);
@@ -3583,6 +3590,7 @@ int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 		runlist->count = 0;
 	}
 
+	nvgpu_mutex_acquire(&f->runlist_submit_mutex);
 	g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
 
 	if (wait_for_finish) {
@@ -3590,6 +3598,7 @@ int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 
 		if (ret == -ETIMEDOUT) {
 			nvgpu_err(g, "runlist %d update timeout", runlist_id);
+			nvgpu_mutex_release(&f->runlist_submit_mutex);
 			/* trigger runlist update timeout recovery */
 			return ret;
 
@@ -3597,6 +3606,7 @@ int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			nvgpu_err(g, "runlist update interrupted");
 		}
 	}
+	nvgpu_mutex_release(&f->runlist_submit_mutex);
 
 	runlist->cur_buffer = new_buf;
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 21922426c..60e8998a7 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -185,6 +185,7 @@ struct fifo_gk20a {
 	struct nvgpu_list_node free_chs;
 	struct nvgpu_mutex free_chs_mutex;
 	struct nvgpu_mutex gr_reset_mutex;
+	struct nvgpu_mutex runlist_submit_mutex;
 
 	struct tsg_gk20a *tsg;
 	struct nvgpu_mutex tsg_inuse_mutex;