From f1896e0a64e54152582a4e301eaf3d27c1832d6f Mon Sep 17 00:00:00 2001
From: Sagar Kamble <skamble@nvidia.com>
Date: Mon, 30 May 2022 11:17:17 +0530
Subject: [PATCH] gpu: nvgpu: acquire tsg ctx_init_lock when changing ctx state

The GR context associated with a channel is updated in various driver
paths. The sequence for doing so is: disable the TSG, preempt the TSG,
update the GR context or instance block, and then re-enable the TSG.
These operations, as well as the runlist updates for the channel, have
to be done under the TSG-specific ctx_init_lock to avoid races.

suspend_contexts and resume_contexts need special handling, which is
not covered in this patch.

Bug 3677982

Change-Id: I837257fe9d9ef3eb6f69f5d7e0707e0bb6d4ea72
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2720222
Reviewed-by: Scott Long <scottl@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/nvgpu/common/fifo/channel.c |  9 ++++++
 drivers/gpu/nvgpu/common/gr/gr_setup.c  | 38 ++++++++++++++++++-------
 drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c  |  5 ++++
 drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c  | 10 +++++++
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c  | 18 +++++++++---
 5 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 8e46d70de..aea277aeb 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -248,6 +248,7 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
 int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch)
 {
 	struct gk20a *g = ch->g;
+	struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
 	struct nvgpu_channel_sync_syncpt *sync_syncpt;
 	u32 new_syncpt = 0U;
 	u32 old_syncpt = g->ops.ramfc.get_syncpt(ch);
@@ -268,9 +269,12 @@ int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch)
 	}
 
 	if ((new_syncpt != 0U) && (new_syncpt != old_syncpt)) {
+		nvgpu_mutex_acquire(&tsg->ctx_init_lock);
+
 		/* disable channel */
 		err = nvgpu_channel_disable_tsg(g, ch);
 		if (err != 0) {
+			nvgpu_mutex_release(&tsg->ctx_init_lock);
 			nvgpu_err(g, "failed to disable channel/TSG");
 			return err;
 		}
@@ -288,6 +292,8 @@ int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch)
 		if (err != 0) {
 			nvgpu_err(g, "failed to enable channel/TSG");
 		}
+
+		nvgpu_mutex_release(&tsg->ctx_init_lock);
 	}
 
 	nvgpu_log_fn(g, "done");
@@ -296,6 +302,9 @@ out:
 	if (nvgpu_channel_enable_tsg(g, ch) != 0) {
 		nvgpu_err(g, "failed to enable channel/TSG");
 	}
+
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
+
 	return err;
 }
 #endif
diff --git a/drivers/gpu/nvgpu/common/gr/gr_setup.c b/drivers/gpu/nvgpu/common/gr/gr_setup.c
index ecb2e648f..470b6933f 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_setup.c
+++ b/drivers/gpu/nvgpu/common/gr/gr_setup.c
@@ -41,12 +41,16 @@
 static int nvgpu_gr_setup_zcull(struct gk20a *g, struct nvgpu_channel *c,
 				struct nvgpu_gr_ctx *gr_ctx)
 {
+	struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(c);
 	int ret = 0;
 
 	nvgpu_log_fn(g, " ");
 
+	nvgpu_mutex_acquire(&tsg->ctx_init_lock);
+
 	ret = nvgpu_channel_disable_tsg(g, c);
 	if (ret != 0) {
+		nvgpu_mutex_release(&tsg->ctx_init_lock);
 		nvgpu_err(g, "failed to disable channel/TSG");
 		return ret;
 	}
@@ -68,6 +72,8 @@ static int nvgpu_gr_setup_zcull(struct gk20a *g, struct nvgpu_channel *c,
 		nvgpu_err(g, "failed to re-enable channel/TSG");
 	}
 
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
+
 	return ret;
 
 out:
@@ -81,6 +87,8 @@ out:
 		nvgpu_err(g, "failed to enable channel/TSG");
 	}
 
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
+
 	return ret;
 }
 
@@ -185,25 +193,30 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 
 	nvgpu_mutex_acquire(&tsg->ctx_init_lock);
 
+	g->ops.tsg.disable(tsg);
+
+	err = g->ops.fifo.preempt_tsg(g, tsg);
+	if (err != 0) {
+		nvgpu_err(g, "preempt failed %d", err);
+		goto enable_tsg;
+	}
+
 	err = nvgpu_tsg_validate_class_veid_pbdma(c);
 	if (err != 0) {
 		nvgpu_err(g, "Invalid class/veid/pbdma config");
-		nvgpu_mutex_release(&tsg->ctx_init_lock);
-		goto out;
+		goto enable_tsg;
 	}
 
 	err = nvgpu_tsg_subctx_alloc_gr_subctx(g, c);
 	if (err != 0) {
 		nvgpu_err(g, "failed to alloc gr subctx");
-		nvgpu_mutex_release(&tsg->ctx_init_lock);
-		goto out;
+		goto enable_tsg;
 	}
 
 	err = nvgpu_tsg_subctx_setup_subctx_header(g, c);
 	if (err != 0) {
 		nvgpu_err(g, "failed to setup subctx header");
-		nvgpu_mutex_release(&tsg->ctx_init_lock);
-		goto out;
+		goto enable_tsg;
 	}
 
 	gr_ctx = tsg->gr_ctx;
@@ -211,8 +224,7 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c);
 	if (mappings == NULL) {
 		nvgpu_err(g, "fail to allocate/get ctx mappings struct");
-		nvgpu_mutex_release(&tsg->ctx_init_lock);
-		goto out;
+		goto enable_tsg;
 	}
 
 	err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image,
@@ -223,8 +235,7 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 	if (err != 0) {
 		nvgpu_err(g,
 			"failed to allocate gr ctx buffer");
-		nvgpu_mutex_release(&tsg->ctx_init_lock);
-		goto out;
+		goto enable_tsg;
 	}
 
 	nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid);
@@ -263,10 +274,17 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 
 	nvgpu_gr_ctx_mark_ctx_initialized(gr_ctx);
 
+	g->ops.tsg.enable(tsg);
+
 	nvgpu_mutex_release(&tsg->ctx_init_lock);
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
 	return 0;
+
+enable_tsg:
+	g->ops.tsg.enable(tsg);
+
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
 out:
 	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
 	   can be reused so no need to release them.
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
index fbbc6d968..87a3a17e5 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
@@ -63,6 +63,8 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 
 	nvgpu_log_fn(g, " ");
 
+	nvgpu_mutex_acquire(&tsg->ctx_init_lock);
+
 	g->ops.tsg.disable(tsg);
 
 	ret = g->ops.fifo.preempt_tsg(g, tsg);
@@ -75,6 +77,9 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 
 out:
 	g->ops.tsg.enable(tsg);
+
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c
index 2c1f40a5f..091e2aeee 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c
@@ -766,11 +766,15 @@ int gr_gp10b_set_boosted_ctx(struct nvgpu_channel *ch,
 	}
 
 	gr_ctx = tsg->gr_ctx;
+
+	nvgpu_mutex_acquire(&tsg->ctx_init_lock);
+
 	nvgpu_gr_ctx_set_boosted_ctx(gr_ctx, boost);
 	mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX);
 
 	err = nvgpu_channel_disable_tsg(g, ch);
 	if (err != 0) {
+		nvgpu_mutex_release(&tsg->ctx_init_lock);
 		return err;
 	}
 
@@ -792,6 +796,9 @@ int gr_gp10b_set_boosted_ctx(struct nvgpu_channel *ch,
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable channel/TSG");
 	}
+
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
+
 	return err;
 
 out:
@@ -805,6 +812,9 @@ out:
 		/* ch might not be bound to tsg anymore */
 		nvgpu_err(g, "failed to enable channel/TSG");
 	}
+
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
+
 	return err;
 }
 #endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 5f1e400ad..28b5408b7 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1963,27 +1963,33 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 	}
 
+	nvgpu_mutex_acquire(&tsg->ctx_init_lock);
+
 	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
 	if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
 		nvgpu_err(g, "invalid context mem");
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 
 	if (ctx_mem->size > (u64)UINT_MAX) {
 		nvgpu_err(ch->g, "ctx size is larger than expected");
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 
 	/* Check if the input buffer size equals the gr context size */
 	size = (u32)ctx_mem->size;
 	if (args->size != size) {
 		nvgpu_err(g, "size mismatch: %d != %d", args->size, size);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 
 	if (nvgpu_channel_disable_tsg(g, ch) != 0) {
 		nvgpu_err(g, "failed to disable channel/TSG");
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 
 	err = nvgpu_preempt_channel(g, ch);
@@ -1998,9 +2004,13 @@ done:
 	enable_err = nvgpu_channel_enable_tsg(g, ch);
 	if (enable_err != 0) {
 		nvgpu_err(g, "failed to re-enable channel/TSG");
+		nvgpu_mutex_release(&tsg->ctx_init_lock);
 		return (err != 0) ? err : enable_err;
 	}
 
+out:
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
+
 	return err;
 }