From 9be8fb80a2db263acad3db59365b6d4861896542 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?=
Date: Tue, 12 Oct 2021 16:08:48 +0300
Subject: [PATCH] gpu: nvgpu: make tsgs domain aware
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Start transitioning from an assumption of a single runlist buffer to
the domain based approach where a TSG is a participant of a scheduling
domain that then owns a runlist buffer used for hardware scheduling.

Concretely, move the concept of a runlist domain up to the users of
the runlist code. Modifications to a runlist need to specify which
domain is modified. There is still only the default domain that is
created at boot.

Jira NVGPU-6425

Change-Id: Id9a29cff35c94e0d7e195db382d643e16025282d
Signed-off-by: Konsta Hölttä
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2621213
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-cert
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
Tested-by: mobile promotions
GVS: Gerrit_Virtual_Submit
---
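Note (below the cut, so not part of the commit message proper): a
minimal, self-contained sketch of the new calling convention, for
illustration only. The types and names below are simplified stand-ins
rather than the real nvgpu declarations; only the pattern itself,
callers resolving a TSG's rl_domain and passing it down to the runlist
code, is taken from this patch.

#include <stdio.h>

struct runlist_domain {
	const char *name;			/* e.g. "(default)" */
};

struct runlist {
	int id;
	struct runlist_domain default_domain;	/* created at boot */
};

struct tsg {
	struct runlist *runlist;
	struct runlist_domain *rl_domain;	/* new: the bound domain */
};

/*
 * Stand-in for nvgpu_runlist_update_locked(): the domain whose buffer
 * is rebuilt is now an explicit argument instead of being read from
 * the runlist itself.
 */
static int runlist_update(struct runlist *rl, struct runlist_domain *dom)
{
	printf("rebuild runlist %d from domain %s\n", rl->id, dom->name);
	return 0;
}

int main(void)
{
	struct runlist rl = { 0, { "(default)" } };
	struct tsg tsg = { &rl, &rl.default_domain };

	/* Channel/TSG paths pass the TSG's own domain... */
	(void)runlist_update(tsg.runlist, tsg.rl_domain);

	/* ...while whole-runlist reloads must name a domain explicitly. */
	return runlist_update(&rl, &rl.default_domain);
}

The design point: the runlist code no longer guesses which buffer to
rebuild; whoever triggers the update names the domain, which leaves
room for multiple domains per runlist later.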
 drivers/gpu/nvgpu/common/fifo/runlist.c       | 23 ++++++--
 drivers/gpu/nvgpu/common/fifo/tsg.c           |  8 ++-
 .../gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c | 57 ++++++++++---------
 .../gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h |  4 +-
 drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c        |  2 +-
 drivers/gpu/nvgpu/hal/rc/rc_gv11b.c           |  2 +-
 .../gpu/nvgpu/include/nvgpu/gops/runlist.h    |  2 +
 drivers/gpu/nvgpu/include/nvgpu/runlist.h     |  2 +
 drivers/gpu/nvgpu/include/nvgpu/tsg.h         |  5 ++
 9 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/runlist.c b/drivers/gpu/nvgpu/common/fifo/runlist.c
index 9090da0b4..9088d46d3 100644
--- a/drivers/gpu/nvgpu/common/fifo/runlist.c
+++ b/drivers/gpu/nvgpu/common/fifo/runlist.c
@@ -402,13 +402,13 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
 }
 
 int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl,
+		struct nvgpu_runlist_domain *domain,
 		struct nvgpu_channel *ch, bool add, bool wait_for_finish)
 {
 	int ret = 0;
 	bool add_entries;
 	struct nvgpu_runlist_mem *mem_tmp;
-	struct nvgpu_runlist_domain *domain = rl->domain;
 
 	if (ch != NULL) {
 		bool update = nvgpu_runlist_modify_active_locked(g, domain,
 				ch, add);
@@ -524,6 +524,7 @@ int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next,
    (ch == NULL && !add) means remove all active channels from runlist.
    (ch == NULL && add) means restore all active channels on runlist.
 */
 static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
+		struct nvgpu_runlist_domain *domain,
 		struct nvgpu_channel *ch, bool add, bool wait_for_finish)
 {
@@ -540,7 +541,7 @@ static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
 	mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
 			PMU_MUTEX_ID_FIFO, &token);
 #endif
-	ret = nvgpu_runlist_update_locked(g, rl, ch, add, wait_for_finish);
+	ret = nvgpu_runlist_update_locked(g, rl, domain, ch, add, wait_for_finish);
 #ifdef CONFIG_NVGPU_LS_PMU
 	if (mutex_ret == 0) {
 		if (nvgpu_pmu_lock_release(g, g->pmu,
@@ -562,15 +563,23 @@ int nvgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl,
 		struct nvgpu_channel *ch,
 		bool add, bool wait_for_finish)
 {
+	struct nvgpu_tsg *tsg = NULL;
+
 	nvgpu_assert(ch != NULL);
 
-	return nvgpu_runlist_do_update(g, rl, ch, add, wait_for_finish);
+	tsg = nvgpu_tsg_from_ch(ch);
+	if (tsg == NULL) {
+		return -EINVAL;
+	}
+
+	return nvgpu_runlist_do_update(g, rl, tsg->rl_domain, ch, add, wait_for_finish);
 }
 
 int nvgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl,
-		bool add, bool wait_for_finish)
+		struct nvgpu_runlist_domain *domain,
+		bool add, bool wait_for_finish)
 {
-	return nvgpu_runlist_do_update(g, rl, NULL, add, wait_for_finish);
+	return nvgpu_runlist_do_update(g, rl, domain, NULL, add, wait_for_finish);
 }
 
 int nvgpu_runlist_reload_ids(struct gk20a *g, u32 runlist_ids, bool add)
@@ -589,7 +598,9 @@ int nvgpu_runlist_reload_ids(struct gk20a *g, u32 runlist_ids, bool add)
 	for_each_set_bit(runlist_id, &ulong_runlist_ids, 32U) {
 		/* Capture the last failure error code */
 		errcode = g->ops.runlist.reload(g,
-			f->runlists[runlist_id], add, true);
+			f->runlists[runlist_id],
+			f->runlists[runlist_id]->domain,
+			add, true);
 		if (errcode != 0) {
 			nvgpu_err(g,
 				"failed to update_runlist %lu %d",
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index fb51e2097..ff6c9694c 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -109,6 +109,8 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
 	 */
 	if (tsg->runlist == NULL) {
 		tsg->runlist = ch->runlist;
+		tsg->rl_domain = nvgpu_rl_domain_get(g, tsg->runlist->id, "(default)");
+		WARN_ON(tsg->rl_domain == NULL);
 	} else {
 		if (tsg->runlist != ch->runlist) {
 			nvgpu_err(tsg->g,
@@ -693,7 +695,7 @@ int nvgpu_tsg_set_interleave(struct nvgpu_tsg *tsg, u32 level)
 		return 0;
 	}
 
-	return g->ops.runlist.reload(g, tsg->runlist, true, true);
+	return g->ops.runlist.reload(g, tsg->runlist, tsg->rl_domain, true, true);
 }
 
 int nvgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us)
@@ -715,7 +717,7 @@ int nvgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us)
 		return 0;
 	}
 
-	return g->ops.runlist.reload(g, tsg->runlist, true, true);
+	return g->ops.runlist.reload(g, tsg->runlist, tsg->rl_domain, true, true);
 }
 
 u32 nvgpu_tsg_get_timeslice(struct nvgpu_tsg *tsg)
@@ -742,7 +744,7 @@ int nvgpu_tsg_set_long_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us)
 		return 0;
 	}
 
-	return g->ops.runlist.reload(g, tsg->runlist, true, true);
+	return g->ops.runlist.reload(g, tsg->runlist, tsg->rl_domain, true, true);
 }
 
 #endif
diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c
index 3eacccc25..e68d3fb77 100644
--- a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c
@@ -34,8 +34,9 @@
 #include "runlist_vgpu.h"
"common/vgpu/ivc/comm_vgpu.h" -static int vgpu_submit_runlist(struct gk20a *g, u64 handle, u8 runlist_id, - u16 *runlist, u32 num_entries) +static int vgpu_submit_runlist(struct gk20a *g, u64 handle, + struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *domain) { struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_runlist_params *p; @@ -51,7 +52,7 @@ static int vgpu_submit_runlist(struct gk20a *g, u64 handle, u8 runlist_id, return -EINVAL; } - size = sizeof(*runlist) * num_entries; + size = sizeof(u16) * domain->mem->count; if (oob_size < size) { err = -ENOMEM; goto done; @@ -60,10 +61,10 @@ static int vgpu_submit_runlist(struct gk20a *g, u64 handle, u8 runlist_id, msg.cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST; msg.handle = handle; p = &msg.params.runlist; - p->runlist_id = runlist_id; - p->num_entries = num_entries; + p->runlist_id = nvgpu_safe_cast_u32_to_u8(runlist->id); + p->num_entries = domain->mem->count; - nvgpu_memcpy((u8 *)oob, (u8 *)runlist, size); + nvgpu_memcpy((u8 *)oob, (u8 *)domain->mem->mem.cpu_va, size); err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); err = (err || msg.ret) ? -1 : 0; @@ -73,15 +74,11 @@ done: return err; } -static bool vgpu_runlist_modify_active_locked(struct gk20a *g, u32 runlist_id, +static bool vgpu_runlist_modify_active_locked(struct gk20a *g, + struct nvgpu_runlist *runlist, struct nvgpu_runlist_domain *domain, struct nvgpu_channel *ch, bool add) { - struct nvgpu_fifo *f = &g->fifo; - struct nvgpu_runlist *runlist; - - runlist = f->runlists[runlist_id]; - if (add) { if (nvgpu_test_and_set_bit(ch->chid, domain->active_channels)) { @@ -99,14 +96,12 @@ static bool vgpu_runlist_modify_active_locked(struct gk20a *g, u32 runlist_id, return true; } -static void vgpu_runlist_reconstruct_locked(struct gk20a *g, u32 runlist_id, +static void vgpu_runlist_reconstruct_locked(struct gk20a *g, + struct nvgpu_runlist *runlist, struct nvgpu_runlist_domain *domain, bool add_entries) { struct nvgpu_fifo *f = &g->fifo; - struct nvgpu_runlist *runlist; - - runlist = f->runlists[runlist_id]; if (add_entries) { u16 *runlist_entry; @@ -129,19 +124,18 @@ static void vgpu_runlist_reconstruct_locked(struct gk20a *g, u32 runlist_id, } } -static int vgpu_runlist_update_locked(struct gk20a *g, u32 runlist_id, +static int vgpu_runlist_update_locked(struct gk20a *g, + struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *domain, struct nvgpu_channel *ch, bool add, bool wait_for_finish) { - struct nvgpu_fifo *f = &g->fifo; - struct nvgpu_runlist *runlist = f->runlists[runlist_id]; - struct nvgpu_runlist_domain *domain = runlist->domain; bool add_entries; nvgpu_log_fn(g, " "); if (ch != NULL) { - bool update = vgpu_runlist_modify_active_locked(g, runlist_id, + bool update = vgpu_runlist_modify_active_locked(g, runlist, domain, ch, add); if (!update) { /* no change in runlist contents */ @@ -154,11 +148,9 @@ static int vgpu_runlist_update_locked(struct gk20a *g, u32 runlist_id, add_entries = add; } - vgpu_runlist_reconstruct_locked(g, runlist_id, domain, add_entries); + vgpu_runlist_reconstruct_locked(g, runlist, domain, add_entries); - return vgpu_submit_runlist(g, vgpu_get_handle(g), runlist_id, - domain->mem->mem.cpu_va, - domain->mem->count); + return vgpu_submit_runlist(g, vgpu_get_handle(g), runlist, domain); } /* add/remove a channel from runlist @@ -166,6 +158,7 @@ static int vgpu_runlist_update_locked(struct gk20a *g, u32 runlist_id, (ch == NULL && !add) means remove all active channels from runlist. 
(ch == NULL && add) means restore all active channels on runlist. */ static int vgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_runlist_domain *domain, struct nvgpu_channel *ch, bool add, bool wait_for_finish) { @@ -175,7 +168,7 @@ static int vgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl, nvgpu_mutex_acquire(&rl->runlist_lock); - ret = vgpu_runlist_update_locked(g, rl->id, ch, add, + ret = vgpu_runlist_update_locked(g, rl, domain, ch, add, wait_for_finish); nvgpu_mutex_release(&rl->runlist_lock); @@ -186,15 +179,23 @@ int vgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl, struct nvgpu_channel *ch, bool add, bool wait_for_finish) { + struct nvgpu_tsg *tsg; + nvgpu_assert(ch != NULL); - return vgpu_runlist_do_update(g, rl, ch, add, wait_for_finish); + tsg = nvgpu_tsg_from_ch(ch); + if (tsg == NULL) { + return -EINVAL; + } + + return vgpu_runlist_do_update(g, rl, tsg->rl_domain, ch, add, wait_for_finish); } int vgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_runlist_domain *domain, bool add, bool wait_for_finish) { - return vgpu_runlist_do_update(g, rl, NULL, add, wait_for_finish); + return vgpu_runlist_do_update(g, rl, domain, NULL, add, wait_for_finish); } u32 vgpu_runlist_length_max(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h index f7efe330e..301a7e7d0 100644 --- a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h @@ -1,7 +1,7 @@ /* * Virtualized GPU Runlist * - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,11 +25,13 @@ struct gk20a; struct nvgpu_channel; struct nvgpu_runlist; +struct nvgpu_runlist_domain; int vgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl, struct nvgpu_channel *ch, bool add, bool wait_for_finish); int vgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_runlist_domain *domain, bool add, bool wait_for_finish); u32 vgpu_runlist_length_max(struct gk20a *g); u32 vgpu_runlist_entry_size(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c index 65f737bfa..68d4fde1c 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c @@ -405,7 +405,7 @@ static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct nvgpu_channel return ret; } - ret = g->ops.runlist.reload(g, fault_ch->runlist, true, false); + ret = g->ops.runlist.reload(g, fault_ch->runlist, tsg->rl_domain, true, false); if (ret != 0) { nvgpu_err(g, "CILP: failed to restart runlist 0!"); return ret; diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c index b1023fc6d..d420a7212 100644 --- a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c +++ b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c @@ -118,7 +118,7 @@ static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g, * the update to finish on hw. 
 	 */
 	err = nvgpu_runlist_update_locked(g,
-			runlist, NULL, false, false);
+			runlist, runlist->domain, NULL, false, false);
 	if (err != 0) {
 		nvgpu_err(g, "runlist id %d is not cleaned up",
 			runlist->id);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h
index b22e9eb32..67b284987 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h
@@ -33,6 +33,7 @@
 struct gk20a;
 struct nvgpu_channel;
 struct nvgpu_runlist;
+struct nvgpu_runlist_domain;
 
 /**
  * Runlist HAL operations.
@@ -67,6 +68,7 @@ struct gops_runlist {
 	 * buffer to accommodate all active channels/TSGs.
 	 */
 	int (*reload)(struct gk20a *g, struct nvgpu_runlist *rl,
+			struct nvgpu_runlist_domain *domain,
 			bool add, bool wait_for_finish);
 
 	/**
diff --git a/drivers/gpu/nvgpu/include/nvgpu/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/runlist.h
index 310964199..6115ebabe 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/runlist.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/runlist.h
@@ -236,6 +236,7 @@ u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f,
  * describe all active channels and TSGs.
  */
 int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl,
+		struct nvgpu_runlist_domain *domain,
 		struct nvgpu_channel *ch, bool add,
 		bool wait_for_finish);
 
 #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
@@ -296,6 +297,7 @@ int nvgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl,
  * to accommodate all active channels/TSGs.
  */
 int nvgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl,
+		struct nvgpu_runlist_domain *domain,
 		bool add, bool wait_for_finish);
 
 /**
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
index 93960c2cc..478edbdaa 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
@@ -176,6 +176,11 @@ struct nvgpu_tsg {
 	 */
 	struct nvgpu_runlist *runlist;
 
+	/**
+	 * Scheduling domain this TSG is bound to. Bound with an ioctl; initially the default domain.
+	 */
+	struct nvgpu_runlist_domain *rl_domain;
+
 	/** tgid (OS specific) of the process that opened the TSG. */
 
 	/**
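
Note (illustration only, not part of the patch): at first channel
bind, nvgpu_tsg_bind_channel() now resolves the boot-time default
domain by name. The sketch below models just that lookup step;
rl_domain_get() is a simplified stand-in for nvgpu_rl_domain_get(),
and only the "(default)" name and the NULL check come from the patch.

#include <stdio.h>
#include <string.h>

struct rl_domain {
	const char *name;
};

/*
 * Single boot-time domain, mirroring "there is still only the default
 * domain that is created at boot".
 */
static struct rl_domain boot_default = { "(default)" };

/* Simplified stand-in for nvgpu_rl_domain_get(g, runlist_id, name). */
static struct rl_domain *rl_domain_get(int runlist_id, const char *name)
{
	(void)runlist_id;
	return (strcmp(name, "(default)") == 0) ? &boot_default : NULL;
}

int main(void)
{
	/* First bind: tsg->rl_domain = rl_domain_get(..., "(default)") */
	struct rl_domain *d = rl_domain_get(0, "(default)");

	if (d == NULL) {
		fprintf(stderr, "default domain missing\n");
		return 1;
	}
	printf("TSG bound to domain %s\n", d->name);
	return 0;
}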