From e0a90be04b2026097201d7c8428fabf408b7c71a Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Fri, 18 Nov 2022 13:39:12 +0200
Subject: [PATCH] gpu: host1x: Support for running as guest

Add support for running as a guest system under a hypervisor, using
Host1x HW's virtualization capabilities.

In effect, this means touching only the 'vm' aperture, and only the
channels and syncpoints that are assigned to the VM. All other
apertures, channels and syncpoints are left alone.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: Ideec5b0b9a692aa3ee6b4a0240c5755c983cb7bd
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2811837
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/channel.c    |  6 ++--
 drivers/gpu/host1x/dev.c        | 52 +++++++++++++++++++++++++++++----
 drivers/gpu/host1x/dev.h        |  4 +++
 drivers/gpu/host1x/hw/cdma_hw.c |  7 +++++
 drivers/gpu/host1x/syncpt.c     | 27 ++++++++++++-----
 5 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 2d0051d6..169e0d86 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -101,11 +101,11 @@ EXPORT_SYMBOL(host1x_channel_put);
 static struct host1x_channel *acquire_unused_channel(struct host1x *host)
 {
 	struct host1x_channel_list *chlist = &host->channel_list;
-	unsigned int max_channels = host->info->nb_channels;
 	unsigned int index;
 
-	index = find_first_zero_bit(chlist->allocated_channels, max_channels);
-	if (index >= max_channels) {
+	index = find_next_zero_bit(chlist->allocated_channels,
+		host->num_channels, host->channel_base);
+	if (index >= host->num_channels) {
 		dev_err(host->dev, "failed to find free channel\n");
 		return NULL;
 	}
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index dba0763f..4b90f58f 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -469,7 +469,7 @@ static void host1x_setup_virtualization_tables(struct host1x *host)
 	const struct host1x_info *info = host->info;
 	unsigned int i;
 
-	if (!info->has_hypervisor)
+	if (!host->hv_regs)
 		return;
 
 	for (i = 0; i < info->num_sid_entries; i++) {
@@ -673,6 +673,39 @@ static int host1x_get_resets(struct host1x *host)
 	return 0;
 }
 
+/* Read this VM's channel and syncpoint ranges from DT; default to all of them. */
+static int host1x_get_assigned_resources(struct host1x *host)
+{
+	const struct host1x_info *info = host->info;
+	struct device_node *np = host->dev->of_node;
+	u32 vals[2];
+	int err;
+
+	err = of_property_read_u32_array(np, "nvidia,channels", vals, 2);
+	if (err && err != -EINVAL) {
+		dev_err(host->dev, "invalid nvidia,channels property: %d\n", err);
+		return err;
+	}
+	/* -EINVAL is not fatal: fall back to every channel the HW provides. */
+	host->channel_base = err ? 0 : vals[0];
+	host->num_channels = err ? info->nb_channels : vals[1];
+
+	err = of_property_read_u32_array(np, "nvidia,syncpoints", vals, 2);
+	if (err && err != -EINVAL) {
+		dev_err(host->dev, "invalid nvidia,syncpoints property: %d\n", err);
+		return err;
+	}
+	/* Reject <base count> pairs that wrap around or extend past nb_pts. */
+	if (!err && (vals[0] > info->nb_pts || vals[1] > info->nb_pts - vals[0])) {
+		dev_err(host->dev, "nvidia,syncpoints range exceeds %u\n", info->nb_pts);
+		return -EINVAL;
+	}
+	host->syncpt_base = err ? 0 : vals[0];
+	host->syncpt_end = err ? info->nb_pts : vals[0] + vals[1];
+
+	return 0;
+}
+
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
@@ -686,15 +719,20 @@ static int host1x_probe(struct platform_device *pdev)
 	host->info = of_device_get_match_data(&pdev->dev);
 
 	if (host->info->has_hypervisor) {
+		struct resource *res;
+
 		host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
 		if (IS_ERR(host->regs))
 			return PTR_ERR(host->regs);
 
-		host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
-		if (IS_ERR(host->hv_regs))
-			return PTR_ERR(host->hv_regs);
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "hypervisor");
+		if (res) {
+			host->hv_regs = devm_ioremap_resource(&pdev->dev, res);
+			if (IS_ERR(host->hv_regs))
+				return PTR_ERR(host->hv_regs);
+		}
 
-		if (host->info->has_common) {
+		if (res && host->info->has_common) {
 			host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
 			if (IS_ERR(host->common_regs))
 				return PTR_ERR(host->common_regs);
@@ -726,6 +764,10 @@ static int host1x_probe(struct platform_device *pdev)
 			return err;
 	}
 
+	err = host1x_get_assigned_resources(host);
+	if (err)
+		return err;
+
 	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk)) {
 		err = PTR_ERR(host->clk);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 75de50fe..87e8c273 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -132,6 +132,10 @@ struct host1x {
 	struct reset_control_bulk_data resets[2];
 	unsigned int nresets;
 
+	/* Resources accessible by this VM */
+	unsigned int syncpt_base, syncpt_end;
+	unsigned int channel_base, num_channels;
+
 	struct iommu_group *group;
 	struct iommu_domain *domain;
 	struct iova_domain iova;
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 96f341ad..b681a03d 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -253,6 +253,13 @@ static void timeout_release_mlock(struct host1x_cdma *cdma)
 	struct host1x *host1x = cdma_to_host1x(cdma);
 	u32 offset;
 
+	/*
+	 * On virtualized systems, we rely on the hypervisor to release
+	 * the MLOCK.
+	 */
+	if (!host1x->common_regs)
+		return;
+
 	switch (ch->client->class) {
 	case HOST1X_CLASS_VIC:
 		offset = HOST1X_COMMON_VIC_MLOCK;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 098ff002..26b59b31 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -69,10 +69,10 @@ struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 
 	mutex_lock(&host->syncpt_mutex);
 
-	for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++)
+	for (i = host->syncpt_base; i < host->syncpt_end && kref_read(&sp->ref); i++, sp++)
 		;
 
-	if (i >= host->info->nb_pts)
+	if (i >= host->syncpt_end)
 		goto unlock;
 
 	if (flags & HOST1X_SYNCPT_HAS_BASE) {
@@ -139,7 +139,7 @@ void host1x_syncpt_restore(struct host1x *host)
 	struct host1x_syncpt *sp_base = host->syncpt;
 	unsigned int i;
 
-	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+	for (i = host->syncpt_base; i < host->syncpt_end; i++) {
 		/*
 		 * Unassign syncpt from channels for purposes of Tegra186
 		 * syncpoint protection. This prevents any channel from
@@ -310,6 +310,13 @@ int host1x_syncpt_init(struct host1x *host)
 	for (i = 0; i < host->info->nb_pts; i++) {
 		syncpt[i].id = i;
 		syncpt[i].host = host;
+
+		/*
+		 * Make syncpoints client managed by default, so that
+		 * we don't try to compare to max_val for e.g. syncpoints
+		 * owned by other VMs.
+		 */
+		syncpt[i].client_managed = true;
 	}
 
 	for (i = 0; i < host->info->nb_bases; i++)
@@ -319,10 +326,16 @@ int host1x_syncpt_init(struct host1x *host)
 	host->syncpt = syncpt;
 	host->bases = bases;
 
-	/* Allocate sync point to use for clearing waits for expired fences */
-	host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
-	if (!host->nop_sp)
-		return -ENOMEM;
+	/*
+	 * Allocate sync point to use for clearing waits for expired fences.
+	 * On virtualized systems where syncpt_base is nonzero, we don't need
+	 * this for anything.
+	 */
+	if (host->syncpt_base == 0) {
+		host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
+		if (!host->nop_sp)
+			return -ENOMEM;
+	}
 
 	if (host->info->reserve_vblank_syncpts) {
 		kref_init(&host->syncpt[26].ref);