From 7c1da7c4a99e7c1a0b11d6103a12817d14f08e68 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Mon, 3 Apr 2023 12:34:31 +0000
Subject: [PATCH 01/73] host1x: Remove Makefile to prepare for host1x
 integration

host1x driver is available in the kernel/nvidia and to
integrate with git history, it is required to remove the
dummy Makefile.

Remove the dummy Makefile.

Bug 4038415

Change-Id: I335c298a235d1e7843bbee7aeb643a1120dfc714
Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
---
 drivers/gpu/host1x/Makefile | 10 ----------
 1 file changed, 10 deletions(-)
 delete mode 100644 drivers/gpu/host1x/Makefile

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
deleted file mode 100644
index 890ed2b0..00000000
--- a/drivers/gpu/host1x/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
-
-# NOTE: Do not change or add anything in this makefile.
-# The source code and makefile rules are copied from the
-# kernel/nvidia/drivers/gpu/host1x. This file is
-# just place-holder for empty makefile to avoid any build
-# issue when copy is not done from command line and building
-# the tree independent of source copy.
-

From 685eb2c8d224179e1b73cafa4a137fc2b29111d8 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 29 Feb 2016 16:04:16 +0530
Subject: [PATCH 02/73] gpu: host1x: add T186 support

Add host1x05 version and fill in the h/w details
in host1x05_info

Add all the hardware accessors for T186
(channel, sync, uclass accessors)

Add hardware support files for host1x channel,
syncpoints, cdma, pushbuffer, interrupt, and debug support

Keep gather filter disabled

Things working with this :
- cdma operations
- basic channel job submit path
- syncpoint support
- syncpoint interrupt mechanism
- debug dump

With this support, below tests pass for VIC client
$nvrm_channel channel_Basic
$nvrm_channel --module=vic

Bug 1704301

Change-Id: I7d97560cb1e3a57733fa0853936b0783c71b7060
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1021434
GVS: Gerrit_Virtual_Submit
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-by: Shridhar Rasal <srasal@nvidia.com>
---
 drivers/gpu/host1x/Makefile                 |   8 +
 drivers/gpu/host1x/dev_t186.c               |  40 +++
 drivers/gpu/host1x/dev_t186.h               |  34 ++
 drivers/gpu/host1x/hw/cdma_hw_t186.c        | 340 ++++++++++++++++++++
 drivers/gpu/host1x/hw/channel_hw_t186.c     | 297 +++++++++++++++++
 drivers/gpu/host1x/hw/debug_hw_t186.c       | 288 +++++++++++++++++
 drivers/gpu/host1x/hw/host1x05.c            |  40 +++
 drivers/gpu/host1x/hw/host1x05.h            |  24 ++
 drivers/gpu/host1x/hw/host1x05_hardware.h   | 128 ++++++++
 drivers/gpu/host1x/hw/hw_host1x05_channel.h | 169 ++++++++++
 drivers/gpu/host1x/hw/hw_host1x05_sync.h    | 321 ++++++++++++++++++
 drivers/gpu/host1x/hw/hw_host1x05_uclass.h  | 157 +++++++++
 drivers/gpu/host1x/hw/intr_hw_t186.c        | 247 ++++++++++++++
 drivers/gpu/host1x/hw/syncpt_hw_t186.c      | 126 ++++++++
 14 files changed, 2219 insertions(+)
 create mode 100644 drivers/gpu/host1x/Makefile
 create mode 100644 drivers/gpu/host1x/dev_t186.c
 create mode 100644 drivers/gpu/host1x/dev_t186.h
 create mode 100644 drivers/gpu/host1x/hw/cdma_hw_t186.c
 create mode 100644 drivers/gpu/host1x/hw/channel_hw_t186.c
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_t186.c
 create mode 100644 drivers/gpu/host1x/hw/host1x05.c
 create mode 100644 drivers/gpu/host1x/hw/host1x05.h
 create mode 100644 drivers/gpu/host1x/hw/host1x05_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x05_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x05_sync.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x05_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/intr_hw_t186.c
 create mode 100644 drivers/gpu/host1x/hw/syncpt_hw_t186.c

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
new file mode 100644
index 00000000..9c8c6467
--- /dev/null
+++ b/drivers/gpu/host1x/Makefile
@@ -0,0 +1,8 @@
+ccflags-y += -Idrivers/gpu/host1x
+ccflags-y += -I../kernel-t18x/drivers/gpu/host1x
+
+host1x-t186-y = \
+	dev_t186.o \
+	hw/host1x05.o
+
+obj-$(CONFIG_TEGRA_HOST1X) += host1x-t186.o
diff --git a/drivers/gpu/host1x/dev_t186.c b/drivers/gpu/host1x/dev_t186.c
new file mode 100644
index 00000000..e4d14876
--- /dev/null
+++ b/drivers/gpu/host1x/dev_t186.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dev.h"
+#include "dev_t186.h"
+
+#include "hw/host1x05.h"
+
+struct host1x_info host1x05_info = {
+	.nb_channels = 63,
+	.nb_pts = 576,
+	.nb_mlocks = 24,
+	.nb_bases = 16,
+	.init = host1x05_init,
+	.sync_offset = 0x0,
+	.gather_filter_enabled  = false,
+};
+
+void host1x_writel(struct host1x *host1x, u32 v, u32 r)
+{
+	writel(v, host1x->regs + r);
+}
+
+u32 host1x_readl(struct host1x *host1x, u32 r)
+{
+	return readl(host1x->regs + r);
+}
diff --git a/drivers/gpu/host1x/dev_t186.h b/drivers/gpu/host1x/dev_t186.h
new file mode 100644
index 00000000..be9726e8
--- /dev/null
+++ b/drivers/gpu/host1x/dev_t186.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HOST1X_DEV_T186_H
+#define HOST1X_DEV_T186_H
+
+#include "dev.h"
+
+extern struct host1x_info host1x05_info;
+
+void host1x_writel(struct host1x *host1x, u32 v, u32 r);
+u32 host1x_readl(struct host1x *host1x, u32 r);
+
+static inline void host1x_hw_sync_get_mutex_owner(struct host1x *host,
+					struct host1x_syncpt *sp,
+					unsigned int mutex_id, bool *cpu, bool *ch,
+					unsigned int *chid)
+{
+	host->syncpt_op->get_mutex_owner(sp, mutex_id, cpu, ch, chid);
+}
+#endif
diff --git a/drivers/gpu/host1x/hw/cdma_hw_t186.c b/drivers/gpu/host1x/hw/cdma_hw_t186.c
new file mode 100644
index 00000000..e903f3e8
--- /dev/null
+++ b/drivers/gpu/host1x/hw/cdma_hw_t186.c
@@ -0,0 +1,340 @@
+/*
+ * Tegra host1x Command DMA
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+
+#include "cdma.h"
+#include "channel.h"
+#include "dev.h"
+#include "debug.h"
+
+/*
+ * Put the restart at the end of pushbuffer memory
+ */
+static void push_buffer_init(struct push_buffer *pb)
+{
+	*(u32 *)(pb->mapped + pb->size_bytes) = host1x_opcode_restart(0);
+}
+
+/*
+ * Increment timedout buffer's syncpt via CPU.
+ */
+static void cdma_timeout_handle(struct host1x_cdma *cdma, u32 getptr,
+				u32 nr_slots)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct push_buffer *pb = &cdma->push_buffer;
+
+	/* NOP all the PB slots */
+	while (nr_slots--) {
+		u32 *p = (u32 *)(pb->mapped + getptr);
+		*(p++) = HOST1X_OPCODE_NOP;
+		*(p++) = HOST1X_OPCODE_NOP;
+		dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
+			&pb->phys, getptr);
+		getptr = (getptr + 8) & (pb->size_bytes - 1);
+	}
+	wmb();
+}
+
+/*
+ * Start channel DMA
+ */
+static void cdma_start(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	if (cdma->running)
+		return;
+
+	cdma->last_pos = cdma->push_buffer.pos;
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	/* set base, put and end pointer */
+	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
+	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
+	host1x_ch_writel(ch, cdma->push_buffer.phys +
+			 cdma->push_buffer.size_bytes + 4,
+			 HOST1X_CHANNEL_DMAEND);
+
+	/* reset GET */
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
+			 HOST1X_CHANNEL_DMACTRL_DMAGETRST |
+			 HOST1X_CHANNEL_DMACTRL_DMAINITGET,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	host1x_channel_enable_gather_filter(ch);
+
+	/* start the command DMA */
+	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL);
+
+	cdma->running = true;
+}
+
+/*
+ * Similar to cdma_start(), but rather than starting from an idle
+ * state (where DMA GET is set to DMA PUT), on a timeout we restore
+ * DMA GET from an explicit value (so DMA may again be pending).
+ */
+static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	if (cdma->running)
+		return;
+
+	cdma->last_pos = cdma->push_buffer.pos;
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	/* set base, end pointer (all of memory) */
+	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
+	host1x_ch_writel(ch, cdma->push_buffer.phys +
+			 cdma->push_buffer.size_bytes,
+			 HOST1X_CHANNEL_DMAEND);
+
+	/* set GET, by loading the value in PUT (then reset GET) */
+	host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT);
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
+			 HOST1X_CHANNEL_DMACTRL_DMAGETRST |
+			 HOST1X_CHANNEL_DMACTRL_DMAINITGET,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	dev_dbg(host1x->dev,
+		"%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", __func__,
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT),
+		cdma->last_pos);
+
+	/* deassert GET reset and set PUT */
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
+
+	host1x_channel_enable_gather_filter(ch);
+
+	/* start the command DMA */
+	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL);
+
+	cdma->running = true;
+}
+
+/*
+ * Kick channel DMA into action by writing its PUT offset (if it has changed)
+ */
+static void cdma_flush(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	if (cdma->push_buffer.pos != cdma->last_pos) {
+		host1x_ch_writel(ch, cdma->push_buffer.pos,
+				 HOST1X_CHANNEL_DMAPUT);
+		cdma->last_pos = cdma->push_buffer.pos;
+	}
+}
+
+static void cdma_stop(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	mutex_lock(&cdma->lock);
+	if (cdma->running) {
+		host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY);
+		host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+				 HOST1X_CHANNEL_DMACTRL);
+		cdma->running = false;
+	}
+	mutex_unlock(&cdma->lock);
+}
+
+/*
+ * Stops both channel's command processor and CDMA immediately.
+ * Also, tears down the channel and resets corresponding module.
+ */
+static void cdma_freeze(struct host1x_cdma *cdma)
+{
+	struct host1x *host = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+	u32 cmdproc_stop;
+
+	if (cdma->torndown && !cdma->running) {
+		dev_warn(host->dev, "Already torn down\n");
+		return;
+	}
+
+	dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id);
+
+	cmdproc_stop = host1x_ch_readl(ch, HOST1X_SYNC_CMDPROC_STOP);
+	cmdproc_stop |= BIT(0);
+	host1x_ch_writel(ch, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+
+	dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
+		__func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT),
+		cdma->last_pos);
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	host1x_ch_writel(ch, BIT(0), HOST1X_SYNC_CH_TEARDOWN);
+
+	cdma->running = false;
+	cdma->torndown = true;
+}
+
+static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+	u32 cmdproc_stop;
+
+	dev_dbg(host1x->dev,
+		"resuming channel (id %d, DMAGET restart = 0x%x)\n",
+		ch->id, getptr);
+
+	cmdproc_stop = host1x_ch_readl(ch, HOST1X_SYNC_CMDPROC_STOP);
+	cmdproc_stop &= ~(BIT(0));
+	host1x_ch_writel(ch, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+
+	cdma->torndown = false;
+	cdma_timeout_restart(cdma, getptr);
+}
+
+/*
+ * If this timeout fires, it indicates the current sync_queue entry has
+ * exceeded its TTL and the userctx should be timed out and remaining
+ * submits already issued cleaned up (future submits return an error).
+ */
+static void cdma_timeout_handler(struct work_struct *work)
+{
+	struct host1x_cdma *cdma;
+	struct host1x *host1x;
+	struct host1x_channel *ch;
+	bool has_timedout = 0;
+
+	u32 prev_cmdproc, cmdproc_stop;
+
+	unsigned int i;
+
+	cdma = container_of(to_delayed_work(work), struct host1x_cdma,
+			    timeout.wq);
+	host1x = cdma_to_host1x(cdma);
+	ch = cdma_to_channel(cdma);
+
+	host1x_debug_dump(cdma_to_host1x(cdma));
+
+	mutex_lock(&cdma->lock);
+
+	if (!cdma->timeout.client) {
+		dev_dbg(host1x->dev,
+			"cdma_timeout: expired, but has no clientid\n");
+		mutex_unlock(&cdma->lock);
+		return;
+	}
+
+	/* stop processing to get a clean snapshot */
+	prev_cmdproc = host1x_ch_readl(ch, HOST1X_SYNC_CMDPROC_STOP);
+	cmdproc_stop = prev_cmdproc | BIT(0);
+	host1x_ch_writel(ch, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+
+	dev_dbg(host1x->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n",
+		prev_cmdproc, cmdproc_stop);
+
+	for (i = 0; i < cdma->timeout.num_syncpts; ++i) {
+		u32 id = cdma->timeout.syncpts[i].id;
+		u32 end = cdma->timeout.syncpts[i].end;
+		struct host1x_syncpt *syncpt = host1x_syncpt_get(host1x, id);
+
+		host1x_syncpt_load(syncpt);
+
+		has_timedout = !host1x_syncpt_is_expired(syncpt, end);
+		if (has_timedout)
+			break;
+	}
+
+	/* has buffer actually completed? */
+	if (!has_timedout) {
+		dev_dbg(host1x->dev,
+			"cdma_timeout: expired, but buffer had completed\n");
+		/* restore */
+		cmdproc_stop = prev_cmdproc & ~(BIT(0));
+		host1x_ch_writel(ch, cmdproc_stop,
+				   HOST1X_SYNC_CMDPROC_STOP);
+		mutex_unlock(&cdma->lock);
+		return;
+	}
+
+	for (i = 0; i < cdma->timeout.num_syncpts; ++i) {
+		u32 id = cdma->timeout.syncpts[i].id;
+		struct host1x_syncpt *syncpt = host1x_syncpt_get(host1x, id);
+		u32 syncpt_val = host1x_syncpt_read_min(syncpt);
+
+		dev_warn(host1x->dev, "%s: timeout: %d (%s), HW thresh %d, done %d\n",
+			__func__, syncpt->id, syncpt->name,
+			syncpt_val, syncpt_val);
+	}
+
+	/* stop HW, resetting channel/module */
+	host1x_hw_cdma_freeze(host1x, cdma);
+
+	host1x_cdma_update_sync_queue(cdma, ch->dev);
+	mutex_unlock(&cdma->lock);
+}
+
+/*
+ * Init timeout resources
+ */
+static int cdma_timeout_init(struct host1x_cdma *cdma)
+{
+	INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler);
+	cdma->timeout.initialized = true;
+
+	return 0;
+}
+
+/*
+ * Clean up timeout resources
+ */
+static void cdma_timeout_destroy(struct host1x_cdma *cdma)
+{
+	if (cdma->timeout.initialized)
+		cancel_delayed_work(&cdma->timeout.wq);
+	cdma->timeout.initialized = false;
+}
+
+static const struct host1x_cdma_ops host1x_cdma_t186_ops = {
+	.start = cdma_start,
+	.stop = cdma_stop,
+	.flush = cdma_flush,
+
+	.timeout_init = cdma_timeout_init,
+	.timeout_destroy = cdma_timeout_destroy,
+	.freeze = cdma_freeze,
+	.resume = cdma_resume,
+	.timeout_handle = cdma_timeout_handle,
+};
+
+static const struct host1x_pushbuffer_ops host1x_pushbuffer_t186_ops = {
+	.init = push_buffer_init,
+};
diff --git a/drivers/gpu/host1x/hw/channel_hw_t186.c b/drivers/gpu/host1x/hw/channel_hw_t186.c
new file mode 100644
index 00000000..2b90ba63
--- /dev/null
+++ b/drivers/gpu/host1x/hw/channel_hw_t186.c
@@ -0,0 +1,297 @@
+/*
+ * Tegra host1x Channel
+ *
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/host1x.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+
+#include <trace/events/host1x.h>
+
+#include "dev_t186.h"
+#include "channel.h"
+#include "intr.h"
+#include "job.h"
+
+#define TRACE_MAX_LENGTH 128U
+
+static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
+			       u32 offset, u32 words)
+{
+	struct device *dev = cdma_to_channel(cdma)->dev;
+	void *mem = NULL;
+
+	if (host1x_debug_trace_cmdbuf)
+		mem = host1x_bo_mmap(bo);
+
+	if (mem) {
+		u32 i;
+		/*
+		 * Write in batches of 128 as there seems to be a limit
+		 * of how much you can output to ftrace at once.
+		 */
+		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
+			u32 num_words = min(words - i, TRACE_MAX_LENGTH);
+			u32 next_offset = offset + i * sizeof(u32);
+
+			trace_host1x_cdma_push_gather(dev_name(dev), bo,
+						      num_words,
+						      next_offset,
+						      mem);
+		}
+
+		host1x_bo_munmap(bo, mem);
+	}
+}
+
+static void channel_push_wait(struct host1x_channel *channel,
+			     u32 id, u32 thresh)
+{
+	host1x_cdma_push(&channel->cdma,
+			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+				HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, 1),
+			 thresh);
+	host1x_cdma_push(&channel->cdma,
+			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+				HOST1X_UCLASS_WAIT_SYNCPT_32, 1),
+			 id);
+}
+
+static inline void serialize(struct host1x_job *job)
+{
+	struct host1x_channel *ch = job->channel;
+	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+	unsigned int i;
+
+	for (i = 0; i < job->num_syncpts; ++i) {
+		u32 syncpt_id = job->syncpts[i].id;
+		struct host1x_syncpt *syncpt =
+			host1x_syncpt_get(host, syncpt_id);
+
+		/*
+		 * Force serialization by inserting a host wait for the
+		 * previous job to finish before this one can commence.
+		 */
+		channel_push_wait(ch, syncpt_id, host1x_syncpt_read_max(syncpt));
+	}
+}
+
+static void add_sync_waits(struct host1x_job *job)
+{
+	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
+	unsigned int i;
+
+	for (i = 0; i < job->num_gathers; i++) {
+		struct host1x_job_gather *g = &job->gathers[i];
+
+		if (g->pre_fence)
+			host1x_sync_fence_wait(g->pre_fence, host,
+					       job->channel);
+	}
+}
+
+static void submit_gathers(struct host1x_job *job)
+{
+	struct host1x_cdma *cdma = &job->channel->cdma;
+	unsigned int i;
+	u32 cur_class = 0;
+
+	cur_class = HOST1X_CLASS_HOST1X;
+	host1x_cdma_push(cdma,
+		host1x_opcode_acquire_mlock(cur_class),
+		host1x_opcode_setclass(cur_class, 0, 0));
+
+	add_sync_waits(job);
+
+	for (i = 0; i < job->num_gathers; i++) {
+		struct host1x_job_gather *g = &job->gathers[i];
+		u32 op1 = host1x_opcode_gather(g->words);
+		u32 op2 = g->base + g->offset;
+
+		/* add a setclass for modules that require it */
+		if (cur_class != g->class_id) {
+			if (cur_class)
+				host1x_cdma_push(cdma,
+					HOST1X_OPCODE_NOP,
+					host1x_opcode_release_mlock(cur_class));
+
+			host1x_cdma_push(cdma,
+				host1x_opcode_acquire_mlock(g->class_id),
+				host1x_opcode_setclass(g->class_id, 0, 0));
+			cur_class = g->class_id;
+		}
+
+		trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff);
+		host1x_cdma_push(cdma, op1, op2);
+	}
+
+	if (job->serialize)
+		serialize(job);
+
+	if (cur_class)
+		host1x_cdma_push(cdma,
+			HOST1X_OPCODE_NOP,
+			host1x_opcode_release_mlock(cur_class));
+}
+
+static inline void synchronize_syncpt_base(struct host1x_job *job)
+{
+	struct host1x_channel *ch = job->channel;
+	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+	unsigned int i;
+
+	for (i = 0; i < job->num_syncpts; ++i) {
+		u32 syncpt_id = job->syncpts[i].id;
+		struct host1x_syncpt *syncpt =
+			host1x_syncpt_get(host, syncpt_id);
+		u32 base_id, value;
+
+		if (!syncpt->base)
+			continue;
+
+		value = host1x_syncpt_read_max(syncpt);
+		base_id = syncpt->base->id;
+
+		host1x_cdma_push(&job->channel->cdma,
+			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+				HOST1X_UCLASS_LOAD_SYNCPT_BASE, 1),
+			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(base_id) |
+			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value));
+	}
+}
+
+static int channel_submit(struct host1x_job *job)
+{
+	struct host1x_channel *ch = job->channel;
+	struct host1x_syncpt *syncpt;
+	u32 prev_max = 0;
+	int err;
+	struct host1x_waitlist *completed_waiter[job->num_syncpts];
+	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+	unsigned int i;
+	int streamid = 0;
+
+	trace_host1x_channel_submit(dev_name(ch->dev),
+				    job->num_gathers, job->num_relocs,
+				    job->num_waitchk, job->syncpts[0].id,
+				    job->syncpts[0].incrs);
+
+	/* before error checks, return current max */
+	syncpt = host1x_syncpt_get(host, job->syncpts[0].id);
+	prev_max = host1x_syncpt_read_max(syncpt);
+
+	/* get submit lock */
+	err = mutex_lock_interruptible(&ch->submitlock);
+	if (err)
+		goto error;
+
+	for (i = 0; i < job->num_syncpts; i++) {
+		completed_waiter[i] = kzalloc(sizeof(*completed_waiter[i]),
+					      GFP_KERNEL);
+		if (!completed_waiter[i]) {
+			mutex_unlock(&ch->submitlock);
+			err = -ENOMEM;
+			goto error;
+		}
+	}
+
+	streamid = iommu_get_hwid(host->dev->archdata.iommu, host->dev, 0);
+	if (streamid >= 0)
+		host1x_ch_writel(ch, streamid, HOST1X_CHANNEL_SMMU_STREAMID);
+
+	/* begin a CDMA submit */
+	err = host1x_cdma_begin(&ch->cdma, job);
+	if (err) {
+		mutex_unlock(&ch->submitlock);
+		goto error;
+	}
+
+	/* Synchronize base register to allow using it for relative waiting */
+	synchronize_syncpt_base(job);
+
+	/* Increment syncpoint maximum values */
+	for (i = 0; i < job->num_syncpts; ++i) {
+		u32 id;
+		u32 incrs;
+
+		id = job->syncpts[i].id;
+		incrs = job->syncpts[i].incrs;
+		syncpt = host1x_syncpt_get(host, id);
+		job->syncpts[i].end = host1x_syncpt_incr_max(syncpt, incrs);
+	}
+
+	submit_gathers(job);
+
+	/* end CDMA submit & stash pinned hMems into sync queue */
+	host1x_cdma_end(&ch->cdma, job);
+
+	trace_host1x_channel_submitted(dev_name(ch->dev), prev_max,
+				       job->syncpts[0].end);
+
+	/* schedule submit complete interrupts */
+	for (i = 0; i < job->num_syncpts; ++i) {
+		u32 syncpt_id = job->syncpts[i].id;
+		u32 syncpt_end = job->syncpts[i].end;
+
+		err = host1x_intr_add_action(host, syncpt_id, syncpt_end,
+					     HOST1X_INTR_ACTION_SUBMIT_COMPLETE,
+					     ch, completed_waiter[i], NULL);
+		completed_waiter[i] = NULL;
+		WARN(err, "Failed to set submit complete interrupt");
+	}
+
+	mutex_unlock(&ch->submitlock);
+
+	return 0;
+
+error:
+	for (i = 0; i < job->num_syncpts; i++)
+		kfree(completed_waiter[i]);
+	return err;
+}
+
+static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
+			       unsigned int index)
+{
+	ch->id = index;
+	mutex_init(&ch->reflock);
+	mutex_init(&ch->submitlock);
+
+	ch->regs = dev->regs + (HOST1X_CHANNEL_CH_APERTURE_START +
+				index * HOST1X_CHANNEL_CH_APERTURE_SIZE);
+	return 0;
+}
+
+static void channel_enable_gather_filter(struct host1x_channel *ch)
+{
+	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+	u32 val;
+
+	val = host1x_readl(host, HOST1X_CHANNEL_FILTER_GBUFFER
+				 + BIT_WORD(ch->id) * sizeof(u32));
+
+	host1x_writel(host, val | BIT_MASK(ch->id),
+				HOST1X_CHANNEL_FILTER_GBUFFER
+				+ BIT_WORD(ch->id) * sizeof(u32));
+}
+
+static const struct host1x_channel_ops host1x_channel_t186_ops = {
+	.init = host1x_channel_init,
+	.submit = channel_submit,
+	.push_wait = channel_push_wait,
+	.enable_gather_filter = channel_enable_gather_filter,
+};
diff --git a/drivers/gpu/host1x/hw/debug_hw_t186.c b/drivers/gpu/host1x/hw/debug_hw_t186.c
new file mode 100644
index 00000000..d6111668
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw_t186.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "dev.h"
+#include "debug.h"
+#include "cdma.h"
+#include "channel.h"
+
+#define HOST1X_DEBUG_MAX_PAGE_OFFSET 102400
+
+enum {
+	HOST1X_OPCODE_SETCLASS	= 0x00,
+	HOST1X_OPCODE_INCR	= 0x01,
+	HOST1X_OPCODE_NONINCR	= 0x02,
+	HOST1X_OPCODE_MASK	= 0x03,
+	HOST1X_OPCODE_IMM	= 0x04,
+	HOST1X_OPCODE_RESTART	= 0x05,
+	HOST1X_OPCODE_GATHER	= 0x06,
+	HOST1X_OPCODE_EXTEND	= 0x0e,
+};
+
+enum {
+	HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK	= 0x00,
+	HOST1X_OPCODE_EXTEND_RELEASE_MLOCK	= 0x01,
+};
+
+static unsigned int show_channel_command(struct output *o, u32 val)
+{
+	unsigned mask;
+	unsigned subop;
+
+	switch (val >> 28) {
+	case HOST1X_OPCODE_SETCLASS:
+		mask = val & 0x3f;
+		if (mask) {
+			host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
+					    val >> 6 & 0x3ff,
+					    val >> 16 & 0xfff, mask);
+			return hweight8(mask);
+		} else {
+			host1x_debug_output(o, "SETCL(class=%03x)\n",
+					    val >> 6 & 0x3ff);
+			return 0;
+		}
+
+	case HOST1X_OPCODE_INCR:
+		host1x_debug_output(o, "INCR(offset=%03x, [",
+				    val >> 16 & 0xfff);
+		return val & 0xffff;
+
+	case HOST1X_OPCODE_NONINCR:
+		host1x_debug_output(o, "NONINCR(offset=%03x, [",
+				    val >> 16 & 0xfff);
+		return val & 0xffff;
+
+	case HOST1X_OPCODE_MASK:
+		mask = val & 0xffff;
+		host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [",
+				    val >> 16 & 0xfff, mask);
+		return hweight16(mask);
+
+	case HOST1X_OPCODE_IMM:
+		host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n",
+				    val >> 16 & 0xfff, val & 0xffff);
+		return 0;
+
+	case HOST1X_OPCODE_RESTART:
+		host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4);
+		return 0;
+
+	case HOST1X_OPCODE_GATHER:
+		host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
+				    val >> 16 & 0xfff, val >> 15 & 0x1,
+				    val >> 14 & 0x1, val & 0x3fff);
+		return 1;
+
+	case HOST1X_OPCODE_EXTEND:
+		subop = val >> 24 & 0xf;
+		if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
+			host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n",
+					    val & 0xff);
+		else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK)
+			host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n",
+					    val & 0xff);
+		else
+			host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val);
+		return 0;
+
+	default:
+		return 0;
+	}
+}
+
+static void show_gather(struct output *o, phys_addr_t phys_addr,
+			unsigned int words, struct host1x_cdma *cdma,
+			phys_addr_t pin_addr, u32 *map_addr)
+{
+	/* Map dmaget cursor to corresponding mem handle */
+	u32 offset = phys_addr - pin_addr;
+	unsigned int data_count = 0, i;
+
+	/*
+	 * Sometimes we're given different hardware address to the same
+	 * page - in these cases the offset will get an invalid number and
+	 * we just have to bail out.
+	 */
+	if (offset > HOST1X_DEBUG_MAX_PAGE_OFFSET) {
+		host1x_debug_output(o, "[address mismatch]\n");
+		return;
+	}
+
+	for (i = 0; i < words; i++) {
+		u32 addr = phys_addr + i * 4;
+		u32 val = *(map_addr + offset / 4 + i);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%08x: %08x:", addr, val);
+			data_count = show_channel_command(o, val);
+		} else {
+			host1x_debug_output(o, "%08x%s", val,
+					    data_count > 0 ? ", " : "])\n");
+			data_count--;
+		}
+	}
+}
+
+static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
+{
+	struct host1x_job *job;
+
+	list_for_each_entry(job, &cdma->sync_queue, list) {
+		int i;
+		host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
+				    job,
+				    (job->syncpts ? job->syncpts[0].id : 0),
+				    (job->syncpts ? job->syncpts[0].end : 0),
+				    job->first_get, job->timeout,
+				    job->num_slots, job->num_unpins);
+
+		for (i = 0; i < job->num_gathers; i++) {
+			struct host1x_job_gather *g = &job->gathers[i];
+			u32 *mapped;
+
+			if (job->gather_copy_mapped)
+				mapped = (u32 *)job->gather_copy_mapped;
+			else
+				mapped = host1x_bo_mmap(g->bo);
+
+			if (!mapped) {
+				host1x_debug_output(o, "[could not mmap]\n");
+				continue;
+			}
+
+			host1x_debug_output(o, "    GATHER at %pad+%#x, %d words\n",
+					    &g->base, g->offset, g->words);
+
+			show_gather(o, g->base + g->offset, g->words, cdma,
+				    g->base, mapped);
+
+			if (!job->gather_copy_mapped)
+				host1x_bo_munmap(g->bo, mapped);
+		}
+	}
+}
+
+static void host1x_debug_show_channel_cdma(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	struct host1x_cdma *cdma = &ch->cdma;
+	u32 val;
+
+	host1x_debug_output(o, "Host1x basic channel registers: \n");
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
+	host1x_debug_output(o, "CMDFIFO_STAT_0: %08x\n", val);
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_RDATA);
+	host1x_debug_output(o, "CMDFIFO_RDATA_0: %08x\n", val);
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_OFFSET);
+	host1x_debug_output(o, "CMDP_OFFSET_0: %08x\n", val);
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CLASS);
+	host1x_debug_output(o, "CMDP_CLASS_0: %08x\n", val);
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CHANNELSTAT);
+	host1x_debug_output(o, "CHANNELSTAT_0: %08x\n", val);
+
+	show_channel_gathers(o, cdma);
+	host1x_debug_output(o, "\n");
+}
+
+static void host1x_debug_show_channel_fifo(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	u32 val, rd_ptr, wr_ptr, start, end, temp;
+	unsigned int data_count = 0;
+
+	host1x_debug_output(o, "%d: fifo:\n", ch->id);
+
+	temp = host1x_sync_readl(host, HOST1X_THOST_COMMON_ICG_EN_OVERRIDE_0);
+	host1x_sync_writel(host, 0x1, HOST1X_THOST_COMMON_ICG_EN_OVERRIDE_0);
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
+	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
+	if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) {
+		host1x_debug_output(o, "[empty]\n");
+		return;
+	}
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+	host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+			   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id),
+			   HOST1X_SYNC_CFPEEK_CTRL);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS);
+	rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val);
+	wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id));
+	start = HOST1X_SYNC_CF_SETUP_BASE_V(val);
+	end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val);
+
+	do {
+		host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+		host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+				   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) |
+				   HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr),
+				   HOST1X_SYNC_CFPEEK_CTRL);
+		val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%08x:", val);
+			data_count = show_channel_command(o, val);
+		} else {
+			host1x_debug_output(o, "%08x%s", val,
+					    data_count > 0 ? ", " : "])\n");
+			data_count--;
+		}
+
+		if (rd_ptr == end)
+			rd_ptr = start;
+		else
+			rd_ptr++;
+	} while (rd_ptr != wr_ptr);
+
+	if (data_count)
+		host1x_debug_output(o, ", ...])\n");
+	host1x_debug_output(o, "\n");
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+
+	host1x_sync_writel(host, temp, HOST1X_THOST_COMMON_ICG_EN_OVERRIDE_0);
+}
+
+static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
+{
+	int i;
+	unsigned int chid;
+	bool cpu, ch;
+
+	host1x_debug_output(o, "---- mlocks ----\n");
+	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
+		host1x_hw_sync_get_mutex_owner(host, host->syncpt,
+				i, &cpu, &ch, &chid);
+		if (ch)
+			host1x_debug_output(o, "%d: locked by channel %d\n", i, chid);
+	}
+	host1x_debug_output(o, "\n");
+}
+
+static const struct host1x_debug_ops host1x_debug_t186_ops = {
+	.show_channel_cdma = host1x_debug_show_channel_cdma,
+	.show_channel_fifo = host1x_debug_show_channel_fifo,
+	.show_mlocks = host1x_debug_show_mlocks,
+};
diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c
new file mode 100644
index 00000000..db64e7cf
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* include hw specification */
+#include "host1x05.h"
+#include "host1x05_hardware.h"
+
+/* include code */
+#include "cdma_hw_t186.c"
+#include "channel_hw_t186.c"
+#include "debug_hw_t186.c"
+#include "intr_hw_t186.c"
+#include "syncpt_hw_t186.c"
+
+#include "dev.h"
+
+int host1x05_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_t186_ops;
+	host->cdma_op = &host1x_cdma_t186_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_t186_ops;
+	host->syncpt_op = &host1x_syncpt_t186_ops;
+	host->intr_op = &host1x_intr_t186_ops;
+	host->debug_op = &host1x_debug_t186_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x05.h b/drivers/gpu/host1x/hw/host1x05.h
new file mode 100644
index 00000000..e1b84214
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HOST1X_HOST1X05_H
+#define HOST1X_HOST1X05_H
+
+struct host1x;
+
+int host1x05_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h
new file mode 100644
index 00000000..add00014
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05_hardware.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_HOST1X05_HARDWARE_H
+#define __HOST1X_HOST1X05_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x05_channel.h"
+#include "hw_host1x05_sync.h"
+#include "hw_host1x05_uclass.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+/* cdma opcodes */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+static inline u32 host1x_opcode_acquire_mlock(unsigned id)
+{
+	return (14 << 28) | id;
+}
+
+static inline u32 host1x_opcode_release_mlock(unsigned id)
+{
+	return (14 << 28) | (1 << 24) | id;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
new file mode 100644
index 00000000..8c3499f9
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_CHANNEL_H
+#define HOST1X_HW_HOST1X05_CHANNEL_H
+
+static inline u32 host1x_channel_ch_aperture_start_r(void)
+{
+	return 0x10000;
+}
+#define HOST1X_CHANNEL_CH_APERTURE_START \
+	host1x_channel_ch_aperture_start_r()
+static inline u32 host1x_channel_ch_aperture_size_r(void)
+{
+	return 0x100;
+}
+#define HOST1X_CHANNEL_CH_APERTURE_SIZE \
+	host1x_channel_ch_aperture_size_r()
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 13) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_rdata_r(void)
+{
+	return 0x28;
+}
+#define HOST1X_CHANNEL_RDATA \
+	host1x_channel_rdata_r()
+static inline u32 host1x_channel_cmdp_offset_r(void)
+{
+	return 0x30;
+}
+#define HOST1X_CHANNEL_CMDP_OFFSET \
+	host1x_channel_cmdp_offset_r()
+static inline u32 host1x_channel_cmdp_class_r(void)
+{
+	return 0x34;
+}
+#define HOST1X_CHANNEL_CMDP_CLASS \
+	host1x_channel_cmdp_class_r()
+static inline u32 host1x_channel_cmdp_channelstat_r(void)
+{
+	return 0x38;
+}
+#define HOST1X_CHANNEL_CMDP_CHANNELSTAT \
+	host1x_channel_cmdp_channelstat_r()
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x10;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_smmu_streamid_r(void)
+{
+	return 0x84;
+}
+#define HOST1X_CHANNEL_SMMU_STREAMID \
+	host1x_channel_smmu_streamid_r()
+static inline u32 host1x_channel_filter_gbuffer_r(void)
+{
+	return 0x2020;
+}
+#define HOST1X_CHANNEL_FILTER_GBUFFER \
+	host1x_channel_filter_gbuffer_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_sync.h b/drivers/gpu/host1x/hw/hw_host1x05_sync.h
new file mode 100644
index 00000000..55fe9885
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_sync.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_SYNC_H
+#define HOST1X_HW_HOST1X05_SYNC_H
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_intstatus_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_SYNC_INTSTATUS \
+	host1x_sync_intstatus_r()
+static inline u32 host1x_sync_intmask_r(void)
+{
+	return 0x30;
+}
+#define HOST1X_SYNC_INTMASK \
+	host1x_sync_intmask_r()
+static inline u32 host1x_sync_intc0mask_r(void)
+{
+	return 0x4;
+}
+#define HOST1X_SYNC_INTC0MASK \
+	host1x_sync_intc0mask_r()
+static inline u32 host1x_sync_intgmask_r(void)
+{
+	return 0x44;
+}
+#define HOST1X_SYNC_INTGMASK \
+	host1x_sync_intgmask_r()
+static inline u32 host1x_sync_syncpt_intgmask_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_SYNC_SYNCPT_INTGMASK \
+	host1x_sync_syncpt_intgmask_r()
+static inline u32 host1x_sync_intstatus_ip_read_int_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_INTSTATUS_IP_READ_INT_V(r) \
+	host1x_sync_intstatus_ip_read_int_v(r)
+static inline u32 host1x_sync_intstatus_ip_write_int_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_INTSTATUS_IP_WRITE_INT_V(r) \
+	host1x_sync_intstatus_ip_write_int_v(r)
+static inline u32 host1x_sync_intstatus_illegal_pb_access_v(u32 r)
+{
+	return (r >> 28) & 0x1;
+}
+#define HOST1X_SYNC_INTSTATUS_ILLEGAL_PB_ACCESS_V(r) \
+	host1x_sync_intstatus_illegal_pb_access_v(r)
+static inline u32 host1x_sync_illegal_syncpt_access_frm_pb_r(void)
+{
+	return 0x2270;
+}
+#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB \
+	host1x_sync_illegal_syncpt_access_frm_pb_r()
+static inline u32 host1x_sync_illegal_syncpt_access_frm_pb_syncpt_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB_SYNCPT_V(r) \
+	host1x_sync_illegal_syncpt_access_frm_pb_syncpt_v(r)
+static inline u32 host1x_sync_illegal_syncpt_access_frm_pb_ch_v(u32 r)
+{
+	return (r >> 10) & 0x3f;
+}
+#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB_CH_V(r) \
+	host1x_sync_illegal_syncpt_access_frm_pb_ch_v(r)
+static inline u32 host1x_sync_intstatus_illegal_client_access_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+#define HOST1X_SYNC_INTSTATUS_ILLEGAL_CLIENT_ACCESS_V(r) \
+	host1x_sync_intstatus_illegal_client_access_v(r)
+static inline u32 host1x_sync_illegal_syncpt_access_frm_client_r(void)
+{
+	return 0x2268;
+}
+#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT \
+	host1x_sync_illegal_syncpt_access_frm_client_r()
+static inline u32 host1x_sync_illegal_syncpt_access_frm_client_syncpt_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT_SYNCPT_V(r) \
+	host1x_sync_illegal_syncpt_access_frm_client_syncpt_v(r)
+static inline u32 host1x_sync_illegal_syncpt_access_frm_client_ch_v(u32 r)
+{
+	return (r >> 10) & 0x3f;
+}
+#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT_CH_V(r) \
+	host1x_sync_illegal_syncpt_access_frm_client_ch_v(r)
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0x18080 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0x16464 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0x16590 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0x1652c + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0x2588 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0xfff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0xfff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0x48;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0x4c;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x2244;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x2248;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x2250;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_ip_read_timeout_addr_r(void)
+{
+	return 0x2254;
+}
+#define HOST1X_SYNC_IP_READ_TIMEOUT_ADDR \
+	host1x_sync_ip_read_timeout_addr_r()
+static inline u32 host1x_sync_ip_write_timeout_addr_r(void)
+{
+	return 0x225c;
+}
+#define HOST1X_SYNC_IP_WRITE_TIMEOUT_ADDR \
+	host1x_sync_ip_write_timeout_addr_r()
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x18a00 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x18000 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0x16400 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x233c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0x3f) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x2340;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x2344;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0xfff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0xfff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_common_mlock_r(unsigned long id)
+{
+	return 0x2030 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_COMMON_MLOCK \
+	host1x_sync_common_mlock_r()
+static inline u32 host1x_sync_common_mlock_ch_v(u32 r)
+{
+	return (r >> 2) & 0x3f;
+}
+#define HOST1X_SYNC_COMMON_MLOCK_CH_V(r) \
+	host1x_sync_common_mlock_ch_v(r)
+static inline u32 host1x_sync_common_mlock_locked_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_COMMON_MLOCK_LOCKED_V(r) \
+	host1x_sync_common_mlock_locked_v(r)
+static inline u32 host1x_thost_common_icg_en_override_0_r(void)
+{
+	return 0x2aa8;
+}
+#define HOST1X_THOST_COMMON_ICG_EN_OVERRIDE_0 \
+	host1x_thost_common_icg_en_override_0_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
new file mode 100644
index 00000000..01145f61
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_UCLASS_H
+#define HOST1X_HW_HOST1X05_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0x3ff) << 22;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0x3f) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/intr_hw_t186.c b/drivers/gpu/host1x/hw/intr_hw_t186.c
new file mode 100644
index 00000000..48bd1150
--- /dev/null
+++ b/drivers/gpu/host1x/hw/intr_hw_t186.c
@@ -0,0 +1,247 @@
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Copyright (C) 2016 NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include "intr.h"
+#include "dev.h"
+
+
+static inline u32 bit_mask(u32 nr)
+{
+	return 1UL << (nr % 32);
+}
+static inline u32 bit_word(u32 nr)
+{
+	return nr / 32;
+}
+
+/*
+ * Sync point threshold interrupt service function
+ * Handles sync point threshold triggers, in interrupt context
+ */
+static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
+{
+	unsigned int id = syncpt->id;
+	struct host1x *host = syncpt->host;
+
+	host1x_sync_writel(host, bit_mask(id),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(bit_word(id)));
+	host1x_sync_writel(host, bit_mask(id),
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(bit_word(id)));
+
+	queue_work(host->intr_wq, &syncpt->intr.work);
+}
+
+static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
+{
+	struct host1x *host = dev_id;
+	unsigned long reg;
+	int i, set_bit;
+
+	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
+		reg = host1x_sync_readl(host,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+		for_each_set_bit(set_bit, &reg, 32) {
+			struct host1x_syncpt *syncpt;
+			int id = i * 32 + set_bit;
+
+			if (unlikely(id < 0 || id >= host->info->nb_pts)) {
+				dev_err(host->dev,
+					"%s(): unexptected syncpt id %d\n",
+					__func__, id);
+				goto out;
+			}
+			syncpt = host->syncpt + id;
+			host1x_intr_syncpt_handle(syncpt);
+		}
+	}
+out:
+	return IRQ_HANDLED;
+}
+
+static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
+{
+	u32 i;
+
+	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) {
+		host1x_sync_writel(host, 0xffffffffu,
+			HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i));
+		host1x_sync_writel(host, 0xffffffffu,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+	}
+}
+
+static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
+	void (*syncpt_thresh_work)(struct work_struct *))
+{
+	int i, err;
+
+	host1x_hw_intr_disable_all_syncpt_intrs(host);
+
+	for (i = 0; i < host->info->nb_pts; i++)
+		INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work);
+
+	err = devm_request_irq(host->dev, host->intr_syncpt_irq,
+			       syncpt_thresh_isr, IRQF_SHARED,
+			       "host1x_syncpt", host);
+	if (IS_ERR_VALUE(err)) {
+		WARN_ON(1);
+		return err;
+	}
+
+	/* disable the ip_busy_timeout. this prevents write drops */
+	host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT);
+
+	/*
+	 * increase the auto-ack timout to the maximum value. 2d will hang
+	 * otherwise on Tegra2.
+	 */
+	host1x_sync_writel(host, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG);
+
+	/* update host clocks per usec */
+	host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
+
+	return 0;
+}
+
+static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
+	u32 id, u32 thresh)
+{
+	host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
+}
+
+static void _host1x_intr_enable_syncpt_intr(struct host1x *host, u32 id)
+{
+	host1x_sync_writel(host, bit_mask(id),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(bit_word(id)));
+}
+
+static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id)
+{
+	host1x_sync_writel(host, bit_mask(id),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(bit_word(id)));
+	host1x_sync_writel(host, bit_mask(id),
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(bit_word(id)));
+}
+
+static int _host1x_free_syncpt_irq(struct host1x *host)
+{
+	devm_free_irq(host->dev, host->intr_syncpt_irq, host);
+	flush_workqueue(host->intr_wq);
+	return 0;
+}
+
+/**
+ * Host general interrupt service function
+ * Handles read / write failures
+ */
+static irqreturn_t general_isr(int irq, void *dev_id)
+{
+	struct host1x *host = dev_id;
+	u32 intstatus, addr;
+
+	/* Handle host1x interrupt in ISR */
+	intstatus = host1x_sync_readl(host, HOST1X_SYNC_INTSTATUS);
+
+	if (HOST1X_SYNC_INTSTATUS_IP_READ_INT_V(intstatus)) {
+		addr = host1x_sync_readl(host,
+					 HOST1X_SYNC_IP_READ_TIMEOUT_ADDR);
+		pr_err("Host read timeout at address %x\n", addr);
+	}
+
+	if (HOST1X_SYNC_INTSTATUS_IP_WRITE_INT_V(intstatus)) {
+		addr = host1x_sync_readl(host,
+					 HOST1X_SYNC_IP_WRITE_TIMEOUT_ADDR);
+		pr_err("Host write timeout at address %x\n", addr);
+	}
+
+	if (HOST1X_SYNC_INTSTATUS_ILLEGAL_PB_ACCESS_V(intstatus)) {
+		u32 stat = host1x_sync_readl(host, HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB);
+		u32 ch = HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB_CH_V(stat);
+		u32 id = HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB_SYNCPT_V(stat);
+		pr_err("Host illegal syncpoint pb access (ch=%u, id=%u)\n", ch, id);
+	}
+
+	if (HOST1X_SYNC_INTSTATUS_ILLEGAL_CLIENT_ACCESS_V(intstatus)) {
+		u32 stat = host1x_sync_readl(host, HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT);
+		u32 ch = HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT_CH_V(stat);
+		u32 id = HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT_SYNCPT_V(stat);
+		pr_err("Host illegal syncpoint client access (ch=%u, id=%u)\n", ch, id);
+	}
+
+	host1x_sync_writel(host, intstatus, HOST1X_SYNC_INTSTATUS);
+
+	return IRQ_HANDLED;
+}
+
+static int _host1x_intr_request_host_general_irq(struct host1x *host)
+{
+	int err;
+
+	/* master disable for general (not syncpt) host interrupts */
+	host1x_sync_writel(host, 0, HOST1X_SYNC_INTC0MASK);
+	host1x_sync_writel(host, 0, HOST1X_SYNC_INTGMASK);
+	host1x_sync_writel(host, 0, HOST1X_SYNC_INTMASK);
+
+	err = devm_request_irq(host->dev, host->intr_general_irq,
+			       general_isr, 0,
+			       "host1x_general", host);
+	if (IS_ERR_VALUE(err)) {
+		WARN_ON(1);
+		return err;
+	}
+
+	/* enable host module interrupt to CPU0 */
+	host1x_sync_writel(host, BIT(0), HOST1X_SYNC_INTC0MASK);
+	host1x_sync_writel(host, BIT(0), HOST1X_SYNC_INTGMASK);
+	host1x_sync_writel(host, 0xff << 8, HOST1X_SYNC_SYNCPT_INTGMASK);
+
+	/* master enable for general (not syncpt) host interrupts
+	 * (AXIREAD, AXIWRITE, Syncpoint protection */
+	host1x_sync_writel(host, BIT(0) | BIT(1) | BIT(28) | BIT(30),
+				 HOST1X_SYNC_INTMASK);
+
+	return err;
+}
+
+static void _host1x_intr_free_host_general_irq(struct host1x *host)
+{
+	/* master disable for general (not syncpt) host interrupts */
+	host1x_sync_writel(host, 0, HOST1X_SYNC_INTMASK);
+	host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTGMASK);
+
+	devm_free_irq(host->dev, host->intr_general_irq, host);
+}
+
+static const struct host1x_intr_ops host1x_intr_t186_ops = {
+	/* Syncpt interrupts */
+	.init_host_sync = _host1x_intr_init_host_sync,
+	.set_syncpt_threshold = _host1x_intr_set_syncpt_threshold,
+	.enable_syncpt_intr = _host1x_intr_enable_syncpt_intr,
+	.disable_syncpt_intr = _host1x_intr_disable_syncpt_intr,
+	.disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs,
+	.free_syncpt_irq = _host1x_free_syncpt_irq,
+
+	/* Host general interrupts */
+	.request_host_general_irq = _host1x_intr_request_host_general_irq,
+	.free_host_general_irq = _host1x_intr_free_host_general_irq,
+};
diff --git a/drivers/gpu/host1x/hw/syncpt_hw_t186.c b/drivers/gpu/host1x/hw/syncpt_hw_t186.c
new file mode 100644
index 00000000..9b196854
--- /dev/null
+++ b/drivers/gpu/host1x/hw/syncpt_hw_t186.c
@@ -0,0 +1,126 @@
+/*
+ * Tegra host1x Syncpoints
+ *
+ * Copyright (c) 2016, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/io.h>
+
+#include "dev.h"
+#include "syncpt.h"
+
+/*
+ * Write the current syncpoint value back to hw.
+ */
+static void syncpt_restore(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	int min = host1x_syncpt_read_min(sp);
+	host1x_sync_writel(host, min, HOST1X_SYNC_SYNCPT(sp->id));
+}
+
+/*
+ * Write the current waitbase value back to hw.
+ */
+static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	host1x_sync_writel(host, sp->base_val,
+			   HOST1X_SYNC_SYNCPT_BASE(sp->id));
+}
+
+/*
+ * Read waitbase value from hw.
+ */
+static void syncpt_read_wait_base(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	sp->base_val =
+		host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id));
+}
+
+/*
+ * Updates the last value read from hardware.
+ */
+static u32 syncpt_load(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	u32 old, live;
+
+	/* Loop in case there's a race writing to min_val */
+	do {
+		old = host1x_syncpt_read_min(sp);
+		live = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT(sp->id));
+	} while ((u32)atomic_cmpxchg(&sp->min_val, old, live) != old);
+
+	if (!host1x_syncpt_check_max(sp, live))
+		dev_err(host->dev, "%s failed: id=%u, min=%d, max=%d\n",
+			__func__, sp->id, host1x_syncpt_read_min(sp),
+			host1x_syncpt_read_max(sp));
+
+	return live;
+}
+
+/*
+ * Write a cpu syncpoint increment to the hardware, without touching
+ * the cache.
+ */
+static int syncpt_cpu_incr(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	u32 reg_offset = sp->id / 32;
+
+	if (!host1x_syncpt_client_managed(sp) &&
+	    host1x_syncpt_idle(sp))
+		return -EINVAL;
+	host1x_sync_writel(host, BIT_MASK(sp->id),
+			   HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset));
+	wmb();
+
+	return 0;
+}
+
+/* remove a wait pointed to by patch_addr */
+static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
+{
+	u32 override = host1x_class_host_wait_syncpt(
+		HOST1X_SYNCPT_RESERVED, 0);
+
+	*((u32 *)patch_addr) = override;
+	return 0;
+}
+
+static void syncpt_get_mutex_owner(struct host1x_syncpt *sp,
+				   unsigned int mutex_id, bool *cpu, bool *ch,
+				   unsigned int *chid)
+{
+	struct host1x *host = sp->host;
+	u32 owner;
+
+	owner = host1x_sync_readl(host, host1x_sync_common_mlock_r(mutex_id));
+	*chid = HOST1X_SYNC_COMMON_MLOCK_CH_V(owner);
+	*ch = HOST1X_SYNC_COMMON_MLOCK_LOCKED_V(owner);
+	*cpu = false;
+}
+
+static const struct host1x_syncpt_ops host1x_syncpt_t186_ops = {
+	.restore = syncpt_restore,
+	.restore_wait_base = syncpt_restore_wait_base,
+	.load_wait_base = syncpt_read_wait_base,
+	.load = syncpt_load,
+	.cpu_incr = syncpt_cpu_incr,
+	.patch_wait = syncpt_patch_wait,
+	.get_mutex_owner = syncpt_get_mutex_owner,
+};

From 1859542c94d23b3cd39ee0040cf05c4b710a78da Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 3 Mar 2016 14:15:16 +0530
Subject: [PATCH 03/73] gpu: host1x: API to load streamid registers

Define host1x_dev_ops for T186

Add API load_streamid_regs() to load streamid registers
Set this API to host1x_dev_ops.load_regs so that we
set streamid registers during host1x_probe()

Add a static table which includes streamid mappings
for all the clients

Bug 1704301

Change-Id: I7aeefc43776472a7ccf868bfa18c810f3b80b52b
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1023438
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Shridhar Rasal <srasal@nvidia.com>
---
 drivers/gpu/host1x/hw/host1x05.c        |  2 +
 drivers/gpu/host1x/hw/stremid_hw_t186.c | 95 +++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 drivers/gpu/host1x/hw/stremid_hw_t186.c

diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c
index db64e7cf..3a790bee 100644
--- a/drivers/gpu/host1x/hw/host1x05.c
+++ b/drivers/gpu/host1x/hw/host1x05.c
@@ -24,11 +24,13 @@
 #include "debug_hw_t186.c"
 #include "intr_hw_t186.c"
 #include "syncpt_hw_t186.c"
+#include "stremid_hw_t186.c"
 
 #include "dev.h"
 
 int host1x05_init(struct host1x *host)
 {
+	host->dev_op = &host1x_dev_t186_ops;
 	host->channel_op = &host1x_channel_t186_ops;
 	host->cdma_op = &host1x_cdma_t186_ops;
 	host->cdma_pb_op = &host1x_pushbuffer_t186_ops;
diff --git a/drivers/gpu/host1x/hw/stremid_hw_t186.c b/drivers/gpu/host1x/hw/stremid_hw_t186.c
new file mode 100644
index 00000000..9f4c3685
--- /dev/null
+++ b/drivers/gpu/host1x/hw/stremid_hw_t186.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "dev_t186.h"
+
+struct host1x_streamid_mapping {
+	u32 host1x_offset;
+	u32 client_offset;
+	u32 client_limit;
+};
+
+static struct host1x_streamid_mapping __attribute__((__unused__))
+        t18x_host1x_streamid_mapping[] = {
+	/* HOST1X_THOST_COMMON_SE1_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001ac8, 0x00000090, 0x00000090},
+	/* HOST1X_THOST_COMMON_SE2_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001ad0, 0x00000090, 0x00000090},
+	/* HOST1X_THOST_COMMON_SE3_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001ad8, 0x00000090, 0x00000090},
+	/* HOST1X_THOST_COMMON_SE4_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001ae0, 0x00000090, 0x00000090},
+	/* HOST1X_THOST_COMMON_ISP_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001ae8, 0x00000050, 0x00000050},
+	/* HOST1X_THOST_COMMON_VIC_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001af0, 0x00000030, 0x00000034},
+	/* HOST1X_THOST_COMMON_NVENC_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001af8, 0x00000030, 0x00000034},
+	/* HOST1X_THOST_COMMON_NVDEC_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001b00, 0x00000030, 0x00000034},
+	/* HOST1X_THOST_COMMON_NVJPG_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001b08, 0x00000030, 0x00000034},
+	/* HOST1X_THOST_COMMON_TSEC_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001b10, 0x00000030, 0x00000034},
+	/* HOST1X_THOST_COMMON_TSECB_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001b18, 0x00000030, 0x00000034},
+	/* HOST1X_THOST_COMMON_VI_STRMID_0_OFFSET_BASE_0 */
+	{ 0x00001b80, 0x00010000, 0x00010000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_1_OFFSET_BASE_0 */
+	{ 0x00001b88, 0x00020000, 0x00020000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_2_OFFSET_BASE_0 */
+	{ 0x00001b90, 0x00030000, 0x00030000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_3_OFFSET_BASE_0 */
+	{ 0x00001b98, 0x00040000, 0x00040000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_4_OFFSET_BASE_0 */
+	{ 0x00001ba0, 0x00050000, 0x00050000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_5_OFFSET_BASE_0 */
+	{ 0x00001ba8, 0x00060000, 0x00060000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_6_OFFSET_BASE_0 */
+	{ 0x00001bb0, 0x00070000, 0x00070000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_7_OFFSET_BASE_0 */
+	{ 0x00001bb8, 0x00080000, 0x00080000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_8_OFFSET_BASE_0 */
+	{ 0x00001bc0, 0x00090000, 0x00090000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_9_OFFSET_BASE_0 */
+	{ 0x00001bc8, 0x000a0000, 0x000a0000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_10_OFFSET_BASE_0 */
+	{ 0x00001bd0, 0x000b0000, 0x000b0000},
+	/* HOST1X_THOST_COMMON_VI_STRMID_11_OFFSET_BASE_0 */
+	{ 0x00001bd8, 0x000c0000, 0x000c0000},
+	{},
+};
+
+static int load_streamid_regs(struct host1x *host)
+{
+	struct host1x_streamid_mapping *map_regs = t18x_host1x_streamid_mapping;
+
+	while (map_regs->host1x_offset) {
+		host1x_writel(host, map_regs->client_offset,
+				    map_regs->host1x_offset);
+		host1x_writel(host, map_regs->client_limit,
+				    map_regs->host1x_offset + sizeof(u32));
+
+		map_regs++;
+	}
+
+	return 0;
+}
+
+static const struct host1x_dev_ops host1x_dev_t186_ops = {
+	.load_regs = load_streamid_regs,
+};

From 6bd4b830bf69d5af7fe6fa57dbae7303bbf0203c Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 10 Mar 2016 14:46:29 +0530
Subject: [PATCH 04/73] gpu: host1x: keep device powered on during submit

Get pm_runtime reference on device to keep device
powered on during submit

Bug 1704301

Change-Id: I71d4b4dc6317445bc3d1832ffd3bd4c7afadc654
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1029500
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Shridhar Rasal <srasal@nvidia.com>
---
 drivers/gpu/host1x/hw/channel_hw_t186.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw_t186.c b/drivers/gpu/host1x/hw/channel_hw_t186.c
index 2b90ba63..256368f2 100644
--- a/drivers/gpu/host1x/hw/channel_hw_t186.c
+++ b/drivers/gpu/host1x/hw/channel_hw_t186.c
@@ -19,6 +19,7 @@
 #include <linux/host1x.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
+#include <linux/pm_runtime.h>
 
 #include <trace/events/host1x.h>
 
@@ -194,6 +195,10 @@ static int channel_submit(struct host1x_job *job)
 	syncpt = host1x_syncpt_get(host, job->syncpts[0].id);
 	prev_max = host1x_syncpt_read_max(syncpt);
 
+	/* keep device powered on */
+	for (i = 0; i < job->num_syncpts; ++i)
+		pm_runtime_get_sync(ch->dev);
+
 	/* get submit lock */
 	err = mutex_lock_interruptible(&ch->submitlock);
 	if (err)

From 6d83772d005379ca3926b05f611cb805eb718ca8 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 10 Mar 2016 16:31:34 +0530
Subject: [PATCH 05/73] gpu: host1x: enable gather filter for T186

Bug 1704301

Change-Id: Id783a06f353ae4627103ac48ab903eefd9b2ec7b
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1029502
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Shridhar Rasal <srasal@nvidia.com>
---
 drivers/gpu/host1x/dev_t186.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/dev_t186.c b/drivers/gpu/host1x/dev_t186.c
index e4d14876..d29a48c6 100644
--- a/drivers/gpu/host1x/dev_t186.c
+++ b/drivers/gpu/host1x/dev_t186.c
@@ -26,7 +26,7 @@ struct host1x_info host1x05_info = {
 	.nb_bases = 16,
 	.init = host1x05_init,
 	.sync_offset = 0x0,
-	.gather_filter_enabled  = false,
+	.gather_filter_enabled  = true,
 };
 
 void host1x_writel(struct host1x *host1x, u32 v, u32 r)

From c7871478e1dceb7da03500bbe1fae070c381c6fd Mon Sep 17 00:00:00 2001
From: Pritesh Raithatha <praithatha@nvidia.com>
Date: Mon, 9 May 2016 12:30:15 +0530
Subject: [PATCH 06/73] kernel: t18x: change kernel path

All kernel versions are getting moved inside $TOP/kernel folder.
Changing kernel paths accordingly.

Bug 200190733

Change-Id: Ia0353dd4be3fb3d76bbfc82609be99546a217f92
Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
Reviewed-on: http://git-master/r/1143409
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Tested-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/host1x/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 9c8c6467..25d54f73 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -1,5 +1,5 @@
 ccflags-y += -Idrivers/gpu/host1x
-ccflags-y += -I../kernel-t18x/drivers/gpu/host1x
+ccflags-y += -I../t18x/drivers/gpu/host1x
 
 host1x-t186-y = \
 	dev_t186.o \

From 440739938d0aab374a6d0d22765302aac451ccaf Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@nvidia.com>
Date: Sun, 3 Jul 2016 18:29:49 +0200
Subject: [PATCH 07/73] t18x: renaming KConfigs & Makefiles to add suffix

Bug 1783210

Signed-off-by: dmitry pervushin <dpervushin@nvidia.com>
Change-Id: I20c29b4dd5947d296a6ab1e83d47e569e386855b
Reviewed-on: http://git-master/r/1176347
Reviewed-on: http://git-master/r/1177303
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Tested-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/1183017
---
 drivers/gpu/host1x/{Makefile => Makefile.t18x} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename drivers/gpu/host1x/{Makefile => Makefile.t18x} (100%)

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile.t18x
similarity index 100%
rename from drivers/gpu/host1x/Makefile
rename to drivers/gpu/host1x/Makefile.t18x

From 2f242453cdd41166fd4b4cd23aadfe067db17238 Mon Sep 17 00:00:00 2001
From: Ishan Mittal <imittal@nvidia.com>
Date: Thu, 25 May 2017 17:36:03 +0530
Subject: [PATCH 08/73] nvidia: Setting proper names to Makefiles & Kconfig

Makefiles and Kconfig files are picked up based
on project name being the extension.

Makefiles and Kconfig files brought over from
t18x have .t18x suffix.

Changed to .nvidia

Bug 200295104

Change-Id: Idade26f293c1faaf74f53b2ab20798c47652bdb9
Signed-off-by: Ishan Mittal <imittal@nvidia.com>
---
 drivers/gpu/host1x/{Makefile.t18x => Makefile.nvidia} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename drivers/gpu/host1x/{Makefile.t18x => Makefile.nvidia} (100%)

diff --git a/drivers/gpu/host1x/Makefile.t18x b/drivers/gpu/host1x/Makefile.nvidia
similarity index 100%
rename from drivers/gpu/host1x/Makefile.t18x
rename to drivers/gpu/host1x/Makefile.nvidia

From b9a1518110c70bc500889c009fc37c2222a2c8e4 Mon Sep 17 00:00:00 2001
From: Ishan Mittal <imittal@nvidia.com>
Date: Thu, 25 May 2017 17:41:08 +0530
Subject: [PATCH 09/73] nvidia: Replace t18x/ in relative path with nvidia/

t18x repo will be discontinued and all code that
resides there is moved into nvidia/.
So, every reference to t18x/ should be corrected
to nvidia/

Bug 200295104

Change-Id: I01ca1118de043511edca829e1cb2b8b52fb398df
Signed-off-by: Vandana Salve <vsalve@nvidia.com>
---
 drivers/gpu/host1x/Makefile.nvidia | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/Makefile.nvidia b/drivers/gpu/host1x/Makefile.nvidia
index 25d54f73..00744ed9 100644
--- a/drivers/gpu/host1x/Makefile.nvidia
+++ b/drivers/gpu/host1x/Makefile.nvidia
@@ -1,5 +1,5 @@
 ccflags-y += -Idrivers/gpu/host1x
-ccflags-y += -I../t18x/drivers/gpu/host1x
+ccflags-y += -I../nvidia/drivers/gpu/host1x
 
 host1x-t186-y = \
 	dev_t186.o \

From 2b60a9ec66f04f35ed7d36b811e2fc5253092f04 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Thu, 14 Sep 2017 10:51:18 -0600
Subject: [PATCH 10/73] nvidia: use kernel overlay features

Update all Kconfig files and Makefiles to rely on the kernel overlay
feature. In particular, don't include any Kconfig files or Makefiles
from other overlays. -I directives in CFLAGS are not yet cleaned up.

Bug 1978395

Change-Id: I425d37d55f8ea61fb3a082a1504f994ff30cec03
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1561187
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/{Makefile.nvidia => Makefile} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename drivers/gpu/host1x/{Makefile.nvidia => Makefile} (71%)

diff --git a/drivers/gpu/host1x/Makefile.nvidia b/drivers/gpu/host1x/Makefile
similarity index 71%
rename from drivers/gpu/host1x/Makefile.nvidia
rename to drivers/gpu/host1x/Makefile
index 00744ed9..0d61d12a 100644
--- a/drivers/gpu/host1x/Makefile.nvidia
+++ b/drivers/gpu/host1x/Makefile
@@ -5,4 +5,4 @@ host1x-t186-y = \
 	dev_t186.o \
 	hw/host1x05.o
 
-obj-$(CONFIG_TEGRA_HOST1X) += host1x-t186.o
+obj-$(CONFIG_ARCH_TEGRA_18x_SOC) += host1x-t186.o

From 40abcf46b7283beecb335c6bc23cd609392de1b3 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Mon, 6 Nov 2017 14:53:42 -0700
Subject: [PATCH 11/73] nvidia: use $(srctree.$(overlay))

Update all Makefiles to make use of the new srctree.$(overlay) variables
to remove hard-coding the path to any overlays. One direct reference
remains in a hard-coded include statement in drivers/misc/tegra-cec/.

Bug 1978395

Change-Id: I5cdce04e019567a9bbfffaf0e92a61bd16806e99
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1593800
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bitan Biswas <bbiswas@nvidia.com>
Reviewed-by: Timo Alho <talho@nvidia.com>
Tested-by: Timo Alho <talho@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 0d61d12a..87be3871 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -1,5 +1,5 @@
 ccflags-y += -Idrivers/gpu/host1x
-ccflags-y += -I../nvidia/drivers/gpu/host1x
+ccflags-y += -I$(srctree.nvidia)/drivers/gpu/host1x
 
 host1x-t186-y = \
 	dev_t186.o \

From 5e5de808d95081d6eb7e936261ace391c3d5ca86 Mon Sep 17 00:00:00 2001
From: Bitan Biswas <bbiswas@nvidia.com>
Date: Wed, 19 Aug 2020 06:14:52 -0700
Subject: [PATCH 12/73] iommu: k5.9: remove archdata.iommu accesses

k5.9 build needs below changes:
read archdata.iommu replaced by iommu_get_domain_for_dev
write archdata.iommu removed as alternative implementation used.

k5.9 build error fixed by explicit include of header <linux/iommu.h>

Change-Id: I464fd4144e4317b10aaa5460482f35e46527339a
Signed-off-by: Bitan Biswas <bbiswas@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2400862
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/hw/channel_hw_t186.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/hw/channel_hw_t186.c b/drivers/gpu/host1x/hw/channel_hw_t186.c
index 256368f2..0cf34540 100644
--- a/drivers/gpu/host1x/hw/channel_hw_t186.c
+++ b/drivers/gpu/host1x/hw/channel_hw_t186.c
@@ -1,7 +1,7 @@
 /*
  * Tegra host1x Channel
  *
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (C) 2016-2020 NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -214,7 +214,7 @@ static int channel_submit(struct host1x_job *job)
 		}
 	}
 
-	streamid = iommu_get_hwid(host->dev->archdata.iommu, host->dev, 0);
+	streamid = iommu_get_hwid(iommu_get_domain_for_dev(host->dev), host->dev, 0);
 	if (streamid >= 0)
 		host1x_ch_writel(ch, streamid, HOST1X_CHANNEL_SMMU_STREAMID);
 

From 9953593c5f913156ac9d95a9d11358ad274f6c6e Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 23 Oct 2020 12:45:27 +0100
Subject: [PATCH 13/73] gpu: Remove unused host1x and tegra-drm drivers

The host1x and tegra-drm drivers located in the linux-nvidia repository
are not referenced by any upper level Makefile and so are never
compiled. Therefore, remove the unused host1x and tegra-drm drivers
from the linux-nvidia repository.

Bug 3156385

Change-Id: I770844f1255541628eb4fc962f1c19866cbae31b
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2435652
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/Makefile                 |   8 -
 drivers/gpu/host1x/dev_t186.c               |  40 ---
 drivers/gpu/host1x/dev_t186.h               |  34 --
 drivers/gpu/host1x/hw/cdma_hw_t186.c        | 340 --------------------
 drivers/gpu/host1x/hw/channel_hw_t186.c     | 302 -----------------
 drivers/gpu/host1x/hw/debug_hw_t186.c       | 288 -----------------
 drivers/gpu/host1x/hw/host1x05.c            |  42 ---
 drivers/gpu/host1x/hw/host1x05.h            |  24 --
 drivers/gpu/host1x/hw/host1x05_hardware.h   | 128 --------
 drivers/gpu/host1x/hw/hw_host1x05_channel.h | 169 ----------
 drivers/gpu/host1x/hw/hw_host1x05_sync.h    | 321 ------------------
 drivers/gpu/host1x/hw/hw_host1x05_uclass.h  | 157 ---------
 drivers/gpu/host1x/hw/intr_hw_t186.c        | 247 --------------
 drivers/gpu/host1x/hw/stremid_hw_t186.c     |  95 ------
 drivers/gpu/host1x/hw/syncpt_hw_t186.c      | 126 --------
 15 files changed, 2321 deletions(-)
 delete mode 100644 drivers/gpu/host1x/Makefile
 delete mode 100644 drivers/gpu/host1x/dev_t186.c
 delete mode 100644 drivers/gpu/host1x/dev_t186.h
 delete mode 100644 drivers/gpu/host1x/hw/cdma_hw_t186.c
 delete mode 100644 drivers/gpu/host1x/hw/channel_hw_t186.c
 delete mode 100644 drivers/gpu/host1x/hw/debug_hw_t186.c
 delete mode 100644 drivers/gpu/host1x/hw/host1x05.c
 delete mode 100644 drivers/gpu/host1x/hw/host1x05.h
 delete mode 100644 drivers/gpu/host1x/hw/host1x05_hardware.h
 delete mode 100644 drivers/gpu/host1x/hw/hw_host1x05_channel.h
 delete mode 100644 drivers/gpu/host1x/hw/hw_host1x05_sync.h
 delete mode 100644 drivers/gpu/host1x/hw/hw_host1x05_uclass.h
 delete mode 100644 drivers/gpu/host1x/hw/intr_hw_t186.c
 delete mode 100644 drivers/gpu/host1x/hw/stremid_hw_t186.c
 delete mode 100644 drivers/gpu/host1x/hw/syncpt_hw_t186.c

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
deleted file mode 100644
index 87be3871..00000000
--- a/drivers/gpu/host1x/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-ccflags-y += -Idrivers/gpu/host1x
-ccflags-y += -I$(srctree.nvidia)/drivers/gpu/host1x
-
-host1x-t186-y = \
-	dev_t186.o \
-	hw/host1x05.o
-
-obj-$(CONFIG_ARCH_TEGRA_18x_SOC) += host1x-t186.o
diff --git a/drivers/gpu/host1x/dev_t186.c b/drivers/gpu/host1x/dev_t186.c
deleted file mode 100644
index d29a48c6..00000000
--- a/drivers/gpu/host1x/dev_t186.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "dev.h"
-#include "dev_t186.h"
-
-#include "hw/host1x05.h"
-
-struct host1x_info host1x05_info = {
-	.nb_channels = 63,
-	.nb_pts = 576,
-	.nb_mlocks = 24,
-	.nb_bases = 16,
-	.init = host1x05_init,
-	.sync_offset = 0x0,
-	.gather_filter_enabled  = true,
-};
-
-void host1x_writel(struct host1x *host1x, u32 v, u32 r)
-{
-	writel(v, host1x->regs + r);
-}
-
-u32 host1x_readl(struct host1x *host1x, u32 r)
-{
-	return readl(host1x->regs + r);
-}
diff --git a/drivers/gpu/host1x/dev_t186.h b/drivers/gpu/host1x/dev_t186.h
deleted file mode 100644
index be9726e8..00000000
--- a/drivers/gpu/host1x/dev_t186.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef HOST1X_DEV_T186_H
-#define HOST1X_DEV_T186_H
-
-#include "dev.h"
-
-extern struct host1x_info host1x05_info;
-
-void host1x_writel(struct host1x *host1x, u32 v, u32 r);
-u32 host1x_readl(struct host1x *host1x, u32 r);
-
-static inline void host1x_hw_sync_get_mutex_owner(struct host1x *host,
-					struct host1x_syncpt *sp,
-					unsigned int mutex_id, bool *cpu, bool *ch,
-					unsigned int *chid)
-{
-	host->syncpt_op->get_mutex_owner(sp, mutex_id, cpu, ch, chid);
-}
-#endif
diff --git a/drivers/gpu/host1x/hw/cdma_hw_t186.c b/drivers/gpu/host1x/hw/cdma_hw_t186.c
deleted file mode 100644
index e903f3e8..00000000
--- a/drivers/gpu/host1x/hw/cdma_hw_t186.c
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * Tegra host1x Command DMA
- *
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/slab.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-
-#include "cdma.h"
-#include "channel.h"
-#include "dev.h"
-#include "debug.h"
-
-/*
- * Put the restart at the end of pushbuffer memory
- */
-static void push_buffer_init(struct push_buffer *pb)
-{
-	*(u32 *)(pb->mapped + pb->size_bytes) = host1x_opcode_restart(0);
-}
-
-/*
- * Increment timedout buffer's syncpt via CPU.
- */
-static void cdma_timeout_handle(struct host1x_cdma *cdma, u32 getptr,
-				u32 nr_slots)
-{
-	struct host1x *host1x = cdma_to_host1x(cdma);
-	struct push_buffer *pb = &cdma->push_buffer;
-
-	/* NOP all the PB slots */
-	while (nr_slots--) {
-		u32 *p = (u32 *)(pb->mapped + getptr);
-		*(p++) = HOST1X_OPCODE_NOP;
-		*(p++) = HOST1X_OPCODE_NOP;
-		dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
-			&pb->phys, getptr);
-		getptr = (getptr + 8) & (pb->size_bytes - 1);
-	}
-	wmb();
-}
-
-/*
- * Start channel DMA
- */
-static void cdma_start(struct host1x_cdma *cdma)
-{
-	struct host1x_channel *ch = cdma_to_channel(cdma);
-
-	if (cdma->running)
-		return;
-
-	cdma->last_pos = cdma->push_buffer.pos;
-
-	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
-			 HOST1X_CHANNEL_DMACTRL);
-
-	/* set base, put and end pointer */
-	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
-	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
-	host1x_ch_writel(ch, cdma->push_buffer.phys +
-			 cdma->push_buffer.size_bytes + 4,
-			 HOST1X_CHANNEL_DMAEND);
-
-	/* reset GET */
-	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
-			 HOST1X_CHANNEL_DMACTRL_DMAGETRST |
-			 HOST1X_CHANNEL_DMACTRL_DMAINITGET,
-			 HOST1X_CHANNEL_DMACTRL);
-
-	host1x_channel_enable_gather_filter(ch);
-
-	/* start the command DMA */
-	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL);
-
-	cdma->running = true;
-}
-
-/*
- * Similar to cdma_start(), but rather than starting from an idle
- * state (where DMA GET is set to DMA PUT), on a timeout we restore
- * DMA GET from an explicit value (so DMA may again be pending).
- */
-static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
-{
-	struct host1x *host1x = cdma_to_host1x(cdma);
-	struct host1x_channel *ch = cdma_to_channel(cdma);
-
-	if (cdma->running)
-		return;
-
-	cdma->last_pos = cdma->push_buffer.pos;
-
-	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
-			 HOST1X_CHANNEL_DMACTRL);
-
-	/* set base, end pointer (all of memory) */
-	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
-	host1x_ch_writel(ch, cdma->push_buffer.phys +
-			 cdma->push_buffer.size_bytes,
-			 HOST1X_CHANNEL_DMAEND);
-
-	/* set GET, by loading the value in PUT (then reset GET) */
-	host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT);
-	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
-			 HOST1X_CHANNEL_DMACTRL_DMAGETRST |
-			 HOST1X_CHANNEL_DMACTRL_DMAINITGET,
-			 HOST1X_CHANNEL_DMACTRL);
-
-	dev_dbg(host1x->dev,
-		"%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", __func__,
-		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
-		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT),
-		cdma->last_pos);
-
-	/* deassert GET reset and set PUT */
-	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
-			 HOST1X_CHANNEL_DMACTRL);
-	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
-
-	host1x_channel_enable_gather_filter(ch);
-
-	/* start the command DMA */
-	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL);
-
-	cdma->running = true;
-}
-
-/*
- * Kick channel DMA into action by writing its PUT offset (if it has changed)
- */
-static void cdma_flush(struct host1x_cdma *cdma)
-{
-	struct host1x_channel *ch = cdma_to_channel(cdma);
-
-	if (cdma->push_buffer.pos != cdma->last_pos) {
-		host1x_ch_writel(ch, cdma->push_buffer.pos,
-				 HOST1X_CHANNEL_DMAPUT);
-		cdma->last_pos = cdma->push_buffer.pos;
-	}
-}
-
-static void cdma_stop(struct host1x_cdma *cdma)
-{
-	struct host1x_channel *ch = cdma_to_channel(cdma);
-
-	mutex_lock(&cdma->lock);
-	if (cdma->running) {
-		host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY);
-		host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
-				 HOST1X_CHANNEL_DMACTRL);
-		cdma->running = false;
-	}
-	mutex_unlock(&cdma->lock);
-}
-
-/*
- * Stops both channel's command processor and CDMA immediately.
- * Also, tears down the channel and resets corresponding module.
- */
-static void cdma_freeze(struct host1x_cdma *cdma)
-{
-	struct host1x *host = cdma_to_host1x(cdma);
-	struct host1x_channel *ch = cdma_to_channel(cdma);
-	u32 cmdproc_stop;
-
-	if (cdma->torndown && !cdma->running) {
-		dev_warn(host->dev, "Already torn down\n");
-		return;
-	}
-
-	dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id);
-
-	cmdproc_stop = host1x_ch_readl(ch, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop |= BIT(0);
-	host1x_ch_writel(ch, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
-
-	dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
-		__func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
-		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT),
-		cdma->last_pos);
-
-	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
-			 HOST1X_CHANNEL_DMACTRL);
-
-	host1x_ch_writel(ch, BIT(0), HOST1X_SYNC_CH_TEARDOWN);
-
-	cdma->running = false;
-	cdma->torndown = true;
-}
-
-static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
-{
-	struct host1x *host1x = cdma_to_host1x(cdma);
-	struct host1x_channel *ch = cdma_to_channel(cdma);
-	u32 cmdproc_stop;
-
-	dev_dbg(host1x->dev,
-		"resuming channel (id %d, DMAGET restart = 0x%x)\n",
-		ch->id, getptr);
-
-	cmdproc_stop = host1x_ch_readl(ch, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop &= ~(BIT(0));
-	host1x_ch_writel(ch, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
-
-	cdma->torndown = false;
-	cdma_timeout_restart(cdma, getptr);
-}
-
-/*
- * If this timeout fires, it indicates the current sync_queue entry has
- * exceeded its TTL and the userctx should be timed out and remaining
- * submits already issued cleaned up (future submits return an error).
- */
-static void cdma_timeout_handler(struct work_struct *work)
-{
-	struct host1x_cdma *cdma;
-	struct host1x *host1x;
-	struct host1x_channel *ch;
-	bool has_timedout = 0;
-
-	u32 prev_cmdproc, cmdproc_stop;
-
-	unsigned int i;
-
-	cdma = container_of(to_delayed_work(work), struct host1x_cdma,
-			    timeout.wq);
-	host1x = cdma_to_host1x(cdma);
-	ch = cdma_to_channel(cdma);
-
-	host1x_debug_dump(cdma_to_host1x(cdma));
-
-	mutex_lock(&cdma->lock);
-
-	if (!cdma->timeout.client) {
-		dev_dbg(host1x->dev,
-			"cdma_timeout: expired, but has no clientid\n");
-		mutex_unlock(&cdma->lock);
-		return;
-	}
-
-	/* stop processing to get a clean snapshot */
-	prev_cmdproc = host1x_ch_readl(ch, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop = prev_cmdproc | BIT(0);
-	host1x_ch_writel(ch, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
-
-	dev_dbg(host1x->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n",
-		prev_cmdproc, cmdproc_stop);
-
-	for (i = 0; i < cdma->timeout.num_syncpts; ++i) {
-		u32 id = cdma->timeout.syncpts[i].id;
-		u32 end = cdma->timeout.syncpts[i].end;
-		struct host1x_syncpt *syncpt = host1x_syncpt_get(host1x, id);
-
-		host1x_syncpt_load(syncpt);
-
-		has_timedout = !host1x_syncpt_is_expired(syncpt, end);
-		if (has_timedout)
-			break;
-	}
-
-	/* has buffer actually completed? */
-	if (!has_timedout) {
-		dev_dbg(host1x->dev,
-			"cdma_timeout: expired, but buffer had completed\n");
-		/* restore */
-		cmdproc_stop = prev_cmdproc & ~(BIT(0));
-		host1x_ch_writel(ch, cmdproc_stop,
-				   HOST1X_SYNC_CMDPROC_STOP);
-		mutex_unlock(&cdma->lock);
-		return;
-	}
-
-	for (i = 0; i < cdma->timeout.num_syncpts; ++i) {
-		u32 id = cdma->timeout.syncpts[i].id;
-		struct host1x_syncpt *syncpt = host1x_syncpt_get(host1x, id);
-		u32 syncpt_val = host1x_syncpt_read_min(syncpt);
-
-		dev_warn(host1x->dev, "%s: timeout: %d (%s), HW thresh %d, done %d\n",
-			__func__, syncpt->id, syncpt->name,
-			syncpt_val, syncpt_val);
-	}
-
-	/* stop HW, resetting channel/module */
-	host1x_hw_cdma_freeze(host1x, cdma);
-
-	host1x_cdma_update_sync_queue(cdma, ch->dev);
-	mutex_unlock(&cdma->lock);
-}
-
-/*
- * Init timeout resources
- */
-static int cdma_timeout_init(struct host1x_cdma *cdma)
-{
-	INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler);
-	cdma->timeout.initialized = true;
-
-	return 0;
-}
-
-/*
- * Clean up timeout resources
- */
-static void cdma_timeout_destroy(struct host1x_cdma *cdma)
-{
-	if (cdma->timeout.initialized)
-		cancel_delayed_work(&cdma->timeout.wq);
-	cdma->timeout.initialized = false;
-}
-
-static const struct host1x_cdma_ops host1x_cdma_t186_ops = {
-	.start = cdma_start,
-	.stop = cdma_stop,
-	.flush = cdma_flush,
-
-	.timeout_init = cdma_timeout_init,
-	.timeout_destroy = cdma_timeout_destroy,
-	.freeze = cdma_freeze,
-	.resume = cdma_resume,
-	.timeout_handle = cdma_timeout_handle,
-};
-
-static const struct host1x_pushbuffer_ops host1x_pushbuffer_t186_ops = {
-	.init = push_buffer_init,
-};
diff --git a/drivers/gpu/host1x/hw/channel_hw_t186.c b/drivers/gpu/host1x/hw/channel_hw_t186.c
deleted file mode 100644
index 0cf34540..00000000
--- a/drivers/gpu/host1x/hw/channel_hw_t186.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Tegra host1x Channel
- *
- * Copyright (C) 2016-2020 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/host1x.h>
-#include <linux/slab.h>
-#include <linux/iommu.h>
-#include <linux/pm_runtime.h>
-
-#include <trace/events/host1x.h>
-
-#include "dev_t186.h"
-#include "channel.h"
-#include "intr.h"
-#include "job.h"
-
-#define TRACE_MAX_LENGTH 128U
-
-static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
-			       u32 offset, u32 words)
-{
-	struct device *dev = cdma_to_channel(cdma)->dev;
-	void *mem = NULL;
-
-	if (host1x_debug_trace_cmdbuf)
-		mem = host1x_bo_mmap(bo);
-
-	if (mem) {
-		u32 i;
-		/*
-		 * Write in batches of 128 as there seems to be a limit
-		 * of how much you can output to ftrace at once.
-		 */
-		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
-			u32 num_words = min(words - i, TRACE_MAX_LENGTH);
-			u32 next_offset = offset + i * sizeof(u32);
-
-			trace_host1x_cdma_push_gather(dev_name(dev), bo,
-						      num_words,
-						      next_offset,
-						      mem);
-		}
-
-		host1x_bo_munmap(bo, mem);
-	}
-}
-
-static void channel_push_wait(struct host1x_channel *channel,
-			     u32 id, u32 thresh)
-{
-	host1x_cdma_push(&channel->cdma,
-			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
-				HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, 1),
-			 thresh);
-	host1x_cdma_push(&channel->cdma,
-			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
-				HOST1X_UCLASS_WAIT_SYNCPT_32, 1),
-			 id);
-}
-
-static inline void serialize(struct host1x_job *job)
-{
-	struct host1x_channel *ch = job->channel;
-	struct host1x *host = dev_get_drvdata(ch->dev->parent);
-	unsigned int i;
-
-	for (i = 0; i < job->num_syncpts; ++i) {
-		u32 syncpt_id = job->syncpts[i].id;
-		struct host1x_syncpt *syncpt =
-			host1x_syncpt_get(host, syncpt_id);
-
-		/*
-		 * Force serialization by inserting a host wait for the
-		 * previous job to finish before this one can commence.
-		 */
-		channel_push_wait(ch, syncpt_id, host1x_syncpt_read_max(syncpt));
-	}
-}
-
-static void add_sync_waits(struct host1x_job *job)
-{
-	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
-	unsigned int i;
-
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
-
-		if (g->pre_fence)
-			host1x_sync_fence_wait(g->pre_fence, host,
-					       job->channel);
-	}
-}
-
-static void submit_gathers(struct host1x_job *job)
-{
-	struct host1x_cdma *cdma = &job->channel->cdma;
-	unsigned int i;
-	u32 cur_class = 0;
-
-	cur_class = HOST1X_CLASS_HOST1X;
-	host1x_cdma_push(cdma,
-		host1x_opcode_acquire_mlock(cur_class),
-		host1x_opcode_setclass(cur_class, 0, 0));
-
-	add_sync_waits(job);
-
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
-		u32 op1 = host1x_opcode_gather(g->words);
-		u32 op2 = g->base + g->offset;
-
-		/* add a setclass for modules that require it */
-		if (cur_class != g->class_id) {
-			if (cur_class)
-				host1x_cdma_push(cdma,
-					HOST1X_OPCODE_NOP,
-					host1x_opcode_release_mlock(cur_class));
-
-			host1x_cdma_push(cdma,
-				host1x_opcode_acquire_mlock(g->class_id),
-				host1x_opcode_setclass(g->class_id, 0, 0));
-			cur_class = g->class_id;
-		}
-
-		trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff);
-		host1x_cdma_push(cdma, op1, op2);
-	}
-
-	if (job->serialize)
-		serialize(job);
-
-	if (cur_class)
-		host1x_cdma_push(cdma,
-			HOST1X_OPCODE_NOP,
-			host1x_opcode_release_mlock(cur_class));
-}
-
-static inline void synchronize_syncpt_base(struct host1x_job *job)
-{
-	struct host1x_channel *ch = job->channel;
-	struct host1x *host = dev_get_drvdata(ch->dev->parent);
-	unsigned int i;
-
-	for (i = 0; i < job->num_syncpts; ++i) {
-		u32 syncpt_id = job->syncpts[i].id;
-		struct host1x_syncpt *syncpt =
-			host1x_syncpt_get(host, syncpt_id);
-		u32 base_id, value;
-
-		if (!syncpt->base)
-			continue;
-
-		value = host1x_syncpt_read_max(syncpt);
-		base_id = syncpt->base->id;
-
-		host1x_cdma_push(&job->channel->cdma,
-			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
-				HOST1X_UCLASS_LOAD_SYNCPT_BASE, 1),
-			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(base_id) |
-			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value));
-	}
-}
-
-static int channel_submit(struct host1x_job *job)
-{
-	struct host1x_channel *ch = job->channel;
-	struct host1x_syncpt *syncpt;
-	u32 prev_max = 0;
-	int err;
-	struct host1x_waitlist *completed_waiter[job->num_syncpts];
-	struct host1x *host = dev_get_drvdata(ch->dev->parent);
-	unsigned int i;
-	int streamid = 0;
-
-	trace_host1x_channel_submit(dev_name(ch->dev),
-				    job->num_gathers, job->num_relocs,
-				    job->num_waitchk, job->syncpts[0].id,
-				    job->syncpts[0].incrs);
-
-	/* before error checks, return current max */
-	syncpt = host1x_syncpt_get(host, job->syncpts[0].id);
-	prev_max = host1x_syncpt_read_max(syncpt);
-
-	/* keep device powered on */
-	for (i = 0; i < job->num_syncpts; ++i)
-		pm_runtime_get_sync(ch->dev);
-
-	/* get submit lock */
-	err = mutex_lock_interruptible(&ch->submitlock);
-	if (err)
-		goto error;
-
-	for (i = 0; i < job->num_syncpts; i++) {
-		completed_waiter[i] = kzalloc(sizeof(*completed_waiter[i]),
-					      GFP_KERNEL);
-		if (!completed_waiter[i]) {
-			mutex_unlock(&ch->submitlock);
-			err = -ENOMEM;
-			goto error;
-		}
-	}
-
-	streamid = iommu_get_hwid(iommu_get_domain_for_dev(host->dev), host->dev, 0);
-	if (streamid >= 0)
-		host1x_ch_writel(ch, streamid, HOST1X_CHANNEL_SMMU_STREAMID);
-
-	/* begin a CDMA submit */
-	err = host1x_cdma_begin(&ch->cdma, job);
-	if (err) {
-		mutex_unlock(&ch->submitlock);
-		goto error;
-	}
-
-	/* Synchronize base register to allow using it for relative waiting */
-	synchronize_syncpt_base(job);
-
-	/* Increment syncpoint maximum values */
-	for (i = 0; i < job->num_syncpts; ++i) {
-		u32 id;
-		u32 incrs;
-
-		id = job->syncpts[i].id;
-		incrs = job->syncpts[i].incrs;
-		syncpt = host1x_syncpt_get(host, id);
-		job->syncpts[i].end = host1x_syncpt_incr_max(syncpt, incrs);
-	}
-
-	submit_gathers(job);
-
-	/* end CDMA submit & stash pinned hMems into sync queue */
-	host1x_cdma_end(&ch->cdma, job);
-
-	trace_host1x_channel_submitted(dev_name(ch->dev), prev_max,
-				       job->syncpts[0].end);
-
-	/* schedule submit complete interrupts */
-	for (i = 0; i < job->num_syncpts; ++i) {
-		u32 syncpt_id = job->syncpts[i].id;
-		u32 syncpt_end = job->syncpts[i].end;
-
-		err = host1x_intr_add_action(host, syncpt_id, syncpt_end,
-					     HOST1X_INTR_ACTION_SUBMIT_COMPLETE,
-					     ch, completed_waiter[i], NULL);
-		completed_waiter[i] = NULL;
-		WARN(err, "Failed to set submit complete interrupt");
-	}
-
-	mutex_unlock(&ch->submitlock);
-
-	return 0;
-
-error:
-	for (i = 0; i < job->num_syncpts; i++)
-		kfree(completed_waiter[i]);
-	return err;
-}
-
-static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
-			       unsigned int index)
-{
-	ch->id = index;
-	mutex_init(&ch->reflock);
-	mutex_init(&ch->submitlock);
-
-	ch->regs = dev->regs + (HOST1X_CHANNEL_CH_APERTURE_START +
-				index * HOST1X_CHANNEL_CH_APERTURE_SIZE);
-	return 0;
-}
-
-static void channel_enable_gather_filter(struct host1x_channel *ch)
-{
-	struct host1x *host = dev_get_drvdata(ch->dev->parent);
-	u32 val;
-
-	val = host1x_readl(host, HOST1X_CHANNEL_FILTER_GBUFFER
-				 + BIT_WORD(ch->id) * sizeof(u32));
-
-	host1x_writel(host, val | BIT_MASK(ch->id),
-				HOST1X_CHANNEL_FILTER_GBUFFER
-				+ BIT_WORD(ch->id) * sizeof(u32));
-}
-
-static const struct host1x_channel_ops host1x_channel_t186_ops = {
-	.init = host1x_channel_init,
-	.submit = channel_submit,
-	.push_wait = channel_push_wait,
-	.enable_gather_filter = channel_enable_gather_filter,
-};
diff --git a/drivers/gpu/host1x/hw/debug_hw_t186.c b/drivers/gpu/host1x/hw/debug_hw_t186.c
deleted file mode 100644
index d6111668..00000000
--- a/drivers/gpu/host1x/hw/debug_hw_t186.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (C) 2010 Google, Inc.
- * Author: Erik Gilling <konkers@android.com>
- *
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include "dev.h"
-#include "debug.h"
-#include "cdma.h"
-#include "channel.h"
-
-#define HOST1X_DEBUG_MAX_PAGE_OFFSET 102400
-
-enum {
-	HOST1X_OPCODE_SETCLASS	= 0x00,
-	HOST1X_OPCODE_INCR	= 0x01,
-	HOST1X_OPCODE_NONINCR	= 0x02,
-	HOST1X_OPCODE_MASK	= 0x03,
-	HOST1X_OPCODE_IMM	= 0x04,
-	HOST1X_OPCODE_RESTART	= 0x05,
-	HOST1X_OPCODE_GATHER	= 0x06,
-	HOST1X_OPCODE_EXTEND	= 0x0e,
-};
-
-enum {
-	HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK	= 0x00,
-	HOST1X_OPCODE_EXTEND_RELEASE_MLOCK	= 0x01,
-};
-
-static unsigned int show_channel_command(struct output *o, u32 val)
-{
-	unsigned mask;
-	unsigned subop;
-
-	switch (val >> 28) {
-	case HOST1X_OPCODE_SETCLASS:
-		mask = val & 0x3f;
-		if (mask) {
-			host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
-					    val >> 6 & 0x3ff,
-					    val >> 16 & 0xfff, mask);
-			return hweight8(mask);
-		} else {
-			host1x_debug_output(o, "SETCL(class=%03x)\n",
-					    val >> 6 & 0x3ff);
-			return 0;
-		}
-
-	case HOST1X_OPCODE_INCR:
-		host1x_debug_output(o, "INCR(offset=%03x, [",
-				    val >> 16 & 0xfff);
-		return val & 0xffff;
-
-	case HOST1X_OPCODE_NONINCR:
-		host1x_debug_output(o, "NONINCR(offset=%03x, [",
-				    val >> 16 & 0xfff);
-		return val & 0xffff;
-
-	case HOST1X_OPCODE_MASK:
-		mask = val & 0xffff;
-		host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [",
-				    val >> 16 & 0xfff, mask);
-		return hweight16(mask);
-
-	case HOST1X_OPCODE_IMM:
-		host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n",
-				    val >> 16 & 0xfff, val & 0xffff);
-		return 0;
-
-	case HOST1X_OPCODE_RESTART:
-		host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4);
-		return 0;
-
-	case HOST1X_OPCODE_GATHER:
-		host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
-				    val >> 16 & 0xfff, val >> 15 & 0x1,
-				    val >> 14 & 0x1, val & 0x3fff);
-		return 1;
-
-	case HOST1X_OPCODE_EXTEND:
-		subop = val >> 24 & 0xf;
-		if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
-			host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n",
-					    val & 0xff);
-		else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK)
-			host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n",
-					    val & 0xff);
-		else
-			host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val);
-		return 0;
-
-	default:
-		return 0;
-	}
-}
-
-static void show_gather(struct output *o, phys_addr_t phys_addr,
-			unsigned int words, struct host1x_cdma *cdma,
-			phys_addr_t pin_addr, u32 *map_addr)
-{
-	/* Map dmaget cursor to corresponding mem handle */
-	u32 offset = phys_addr - pin_addr;
-	unsigned int data_count = 0, i;
-
-	/*
-	 * Sometimes we're given different hardware address to the same
-	 * page - in these cases the offset will get an invalid number and
-	 * we just have to bail out.
-	 */
-	if (offset > HOST1X_DEBUG_MAX_PAGE_OFFSET) {
-		host1x_debug_output(o, "[address mismatch]\n");
-		return;
-	}
-
-	for (i = 0; i < words; i++) {
-		u32 addr = phys_addr + i * 4;
-		u32 val = *(map_addr + offset / 4 + i);
-
-		if (!data_count) {
-			host1x_debug_output(o, "%08x: %08x:", addr, val);
-			data_count = show_channel_command(o, val);
-		} else {
-			host1x_debug_output(o, "%08x%s", val,
-					    data_count > 0 ? ", " : "])\n");
-			data_count--;
-		}
-	}
-}
-
-static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
-{
-	struct host1x_job *job;
-
-	list_for_each_entry(job, &cdma->sync_queue, list) {
-		int i;
-		host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
-				    job,
-				    (job->syncpts ? job->syncpts[0].id : 0),
-				    (job->syncpts ? job->syncpts[0].end : 0),
-				    job->first_get, job->timeout,
-				    job->num_slots, job->num_unpins);
-
-		for (i = 0; i < job->num_gathers; i++) {
-			struct host1x_job_gather *g = &job->gathers[i];
-			u32 *mapped;
-
-			if (job->gather_copy_mapped)
-				mapped = (u32 *)job->gather_copy_mapped;
-			else
-				mapped = host1x_bo_mmap(g->bo);
-
-			if (!mapped) {
-				host1x_debug_output(o, "[could not mmap]\n");
-				continue;
-			}
-
-			host1x_debug_output(o, "    GATHER at %pad+%#x, %d words\n",
-					    &g->base, g->offset, g->words);
-
-			show_gather(o, g->base + g->offset, g->words, cdma,
-				    g->base, mapped);
-
-			if (!job->gather_copy_mapped)
-				host1x_bo_munmap(g->bo, mapped);
-		}
-	}
-}
-
-static void host1x_debug_show_channel_cdma(struct host1x *host,
-					   struct host1x_channel *ch,
-					   struct output *o)
-{
-	struct host1x_cdma *cdma = &ch->cdma;
-	u32 val;
-
-	host1x_debug_output(o, "Host1x basic channel registers: \n");
-
-	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
-	host1x_debug_output(o, "CMDFIFO_STAT_0: %08x\n", val);
-	val = host1x_ch_readl(ch, HOST1X_CHANNEL_RDATA);
-	host1x_debug_output(o, "CMDFIFO_RDATA_0: %08x\n", val);
-	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_OFFSET);
-	host1x_debug_output(o, "CMDP_OFFSET_0: %08x\n", val);
-	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CLASS);
-	host1x_debug_output(o, "CMDP_CLASS_0: %08x\n", val);
-	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CHANNELSTAT);
-	host1x_debug_output(o, "CHANNELSTAT_0: %08x\n", val);
-
-	show_channel_gathers(o, cdma);
-	host1x_debug_output(o, "\n");
-}
-
-static void host1x_debug_show_channel_fifo(struct host1x *host,
-					   struct host1x_channel *ch,
-					   struct output *o)
-{
-	u32 val, rd_ptr, wr_ptr, start, end, temp;
-	unsigned int data_count = 0;
-
-	host1x_debug_output(o, "%d: fifo:\n", ch->id);
-
-	temp = host1x_sync_readl(host, HOST1X_THOST_COMMON_ICG_EN_OVERRIDE_0);
-	host1x_sync_writel(host, 0x1, HOST1X_THOST_COMMON_ICG_EN_OVERRIDE_0);
-
-	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
-	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
-	if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) {
-		host1x_debug_output(o, "[empty]\n");
-		return;
-	}
-
-	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-	host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
-			   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id),
-			   HOST1X_SYNC_CFPEEK_CTRL);
-
-	val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS);
-	rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val);
-	wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val);
-
-	val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id));
-	start = HOST1X_SYNC_CF_SETUP_BASE_V(val);
-	end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val);
-
-	do {
-		host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-		host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
-				   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) |
-				   HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr),
-				   HOST1X_SYNC_CFPEEK_CTRL);
-		val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);
-
-		if (!data_count) {
-			host1x_debug_output(o, "%08x:", val);
-			data_count = show_channel_command(o, val);
-		} else {
-			host1x_debug_output(o, "%08x%s", val,
-					    data_count > 0 ? ", " : "])\n");
-			data_count--;
-		}
-
-		if (rd_ptr == end)
-			rd_ptr = start;
-		else
-			rd_ptr++;
-	} while (rd_ptr != wr_ptr);
-
-	if (data_count)
-		host1x_debug_output(o, ", ...])\n");
-	host1x_debug_output(o, "\n");
-
-	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-
-	host1x_sync_writel(host, temp, HOST1X_THOST_COMMON_ICG_EN_OVERRIDE_0);
-}
-
-static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
-{
-	int i;
-	unsigned int chid;
-	bool cpu, ch;
-
-	host1x_debug_output(o, "---- mlocks ----\n");
-	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
-		host1x_hw_sync_get_mutex_owner(host, host->syncpt,
-				i, &cpu, &ch, &chid);
-		if (ch)
-			host1x_debug_output(o, "%d: locked by channel %d\n", i, chid);
-	}
-	host1x_debug_output(o, "\n");
-}
-
-static const struct host1x_debug_ops host1x_debug_t186_ops = {
-	.show_channel_cdma = host1x_debug_show_channel_cdma,
-	.show_channel_fifo = host1x_debug_show_channel_fifo,
-	.show_mlocks = host1x_debug_show_mlocks,
-};
diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c
deleted file mode 100644
index 3a790bee..00000000
--- a/drivers/gpu/host1x/hw/host1x05.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/* include hw specification */
-#include "host1x05.h"
-#include "host1x05_hardware.h"
-
-/* include code */
-#include "cdma_hw_t186.c"
-#include "channel_hw_t186.c"
-#include "debug_hw_t186.c"
-#include "intr_hw_t186.c"
-#include "syncpt_hw_t186.c"
-#include "stremid_hw_t186.c"
-
-#include "dev.h"
-
-int host1x05_init(struct host1x *host)
-{
-	host->dev_op = &host1x_dev_t186_ops;
-	host->channel_op = &host1x_channel_t186_ops;
-	host->cdma_op = &host1x_cdma_t186_ops;
-	host->cdma_pb_op = &host1x_pushbuffer_t186_ops;
-	host->syncpt_op = &host1x_syncpt_t186_ops;
-	host->intr_op = &host1x_intr_t186_ops;
-	host->debug_op = &host1x_debug_t186_ops;
-
-	return 0;
-}
diff --git a/drivers/gpu/host1x/hw/host1x05.h b/drivers/gpu/host1x/hw/host1x05.h
deleted file mode 100644
index e1b84214..00000000
--- a/drivers/gpu/host1x/hw/host1x05.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef HOST1X_HOST1X05_H
-#define HOST1X_HOST1X05_H
-
-struct host1x;
-
-int host1x05_init(struct host1x *host);
-
-#endif
diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h
deleted file mode 100644
index add00014..00000000
--- a/drivers/gpu/host1x/hw/host1x05_hardware.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __HOST1X_HOST1X05_HARDWARE_H
-#define __HOST1X_HOST1X05_HARDWARE_H
-
-#include <linux/types.h>
-#include <linux/bitops.h>
-
-#include "hw_host1x05_channel.h"
-#include "hw_host1x05_sync.h"
-#include "hw_host1x05_uclass.h"
-
-static inline u32 host1x_class_host_wait_syncpt(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_wait_syncpt_indx_f(indx)
-		| host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-		| host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-	unsigned indx, unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_wait_syncpt_base_indx_f(indx)
-		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-	unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-	unsigned cond, unsigned indx)
-{
-	return host1x_uclass_incr_syncpt_cond_f(cond)
-		| host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-	unsigned class_id, unsigned offset, unsigned mask)
-{
-	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-	return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-	return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-	return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-	return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-		host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-	return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-	return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_acquire_mlock(unsigned id)
-{
-	return (14 << 28) | id;
-}
-
-static inline u32 host1x_opcode_release_mlock(unsigned id)
-{
-	return (14 << 28) | (1 << 24) | id;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
-
-#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
deleted file mode 100644
index 8c3499f9..00000000
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
- /*
-  * Function naming determines intended use:
-  *
-  *     <x>_r(void) : Returns the offset for register <x>.
-  *
-  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
-  *
-  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
-  *
-  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
-  *         and masked to place it at field <y> of register <x>.  This value
-  *         can be |'d with others to produce a full register value for
-  *         register <x>.
-  *
-  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
-  *         value can be ~'d and then &'d to clear the value of field <y> for
-  *         register <x>.
-  *
-  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
-  *         to place it at field <y> of register <x>.  This value can be |'d
-  *         with others to produce a full register value for <x>.
-  *
-  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
-  *         <x> value 'r' after being shifted to place its LSB at bit 0.
-  *         This value is suitable for direct comparison with other unshifted
-  *         values appropriate for use in field <y> of register <x>.
-  *
-  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
-  *         field <y> of register <x>.  This value is suitable for direct
-  *         comparison with unshifted values appropriate for use in field <y>
-  *         of register <x>.
-  */
-
-#ifndef HOST1X_HW_HOST1X05_CHANNEL_H
-#define HOST1X_HW_HOST1X05_CHANNEL_H
-
-static inline u32 host1x_channel_ch_aperture_start_r(void)
-{
-	return 0x10000;
-}
-#define HOST1X_CHANNEL_CH_APERTURE_START \
-	host1x_channel_ch_aperture_start_r()
-static inline u32 host1x_channel_ch_aperture_size_r(void)
-{
-	return 0x100;
-}
-#define HOST1X_CHANNEL_CH_APERTURE_SIZE \
-	host1x_channel_ch_aperture_size_r()
-static inline u32 host1x_channel_fifostat_r(void)
-{
-	return 0x24;
-}
-#define HOST1X_CHANNEL_FIFOSTAT \
-	host1x_channel_fifostat_r()
-static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
-{
-	return (r >> 13) & 0x1;
-}
-#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
-	host1x_channel_fifostat_cfempty_v(r)
-static inline u32 host1x_channel_rdata_r(void)
-{
-	return 0x28;
-}
-#define HOST1X_CHANNEL_RDATA \
-	host1x_channel_rdata_r()
-static inline u32 host1x_channel_cmdp_offset_r(void)
-{
-	return 0x30;
-}
-#define HOST1X_CHANNEL_CMDP_OFFSET \
-	host1x_channel_cmdp_offset_r()
-static inline u32 host1x_channel_cmdp_class_r(void)
-{
-	return 0x34;
-}
-#define HOST1X_CHANNEL_CMDP_CLASS \
-	host1x_channel_cmdp_class_r()
-static inline u32 host1x_channel_cmdp_channelstat_r(void)
-{
-	return 0x38;
-}
-#define HOST1X_CHANNEL_CMDP_CHANNELSTAT \
-	host1x_channel_cmdp_channelstat_r()
-static inline u32 host1x_channel_dmastart_r(void)
-{
-	return 0x0;
-}
-#define HOST1X_CHANNEL_DMASTART \
-	host1x_channel_dmastart_r()
-static inline u32 host1x_channel_dmaput_r(void)
-{
-	return 0x8;
-}
-#define HOST1X_CHANNEL_DMAPUT \
-	host1x_channel_dmaput_r()
-static inline u32 host1x_channel_dmaget_r(void)
-{
-	return 0x10;
-}
-#define HOST1X_CHANNEL_DMAGET \
-	host1x_channel_dmaget_r()
-static inline u32 host1x_channel_dmaend_r(void)
-{
-	return 0x18;
-}
-#define HOST1X_CHANNEL_DMAEND \
-	host1x_channel_dmaend_r()
-static inline u32 host1x_channel_dmactrl_r(void)
-{
-	return 0x20;
-}
-#define HOST1X_CHANNEL_DMACTRL \
-	host1x_channel_dmactrl_r()
-static inline u32 host1x_channel_dmactrl_dmastop(void)
-{
-	return 1 << 0;
-}
-#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
-	host1x_channel_dmactrl_dmastop()
-static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
-{
-	return (r >> 0) & 0x1;
-}
-#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
-	host1x_channel_dmactrl_dmastop_v(r)
-static inline u32 host1x_channel_dmactrl_dmagetrst(void)
-{
-	return 1 << 1;
-}
-#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
-	host1x_channel_dmactrl_dmagetrst()
-static inline u32 host1x_channel_dmactrl_dmainitget(void)
-{
-	return 1 << 2;
-}
-#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
-	host1x_channel_dmactrl_dmainitget()
-static inline u32 host1x_channel_smmu_streamid_r(void)
-{
-	return 0x84;
-}
-#define HOST1X_CHANNEL_SMMU_STREAMID \
-	host1x_channel_smmu_streamid_r()
-static inline u32 host1x_channel_filter_gbuffer_r(void)
-{
-	return 0x2020;
-}
-#define HOST1X_CHANNEL_FILTER_GBUFFER \
-	host1x_channel_filter_gbuffer_r()
-
-#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_sync.h b/drivers/gpu/host1x/hw/hw_host1x05_sync.h
deleted file mode 100644
index 55fe9885..00000000
--- a/drivers/gpu/host1x/hw/hw_host1x05_sync.h
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
- /*
-  * Function naming determines intended use:
-  *
-  *     <x>_r(void) : Returns the offset for register <x>.
-  *
-  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
-  *
-  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
-  *
-  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
-  *         and masked to place it at field <y> of register <x>.  This value
-  *         can be |'d with others to produce a full register value for
-  *         register <x>.
-  *
-  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
-  *         value can be ~'d and then &'d to clear the value of field <y> for
-  *         register <x>.
-  *
-  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
-  *         to place it at field <y> of register <x>.  This value can be |'d
-  *         with others to produce a full register value for <x>.
-  *
-  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
-  *         <x> value 'r' after being shifted to place its LSB at bit 0.
-  *         This value is suitable for direct comparison with other unshifted
-  *         values appropriate for use in field <y> of register <x>.
-  *
-  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
-  *         field <y> of register <x>.  This value is suitable for direct
-  *         comparison with unshifted values appropriate for use in field <y>
-  *         of register <x>.
-  */
-
-#ifndef HOST1X_HW_HOST1X05_SYNC_H
-#define HOST1X_HW_HOST1X05_SYNC_H
-
-#define REGISTER_STRIDE	4
-
-static inline u32 host1x_sync_intstatus_r(void)
-{
-	return 0x1c;
-}
-#define HOST1X_SYNC_INTSTATUS \
-	host1x_sync_intstatus_r()
-static inline u32 host1x_sync_intmask_r(void)
-{
-	return 0x30;
-}
-#define HOST1X_SYNC_INTMASK \
-	host1x_sync_intmask_r()
-static inline u32 host1x_sync_intc0mask_r(void)
-{
-	return 0x4;
-}
-#define HOST1X_SYNC_INTC0MASK \
-	host1x_sync_intc0mask_r()
-static inline u32 host1x_sync_intgmask_r(void)
-{
-	return 0x44;
-}
-#define HOST1X_SYNC_INTGMASK \
-	host1x_sync_intgmask_r()
-static inline u32 host1x_sync_syncpt_intgmask_r(void)
-{
-	return 0x50;
-}
-#define HOST1X_SYNC_SYNCPT_INTGMASK \
-	host1x_sync_syncpt_intgmask_r()
-static inline u32 host1x_sync_intstatus_ip_read_int_v(u32 r)
-{
-	return (r >> 0) & 0x1;
-}
-#define HOST1X_SYNC_INTSTATUS_IP_READ_INT_V(r) \
-	host1x_sync_intstatus_ip_read_int_v(r)
-static inline u32 host1x_sync_intstatus_ip_write_int_v(u32 r)
-{
-	return (r >> 1) & 0x1;
-}
-#define HOST1X_SYNC_INTSTATUS_IP_WRITE_INT_V(r) \
-	host1x_sync_intstatus_ip_write_int_v(r)
-static inline u32 host1x_sync_intstatus_illegal_pb_access_v(u32 r)
-{
-	return (r >> 28) & 0x1;
-}
-#define HOST1X_SYNC_INTSTATUS_ILLEGAL_PB_ACCESS_V(r) \
-	host1x_sync_intstatus_illegal_pb_access_v(r)
-static inline u32 host1x_sync_illegal_syncpt_access_frm_pb_r(void)
-{
-	return 0x2270;
-}
-#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB \
-	host1x_sync_illegal_syncpt_access_frm_pb_r()
-static inline u32 host1x_sync_illegal_syncpt_access_frm_pb_syncpt_v(u32 r)
-{
-	return (r >> 16) & 0x3ff;
-}
-#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB_SYNCPT_V(r) \
-	host1x_sync_illegal_syncpt_access_frm_pb_syncpt_v(r)
-static inline u32 host1x_sync_illegal_syncpt_access_frm_pb_ch_v(u32 r)
-{
-	return (r >> 10) & 0x3f;
-}
-#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB_CH_V(r) \
-	host1x_sync_illegal_syncpt_access_frm_pb_ch_v(r)
-static inline u32 host1x_sync_intstatus_illegal_client_access_v(u32 r)
-{
-	return (r >> 30) & 0x1;
-}
-#define HOST1X_SYNC_INTSTATUS_ILLEGAL_CLIENT_ACCESS_V(r) \
-	host1x_sync_intstatus_illegal_client_access_v(r)
-static inline u32 host1x_sync_illegal_syncpt_access_frm_client_r(void)
-{
-	return 0x2268;
-}
-#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT \
-	host1x_sync_illegal_syncpt_access_frm_client_r()
-static inline u32 host1x_sync_illegal_syncpt_access_frm_client_syncpt_v(u32 r)
-{
-	return (r >> 16) & 0x3ff;
-}
-#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT_SYNCPT_V(r) \
-	host1x_sync_illegal_syncpt_access_frm_client_syncpt_v(r)
-static inline u32 host1x_sync_illegal_syncpt_access_frm_client_ch_v(u32 r)
-{
-	return (r >> 10) & 0x3f;
-}
-#define HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT_CH_V(r) \
-	host1x_sync_illegal_syncpt_access_frm_client_ch_v(r)
-static inline u32 host1x_sync_syncpt_r(unsigned int id)
-{
-	return 0x18080 + id * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_SYNCPT(id) \
-	host1x_sync_syncpt_r(id)
-static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
-{
-	return 0x16464 + id * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
-	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
-static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
-{
-	return 0x16590 + id * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
-	host1x_sync_syncpt_thresh_int_disable_r(id)
-static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
-{
-	return 0x1652c + id * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
-	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
-static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
-{
-	return 0x2588 + channel * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_CF_SETUP(channel) \
-	host1x_sync_cf_setup_r(channel)
-static inline u32 host1x_sync_cf_setup_base_v(u32 r)
-{
-	return (r >> 0) & 0xfff;
-}
-#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
-	host1x_sync_cf_setup_base_v(r)
-static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
-{
-	return (r >> 16) & 0xfff;
-}
-#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
-	host1x_sync_cf_setup_limit_v(r)
-static inline u32 host1x_sync_cmdproc_stop_r(void)
-{
-	return 0x48;
-}
-#define HOST1X_SYNC_CMDPROC_STOP \
-	host1x_sync_cmdproc_stop_r()
-static inline u32 host1x_sync_ch_teardown_r(void)
-{
-	return 0x4c;
-}
-#define HOST1X_SYNC_CH_TEARDOWN \
-	host1x_sync_ch_teardown_r()
-static inline u32 host1x_sync_usec_clk_r(void)
-{
-	return 0x2244;
-}
-#define HOST1X_SYNC_USEC_CLK \
-	host1x_sync_usec_clk_r()
-static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
-{
-	return 0x2248;
-}
-#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
-	host1x_sync_ctxsw_timeout_cfg_r()
-static inline u32 host1x_sync_ip_busy_timeout_r(void)
-{
-	return 0x2250;
-}
-#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
-	host1x_sync_ip_busy_timeout_r()
-static inline u32 host1x_sync_ip_read_timeout_addr_r(void)
-{
-	return 0x2254;
-}
-#define HOST1X_SYNC_IP_READ_TIMEOUT_ADDR \
-	host1x_sync_ip_read_timeout_addr_r()
-static inline u32 host1x_sync_ip_write_timeout_addr_r(void)
-{
-	return 0x225c;
-}
-#define HOST1X_SYNC_IP_WRITE_TIMEOUT_ADDR \
-	host1x_sync_ip_write_timeout_addr_r()
-static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
-{
-	return 0x18a00 + id * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
-	host1x_sync_syncpt_int_thresh_r(id)
-static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
-{
-	return 0x18000 + id * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_SYNCPT_BASE(id) \
-	host1x_sync_syncpt_base_r(id)
-static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
-{
-	return 0x16400 + id * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
-	host1x_sync_syncpt_cpu_incr_r(id)
-static inline u32 host1x_sync_cfpeek_ctrl_r(void)
-{
-	return 0x233c;
-}
-#define HOST1X_SYNC_CFPEEK_CTRL \
-	host1x_sync_cfpeek_ctrl_r()
-static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
-{
-	return (v & 0xfff) << 0;
-}
-#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
-	host1x_sync_cfpeek_ctrl_addr_f(v)
-static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
-{
-	return (v & 0x3f) << 16;
-}
-#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
-	host1x_sync_cfpeek_ctrl_channr_f(v)
-static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
-{
-	return (v & 0x1) << 31;
-}
-#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
-	host1x_sync_cfpeek_ctrl_ena_f(v)
-static inline u32 host1x_sync_cfpeek_read_r(void)
-{
-	return 0x2340;
-}
-#define HOST1X_SYNC_CFPEEK_READ \
-	host1x_sync_cfpeek_read_r()
-static inline u32 host1x_sync_cfpeek_ptrs_r(void)
-{
-	return 0x2344;
-}
-#define HOST1X_SYNC_CFPEEK_PTRS \
-	host1x_sync_cfpeek_ptrs_r()
-static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
-{
-	return (r >> 0) & 0xfff;
-}
-#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
-	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
-static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
-{
-	return (r >> 16) & 0xfff;
-}
-#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
-	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
-static inline u32 host1x_sync_common_mlock_r(unsigned long id)
-{
-	return 0x2030 + id * REGISTER_STRIDE;
-}
-#define HOST1X_SYNC_COMMON_MLOCK \
-	host1x_sync_common_mlock_r()
-static inline u32 host1x_sync_common_mlock_ch_v(u32 r)
-{
-	return (r >> 2) & 0x3f;
-}
-#define HOST1X_SYNC_COMMON_MLOCK_CH_V(r) \
-	host1x_sync_common_mlock_ch_v(r)
-static inline u32 host1x_sync_common_mlock_locked_v(u32 r)
-{
-	return (r >> 0) & 0x1;
-}
-#define HOST1X_SYNC_COMMON_MLOCK_LOCKED_V(r) \
-	host1x_sync_common_mlock_locked_v(r)
-static inline u32 host1x_thost_common_icg_en_override_0_r(void)
-{
-	return 0x2aa8;
-}
-#define HOST1X_THOST_COMMON_ICG_EN_OVERRIDE_0 \
-	host1x_thost_common_icg_en_override_0_r()
-
-#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
deleted file mode 100644
index 01145f61..00000000
--- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (C) 2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
- /*
-  * Function naming determines intended use:
-  *
-  *     <x>_r(void) : Returns the offset for register <x>.
-  *
-  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
-  *
-  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
-  *
-  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
-  *         and masked to place it at field <y> of register <x>.  This value
-  *         can be |'d with others to produce a full register value for
-  *         register <x>.
-  *
-  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
-  *         value can be ~'d and then &'d to clear the value of field <y> for
-  *         register <x>.
-  *
-  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
-  *         to place it at field <y> of register <x>.  This value can be |'d
-  *         with others to produce a full register value for <x>.
-  *
-  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
-  *         <x> value 'r' after being shifted to place its LSB at bit 0.
-  *         This value is suitable for direct comparison with other unshifted
-  *         values appropriate for use in field <y> of register <x>.
-  *
-  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
-  *         field <y> of register <x>.  This value is suitable for direct
-  *         comparison with unshifted values appropriate for use in field <y>
-  *         of register <x>.
-  */
-
-#ifndef HOST1X_HW_HOST1X05_UCLASS_H
-#define HOST1X_HW_HOST1X05_UCLASS_H
-
-static inline u32 host1x_uclass_incr_syncpt_r(void)
-{
-	return 0x0;
-}
-#define HOST1X_UCLASS_INCR_SYNCPT \
-	host1x_uclass_incr_syncpt_r()
-static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
-{
-	return (v & 0xff) << 10;
-}
-#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
-	host1x_uclass_incr_syncpt_cond_f(v)
-static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
-{
-	return (v & 0x3ff) << 0;
-}
-#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
-	host1x_uclass_incr_syncpt_indx_f(v)
-static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
-{
-	return 0x4e;
-}
-#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
-	host1x_uclass_load_syncpt_payload_32_r()
-static inline u32 host1x_uclass_wait_syncpt_32_r(void)
-{
-	return 0x50;
-}
-#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
-	host1x_uclass_wait_syncpt_32_r()
-static inline u32 host1x_uclass_wait_syncpt_r(void)
-{
-	return 0x8;
-}
-#define HOST1X_UCLASS_WAIT_SYNCPT \
-	host1x_uclass_wait_syncpt_r()
-static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
-{
-	return (v & 0xff) << 24;
-}
-#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
-	host1x_uclass_wait_syncpt_indx_f(v)
-static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
-{
-	return (v & 0xffffff) << 0;
-}
-#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
-	host1x_uclass_wait_syncpt_thresh_f(v)
-static inline u32 host1x_uclass_wait_syncpt_base_r(void)
-{
-	return 0x9;
-}
-#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
-	host1x_uclass_wait_syncpt_base_r()
-static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
-{
-	return (v & 0x3ff) << 22;
-}
-#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
-	host1x_uclass_wait_syncpt_base_indx_f(v)
-static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
-{
-	return (v & 0x3f) << 16;
-}
-#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
-	host1x_uclass_wait_syncpt_base_base_indx_f(v)
-static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
-{
-	return (v & 0xffff) << 0;
-}
-#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
-	host1x_uclass_wait_syncpt_base_offset_f(v)
-static inline u32 host1x_uclass_load_syncpt_base_r(void)
-{
-	return 0xb;
-}
-#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
-	host1x_uclass_load_syncpt_base_r()
-static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
-{
-	return (v & 0xff) << 24;
-}
-#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
-	host1x_uclass_load_syncpt_base_base_indx_f(v)
-static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
-{
-	return (v & 0xffffff) << 0;
-}
-#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
-	host1x_uclass_load_syncpt_base_value_f(v)
-static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
-{
-	return (v & 0xff) << 24;
-}
-#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
-	host1x_uclass_incr_syncpt_base_base_indx_f(v)
-static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
-{
-	return (v & 0xffffff) << 0;
-}
-#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
-	host1x_uclass_incr_syncpt_base_offset_f(v)
-
-#endif
diff --git a/drivers/gpu/host1x/hw/intr_hw_t186.c b/drivers/gpu/host1x/hw/intr_hw_t186.c
deleted file mode 100644
index 48bd1150..00000000
--- a/drivers/gpu/host1x/hw/intr_hw_t186.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Tegra host1x Interrupt Management
- *
- * Copyright (C) 2010 Google, Inc.
- * Copyright (C) 2016 NVIDIA Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-
-#include "intr.h"
-#include "dev.h"
-
-
-static inline u32 bit_mask(u32 nr)
-{
-	return 1UL << (nr % 32);
-}
-static inline u32 bit_word(u32 nr)
-{
-	return nr / 32;
-}
-
-/*
- * Sync point threshold interrupt service function
- * Handles sync point threshold triggers, in interrupt context
- */
-static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
-{
-	unsigned int id = syncpt->id;
-	struct host1x *host = syncpt->host;
-
-	host1x_sync_writel(host, bit_mask(id),
-		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(bit_word(id)));
-	host1x_sync_writel(host, bit_mask(id),
-		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(bit_word(id)));
-
-	queue_work(host->intr_wq, &syncpt->intr.work);
-}
-
-static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
-{
-	struct host1x *host = dev_id;
-	unsigned long reg;
-	int i, set_bit;
-
-	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
-		reg = host1x_sync_readl(host,
-			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
-		for_each_set_bit(set_bit, &reg, 32) {
-			struct host1x_syncpt *syncpt;
-			int id = i * 32 + set_bit;
-
-			if (unlikely(id < 0 || id >= host->info->nb_pts)) {
-				dev_err(host->dev,
-					"%s(): unexptected syncpt id %d\n",
-					__func__, id);
-				goto out;
-			}
-			syncpt = host->syncpt + id;
-			host1x_intr_syncpt_handle(syncpt);
-		}
-	}
-out:
-	return IRQ_HANDLED;
-}
-
-static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
-{
-	u32 i;
-
-	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) {
-		host1x_sync_writel(host, 0xffffffffu,
-			HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i));
-		host1x_sync_writel(host, 0xffffffffu,
-			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
-	}
-}
-
-static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
-	void (*syncpt_thresh_work)(struct work_struct *))
-{
-	int i, err;
-
-	host1x_hw_intr_disable_all_syncpt_intrs(host);
-
-	for (i = 0; i < host->info->nb_pts; i++)
-		INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work);
-
-	err = devm_request_irq(host->dev, host->intr_syncpt_irq,
-			       syncpt_thresh_isr, IRQF_SHARED,
-			       "host1x_syncpt", host);
-	if (IS_ERR_VALUE(err)) {
-		WARN_ON(1);
-		return err;
-	}
-
-	/* disable the ip_busy_timeout. this prevents write drops */
-	host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT);
-
-	/*
-	 * increase the auto-ack timout to the maximum value. 2d will hang
-	 * otherwise on Tegra2.
-	 */
-	host1x_sync_writel(host, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG);
-
-	/* update host clocks per usec */
-	host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
-
-	return 0;
-}
-
-static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
-	u32 id, u32 thresh)
-{
-	host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
-}
-
-static void _host1x_intr_enable_syncpt_intr(struct host1x *host, u32 id)
-{
-	host1x_sync_writel(host, bit_mask(id),
-		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(bit_word(id)));
-}
-
-static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id)
-{
-	host1x_sync_writel(host, bit_mask(id),
-		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(bit_word(id)));
-	host1x_sync_writel(host, bit_mask(id),
-		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(bit_word(id)));
-}
-
-static int _host1x_free_syncpt_irq(struct host1x *host)
-{
-	devm_free_irq(host->dev, host->intr_syncpt_irq, host);
-	flush_workqueue(host->intr_wq);
-	return 0;
-}
-
-/**
- * Host general interrupt service function
- * Handles read / write failures
- */
-static irqreturn_t general_isr(int irq, void *dev_id)
-{
-	struct host1x *host = dev_id;
-	u32 intstatus, addr;
-
-	/* Handle host1x interrupt in ISR */
-	intstatus = host1x_sync_readl(host, HOST1X_SYNC_INTSTATUS);
-
-	if (HOST1X_SYNC_INTSTATUS_IP_READ_INT_V(intstatus)) {
-		addr = host1x_sync_readl(host,
-					 HOST1X_SYNC_IP_READ_TIMEOUT_ADDR);
-		pr_err("Host read timeout at address %x\n", addr);
-	}
-
-	if (HOST1X_SYNC_INTSTATUS_IP_WRITE_INT_V(intstatus)) {
-		addr = host1x_sync_readl(host,
-					 HOST1X_SYNC_IP_WRITE_TIMEOUT_ADDR);
-		pr_err("Host write timeout at address %x\n", addr);
-	}
-
-	if (HOST1X_SYNC_INTSTATUS_ILLEGAL_PB_ACCESS_V(intstatus)) {
-		u32 stat = host1x_sync_readl(host, HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB);
-		u32 ch = HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB_CH_V(stat);
-		u32 id = HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_PB_SYNCPT_V(stat);
-		pr_err("Host illegal syncpoint pb access (ch=%u, id=%u)\n", ch, id);
-	}
-
-	if (HOST1X_SYNC_INTSTATUS_ILLEGAL_CLIENT_ACCESS_V(intstatus)) {
-		u32 stat = host1x_sync_readl(host, HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT);
-		u32 ch = HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT_CH_V(stat);
-		u32 id = HOST1X_SYNC_ILLEGAL_SYNCPT_ACCESS_FRM_CLIENT_SYNCPT_V(stat);
-		pr_err("Host illegal syncpoint client access (ch=%u, id=%u)\n", ch, id);
-	}
-
-	host1x_sync_writel(host, intstatus, HOST1X_SYNC_INTSTATUS);
-
-	return IRQ_HANDLED;
-}
-
-static int _host1x_intr_request_host_general_irq(struct host1x *host)
-{
-	int err;
-
-	/* master disable for general (not syncpt) host interrupts */
-	host1x_sync_writel(host, 0, HOST1X_SYNC_INTC0MASK);
-	host1x_sync_writel(host, 0, HOST1X_SYNC_INTGMASK);
-	host1x_sync_writel(host, 0, HOST1X_SYNC_INTMASK);
-
-	err = devm_request_irq(host->dev, host->intr_general_irq,
-			       general_isr, 0,
-			       "host1x_general", host);
-	if (IS_ERR_VALUE(err)) {
-		WARN_ON(1);
-		return err;
-	}
-
-	/* enable host module interrupt to CPU0 */
-	host1x_sync_writel(host, BIT(0), HOST1X_SYNC_INTC0MASK);
-	host1x_sync_writel(host, BIT(0), HOST1X_SYNC_INTGMASK);
-	host1x_sync_writel(host, 0xff << 8, HOST1X_SYNC_SYNCPT_INTGMASK);
-
-	/* master enable for general (not syncpt) host interrupts
-	 * (AXIREAD, AXIWRITE, Syncpoint protection */
-	host1x_sync_writel(host, BIT(0) | BIT(1) | BIT(28) | BIT(30),
-				 HOST1X_SYNC_INTMASK);
-
-	return err;
-}
-
-static void _host1x_intr_free_host_general_irq(struct host1x *host)
-{
-	/* master disable for general (not syncpt) host interrupts */
-	host1x_sync_writel(host, 0, HOST1X_SYNC_INTMASK);
-	host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTGMASK);
-
-	devm_free_irq(host->dev, host->intr_general_irq, host);
-}
-
-static const struct host1x_intr_ops host1x_intr_t186_ops = {
-	/* Syncpt interrupts */
-	.init_host_sync = _host1x_intr_init_host_sync,
-	.set_syncpt_threshold = _host1x_intr_set_syncpt_threshold,
-	.enable_syncpt_intr = _host1x_intr_enable_syncpt_intr,
-	.disable_syncpt_intr = _host1x_intr_disable_syncpt_intr,
-	.disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs,
-	.free_syncpt_irq = _host1x_free_syncpt_irq,
-
-	/* Host general interrupts */
-	.request_host_general_irq = _host1x_intr_request_host_general_irq,
-	.free_host_general_irq = _host1x_intr_free_host_general_irq,
-};
diff --git a/drivers/gpu/host1x/hw/stremid_hw_t186.c b/drivers/gpu/host1x/hw/stremid_hw_t186.c
deleted file mode 100644
index 9f4c3685..00000000
--- a/drivers/gpu/host1x/hw/stremid_hw_t186.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2016, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include "dev_t186.h"
-
-struct host1x_streamid_mapping {
-	u32 host1x_offset;
-	u32 client_offset;
-	u32 client_limit;
-};
-
-static struct host1x_streamid_mapping __attribute__((__unused__))
-        t18x_host1x_streamid_mapping[] = {
-	/* HOST1X_THOST_COMMON_SE1_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001ac8, 0x00000090, 0x00000090},
-	/* HOST1X_THOST_COMMON_SE2_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001ad0, 0x00000090, 0x00000090},
-	/* HOST1X_THOST_COMMON_SE3_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001ad8, 0x00000090, 0x00000090},
-	/* HOST1X_THOST_COMMON_SE4_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001ae0, 0x00000090, 0x00000090},
-	/* HOST1X_THOST_COMMON_ISP_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001ae8, 0x00000050, 0x00000050},
-	/* HOST1X_THOST_COMMON_VIC_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001af0, 0x00000030, 0x00000034},
-	/* HOST1X_THOST_COMMON_NVENC_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001af8, 0x00000030, 0x00000034},
-	/* HOST1X_THOST_COMMON_NVDEC_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001b00, 0x00000030, 0x00000034},
-	/* HOST1X_THOST_COMMON_NVJPG_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001b08, 0x00000030, 0x00000034},
-	/* HOST1X_THOST_COMMON_TSEC_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001b10, 0x00000030, 0x00000034},
-	/* HOST1X_THOST_COMMON_TSECB_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001b18, 0x00000030, 0x00000034},
-	/* HOST1X_THOST_COMMON_VI_STRMID_0_OFFSET_BASE_0 */
-	{ 0x00001b80, 0x00010000, 0x00010000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_1_OFFSET_BASE_0 */
-	{ 0x00001b88, 0x00020000, 0x00020000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_2_OFFSET_BASE_0 */
-	{ 0x00001b90, 0x00030000, 0x00030000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_3_OFFSET_BASE_0 */
-	{ 0x00001b98, 0x00040000, 0x00040000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_4_OFFSET_BASE_0 */
-	{ 0x00001ba0, 0x00050000, 0x00050000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_5_OFFSET_BASE_0 */
-	{ 0x00001ba8, 0x00060000, 0x00060000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_6_OFFSET_BASE_0 */
-	{ 0x00001bb0, 0x00070000, 0x00070000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_7_OFFSET_BASE_0 */
-	{ 0x00001bb8, 0x00080000, 0x00080000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_8_OFFSET_BASE_0 */
-	{ 0x00001bc0, 0x00090000, 0x00090000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_9_OFFSET_BASE_0 */
-	{ 0x00001bc8, 0x000a0000, 0x000a0000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_10_OFFSET_BASE_0 */
-	{ 0x00001bd0, 0x000b0000, 0x000b0000},
-	/* HOST1X_THOST_COMMON_VI_STRMID_11_OFFSET_BASE_0 */
-	{ 0x00001bd8, 0x000c0000, 0x000c0000},
-	{},
-};
-
-static int load_streamid_regs(struct host1x *host)
-{
-	struct host1x_streamid_mapping *map_regs = t18x_host1x_streamid_mapping;
-
-	while (map_regs->host1x_offset) {
-		host1x_writel(host, map_regs->client_offset,
-				    map_regs->host1x_offset);
-		host1x_writel(host, map_regs->client_limit,
-				    map_regs->host1x_offset + sizeof(u32));
-
-		map_regs++;
-	}
-
-	return 0;
-}
-
-static const struct host1x_dev_ops host1x_dev_t186_ops = {
-	.load_regs = load_streamid_regs,
-};
diff --git a/drivers/gpu/host1x/hw/syncpt_hw_t186.c b/drivers/gpu/host1x/hw/syncpt_hw_t186.c
deleted file mode 100644
index 9b196854..00000000
--- a/drivers/gpu/host1x/hw/syncpt_hw_t186.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Tegra host1x Syncpoints
- *
- * Copyright (c) 2016, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/io.h>
-
-#include "dev.h"
-#include "syncpt.h"
-
-/*
- * Write the current syncpoint value back to hw.
- */
-static void syncpt_restore(struct host1x_syncpt *sp)
-{
-	struct host1x *host = sp->host;
-	int min = host1x_syncpt_read_min(sp);
-	host1x_sync_writel(host, min, HOST1X_SYNC_SYNCPT(sp->id));
-}
-
-/*
- * Write the current waitbase value back to hw.
- */
-static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
-{
-	struct host1x *host = sp->host;
-	host1x_sync_writel(host, sp->base_val,
-			   HOST1X_SYNC_SYNCPT_BASE(sp->id));
-}
-
-/*
- * Read waitbase value from hw.
- */
-static void syncpt_read_wait_base(struct host1x_syncpt *sp)
-{
-	struct host1x *host = sp->host;
-	sp->base_val =
-		host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id));
-}
-
-/*
- * Updates the last value read from hardware.
- */
-static u32 syncpt_load(struct host1x_syncpt *sp)
-{
-	struct host1x *host = sp->host;
-	u32 old, live;
-
-	/* Loop in case there's a race writing to min_val */
-	do {
-		old = host1x_syncpt_read_min(sp);
-		live = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT(sp->id));
-	} while ((u32)atomic_cmpxchg(&sp->min_val, old, live) != old);
-
-	if (!host1x_syncpt_check_max(sp, live))
-		dev_err(host->dev, "%s failed: id=%u, min=%d, max=%d\n",
-			__func__, sp->id, host1x_syncpt_read_min(sp),
-			host1x_syncpt_read_max(sp));
-
-	return live;
-}
-
-/*
- * Write a cpu syncpoint increment to the hardware, without touching
- * the cache.
- */
-static int syncpt_cpu_incr(struct host1x_syncpt *sp)
-{
-	struct host1x *host = sp->host;
-	u32 reg_offset = sp->id / 32;
-
-	if (!host1x_syncpt_client_managed(sp) &&
-	    host1x_syncpt_idle(sp))
-		return -EINVAL;
-	host1x_sync_writel(host, BIT_MASK(sp->id),
-			   HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset));
-	wmb();
-
-	return 0;
-}
-
-/* remove a wait pointed to by patch_addr */
-static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
-{
-	u32 override = host1x_class_host_wait_syncpt(
-		HOST1X_SYNCPT_RESERVED, 0);
-
-	*((u32 *)patch_addr) = override;
-	return 0;
-}
-
-static void syncpt_get_mutex_owner(struct host1x_syncpt *sp,
-				   unsigned int mutex_id, bool *cpu, bool *ch,
-				   unsigned int *chid)
-{
-	struct host1x *host = sp->host;
-	u32 owner;
-
-	owner = host1x_sync_readl(host, host1x_sync_common_mlock_r(mutex_id));
-	*chid = HOST1X_SYNC_COMMON_MLOCK_CH_V(owner);
-	*ch = HOST1X_SYNC_COMMON_MLOCK_LOCKED_V(owner);
-	*cpu = false;
-}
-
-static const struct host1x_syncpt_ops host1x_syncpt_t186_ops = {
-	.restore = syncpt_restore,
-	.restore_wait_base = syncpt_restore_wait_base,
-	.load_wait_base = syncpt_read_wait_base,
-	.load = syncpt_load,
-	.cpu_incr = syncpt_cpu_incr,
-	.patch_wait = syncpt_patch_wait,
-	.get_mutex_owner = syncpt_get_mutex_owner,
-};

From cc5bcef3f65ea2a64ab01b0875b6110ee43de8dd Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 23 Oct 2020 13:58:15 +0100
Subject: [PATCH 14/73] gpu: host1x: Add upstream host1x driver

Add the upstream host1x driver with the 'Host1x/Tegra UAPI' series [0]
applied. This driver will be built as an external module for use with
the NVGPU driver on upstream Linux kernels.

The following modifications have been made to the series posted upstream
1. Update the Makefile to always build the driver as a module
2. Remove the tests to see if CONFIG_DRM_TEGRA_STAGING is enabled
3. Rename the include/linux/host1x.h to include/linux/host1x-next.h to
   avoid conflicts with upstream headers when building as an external
   module.
4. Rename the include/uapi/linux/host1x.h to
   include/uapi/linux/host1x-next.h to avoid conflicts with upstream
   headers when building as an external module.
5. Rename the module that is built to be host1x-next.ko instead of
   host1x.ko to avoid any depmod conflicts with the upstream driver.

[0] https://patchwork.ozlabs.org/project/linux-tegra/list/?series=215770

Bug 3156385

Change-Id: Ic60299546809097dd0e4a9a7157bce1491d9f794
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2435801
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/Kconfig                    |  24 +
 drivers/gpu/host1x/Makefile                   |  24 +
 drivers/gpu/host1x/bus.c                      | 855 ++++++++++++++++++
 drivers/gpu/host1x/bus.h                      |  18 +
 drivers/gpu/host1x/cdma.c                     | 676 ++++++++++++++
 drivers/gpu/host1x/cdma.h                     |  92 ++
 drivers/gpu/host1x/channel.c                  | 156 ++++
 drivers/gpu/host1x/channel.h                  |  41 +
 drivers/gpu/host1x/debug.c                    | 230 +++++
 drivers/gpu/host1x/debug.h                    |  46 +
 drivers/gpu/host1x/dev.c                      | 581 ++++++++++++
 drivers/gpu/host1x/dev.h                      | 339 +++++++
 drivers/gpu/host1x/fence.c                    | 208 +++++
 drivers/gpu/host1x/fence.h                    |  13 +
 drivers/gpu/host1x/hw/cdma_hw.c               | 331 +++++++
 drivers/gpu/host1x/hw/channel_hw.c            | 258 ++++++
 drivers/gpu/host1x/hw/debug_hw.c              | 252 ++++++
 drivers/gpu/host1x/hw/debug_hw_1x01.c         | 145 +++
 drivers/gpu/host1x/hw/debug_hw_1x06.c         | 131 +++
 drivers/gpu/host1x/hw/host1x01.c              |  33 +
 drivers/gpu/host1x/hw/host1x01.h              |  14 +
 drivers/gpu/host1x/hw/host1x01_hardware.h     | 132 +++
 drivers/gpu/host1x/hw/host1x02.c              |  33 +
 drivers/gpu/host1x/hw/host1x02.h              |  15 +
 drivers/gpu/host1x/hw/host1x02_hardware.h     | 131 +++
 drivers/gpu/host1x/hw/host1x04.c              |  33 +
 drivers/gpu/host1x/hw/host1x04.h              |  15 +
 drivers/gpu/host1x/hw/host1x04_hardware.h     | 131 +++
 drivers/gpu/host1x/hw/host1x05.c              |  33 +
 drivers/gpu/host1x/hw/host1x05.h              |  15 +
 drivers/gpu/host1x/hw/host1x05_hardware.h     | 131 +++
 drivers/gpu/host1x/hw/host1x06.c              |  33 +
 drivers/gpu/host1x/hw/host1x06.h              |  15 +
 drivers/gpu/host1x/hw/host1x06_hardware.h     | 137 +++
 drivers/gpu/host1x/hw/host1x07.c              |  33 +
 drivers/gpu/host1x/hw/host1x07.h              |  15 +
 drivers/gpu/host1x/hw/host1x07_hardware.h     | 137 +++
 drivers/gpu/host1x/hw/hw_host1x01_channel.h   | 108 +++
 drivers/gpu/host1x/hw/hw_host1x01_sync.h      | 231 +++++
 drivers/gpu/host1x/hw/hw_host1x01_uclass.h    | 168 ++++
 drivers/gpu/host1x/hw/hw_host1x02_channel.h   | 109 +++
 drivers/gpu/host1x/hw/hw_host1x02_sync.h      | 231 +++++
 drivers/gpu/host1x/hw/hw_host1x02_uclass.h    | 169 ++++
 drivers/gpu/host1x/hw/hw_host1x04_channel.h   | 121 +++
 drivers/gpu/host1x/hw/hw_host1x04_sync.h      | 231 +++++
 drivers/gpu/host1x/hw/hw_host1x04_uclass.h    | 169 ++++
 drivers/gpu/host1x/hw/hw_host1x05_channel.h   | 121 +++
 drivers/gpu/host1x/hw/hw_host1x05_sync.h      | 231 +++++
 drivers/gpu/host1x/hw/hw_host1x05_uclass.h    | 169 ++++
 drivers/gpu/host1x/hw/hw_host1x06_channel.h   |  11 +
 .../gpu/host1x/hw/hw_host1x06_hypervisor.h    |  20 +
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h    | 169 ++++
 drivers/gpu/host1x/hw/hw_host1x06_vm.h        |  35 +
 drivers/gpu/host1x/hw/hw_host1x07_channel.h   |  11 +
 .../gpu/host1x/hw/hw_host1x07_hypervisor.h    |  20 +
 drivers/gpu/host1x/hw/hw_host1x07_uclass.h    | 169 ++++
 drivers/gpu/host1x/hw/hw_host1x07_vm.h        |  34 +
 drivers/gpu/host1x/hw/intr_hw.c               | 148 +++
 drivers/gpu/host1x/hw/syncpt_hw.c             | 144 +++
 drivers/gpu/host1x/intr.c                     | 351 +++++++
 drivers/gpu/host1x/intr.h                     |  95 ++
 drivers/gpu/host1x/job.c                      | 728 +++++++++++++++
 drivers/gpu/host1x/job.h                      |  48 +
 drivers/gpu/host1x/mipi.c                     | 558 ++++++++++++
 drivers/gpu/host1x/syncpt.c                   | 579 ++++++++++++
 drivers/gpu/host1x/syncpt.h                   | 130 +++
 drivers/gpu/host1x/uapi.c                     | 379 ++++++++
 drivers/gpu/host1x/uapi.h                     |  22 +
 68 files changed, 11215 insertions(+)
 create mode 100644 drivers/gpu/host1x/Kconfig
 create mode 100644 drivers/gpu/host1x/Makefile
 create mode 100644 drivers/gpu/host1x/bus.c
 create mode 100644 drivers/gpu/host1x/bus.h
 create mode 100644 drivers/gpu/host1x/cdma.c
 create mode 100644 drivers/gpu/host1x/cdma.h
 create mode 100644 drivers/gpu/host1x/channel.c
 create mode 100644 drivers/gpu/host1x/channel.h
 create mode 100644 drivers/gpu/host1x/debug.c
 create mode 100644 drivers/gpu/host1x/debug.h
 create mode 100644 drivers/gpu/host1x/dev.c
 create mode 100644 drivers/gpu/host1x/dev.h
 create mode 100644 drivers/gpu/host1x/fence.c
 create mode 100644 drivers/gpu/host1x/fence.h
 create mode 100644 drivers/gpu/host1x/hw/cdma_hw.c
 create mode 100644 drivers/gpu/host1x/hw/channel_hw.c
 create mode 100644 drivers/gpu/host1x/hw/debug_hw.c
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_1x01.c
 create mode 100644 drivers/gpu/host1x/hw/debug_hw_1x06.c
 create mode 100644 drivers/gpu/host1x/hw/host1x01.c
 create mode 100644 drivers/gpu/host1x/hw/host1x01.h
 create mode 100644 drivers/gpu/host1x/hw/host1x01_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/host1x02.c
 create mode 100644 drivers/gpu/host1x/hw/host1x02.h
 create mode 100644 drivers/gpu/host1x/hw/host1x02_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/host1x04.c
 create mode 100644 drivers/gpu/host1x/hw/host1x04.h
 create mode 100644 drivers/gpu/host1x/hw/host1x04_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/host1x05.c
 create mode 100644 drivers/gpu/host1x/hw/host1x05.h
 create mode 100644 drivers/gpu/host1x/hw/host1x05_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/host1x06.c
 create mode 100644 drivers/gpu/host1x/hw/host1x06.h
 create mode 100644 drivers/gpu/host1x/hw/host1x06_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/host1x07.c
 create mode 100644 drivers/gpu/host1x/hw/host1x07.h
 create mode 100644 drivers/gpu/host1x/hw/host1x07_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x01_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x01_sync.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x01_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x02_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x02_sync.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x02_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x04_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x04_sync.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x04_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x05_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x05_sync.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x05_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_vm.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x07_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x07_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x07_vm.h
 create mode 100644 drivers/gpu/host1x/hw/intr_hw.c
 create mode 100644 drivers/gpu/host1x/hw/syncpt_hw.c
 create mode 100644 drivers/gpu/host1x/intr.c
 create mode 100644 drivers/gpu/host1x/intr.h
 create mode 100644 drivers/gpu/host1x/job.c
 create mode 100644 drivers/gpu/host1x/job.h
 create mode 100644 drivers/gpu/host1x/mipi.c
 create mode 100644 drivers/gpu/host1x/syncpt.c
 create mode 100644 drivers/gpu/host1x/syncpt.h
 create mode 100644 drivers/gpu/host1x/uapi.c
 create mode 100644 drivers/gpu/host1x/uapi.h

diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
new file mode 100644
index 00000000..36800138
--- /dev/null
+++ b/drivers/gpu/host1x/Kconfig
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config TEGRA_HOST1X_NEXT
+	tristate "NVIDIA Tegra host1x driver"
+	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
+	select IOMMU_IOVA
+	help
+	  Driver for the NVIDIA Tegra host1x hardware.
+
+	  The Tegra host1x module is the DMA engine for register access to
+	  Tegra's graphics- and multimedia-related modules. The modules served
+	  by host1x are referred to as clients. host1x includes some other
+	  functionality, such as synchronization.
+
+if TEGRA_HOST1X_NEXT
+
+config TEGRA_HOST1X_FIREWALL
+	bool "Enable HOST1X security firewall"
+	default y
+	help
+	  Say yes if kernel should protect command streams from tampering.
+
+	  If unsure, choose Y.
+
+endif
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
new file mode 100644
index 00000000..2f77263f
--- /dev/null
+++ b/drivers/gpu/host1x/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+CONFIG_TEGRA_HOST1X_NEXT := m
+ccflags-y := -I$(srctree.host1x)/include
+
+host1x-next-y = \
+	bus.o \
+	syncpt.o \
+	dev.o \
+	intr.o \
+	cdma.o \
+	channel.o \
+	job.o \
+	debug.o \
+	mipi.o \
+	uapi.o \
+	fence.o \
+	hw/host1x01.o \
+	hw/host1x02.o \
+	hw/host1x04.o \
+	hw/host1x05.o \
+	hw/host1x06.o \
+	hw/host1x07.o
+
+obj-$(CONFIG_TEGRA_HOST1X_NEXT) += host1x-next.o
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
new file mode 100644
index 00000000..38f90ad0
--- /dev/null
+++ b/drivers/gpu/host1x/bus.c
@@ -0,0 +1,855 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012-2013, NVIDIA Corporation
+ */
+
+#include <linux/debugfs.h>
+#include <linux/host1x-next.h>
+#include <linux/of.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/of_device.h>
+
+#include "bus.h"
+#include "dev.h"
+
+static DEFINE_MUTEX(clients_lock);
+static LIST_HEAD(clients);
+
+static DEFINE_MUTEX(drivers_lock);
+static LIST_HEAD(drivers);
+
+static DEFINE_MUTEX(devices_lock);
+static LIST_HEAD(devices);
+
+struct host1x_subdev {
+	struct host1x_client *client;
+	struct device_node *np;
+	struct list_head list;
+};
+
+/**
+ * host1x_subdev_add() - add a new subdevice with an associated device node
+ * @device: host1x device to add the subdevice to
+ * @np: device node
+ */
+static int host1x_subdev_add(struct host1x_device *device,
+			     struct host1x_driver *driver,
+			     struct device_node *np)
+{
+	struct host1x_subdev *subdev;
+	struct device_node *child;
+	int err;
+
+	subdev = kzalloc(sizeof(*subdev), GFP_KERNEL);
+	if (!subdev)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&subdev->list);
+	subdev->np = of_node_get(np);
+
+	mutex_lock(&device->subdevs_lock);
+	list_add_tail(&subdev->list, &device->subdevs);
+	mutex_unlock(&device->subdevs_lock);
+
+	/* recursively add children */
+	for_each_child_of_node(np, child) {
+		if (of_match_node(driver->subdevs, child) &&
+		    of_device_is_available(child)) {
+			err = host1x_subdev_add(device, driver, child);
+			if (err < 0) {
+				/* XXX cleanup? */
+				of_node_put(child);
+				return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * host1x_subdev_del() - remove subdevice
+ * @subdev: subdevice to remove
+ */
+static void host1x_subdev_del(struct host1x_subdev *subdev)
+{
+	list_del(&subdev->list);
+	of_node_put(subdev->np);
+	kfree(subdev);
+}
+
+/**
+ * host1x_device_parse_dt() - scan device tree and add matching subdevices
+ * @device: host1x logical device
+ * @driver: host1x driver
+ */
+static int host1x_device_parse_dt(struct host1x_device *device,
+				  struct host1x_driver *driver)
+{
+	struct device_node *np;
+	int err;
+
+	for_each_child_of_node(device->dev.parent->of_node, np) {
+		if (of_match_node(driver->subdevs, np) &&
+		    of_device_is_available(np)) {
+			err = host1x_subdev_add(device, driver, np);
+			if (err < 0) {
+				of_node_put(np);
+				return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void host1x_subdev_register(struct host1x_device *device,
+				   struct host1x_subdev *subdev,
+				   struct host1x_client *client)
+{
+	int err;
+
+	/*
+	 * Move the subdevice to the list of active (registered) subdevices
+	 * and associate it with a client. At the same time, associate the
+	 * client with its parent device.
+	 */
+	mutex_lock(&device->subdevs_lock);
+	mutex_lock(&device->clients_lock);
+	list_move_tail(&client->list, &device->clients);
+	list_move_tail(&subdev->list, &device->active);
+	client->host = &device->dev;
+	subdev->client = client;
+	mutex_unlock(&device->clients_lock);
+	mutex_unlock(&device->subdevs_lock);
+
+	if (list_empty(&device->subdevs)) {
+		err = device_add(&device->dev);
+		if (err < 0)
+			dev_err(&device->dev, "failed to add: %d\n", err);
+		else
+			device->registered = true;
+	}
+}
+
+static void __host1x_subdev_unregister(struct host1x_device *device,
+				       struct host1x_subdev *subdev)
+{
+	struct host1x_client *client = subdev->client;
+
+	/*
+	 * If all subdevices have been activated, we're about to remove the
+	 * first active subdevice, so unload the driver first.
+	 */
+	if (list_empty(&device->subdevs)) {
+		if (device->registered) {
+			device->registered = false;
+			device_del(&device->dev);
+		}
+	}
+
+	/*
+	 * Move the subdevice back to the list of idle subdevices and remove
+	 * it from list of clients.
+	 */
+	mutex_lock(&device->clients_lock);
+	subdev->client = NULL;
+	client->host = NULL;
+	list_move_tail(&subdev->list, &device->subdevs);
+	/*
+	 * XXX: Perhaps don't do this here, but rather explicitly remove it
+	 * when the device is about to be deleted.
+	 *
+	 * This is somewhat complicated by the fact that this function is
+	 * used to remove the subdevice when a client is unregistered but
+	 * also when the composite device is about to be removed.
+	 */
+	list_del_init(&client->list);
+	mutex_unlock(&device->clients_lock);
+}
+
+static void host1x_subdev_unregister(struct host1x_device *device,
+				     struct host1x_subdev *subdev)
+{
+	mutex_lock(&device->subdevs_lock);
+	__host1x_subdev_unregister(device, subdev);
+	mutex_unlock(&device->subdevs_lock);
+}
+
+/**
+ * host1x_device_init() - initialize a host1x logical device
+ * @device: host1x logical device
+ *
+ * The driver for the host1x logical device can call this during execution of
+ * its &host1x_driver.probe implementation to initialize each of its clients.
+ * The client drivers access the subsystem specific driver data using the
+ * &host1x_client.parent field and driver data associated with it (usually by
+ * calling dev_get_drvdata()).
+ */
+int host1x_device_init(struct host1x_device *device)
+{
+	struct host1x_client *client;
+	int err;
+
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry(client, &device->clients, list) {
+		if (client->ops && client->ops->init) {
+			err = client->ops->init(client);
+			if (err < 0) {
+				dev_err(&device->dev,
+					"failed to initialize %s: %d\n",
+					dev_name(client->dev), err);
+				goto teardown;
+			}
+		}
+	}
+
+	mutex_unlock(&device->clients_lock);
+
+	return 0;
+
+teardown:
+	list_for_each_entry_continue_reverse(client, &device->clients, list)
+		if (client->ops->exit)
+			client->ops->exit(client);
+
+	mutex_unlock(&device->clients_lock);
+	return err;
+}
+EXPORT_SYMBOL(host1x_device_init);
+
+/**
+ * host1x_device_exit() - uninitialize host1x logical device
+ * @device: host1x logical device
+ *
+ * When the driver for a host1x logical device is unloaded, it can call this
+ * function to tear down each of its clients. Typically this is done after a
+ * subsystem-specific data structure is removed and the functionality can no
+ * longer be used.
+ */
+int host1x_device_exit(struct host1x_device *device)
+{
+	struct host1x_client *client;
+	int err;
+
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry_reverse(client, &device->clients, list) {
+		if (client->ops && client->ops->exit) {
+			err = client->ops->exit(client);
+			if (err < 0) {
+				dev_err(&device->dev,
+					"failed to cleanup %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&device->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	mutex_unlock(&device->clients_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(host1x_device_exit);
+
+static int host1x_add_client(struct host1x *host1x,
+			     struct host1x_client *client)
+{
+	struct host1x_device *device;
+	struct host1x_subdev *subdev;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry(device, &host1x->devices, list) {
+		list_for_each_entry(subdev, &device->subdevs, list) {
+			if (subdev->np == client->dev->of_node) {
+				host1x_subdev_register(device, subdev, client);
+				mutex_unlock(&host1x->devices_lock);
+				return 0;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+	return -ENODEV;
+}
+
+static int host1x_del_client(struct host1x *host1x,
+			     struct host1x_client *client)
+{
+	struct host1x_device *device, *dt;
+	struct host1x_subdev *subdev;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry_safe(device, dt, &host1x->devices, list) {
+		list_for_each_entry(subdev, &device->active, list) {
+			if (subdev->client == client) {
+				host1x_subdev_unregister(device, subdev);
+				mutex_unlock(&host1x->devices_lock);
+				return 0;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+	return -ENODEV;
+}
+
+static int host1x_device_match(struct device *dev, struct device_driver *drv)
+{
+	return strcmp(dev_name(dev), drv->name) == 0;
+}
+
+static int host1x_device_uevent(struct device *dev,
+				struct kobj_uevent_env *env)
+{
+	struct device_node *np = dev->parent->of_node;
+	unsigned int count = 0;
+	struct property *p;
+	const char *compat;
+
+	/*
+	 * This duplicates most of of_device_uevent(), but the latter cannot
+	 * be called from modules and operates on dev->of_node, which is not
+	 * available in this case.
+	 *
+	 * Note that this is really only needed for backwards compatibility
+	 * with libdrm, which parses this information from sysfs and will
+	 * fail if it can't find the OF_FULLNAME, specifically.
+	 */
+	add_uevent_var(env, "OF_NAME=%pOFn", np);
+	add_uevent_var(env, "OF_FULLNAME=%pOF", np);
+
+	of_property_for_each_string(np, "compatible", p, compat) {
+		add_uevent_var(env, "OF_COMPATIBLE_%u=%s", count, compat);
+		count++;
+	}
+
+	add_uevent_var(env, "OF_COMPATIBLE_N=%u", count);
+
+	return 0;
+}
+
+static int host1x_dma_configure(struct device *dev)
+{
+	return of_dma_configure(dev, dev->of_node, true);
+}
+
+static const struct dev_pm_ops host1x_device_pm_ops = {
+	.suspend = pm_generic_suspend,
+	.resume = pm_generic_resume,
+	.freeze = pm_generic_freeze,
+	.thaw = pm_generic_thaw,
+	.poweroff = pm_generic_poweroff,
+	.restore = pm_generic_restore,
+};
+
+struct bus_type host1x_bus_type = {
+	.name = "host1x",
+	.match = host1x_device_match,
+	.uevent = host1x_device_uevent,
+	.dma_configure = host1x_dma_configure,
+	.pm = &host1x_device_pm_ops,
+};
+
+static void __host1x_device_del(struct host1x_device *device)
+{
+	struct host1x_subdev *subdev, *sd;
+	struct host1x_client *client, *cl;
+
+	mutex_lock(&device->subdevs_lock);
+
+	/* unregister subdevices */
+	list_for_each_entry_safe(subdev, sd, &device->active, list) {
+		/*
+		 * host1x_subdev_unregister() will remove the client from
+		 * any lists, so we'll need to manually add it back to the
+		 * list of idle clients.
+		 *
+		 * XXX: Alternatively, perhaps don't remove the client from
+		 * any lists in host1x_subdev_unregister() and instead do
+		 * that explicitly from host1x_unregister_client()?
+		 */
+		client = subdev->client;
+
+		__host1x_subdev_unregister(device, subdev);
+
+		/* add the client to the list of idle clients */
+		mutex_lock(&clients_lock);
+		list_add_tail(&client->list, &clients);
+		mutex_unlock(&clients_lock);
+	}
+
+	/* remove subdevices */
+	list_for_each_entry_safe(subdev, sd, &device->subdevs, list)
+		host1x_subdev_del(subdev);
+
+	mutex_unlock(&device->subdevs_lock);
+
+	/* move clients to idle list */
+	mutex_lock(&clients_lock);
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry_safe(client, cl, &device->clients, list)
+		list_move_tail(&client->list, &clients);
+
+	mutex_unlock(&device->clients_lock);
+	mutex_unlock(&clients_lock);
+
+	/* finally remove the device */
+	list_del_init(&device->list);
+}
+
+static void host1x_device_release(struct device *dev)
+{
+	struct host1x_device *device = to_host1x_device(dev);
+
+	__host1x_device_del(device);
+	kfree(device);
+}
+
+static int host1x_device_add(struct host1x *host1x,
+			     struct host1x_driver *driver)
+{
+	struct host1x_client *client, *tmp;
+	struct host1x_subdev *subdev;
+	struct host1x_device *device;
+	int err;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device)
+		return -ENOMEM;
+
+	device_initialize(&device->dev);
+
+	mutex_init(&device->subdevs_lock);
+	INIT_LIST_HEAD(&device->subdevs);
+	INIT_LIST_HEAD(&device->active);
+	mutex_init(&device->clients_lock);
+	INIT_LIST_HEAD(&device->clients);
+	INIT_LIST_HEAD(&device->list);
+	device->driver = driver;
+
+	device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask;
+	device->dev.dma_mask = &device->dev.coherent_dma_mask;
+	dev_set_name(&device->dev, "%s", driver->driver.name);
+	device->dev.release = host1x_device_release;
+	device->dev.bus = &host1x_bus_type;
+	device->dev.parent = host1x->dev;
+
+	of_dma_configure(&device->dev, host1x->dev->of_node, true);
+
+	device->dev.dma_parms = &device->dma_parms;
+	dma_set_max_seg_size(&device->dev, UINT_MAX);
+
+	err = host1x_device_parse_dt(device, driver);
+	if (err < 0) {
+		kfree(device);
+		return err;
+	}
+
+	list_add_tail(&device->list, &host1x->devices);
+
+	mutex_lock(&clients_lock);
+
+	list_for_each_entry_safe(client, tmp, &clients, list) {
+		list_for_each_entry(subdev, &device->subdevs, list) {
+			if (subdev->np == client->dev->of_node) {
+				host1x_subdev_register(device, subdev, client);
+				break;
+			}
+		}
+	}
+
+	mutex_unlock(&clients_lock);
+
+	return 0;
+}
+
+/*
+ * Removes a device by first unregistering any subdevices and then removing
+ * itself from the list of devices.
+ *
+ * This function must be called with the host1x->devices_lock held.
+ */
+static void host1x_device_del(struct host1x *host1x,
+			      struct host1x_device *device)
+{
+	if (device->registered) {
+		device->registered = false;
+		device_del(&device->dev);
+	}
+
+	put_device(&device->dev);
+}
+
+static void host1x_attach_driver(struct host1x *host1x,
+				 struct host1x_driver *driver)
+{
+	struct host1x_device *device;
+	int err;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry(device, &host1x->devices, list) {
+		if (device->driver == driver) {
+			mutex_unlock(&host1x->devices_lock);
+			return;
+		}
+	}
+
+	err = host1x_device_add(host1x, driver);
+	if (err < 0)
+		dev_err(host1x->dev, "failed to allocate device: %d\n", err);
+
+	mutex_unlock(&host1x->devices_lock);
+}
+
+static void host1x_detach_driver(struct host1x *host1x,
+				 struct host1x_driver *driver)
+{
+	struct host1x_device *device, *tmp;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry_safe(device, tmp, &host1x->devices, list)
+		if (device->driver == driver)
+			host1x_device_del(host1x, device);
+
+	mutex_unlock(&host1x->devices_lock);
+}
+
+static int host1x_devices_show(struct seq_file *s, void *data)
+{
+	struct host1x *host1x = s->private;
+	struct host1x_device *device;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry(device, &host1x->devices, list) {
+		struct host1x_subdev *subdev;
+
+		seq_printf(s, "%s\n", dev_name(&device->dev));
+
+		mutex_lock(&device->subdevs_lock);
+
+		list_for_each_entry(subdev, &device->active, list)
+			seq_printf(s, "  %pOFf: %s\n", subdev->np,
+				   dev_name(subdev->client->dev));
+
+		list_for_each_entry(subdev, &device->subdevs, list)
+			seq_printf(s, "  %pOFf:\n", subdev->np);
+
+		mutex_unlock(&device->subdevs_lock);
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(host1x_devices);
+
+/**
+ * host1x_register() - register a host1x controller
+ * @host1x: host1x controller
+ *
+ * The host1x controller driver uses this to register a host1x controller with
+ * the infrastructure. Note that all Tegra SoC generations have only ever come
+ * with a single host1x instance, so this function is somewhat academic.
+ */
+int host1x_register(struct host1x *host1x)
+{
+	struct host1x_driver *driver;
+
+	mutex_lock(&devices_lock);
+	list_add_tail(&host1x->list, &devices);
+	mutex_unlock(&devices_lock);
+
+	mutex_lock(&drivers_lock);
+
+	list_for_each_entry(driver, &drivers, list)
+		host1x_attach_driver(host1x, driver);
+
+	mutex_unlock(&drivers_lock);
+
+	debugfs_create_file("devices", S_IRUGO, host1x->debugfs, host1x,
+			    &host1x_devices_fops);
+
+	return 0;
+}
+
+/**
+ * host1x_unregister() - unregister a host1x controller
+ * @host1x: host1x controller
+ *
+ * The host1x controller driver uses this to remove a host1x controller from
+ * the infrastructure.
+ */
+int host1x_unregister(struct host1x *host1x)
+{
+	struct host1x_driver *driver;
+
+	mutex_lock(&drivers_lock);
+
+	list_for_each_entry(driver, &drivers, list)
+		host1x_detach_driver(host1x, driver);
+
+	mutex_unlock(&drivers_lock);
+
+	mutex_lock(&devices_lock);
+	list_del_init(&host1x->list);
+	mutex_unlock(&devices_lock);
+
+	return 0;
+}
+
+static int host1x_device_probe(struct device *dev)
+{
+	struct host1x_driver *driver = to_host1x_driver(dev->driver);
+	struct host1x_device *device = to_host1x_device(dev);
+
+	if (driver->probe)
+		return driver->probe(device);
+
+	return 0;
+}
+
+static int host1x_device_remove(struct device *dev)
+{
+	struct host1x_driver *driver = to_host1x_driver(dev->driver);
+	struct host1x_device *device = to_host1x_device(dev);
+
+	if (driver->remove)
+		return driver->remove(device);
+
+	return 0;
+}
+
+static void host1x_device_shutdown(struct device *dev)
+{
+	struct host1x_driver *driver = to_host1x_driver(dev->driver);
+	struct host1x_device *device = to_host1x_device(dev);
+
+	if (driver->shutdown)
+		driver->shutdown(device);
+}
+
+/**
+ * host1x_driver_register_full() - register a host1x driver
+ * @driver: host1x driver
+ * @owner: owner module
+ *
+ * Drivers for host1x logical devices call this function to register a driver
+ * with the infrastructure. Note that since these drive logical devices, the
+ * registration of the driver actually triggers tho logical device creation.
+ * A logical device will be created for each host1x instance.
+ */
+int host1x_driver_register_full(struct host1x_driver *driver,
+				struct module *owner)
+{
+	struct host1x *host1x;
+
+	INIT_LIST_HEAD(&driver->list);
+
+	mutex_lock(&drivers_lock);
+	list_add_tail(&driver->list, &drivers);
+	mutex_unlock(&drivers_lock);
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list)
+		host1x_attach_driver(host1x, driver);
+
+	mutex_unlock(&devices_lock);
+
+	driver->driver.bus = &host1x_bus_type;
+	driver->driver.owner = owner;
+	driver->driver.probe = host1x_device_probe;
+	driver->driver.remove = host1x_device_remove;
+	driver->driver.shutdown = host1x_device_shutdown;
+
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL(host1x_driver_register_full);
+
+/**
+ * host1x_driver_unregister() - unregister a host1x driver
+ * @driver: host1x driver
+ *
+ * Unbinds the driver from each of the host1x logical devices that it is
+ * bound to, effectively removing the subsystem devices that they represent.
+ */
+void host1x_driver_unregister(struct host1x_driver *driver)
+{
+	struct host1x *host1x;
+
+	driver_unregister(&driver->driver);
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list)
+		host1x_detach_driver(host1x, driver);
+
+	mutex_unlock(&devices_lock);
+
+	mutex_lock(&drivers_lock);
+	list_del_init(&driver->list);
+	mutex_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(host1x_driver_unregister);
+
+/**
+ * host1x_client_register() - register a host1x client
+ * @client: host1x client
+ *
+ * Registers a host1x client with each host1x controller instance. Note that
+ * each client will only match their parent host1x controller and will only be
+ * associated with that instance. Once all clients have been registered with
+ * their parent host1x controller, the infrastructure will set up the logical
+ * device and call host1x_device_init(), which will in turn call each client's
+ * &host1x_client_ops.init implementation.
+ */
+int __host1x_client_register(struct host1x_client *client,
+			   struct lock_class_key *key)
+{
+	struct host1x *host1x;
+	int err;
+
+	INIT_LIST_HEAD(&client->list);
+	__mutex_init(&client->lock, "host1x client lock", key);
+	client->usecount = 0;
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list) {
+		err = host1x_add_client(host1x, client);
+		if (!err) {
+			mutex_unlock(&devices_lock);
+			return 0;
+		}
+	}
+
+	mutex_unlock(&devices_lock);
+
+	mutex_lock(&clients_lock);
+	list_add_tail(&client->list, &clients);
+	mutex_unlock(&clients_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(__host1x_client_register);
+
+/**
+ * host1x_client_unregister() - unregister a host1x client
+ * @client: host1x client
+ *
+ * Removes a host1x client from its host1x controller instance. If a logical
+ * device has already been initialized, it will be torn down.
+ */
+int host1x_client_unregister(struct host1x_client *client)
+{
+	struct host1x_client *c;
+	struct host1x *host1x;
+	int err;
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list) {
+		err = host1x_del_client(host1x, client);
+		if (!err) {
+			mutex_unlock(&devices_lock);
+			return 0;
+		}
+	}
+
+	mutex_unlock(&devices_lock);
+	mutex_lock(&clients_lock);
+
+	list_for_each_entry(c, &clients, list) {
+		if (c == client) {
+			list_del_init(&c->list);
+			break;
+		}
+	}
+
+	mutex_unlock(&clients_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(host1x_client_unregister);
+
+int host1x_client_suspend(struct host1x_client *client)
+{
+	int err = 0;
+
+	mutex_lock(&client->lock);
+
+	if (client->usecount == 1) {
+		if (client->ops && client->ops->suspend) {
+			err = client->ops->suspend(client);
+			if (err < 0)
+				goto unlock;
+		}
+	}
+
+	client->usecount--;
+	dev_dbg(client->dev, "use count: %u\n", client->usecount);
+
+	if (client->parent) {
+		err = host1x_client_suspend(client->parent);
+		if (err < 0)
+			goto resume;
+	}
+
+	goto unlock;
+
+resume:
+	if (client->usecount == 0)
+		if (client->ops && client->ops->resume)
+			client->ops->resume(client);
+
+	client->usecount++;
+unlock:
+	mutex_unlock(&client->lock);
+	return err;
+}
+EXPORT_SYMBOL(host1x_client_suspend);
+
+int host1x_client_resume(struct host1x_client *client)
+{
+	int err = 0;
+
+	mutex_lock(&client->lock);
+
+	if (client->parent) {
+		err = host1x_client_resume(client->parent);
+		if (err < 0)
+			goto unlock;
+	}
+
+	if (client->usecount == 0) {
+		if (client->ops && client->ops->resume) {
+			err = client->ops->resume(client);
+			if (err < 0)
+				goto suspend;
+		}
+	}
+
+	client->usecount++;
+	dev_dbg(client->dev, "use count: %u\n", client->usecount);
+
+	goto unlock;
+
+suspend:
+	if (client->parent)
+		host1x_client_suspend(client->parent);
+unlock:
+	mutex_unlock(&client->lock);
+	return err;
+}
+EXPORT_SYMBOL(host1x_client_resume);
diff --git a/drivers/gpu/host1x/bus.h b/drivers/gpu/host1x/bus.h
new file mode 100644
index 00000000..a4adf9ab
--- /dev/null
+++ b/drivers/gpu/host1x/bus.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012-2013, NVIDIA Corporation
+ */
+
+#ifndef HOST1X_BUS_H
+#define HOST1X_BUS_H
+
+struct bus_type;
+struct host1x;
+
+extern struct bus_type host1x_bus_type;
+
+int host1x_register(struct host1x *host1x);
+int host1x_unregister(struct host1x *host1x);
+
+#endif
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
new file mode 100644
index 00000000..8b9e7b4e
--- /dev/null
+++ b/drivers/gpu/host1x/cdma.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Command DMA
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+
+#include <asm/cacheflush.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/host1x-next.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kfifo.h>
+#include <linux/slab.h>
+#include <trace/events/host1x.h>
+
+#include "cdma.h"
+#include "channel.h"
+#include "dev.h"
+#include "debug.h"
+#include "job.h"
+
+/*
+ * push_buffer
+ *
+ * The push buffer is a circular array of words to be fetched by command DMA.
+ * Note that it works slightly differently to the sync queue; fence == pos
+ * means that the push buffer is full, not empty.
+ */
+
+/*
+ * Typically the commands written into the push buffer are a pair of words. We
+ * use slots to represent each of these pairs and to simplify things. Note the
+ * strange number of slots allocated here. 512 slots will fit exactly within a
+ * single memory page. We also need one additional word at the end of the push
+ * buffer for the RESTART opcode that will instruct the CDMA to jump back to
+ * the beginning of the push buffer. With 512 slots, this means that we'll use
+ * 2 memory pages and waste 4092 bytes of the second page that will never be
+ * used.
+ */
+#define HOST1X_PUSHBUFFER_SLOTS	511
+
+/*
+ * Clean up push buffer resources
+ */
+static void host1x_pushbuffer_destroy(struct push_buffer *pb)
+{
+	struct host1x_cdma *cdma = pb_to_cdma(pb);
+	struct host1x *host1x = cdma_to_host1x(cdma);
+
+	if (!pb->mapped)
+		return;
+
+	if (host1x->domain) {
+		iommu_unmap(host1x->domain, pb->dma, pb->alloc_size);
+		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
+	}
+
+	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
+
+	pb->mapped = NULL;
+	pb->phys = 0;
+}
+
+/*
+ * Init push buffer resources
+ */
+static int host1x_pushbuffer_init(struct push_buffer *pb)
+{
+	struct host1x_cdma *cdma = pb_to_cdma(pb);
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct iova *alloc;
+	u32 size;
+	int err;
+
+	pb->mapped = NULL;
+	pb->phys = 0;
+	pb->size = HOST1X_PUSHBUFFER_SLOTS * 8;
+
+	size = pb->size + 4;
+
+	/* initialize buffer pointers */
+	pb->fence = pb->size - 8;
+	pb->pos = 0;
+
+	if (host1x->domain) {
+		unsigned long shift;
+
+		size = iova_align(&host1x->iova, size);
+
+		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
+					  GFP_KERNEL);
+		if (!pb->mapped)
+			return -ENOMEM;
+
+		shift = iova_shift(&host1x->iova);
+		alloc = alloc_iova(&host1x->iova, size >> shift,
+				   host1x->iova_end >> shift, true);
+		if (!alloc) {
+			err = -ENOMEM;
+			goto iommu_free_mem;
+		}
+
+		pb->dma = iova_dma_addr(&host1x->iova, alloc);
+		err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
+				IOMMU_READ);
+		if (err)
+			goto iommu_free_iova;
+	} else {
+		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
+					  GFP_KERNEL);
+		if (!pb->mapped)
+			return -ENOMEM;
+
+		pb->dma = pb->phys;
+	}
+
+	pb->alloc_size = size;
+
+	host1x_hw_pushbuffer_init(host1x, pb);
+
+	return 0;
+
+iommu_free_iova:
+	__free_iova(&host1x->iova, alloc);
+iommu_free_mem:
+	dma_free_wc(host1x->dev, size, pb->mapped, pb->phys);
+
+	return err;
+}
+
+/*
+ * Push two words to the push buffer
+ * Caller must ensure push buffer is not full
+ */
+static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
+{
+	u32 *p = (u32 *)((void *)pb->mapped + pb->pos);
+
+	WARN_ON(pb->pos == pb->fence);
+	*(p++) = op1;
+	*(p++) = op2;
+	pb->pos += 8;
+
+	if (pb->pos >= pb->size)
+		pb->pos -= pb->size;
+}
+
+/*
+ * Pop a number of two word slots from the push buffer
+ * Caller must ensure push buffer is not empty
+ */
+static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
+{
+	/* Advance the next write position */
+	pb->fence += slots * 8;
+
+	if (pb->fence >= pb->size)
+		pb->fence -= pb->size;
+}
+
+/*
+ * Return the number of two word slots free in the push buffer
+ */
+static u32 host1x_pushbuffer_space(struct push_buffer *pb)
+{
+	unsigned int fence = pb->fence;
+
+	if (pb->fence < pb->pos)
+		fence += pb->size;
+
+	return (fence - pb->pos) / 8;
+}
+
+/*
+ * Sleep (if necessary) until the requested event happens
+ *   - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty.
+ *     - Returns 1
+ *   - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer
+ *     - Return the amount of space (> 0)
+ * Must be called with the cdma lock held.
+ */
+unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
+				     enum cdma_event event)
+{
+	for (;;) {
+		struct push_buffer *pb = &cdma->push_buffer;
+		unsigned int space;
+
+		switch (event) {
+		case CDMA_EVENT_SYNC_QUEUE_EMPTY:
+			space = list_empty(&cdma->sync_queue) ? 1 : 0;
+			break;
+
+		case CDMA_EVENT_PUSH_BUFFER_SPACE:
+			space = host1x_pushbuffer_space(pb);
+			break;
+
+		default:
+			WARN_ON(1);
+			return -EINVAL;
+		}
+
+		if (space)
+			return space;
+
+		trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
+				       event);
+
+		/* If somebody has managed to already start waiting, yield */
+		if (cdma->event != CDMA_EVENT_NONE) {
+			mutex_unlock(&cdma->lock);
+			schedule();
+			mutex_lock(&cdma->lock);
+			continue;
+		}
+
+		cdma->event = event;
+
+		mutex_unlock(&cdma->lock);
+		wait_for_completion(&cdma->complete);
+		mutex_lock(&cdma->lock);
+	}
+
+	return 0;
+}
+
+/*
+ * Sleep (if necessary) until the push buffer has enough free space.
+ *
+ * Must be called with the cdma lock held.
+ */
+static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
+					     struct host1x_cdma *cdma,
+					     unsigned int needed)
+{
+	while (true) {
+		struct push_buffer *pb = &cdma->push_buffer;
+		unsigned int space;
+
+		space = host1x_pushbuffer_space(pb);
+		if (space >= needed)
+			break;
+
+		trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
+				       CDMA_EVENT_PUSH_BUFFER_SPACE);
+
+		host1x_hw_cdma_flush(host1x, cdma);
+
+		/* If somebody has managed to already start waiting, yield */
+		if (cdma->event != CDMA_EVENT_NONE) {
+			mutex_unlock(&cdma->lock);
+			schedule();
+			mutex_lock(&cdma->lock);
+			continue;
+		}
+
+		cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE;
+
+		mutex_unlock(&cdma->lock);
+		wait_for_completion(&cdma->complete);
+		mutex_lock(&cdma->lock);
+	}
+
+	return 0;
+}
+/*
+ * Start timer that tracks the time spent by the job.
+ * Must be called with the cdma lock held.
+ */
+static void cdma_start_timer_locked(struct host1x_cdma *cdma,
+				    struct host1x_job *job)
+{
+	if (cdma->timeout.client) {
+		/* timer already started */
+		return;
+	}
+
+	cdma->timeout.client = job->client;
+	cdma->timeout.syncpt = job->syncpt;
+	cdma->timeout.syncpt_val = job->syncpt_end;
+	cdma->timeout.start_ktime = ktime_get();
+
+	schedule_delayed_work(&cdma->timeout.wq,
+			      msecs_to_jiffies(job->timeout));
+}
+
+/*
+ * Stop timer when a buffer submission completes.
+ * Must be called with the cdma lock held.
+ */
+static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
+{
+	cancel_delayed_work(&cdma->timeout.wq);
+	cdma->timeout.client = NULL;
+}
+
+/*
+ * For all sync queue entries that have already finished according to the
+ * current sync point registers:
+ *  - unpin & unref their mems
+ *  - pop their push buffer slots
+ *  - remove them from the sync queue
+ * This is normally called from the host code's worker thread, but can be
+ * called manually if necessary.
+ * Must be called with the cdma lock held.
+ */
+static void update_cdma_locked(struct host1x_cdma *cdma)
+{
+	bool signal = false;
+	struct host1x_job *job, *n;
+
+	/*
+	 * Walk the sync queue, reading the sync point registers as necessary,
+	 * to consume as many sync queue entries as possible without blocking
+	 */
+	list_for_each_entry_safe(job, n, &cdma->sync_queue, list) {
+		struct host1x_syncpt *sp = job->syncpt;
+
+		/* Check whether this syncpt has completed, and bail if not */
+		if (!host1x_syncpt_is_expired(sp, job->syncpt_end) &&
+		    !job->cancelled) {
+			/* Start timer on next pending syncpt */
+			if (job->timeout)
+				cdma_start_timer_locked(cdma, job);
+
+			break;
+		}
+
+		/* Cancel timeout, when a buffer completes */
+		if (cdma->timeout.client)
+			stop_cdma_timer_locked(cdma);
+
+		/* Unpin the memory */
+		host1x_job_unpin(job);
+
+		/* Pop push buffer slots */
+		if (job->num_slots) {
+			struct push_buffer *pb = &cdma->push_buffer;
+
+			host1x_pushbuffer_pop(pb, job->num_slots);
+
+			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE)
+				signal = true;
+		}
+
+		list_del(&job->list);
+		host1x_job_put(job);
+	}
+
+	if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY &&
+	    list_empty(&cdma->sync_queue))
+		signal = true;
+
+	if (signal) {
+		cdma->event = CDMA_EVENT_NONE;
+		complete(&cdma->complete);
+	}
+}
+
+void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
+				   struct device *dev)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	u32 restart_addr, syncpt_incrs, syncpt_val;
+	struct host1x_job *job, *next_job = NULL;
+
+	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
+
+	dev_dbg(dev, "%s: starting cleanup (thresh %d)\n",
+		__func__, syncpt_val);
+
+	/*
+	 * Move the sync_queue read pointer to the first entry that hasn't
+	 * completed based on the current HW syncpt value. It's likely there
+	 * won't be any (i.e. we're still at the head), but covers the case
+	 * where a syncpt incr happens just prior/during the teardown.
+	 */
+
+	dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n",
+		__func__);
+
+	list_for_each_entry(job, &cdma->sync_queue, list) {
+		if (syncpt_val < job->syncpt_end) {
+
+			if (!list_is_last(&job->list, &cdma->sync_queue))
+				next_job = list_next_entry(job, list);
+
+			goto syncpt_incr;
+		}
+
+		host1x_job_dump(dev, job);
+	}
+
+	/* all jobs have been completed */
+	job = NULL;
+
+syncpt_incr:
+
+	/*
+	 * Increment with CPU the remaining syncpts of a partially executed job.
+	 *
+	 * CDMA will continue execution starting with the next job or will get
+	 * into idle state.
+	 */
+	if (next_job)
+		restart_addr = next_job->first_get;
+	else
+		restart_addr = cdma->last_pos;
+
+	if (!job)
+		goto resume;
+
+	/* do CPU increments for the remaining syncpts */
+	if (job->syncpt_recovery) {
+		dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
+			__func__);
+
+		/* won't need a timeout when replayed */
+		job->timeout = 0;
+
+		syncpt_incrs = job->syncpt_end - syncpt_val;
+		dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs);
+
+		host1x_job_dump(dev, job);
+
+		/* safe to use CPU to incr syncpts */
+		host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get,
+						syncpt_incrs, job->syncpt_end,
+						job->num_slots);
+
+		dev_dbg(dev, "%s: finished sync_queue modification\n",
+			__func__);
+	} else {
+		struct host1x_job *failed_job = job;
+
+		host1x_job_dump(dev, job);
+
+		host1x_syncpt_set_locked(job->syncpt);
+		failed_job->cancelled = true;
+
+		list_for_each_entry_continue(job, &cdma->sync_queue, list) {
+			unsigned int i;
+
+			if (job->syncpt != failed_job->syncpt)
+				continue;
+
+			for (i = 0; i < job->num_slots; i++) {
+				unsigned int slot = (job->first_get/8 + i) %
+						    HOST1X_PUSHBUFFER_SLOTS;
+				u32 *mapped = cdma->push_buffer.mapped;
+
+				/*
+				 * Overwrite opcodes with 0 word writes to
+				 * to offset 0xbad. This does nothing but
+				 * has a easily detected signature in debug
+				 * traces.
+				 */
+				mapped[2*slot+0] = 0x1bad0000;
+				mapped[2*slot+1] = 0x1bad0000;
+			}
+
+			job->cancelled = true;
+		}
+
+		wmb();
+
+		update_cdma_locked(cdma);
+	}
+
+resume:
+	/* roll back DMAGET and start up channel again */
+	host1x_hw_cdma_resume(host1x, cdma, restart_addr);
+}
+
+/*
+ * Create a cdma
+ */
+int host1x_cdma_init(struct host1x_cdma *cdma)
+{
+	int err;
+
+	mutex_init(&cdma->lock);
+	init_completion(&cdma->complete);
+
+	INIT_LIST_HEAD(&cdma->sync_queue);
+
+	cdma->event = CDMA_EVENT_NONE;
+	cdma->running = false;
+	cdma->torndown = false;
+
+	err = host1x_pushbuffer_init(&cdma->push_buffer);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/*
+ * Destroy a cdma
+ */
+int host1x_cdma_deinit(struct host1x_cdma *cdma)
+{
+	struct push_buffer *pb = &cdma->push_buffer;
+	struct host1x *host1x = cdma_to_host1x(cdma);
+
+	if (cdma->running) {
+		pr_warn("%s: CDMA still running\n", __func__);
+		return -EBUSY;
+	}
+
+	host1x_pushbuffer_destroy(pb);
+	host1x_hw_cdma_timeout_destroy(host1x, cdma);
+
+	return 0;
+}
+
+/*
+ * Begin a cdma submit
+ */
+int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+
+	mutex_lock(&cdma->lock);
+
+	/*
+	 * Check if syncpoint was locked due to previous job timeout.
+	 * This needs to be done within the cdma lock to avoid a race
+	 * with the timeout handler.
+	 */
+	if (job->syncpt->locked) {
+		mutex_unlock(&cdma->lock);
+		return -EPERM;
+	}
+
+	if (job->timeout) {
+		/* init state on first submit with timeout value */
+		if (!cdma->timeout.initialized) {
+			int err;
+
+			err = host1x_hw_cdma_timeout_init(host1x, cdma);
+			if (err) {
+				mutex_unlock(&cdma->lock);
+				return err;
+			}
+		}
+	}
+
+	if (!cdma->running)
+		host1x_hw_cdma_start(host1x, cdma);
+
+	cdma->slots_free = 0;
+	cdma->slots_used = 0;
+	cdma->first_get = cdma->push_buffer.pos;
+
+	trace_host1x_cdma_begin(dev_name(job->channel->dev));
+	return 0;
+}
+
+/*
+ * Push two words into a push buffer slot
+ * Blocks as necessary if the push buffer is full.
+ */
+void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct push_buffer *pb = &cdma->push_buffer;
+	u32 slots_free = cdma->slots_free;
+
+	if (host1x_debug_trace_cmdbuf)
+		trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev),
+				       op1, op2);
+
+	if (slots_free == 0) {
+		host1x_hw_cdma_flush(host1x, cdma);
+		slots_free = host1x_cdma_wait_locked(cdma,
+						CDMA_EVENT_PUSH_BUFFER_SPACE);
+	}
+
+	cdma->slots_free = slots_free - 1;
+	cdma->slots_used++;
+	host1x_pushbuffer_push(pb, op1, op2);
+}
+
+/*
+ * Push four words into two consecutive push buffer slots. Note that extra
+ * care needs to be taken not to split the two slots across the end of the
+ * push buffer. Otherwise the RESTART opcode at the end of the push buffer
+ * that ensures processing will restart at the beginning will break up the
+ * four words.
+ *
+ * Blocks as necessary if the push buffer is full.
+ */
+void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
+			   u32 op3, u32 op4)
+{
+	struct host1x_channel *channel = cdma_to_channel(cdma);
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct push_buffer *pb = &cdma->push_buffer;
+	unsigned int needed = 2, extra = 0, i;
+	unsigned int space = cdma->slots_free;
+
+	if (host1x_debug_trace_cmdbuf)
+		trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
+					    op3, op4);
+
+	/* compute number of extra slots needed for padding */
+	if (pb->pos + 16 > pb->size) {
+		extra = (pb->size - pb->pos) / 8;
+		needed += extra;
+	}
+
+	host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed);
+	space = host1x_pushbuffer_space(pb);
+
+	cdma->slots_free = space - needed;
+	cdma->slots_used += needed;
+
+	/*
+	 * Note that we rely on the fact that this is only used to submit wide
+	 * gather opcodes, which consist of 3 words, and they are padded with
+	 * a NOP to avoid having to deal with fractional slots (a slot always
+	 * represents 2 words). The fourth opcode passed to this function will
+	 * therefore always be a NOP.
+	 *
+	 * This works around a slight ambiguity when it comes to opcodes. For
+	 * all current host1x incarnations the NOP opcode uses the exact same
+	 * encoding (0x20000000), so we could hard-code the value here, but a
+	 * new incarnation may change it and break that assumption.
+	 */
+	for (i = 0; i < extra; i++)
+		host1x_pushbuffer_push(pb, op4, op4);
+
+	host1x_pushbuffer_push(pb, op1, op2);
+	host1x_pushbuffer_push(pb, op3, op4);
+}
+
+/*
+ * End a cdma submit
+ * Kick off DMA, add job to the sync queue, and a number of slots to be freed
+ * from the pushbuffer. The handles for a submit must all be pinned at the same
+ * time, but they can be unpinned in smaller chunks.
+ */
+void host1x_cdma_end(struct host1x_cdma *cdma,
+		     struct host1x_job *job)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	bool idle = list_empty(&cdma->sync_queue);
+
+	host1x_hw_cdma_flush(host1x, cdma);
+
+	job->first_get = cdma->first_get;
+	job->num_slots = cdma->slots_used;
+	host1x_job_get(job);
+	list_add_tail(&job->list, &cdma->sync_queue);
+
+	/* start timer on idle -> active transitions */
+	if (job->timeout && idle)
+		cdma_start_timer_locked(cdma, job);
+
+	trace_host1x_cdma_end(dev_name(job->channel->dev));
+	mutex_unlock(&cdma->lock);
+}
+
+/*
+ * Update cdma state according to current sync point values
+ */
+void host1x_cdma_update(struct host1x_cdma *cdma)
+{
+	mutex_lock(&cdma->lock);
+	update_cdma_locked(cdma);
+	mutex_unlock(&cdma->lock);
+}
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
new file mode 100644
index 00000000..12c4327c
--- /dev/null
+++ b/drivers/gpu/host1x/cdma.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Command DMA
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_CDMA_H
+#define __HOST1X_CDMA_H
+
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+
+struct host1x_syncpt;
+struct host1x_userctx_timeout;
+struct host1x_job;
+
+/*
+ * cdma
+ *
+ * This is in charge of a host command DMA channel.
+ * Sends ops to a push buffer, and takes responsibility for unpinning
+ * (& possibly freeing) of memory after those ops have completed.
+ * Producer:
+ *	begin
+ *		push - send ops to the push buffer
+ *	end - start command DMA and enqueue handles to be unpinned
+ * Consumer:
+ *	update - call to update sync queue and push buffer, unpin memory
+ */
+
+struct push_buffer {
+	void *mapped;			/* mapped pushbuffer memory */
+	dma_addr_t dma;			/* device address of pushbuffer */
+	dma_addr_t phys;		/* physical address of pushbuffer */
+	u32 fence;			/* index we've written */
+	u32 pos;			/* index to write to */
+	u32 size;
+	u32 alloc_size;
+};
+
+struct buffer_timeout {
+	struct delayed_work wq;		/* work queue */
+	bool initialized;		/* timer one-time setup flag */
+	struct host1x_syncpt *syncpt;	/* buffer completion syncpt */
+	u32 syncpt_val;			/* syncpt value when completed */
+	ktime_t start_ktime;		/* starting time */
+	/* context timeout information */
+	struct host1x_client *client;
+};
+
+enum cdma_event {
+	CDMA_EVENT_NONE,		/* not waiting for any event */
+	CDMA_EVENT_SYNC_QUEUE_EMPTY,	/* wait for empty sync queue */
+	CDMA_EVENT_PUSH_BUFFER_SPACE	/* wait for space in push buffer */
+};
+
+struct host1x_cdma {
+	struct mutex lock;		/* controls access to shared state */
+	struct completion complete;	/* signalled when event occurs */
+	enum cdma_event event;		/* event that complete is waiting for */
+	unsigned int slots_used;	/* pb slots used in current submit */
+	unsigned int slots_free;	/* pb slots free in current submit */
+	unsigned int first_get;		/* DMAGET value, where submit begins */
+	unsigned int last_pos;		/* last value written to DMAPUT */
+	struct push_buffer push_buffer;	/* channel's push buffer */
+	struct list_head sync_queue;	/* job queue */
+	struct buffer_timeout timeout;	/* channel's timeout state/wq */
+	bool running;
+	bool torndown;
+};
+
+#define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
+#define cdma_to_host1x(cdma) dev_get_drvdata(cdma_to_channel(cdma)->dev->parent)
+#define pb_to_cdma(pb) container_of(pb, struct host1x_cdma, push_buffer)
+
+int host1x_cdma_init(struct host1x_cdma *cdma);
+int host1x_cdma_deinit(struct host1x_cdma *cdma);
+int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job);
+void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2);
+void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
+			   u32 op3, u32 op4);
+void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job);
+void host1x_cdma_update(struct host1x_cdma *cdma);
+void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot,
+		      u32 *out);
+unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
+				     enum cdma_event event);
+void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
+				   struct device *dev);
+#endif
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
new file mode 100644
index 00000000..4cd212bb
--- /dev/null
+++ b/drivers/gpu/host1x/channel.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Channel
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include "channel.h"
+#include "dev.h"
+#include "job.h"
+
+/* Constructor for the host1x device list */
+int host1x_channel_list_init(struct host1x_channel_list *chlist,
+			     unsigned int num_channels)
+{
+	chlist->channels = kcalloc(num_channels, sizeof(struct host1x_channel),
+				   GFP_KERNEL);
+	if (!chlist->channels)
+		return -ENOMEM;
+
+	chlist->allocated_channels =
+		kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long),
+			GFP_KERNEL);
+	if (!chlist->allocated_channels) {
+		kfree(chlist->channels);
+		return -ENOMEM;
+	}
+
+	bitmap_zero(chlist->allocated_channels, num_channels);
+
+	return 0;
+}
+
+void host1x_channel_list_free(struct host1x_channel_list *chlist)
+{
+	kfree(chlist->allocated_channels);
+	kfree(chlist->channels);
+}
+
+int host1x_job_submit(struct host1x_job *job)
+{
+	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
+
+	return host1x_hw_channel_submit(host, job);
+}
+EXPORT_SYMBOL(host1x_job_submit);
+
+struct host1x_channel *host1x_channel_get(struct host1x_channel *channel)
+{
+	kref_get(&channel->refcount);
+
+	return channel;
+}
+EXPORT_SYMBOL(host1x_channel_get);
+
+/**
+ * host1x_channel_get_index() - Attempt to get channel reference by index
+ * @host: Host1x device object
+ * @index: Index of channel
+ *
+ * If channel number @index is currently allocated, increase its refcount
+ * and return a pointer to it. Otherwise, return NULL.
+ */
+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
+						unsigned int index)
+{
+	struct host1x_channel *ch = &host->channel_list.channels[index];
+
+	if (!kref_get_unless_zero(&ch->refcount))
+		return NULL;
+
+	return ch;
+}
+
+static void release_channel(struct kref *kref)
+{
+	struct host1x_channel *channel =
+		container_of(kref, struct host1x_channel, refcount);
+	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+	struct host1x_channel_list *chlist = &host->channel_list;
+
+	host1x_hw_cdma_stop(host, &channel->cdma);
+	host1x_cdma_deinit(&channel->cdma);
+
+	clear_bit(channel->id, chlist->allocated_channels);
+}
+
+void host1x_channel_put(struct host1x_channel *channel)
+{
+	kref_put(&channel->refcount, release_channel);
+}
+EXPORT_SYMBOL(host1x_channel_put);
+
+static struct host1x_channel *acquire_unused_channel(struct host1x *host)
+{
+	struct host1x_channel_list *chlist = &host->channel_list;
+	unsigned int max_channels = host->info->nb_channels;
+	unsigned int index;
+
+	index = find_first_zero_bit(chlist->allocated_channels, max_channels);
+	if (index >= max_channels) {
+		dev_err(host->dev, "failed to find free channel\n");
+		return NULL;
+	}
+
+	chlist->channels[index].id = index;
+
+	set_bit(index, chlist->allocated_channels);
+
+	return &chlist->channels[index];
+}
+
+/**
+ * host1x_channel_request() - Allocate a channel
+ * @client: Host1x client this channel will be used to send commands to
+ *
+ * Allocates a new host1x channel for @client. May return NULL if CDMA
+ * initialization fails.
+ */
+struct host1x_channel *host1x_channel_request(struct host1x_client *client)
+{
+	struct host1x *host = dev_get_drvdata(client->dev->parent);
+	struct host1x_channel_list *chlist = &host->channel_list;
+	struct host1x_channel *channel;
+	int err;
+
+	channel = acquire_unused_channel(host);
+	if (!channel)
+		return NULL;
+
+	kref_init(&channel->refcount);
+	mutex_init(&channel->submitlock);
+	channel->client = client;
+	channel->dev = client->dev;
+
+	err = host1x_hw_channel_init(host, channel, channel->id);
+	if (err < 0)
+		goto fail;
+
+	err = host1x_cdma_init(&channel->cdma);
+	if (err < 0)
+		goto fail;
+
+	return channel;
+
+fail:
+	clear_bit(channel->id, chlist->allocated_channels);
+
+	dev_err(client->dev, "failed to initialize channel\n");
+
+	return NULL;
+}
+EXPORT_SYMBOL(host1x_channel_request);
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
new file mode 100644
index 00000000..39044ff6
--- /dev/null
+++ b/drivers/gpu/host1x/channel.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Channel
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_CHANNEL_H
+#define __HOST1X_CHANNEL_H
+
+#include <linux/io.h>
+#include <linux/kref.h>
+
+#include "cdma.h"
+
+struct host1x;
+struct host1x_channel;
+
+struct host1x_channel_list {
+	struct host1x_channel *channels;
+	unsigned long *allocated_channels;
+};
+
+struct host1x_channel {
+	struct kref refcount;
+	unsigned int id;
+	struct mutex submitlock;
+	void __iomem *regs;
+	struct host1x_client *client;
+	struct device *dev;
+	struct host1x_cdma cdma;
+};
+
+/* channel list operations */
+int host1x_channel_list_init(struct host1x_channel_list *chlist,
+			     unsigned int num_channels);
+void host1x_channel_list_free(struct host1x_channel_list *chlist);
+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
+						unsigned int index);
+
+#endif
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
new file mode 100644
index 00000000..8a14880c
--- /dev/null
+++ b/drivers/gpu/host1x/debug.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2013 NVIDIA Corporation
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+#include <linux/io.h>
+
+#include "dev.h"
+#include "debug.h"
+#include "channel.h"
+
+static DEFINE_MUTEX(debug_lock);
+
+unsigned int host1x_debug_trace_cmdbuf;
+
+static pid_t host1x_debug_force_timeout_pid;
+static u32 host1x_debug_force_timeout_val;
+static u32 host1x_debug_force_timeout_channel;
+
+void host1x_debug_output(struct output *o, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+	va_end(args);
+
+	o->fn(o->ctx, o->buf, len, false);
+}
+
+void host1x_debug_cont(struct output *o, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+	va_end(args);
+
+	o->fn(o->ctx, o->buf, len, true);
+}
+
+static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
+{
+	struct host1x *m = dev_get_drvdata(ch->dev->parent);
+	struct output *o = data;
+
+	mutex_lock(&ch->cdma.lock);
+	mutex_lock(&debug_lock);
+
+	if (show_fifo)
+		host1x_hw_show_channel_fifo(m, ch, o);
+
+	host1x_hw_show_channel_cdma(m, ch, o);
+
+	mutex_unlock(&debug_lock);
+	mutex_unlock(&ch->cdma.lock);
+
+	return 0;
+}
+
+static void show_syncpts(struct host1x *m, struct output *o)
+{
+	struct list_head *pos;
+	unsigned int i;
+
+	host1x_debug_output(o, "---- syncpts ----\n");
+
+	for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
+		u32 max = host1x_syncpt_read_max(m->syncpt + i);
+		u32 min = host1x_syncpt_load(m->syncpt + i);
+		unsigned int waiters = 0;
+
+		spin_lock(&m->syncpt[i].intr.lock);
+		list_for_each(pos, &m->syncpt[i].intr.wait_head)
+			waiters++;
+		spin_unlock(&m->syncpt[i].intr.lock);
+
+		if (!min && !max && !waiters)
+			continue;
+
+		host1x_debug_output(o,
+				    "id %u (%s) min %d max %d (%d waiters)\n",
+				    i, m->syncpt[i].name, min, max, waiters);
+	}
+
+	for (i = 0; i < host1x_syncpt_nb_bases(m); i++) {
+		u32 base_val;
+
+		base_val = host1x_syncpt_load_wait_base(m->syncpt + i);
+		if (base_val)
+			host1x_debug_output(o, "waitbase id %u val %d\n", i,
+					    base_val);
+	}
+
+	host1x_debug_output(o, "\n");
+}
+
+static void show_all(struct host1x *m, struct output *o, bool show_fifo)
+{
+	unsigned int i;
+
+	host1x_hw_show_mlocks(m, o);
+	show_syncpts(m, o);
+	host1x_debug_output(o, "---- channels ----\n");
+
+	for (i = 0; i < m->info->nb_channels; ++i) {
+		struct host1x_channel *ch = host1x_channel_get_index(m, i);
+
+		if (ch) {
+			show_channel(ch, o, show_fifo);
+			host1x_channel_put(ch);
+		}
+	}
+}
+
+static int host1x_debug_show_all(struct seq_file *s, void *unused)
+{
+	struct output o = {
+		.fn = write_to_seqfile,
+		.ctx = s
+	};
+
+	show_all(s->private, &o, true);
+
+	return 0;
+}
+
+static int host1x_debug_show(struct seq_file *s, void *unused)
+{
+	struct output o = {
+		.fn = write_to_seqfile,
+		.ctx = s
+	};
+
+	show_all(s->private, &o, false);
+
+	return 0;
+}
+
+static int host1x_debug_open_all(struct inode *inode, struct file *file)
+{
+	return single_open(file, host1x_debug_show_all, inode->i_private);
+}
+
+static const struct file_operations host1x_debug_all_fops = {
+	.open = host1x_debug_open_all,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int host1x_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, host1x_debug_show, inode->i_private);
+}
+
+static const struct file_operations host1x_debug_fops = {
+	.open = host1x_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void host1x_debugfs_init(struct host1x *host1x)
+{
+	struct dentry *de = debugfs_create_dir("tegra-host1x", NULL);
+
+	/* Store the created entry */
+	host1x->debugfs = de;
+
+	debugfs_create_file("status", S_IRUGO, de, host1x, &host1x_debug_fops);
+	debugfs_create_file("status_all", S_IRUGO, de, host1x,
+			    &host1x_debug_all_fops);
+
+	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, de,
+			   &host1x_debug_trace_cmdbuf);
+
+	host1x_hw_debug_init(host1x, de);
+
+	debugfs_create_u32("force_timeout_pid", S_IRUGO|S_IWUSR, de,
+			   &host1x_debug_force_timeout_pid);
+	debugfs_create_u32("force_timeout_val", S_IRUGO|S_IWUSR, de,
+			   &host1x_debug_force_timeout_val);
+	debugfs_create_u32("force_timeout_channel", S_IRUGO|S_IWUSR, de,
+			   &host1x_debug_force_timeout_channel);
+}
+
+static void host1x_debugfs_exit(struct host1x *host1x)
+{
+	debugfs_remove_recursive(host1x->debugfs);
+}
+
+void host1x_debug_init(struct host1x *host1x)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		host1x_debugfs_init(host1x);
+}
+
+void host1x_debug_deinit(struct host1x *host1x)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		host1x_debugfs_exit(host1x);
+}
+
+void host1x_debug_dump(struct host1x *host1x)
+{
+	struct output o = {
+		.fn = write_to_printk
+	};
+
+	show_all(host1x, &o, true);
+}
+
+void host1x_debug_dump_syncpts(struct host1x *host1x)
+{
+	struct output o = {
+		.fn = write_to_printk
+	};
+
+	show_syncpts(host1x, &o);
+}
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
new file mode 100644
index 00000000..62bd8a09
--- /dev/null
+++ b/drivers/gpu/host1x/debug.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Debug
+ *
+ * Copyright (c) 2011-2013 NVIDIA Corporation.
+ */
+#ifndef __HOST1X_DEBUG_H
+#define __HOST1X_DEBUG_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+struct host1x;
+
+struct output {
+	void (*fn)(void *ctx, const char *str, size_t len, bool cont);
+	void *ctx;
+	char buf[256];
+};
+
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len,
+				    bool cont)
+{
+	seq_write((struct seq_file *)ctx, str, len);
+}
+
+static inline void write_to_printk(void *ctx, const char *str, size_t len,
+				   bool cont)
+{
+	if (cont)
+		pr_cont("%s", str);
+	else
+		pr_info("%s", str);
+}
+
+void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...);
+void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...);
+
+extern unsigned int host1x_debug_trace_cmdbuf;
+
+void host1x_debug_init(struct host1x *host1x);
+void host1x_debug_deinit(struct host1x *host1x);
+void host1x_debug_dump(struct host1x *host1x);
+void host1x_debug_dump_syncpts(struct host1x *host1x);
+
+#endif
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
new file mode 100644
index 00000000..8b50fbb2
--- /dev/null
+++ b/drivers/gpu/host1x/dev.c
@@ -0,0 +1,581 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x driver
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/host1x.h>
+#undef CREATE_TRACE_POINTS
+
+#include "bus.h"
+#include "channel.h"
+#include "debug.h"
+#include "dev.h"
+#include "intr.h"
+
+#include "hw/host1x01.h"
+#include "hw/host1x02.h"
+#include "hw/host1x04.h"
+#include "hw/host1x05.h"
+#include "hw/host1x06.h"
+#include "hw/host1x07.h"
+
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
+{
+	writel(v, host1x->hv_regs + r);
+}
+
+u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
+{
+	return readl(host1x->hv_regs + r);
+}
+
+void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
+{
+	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;
+
+	writel(v, sync_regs + r);
+}
+
+u32 host1x_sync_readl(struct host1x *host1x, u32 r)
+{
+	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;
+
+	return readl(sync_regs + r);
+}
+
+void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
+{
+	writel(v, ch->regs + r);
+}
+
+u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
+{
+	return readl(ch->regs + r);
+}
+
+static const struct host1x_info host1x01_info = {
+	.nb_channels = 8,
+	.nb_pts = 32,
+	.nb_mlocks = 16,
+	.nb_bases = 8,
+	.init = host1x01_init,
+	.sync_offset = 0x3000,
+	.dma_mask = DMA_BIT_MASK(32),
+	.has_wide_gather = false,
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
+	.reserve_vblank_syncpts = true,
+};
+
+static const struct host1x_info host1x02_info = {
+	.nb_channels = 9,
+	.nb_pts = 32,
+	.nb_mlocks = 16,
+	.nb_bases = 12,
+	.init = host1x02_init,
+	.sync_offset = 0x3000,
+	.dma_mask = DMA_BIT_MASK(32),
+	.has_wide_gather = false,
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
+	.reserve_vblank_syncpts = true,
+};
+
+static const struct host1x_info host1x04_info = {
+	.nb_channels = 12,
+	.nb_pts = 192,
+	.nb_mlocks = 16,
+	.nb_bases = 64,
+	.init = host1x04_init,
+	.sync_offset = 0x2100,
+	.dma_mask = DMA_BIT_MASK(34),
+	.has_wide_gather = false,
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
+	.reserve_vblank_syncpts = false,
+};
+
+static const struct host1x_info host1x05_info = {
+	.nb_channels = 14,
+	.nb_pts = 192,
+	.nb_mlocks = 16,
+	.nb_bases = 64,
+	.init = host1x05_init,
+	.sync_offset = 0x2100,
+	.dma_mask = DMA_BIT_MASK(34),
+	.has_wide_gather = false,
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
+	.reserve_vblank_syncpts = false,
+};
+
+static const struct host1x_sid_entry tegra186_sid_table[] = {
+	{
+		/* VIC */
+		.base = 0x1af0,
+		.offset = 0x30,
+		.limit = 0x34
+	},
+};
+
+static const struct host1x_info host1x06_info = {
+	.nb_channels = 63,
+	.nb_pts = 576,
+	.nb_mlocks = 24,
+	.nb_bases = 16,
+	.init = host1x06_init,
+	.sync_offset = 0x0,
+	.dma_mask = DMA_BIT_MASK(40),
+	.has_wide_gather = true,
+	.has_hypervisor = true,
+	.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
+	.sid_table = tegra186_sid_table,
+	.reserve_vblank_syncpts = false,
+};
+
+static const struct host1x_sid_entry tegra194_sid_table[] = {
+	{
+		/* VIC */
+		.base = 0x1af0,
+		.offset = 0x30,
+		.limit = 0x34
+	},
+};
+
+static const struct host1x_info host1x07_info = {
+	.nb_channels = 63,
+	.nb_pts = 704,
+	.nb_mlocks = 32,
+	.nb_bases = 0,
+	.init = host1x07_init,
+	.sync_offset = 0x0,
+	.dma_mask = DMA_BIT_MASK(40),
+	.has_wide_gather = true,
+	.has_hypervisor = true,
+	.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
+	.sid_table = tegra194_sid_table,
+	.reserve_vblank_syncpts = false,
+};
+
+static const struct of_device_id host1x_of_match[] = {
+	{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
+	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
+	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
+	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
+	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
+	{ .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, },
+	{ .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, host1x_of_match);
+
+static void host1x_setup_sid_table(struct host1x *host)
+{
+	const struct host1x_info *info = host->info;
+	unsigned int i;
+
+	for (i = 0; i < info->num_sid_entries; i++) {
+		const struct host1x_sid_entry *entry = &info->sid_table[i];
+
+		host1x_hypervisor_writel(host, entry->offset, entry->base);
+		host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
+	}
+}
+
+static bool host1x_wants_iommu(struct host1x *host1x)
+{
+	/*
+	 * If we support addressing a maximum of 32 bits of physical memory
+	 * and if the host1x firewall is enabled, there's no need to enable
+	 * IOMMU support. This can happen for example on Tegra20, Tegra30
+	 * and Tegra114.
+	 *
+	 * Tegra124 and later can address up to 34 bits of physical memory and
+	 * many platforms come equipped with more than 2 GiB of system memory,
+	 * which requires crossing the 4 GiB boundary. But there's a catch: on
+	 * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
+	 * only address up to 32 bits of memory in GATHER opcodes, which means
+	 * that command buffers need to either be in the first 2 GiB of system
+	 * memory (which could quickly lead to memory exhaustion), or command
+	 * buffers need to be treated differently from other buffers (which is
+	 * not possible with the current ABI).
+	 *
+	 * A third option is to use the IOMMU in these cases to make sure all
+	 * buffers will be mapped into a 32-bit IOVA space that host1x can
+	 * address. This allows all of the system memory to be used and works
+	 * within the limitations of the host1x on these SoCs.
+	 *
+	 * In summary, default to enable IOMMU on Tegra124 and later. For any
+	 * of the earlier SoCs, only use the IOMMU for additional safety when
+	 * the host1x firewall is disabled.
+	 */
+	if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
+		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+			return false;
+	}
+
+	return true;
+}
+
+static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
+{
+	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
+	int err;
+
+	/*
+	 * We may not always want to enable IOMMU support (for example if the
+	 * host1x firewall is already enabled and we don't support addressing
+	 * more than 32 bits of physical memory), so check for that first.
+	 *
+	 * Similarly, if host1x is already attached to an IOMMU (via the DMA
+	 * API), don't try to attach again.
+	 */
+	if (!host1x_wants_iommu(host) || domain)
+		return domain;
+
+	host->group = iommu_group_get(host->dev);
+	if (host->group) {
+		struct iommu_domain_geometry *geometry;
+		dma_addr_t start, end;
+		unsigned long order;
+
+		err = iova_cache_get();
+		if (err < 0)
+			goto put_group;
+
+		host->domain = iommu_domain_alloc(&platform_bus_type);
+		if (!host->domain) {
+			err = -ENOMEM;
+			goto put_cache;
+		}
+
+		err = iommu_attach_group(host->domain, host->group);
+		if (err) {
+			if (err == -ENODEV)
+				err = 0;
+
+			goto free_domain;
+		}
+
+		geometry = &host->domain->geometry;
+		start = geometry->aperture_start & host->info->dma_mask;
+		end = geometry->aperture_end & host->info->dma_mask;
+
+		order = __ffs(host->domain->pgsize_bitmap);
+		init_iova_domain(&host->iova, 1UL << order, start >> order);
+		host->iova_end = end;
+
+		domain = host->domain;
+	}
+
+	return domain;
+
+free_domain:
+	iommu_domain_free(host->domain);
+	host->domain = NULL;
+put_cache:
+	iova_cache_put();
+put_group:
+	iommu_group_put(host->group);
+	host->group = NULL;
+
+	return ERR_PTR(err);
+}
+
+static int host1x_iommu_init(struct host1x *host)
+{
+	u64 mask = host->info->dma_mask;
+	struct iommu_domain *domain;
+	int err;
+
+	domain = host1x_iommu_attach(host);
+	if (IS_ERR(domain)) {
+		err = PTR_ERR(domain);
+		dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
+		return err;
+	}
+
+	/*
+	 * If we're not behind an IOMMU make sure we don't get push buffers
+	 * that are allocated outside of the range addressable by the GATHER
+	 * opcode.
+	 *
+	 * Newer generations of Tegra (Tegra186 and later) support a wide
+	 * variant of the GATHER opcode that allows addressing more bits.
+	 */
+	if (!domain && !host->info->has_wide_gather)
+		mask = DMA_BIT_MASK(32);
+
+	err = dma_coerce_mask_and_coherent(host->dev, mask);
+	if (err < 0) {
+		dev_err(host->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static void host1x_iommu_exit(struct host1x *host)
+{
+	if (host->domain) {
+		put_iova_domain(&host->iova);
+		iommu_detach_group(host->domain, host->group);
+
+		iommu_domain_free(host->domain);
+		host->domain = NULL;
+
+		iova_cache_put();
+
+		iommu_group_put(host->group);
+		host->group = NULL;
+	}
+}
+
+static int host1x_probe(struct platform_device *pdev)
+{
+	struct host1x *host;
+	struct resource *regs, *hv_regs = NULL;
+	int syncpt_irq;
+	int err;
+
+	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
+
+	host->info = of_device_get_match_data(&pdev->dev);
+
+	if (host->info->has_hypervisor) {
+		regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
+		if (!regs) {
+			dev_err(&pdev->dev, "failed to get vm registers\n");
+			return -ENXIO;
+		}
+
+		hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						       "hypervisor");
+		if (!hv_regs) {
+			dev_err(&pdev->dev,
+				"failed to get hypervisor registers\n");
+			return -ENXIO;
+		}
+	} else {
+		regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!regs) {
+			dev_err(&pdev->dev, "failed to get registers\n");
+			return -ENXIO;
+		}
+	}
+
+	syncpt_irq = platform_get_irq(pdev, 0);
+	if (syncpt_irq < 0)
+		return syncpt_irq;
+
+	mutex_init(&host->devices_lock);
+	INIT_LIST_HEAD(&host->devices);
+	INIT_LIST_HEAD(&host->list);
+	host->dev = &pdev->dev;
+
+	/* set common host1x device data */
+	platform_set_drvdata(pdev, host);
+
+	host->regs = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(host->regs))
+		return PTR_ERR(host->regs);
+
+	if (host->info->has_hypervisor) {
+		host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
+		if (IS_ERR(host->hv_regs))
+			return PTR_ERR(host->hv_regs);
+	}
+
+	host->dev->dma_parms = &host->dma_parms;
+	dma_set_max_seg_size(host->dev, UINT_MAX);
+
+	if (host->info->init) {
+		err = host->info->init(host);
+		if (err)
+			return err;
+	}
+
+	host->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(host->clk)) {
+		err = PTR_ERR(host->clk);
+
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to get clock: %d\n", err);
+
+		return err;
+	}
+
+	host->rst = devm_reset_control_get(&pdev->dev, "host1x");
+	if (IS_ERR(host->rst)) {
+		err = PTR_ERR(host->rst);
+		dev_err(&pdev->dev, "failed to get reset: %d\n", err);
+		return err;
+	}
+
+	err = host1x_iommu_init(host);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
+		return err;
+	}
+
+	err = host1x_channel_list_init(&host->channel_list,
+				       host->info->nb_channels);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize channel list\n");
+		goto iommu_exit;
+	}
+
+	err = clk_prepare_enable(host->clk);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to enable clock\n");
+		goto free_channels;
+	}
+
+	err = reset_control_deassert(host->rst);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to deassert reset: %d\n", err);
+		goto unprepare_disable;
+	}
+
+	err = host1x_syncpt_init(host);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize syncpts\n");
+		goto reset_assert;
+	}
+
+	err = host1x_intr_init(host, syncpt_irq);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize interrupts\n");
+		goto deinit_syncpt;
+	}
+
+	err = host1x_uapi_init(&host->uapi, host);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize uapi\n");
+		goto deinit_intr;
+	}
+
+	host1x_debug_init(host);
+
+	if (host->info->has_hypervisor)
+		host1x_setup_sid_table(host);
+
+	err = host1x_register(host);
+	if (err < 0)
+		goto deinit_debugfs;
+
+	err = devm_of_platform_populate(&pdev->dev);
+	if (err < 0)
+		goto unregister;
+
+	return 0;
+
+unregister:
+	host1x_unregister(host);
+deinit_debugfs:
+	host1x_debug_deinit(host);
+	host1x_uapi_deinit(&host->uapi);
+deinit_intr:
+	host1x_intr_deinit(host);
+deinit_syncpt:
+	host1x_syncpt_deinit(host);
+reset_assert:
+	reset_control_assert(host->rst);
+unprepare_disable:
+	clk_disable_unprepare(host->clk);
+free_channels:
+	host1x_channel_list_free(&host->channel_list);
+iommu_exit:
+	host1x_iommu_exit(host);
+
+	return err;
+}
+
+static int host1x_remove(struct platform_device *pdev)
+{
+	struct host1x *host = platform_get_drvdata(pdev);
+
+	host1x_unregister(host);
+	host1x_debug_deinit(host);
+	host1x_uapi_deinit(&host->uapi);
+	host1x_intr_deinit(host);
+	host1x_syncpt_deinit(host);
+	reset_control_assert(host->rst);
+	clk_disable_unprepare(host->clk);
+	host1x_iommu_exit(host);
+
+	return 0;
+}
+
+static struct platform_driver tegra_host1x_driver = {
+	.driver = {
+		.name = "tegra-host1x",
+		.of_match_table = host1x_of_match,
+	},
+	.probe = host1x_probe,
+	.remove = host1x_remove,
+};
+
+static struct platform_driver * const drivers[] = {
+	&tegra_host1x_driver,
+	&tegra_mipi_driver,
+};
+
+static int __init tegra_host1x_init(void)
+{
+	int err;
+
+	err = bus_register(&host1x_bus_type);
+	if (err < 0)
+		return err;
+
+	err = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
+	if (err < 0)
+		bus_unregister(&host1x_bus_type);
+
+	return err;
+}
+module_init(tegra_host1x_init);
+
+static void __exit tegra_host1x_exit(void)
+{
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
+	bus_unregister(&host1x_bus_type);
+}
+module_exit(tegra_host1x_exit);
+
+/**
+ * host1x_get_dma_mask() - query the supported DMA mask for host1x
+ * @host1x: host1x instance
+ *
+ * Note that this returns the supported DMA mask for host1x, which can be
+ * different from the applicable DMA mask under certain circumstances.
+ */
+u64 host1x_get_dma_mask(struct host1x *host1x)
+{
+	return host1x->info->dma_mask;
+}
+EXPORT_SYMBOL(host1x_get_dma_mask);
+
+MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
+MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
+MODULE_DESCRIPTION("Host1x driver for Tegra products");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
new file mode 100644
index 00000000..e360bc4a
--- /dev/null
+++ b/drivers/gpu/host1x/dev.h
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2012-2015, NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_DEV_H
+#define HOST1X_DEV_H
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include "cdma.h"
+#include "channel.h"
+#include "intr.h"
+#include "job.h"
+#include "syncpt.h"
+#include "uapi.h"
+
+struct host1x_syncpt;
+struct host1x_syncpt_base;
+struct host1x_channel;
+struct host1x_cdma;
+struct host1x_job;
+struct push_buffer;
+struct output;
+struct dentry;
+
+struct host1x_channel_ops {
+	int (*init)(struct host1x_channel *channel, struct host1x *host,
+		    unsigned int id);
+	int (*submit)(struct host1x_job *job);
+};
+
+struct host1x_cdma_ops {
+	void (*start)(struct host1x_cdma *cdma);
+	void (*stop)(struct host1x_cdma *cdma);
+	void (*flush)(struct  host1x_cdma *cdma);
+	int (*timeout_init)(struct host1x_cdma *cdma);
+	void (*timeout_destroy)(struct host1x_cdma *cdma);
+	void (*freeze)(struct host1x_cdma *cdma);
+	void (*resume)(struct host1x_cdma *cdma, u32 getptr);
+	void (*timeout_cpu_incr)(struct host1x_cdma *cdma, u32 getptr,
+				 u32 syncpt_incrs, u32 syncval, u32 nr_slots);
+};
+
+struct host1x_pushbuffer_ops {
+	void (*init)(struct push_buffer *pb);
+};
+
+struct host1x_debug_ops {
+	void (*debug_init)(struct dentry *de);
+	void (*show_channel_cdma)(struct host1x *host,
+				  struct host1x_channel *ch,
+				  struct output *o);
+	void (*show_channel_fifo)(struct host1x *host,
+				  struct host1x_channel *ch,
+				  struct output *o);
+	void (*show_mlocks)(struct host1x *host, struct output *output);
+
+};
+
+struct host1x_syncpt_ops {
+	void (*restore)(struct host1x_syncpt *syncpt);
+	void (*restore_wait_base)(struct host1x_syncpt *syncpt);
+	void (*load_wait_base)(struct host1x_syncpt *syncpt);
+	u32 (*load)(struct host1x_syncpt *syncpt);
+	int (*cpu_incr)(struct host1x_syncpt *syncpt);
+	void (*assign_to_channel)(struct host1x_syncpt *syncpt,
+	                          struct host1x_channel *channel);
+	void (*enable_protection)(struct host1x *host);
+};
+
+struct host1x_intr_ops {
+	int (*init_host_sync)(struct host1x *host, u32 cpm,
+		void (*syncpt_thresh_work)(struct work_struct *work));
+	void (*set_syncpt_threshold)(
+		struct host1x *host, unsigned int id, u32 thresh);
+	void (*enable_syncpt_intr)(struct host1x *host, unsigned int id);
+	void (*disable_syncpt_intr)(struct host1x *host, unsigned int id);
+	void (*disable_all_syncpt_intrs)(struct host1x *host);
+	int (*free_syncpt_irq)(struct host1x *host);
+};
+
+struct host1x_sid_entry {
+	unsigned int base;
+	unsigned int offset;
+	unsigned int limit;
+};
+
+struct host1x_info {
+	unsigned int nb_channels; /* host1x: number of channels supported */
+	unsigned int nb_pts; /* host1x: number of syncpoints supported */
+	unsigned int nb_bases; /* host1x: number of syncpoint bases supported */
+	unsigned int nb_mlocks; /* host1x: number of mlocks supported */
+	int (*init)(struct host1x *host1x); /* initialize per SoC ops */
+	unsigned int sync_offset; /* offset of syncpoint registers */
+	u64 dma_mask; /* mask of addressable memory */
+	bool has_wide_gather; /* supports GATHER_W opcode */
+	bool has_hypervisor; /* has hypervisor registers */
+	unsigned int num_sid_entries;
+	const struct host1x_sid_entry *sid_table;
+	/*
+	 * On T20-T148, the boot chain may setup DC to increment syncpoints
+	 * 26/27 on VBLANK. As such we cannot use these syncpoints until
+	 * the display driver disables VBLANK increments.
+	 */
+	bool reserve_vblank_syncpts;
+};
+
+struct host1x {
+	const struct host1x_info *info;
+
+	void __iomem *regs;
+	void __iomem *hv_regs; /* hypervisor region */
+	struct host1x_syncpt *syncpt;
+	struct host1x_syncpt_base *bases;
+	struct device *dev;
+	struct clk *clk;
+	struct reset_control *rst;
+
+	struct iommu_group *group;
+	struct iommu_domain *domain;
+	struct iova_domain iova;
+	dma_addr_t iova_end;
+
+	struct mutex intr_mutex;
+	int intr_syncpt_irq;
+
+	const struct host1x_syncpt_ops *syncpt_op;
+	const struct host1x_intr_ops *intr_op;
+	const struct host1x_channel_ops *channel_op;
+	const struct host1x_cdma_ops *cdma_op;
+	const struct host1x_pushbuffer_ops *cdma_pb_op;
+	const struct host1x_debug_ops *debug_op;
+
+	struct host1x_syncpt *nop_sp;
+
+	struct mutex syncpt_mutex;
+
+	struct host1x_channel_list channel_list;
+
+	struct dentry *debugfs;
+
+	struct mutex devices_lock;
+	struct list_head devices;
+
+	struct list_head list;
+
+	struct device_dma_parameters dma_parms;
+
+	struct host1x_uapi uapi;
+};
+
+void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
+u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
+void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
+u32 host1x_sync_readl(struct host1x *host1x, u32 r);
+void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v);
+u32 host1x_ch_readl(struct host1x_channel *ch, u32 r);
+
+static inline void host1x_hw_syncpt_restore(struct host1x *host,
+					    struct host1x_syncpt *sp)
+{
+	host->syncpt_op->restore(sp);
+}
+
+static inline void host1x_hw_syncpt_restore_wait_base(struct host1x *host,
+						      struct host1x_syncpt *sp)
+{
+	host->syncpt_op->restore_wait_base(sp);
+}
+
+static inline void host1x_hw_syncpt_load_wait_base(struct host1x *host,
+						   struct host1x_syncpt *sp)
+{
+	host->syncpt_op->load_wait_base(sp);
+}
+
+static inline u32 host1x_hw_syncpt_load(struct host1x *host,
+					struct host1x_syncpt *sp)
+{
+	return host->syncpt_op->load(sp);
+}
+
+static inline int host1x_hw_syncpt_cpu_incr(struct host1x *host,
+					    struct host1x_syncpt *sp)
+{
+	return host->syncpt_op->cpu_incr(sp);
+}
+
+static inline void host1x_hw_syncpt_assign_to_channel(
+	struct host1x *host, struct host1x_syncpt *sp,
+	struct host1x_channel *ch)
+{
+	return host->syncpt_op->assign_to_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
+{
+	return host->syncpt_op->enable_protection(host);
+}
+
+static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
+			void (*syncpt_thresh_work)(struct work_struct *))
+{
+	return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work);
+}
+
+static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
+						       unsigned int id,
+						       u32 thresh)
+{
+	host->intr_op->set_syncpt_threshold(host, id, thresh);
+}
+
+static inline void host1x_hw_intr_enable_syncpt_intr(struct host1x *host,
+						     unsigned int id)
+{
+	host->intr_op->enable_syncpt_intr(host, id);
+}
+
+static inline void host1x_hw_intr_disable_syncpt_intr(struct host1x *host,
+						      unsigned int id)
+{
+	host->intr_op->disable_syncpt_intr(host, id);
+}
+
+static inline void host1x_hw_intr_disable_all_syncpt_intrs(struct host1x *host)
+{
+	host->intr_op->disable_all_syncpt_intrs(host);
+}
+
+static inline int host1x_hw_intr_free_syncpt_irq(struct host1x *host)
+{
+	return host->intr_op->free_syncpt_irq(host);
+}
+
+static inline int host1x_hw_channel_init(struct host1x *host,
+					 struct host1x_channel *channel,
+					 unsigned int id)
+{
+	return host->channel_op->init(channel, host, id);
+}
+
+static inline int host1x_hw_channel_submit(struct host1x *host,
+					   struct host1x_job *job)
+{
+	return host->channel_op->submit(job);
+}
+
+static inline void host1x_hw_cdma_start(struct host1x *host,
+					struct host1x_cdma *cdma)
+{
+	host->cdma_op->start(cdma);
+}
+
+static inline void host1x_hw_cdma_stop(struct host1x *host,
+				       struct host1x_cdma *cdma)
+{
+	host->cdma_op->stop(cdma);
+}
+
+static inline void host1x_hw_cdma_flush(struct host1x *host,
+					struct host1x_cdma *cdma)
+{
+	host->cdma_op->flush(cdma);
+}
+
+static inline int host1x_hw_cdma_timeout_init(struct host1x *host,
+					      struct host1x_cdma *cdma)
+{
+	return host->cdma_op->timeout_init(cdma);
+}
+
+static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host,
+						  struct host1x_cdma *cdma)
+{
+	host->cdma_op->timeout_destroy(cdma);
+}
+
+static inline void host1x_hw_cdma_freeze(struct host1x *host,
+					 struct host1x_cdma *cdma)
+{
+	host->cdma_op->freeze(cdma);
+}
+
+static inline void host1x_hw_cdma_resume(struct host1x *host,
+					 struct host1x_cdma *cdma, u32 getptr)
+{
+	host->cdma_op->resume(cdma, getptr);
+}
+
+static inline void host1x_hw_cdma_timeout_cpu_incr(struct host1x *host,
+						   struct host1x_cdma *cdma,
+						   u32 getptr,
+						   u32 syncpt_incrs,
+						   u32 syncval, u32 nr_slots)
+{
+	host->cdma_op->timeout_cpu_incr(cdma, getptr, syncpt_incrs, syncval,
+					nr_slots);
+}
+
+static inline void host1x_hw_pushbuffer_init(struct host1x *host,
+					     struct push_buffer *pb)
+{
+	host->cdma_pb_op->init(pb);
+}
+
+static inline void host1x_hw_debug_init(struct host1x *host, struct dentry *de)
+{
+	if (host->debug_op && host->debug_op->debug_init)
+		host->debug_op->debug_init(de);
+}
+
+static inline void host1x_hw_show_channel_cdma(struct host1x *host,
+					       struct host1x_channel *channel,
+					       struct output *o)
+{
+	host->debug_op->show_channel_cdma(host, channel, o);
+}
+
+static inline void host1x_hw_show_channel_fifo(struct host1x *host,
+					       struct host1x_channel *channel,
+					       struct output *o)
+{
+	host->debug_op->show_channel_fifo(host, channel, o);
+}
+
+static inline void host1x_hw_show_mlocks(struct host1x *host, struct output *o)
+{
+	host->debug_op->show_mlocks(host, o);
+}
+
+extern struct platform_driver tegra_mipi_driver;
+
+#endif
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
new file mode 100644
index 00000000..b2484606
--- /dev/null
+++ b/drivers/gpu/host1x/fence.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Syncpoint dma_fence implementation
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/sync_file.h>
+
+#include "fence.h"
+#include "intr.h"
+#include "syncpt.h"
+
+DEFINE_SPINLOCK(lock);
+
+struct host1x_syncpt_fence {
+	struct dma_fence base;
+
+	atomic_t signaling;
+
+	struct host1x_syncpt *sp;
+	u32 threshold;
+
+	struct host1x_waitlist *waiter;
+	void *waiter_ref;
+
+	struct delayed_work timeout_work;
+};
+
+static const char *syncpt_fence_get_driver_name(struct dma_fence *f)
+{
+	return "host1x";
+}
+
+static const char *syncpt_fence_get_timeline_name(struct dma_fence *f)
+{
+	return "syncpoint";
+}
+
+static bool syncpt_fence_enable_signaling(struct dma_fence *f)
+{
+	struct host1x_syncpt_fence *sf =
+		container_of(f, struct host1x_syncpt_fence, base);
+	int err;
+
+	if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
+		return false;
+
+	dma_fence_get(f);
+
+	/*
+	 * The dma_fence framework requires the fence driver to keep a
+	 * reference to any fences for which 'enable_signaling' has been
+	 * called (and that have not been signalled).
+	 * 
+	 * We provide a userspace API to create arbitrary syncpoint fences,
+	 * so we cannot normally guarantee that all fences get signalled.
+	 * As such, setup a timeout, so that long-lasting fences will get
+	 * reaped eventually.
+	 */
+	schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
+
+	err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold,
+				     HOST1X_INTR_ACTION_SIGNAL_FENCE, f,
+				     sf->waiter, &sf->waiter_ref);
+	if (err) {
+		cancel_delayed_work_sync(&sf->timeout_work);
+		dma_fence_put(f);
+		return false;
+	}
+
+	/* intr framework takes ownership of waiter */
+	sf->waiter = NULL;
+
+	/*
+	 * The fence may get signalled at any time after the above call,
+	 * so we need to initialize all state used by signalling
+	 * before it.
+	 */
+
+	return true;
+}
+
+static void syncpt_fence_release(struct dma_fence *f)
+{
+	struct host1x_syncpt_fence *sf =
+		container_of(f, struct host1x_syncpt_fence, base);
+
+	if (sf->waiter)
+		kfree(sf->waiter);
+
+	dma_fence_free(f);
+}
+
+const struct dma_fence_ops syncpt_fence_ops = {
+	.get_driver_name = syncpt_fence_get_driver_name,
+	.get_timeline_name = syncpt_fence_get_timeline_name,
+	.enable_signaling = syncpt_fence_enable_signaling,
+	.release = syncpt_fence_release,
+};
+
+void host1x_fence_signal(struct host1x_syncpt_fence *f)
+{
+	if (atomic_xchg(&f->signaling, 1))
+		return;
+
+	/*
+	 * Cancel pending timeout work - if it races, it will
+	 * not get 'f->signaling' and return.
+	 */
+	cancel_delayed_work_sync(&f->timeout_work);
+
+	host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref);
+
+	dma_fence_signal(&f->base);
+	dma_fence_put(&f->base);
+}
+
+static void do_fence_timeout(struct work_struct *work)
+{
+	struct delayed_work *dwork = (struct delayed_work *)work;
+	struct host1x_syncpt_fence *f =
+		container_of(dwork, struct host1x_syncpt_fence, timeout_work);
+
+	if (atomic_xchg(&f->signaling, 1))
+		return;
+
+	/*
+	 * Cancel pending timeout work - if it races, it will
+	 * not get 'f->signaling' and return.
+	 */
+	host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref);
+
+	dma_fence_set_error(&f->base, -ETIMEDOUT);
+	dma_fence_signal(&f->base);
+	dma_fence_put(&f->base);
+}
+
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
+{
+	struct host1x_syncpt_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return ERR_PTR(-ENOMEM);
+
+	fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL);
+	if (!fence->waiter)
+		return ERR_PTR(-ENOMEM);
+
+	fence->sp = sp;
+	fence->threshold = threshold;
+
+	dma_fence_init(&fence->base, &syncpt_fence_ops, &lock,
+		       dma_fence_context_alloc(1), 0);
+
+	INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout);
+
+	return &fence->base;
+}
+EXPORT_SYMBOL(host1x_fence_create);
+
+int host1x_fence_create_fd(struct host1x_syncpt *sp, u32 threshold)
+{
+	struct sync_file *file;
+	struct dma_fence *f;
+	int fd;
+
+	f = host1x_fence_create(sp, threshold);
+	if (IS_ERR(f))
+		return PTR_ERR(f);
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		dma_fence_put(f);
+		return fd;
+	}
+
+	file = sync_file_create(f);
+	dma_fence_put(f);
+	if (!file)
+		return -ENOMEM;
+
+	fd_install(fd, file->file);
+
+	return fd;
+}
+EXPORT_SYMBOL(host1x_fence_create_fd);
+
+int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold)
+{
+	struct host1x_syncpt_fence *f;
+
+	if (fence->ops != &syncpt_fence_ops)
+		return -EINVAL;
+
+	f = container_of(fence, struct host1x_syncpt_fence, base);
+
+	*id = f->sp->id;
+	*threshold = f->threshold;
+
+	return 0;
+}
+EXPORT_SYMBOL(host1x_fence_extract);
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
new file mode 100644
index 00000000..70c91de8
--- /dev/null
+++ b/drivers/gpu/host1x/fence.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_FENCE_H
+#define HOST1X_FENCE_H
+
+struct host1x_syncpt_fence;
+
+void host1x_fence_signal(struct host1x_syncpt_fence *fence);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
new file mode 100644
index 00000000..e49cd5b8
--- /dev/null
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Command DMA
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+
+#include "../cdma.h"
+#include "../channel.h"
+#include "../dev.h"
+#include "../debug.h"
+
+/*
+ * Put the restart at the end of pushbuffer memory
+ */
+static void push_buffer_init(struct push_buffer *pb)
+{
+	*(u32 *)(pb->mapped + pb->size) = host1x_opcode_restart(0);
+}
+
+/*
+ * Increment timedout buffer's syncpt via CPU.
+ */
+static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
+				u32 syncpt_incrs, u32 syncval, u32 nr_slots)
+{
+	unsigned int i;
+
+	for (i = 0; i < syncpt_incrs; i++)
+		host1x_syncpt_incr(cdma->timeout.syncpt);
+
+	/* after CPU incr, ensure shadow is up to date */
+	host1x_syncpt_load(cdma->timeout.syncpt);
+}
+
+/*
+ * Start channel DMA
+ */
+static void cdma_start(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+	u64 start, end;
+
+	if (cdma->running)
+		return;
+
+	cdma->last_pos = cdma->push_buffer.pos;
+	start = cdma->push_buffer.dma;
+	end = cdma->push_buffer.size + 4;
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	/* set base, put and end pointer */
+	host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI);
+#endif
+	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMAPUT_HI);
+#endif
+	host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI);
+#endif
+
+	/* reset GET */
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
+			 HOST1X_CHANNEL_DMACTRL_DMAGETRST |
+			 HOST1X_CHANNEL_DMACTRL_DMAINITGET,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	/* start the command DMA */
+	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL);
+
+	cdma->running = true;
+}
+
+/*
+ * Similar to cdma_start(), but rather than starting from an idle
+ * state (where DMA GET is set to DMA PUT), on a timeout we restore
+ * DMA GET from an explicit value (so DMA may again be pending).
+ */
+static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+	u64 start, end;
+
+	if (cdma->running)
+		return;
+
+	cdma->last_pos = cdma->push_buffer.pos;
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	start = cdma->push_buffer.dma;
+	end = cdma->push_buffer.size + 4;
+
+	/* set base, end pointer (all of memory) */
+	host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI);
+#endif
+	host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI);
+#endif
+
+	/* set GET, by loading the value in PUT (then reset GET) */
+	host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT);
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
+			 HOST1X_CHANNEL_DMACTRL_DMAGETRST |
+			 HOST1X_CHANNEL_DMACTRL_DMAINITGET,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	dev_dbg(host1x->dev,
+		"%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", __func__,
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT),
+		cdma->last_pos);
+
+	/* deassert GET reset and set PUT */
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
+
+	/* start the command DMA */
+	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL);
+
+	cdma->running = true;
+}
+
+/*
+ * Kick channel DMA into action by writing its PUT offset (if it has changed)
+ */
+static void cdma_flush(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	if (cdma->push_buffer.pos != cdma->last_pos) {
+		host1x_ch_writel(ch, cdma->push_buffer.pos,
+				 HOST1X_CHANNEL_DMAPUT);
+		cdma->last_pos = cdma->push_buffer.pos;
+	}
+}
+
+static void cdma_stop(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	mutex_lock(&cdma->lock);
+
+	if (cdma->running) {
+		host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY);
+		host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+				 HOST1X_CHANNEL_DMACTRL);
+		cdma->running = false;
+	}
+
+	mutex_unlock(&cdma->lock);
+}
+
+static void cdma_hw_cmdproc_stop(struct host1x *host, struct host1x_channel *ch,
+				 bool stop)
+{
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, stop ? 0x1 : 0x0, HOST1X_CHANNEL_CMDPROC_STOP);
+#else
+	u32 cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP);
+	if (stop)
+		cmdproc_stop |= BIT(ch->id);
+	else
+		cmdproc_stop &= ~BIT(ch->id);
+	host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+#endif
+}
+
+static void cdma_hw_teardown(struct host1x *host, struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, 0x1, HOST1X_CHANNEL_TEARDOWN);
+#else
+	host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN);
+#endif
+}
+
+/*
+ * Stops both channel's command processor and CDMA immediately.
+ * Also, tears down the channel and resets corresponding module.
+ */
+static void cdma_freeze(struct host1x_cdma *cdma)
+{
+	struct host1x *host = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	if (cdma->torndown && !cdma->running) {
+		dev_warn(host->dev, "Already torn down\n");
+		return;
+	}
+
+	dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id);
+
+	cdma_hw_cmdproc_stop(host, ch, true);
+
+	dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
+		__func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT),
+		cdma->last_pos);
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	cdma_hw_teardown(host, ch);
+
+	cdma->running = false;
+	cdma->torndown = true;
+}
+
+static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	dev_dbg(host1x->dev,
+		"resuming channel (id %u, DMAGET restart = 0x%x)\n",
+		ch->id, getptr);
+
+	cdma_hw_cmdproc_stop(host1x, ch, false);
+
+	cdma->torndown = false;
+	cdma_timeout_restart(cdma, getptr);
+}
+
+/*
+ * If this timeout fires, it indicates the current sync_queue entry has
+ * exceeded its TTL and the userctx should be timed out and remaining
+ * submits already issued cleaned up (future submits return an error).
+ */
+static void cdma_timeout_handler(struct work_struct *work)
+{
+	u32 syncpt_val;
+	struct host1x_cdma *cdma;
+	struct host1x *host1x;
+	struct host1x_channel *ch;
+
+	cdma = container_of(to_delayed_work(work), struct host1x_cdma,
+			    timeout.wq);
+	host1x = cdma_to_host1x(cdma);
+	ch = cdma_to_channel(cdma);
+
+	host1x_debug_dump(cdma_to_host1x(cdma));
+
+	mutex_lock(&cdma->lock);
+
+	if (!cdma->timeout.client) {
+		dev_dbg(host1x->dev,
+			"cdma_timeout: expired, but has no clientid\n");
+		mutex_unlock(&cdma->lock);
+		return;
+	}
+
+	/* stop processing to get a clean snapshot */
+	cdma_hw_cmdproc_stop(host1x, ch, true);
+
+	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
+
+	/* has buffer actually completed? */
+	if ((s32)(syncpt_val - cdma->timeout.syncpt_val) >= 0) {
+		dev_dbg(host1x->dev,
+			"cdma_timeout: expired, but buffer had completed\n");
+		/* restore */
+		cdma_hw_cmdproc_stop(host1x, ch, false);
+		mutex_unlock(&cdma->lock);
+		return;
+	}
+
+	dev_warn(host1x->dev, "%s: timeout: %u (%s), HW thresh %d, done %d\n",
+		 __func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name,
+		 syncpt_val, cdma->timeout.syncpt_val);
+
+	/* stop HW, resetting channel/module */
+	host1x_hw_cdma_freeze(host1x, cdma);
+
+	host1x_cdma_update_sync_queue(cdma, ch->dev);
+	mutex_unlock(&cdma->lock);
+}
+
+/*
+ * Init timeout resources
+ */
+static int cdma_timeout_init(struct host1x_cdma *cdma)
+{
+	INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler);
+	cdma->timeout.initialized = true;
+
+	return 0;
+}
+
+/*
+ * Clean up timeout resources
+ */
+static void cdma_timeout_destroy(struct host1x_cdma *cdma)
+{
+	if (cdma->timeout.initialized)
+		cancel_delayed_work(&cdma->timeout.wq);
+
+	cdma->timeout.initialized = false;
+}
+
+static const struct host1x_cdma_ops host1x_cdma_ops = {
+	.start = cdma_start,
+	.stop = cdma_stop,
+	.flush = cdma_flush,
+
+	.timeout_init = cdma_timeout_init,
+	.timeout_destroy = cdma_timeout_destroy,
+	.freeze = cdma_freeze,
+	.resume = cdma_resume,
+	.timeout_cpu_incr = cdma_timeout_cpu_incr,
+};
+
+static const struct host1x_pushbuffer_ops host1x_pushbuffer_ops = {
+	.init = push_buffer_init,
+};
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
new file mode 100644
index 00000000..1917e760
--- /dev/null
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Channel
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#include <linux/host1x-next.h>
+#include <linux/iommu.h>
+#include <linux/slab.h>
+
+#include <trace/events/host1x.h>
+
+#include "../channel.h"
+#include "../dev.h"
+#include "../intr.h"
+#include "../job.h"
+
+#define TRACE_MAX_LENGTH 128U
+
+static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
+			       u32 offset, u32 words)
+{
+	struct device *dev = cdma_to_channel(cdma)->dev;
+	void *mem = NULL;
+
+	if (host1x_debug_trace_cmdbuf)
+		mem = host1x_bo_mmap(bo);
+
+	if (mem) {
+		u32 i;
+		/*
+		 * Write in batches of 128 as there seems to be a limit
+		 * of how much you can output to ftrace at once.
+		 */
+		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
+			u32 num_words = min(words - i, TRACE_MAX_LENGTH);
+
+			offset += i * sizeof(u32);
+
+			trace_host1x_cdma_push_gather(dev_name(dev), bo,
+						      num_words, offset,
+						      mem);
+		}
+
+		host1x_bo_munmap(bo, mem);
+	}
+}
+
+static void submit_gathers(struct host1x_job *job)
+{
+	struct host1x_cdma *cdma = &job->channel->cdma;
+#if HOST1X_HW < 6
+	struct device *dev = job->channel->dev;
+#endif
+	unsigned int i;
+
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_cmd *cmd = &job->cmds[i];
+
+		if (cmd->is_wait) {
+			/* TODO use modern wait */
+			host1x_cdma_push(cdma,
+				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+					host1x_uclass_wait_syncpt_r(), 1),
+				 host1x_class_host_wait_syncpt(cmd->wait.id,
+					cmd->wait.threshold));
+			host1x_cdma_push(
+				cdma, host1x_opcode_setclass(job->class, 0, 0),
+				HOST1X_OPCODE_NOP);
+		} else {
+			struct host1x_job_gather *g = &cmd->gather;
+
+			dma_addr_t addr = g->base + g->offset;
+			u32 op2, op3;
+
+			op2 = lower_32_bits(addr);
+			op3 = upper_32_bits(addr);
+
+			trace_write_gather(cdma, g->bo, g->offset, g->words);
+
+			if (op3 != 0) {
+#if HOST1X_HW >= 6
+				u32 op1 = host1x_opcode_gather_wide(g->words);
+				u32 op4 = HOST1X_OPCODE_NOP;
+
+				host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
+#else
+				dev_err(dev, "invalid gather for push buffer %pad\n",
+					&addr);
+				continue;
+#endif
+			} else {
+				u32 op1 = host1x_opcode_gather(g->words);
+
+				host1x_cdma_push(cdma, op1, op2);
+			}
+		}
+	}
+}
+
+static inline void synchronize_syncpt_base(struct host1x_job *job)
+{
+	struct host1x_syncpt *sp = job->syncpt;
+	unsigned int id;
+	u32 value;
+
+	value = host1x_syncpt_read_max(sp);
+	id = sp->base->id;
+
+	host1x_cdma_push(&job->channel->cdma,
+			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+				HOST1X_UCLASS_LOAD_SYNCPT_BASE, 1),
+			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(id) |
+			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value));
+}
+
+static void host1x_channel_set_streamid(struct host1x_channel *channel)
+{
+#if HOST1X_HW >= 6
+	u32 sid = 0x7f;
+#ifdef CONFIG_IOMMU_API
+	struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent);
+	if (spec)
+		sid = spec->ids[0] & 0xffff;
+#endif
+
+	host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID);
+#endif
+}
+
+static int channel_submit(struct host1x_job *job)
+{
+	struct host1x_channel *ch = job->channel;
+	struct host1x_syncpt *sp = job->syncpt;
+	u32 user_syncpt_incrs = job->syncpt_incrs;
+	u32 prev_max = 0;
+	u32 syncval;
+	int err;
+	struct host1x_waitlist *completed_waiter = NULL;
+	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+
+	trace_host1x_channel_submit(dev_name(ch->dev),
+				    job->num_cmds, job->num_relocs,
+				    job->syncpt->id, job->syncpt_incrs);
+
+	/* before error checks, return current max */
+	prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
+
+	/* get submit lock */
+	err = mutex_lock_interruptible(&ch->submitlock);
+	if (err)
+		goto error;
+
+	completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
+	if (!completed_waiter) {
+		mutex_unlock(&ch->submitlock);
+		err = -ENOMEM;
+		goto error;
+	}
+
+	host1x_channel_set_streamid(ch);
+
+	/* begin a CDMA submit */
+	err = host1x_cdma_begin(&ch->cdma, job);
+	if (err) {
+		mutex_unlock(&ch->submitlock);
+		goto error;
+	}
+
+	if (job->serialize) {
+		/*
+		 * Force serialization by inserting a host wait for the
+		 * previous job to finish before this one can commence.
+		 */
+		host1x_cdma_push(&ch->cdma,
+				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+					host1x_uclass_wait_syncpt_r(), 1),
+				 host1x_class_host_wait_syncpt(job->syncpt->id,
+					host1x_syncpt_read_max(sp)));
+	}
+
+	/* Synchronize base register to allow using it for relative waiting */
+	if (sp->base)
+		synchronize_syncpt_base(job);
+
+	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
+
+	host1x_hw_syncpt_assign_to_channel(host, sp, ch);
+
+	job->syncpt_end = syncval;
+
+	/* add a setclass for modules that require it */
+	if (job->class)
+		host1x_cdma_push(&ch->cdma,
+				 host1x_opcode_setclass(job->class, 0, 0),
+				 HOST1X_OPCODE_NOP);
+
+	submit_gathers(job);
+
+	/* end CDMA submit & stash pinned hMems into sync queue */
+	host1x_cdma_end(&ch->cdma, job);
+
+	trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
+
+	/* schedule a submit complete interrupt */
+	err = host1x_intr_add_action(host, sp, syncval,
+				     HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
+				     completed_waiter, &job->waiter);
+	completed_waiter = NULL;
+	WARN(err, "Failed to set submit complete interrupt");
+
+	mutex_unlock(&ch->submitlock);
+
+	return 0;
+
+error:
+	kfree(completed_waiter);
+	return err;
+}
+
+static void enable_gather_filter(struct host1x *host,
+				 struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	u32 val;
+
+	if (!host->hv_regs)
+		return;
+
+	val = host1x_hypervisor_readl(
+		host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+	val |= BIT(ch->id % 32);
+	host1x_hypervisor_writel(
+		host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+	host1x_ch_writel(ch,
+			 HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+			 HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
+static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
+			       unsigned int index)
+{
+#if HOST1X_HW < 6
+	ch->regs = dev->regs + index * 0x4000;
+#else
+	ch->regs = dev->regs + index * 0x100;
+#endif
+	enable_gather_filter(dev, ch);
+	return 0;
+}
+
+static const struct host1x_channel_ops host1x_channel_ops = {
+	.init = host1x_channel_init,
+	.submit = channel_submit,
+};
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
new file mode 100644
index 00000000..35952fd5
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2013 NVIDIA Corporation
+ */
+
+#include "../dev.h"
+#include "../debug.h"
+#include "../cdma.h"
+#include "../channel.h"
+
+#define HOST1X_DEBUG_MAX_PAGE_OFFSET 102400
+
+enum {
+	HOST1X_OPCODE_SETCLASS	= 0x00,
+	HOST1X_OPCODE_INCR	= 0x01,
+	HOST1X_OPCODE_NONINCR	= 0x02,
+	HOST1X_OPCODE_MASK	= 0x03,
+	HOST1X_OPCODE_IMM	= 0x04,
+	HOST1X_OPCODE_RESTART	= 0x05,
+	HOST1X_OPCODE_GATHER	= 0x06,
+	HOST1X_OPCODE_SETSTRMID = 0x07,
+	HOST1X_OPCODE_SETAPPID  = 0x08,
+	HOST1X_OPCODE_SETPYLD   = 0x09,
+	HOST1X_OPCODE_INCR_W    = 0x0a,
+	HOST1X_OPCODE_NONINCR_W = 0x0b,
+	HOST1X_OPCODE_GATHER_W  = 0x0c,
+	HOST1X_OPCODE_RESTART_W = 0x0d,
+	HOST1X_OPCODE_EXTEND	= 0x0e,
+};
+
+enum {
+	HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK	= 0x00,
+	HOST1X_OPCODE_EXTEND_RELEASE_MLOCK	= 0x01,
+};
+
+#define INVALID_PAYLOAD				0xffffffff
+
+static unsigned int show_channel_command(struct output *o, u32 val,
+					 u32 *payload)
+{
+	unsigned int mask, subop, num, opcode;
+
+	opcode = val >> 28;
+
+	switch (opcode) {
+	case HOST1X_OPCODE_SETCLASS:
+		mask = val & 0x3f;
+		if (mask) {
+			host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
+					    val >> 6 & 0x3ff,
+					    val >> 16 & 0xfff, mask);
+			return hweight8(mask);
+		}
+
+		host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+		return 0;
+
+	case HOST1X_OPCODE_INCR:
+		num = val & 0xffff;
+		host1x_debug_cont(o, "INCR(offset=%03x, [",
+				    val >> 16 & 0xfff);
+		if (!num)
+			host1x_debug_cont(o, "])\n");
+
+		return num;
+
+	case HOST1X_OPCODE_NONINCR:
+		num = val & 0xffff;
+		host1x_debug_cont(o, "NONINCR(offset=%03x, [",
+				    val >> 16 & 0xfff);
+		if (!num)
+			host1x_debug_cont(o, "])\n");
+
+		return num;
+
+	case HOST1X_OPCODE_MASK:
+		mask = val & 0xffff;
+		host1x_debug_cont(o, "MASK(offset=%03x, mask=%03x, [",
+				    val >> 16 & 0xfff, mask);
+		if (!mask)
+			host1x_debug_cont(o, "])\n");
+
+		return hweight16(mask);
+
+	case HOST1X_OPCODE_IMM:
+		host1x_debug_cont(o, "IMM(offset=%03x, data=%03x)\n",
+				    val >> 16 & 0xfff, val & 0xffff);
+		return 0;
+
+	case HOST1X_OPCODE_RESTART:
+		host1x_debug_cont(o, "RESTART(offset=%08x)\n", val << 4);
+		return 0;
+
+	case HOST1X_OPCODE_GATHER:
+		host1x_debug_cont(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
+				    val >> 16 & 0xfff, val >> 15 & 0x1,
+				    val >> 14 & 0x1, val & 0x3fff);
+		return 1;
+
+#if HOST1X_HW >= 6
+	case HOST1X_OPCODE_SETSTRMID:
+		host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n",
+				  val & 0x3fffff);
+		return 0;
+
+	case HOST1X_OPCODE_SETAPPID:
+		host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff);
+		return 0;
+
+	case HOST1X_OPCODE_SETPYLD:
+		*payload = val & 0xffff;
+		host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload);
+		return 0;
+
+	case HOST1X_OPCODE_INCR_W:
+	case HOST1X_OPCODE_NONINCR_W:
+		host1x_debug_cont(o, "%s(offset=%06x, ",
+				  opcode == HOST1X_OPCODE_INCR_W ?
+					"INCR_W" : "NONINCR_W",
+				  val & 0x3fffff);
+		if (*payload == 0) {
+			host1x_debug_cont(o, "[])\n");
+			return 0;
+		} else if (*payload == INVALID_PAYLOAD) {
+			host1x_debug_cont(o, "unknown)\n");
+			return 0;
+		} else {
+			host1x_debug_cont(o, "[");
+			return *payload;
+		}
+
+	case HOST1X_OPCODE_GATHER_W:
+		host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[",
+				  val & 0x3fff);
+		return 2;
+#endif
+
+	case HOST1X_OPCODE_EXTEND:
+		subop = val >> 24 & 0xf;
+		if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
+			host1x_debug_cont(o, "ACQUIRE_MLOCK(index=%d)\n",
+					    val & 0xff);
+		else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK)
+			host1x_debug_cont(o, "RELEASE_MLOCK(index=%d)\n",
+					    val & 0xff);
+		else
+			host1x_debug_cont(o, "EXTEND_UNKNOWN(%08x)\n", val);
+		return 0;
+
+	default:
+		host1x_debug_cont(o, "UNKNOWN\n");
+		return 0;
+	}
+}
+
+static void show_gather(struct output *o, phys_addr_t phys_addr,
+			unsigned int words, struct host1x_cdma *cdma,
+			phys_addr_t pin_addr, u32 *map_addr)
+{
+	/* Map dmaget cursor to corresponding mem handle */
+	u32 offset = phys_addr - pin_addr;
+	unsigned int data_count = 0, i;
+	u32 payload = INVALID_PAYLOAD;
+
+	/*
+	 * Sometimes we're given different hardware address to the same
+	 * page - in these cases the offset will get an invalid number and
+	 * we just have to bail out.
+	 */
+	if (offset > HOST1X_DEBUG_MAX_PAGE_OFFSET) {
+		host1x_debug_output(o, "[address mismatch]\n");
+		return;
+	}
+
+	for (i = 0; i < words; i++) {
+		u32 addr = phys_addr + i * 4;
+		u32 val = *(map_addr + offset / 4 + i);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%08x: %08x: ", addr, val);
+			data_count = show_channel_command(o, val, &payload);
+		} else {
+			host1x_debug_cont(o, "%08x%s", val,
+					    data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+	}
+}
+
+static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
+{
+	struct push_buffer *pb = &cdma->push_buffer;
+	struct host1x_job *job;
+
+	host1x_debug_output(o, "PUSHBUF at %pad, %u words\n",
+			    &pb->dma, pb->size / 4);
+
+	show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped);
+
+	list_for_each_entry(job, &cdma->sync_queue, list) {
+		unsigned int i;
+
+		host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
+				    job, job->syncpt->id, job->syncpt_end,
+				    job->first_get, job->timeout,
+				    job->num_slots, job->num_unpins);
+
+		for (i = 0; i < job->num_cmds; i++) {
+			struct host1x_job_gather *g;
+			u32 *mapped;
+
+			if (job->cmds[i].is_wait)
+				continue;
+
+			g = &job->cmds[i].gather;
+
+			if (job->gather_copy_mapped)
+				mapped = (u32 *)job->gather_copy_mapped;
+			else
+				mapped = host1x_bo_mmap(g->bo);
+
+			if (!mapped) {
+				host1x_debug_output(o, "[could not mmap]\n");
+				continue;
+			}
+
+			host1x_debug_output(o, "    GATHER at %pad+%#x, %d words\n",
+					    &g->base, g->offset, g->words);
+
+			show_gather(o, g->base + g->offset, g->words, cdma,
+				    g->base, mapped);
+
+			if (!job->gather_copy_mapped)
+				host1x_bo_munmap(g->bo, mapped);
+		}
+	}
+}
+
+#if HOST1X_HW >= 6
+#include "debug_hw_1x06.c"
+#else
+#include "debug_hw_1x01.c"
+#endif
+
+static const struct host1x_debug_ops host1x_debug_ops = {
+	.show_channel_cdma = host1x_debug_show_channel_cdma,
+	.show_channel_fifo = host1x_debug_show_channel_fifo,
+	.show_mlocks = host1x_debug_show_mlocks,
+};
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
new file mode 100644
index 00000000..02a93305
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2013 NVIDIA Corporation
+ */
+
+#include "../dev.h"
+#include "../debug.h"
+#include "../cdma.h"
+#include "../channel.h"
+
+static void host1x_debug_show_channel_cdma(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	struct host1x_cdma *cdma = &ch->cdma;
+	u32 dmaput, dmaget, dmactrl;
+	u32 cbstat, cbread;
+	u32 val, base, baseval;
+
+	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
+	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
+	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
+	cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id));
+	cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id));
+
+	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
+
+	if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) ||
+	    !ch->cdma.push_buffer.mapped) {
+		host1x_debug_output(o, "inactive\n\n");
+		return;
+	}
+
+	if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X &&
+	    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
+			HOST1X_UCLASS_WAIT_SYNCPT)
+		host1x_debug_output(o, "waiting on syncpt %d val %d\n",
+				    cbread >> 24, cbread & 0xffffff);
+	else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) ==
+				HOST1X_CLASS_HOST1X &&
+		 HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
+				HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
+		base = (cbread >> 16) & 0xff;
+		baseval =
+			host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base));
+		val = cbread & 0xffff;
+		host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n",
+				    cbread >> 24, baseval + val, base,
+				    baseval, val);
+	} else
+		host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n",
+				    HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat),
+				    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
+				    cbread);
+
+	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+			    dmaput, dmaget, dmactrl);
+	host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
+
+	show_channel_gathers(o, cdma);
+	host1x_debug_output(o, "\n");
+}
+
+static void host1x_debug_show_channel_fifo(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	u32 val, rd_ptr, wr_ptr, start, end;
+	unsigned int data_count = 0;
+
+	host1x_debug_output(o, "%u: fifo:\n", ch->id);
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
+	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
+	if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) {
+		host1x_debug_output(o, "[empty]\n");
+		return;
+	}
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+	host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+			   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id),
+			   HOST1X_SYNC_CFPEEK_CTRL);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS);
+	rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val);
+	wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id));
+	start = HOST1X_SYNC_CF_SETUP_BASE_V(val);
+	end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val);
+
+	do {
+		host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+		host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+				   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) |
+				   HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr),
+				   HOST1X_SYNC_CFPEEK_CTRL);
+		val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%08x: ", val);
+			data_count = show_channel_command(o, val, NULL);
+		} else {
+			host1x_debug_cont(o, "%08x%s", val,
+					  data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+
+		if (rd_ptr == end)
+			rd_ptr = start;
+		else
+			rd_ptr++;
+	} while (rd_ptr != wr_ptr);
+
+	if (data_count)
+		host1x_debug_cont(o, ", ...])\n");
+	host1x_debug_output(o, "\n");
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+}
+
+static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
+{
+	unsigned int i;
+
+	host1x_debug_output(o, "---- mlocks ----\n");
+
+	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
+		u32 owner =
+			host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i));
+		if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner))
+			host1x_debug_output(o, "%u: locked by channel %u\n",
+				i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner));
+		else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner))
+			host1x_debug_output(o, "%u: locked by cpu\n", i);
+		else
+			host1x_debug_output(o, "%u: unlocked\n", i);
+	}
+
+	host1x_debug_output(o, "\n");
+}
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
new file mode 100644
index 00000000..6d1b583a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2017 NVIDIA Corporation
+ */
+
+#include "../dev.h"
+#include "../debug.h"
+#include "../cdma.h"
+#include "../channel.h"
+
+static void host1x_debug_show_channel_cdma(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	struct host1x_cdma *cdma = &ch->cdma;
+	u32 dmaput, dmaget, dmactrl;
+	u32 offset, class;
+	u32 ch_stat;
+
+	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
+	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
+	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
+	offset = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_OFFSET);
+	class = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CLASS);
+	ch_stat = host1x_ch_readl(ch, HOST1X_CHANNEL_CHANNELSTAT);
+
+	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
+
+	if (dmactrl & HOST1X_CHANNEL_DMACTRL_DMASTOP ||
+	    !ch->cdma.push_buffer.mapped) {
+		host1x_debug_output(o, "inactive\n\n");
+		return;
+	}
+
+	if (class == HOST1X_CLASS_HOST1X && offset == HOST1X_UCLASS_WAIT_SYNCPT)
+		host1x_debug_output(o, "waiting on syncpt\n");
+	else
+		host1x_debug_output(o, "active class %02x, offset %04x\n",
+				    class, offset);
+
+	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+			    dmaput, dmaget, dmactrl);
+	host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat);
+
+	show_channel_gathers(o, cdma);
+	host1x_debug_output(o, "\n");
+}
+
+static void host1x_debug_show_channel_fifo(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+#if HOST1X_HW <= 6
+	u32 rd_ptr, wr_ptr, start, end;
+	u32 payload = INVALID_PAYLOAD;
+	unsigned int data_count = 0;
+#endif
+	u32 val;
+
+	host1x_debug_output(o, "%u: fifo:\n", ch->id);
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_STAT);
+	host1x_debug_output(o, "CMDFIFO_STAT %08x\n", val);
+	if (val & HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY) {
+		host1x_debug_output(o, "[empty]\n");
+		return;
+	}
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_RDATA);
+	host1x_debug_output(o, "CMDFIFO_RDATA %08x\n", val);
+
+#if HOST1X_HW <= 6
+	/* Peek pointer values are invalid during SLCG, so disable it */
+	host1x_hypervisor_writel(host, 0x1, HOST1X_HV_ICG_EN_OVERRIDE);
+
+	val = 0;
+	val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE;
+	val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id);
+	host1x_hypervisor_writel(host, val, HOST1X_HV_CMDFIFO_PEEK_CTRL);
+
+	val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_PEEK_PTRS);
+	rd_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(val);
+	wr_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(val);
+
+	val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_SETUP(ch->id));
+	start = HOST1X_HV_CMDFIFO_SETUP_BASE_V(val);
+	end = HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(val);
+
+	do {
+		val = 0;
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE;
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id);
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(rd_ptr);
+		host1x_hypervisor_writel(host, val,
+					 HOST1X_HV_CMDFIFO_PEEK_CTRL);
+
+		val = host1x_hypervisor_readl(host,
+					      HOST1X_HV_CMDFIFO_PEEK_READ);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%03x 0x%08x: ",
+					    rd_ptr - start, val);
+			data_count = show_channel_command(o, val, &payload);
+		} else {
+			host1x_debug_cont(o, "%08x%s", val,
+					  data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+
+		if (rd_ptr == end)
+			rd_ptr = start;
+		else
+			rd_ptr++;
+	} while (rd_ptr != wr_ptr);
+
+	if (data_count)
+		host1x_debug_cont(o, ", ...])\n");
+	host1x_debug_output(o, "\n");
+
+	host1x_hypervisor_writel(host, 0x0, HOST1X_HV_CMDFIFO_PEEK_CTRL);
+	host1x_hypervisor_writel(host, 0x0, HOST1X_HV_ICG_EN_OVERRIDE);
+#endif
+}
+
+static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
+{
+	/* TODO */
+}
diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c
new file mode 100644
index 00000000..8d8a117a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x01.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for T20 and T30 Architecture Chips
+ *
+ * Copyright (c) 2011-2013, NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x01.h"
+#include "host1x01_hardware.h"
+
+/* include code */
+#define HOST1X_HW 1
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x01_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x01.h b/drivers/gpu/host1x/hw/host1x01.h
new file mode 100644
index 00000000..6516c3f1
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x01.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for T20 and T30 Architecture Chips
+ *
+ * Copyright (c) 2011-2013, NVIDIA Corporation.
+ */
+#ifndef HOST1X_HOST1X01_H
+#define HOST1X_HOST1X01_H
+
+struct host1x;
+
+int host1x01_init(struct host1x *host);
+
+#endif /* HOST1X_HOST1X01_H_ */
diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h
new file mode 100644
index 00000000..fe59df1d
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x01_hardware.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra20 and Tegra30
+ *
+ * Copyright (c) 2010-2013 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X01_HARDWARE_H
+#define __HOST1X_HOST1X01_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x01_channel.h"
+#include "hw_host1x01_sync.h"
+#include "hw_host1x01_uclass.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+
+/* cdma opcodes */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x02.c b/drivers/gpu/host1x/hw/host1x02.c
new file mode 100644
index 00000000..583b33c0
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x02.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra114 SoCs
+ *
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x02.h"
+#include "host1x02_hardware.h"
+
+/* include code */
+#define HOST1X_HW 2
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x02_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x02.h b/drivers/gpu/host1x/hw/host1x02.h
new file mode 100644
index 00000000..7e5c3e47
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x02.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra114 SoCs
+ *
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X02_H
+#define HOST1X_HOST1X02_H
+
+struct host1x;
+
+int host1x02_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h
new file mode 100644
index 00000000..af60d7fb
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x02_hardware.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra114
+ *
+ * Copyright (c) 2010-2013 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X02_HARDWARE_H
+#define __HOST1X_HOST1X02_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x02_channel.h"
+#include "hw_host1x02_sync.h"
+#include "hw_host1x02_uclass.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+/* cdma opcodes */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x04.c b/drivers/gpu/host1x/hw/host1x04.c
new file mode 100644
index 00000000..26b459eb
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x04.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra124 SoCs
+ *
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x04.h"
+#include "host1x04_hardware.h"
+
+/* include code */
+#define HOST1X_HW 4
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x04_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x04.h b/drivers/gpu/host1x/hw/host1x04.h
new file mode 100644
index 00000000..2a1e8153
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x04.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra124 SoCs
+ *
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X04_H
+#define HOST1X_HOST1X04_H
+
+struct host1x;
+
+int host1x04_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h
new file mode 100644
index 00000000..4f9bcddf
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x04_hardware.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra124
+ *
+ * Copyright (c) 2010-2013 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X04_HARDWARE_H
+#define __HOST1X_HOST1X04_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x04_channel.h"
+#include "hw_host1x04_sync.h"
+#include "hw_host1x04_uclass.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+/* cdma opcodes */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c
new file mode 100644
index 00000000..6d980334
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra210 SoCs
+ *
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x05.h"
+#include "host1x05_hardware.h"
+
+/* include code */
+#define HOST1X_HW 5
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x05_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x05.h b/drivers/gpu/host1x/hw/host1x05.h
new file mode 100644
index 00000000..addfd41e
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra210 SoCs
+ *
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X05_H
+#define HOST1X_HOST1X05_H
+
+struct host1x;
+
+int host1x05_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h
new file mode 100644
index 00000000..af3ab4b7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05_hardware.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra210
+ *
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X05_HARDWARE_H
+#define __HOST1X_HOST1X05_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x05_channel.h"
+#include "hw_host1x05_sync.h"
+#include "hw_host1x05_uclass.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+/* cdma opcodes */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x06.c b/drivers/gpu/host1x/hw/host1x06.c
new file mode 100644
index 00000000..844f81ae
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra186 SoCs
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x06.h"
+#include "host1x06_hardware.h"
+
+/* include code */
+#define HOST1X_HW 6
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x06_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x06.h b/drivers/gpu/host1x/hw/host1x06.h
new file mode 100644
index 00000000..4ea75689
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra186 SoCs
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X06_H
+#define HOST1X_HOST1X06_H
+
+struct host1x;
+
+int host1x06_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
new file mode 100644
index 00000000..01a142a0
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra186
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X06_HARDWARE_H
+#define __HOST1X_HOST1X06_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x06_channel.h"
+#include "hw_host1x06_uclass.h"
+#include "hw_host1x06_vm.h"
+#include "hw_host1x06_hypervisor.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+/* cdma opcodes */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+	return (12 << 28) | count;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x07.c b/drivers/gpu/host1x/hw/host1x07.c
new file mode 100644
index 00000000..0c6f14f7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x07.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra194 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x07.h"
+#include "host1x07_hardware.h"
+
+/* include code */
+#define HOST1X_HW 7
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x07_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x07.h b/drivers/gpu/host1x/hw/host1x07.h
new file mode 100644
index 00000000..419b6eaa
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x07.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra194 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X07_H
+#define HOST1X_HOST1X07_H
+
+struct host1x;
+
+int host1x07_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h
new file mode 100644
index 00000000..e6582172
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x07_hardware.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra194
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X07_HARDWARE_H
+#define __HOST1X_HOST1X07_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x07_channel.h"
+#include "hw_host1x07_uclass.h"
+#include "hw_host1x07_vm.h"
+#include "hw_host1x07_hypervisor.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+/* cdma opcodes */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+	return (12 << 28) | count;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
new file mode 100644
index 00000000..8da43eab
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef __hw_host1x_channel_host1x_h__
+#define __hw_host1x_channel_host1x_h__
+
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 10) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x14;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h
new file mode 100644
index 00000000..ec95e7ae
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef __hw_host1x01_sync_h__
+#define __hw_host1x01_sync_h__
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0x400 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0x40 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0x60 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0x68 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0x80 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0x1ff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0x1ff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0xac;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0xb0;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
+{
+	return 0x340 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_MLOCK_OWNER(id) \
+	host1x_sync_mlock_owner_r(id)
+static inline u32 host1x_sync_mlock_owner_chid_v(u32 v)
+{
+	return (v >> 8) & 0xf;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \
+	host1x_sync_mlock_owner_chid_v(v)
+static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \
+	host1x_sync_mlock_owner_cpu_owns_v(r)
+static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \
+	host1x_sync_mlock_owner_ch_owns_v(r)
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x500 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x600 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0x700 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cbread_r(unsigned int channel)
+{
+	return 0x720 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBREAD(channel) \
+	host1x_sync_cbread_r(channel)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x74c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0x1ff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0x7) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x750;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x754;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0x1ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0x1ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_cbstat_r(unsigned int channel)
+{
+	return 0x758 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBSTAT(channel) \
+	host1x_sync_cbstat_r(channel)
+static inline u32 host1x_sync_cbstat_cboffset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \
+	host1x_sync_cbstat_cboffset_v(r)
+static inline u32 host1x_sync_cbstat_cbclass_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \
+	host1x_sync_cbstat_cbclass_v(r)
+
+#endif /* __hw_host1x01_sync_h__ */
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
new file mode 100644
index 00000000..1239bfd4
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef __hw_host1x_uclass_host1x_h__
+#define __hw_host1x_uclass_host1x_h__
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_channel.h b/drivers/gpu/host1x/hw/hw_host1x02_channel.h
new file mode 100644
index 00000000..210d317a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x02_channel.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X02_CHANNEL_H
+#define HOST1X_HW_HOST1X02_CHANNEL_H
+
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x14;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_sync.h b/drivers/gpu/host1x/hw/hw_host1x02_sync.h
new file mode 100644
index 00000000..44b4f837
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x02_sync.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X02_SYNC_H
+#define HOST1X_HW_HOST1X02_SYNC_H
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0x400 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0x40 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0x60 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0x68 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0x80 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0xac;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0xb0;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
+{
+	return 0x340 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_MLOCK_OWNER(id) \
+	host1x_sync_mlock_owner_r(id)
+static inline u32 host1x_sync_mlock_owner_chid_v(u32 v)
+{
+	return (v >> 8) & 0xf;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \
+	host1x_sync_mlock_owner_chid_v(v)
+static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \
+	host1x_sync_mlock_owner_cpu_owns_v(r)
+static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \
+	host1x_sync_mlock_owner_ch_owns_v(r)
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x500 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x600 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0x700 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cbread_r(unsigned int channel)
+{
+	return 0x720 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBREAD(channel) \
+	host1x_sync_cbread_r(channel)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x74c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x750;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x754;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_cbstat_r(unsigned int channel)
+{
+	return 0x758 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBSTAT(channel) \
+	host1x_sync_cbstat_r(channel)
+static inline u32 host1x_sync_cbstat_cboffset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \
+	host1x_sync_cbstat_cboffset_v(r)
+static inline u32 host1x_sync_cbstat_cbclass_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \
+	host1x_sync_cbstat_cbclass_v(r)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
new file mode 100644
index 00000000..4fc51f70
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X02_UCLASS_H
+#define HOST1X_HW_HOST1X02_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
new file mode 100644
index 00000000..38d11064
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X04_CHANNEL_H
+#define HOST1X_HW_HOST1X04_CHANNEL_H
+
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x14;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+	return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+	host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+	host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_sync.h b/drivers/gpu/host1x/hw/hw_host1x04_sync.h
new file mode 100644
index 00000000..0be98562
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x04_sync.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X04_SYNC_H
+#define HOST1X_HW_HOST1X04_SYNC_H
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0xf80 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0xe80 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0xf00 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0xf20 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0xc00 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0xac;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0xb0;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
+{
+	return 0x340 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_MLOCK_OWNER(id) \
+	host1x_sync_mlock_owner_r(id)
+static inline u32 host1x_sync_mlock_owner_chid_v(u32 v)
+{
+	return (v >> 8) & 0xf;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \
+	host1x_sync_mlock_owner_chid_v(v)
+static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \
+	host1x_sync_mlock_owner_cpu_owns_v(r)
+static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \
+	host1x_sync_mlock_owner_ch_owns_v(r)
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x1380 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x600 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0xf60 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cbread_r(unsigned int channel)
+{
+	return 0xc80 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBREAD(channel) \
+	host1x_sync_cbread_r(channel)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x74c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x750;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x754;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_cbstat_r(unsigned int channel)
+{
+	return 0xcc0 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBSTAT(channel) \
+	host1x_sync_cbstat_r(channel)
+static inline u32 host1x_sync_cbstat_cboffset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \
+	host1x_sync_cbstat_cboffset_v(r)
+static inline u32 host1x_sync_cbstat_cbclass_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \
+	host1x_sync_cbstat_cbclass_v(r)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
new file mode 100644
index 00000000..9e84a4ad
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X04_UCLASS_H
+#define HOST1X_HW_HOST1X04_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
new file mode 100644
index 00000000..7e628ef5
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_CHANNEL_H
+#define HOST1X_HW_HOST1X05_CHANNEL_H
+
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x14;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+	return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+	host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+	host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_sync.h b/drivers/gpu/host1x/hw/hw_host1x05_sync.h
new file mode 100644
index 00000000..1a85c793
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_sync.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_SYNC_H
+#define HOST1X_HW_HOST1X05_SYNC_H
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0xf80 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0xe80 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0xf00 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0xf20 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0xc00 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0xac;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0xb0;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
+{
+	return 0x340 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_MLOCK_OWNER(id) \
+	host1x_sync_mlock_owner_r(id)
+static inline u32 host1x_sync_mlock_owner_chid_v(u32 r)
+{
+	return (r >> 8) & 0xf;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \
+	host1x_sync_mlock_owner_chid_v(v)
+static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \
+	host1x_sync_mlock_owner_cpu_owns_v(r)
+static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \
+	host1x_sync_mlock_owner_ch_owns_v(r)
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x1380 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x600 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0xf60 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cbread_r(unsigned int channel)
+{
+	return 0xc80 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBREAD(channel) \
+	host1x_sync_cbread_r(channel)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x74c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x750;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x754;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_cbstat_r(unsigned int channel)
+{
+	return 0xcc0 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBSTAT(channel) \
+	host1x_sync_cbstat_r(channel)
+static inline u32 host1x_sync_cbstat_cboffset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \
+	host1x_sync_cbstat_cboffset_v(r)
+static inline u32 host1x_sync_cbstat_cbclass_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \
+	host1x_sync_cbstat_cbclass_v(r)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
new file mode 100644
index 00000000..aee5a4e3
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_UCLASS_H
+#define HOST1X_HW_HOST1X05_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_channel.h b/drivers/gpu/host1x/hw/hw_host1x06_channel.h
new file mode 100644
index 00000000..18ae1c57
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_channel.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X06_CHANNEL_H
+#define HOST1X_HW_HOST1X06_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
new file mode 100644
index 00000000..a7fc9ec4
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN			0x1ac4
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN			BIT(1)
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x)		(0x2020 + (x * 4))
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL			0x233c
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x)		(x)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x)		((x) << 16)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE		BIT(31)
+#define HOST1X_HV_CMDFIFO_PEEK_READ			0x2340
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS			0x2344
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x)		((x) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP(x)			(0x2588 + (x * 4))
+#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x)		((x) & 0xfff)
+#define HOST1X_HV_ICG_EN_OVERRIDE			0x2aa8
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
new file mode 100644
index 00000000..c4bacdb7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X06_UCLASS_H
+#define HOST1X_HW_HOST1X06_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_vm.h b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
new file mode 100644
index 00000000..818564a7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART				0x0000
+#define HOST1X_CHANNEL_DMASTART_HI			0x0004
+#define HOST1X_CHANNEL_DMAPUT				0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI			0x000c
+#define HOST1X_CHANNEL_DMAGET				0x0010
+#define HOST1X_CHANNEL_DMAGET_HI			0x0014
+#define HOST1X_CHANNEL_DMAEND				0x0018
+#define HOST1X_CHANNEL_DMAEND_HI			0x001c
+#define HOST1X_CHANNEL_DMACTRL				0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP			BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST		BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET		BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT			0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY		BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA			0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET			0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS			0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT			0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP			0x0048
+#define HOST1X_CHANNEL_TEARDOWN				0x004c
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x)			(0x6400 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x)	(0x6464 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x)	(0x652c + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x)	(0x6590 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_BASE(x)			(0x8000 + 4*(x))
+#define HOST1X_SYNC_SYNCPT(x)				(0x8080 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0x8a00 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x)			(0x9384 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v)			(((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_channel.h b/drivers/gpu/host1x/hw/hw_host1x07_channel.h
new file mode 100644
index 00000000..96fa72bb
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_channel.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X07_CHANNEL_H
+#define HOST1X_HW_HOST1X07_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h
new file mode 100644
index 00000000..52141d53
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN			0x1ac4
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN			BIT(1)
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x)		(0x2020 + (x * 4))
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL			0x233c
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x)		(x)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x)		((x) << 16)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE		BIT(31)
+#define HOST1X_HV_CMDFIFO_PEEK_READ			0x2340
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS			0x2344
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x)		((x) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP(x)			(0x2588 + (x * 4))
+#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x)		((x) & 0xfff)
+#define HOST1X_HV_ICG_EN_OVERRIDE			0x2aa8
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
new file mode 100644
index 00000000..c74070f3
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X07_UCLASS_H
+#define HOST1X_HW_HOST1X07_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_vm.h b/drivers/gpu/host1x/hw/hw_host1x07_vm.h
new file mode 100644
index 00000000..3058b3c9
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_vm.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART				0x0000
+#define HOST1X_CHANNEL_DMASTART_HI			0x0004
+#define HOST1X_CHANNEL_DMAPUT				0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI			0x000c
+#define HOST1X_CHANNEL_DMAGET				0x0010
+#define HOST1X_CHANNEL_DMAGET_HI			0x0014
+#define HOST1X_CHANNEL_DMAEND				0x0018
+#define HOST1X_CHANNEL_DMAEND_HI			0x001c
+#define HOST1X_CHANNEL_DMACTRL				0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP			BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST		BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET		BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT			0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY		BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA			0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET			0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS			0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT			0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP			0x0048
+#define HOST1X_CHANNEL_TEARDOWN				0x004c
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x)			(0x6400 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x)	(0x6464 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x)	(0x652c + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x)	(0x6590 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT(x)				(0x8080 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0x8d00 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x)			(0xa604 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v)			(((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
new file mode 100644
index 00000000..f56375ee
--- /dev/null
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include "../intr.h"
+#include "../dev.h"
+
+/*
+ * Sync point threshold interrupt service function
+ * Handles sync point threshold triggers, in interrupt context
+ */
+static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
+{
+	unsigned int id = syncpt->id;
+	struct host1x *host = syncpt->host;
+
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32));
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
+
+	schedule_work(&syncpt->intr.work);
+}
+
+static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
+{
+	struct host1x *host = dev_id;
+	unsigned long reg;
+	unsigned int i, id;
+
+	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
+		reg = host1x_sync_readl(host,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+		for_each_set_bit(id, &reg, 32) {
+			struct host1x_syncpt *syncpt =
+				host->syncpt + (i * 32 + id);
+			host1x_intr_syncpt_handle(syncpt);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
+{
+	unsigned int i;
+
+	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) {
+		host1x_sync_writel(host, 0xffffffffu,
+			HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i));
+		host1x_sync_writel(host, 0xffffffffu,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+	}
+}
+
+static void intr_hw_init(struct host1x *host, u32 cpm)
+{
+#if HOST1X_HW < 6
+	/* disable the ip_busy_timeout. this prevents write drops */
+	host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT);
+
+	/*
+	 * increase the auto-ack timout to the maximum value. 2d will hang
+	 * otherwise on Tegra2.
+	 */
+	host1x_sync_writel(host, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG);
+
+	/* update host clocks per usec */
+	host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
+#endif
+}
+
+static int
+_host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
+			    void (*syncpt_thresh_work)(struct work_struct *))
+{
+	unsigned int i;
+	int err;
+
+	host1x_hw_intr_disable_all_syncpt_intrs(host);
+
+	for (i = 0; i < host->info->nb_pts; i++)
+		INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work);
+
+	err = devm_request_irq(host->dev, host->intr_syncpt_irq,
+			       syncpt_thresh_isr, IRQF_SHARED,
+			       "host1x_syncpt", host);
+	if (err < 0) {
+		WARN_ON(1);
+		return err;
+	}
+
+	intr_hw_init(host, cpm);
+
+	return 0;
+}
+
+static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
+					      unsigned int id,
+					      u32 thresh)
+{
+	host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
+}
+
+static void _host1x_intr_enable_syncpt_intr(struct host1x *host,
+					    unsigned int id)
+{
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32));
+}
+
+static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
+					     unsigned int id)
+{
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32));
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
+}
+
+static int _host1x_free_syncpt_irq(struct host1x *host)
+{
+	unsigned int i;
+
+	devm_free_irq(host->dev, host->intr_syncpt_irq, host);
+
+	for (i = 0; i < host->info->nb_pts; i++)
+		cancel_work_sync(&host->syncpt[i].intr.work);
+
+	return 0;
+}
+
+static const struct host1x_intr_ops host1x_intr_ops = {
+	.init_host_sync = _host1x_intr_init_host_sync,
+	.set_syncpt_threshold = _host1x_intr_set_syncpt_threshold,
+	.enable_syncpt_intr = _host1x_intr_enable_syncpt_intr,
+	.disable_syncpt_intr = _host1x_intr_disable_syncpt_intr,
+	.disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs,
+	.free_syncpt_irq = _host1x_free_syncpt_irq,
+};
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
new file mode 100644
index 00000000..dd39d67c
--- /dev/null
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Syncpoints
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#include <linux/io.h>
+
+#include "../dev.h"
+#include "../syncpt.h"
+
+/*
+ * Write the current syncpoint value back to hw.
+ */
+static void syncpt_restore(struct host1x_syncpt *sp)
+{
+	u32 min = host1x_syncpt_read_min(sp);
+	struct host1x *host = sp->host;
+
+	host1x_sync_writel(host, min, HOST1X_SYNC_SYNCPT(sp->id));
+}
+
+/*
+ * Write the current waitbase value back to hw.
+ */
+static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
+{
+#if HOST1X_HW < 7
+	struct host1x *host = sp->host;
+
+	host1x_sync_writel(host, sp->base_val,
+			   HOST1X_SYNC_SYNCPT_BASE(sp->id));
+#endif
+}
+
+/*
+ * Read waitbase value from hw.
+ */
+static void syncpt_read_wait_base(struct host1x_syncpt *sp)
+{
+#if HOST1X_HW < 7
+	struct host1x *host = sp->host;
+
+	sp->base_val =
+		host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id));
+#endif
+}
+
+/*
+ * Updates the last value read from hardware.
+ */
+static u32 syncpt_load(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	u32 old, live;
+
+	/* Loop in case there's a race writing to min_val */
+	do {
+		old = host1x_syncpt_read_min(sp);
+		live = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT(sp->id));
+	} while ((u32)atomic_cmpxchg(&sp->min_val, old, live) != old);
+
+	if (!host1x_syncpt_check_max(sp, live))
+		dev_err(host->dev, "%s failed: id=%u, min=%d, max=%d\n",
+			__func__, sp->id, host1x_syncpt_read_min(sp),
+			host1x_syncpt_read_max(sp));
+
+	return live;
+}
+
+/*
+ * Write a cpu syncpoint increment to the hardware, without touching
+ * the cache.
+ */
+static int syncpt_cpu_incr(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	u32 reg_offset = sp->id / 32;
+
+	if (!host1x_syncpt_client_managed(sp) &&
+	    host1x_syncpt_idle(sp))
+		return -EINVAL;
+
+	host1x_sync_writel(host, BIT(sp->id % 32),
+			   HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset));
+	wmb();
+
+	return 0;
+}
+
+/**
+ * syncpt_assign_to_channel() - Assign syncpoint to channel
+ * @sp: syncpoint
+ * @ch: channel
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
+ * @ch, preventing other channels from incrementing the syncpoints. If @ch is
+ * NULL, unassigns the syncpoint.
+ *
+ * On older chips, do nothing.
+ */
+static void syncpt_assign_to_channel(struct host1x_syncpt *sp,
+				  struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	struct host1x *host = sp->host;
+
+	if (!host->hv_regs)
+		return;
+
+	host1x_sync_writel(host,
+			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+/**
+ * syncpt_enable_protection() - Enable syncpoint protection
+ * @host: host1x instance
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), enable this
+ * feature. On older chips, do nothing.
+ */
+static void syncpt_enable_protection(struct host1x *host)
+{
+#if HOST1X_HW >= 6
+	if (!host->hv_regs)
+		return;
+
+	host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
+				 HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
+static const struct host1x_syncpt_ops host1x_syncpt_ops = {
+	.restore = syncpt_restore,
+	.restore_wait_base = syncpt_restore_wait_base,
+	.load_wait_base = syncpt_read_wait_base,
+	.load = syncpt_load,
+	.cpu_incr = syncpt_cpu_incr,
+	.assign_to_channel = syncpt_assign_to_channel,
+	.enable_protection = syncpt_enable_protection,
+};
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
new file mode 100644
index 00000000..19b59c5c
--- /dev/null
+++ b/drivers/gpu/host1x/intr.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+
+#include <trace/events/host1x.h>
+#include "channel.h"
+#include "dev.h"
+#include "fence.h"
+#include "intr.h"
+
+/* Wait list management */
+
+enum waitlist_state {
+	WLS_PENDING,
+	WLS_REMOVED,
+	WLS_CANCELLED,
+	WLS_HANDLED
+};
+
+static void waiter_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct host1x_waitlist, refcount));
+}
+
+/*
+ * add a waiter to a waiter queue, sorted by threshold
+ * returns true if it was added at the head of the queue
+ */
+static bool add_waiter_to_queue(struct host1x_waitlist *waiter,
+				struct list_head *queue)
+{
+	struct host1x_waitlist *pos;
+	u32 thresh = waiter->thresh;
+
+	list_for_each_entry_reverse(pos, queue, list)
+		if ((s32)(pos->thresh - thresh) <= 0) {
+			list_add(&waiter->list, &pos->list);
+			return false;
+		}
+
+	list_add(&waiter->list, queue);
+	return true;
+}
+
+/*
+ * run through a waiter queue for a single sync point ID
+ * and gather all completed waiters into lists by actions
+ */
+static void remove_completed_waiters(struct list_head *head, u32 sync,
+			struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+{
+	struct list_head *dest;
+	struct host1x_waitlist *waiter, *next, *prev;
+
+	list_for_each_entry_safe(waiter, next, head, list) {
+		if ((s32)(waiter->thresh - sync) > 0)
+			break;
+
+		dest = completed + waiter->action;
+
+		/* consolidate submit cleanups */
+		if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE &&
+		    !list_empty(dest)) {
+			prev = list_entry(dest->prev,
+					  struct host1x_waitlist, list);
+			if (prev->data == waiter->data) {
+				prev->count++;
+				dest = NULL;
+			}
+		}
+
+		/* PENDING->REMOVED or CANCELLED->HANDLED */
+		if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) {
+			list_del(&waiter->list);
+			kref_put(&waiter->refcount, waiter_release);
+		} else
+			list_move_tail(&waiter->list, dest);
+	}
+}
+
+static void reset_threshold_interrupt(struct host1x *host,
+				      struct list_head *head,
+				      unsigned int id)
+{
+	u32 thresh =
+		list_first_entry(head, struct host1x_waitlist, list)->thresh;
+
+	host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
+	host1x_hw_intr_enable_syncpt_intr(host, id);
+}
+
+static void action_submit_complete(struct host1x_waitlist *waiter)
+{
+	struct host1x_channel *channel = waiter->data;
+
+	host1x_cdma_update(&channel->cdma);
+
+	/*  Add nr_completed to trace */
+	trace_host1x_channel_submit_complete(dev_name(channel->dev),
+					     waiter->count, waiter->thresh);
+}
+
+static void action_wakeup(struct host1x_waitlist *waiter)
+{
+	wait_queue_head_t *wq = waiter->data;
+
+	wake_up(wq);
+}
+
+static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
+{
+	wait_queue_head_t *wq = waiter->data;
+
+	wake_up_interruptible(wq);
+}
+
+static void action_signal_fence(struct host1x_waitlist *waiter)
+{
+	struct host1x_syncpt_fence *f = waiter->data;
+
+	host1x_fence_signal(f);
+}
+
+typedef void (*action_handler)(struct host1x_waitlist *waiter);
+
+static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
+	action_submit_complete,
+	action_wakeup,
+	action_wakeup_interruptible,
+	action_signal_fence,
+};
+
+static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+{
+	struct list_head *head = completed;
+	unsigned int i;
+
+	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
+		action_handler handler = action_handlers[i];
+		struct host1x_waitlist *waiter, *next;
+
+		list_for_each_entry_safe(waiter, next, head, list) {
+			list_del(&waiter->list);
+			handler(waiter);
+			WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) !=
+				WLS_REMOVED);
+			kref_put(&waiter->refcount, waiter_release);
+		}
+	}
+}
+
+/*
+ * Remove & handle all waiters that have completed for the given syncpt
+ */
+static int process_wait_list(struct host1x *host,
+			     struct host1x_syncpt *syncpt,
+			     u32 threshold)
+{
+	struct list_head completed[HOST1X_INTR_ACTION_COUNT];
+	unsigned int i;
+	int empty;
+
+	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i)
+		INIT_LIST_HEAD(completed + i);
+
+	spin_lock(&syncpt->intr.lock);
+
+	remove_completed_waiters(&syncpt->intr.wait_head, threshold,
+				 completed);
+
+	empty = list_empty(&syncpt->intr.wait_head);
+	if (empty)
+		host1x_hw_intr_disable_syncpt_intr(host, syncpt->id);
+	else
+		reset_threshold_interrupt(host, &syncpt->intr.wait_head,
+					  syncpt->id);
+
+	spin_unlock(&syncpt->intr.lock);
+
+	run_handlers(completed);
+
+	return empty;
+}
+
+/*
+ * Sync point threshold interrupt service thread function
+ * Handles sync point threshold triggers, in thread context
+ */
+
+static void syncpt_thresh_work(struct work_struct *work)
+{
+	struct host1x_syncpt_intr *syncpt_intr =
+		container_of(work, struct host1x_syncpt_intr, work);
+	struct host1x_syncpt *syncpt =
+		container_of(syncpt_intr, struct host1x_syncpt, intr);
+	unsigned int id = syncpt->id;
+	struct host1x *host = syncpt->host;
+
+	(void)process_wait_list(host, syncpt,
+				host1x_syncpt_load(host->syncpt + id));
+}
+
+int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
+			   u32 thresh, enum host1x_intr_action action,
+			   void *data, struct host1x_waitlist *waiter,
+			   void **ref)
+{
+	int queue_was_empty;
+
+	if (waiter == NULL) {
+		pr_warn("%s: NULL waiter\n", __func__);
+		return -EINVAL;
+	}
+
+	/* initialize a new waiter */
+	INIT_LIST_HEAD(&waiter->list);
+	kref_init(&waiter->refcount);
+	if (ref)
+		kref_get(&waiter->refcount);
+	waiter->thresh = thresh;
+	waiter->action = action;
+	atomic_set(&waiter->state, WLS_PENDING);
+	waiter->data = data;
+	waiter->count = 1;
+
+	spin_lock(&syncpt->intr.lock);
+
+	queue_was_empty = list_empty(&syncpt->intr.wait_head);
+
+	if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) {
+		/* added at head of list - new threshold value */
+		host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh);
+
+		/* added as first waiter - enable interrupt */
+		if (queue_was_empty)
+			host1x_hw_intr_enable_syncpt_intr(host, syncpt->id);
+	}
+
+	spin_unlock(&syncpt->intr.lock);
+
+	if (ref)
+		*ref = waiter;
+	return 0;
+}
+
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref)
+{
+	struct host1x_waitlist *waiter = ref;
+	struct host1x_syncpt *syncpt;
+
+	atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
+
+	syncpt = host->syncpt + id;
+
+	spin_lock(&syncpt->intr.lock);
+	if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
+	    WLS_CANCELLED) {
+		list_del(&waiter->list);
+		kref_put(&waiter->refcount, waiter_release);
+	}
+	spin_unlock(&syncpt->intr.lock);
+
+	kref_put(&waiter->refcount, waiter_release);
+}
+
+int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
+{
+	unsigned int id;
+	u32 nb_pts = host1x_syncpt_nb_pts(host);
+
+	mutex_init(&host->intr_mutex);
+	host->intr_syncpt_irq = irq_sync;
+
+	for (id = 0; id < nb_pts; ++id) {
+		struct host1x_syncpt *syncpt = host->syncpt + id;
+
+		spin_lock_init(&syncpt->intr.lock);
+		INIT_LIST_HEAD(&syncpt->intr.wait_head);
+		snprintf(syncpt->intr.thresh_irq_name,
+			 sizeof(syncpt->intr.thresh_irq_name),
+			 "host1x_sp_%02u", id);
+	}
+
+	host1x_intr_start(host);
+
+	return 0;
+}
+
+void host1x_intr_deinit(struct host1x *host)
+{
+	host1x_intr_stop(host);
+}
+
+void host1x_intr_start(struct host1x *host)
+{
+	u32 hz = clk_get_rate(host->clk);
+	int err;
+
+	mutex_lock(&host->intr_mutex);
+	err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000),
+					    syncpt_thresh_work);
+	if (err) {
+		mutex_unlock(&host->intr_mutex);
+		return;
+	}
+	mutex_unlock(&host->intr_mutex);
+}
+
+void host1x_intr_stop(struct host1x *host)
+{
+	unsigned int id;
+	struct host1x_syncpt *syncpt = host->syncpt;
+	u32 nb_pts = host1x_syncpt_nb_pts(host);
+
+	mutex_lock(&host->intr_mutex);
+
+	host1x_hw_intr_disable_all_syncpt_intrs(host);
+
+	for (id = 0; id < nb_pts; ++id) {
+		struct host1x_waitlist *waiter, *next;
+
+		list_for_each_entry_safe(waiter, next,
+			&syncpt[id].intr.wait_head, list) {
+			if (atomic_cmpxchg(&waiter->state,
+			    WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) {
+				list_del(&waiter->list);
+				kref_put(&waiter->refcount, waiter_release);
+			}
+		}
+
+		if (!list_empty(&syncpt[id].intr.wait_head)) {
+			/* output diagnostics */
+			mutex_unlock(&host->intr_mutex);
+			pr_warn("%s cannot stop syncpt intr id=%u\n",
+				__func__, id);
+			return;
+		}
+	}
+
+	host1x_hw_intr_free_syncpt_irq(host);
+
+	mutex_unlock(&host->intr_mutex);
+}
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
new file mode 100644
index 00000000..dedbd0f7
--- /dev/null
+++ b/drivers/gpu/host1x/intr.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_INTR_H
+#define __HOST1X_INTR_H
+
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+
+struct host1x_syncpt;
+struct host1x;
+
+enum host1x_intr_action {
+	/*
+	 * Perform cleanup after a submit has completed.
+	 * 'data' points to a channel
+	 */
+	HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0,
+
+	/*
+	 * Wake up a  task.
+	 * 'data' points to a wait_queue_head_t
+	 */
+	HOST1X_INTR_ACTION_WAKEUP,
+
+	/*
+	 * Wake up a interruptible task.
+	 * 'data' points to a wait_queue_head_t
+	 */
+	HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
+
+	HOST1X_INTR_ACTION_SIGNAL_FENCE,
+
+	HOST1X_INTR_ACTION_COUNT
+};
+
+struct host1x_syncpt_intr {
+	spinlock_t lock;
+	struct list_head wait_head;
+	char thresh_irq_name[12];
+	struct work_struct work;
+};
+
+struct host1x_waitlist {
+	struct list_head list;
+	struct kref refcount;
+	u32 thresh;
+	enum host1x_intr_action action;
+	atomic_t state;
+	void *data;
+	int count;
+};
+
+/*
+ * Schedule an action to be taken when a sync point reaches the given threshold.
+ *
+ * @id the sync point
+ * @thresh the threshold
+ * @action the action to take
+ * @data a pointer to extra data depending on action, see above
+ * @waiter waiter structure - assumes ownership
+ * @ref must be passed if cancellation is possible, else NULL
+ *
+ * This is a non-blocking api.
+ */
+int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
+			   u32 thresh, enum host1x_intr_action action,
+			   void *data, struct host1x_waitlist *waiter,
+			   void **ref);
+
+/*
+ * Unreference an action submitted to host1x_intr_add_action().
+ * You must call this if you passed non-NULL as ref.
+ * @ref the ref returned from host1x_intr_add_action()
+ */
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref);
+
+/* Initialize host1x sync point interrupt */
+int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
+
+/* Deinitialize host1x sync point interrupt */
+void host1x_intr_deinit(struct host1x *host);
+
+/* Enable host1x sync point interrupt */
+void host1x_intr_start(struct host1x *host);
+
+/* Disable host1x sync point interrupt */
+void host1x_intr_stop(struct host1x *host);
+
+irqreturn_t host1x_syncpt_thresh_fn(void *dev_id);
+#endif
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
new file mode 100644
index 00000000..f445f1e4
--- /dev/null
+++ b/drivers/gpu/host1x/job.c
@@ -0,0 +1,728 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Job
+ *
+ * Copyright (c) 2010-2015, NVIDIA Corporation.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/host1x-next.h>
+#include <linux/iommu.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <trace/events/host1x.h>
+
+#include "channel.h"
+#include "dev.h"
+#include "job.h"
+#include "syncpt.h"
+
+#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
+
+struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
+				    u32 num_cmdbufs, u32 num_relocs)
+{
+	struct host1x_job *job = NULL;
+	unsigned int num_unpins = num_relocs;
+	u64 total;
+	void *mem;
+
+	if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+		num_unpins += num_cmdbufs;
+
+	/* Check that we're not going to overflow */
+	total = sizeof(struct host1x_job) +
+		(u64)num_relocs * sizeof(struct host1x_reloc) +
+		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
+		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
+		(u64)num_unpins * sizeof(dma_addr_t) +
+		(u64)num_unpins * sizeof(u32 *);
+	if (total > ULONG_MAX)
+		return NULL;
+
+	mem = job = kzalloc(total, GFP_KERNEL);
+	if (!job)
+		return NULL;
+
+	kref_init(&job->ref);
+	job->channel = ch;
+
+	/* Redistribute memory to the structs  */
+	mem += sizeof(struct host1x_job);
+	job->relocs = num_relocs ? mem : NULL;
+	mem += num_relocs * sizeof(struct host1x_reloc);
+	job->unpins = num_unpins ? mem : NULL;
+	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
+	job->cmds = num_cmdbufs ? mem : NULL;
+	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
+	job->addr_phys = num_unpins ? mem : NULL;
+
+	job->reloc_addr_phys = job->addr_phys;
+	job->gather_addr_phys = &job->addr_phys[num_relocs];
+
+	return job;
+}
+EXPORT_SYMBOL(host1x_job_alloc);
+
+struct host1x_job *host1x_job_get(struct host1x_job *job)
+{
+	kref_get(&job->ref);
+	return job;
+}
+EXPORT_SYMBOL(host1x_job_get);
+
+static void job_free(struct kref *ref)
+{
+	struct host1x_job *job = container_of(ref, struct host1x_job, ref);
+
+	if (job->release)
+		job->release(job);
+
+	if (job->waiter)
+		host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
+				    job->waiter);
+
+	if (job->syncpt)
+		host1x_syncpt_put(job->syncpt);
+
+	kfree(job);
+}
+
+void host1x_job_put(struct host1x_job *job)
+{
+	kref_put(&job->ref, job_free);
+}
+EXPORT_SYMBOL(host1x_job_put);
+
+void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
+			   unsigned int words, unsigned int offset)
+{
+	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
+
+	gather->words = words;
+	gather->bo = bo;
+	gather->offset = offset;
+
+	job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_gather);
+
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh)
+{
+	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
+
+	cmd->is_wait = true;
+	cmd->wait.id = id;
+	cmd->wait.threshold = thresh;
+
+	job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_wait);
+
+static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
+{
+	struct host1x_client *client = job->client;
+	struct device *dev = client->dev;
+	struct host1x_job_gather *g;
+	struct iommu_domain *domain;
+	struct sg_table *sgt;
+	unsigned int i;
+	int err;
+
+	domain = iommu_get_domain_for_dev(dev);
+	job->num_unpins = 0;
+
+	for (i = 0; i < job->num_relocs; i++) {
+		struct host1x_reloc *reloc = &job->relocs[i];
+		dma_addr_t phys_addr, *phys;
+
+		reloc->target.bo = host1x_bo_get(reloc->target.bo);
+		if (!reloc->target.bo) {
+			err = -EINVAL;
+			goto unpin;
+		}
+
+		/*
+		 * If the client device is not attached to an IOMMU, the
+		 * physical address of the buffer object can be used.
+		 *
+		 * Similarly, when an IOMMU domain is shared between all
+		 * host1x clients, the IOVA is already available, so no
+		 * need to map the buffer object again.
+		 *
+		 * XXX Note that this isn't always safe to do because it
+		 * relies on an assumption that no cache maintenance is
+		 * needed on the buffer objects.
+		 */
+		if (!domain || client->group)
+			phys = &phys_addr;
+		else
+			phys = NULL;
+
+		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
+		if (IS_ERR(sgt)) {
+			err = PTR_ERR(sgt);
+			goto unpin;
+		}
+
+		if (sgt) {
+			unsigned long mask = HOST1X_RELOC_READ |
+					     HOST1X_RELOC_WRITE;
+			enum dma_data_direction dir;
+
+			switch (reloc->flags & mask) {
+			case HOST1X_RELOC_READ:
+				dir = DMA_TO_DEVICE;
+				break;
+
+			case HOST1X_RELOC_WRITE:
+				dir = DMA_FROM_DEVICE;
+				break;
+
+			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+				dir = DMA_BIDIRECTIONAL;
+				break;
+
+			default:
+				err = -EINVAL;
+				goto unpin;
+			}
+
+			err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir);
+			if (!err) {
+				err = -ENOMEM;
+				goto unpin;
+			}
+
+			job->unpins[job->num_unpins].dev = dev;
+			job->unpins[job->num_unpins].dir = dir;
+			phys_addr = sg_dma_address(sgt->sgl);
+		}
+
+		job->addr_phys[job->num_unpins] = phys_addr;
+		job->unpins[job->num_unpins].bo = reloc->target.bo;
+		job->unpins[job->num_unpins].sgt = sgt;
+		job->num_unpins++;
+	}
+
+	/*
+	 * We will copy gathers BO content later, so there is no need to
+	 * hold and pin them.
+	 */
+	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+		return 0;
+
+	for (i = 0; i < job->num_cmds; i++) {
+		size_t gather_size = 0;
+		struct scatterlist *sg;
+		dma_addr_t phys_addr;
+		unsigned long shift;
+		struct iova *alloc;
+		dma_addr_t *phys;
+		unsigned int j;
+
+		if (job->cmds[i].is_wait)
+			continue;
+
+		g = &job->cmds[i].gather;
+
+		g->bo = host1x_bo_get(g->bo);
+		if (!g->bo) {
+			err = -EINVAL;
+			goto unpin;
+		}
+
+		/**
+		 * If the host1x is not attached to an IOMMU, there is no need
+		 * to map the buffer object for the host1x, since the physical
+		 * address can simply be used.
+		 */
+		if (!iommu_get_domain_for_dev(host->dev))
+			phys = &phys_addr;
+		else
+			phys = NULL;
+
+		sgt = host1x_bo_pin(host->dev, g->bo, phys);
+		if (IS_ERR(sgt)) {
+			err = PTR_ERR(sgt);
+			goto put;
+		}
+
+		if (host->domain) {
+			for_each_sg(sgt->sgl, sg, sgt->nents, j)
+				gather_size += sg->length;
+			gather_size = iova_align(&host->iova, gather_size);
+
+			shift = iova_shift(&host->iova);
+			alloc = alloc_iova(&host->iova, gather_size >> shift,
+					   host->iova_end >> shift, true);
+			if (!alloc) {
+				err = -ENOMEM;
+				goto put;
+			}
+
+			err = iommu_map_sg(host->domain,
+					iova_dma_addr(&host->iova, alloc),
+					sgt->sgl, sgt->nents, IOMMU_READ);
+			if (err == 0) {
+				__free_iova(&host->iova, alloc);
+				err = -EINVAL;
+				goto put;
+			}
+
+			job->unpins[job->num_unpins].size = gather_size;
+			phys_addr = iova_dma_addr(&host->iova, alloc);
+		} else if (sgt) {
+			err = dma_map_sg(host->dev, sgt->sgl, sgt->nents,
+					 DMA_TO_DEVICE);
+			if (!err) {
+				err = -ENOMEM;
+				goto put;
+			}
+
+			job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
+			job->unpins[job->num_unpins].dev = host->dev;
+			phys_addr = sg_dma_address(sgt->sgl);
+		}
+
+		job->addr_phys[job->num_unpins] = phys_addr;
+		job->gather_addr_phys[i] = phys_addr;
+
+		job->unpins[job->num_unpins].bo = g->bo;
+		job->unpins[job->num_unpins].sgt = sgt;
+		job->num_unpins++;
+	}
+
+	return 0;
+
+put:
+	host1x_bo_put(g->bo);
+unpin:
+	host1x_job_unpin(job);
+	return err;
+}
+
+static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
+{
+	void *cmdbuf_addr = NULL;
+	struct host1x_bo *cmdbuf = g->bo;
+	unsigned int i;
+
+	/* pin & patch the relocs for one gather */
+	for (i = 0; i < job->num_relocs; i++) {
+		struct host1x_reloc *reloc = &job->relocs[i];
+		u32 reloc_addr = (job->reloc_addr_phys[i] +
+				  reloc->target.offset) >> reloc->shift;
+		u32 *target;
+
+		/* skip all other gathers */
+		if (cmdbuf != reloc->cmdbuf.bo)
+			continue;
+
+		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+			target = (u32 *)job->gather_copy_mapped +
+					reloc->cmdbuf.offset / sizeof(u32) +
+						g->offset / sizeof(u32);
+			goto patch_reloc;
+		}
+
+		if (!cmdbuf_addr) {
+			cmdbuf_addr = host1x_bo_mmap(cmdbuf);
+
+			if (unlikely(!cmdbuf_addr)) {
+				pr_err("Could not map cmdbuf for relocation\n");
+				return -ENOMEM;
+			}
+		}
+
+		target = cmdbuf_addr + reloc->cmdbuf.offset;
+patch_reloc:
+		*target = reloc_addr;
+	}
+
+	if (cmdbuf_addr)
+		host1x_bo_munmap(cmdbuf, cmdbuf_addr);
+
+	return 0;
+}
+
+static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
+			unsigned int offset)
+{
+	offset *= sizeof(u32);
+
+	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
+		return false;
+
+	/* relocation shift value validation isn't implemented yet */
+	if (reloc->shift)
+		return false;
+
+	return true;
+}
+
+struct host1x_firewall {
+	struct host1x_job *job;
+	struct device *dev;
+
+	unsigned int num_relocs;
+	struct host1x_reloc *reloc;
+
+	struct host1x_bo *cmdbuf;
+	unsigned int offset;
+
+	u32 words;
+	u32 class;
+	u32 reg;
+	u32 mask;
+	u32 count;
+};
+
+static int check_register(struct host1x_firewall *fw, unsigned long offset)
+{
+	if (!fw->job->is_addr_reg)
+		return 0;
+
+	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
+		if (!fw->num_relocs)
+			return -EINVAL;
+
+		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
+			return -EINVAL;
+
+		fw->num_relocs--;
+		fw->reloc++;
+	}
+
+	return 0;
+}
+
+static int check_class(struct host1x_firewall *fw, u32 class)
+{
+	if (!fw->job->is_valid_class) {
+		if (fw->class != class)
+			return -EINVAL;
+	} else {
+		if (!fw->job->is_valid_class(fw->class))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int check_mask(struct host1x_firewall *fw)
+{
+	u32 mask = fw->mask;
+	u32 reg = fw->reg;
+	int ret;
+
+	while (mask) {
+		if (fw->words == 0)
+			return -EINVAL;
+
+		if (mask & 1) {
+			ret = check_register(fw, reg);
+			if (ret < 0)
+				return ret;
+
+			fw->words--;
+			fw->offset++;
+		}
+		mask >>= 1;
+		reg++;
+	}
+
+	return 0;
+}
+
+static int check_incr(struct host1x_firewall *fw)
+{
+	u32 count = fw->count;
+	u32 reg = fw->reg;
+	int ret;
+
+	while (count) {
+		if (fw->words == 0)
+			return -EINVAL;
+
+		ret = check_register(fw, reg);
+		if (ret < 0)
+			return ret;
+
+		reg++;
+		fw->words--;
+		fw->offset++;
+		count--;
+	}
+
+	return 0;
+}
+
+static int check_nonincr(struct host1x_firewall *fw)
+{
+	u32 count = fw->count;
+	int ret;
+
+	while (count) {
+		if (fw->words == 0)
+			return -EINVAL;
+
+		ret = check_register(fw, fw->reg);
+		if (ret < 0)
+			return ret;
+
+		fw->words--;
+		fw->offset++;
+		count--;
+	}
+
+	return 0;
+}
+
+static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
+{
+	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
+		(g->offset / sizeof(u32));
+	u32 job_class = fw->class;
+	int err = 0;
+
+	fw->words = g->words;
+	fw->cmdbuf = g->bo;
+	fw->offset = 0;
+
+	while (fw->words && !err) {
+		u32 word = cmdbuf_base[fw->offset];
+		u32 opcode = (word & 0xf0000000) >> 28;
+
+		fw->mask = 0;
+		fw->reg = 0;
+		fw->count = 0;
+		fw->words--;
+		fw->offset++;
+
+		switch (opcode) {
+		case 0:
+			fw->class = word >> 6 & 0x3ff;
+			fw->mask = word & 0x3f;
+			fw->reg = word >> 16 & 0xfff;
+			err = check_class(fw, job_class);
+			if (!err)
+				err = check_mask(fw);
+			if (err)
+				goto out;
+			break;
+		case 1:
+			fw->reg = word >> 16 & 0xfff;
+			fw->count = word & 0xffff;
+			err = check_incr(fw);
+			if (err)
+				goto out;
+			break;
+
+		case 2:
+			fw->reg = word >> 16 & 0xfff;
+			fw->count = word & 0xffff;
+			err = check_nonincr(fw);
+			if (err)
+				goto out;
+			break;
+
+		case 3:
+			fw->mask = word & 0xffff;
+			fw->reg = word >> 16 & 0xfff;
+			err = check_mask(fw);
+			if (err)
+				goto out;
+			break;
+		case 4:
+		case 14:
+			break;
+		default:
+			err = -EINVAL;
+			break;
+		}
+	}
+
+out:
+	return err;
+}
+
+static inline int copy_gathers(struct device *host, struct host1x_job *job,
+			       struct device *dev)
+{
+	struct host1x_firewall fw;
+	size_t size = 0;
+	size_t offset = 0;
+	unsigned int i;
+
+	fw.job = job;
+	fw.dev = dev;
+	fw.reloc = job->relocs;
+	fw.num_relocs = job->num_relocs;
+	fw.class = job->class;
+
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
+
+		if (job->cmds[i].is_wait)
+			continue;
+
+		g = &job->cmds[i].gather;
+
+		size += g->words * sizeof(u32);
+	}
+
+	/*
+	 * Try a non-blocking allocation from a higher priority pools first,
+	 * as awaiting for the allocation here is a major performance hit.
+	 */
+	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
+					       GFP_NOWAIT);
+
+	/* the higher priority allocation failed, try the generic-blocking */
+	if (!job->gather_copy_mapped)
+		job->gather_copy_mapped = dma_alloc_wc(host, size,
+						       &job->gather_copy,
+						       GFP_KERNEL);
+	if (!job->gather_copy_mapped)
+		return -ENOMEM;
+
+	job->gather_copy_size = size;
+
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
+		void *gather;
+
+		if (job->cmds[i].is_wait)
+			continue;
+		g = &job->cmds[i].gather;
+
+		/* Copy the gather */
+		gather = host1x_bo_mmap(g->bo);
+		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
+		       g->words * sizeof(u32));
+		host1x_bo_munmap(g->bo, gather);
+
+		/* Store the location in the buffer */
+		g->base = job->gather_copy;
+		g->offset = offset;
+
+		/* Validate the job */
+		if (validate(&fw, g))
+			return -EINVAL;
+
+		offset += g->words * sizeof(u32);
+	}
+
+	/* No relocs should remain at this point */
+	if (fw.num_relocs)
+		return -EINVAL;
+
+	return 0;
+}
+
+int host1x_job_pin(struct host1x_job *job, struct device *dev)
+{
+	int err;
+	unsigned int i, j;
+	struct host1x *host = dev_get_drvdata(dev->parent);
+
+	/* pin memory */
+	err = pin_job(host, job);
+	if (err)
+		goto out;
+
+	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+		err = copy_gathers(host->dev, job, dev);
+		if (err)
+			goto out;
+	}
+
+	/* patch gathers */
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
+
+		if (job->cmds[i].is_wait)
+			continue;
+		g = &job->cmds[i].gather;
+
+		/* process each gather mem only once */
+		if (g->handled)
+			continue;
+
+		/* copy_gathers() sets gathers base if firewall is enabled */
+		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+			g->base = job->gather_addr_phys[i];
+
+		for (j = i + 1; j < job->num_cmds; j++) {
+			if (!job->cmds[j].is_wait &&
+			    job->cmds[j].gather.bo == g->bo) {
+				job->cmds[j].gather.handled = true;
+				job->cmds[j].gather.base = g->base;
+			}
+		}
+
+		err = do_relocs(job, g);
+		if (err)
+			break;
+	}
+
+out:
+	if (err)
+		host1x_job_unpin(job);
+	wmb();
+
+	return err;
+}
+EXPORT_SYMBOL(host1x_job_pin);
+
+void host1x_job_unpin(struct host1x_job *job)
+{
+	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
+	unsigned int i;
+
+	for (i = 0; i < job->num_unpins; i++) {
+		struct host1x_job_unpin_data *unpin = &job->unpins[i];
+		struct device *dev = unpin->dev ?: host->dev;
+		struct sg_table *sgt = unpin->sgt;
+
+		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
+		    unpin->size && host->domain) {
+			iommu_unmap(host->domain, job->addr_phys[i],
+				    unpin->size);
+			free_iova(&host->iova,
+				iova_pfn(&host->iova, job->addr_phys[i]));
+		}
+
+		if (unpin->dev && sgt)
+			dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents,
+				     unpin->dir);
+
+		host1x_bo_unpin(dev, unpin->bo, sgt);
+		host1x_bo_put(unpin->bo);
+	}
+
+	job->num_unpins = 0;
+
+	if (job->gather_copy_size)
+		dma_free_wc(host->dev, job->gather_copy_size,
+			    job->gather_copy_mapped, job->gather_copy);
+}
+EXPORT_SYMBOL(host1x_job_unpin);
+
+/*
+ * Debug routine used to dump job entries
+ */
+void host1x_job_dump(struct device *dev, struct host1x_job *job)
+{
+	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt->id);
+	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
+	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
+	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
+	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
+	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
+}
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
new file mode 100644
index 00000000..33adfaed
--- /dev/null
+++ b/drivers/gpu/host1x/job.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Job
+ *
+ * Copyright (c) 2011-2013, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_JOB_H
+#define __HOST1X_JOB_H
+
+#include <linux/dma-direction.h>
+
+struct host1x_job_gather {
+	unsigned int words;
+	dma_addr_t base;
+	struct host1x_bo *bo;
+	unsigned int offset;
+	bool handled;
+};
+
+struct host1x_job_wait {
+	u32 id;
+	u32 threshold;
+};
+
+struct host1x_job_cmd {
+	bool is_wait;
+
+	union {
+		struct host1x_job_gather gather;
+		struct host1x_job_wait wait;
+	};
+};
+
+struct host1x_job_unpin_data {
+	struct host1x_bo *bo;
+	struct sg_table *sgt;
+	struct device *dev;
+	size_t size;
+	enum dma_data_direction dir;
+};
+
+/*
+ * Dump contents of job to debug output.
+ */
+void host1x_job_dump(struct device *dev, struct host1x_job *job);
+
+#endif
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
new file mode 100644
index 00000000..8ded40bf
--- /dev/null
+++ b/drivers/gpu/host1x/mipi.c
@@ -0,0 +1,558 @@
+/*
+ * Copyright (C) 2013 NVIDIA Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <linux/clk.h>
+#include <linux/host1x-next.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dev.h"
+
+#define MIPI_CAL_CTRL			0x00
+#define MIPI_CAL_CTRL_NOISE_FILTER(x)	(((x) & 0xf) << 26)
+#define MIPI_CAL_CTRL_PRESCALE(x)	(((x) & 0x3) << 24)
+#define MIPI_CAL_CTRL_CLKEN_OVR		(1 << 4)
+#define MIPI_CAL_CTRL_START		(1 << 0)
+
+#define MIPI_CAL_AUTOCAL_CTRL		0x01
+
+#define MIPI_CAL_STATUS			0x02
+#define MIPI_CAL_STATUS_DONE		(1 << 16)
+#define MIPI_CAL_STATUS_ACTIVE		(1 <<  0)
+
+#define MIPI_CAL_CONFIG_CSIA		0x05
+#define MIPI_CAL_CONFIG_CSIB		0x06
+#define MIPI_CAL_CONFIG_CSIC		0x07
+#define MIPI_CAL_CONFIG_CSID		0x08
+#define MIPI_CAL_CONFIG_CSIE		0x09
+#define MIPI_CAL_CONFIG_CSIF		0x0a
+#define MIPI_CAL_CONFIG_DSIA		0x0e
+#define MIPI_CAL_CONFIG_DSIB		0x0f
+#define MIPI_CAL_CONFIG_DSIC		0x10
+#define MIPI_CAL_CONFIG_DSID		0x11
+
+#define MIPI_CAL_CONFIG_DSIA_CLK	0x19
+#define MIPI_CAL_CONFIG_DSIB_CLK	0x1a
+#define MIPI_CAL_CONFIG_CSIAB_CLK	0x1b
+#define MIPI_CAL_CONFIG_DSIC_CLK	0x1c
+#define MIPI_CAL_CONFIG_CSICD_CLK	0x1c
+#define MIPI_CAL_CONFIG_DSID_CLK	0x1d
+#define MIPI_CAL_CONFIG_CSIE_CLK	0x1d
+
+/* for data and clock lanes */
+#define MIPI_CAL_CONFIG_SELECT		(1 << 21)
+
+/* for data lanes */
+#define MIPI_CAL_CONFIG_HSPDOS(x)	(((x) & 0x1f) << 16)
+#define MIPI_CAL_CONFIG_HSPUOS(x)	(((x) & 0x1f) <<  8)
+#define MIPI_CAL_CONFIG_TERMOS(x)	(((x) & 0x1f) <<  0)
+
+/* for clock lanes */
+#define MIPI_CAL_CONFIG_HSCLKPDOSD(x)	(((x) & 0x1f) <<  8)
+#define MIPI_CAL_CONFIG_HSCLKPUOSD(x)	(((x) & 0x1f) <<  0)
+
+#define MIPI_CAL_BIAS_PAD_CFG0		0x16
+#define MIPI_CAL_BIAS_PAD_PDVCLAMP	(1 << 1)
+#define MIPI_CAL_BIAS_PAD_E_VCLAMP_REF	(1 << 0)
+
+#define MIPI_CAL_BIAS_PAD_CFG1		0x17
+#define MIPI_CAL_BIAS_PAD_DRV_DN_REF(x) (((x) & 0x7) << 16)
+#define MIPI_CAL_BIAS_PAD_DRV_UP_REF(x) (((x) & 0x7) << 8)
+
+#define MIPI_CAL_BIAS_PAD_CFG2		0x18
+#define MIPI_CAL_BIAS_PAD_VCLAMP(x)	(((x) & 0x7) << 16)
+#define MIPI_CAL_BIAS_PAD_VAUXP(x)	(((x) & 0x7) << 4)
+#define MIPI_CAL_BIAS_PAD_PDVREG	(1 << 1)
+
+struct tegra_mipi_pad {
+	unsigned long data;
+	unsigned long clk;
+};
+
+struct tegra_mipi_soc {
+	bool has_clk_lane;
+	const struct tegra_mipi_pad *pads;
+	unsigned int num_pads;
+
+	bool clock_enable_override;
+	bool needs_vclamp_ref;
+
+	/* bias pad configuration settings */
+	u8 pad_drive_down_ref;
+	u8 pad_drive_up_ref;
+
+	u8 pad_vclamp_level;
+	u8 pad_vauxp_level;
+
+	/* calibration settings for data lanes */
+	u8 hspdos;
+	u8 hspuos;
+	u8 termos;
+
+	/* calibration settings for clock lanes */
+	u8 hsclkpdos;
+	u8 hsclkpuos;
+};
+
+struct tegra_mipi {
+	const struct tegra_mipi_soc *soc;
+	struct device *dev;
+	void __iomem *regs;
+	struct mutex lock;
+	struct clk *clk;
+
+	unsigned long usage_count;
+};
+
+struct tegra_mipi_device {
+	struct platform_device *pdev;
+	struct tegra_mipi *mipi;
+	struct device *device;
+	unsigned long pads;
+};
+
+static inline u32 tegra_mipi_readl(struct tegra_mipi *mipi,
+				   unsigned long offset)
+{
+	return readl(mipi->regs + (offset << 2));
+}
+
+static inline void tegra_mipi_writel(struct tegra_mipi *mipi, u32 value,
+				     unsigned long offset)
+{
+	writel(value, mipi->regs + (offset << 2));
+}
+
+static int tegra_mipi_power_up(struct tegra_mipi *mipi)
+{
+	u32 value;
+	int err;
+
+	err = clk_enable(mipi->clk);
+	if (err < 0)
+		return err;
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0);
+	value &= ~MIPI_CAL_BIAS_PAD_PDVCLAMP;
+
+	if (mipi->soc->needs_vclamp_ref)
+		value |= MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
+
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value &= ~MIPI_CAL_BIAS_PAD_PDVREG;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	clk_disable(mipi->clk);
+
+	return 0;
+}
+
+static int tegra_mipi_power_down(struct tegra_mipi *mipi)
+{
+	u32 value;
+	int err;
+
+	err = clk_enable(mipi->clk);
+	if (err < 0)
+		return err;
+
+	/*
+	 * The MIPI_CAL_BIAS_PAD_PDVREG controls a voltage regulator that
+	 * supplies the DSI pads. This must be kept enabled until none of the
+	 * DSI lanes are used anymore.
+	 */
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value |= MIPI_CAL_BIAS_PAD_PDVREG;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	/*
+	 * MIPI_CAL_BIAS_PAD_PDVCLAMP and MIPI_CAL_BIAS_PAD_E_VCLAMP_REF
+	 * control a regulator that supplies current to the pre-driver logic.
+	 * Powering down this regulator causes DSI to fail, so it must remain
+	 * powered on until none of the DSI lanes are used anymore.
+	 */
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0);
+
+	if (mipi->soc->needs_vclamp_ref)
+		value &= ~MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
+
+	value |= MIPI_CAL_BIAS_PAD_PDVCLAMP;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
+
+	return 0;
+}
+
+struct tegra_mipi_device *tegra_mipi_request(struct device *device,
+					     struct device_node *np)
+{
+	struct tegra_mipi_device *dev;
+	struct of_phandle_args args;
+	int err;
+
+	err = of_parse_phandle_with_args(np, "nvidia,mipi-calibrate",
+					 "#nvidia,mipi-calibrate-cells", 0,
+					 &args);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	dev->pdev = of_find_device_by_node(args.np);
+	if (!dev->pdev) {
+		err = -ENODEV;
+		goto free;
+	}
+
+	dev->mipi = platform_get_drvdata(dev->pdev);
+	if (!dev->mipi) {
+		err = -EPROBE_DEFER;
+		goto put;
+	}
+
+	of_node_put(args.np);
+
+	dev->pads = args.args[0];
+	dev->device = device;
+
+	return dev;
+
+put:
+	platform_device_put(dev->pdev);
+free:
+	kfree(dev);
+out:
+	of_node_put(args.np);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(tegra_mipi_request);
+
+void tegra_mipi_free(struct tegra_mipi_device *device)
+{
+	platform_device_put(device->pdev);
+	kfree(device);
+}
+EXPORT_SYMBOL(tegra_mipi_free);
+
+int tegra_mipi_enable(struct tegra_mipi_device *dev)
+{
+	int err = 0;
+
+	mutex_lock(&dev->mipi->lock);
+
+	if (dev->mipi->usage_count++ == 0)
+		err = tegra_mipi_power_up(dev->mipi);
+
+	mutex_unlock(&dev->mipi->lock);
+
+	return err;
+
+}
+EXPORT_SYMBOL(tegra_mipi_enable);
+
+int tegra_mipi_disable(struct tegra_mipi_device *dev)
+{
+	int err = 0;
+
+	mutex_lock(&dev->mipi->lock);
+
+	if (--dev->mipi->usage_count == 0)
+		err = tegra_mipi_power_down(dev->mipi);
+
+	mutex_unlock(&dev->mipi->lock);
+
+	return err;
+
+}
+EXPORT_SYMBOL(tegra_mipi_disable);
+
+int tegra_mipi_wait(struct tegra_mipi_device *device)
+{
+	struct tegra_mipi *mipi = device->mipi;
+	void __iomem *status_reg = mipi->regs + (MIPI_CAL_STATUS << 2);
+	u32 value;
+	int err;
+
+	err = clk_enable(device->mipi->clk);
+	if (err < 0)
+		return err;
+
+	mutex_lock(&device->mipi->lock);
+
+	err = readl_relaxed_poll_timeout(status_reg, value,
+					 !(value & MIPI_CAL_STATUS_ACTIVE) &&
+					 (value & MIPI_CAL_STATUS_DONE), 50,
+					 250000);
+	mutex_unlock(&device->mipi->lock);
+	clk_disable(device->mipi->clk);
+
+	return err;
+}
+EXPORT_SYMBOL(tegra_mipi_wait);
+
+int tegra_mipi_calibrate(struct tegra_mipi_device *device)
+{
+	const struct tegra_mipi_soc *soc = device->mipi->soc;
+	unsigned int i;
+	u32 value;
+	int err;
+
+	err = clk_enable(device->mipi->clk);
+	if (err < 0)
+		return err;
+
+	mutex_lock(&device->mipi->lock);
+
+	value = MIPI_CAL_BIAS_PAD_DRV_DN_REF(soc->pad_drive_down_ref) |
+		MIPI_CAL_BIAS_PAD_DRV_UP_REF(soc->pad_drive_up_ref);
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG1);
+
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value &= ~MIPI_CAL_BIAS_PAD_VCLAMP(0x7);
+	value &= ~MIPI_CAL_BIAS_PAD_VAUXP(0x7);
+	value |= MIPI_CAL_BIAS_PAD_VCLAMP(soc->pad_vclamp_level);
+	value |= MIPI_CAL_BIAS_PAD_VAUXP(soc->pad_vauxp_level);
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	for (i = 0; i < soc->num_pads; i++) {
+		u32 clk = 0, data = 0;
+
+		if (device->pads & BIT(i)) {
+			data = MIPI_CAL_CONFIG_SELECT |
+			       MIPI_CAL_CONFIG_HSPDOS(soc->hspdos) |
+			       MIPI_CAL_CONFIG_HSPUOS(soc->hspuos) |
+			       MIPI_CAL_CONFIG_TERMOS(soc->termos);
+			clk = MIPI_CAL_CONFIG_SELECT |
+			      MIPI_CAL_CONFIG_HSCLKPDOSD(soc->hsclkpdos) |
+			      MIPI_CAL_CONFIG_HSCLKPUOSD(soc->hsclkpuos);
+		}
+
+		tegra_mipi_writel(device->mipi, data, soc->pads[i].data);
+
+		if (soc->has_clk_lane && soc->pads[i].clk != 0)
+			tegra_mipi_writel(device->mipi, clk, soc->pads[i].clk);
+	}
+
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL);
+	value &= ~MIPI_CAL_CTRL_NOISE_FILTER(0xf);
+	value &= ~MIPI_CAL_CTRL_PRESCALE(0x3);
+	value |= MIPI_CAL_CTRL_NOISE_FILTER(0xa);
+	value |= MIPI_CAL_CTRL_PRESCALE(0x2);
+
+	if (!soc->clock_enable_override)
+		value &= ~MIPI_CAL_CTRL_CLKEN_OVR;
+	else
+		value |= MIPI_CAL_CTRL_CLKEN_OVR;
+
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
+
+	/* clear any pending status bits */
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_STATUS);
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_STATUS);
+
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL);
+	value |= MIPI_CAL_CTRL_START;
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
+
+	mutex_unlock(&device->mipi->lock);
+	clk_disable(device->mipi->clk);
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_mipi_calibrate);
+
+static const struct tegra_mipi_pad tegra114_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA },
+	{ .data = MIPI_CAL_CONFIG_CSIB },
+	{ .data = MIPI_CAL_CONFIG_CSIC },
+	{ .data = MIPI_CAL_CONFIG_CSID },
+	{ .data = MIPI_CAL_CONFIG_CSIE },
+	{ .data = MIPI_CAL_CONFIG_DSIA },
+	{ .data = MIPI_CAL_CONFIG_DSIB },
+	{ .data = MIPI_CAL_CONFIG_DSIC },
+	{ .data = MIPI_CAL_CONFIG_DSID },
+};
+
+static const struct tegra_mipi_soc tegra114_mipi_soc = {
+	.has_clk_lane = false,
+	.pads = tegra114_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra114_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = true,
+	.pad_drive_down_ref = 0x2,
+	.pad_drive_up_ref = 0x0,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x4,
+	.termos = 0x5,
+	.hsclkpdos = 0x0,
+	.hsclkpuos = 0x4,
+};
+
+static const struct tegra_mipi_pad tegra124_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSID, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = MIPI_CAL_CONFIG_CSIE_CLK  },
+	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK  },
+	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK  },
+};
+
+static const struct tegra_mipi_soc tegra124_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra124_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = true,
+	.pad_drive_down_ref = 0x2,
+	.pad_drive_up_ref = 0x0,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x0,
+	.termos = 0x0,
+	.hsclkpdos = 0x1,
+	.hsclkpuos = 0x2,
+};
+
+static const struct tegra_mipi_soc tegra132_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra124_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
+	.clock_enable_override = false,
+	.needs_vclamp_ref = false,
+	.pad_drive_down_ref = 0x0,
+	.pad_drive_up_ref = 0x3,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x0,
+	.termos = 0x0,
+	.hsclkpdos = 0x3,
+	.hsclkpuos = 0x2,
+};
+
+static const struct tegra_mipi_pad tegra210_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSID, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIF, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSIC, .clk = MIPI_CAL_CONFIG_DSIC_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSID, .clk = MIPI_CAL_CONFIG_DSID_CLK },
+};
+
+static const struct tegra_mipi_soc tegra210_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra210_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra210_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = false,
+	.pad_drive_down_ref = 0x0,
+	.pad_drive_up_ref = 0x3,
+	.pad_vclamp_level = 0x1,
+	.pad_vauxp_level = 0x1,
+	.hspdos = 0x0,
+	.hspuos = 0x2,
+	.termos = 0x0,
+	.hsclkpdos = 0x0,
+	.hsclkpuos = 0x2,
+};
+
+static const struct of_device_id tegra_mipi_of_match[] = {
+	{ .compatible = "nvidia,tegra114-mipi", .data = &tegra114_mipi_soc },
+	{ .compatible = "nvidia,tegra124-mipi", .data = &tegra124_mipi_soc },
+	{ .compatible = "nvidia,tegra132-mipi", .data = &tegra132_mipi_soc },
+	{ .compatible = "nvidia,tegra210-mipi", .data = &tegra210_mipi_soc },
+	{ },
+};
+
+static int tegra_mipi_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct tegra_mipi *mipi;
+	struct resource *res;
+	int err;
+
+	match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node);
+	if (!match)
+		return -ENODEV;
+
+	mipi = devm_kzalloc(&pdev->dev, sizeof(*mipi), GFP_KERNEL);
+	if (!mipi)
+		return -ENOMEM;
+
+	mipi->soc = match->data;
+	mipi->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mipi->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mipi->regs))
+		return PTR_ERR(mipi->regs);
+
+	mutex_init(&mipi->lock);
+
+	mipi->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(mipi->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(mipi->clk);
+	}
+
+	err = clk_prepare(mipi->clk);
+	if (err < 0)
+		return err;
+
+	platform_set_drvdata(pdev, mipi);
+
+	return 0;
+}
+
+static int tegra_mipi_remove(struct platform_device *pdev)
+{
+	struct tegra_mipi *mipi = platform_get_drvdata(pdev);
+
+	clk_unprepare(mipi->clk);
+
+	return 0;
+}
+
+struct platform_driver tegra_mipi_driver = {
+	.driver = {
+		.name = "tegra-mipi",
+		.of_match_table = tegra_mipi_of_match,
+	},
+	.probe = tegra_mipi_probe,
+	.remove = tegra_mipi_remove,
+};
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
new file mode 100644
index 00000000..d0be7bdb
--- /dev/null
+++ b/drivers/gpu/host1x/syncpt.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x Syncpoints
+ *
+ * Copyright (c) 2010-2015, NVIDIA Corporation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include <trace/events/host1x.h>
+
+#include "syncpt.h"
+#include "dev.h"
+#include "intr.h"
+#include "debug.h"
+
+#define SYNCPT_CHECK_PERIOD (2 * HZ)
+#define MAX_STUCK_CHECK_COUNT 15
+
+static struct host1x_syncpt_base *
+host1x_syncpt_base_request(struct host1x *host)
+{
+	struct host1x_syncpt_base *bases = host->bases;
+	unsigned int i;
+
+	for (i = 0; i < host->info->nb_bases; i++)
+		if (!bases[i].requested)
+			break;
+
+	if (i >= host->info->nb_bases)
+		return NULL;
+
+	bases[i].requested = true;
+	return &bases[i];
+}
+
+static void host1x_syncpt_base_free(struct host1x_syncpt_base *base)
+{
+	if (base)
+		base->requested = false;
+}
+
+struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
+					  unsigned long flags,
+					  const char *name)
+{
+	struct host1x_syncpt *sp = host->syncpt;
+	char *full_name;
+	unsigned int i;
+
+	mutex_lock(&host->syncpt_mutex);
+
+	for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++)
+		;
+
+	if (i >= host->info->nb_pts)
+		goto unlock;
+
+	if (flags & HOST1X_SYNCPT_HAS_BASE) {
+		sp->base = host1x_syncpt_base_request(host);
+		if (!sp->base)
+			goto unlock;
+	}
+
+	full_name = kasprintf(GFP_KERNEL, "%u-%s", sp->id, name);
+	if (!full_name)
+		goto free_base;
+
+	sp->name = full_name;
+
+	if (flags & HOST1X_SYNCPT_CLIENT_MANAGED)
+		sp->client_managed = true;
+	else
+		sp->client_managed = false;
+
+	kref_init(&sp->ref);
+
+	mutex_unlock(&host->syncpt_mutex);
+	return sp;
+
+free_base:
+	host1x_syncpt_base_free(sp->base);
+	sp->base = NULL;
+unlock:
+	mutex_unlock(&host->syncpt_mutex);
+	return NULL;
+}
+EXPORT_SYMBOL(host1x_syncpt_alloc);
+
+/**
+ * host1x_syncpt_id() - retrieve syncpoint ID
+ * @sp: host1x syncpoint
+ *
+ * Given a pointer to a struct host1x_syncpt, retrieves its ID. This ID is
+ * often used as a value to program into registers that control how hardware
+ * blocks interact with syncpoints.
+ */
+u32 host1x_syncpt_id(struct host1x_syncpt *sp)
+{
+	return sp->id;
+}
+EXPORT_SYMBOL(host1x_syncpt_id);
+
+/**
+ * host1x_syncpt_incr_max() - update the value sent to hardware
+ * @sp: host1x syncpoint
+ * @incrs: number of increments
+ */
+u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs)
+{
+	return (u32)atomic_add_return(incrs, &sp->max_val);
+}
+EXPORT_SYMBOL(host1x_syncpt_incr_max);
+
+ /*
+ * Write cached syncpoint and waitbase values to hardware.
+ */
+void host1x_syncpt_restore(struct host1x *host)
+{
+	struct host1x_syncpt *sp_base = host->syncpt;
+	unsigned int i;
+
+	for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
+		host1x_hw_syncpt_restore(host, sp_base + i);
+
+	for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
+		host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
+
+	wmb();
+}
+
+/*
+ * Update the cached syncpoint and waitbase values by reading them
+ * from the registers.
+  */
+void host1x_syncpt_save(struct host1x *host)
+{
+	struct host1x_syncpt *sp_base = host->syncpt;
+	unsigned int i;
+
+	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+		if (host1x_syncpt_client_managed(sp_base + i))
+			host1x_hw_syncpt_load(host, sp_base + i);
+		else
+			WARN_ON(!host1x_syncpt_idle(sp_base + i));
+	}
+
+	for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
+		host1x_hw_syncpt_load_wait_base(host, sp_base + i);
+}
+
+/*
+ * Updates the cached syncpoint value by reading a new value from the hardware
+ * register
+ */
+u32 host1x_syncpt_load(struct host1x_syncpt *sp)
+{
+	u32 val;
+
+	val = host1x_hw_syncpt_load(sp->host, sp);
+	trace_host1x_syncpt_load_min(sp->id, val);
+
+	return val;
+}
+
+/*
+ * Get the current syncpoint base
+ */
+u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp)
+{
+	host1x_hw_syncpt_load_wait_base(sp->host, sp);
+
+	return sp->base_val;
+}
+
+/**
+ * host1x_syncpt_incr() - increment syncpoint value from CPU, updating cache
+ * @sp: host1x syncpoint
+ */
+int host1x_syncpt_incr(struct host1x_syncpt *sp)
+{
+	return host1x_hw_syncpt_cpu_incr(sp->host, sp);
+}
+EXPORT_SYMBOL(host1x_syncpt_incr);
+
+/*
+ * Updated sync point form hardware, and returns true if syncpoint is expired,
+ * false if we may need to wait
+ */
+static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
+{
+	host1x_hw_syncpt_load(sp->host, sp);
+
+	return host1x_syncpt_is_expired(sp, thresh);
+}
+
+/**
+ * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
+ * @sp: host1x syncpoint
+ * @thresh: threshold
+ * @timeout: maximum time to wait for the syncpoint to reach the given value
+ * @value: return location for the syncpoint value
+ */
+int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
+		       u32 *value)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	void *ref;
+	struct host1x_waitlist *waiter;
+	int err = 0, check_count = 0;
+	u32 val;
+
+	if (value)
+		*value = 0;
+
+	/* first check cache */
+	if (host1x_syncpt_is_expired(sp, thresh)) {
+		if (value)
+			*value = host1x_syncpt_load(sp);
+
+		return 0;
+	}
+
+	/* try to read from register */
+	val = host1x_hw_syncpt_load(sp->host, sp);
+	if (host1x_syncpt_is_expired(sp, thresh)) {
+		if (value)
+			*value = val;
+
+		goto done;
+	}
+
+	if (!timeout) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* allocate a waiter */
+	waiter = kzalloc(sizeof(*waiter), GFP_KERNEL);
+	if (!waiter) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	/* schedule a wakeup when the syncpoint value is reached */
+	err = host1x_intr_add_action(sp->host, sp, thresh,
+				     HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
+				     &wq, waiter, &ref);
+	if (err)
+		goto done;
+
+	err = -EAGAIN;
+	/* Caller-specified timeout may be impractically low */
+	if (timeout < 0)
+		timeout = LONG_MAX;
+
+	/* wait for the syncpoint, or timeout, or signal */
+	while (timeout) {
+		long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
+		int remain;
+
+		remain = wait_event_interruptible_timeout(wq,
+				syncpt_load_min_is_expired(sp, thresh),
+				check);
+		if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
+			if (value)
+				*value = host1x_syncpt_load(sp);
+
+			err = 0;
+
+			break;
+		}
+
+		if (remain < 0) {
+			err = remain;
+			break;
+		}
+
+		timeout -= check;
+
+		if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
+			dev_warn(sp->host->dev,
+				"%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n",
+				 current->comm, sp->id, sp->name,
+				 thresh, timeout);
+
+			host1x_debug_dump_syncpts(sp->host);
+
+			if (check_count == MAX_STUCK_CHECK_COUNT)
+				host1x_debug_dump(sp->host);
+
+			check_count++;
+		}
+	}
+
+	host1x_intr_put_ref(sp->host, sp->id, ref);
+
+done:
+	return err;
+}
+EXPORT_SYMBOL(host1x_syncpt_wait);
+
+/*
+ * Returns true if syncpoint is expired, false if we may need to wait
+ */
+bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh)
+{
+	u32 current_val;
+
+	smp_rmb();
+
+	current_val = (u32)atomic_read(&sp->min_val);
+
+	return ((current_val - thresh) & 0x80000000U) == 0U;
+}
+
+int host1x_syncpt_init(struct host1x *host)
+{
+	struct host1x_syncpt_base *bases;
+	struct host1x_syncpt *syncpt;
+	unsigned int i;
+
+	syncpt = devm_kcalloc(host->dev, host->info->nb_pts, sizeof(*syncpt),
+			      GFP_KERNEL);
+	if (!syncpt)
+		return -ENOMEM;
+
+	bases = devm_kcalloc(host->dev, host->info->nb_bases, sizeof(*bases),
+			     GFP_KERNEL);
+	if (!bases)
+		return -ENOMEM;
+
+	for (i = 0; i < host->info->nb_pts; i++) {
+		syncpt[i].id = i;
+		syncpt[i].host = host;
+
+		/*
+		 * Unassign syncpt from channels for purposes of Tegra186
+		 * syncpoint protection. This prevents any channel from
+		 * accessing it until it is reassigned.
+		 */
+		host1x_hw_syncpt_assign_to_channel(host, &syncpt[i], NULL);
+	}
+
+	for (i = 0; i < host->info->nb_bases; i++)
+		bases[i].id = i;
+
+	mutex_init(&host->syncpt_mutex);
+	host->syncpt = syncpt;
+	host->bases = bases;
+
+	host1x_syncpt_restore(host);
+	host1x_hw_syncpt_enable_protection(host);
+
+	/* Allocate sync point to use for clearing waits for expired fences */
+	host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
+	if (!host->nop_sp)
+		return -ENOMEM;
+
+	if (host->info->reserve_vblank_syncpts) {
+		kref_init(&host->syncpt[26].ref);
+		kref_init(&host->syncpt[27].ref);
+	}
+
+	return 0;
+}
+
+/**
+ * host1x_syncpt_request() - request a syncpoint
+ * @client: client requesting the syncpoint
+ * @flags: flags
+ *
+ * host1x client drivers can use this function to allocate a syncpoint for
+ * subsequent use. A syncpoint returned by this function will be reserved for
+ * use by the client exclusively. When no longer using a syncpoint, a host1x
+ * client driver needs to release it using host1x_syncpt_put().
+ */
+struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client,
+					    unsigned long flags)
+{
+	struct host1x *host = dev_get_drvdata(client->host->parent);
+
+	return host1x_syncpt_alloc(host, flags, dev_name(client->dev));
+}
+EXPORT_SYMBOL(host1x_syncpt_request);
+
+static void syncpt_release(struct kref *ref)
+{
+	struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref);
+
+	atomic_set(&sp->max_val, host1x_syncpt_read(sp));
+	sp->locked = false;
+
+	mutex_lock(&sp->host->syncpt_mutex);
+
+	host1x_syncpt_base_free(sp->base);
+	kfree(sp->name);
+	sp->base = NULL;
+	sp->name = NULL;
+	sp->client_managed = false;
+
+	mutex_unlock(&sp->host->syncpt_mutex);
+}
+
+/**
+ * host1x_syncpt_put() - free a requested syncpoint
+ * @sp: host1x syncpoint
+ *
+ * Release a syncpoint previously allocated using host1x_syncpt_request(). A
+ * host1x client driver should call this when the syncpoint is no longer in
+ * use.
+ */
+void host1x_syncpt_put(struct host1x_syncpt *sp)
+{
+	if (!sp)
+		return;
+
+	kref_put(&sp->ref, syncpt_release);
+}
+EXPORT_SYMBOL(host1x_syncpt_put);
+
+void host1x_syncpt_deinit(struct host1x *host)
+{
+	struct host1x_syncpt *sp = host->syncpt;
+	unsigned int i;
+
+	for (i = 0; i < host->info->nb_pts; i++, sp++)
+		kfree(sp->name);
+}
+
+/**
+ * host1x_syncpt_read_max() - read maximum syncpoint value
+ * @sp: host1x syncpoint
+ *
+ * The maximum syncpoint value indicates how many operations there are in
+ * queue, either in channel or in a software thread.
+ */
+u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
+{
+	smp_rmb();
+
+	return (u32)atomic_read(&sp->max_val);
+}
+EXPORT_SYMBOL(host1x_syncpt_read_max);
+
+/**
+ * host1x_syncpt_read_min() - read minimum syncpoint value
+ * @sp: host1x syncpoint
+ *
+ * The minimum syncpoint value is a shadow of the current sync point value in
+ * hardware.
+ */
+u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
+{
+	smp_rmb();
+
+	return (u32)atomic_read(&sp->min_val);
+}
+EXPORT_SYMBOL(host1x_syncpt_read_min);
+
+/**
+ * host1x_syncpt_read() - read the current syncpoint value
+ * @sp: host1x syncpoint
+ */
+u32 host1x_syncpt_read(struct host1x_syncpt *sp)
+{
+	return host1x_syncpt_load(sp);
+}
+EXPORT_SYMBOL(host1x_syncpt_read);
+
+unsigned int host1x_syncpt_nb_pts(struct host1x *host)
+{
+	return host->info->nb_pts;
+}
+
+unsigned int host1x_syncpt_nb_bases(struct host1x *host)
+{
+	return host->info->nb_bases;
+}
+
+unsigned int host1x_syncpt_nb_mlocks(struct host1x *host)
+{
+	return host->info->nb_mlocks;
+}
+
+/**
+ * host1x_syncpt_get_by_id() - obtain a syncpoint by ID
+ * @host: host1x controller
+ * @id: syncpoint ID
+ */
+struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host,
+					      unsigned int id)
+{
+	if (id >= host->info->nb_pts)
+		return NULL;
+
+	if (kref_get_unless_zero(&host->syncpt[id].ref))
+		return &host->syncpt[id];
+	else
+		return NULL;
+}
+EXPORT_SYMBOL(host1x_syncpt_get_by_id);
+
+/**
+ * host1x_syncpt_get_by_id_noref() - obtain a syncpoint by ID but don't
+ * 	increase the refcount.
+ * @host: host1x controller
+ * @id: syncpoint ID
+ */
+struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host,
+						    unsigned int id)
+{
+	if (id >= host->info->nb_pts)
+		return NULL;
+
+	return &host->syncpt[id];
+}
+EXPORT_SYMBOL(host1x_syncpt_get_by_id_noref);
+
+/**
+ * host1x_syncpt_get() - increment syncpoint refcount
+ * @sp: syncpoint
+ */
+struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp)
+{
+	kref_get(&sp->ref);
+
+	return sp;
+}
+EXPORT_SYMBOL(host1x_syncpt_get);
+
+/**
+ * host1x_syncpt_get_base() - obtain the wait base associated with a syncpoint
+ * @sp: host1x syncpoint
+ */
+struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp)
+{
+	return sp ? sp->base : NULL;
+}
+EXPORT_SYMBOL(host1x_syncpt_get_base);
+
+/**
+ * host1x_syncpt_base_id() - retrieve the ID of a syncpoint wait base
+ * @base: host1x syncpoint wait base
+ */
+u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base)
+{
+	return base->id;
+}
+EXPORT_SYMBOL(host1x_syncpt_base_id);
+
+static void do_nothing(struct kref *ref)
+{
+}
+
+/**
+ * host1x_syncpt_release_vblank_reservation() - Make VBLANK syncpoint
+ *   available for allocation
+ *
+ * @client: host1x bus client
+ *
+ * Makes VBLANK<i> syncpoint available for allocatation if it was
+ * reserved at initialization time. This should be called by the display
+ * driver after it has ensured that any VBLANK increment programming configured
+ * by the boot chain has been disabled.
+ */
+void host1x_syncpt_release_vblank_reservation(struct host1x_client *client,
+					      u32 syncpt_id)
+{
+	struct host1x *host = dev_get_drvdata(client->host->parent);
+
+	if (!host->info->reserve_vblank_syncpts)
+		return;
+
+	kref_put(&host->syncpt[syncpt_id].ref, do_nothing);
+}
+EXPORT_SYMBOL(host1x_syncpt_release_vblank_reservation);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
new file mode 100644
index 00000000..b42b4254
--- /dev/null
+++ b/drivers/gpu/host1x/syncpt.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Syncpoints
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_SYNCPT_H
+#define __HOST1X_SYNCPT_H
+
+#include <linux/atomic.h>
+#include <linux/host1x-next.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/sched.h>
+
+#include "intr.h"
+
+struct host1x;
+
+/* Reserved for replacing an expired wait with a NOP */
+#define HOST1X_SYNCPT_RESERVED			0
+
+struct host1x_syncpt_base {
+	unsigned int id;
+	bool requested;
+};
+
+struct host1x_syncpt {
+	struct kref ref;
+
+	unsigned int id;
+	atomic_t min_val;
+	atomic_t max_val;
+	u32 base_val;
+	const char *name;
+	bool client_managed;
+	struct host1x *host;
+	struct host1x_syncpt_base *base;
+
+	/* interrupt data */
+	struct host1x_syncpt_intr intr;
+
+	/* 
+	 * If a submission incrementing this syncpoint fails, lock it so that
+	 * further submission cannot be made until application has handled the
+	 * failure.
+	 */
+	bool locked;
+};
+
+/* Initialize sync point array  */
+int host1x_syncpt_init(struct host1x *host);
+
+/*  Free sync point array */
+void host1x_syncpt_deinit(struct host1x *host);
+
+/* Return number of sync point supported. */
+unsigned int host1x_syncpt_nb_pts(struct host1x *host);
+
+/* Return number of wait bases supported. */
+unsigned int host1x_syncpt_nb_bases(struct host1x *host);
+
+/* Return number of mlocks supported. */
+unsigned int host1x_syncpt_nb_mlocks(struct host1x *host);
+
+/*
+ * Check sync point sanity. If max is larger than min, there have too many
+ * sync point increments.
+ *
+ * Client managed sync point are not tracked.
+ * */
+static inline bool host1x_syncpt_check_max(struct host1x_syncpt *sp, u32 real)
+{
+	u32 max;
+	if (sp->client_managed)
+		return true;
+	max = host1x_syncpt_read_max(sp);
+	return (s32)(max - real) >= 0;
+}
+
+/* Return true if sync point is client managed. */
+static inline bool host1x_syncpt_client_managed(struct host1x_syncpt *sp)
+{
+	return sp->client_managed;
+}
+
+/*
+ * Returns true if syncpoint min == max, which means that there are no
+ * outstanding operations.
+ */
+static inline bool host1x_syncpt_idle(struct host1x_syncpt *sp)
+{
+	int min, max;
+	smp_rmb();
+	min = atomic_read(&sp->min_val);
+	max = atomic_read(&sp->max_val);
+	return (min == max);
+}
+
+/* Load current value from hardware to the shadow register. */
+u32 host1x_syncpt_load(struct host1x_syncpt *sp);
+
+/* Check if the given syncpoint value has already passed */
+bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh);
+
+/* Save host1x sync point state into shadow registers. */
+void host1x_syncpt_save(struct host1x *host);
+
+/* Reset host1x sync point state from shadow registers. */
+void host1x_syncpt_restore(struct host1x *host);
+
+/* Read current wait base value into shadow register and return it. */
+u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp);
+
+/* Indicate future operations by incrementing the sync point max. */
+u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
+
+/* Check if sync point id is valid. */
+static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
+{
+	return sp->id < host1x_syncpt_nb_pts(sp->host);
+}
+
+static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp)
+{
+	sp->locked = true;
+}
+
+#endif
diff --git a/drivers/gpu/host1x/uapi.c b/drivers/gpu/host1x/uapi.c
new file mode 100644
index 00000000..6a12c183
--- /dev/null
+++ b/drivers/gpu/host1x/uapi.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * /dev/host1x syncpoint interface
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/cdev.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/host1x-next.h>
+#include <linux/nospec.h>
+#include <linux/sync_file.h>
+
+#include "dev.h"
+#include "fence.h"
+#include "syncpt.h"
+#include "uapi.h"
+
+#include <uapi/linux/host1x-next.h>
+
+static int syncpt_file_release(struct inode *inode, struct file *file)
+{
+	struct host1x_syncpt *sp = file->private_data;
+
+	host1x_syncpt_put(sp);
+
+	return 0;
+}
+
+static int syncpt_file_ioctl_info(struct host1x_syncpt *sp, void __user *data)
+{
+	struct host1x_syncpoint_info args;
+	unsigned long copy_err;
+
+	copy_err = copy_from_user(&args, data, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	if (args.reserved[0] || args.reserved[1] || args.reserved[2])
+		return -EINVAL;
+
+	args.id = sp->id;
+
+	copy_err = copy_to_user(data, &args, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	return 0;
+}
+
+static int syncpt_file_ioctl_incr(struct host1x_syncpt *sp, void __user *data)
+{
+	struct host1x_syncpoint_increment args;
+	unsigned long copy_err;
+	u32 i;
+
+	copy_err = copy_from_user(&args, data, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	for (i = 0; i < args.count; i++) {
+		host1x_syncpt_incr(sp);
+		if (signal_pending(current))
+			return -EINTR;
+	}
+
+	return 0;
+}
+
+static long syncpt_file_ioctl(struct file *file, unsigned int cmd,
+			      unsigned long arg)
+{
+	void __user *data = (void __user *)arg;
+	long err;
+
+	switch (cmd) {
+	case HOST1X_IOCTL_SYNCPOINT_INFO:
+		err = syncpt_file_ioctl_info(file->private_data, data);
+		break;
+
+	case HOST1X_IOCTL_SYNCPOINT_INCREMENT:
+		err = syncpt_file_ioctl_incr(file->private_data, data);
+		break;
+
+	default:
+		err = -ENOTTY;
+	}
+
+	return err;
+}
+
+static const struct file_operations syncpt_file_fops = {
+	.owner = THIS_MODULE,
+	.release = syncpt_file_release,
+	.unlocked_ioctl = syncpt_file_ioctl,
+	.compat_ioctl = syncpt_file_ioctl,
+};
+
+struct host1x_syncpt *host1x_syncpt_fd_get(int fd)
+{
+	struct host1x_syncpt *sp;
+	struct file *file = fget(fd);
+
+	if (!file)
+		return ERR_PTR(-EINVAL);
+
+	if (file->f_op != &syncpt_file_fops) {
+		fput(file);
+		return ERR_PTR(-EINVAL);
+	}
+
+	sp = file->private_data;
+
+	host1x_syncpt_get(sp);
+
+	fput(file);
+
+	return sp;
+}
+EXPORT_SYMBOL(host1x_syncpt_fd_get);
+
+static int dev_file_open(struct inode *inode, struct file *file)
+{
+	struct host1x_uapi *uapi =
+		container_of(inode->i_cdev, struct host1x_uapi, cdev);
+
+	file->private_data = container_of(uapi, struct host1x, uapi);
+
+	return 0;
+}
+
+static int dev_file_ioctl_read_syncpoint(struct host1x *host1x,
+					 void __user *data)
+{
+	struct host1x_read_syncpoint args;
+	unsigned long copy_err;
+
+	copy_err = copy_from_user(&args, data, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	if (args.id >= host1x_syncpt_nb_pts(host1x))
+		return -EINVAL;
+
+	args.id = array_index_nospec(args.id, host1x_syncpt_nb_pts(host1x));
+	args.value = host1x_syncpt_read(&host1x->syncpt[args.id]);
+
+	copy_err = copy_to_user(data, &args, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	return 0;
+}
+
+static int dev_file_ioctl_alloc_syncpoint(struct host1x *host1x,
+					  void __user *data)
+{
+	struct host1x_allocate_syncpoint args;
+	struct host1x_syncpt *sp;
+	unsigned long copy_err;
+	int err;
+
+	copy_err = copy_from_user(&args, data, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	if (args.reserved[0] || args.reserved[1] || args.reserved[2])
+		return -EINVAL;
+
+	sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED,
+				 current->comm);
+	if (!sp)
+		return -EBUSY;
+
+	err = anon_inode_getfd("host1x_syncpt", &syncpt_file_fops, sp,
+			       O_CLOEXEC);
+	if (err < 0)
+		goto free_syncpt;
+
+	args.fd = err;
+
+	copy_err = copy_to_user(data, &args, sizeof(args));
+	if (copy_err) {
+		err = -EFAULT;
+		goto put_fd;
+	}
+
+	return 0;
+
+put_fd:
+	put_unused_fd(args.fd);
+free_syncpt:
+	host1x_syncpt_put(sp);
+
+	return err;
+}
+
+static int dev_file_ioctl_create_fence(struct host1x *host1x, void __user *data)
+{
+	struct host1x_create_fence args;
+	unsigned long copy_err;
+	int fd;
+
+	copy_err = copy_from_user(&args, data, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	if (args.reserved[0])
+		return -EINVAL;
+
+	if (args.id >= host1x_syncpt_nb_pts(host1x))
+		return -EINVAL;
+
+	args.id = array_index_nospec(args.id, host1x_syncpt_nb_pts(host1x));
+
+	fd = host1x_fence_create_fd(&host1x->syncpt[args.id], args.threshold);
+	if (fd < 0)
+		return fd;
+
+	args.fence_fd = fd;
+
+	copy_err = copy_to_user(data, &args, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	return 0;
+}
+
+static int dev_file_ioctl_fence_extract(struct host1x *host1x, void __user *data)
+{
+	struct host1x_fence_extract_fence __user *fences_user_ptr;
+	struct dma_fence *fence, **fences;
+	struct host1x_fence_extract args;
+	struct dma_fence_array *array;
+	unsigned int num_fences, i;
+	unsigned long copy_err;
+	int err;
+
+	copy_err = copy_from_user(&args, data, sizeof(args));
+	if (copy_err)
+		return -EFAULT;
+
+	fences_user_ptr = u64_to_user_ptr(args.fences_ptr);
+
+	if (args.reserved[0] || args.reserved[1])
+		return -EINVAL;
+
+	fence = sync_file_get_fence(args.fence_fd);
+	if (!fence)
+		return -EINVAL;
+
+	array = to_dma_fence_array(fence);
+	if (array) {
+		fences = array->fences;
+		num_fences = array->num_fences;
+	} else {
+		fences = &fence;
+		num_fences = 1;
+	}
+
+	for (i = 0; i < min(num_fences, args.num_fences); i++) {
+		struct host1x_fence_extract_fence f;
+
+		err = host1x_fence_extract(fences[i], &f.id, &f.threshold);
+		if (err)
+			goto put_fence;
+
+		copy_err = copy_to_user(fences_user_ptr + i, &f, sizeof(f));
+		if (copy_err) {
+			err = -EFAULT;
+			goto put_fence;
+		}
+	}
+
+	args.num_fences = i+1;
+
+	copy_err = copy_to_user(data, &args, sizeof(args));
+	if (copy_err) {
+		err = -EFAULT;
+		goto put_fence;
+	}
+
+	return 0;
+
+put_fence:
+	dma_fence_put(fence);
+
+	return err;
+}
+
+static long dev_file_ioctl(struct file *file, unsigned int cmd,
+			   unsigned long arg)
+{
+	void __user *data = (void __user *)arg;
+	long err;
+
+	switch (cmd) {
+	case HOST1X_IOCTL_READ_SYNCPOINT:
+		err = dev_file_ioctl_read_syncpoint(file->private_data, data);
+		break;
+
+	case HOST1X_IOCTL_ALLOCATE_SYNCPOINT:
+		err = dev_file_ioctl_alloc_syncpoint(file->private_data, data);
+		break;
+
+	case HOST1X_IOCTL_CREATE_FENCE:
+		err = dev_file_ioctl_create_fence(file->private_data, data);
+		break;
+
+	case HOST1X_IOCTL_FENCE_EXTRACT:
+		err = dev_file_ioctl_fence_extract(file->private_data, data);
+		break;
+
+	default:
+		err = -ENOTTY;
+	}
+
+	return err;
+}
+
+static const struct file_operations dev_file_fops = {
+	.owner = THIS_MODULE,
+	.open = dev_file_open,
+	.unlocked_ioctl = dev_file_ioctl,
+	.compat_ioctl = dev_file_ioctl,
+};
+
+int host1x_uapi_init(struct host1x_uapi *uapi, struct host1x *host1x)
+{
+	int err;
+	dev_t dev_num;
+
+	err = alloc_chrdev_region(&dev_num, 0, 1, "host1x");
+	if (err)
+		return err;
+
+	uapi->class = class_create(THIS_MODULE, "host1x");
+	if (IS_ERR(uapi->class)) {
+		err = PTR_ERR(uapi->class);
+		goto unregister_chrdev_region;
+	}
+
+	cdev_init(&uapi->cdev, &dev_file_fops);
+	err = cdev_add(&uapi->cdev, dev_num, 1);
+	if (err)
+		goto destroy_class;
+
+	uapi->dev = device_create(uapi->class, host1x->dev,
+				  dev_num, NULL, "host1x");
+	if (IS_ERR(uapi->dev)) {
+		err = PTR_ERR(uapi->dev);
+		goto del_cdev;
+	}
+
+	cdev_add(&uapi->cdev, dev_num, 1);
+
+	uapi->dev_num = dev_num;
+
+	return 0;
+
+del_cdev:
+	cdev_del(&uapi->cdev);
+destroy_class:
+	class_destroy(uapi->class);
+unregister_chrdev_region:
+	unregister_chrdev_region(dev_num, 1);
+
+	return err;
+}
+
+void host1x_uapi_deinit(struct host1x_uapi *uapi)
+{
+	device_destroy(uapi->class, uapi->dev_num);
+	cdev_del(&uapi->cdev);
+	class_destroy(uapi->class);
+	unregister_chrdev_region(uapi->dev_num, 1);
+}
diff --git a/drivers/gpu/host1x/uapi.h b/drivers/gpu/host1x/uapi.h
new file mode 100644
index 00000000..7beb5e44
--- /dev/null
+++ b/drivers/gpu/host1x/uapi.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_UAPI_H
+#define HOST1X_UAPI_H
+
+#include <linux/cdev.h>
+
+struct host1x_uapi {
+	struct class *class;
+
+	struct cdev cdev;
+	struct device *dev;
+	dev_t dev_num;
+};
+
+int host1x_uapi_init(struct host1x_uapi *uapi, struct host1x *host1x);
+void host1x_uapi_deinit(struct host1x_uapi *uapi);
+
+#endif

From 9f0b4d74ea450cdf1763cce55b11ac1f7c2b64b1 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Tue, 15 Dec 2020 12:12:47 +0000
Subject: [PATCH 15/73] gpu: host1x: Update host1x to align with v5.10

Update the host1x driver to Linux v5.10 with the 'Host1x/Tegra UAPI'
series [0] applied. This driver is built as an external module for
testing and development with upstream Linux kernels.

[0] https://patchwork.ozlabs.org/project/linux-tegra/list/?series=215770

Bug 3205478

Change-Id: I32c1a492293ca30dd47d1edf75aa53aa9e74b400
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2460074
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Shanker Donthineni <sdonthineni@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/job.c  | 22 ++++++++--------------
 drivers/gpu/host1x/mipi.c | 22 ++++++++++------------
 2 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index f445f1e4..d7bb5d18 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -192,11 +192,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 				goto unpin;
 			}
 
-			err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir);
-			if (!err) {
-				err = -ENOMEM;
+			err = dma_map_sgtable(dev, sgt, dir, 0);
+			if (err)
 				goto unpin;
-			}
 
 			job->unpins[job->num_unpins].dev = dev;
 			job->unpins[job->num_unpins].dir = dir;
@@ -253,7 +251,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 		}
 
 		if (host->domain) {
-			for_each_sg(sgt->sgl, sg, sgt->nents, j)
+			for_each_sgtable_sg(sgt, sg, j)
 				gather_size += sg->length;
 			gather_size = iova_align(&host->iova, gather_size);
 
@@ -265,9 +263,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 				goto put;
 			}
 
-			err = iommu_map_sg(host->domain,
+			err = iommu_map_sgtable(host->domain,
 					iova_dma_addr(&host->iova, alloc),
-					sgt->sgl, sgt->nents, IOMMU_READ);
+					sgt, IOMMU_READ);
 			if (err == 0) {
 				__free_iova(&host->iova, alloc);
 				err = -EINVAL;
@@ -277,12 +275,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			job->unpins[job->num_unpins].size = gather_size;
 			phys_addr = iova_dma_addr(&host->iova, alloc);
 		} else if (sgt) {
-			err = dma_map_sg(host->dev, sgt->sgl, sgt->nents,
-					 DMA_TO_DEVICE);
-			if (!err) {
-				err = -ENOMEM;
+			err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
+			if (err)
 				goto put;
-			}
 
 			job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
 			job->unpins[job->num_unpins].dev = host->dev;
@@ -699,8 +694,7 @@ void host1x_job_unpin(struct host1x_job *job)
 		}
 
 		if (unpin->dev && sgt)
-			dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents,
-				     unpin->dir);
+			dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);
 
 		host1x_bo_unpin(dev, unpin->bo, sgt);
 		host1x_bo_put(unpin->bo);
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
index 8ded40bf..614733c2 100644
--- a/drivers/gpu/host1x/mipi.c
+++ b/drivers/gpu/host1x/mipi.c
@@ -293,19 +293,13 @@ int tegra_mipi_disable(struct tegra_mipi_device *dev)
 }
 EXPORT_SYMBOL(tegra_mipi_disable);
 
-int tegra_mipi_wait(struct tegra_mipi_device *device)
+int tegra_mipi_finish_calibration(struct tegra_mipi_device *device)
 {
 	struct tegra_mipi *mipi = device->mipi;
 	void __iomem *status_reg = mipi->regs + (MIPI_CAL_STATUS << 2);
 	u32 value;
 	int err;
 
-	err = clk_enable(device->mipi->clk);
-	if (err < 0)
-		return err;
-
-	mutex_lock(&device->mipi->lock);
-
 	err = readl_relaxed_poll_timeout(status_reg, value,
 					 !(value & MIPI_CAL_STATUS_ACTIVE) &&
 					 (value & MIPI_CAL_STATUS_DONE), 50,
@@ -315,9 +309,9 @@ int tegra_mipi_wait(struct tegra_mipi_device *device)
 
 	return err;
 }
-EXPORT_SYMBOL(tegra_mipi_wait);
+EXPORT_SYMBOL(tegra_mipi_finish_calibration);
 
-int tegra_mipi_calibrate(struct tegra_mipi_device *device)
+int tegra_mipi_start_calibration(struct tegra_mipi_device *device)
 {
 	const struct tegra_mipi_soc *soc = device->mipi->soc;
 	unsigned int i;
@@ -381,12 +375,16 @@ int tegra_mipi_calibrate(struct tegra_mipi_device *device)
 	value |= MIPI_CAL_CTRL_START;
 	tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
 
-	mutex_unlock(&device->mipi->lock);
-	clk_disable(device->mipi->clk);
+	/*
+	 * Wait for min 72uS to let calibration logic finish calibration
+	 * sequence codes before waiting for pads idle state to apply the
+	 * results.
+	 */
+	usleep_range(75, 80);
 
 	return 0;
 }
-EXPORT_SYMBOL(tegra_mipi_calibrate);
+EXPORT_SYMBOL(tegra_mipi_start_calibration);
 
 static const struct tegra_mipi_pad tegra114_mipi_pads[] = {
 	{ .data = MIPI_CAL_CONFIG_CSIA },

From 27ced8b3152e0d1a3f8141d2f9c0519b64c375c0 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Tue, 12 Jan 2021 10:41:11 +0000
Subject: [PATCH 16/73] gpu: host1x: Update to UAPI series v5

Update the host1x driver to the 'Host1x/Tegra UAPI v5' series [0]. This
fixes a few minor bugs found in the previous series.

[0] https://patchwork.ozlabs.org/project/linux-tegra/list/?series=223684

Bug 200687525

Change-Id: I680bfb9e8db73b9e2571551f22fadca1f2974498
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2469983
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/bus.c    | 1 +
 drivers/gpu/host1x/cdma.c   | 2 +-
 drivers/gpu/host1x/fence.c  | 4 ++--
 drivers/gpu/host1x/intr.c   | 9 ++++++++-
 drivers/gpu/host1x/intr.h   | 4 +++-
 drivers/gpu/host1x/job.c    | 2 +-
 drivers/gpu/host1x/syncpt.c | 2 +-
 7 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 38f90ad0..01538262 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -32,6 +32,7 @@ struct host1x_subdev {
 /**
  * host1x_subdev_add() - add a new subdevice with an associated device node
  * @device: host1x device to add the subdevice to
+ * @driver: host1x driver containing the subdevices
  * @np: device node
  */
 static int host1x_subdev_add(struct host1x_device *device,
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 8b9e7b4e..286e2f97 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -453,7 +453,7 @@ syncpt_incr:
 				u32 *mapped = cdma->push_buffer.mapped;
 
 				/*
-				 * Overwrite opcodes with 0 word writes to
+				 * Overwrite opcodes with 0 word writes
 				 * to offset 0xbad. This does nothing but
 				 * has a easily detected signature in debug
 				 * traces.
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index b2484606..e96ad93f 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -114,7 +114,7 @@ void host1x_fence_signal(struct host1x_syncpt_fence *f)
 	 */
 	cancel_delayed_work_sync(&f->timeout_work);
 
-	host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref);
+	host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false);
 
 	dma_fence_signal(&f->base);
 	dma_fence_put(&f->base);
@@ -133,7 +133,7 @@ static void do_fence_timeout(struct work_struct *work)
 	 * Cancel pending timeout work - if it races, it will
 	 * not get 'f->signaling' and return.
 	 */
-	host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref);
+	host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true);
 
 	dma_fence_set_error(&f->base, -ETIMEDOUT);
 	dma_fence_signal(&f->base);
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 19b59c5c..bcffc4d7 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -251,7 +251,8 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
 	return 0;
 }
 
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref)
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
+			 bool flush)
 {
 	struct host1x_waitlist *waiter = ref;
 	struct host1x_syncpt *syncpt;
@@ -268,6 +269,12 @@ void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref)
 	}
 	spin_unlock(&syncpt->intr.lock);
 
+	if (flush) {
+		/* Wait until any concurrently executing handler has finished. */
+		while (atomic_read(&waiter->state) != WLS_HANDLED)
+			cpu_relax();
+	}
+
 	kref_put(&waiter->refcount, waiter_release);
 }
 
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index dedbd0f7..e4c34609 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -76,8 +76,10 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
  * Unreference an action submitted to host1x_intr_add_action().
  * You must call this if you passed non-NULL as ref.
  * @ref the ref returned from host1x_intr_add_action()
+ * @flush wait until any pending handlers have completed before returning.
  */
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref);
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
+			 bool flush);
 
 /* Initialize host1x sync point interrupt */
 int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index d7bb5d18..ae484d86 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -84,7 +84,7 @@ static void job_free(struct kref *ref)
 
 	if (job->waiter)
 		host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
-				    job->waiter);
+				    job->waiter, false);
 
 	if (job->syncpt)
 		host1x_syncpt_put(job->syncpt);
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index d0be7bdb..100270ac 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -295,7 +295,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 		}
 	}
 
-	host1x_intr_put_ref(sp->host, sp->id, ref);
+	host1x_intr_put_ref(sp->host, sp->id, ref, true);
 
 done:
 	return err;

From b06375f360bca6633b615e38911953535168ead4 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Mon, 25 Jan 2021 12:40:42 +0200
Subject: [PATCH 17/73] gpu: host1x: Set waiter output reference within lock

With the new fence implementation, we need to assign the reference
within the lock. Otherwise, it can happen that the waiter may get
signaled before the pointer has been set, and the fence implementation
will try to call intr_put_ref with a NULL pointer.

Bug 3237754

Change-Id: I465ec3ecf63c4f179ba8ef9cc5af0892a93f20af
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2474949
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/intr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index bcffc4d7..7a847545 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -244,10 +244,11 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
 			host1x_hw_intr_enable_syncpt_intr(host, syncpt->id);
 	}
 
-	spin_unlock(&syncpt->intr.lock);
-
 	if (ref)
 		*ref = waiter;
+
+	spin_unlock(&syncpt->intr.lock);
+
 	return 0;
 }
 

From 54584c92ca8120319e826e017f68e465d23ccdb3 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 12 Feb 2021 18:02:30 +0000
Subject: [PATCH 18/73] gpu: host1x: Clean-up build for Host1x

Trivial clean-up for building Host1x by ...

1. Removing the Kconfig file because this is not needed for building
   as an external module and we only plan to build this as an external
   module.
2. Moving both 'host1x-next.h' header files under the Host1x source
   directory so that we don't need to pass the path for finding the
   header when building Host1x as an external module.
3. Remove the 'CONFIG_TEGRA_HOST1X_NEXT' variable because we always
   build Host1x as a module.

Bug 200687525

Change-Id: Ica5534f92a3a2ee6f6500ec419fa05f03993b90b
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2485760
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/Kconfig                    |  24 --
 drivers/gpu/host1x/Makefile                   |   6 +-
 .../gpu/host1x/include/linux/host1x-next.h    | 374 ++++++++++++++++++
 .../host1x/include/uapi/linux/host1x-next.h   | 134 +++++++
 4 files changed, 512 insertions(+), 26 deletions(-)
 delete mode 100644 drivers/gpu/host1x/Kconfig
 create mode 100644 drivers/gpu/host1x/include/linux/host1x-next.h
 create mode 100644 drivers/gpu/host1x/include/uapi/linux/host1x-next.h

diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
deleted file mode 100644
index 36800138..00000000
--- a/drivers/gpu/host1x/Kconfig
+++ /dev/null
@@ -1,24 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config TEGRA_HOST1X_NEXT
-	tristate "NVIDIA Tegra host1x driver"
-	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
-	select IOMMU_IOVA
-	help
-	  Driver for the NVIDIA Tegra host1x hardware.
-
-	  The Tegra host1x module is the DMA engine for register access to
-	  Tegra's graphics- and multimedia-related modules. The modules served
-	  by host1x are referred to as clients. host1x includes some other
-	  functionality, such as synchronization.
-
-if TEGRA_HOST1X_NEXT
-
-config TEGRA_HOST1X_FIREWALL
-	bool "Enable HOST1X security firewall"
-	default y
-	help
-	  Say yes if kernel should protect command streams from tampering.
-
-	  If unsure, choose Y.
-
-endif
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 2f77263f..5b6bfcae 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-CONFIG_TEGRA_HOST1X_NEXT := m
+
+srctree.host1x := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
+
 ccflags-y := -I$(srctree.host1x)/include
 
 host1x-next-y = \
@@ -21,4 +23,4 @@ host1x-next-y = \
 	hw/host1x06.o \
 	hw/host1x07.o
 
-obj-$(CONFIG_TEGRA_HOST1X_NEXT) += host1x-next.o
+obj-m := host1x-next.o
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
new file mode 100644
index 00000000..5890f91d
--- /dev/null
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2009-2013, NVIDIA Corporation. All rights reserved.
+ */
+
+#ifndef __LINUX_HOST1X_H
+#define __LINUX_HOST1X_H
+
+#include <linux/device.h>
+#include <linux/types.h>
+
+enum host1x_class {
+	HOST1X_CLASS_HOST1X = 0x1,
+	HOST1X_CLASS_GR2D = 0x51,
+	HOST1X_CLASS_GR2D_SB = 0x52,
+	HOST1X_CLASS_VIC = 0x5D,
+	HOST1X_CLASS_GR3D = 0x60,
+};
+
+struct host1x;
+struct host1x_client;
+struct iommu_group;
+
+u64 host1x_get_dma_mask(struct host1x *host1x);
+
+/**
+ * struct host1x_client_ops - host1x client operations
+ * @init: host1x client initialization code
+ * @exit: host1x client tear down code
+ * @suspend: host1x client suspend code
+ * @resume: host1x client resume code
+ */
+struct host1x_client_ops {
+	int (*init)(struct host1x_client *client);
+	int (*exit)(struct host1x_client *client);
+	int (*suspend)(struct host1x_client *client);
+	int (*resume)(struct host1x_client *client);
+};
+
+/**
+ * struct host1x_client - host1x client structure
+ * @list: list node for the host1x client
+ * @host: pointer to struct device representing the host1x controller
+ * @dev: pointer to struct device backing this host1x client
+ * @group: IOMMU group that this client is a member of
+ * @ops: host1x client operations
+ * @class: host1x class represented by this client
+ * @channel: host1x channel associated with this client
+ * @syncpts: array of syncpoints requested for this client
+ * @num_syncpts: number of syncpoints requested for this client
+ * @parent: pointer to parent structure
+ * @usecount: reference count for this structure
+ * @lock: mutex for mutually exclusive concurrency
+ */
+struct host1x_client {
+	struct list_head list;
+	struct device *host;
+	struct device *dev;
+	struct iommu_group *group;
+
+	const struct host1x_client_ops *ops;
+
+	enum host1x_class class;
+	struct host1x_channel *channel;
+
+	struct host1x_syncpt **syncpts;
+	unsigned int num_syncpts;
+
+	struct host1x_client *parent;
+	unsigned int usecount;
+	struct mutex lock;
+};
+
+/*
+ * host1x buffer objects
+ */
+
+struct host1x_bo;
+struct sg_table;
+
+struct host1x_bo_ops {
+	struct host1x_bo *(*get)(struct host1x_bo *bo);
+	void (*put)(struct host1x_bo *bo);
+	struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo,
+				dma_addr_t *phys);
+	void (*unpin)(struct device *dev, struct sg_table *sgt);
+	void *(*mmap)(struct host1x_bo *bo);
+	void (*munmap)(struct host1x_bo *bo, void *addr);
+};
+
+struct host1x_bo {
+	const struct host1x_bo_ops *ops;
+};
+
+static inline void host1x_bo_init(struct host1x_bo *bo,
+				  const struct host1x_bo_ops *ops)
+{
+	bo->ops = ops;
+}
+
+static inline struct host1x_bo *host1x_bo_get(struct host1x_bo *bo)
+{
+	return bo->ops->get(bo);
+}
+
+static inline void host1x_bo_put(struct host1x_bo *bo)
+{
+	bo->ops->put(bo);
+}
+
+static inline struct sg_table *host1x_bo_pin(struct device *dev,
+					     struct host1x_bo *bo,
+					     dma_addr_t *phys)
+{
+	return bo->ops->pin(dev, bo, phys);
+}
+
+static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo,
+				   struct sg_table *sgt)
+{
+	bo->ops->unpin(dev, sgt);
+}
+
+static inline void *host1x_bo_mmap(struct host1x_bo *bo)
+{
+	return bo->ops->mmap(bo);
+}
+
+static inline void host1x_bo_munmap(struct host1x_bo *bo, void *addr)
+{
+	bo->ops->munmap(bo, addr);
+}
+
+/*
+ * host1x syncpoints
+ */
+
+#define HOST1X_SYNCPT_CLIENT_MANAGED	(1 << 0)
+#define HOST1X_SYNCPT_HAS_BASE		(1 << 1)
+
+struct host1x_syncpt_base;
+struct host1x_syncpt;
+struct host1x;
+
+struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host, u32 id);
+struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host, u32 id);
+struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp);
+u32 host1x_syncpt_id(struct host1x_syncpt *sp);
+u32 host1x_syncpt_read_min(struct host1x_syncpt *sp);
+u32 host1x_syncpt_read_max(struct host1x_syncpt *sp);
+u32 host1x_syncpt_read(struct host1x_syncpt *sp);
+int host1x_syncpt_incr(struct host1x_syncpt *sp);
+u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
+int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
+		       u32 *value);
+struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client,
+					    unsigned long flags);
+void host1x_syncpt_put(struct host1x_syncpt *sp);
+struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
+					  unsigned long flags,
+					  const char *name);
+
+struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp);
+u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base);
+
+void host1x_syncpt_release_vblank_reservation(struct host1x_client *client,
+					      u32 syncpt_id);
+
+struct host1x_syncpt *host1x_syncpt_fd_get(int fd);
+
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold);
+int host1x_fence_create_fd(struct host1x_syncpt *sp, u32 threshold);
+int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold);
+
+/*
+ * host1x channel
+ */
+
+struct host1x_channel;
+struct host1x_job;
+
+struct host1x_channel *host1x_channel_request(struct host1x_client *client);
+struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
+void host1x_channel_put(struct host1x_channel *channel);
+int host1x_job_submit(struct host1x_job *job);
+
+/*
+ * host1x job
+ */
+
+#define HOST1X_RELOC_READ	(1 << 0)
+#define HOST1X_RELOC_WRITE	(1 << 1)
+
+struct host1x_reloc {
+	struct {
+		struct host1x_bo *bo;
+		unsigned long offset;
+	} cmdbuf;
+	struct {
+		struct host1x_bo *bo;
+		unsigned long offset;
+	} target;
+	unsigned long shift;
+	unsigned long flags;
+};
+
+struct host1x_job {
+	/* When refcount goes to zero, job can be freed */
+	struct kref ref;
+
+	/* List entry */
+	struct list_head list;
+
+	/* Channel where job is submitted to */
+	struct host1x_channel *channel;
+
+	/* client where the job originated */
+	struct host1x_client *client;
+
+	/* Gathers and their memory */
+	struct host1x_job_cmd *cmds;
+	unsigned int num_cmds;
+
+	/* Array of handles to be pinned & unpinned */
+	struct host1x_reloc *relocs;
+	unsigned int num_relocs;
+	struct host1x_job_unpin_data *unpins;
+	unsigned int num_unpins;
+
+	dma_addr_t *addr_phys;
+	dma_addr_t *gather_addr_phys;
+	dma_addr_t *reloc_addr_phys;
+
+	/* Sync point id, number of increments and end related to the submit */
+	struct host1x_syncpt *syncpt;
+	u32 syncpt_incrs;
+	u32 syncpt_end;
+
+	/* Completion waiter ref */
+	void *waiter;
+
+	/* Maximum time to wait for this job */
+	unsigned int timeout;
+
+	/* Job has timed out and should be released */
+	bool cancelled;
+
+	/* Index and number of slots used in the push buffer */
+	unsigned int first_get;
+	unsigned int num_slots;
+
+	/* Copy of gathers */
+	size_t gather_copy_size;
+	dma_addr_t gather_copy;
+	u8 *gather_copy_mapped;
+
+	/* Check if register is marked as an address reg */
+	int (*is_addr_reg)(struct device *dev, u32 class, u32 reg);
+
+	/* Check if class belongs to the unit */
+	int (*is_valid_class)(u32 class);
+
+	/* Request a SETCLASS to this class */
+	u32 class;
+
+	/* Add a channel wait for previous ops to complete */
+	bool serialize;
+
+	/* Fast-forward syncpoint increments on job timeout */
+	bool syncpt_recovery;
+
+	/* Callback called when job is freed */
+	void (*release)(struct host1x_job *job);
+	void *user_data;
+};
+
+struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
+				    u32 num_cmdbufs, u32 num_relocs);
+void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
+			   unsigned int words, unsigned int offset);
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh);
+struct host1x_job *host1x_job_get(struct host1x_job *job);
+void host1x_job_put(struct host1x_job *job);
+int host1x_job_pin(struct host1x_job *job, struct device *dev);
+void host1x_job_unpin(struct host1x_job *job);
+
+/*
+ * subdevice probe infrastructure
+ */
+
+struct host1x_device;
+
+/**
+ * struct host1x_driver - host1x logical device driver
+ * @driver: core driver
+ * @subdevs: table of OF device IDs matching subdevices for this driver
+ * @list: list node for the driver
+ * @probe: called when the host1x logical device is probed
+ * @remove: called when the host1x logical device is removed
+ * @shutdown: called when the host1x logical device is shut down
+ */
+struct host1x_driver {
+	struct device_driver driver;
+
+	const struct of_device_id *subdevs;
+	struct list_head list;
+
+	int (*probe)(struct host1x_device *device);
+	int (*remove)(struct host1x_device *device);
+	void (*shutdown)(struct host1x_device *device);
+};
+
+static inline struct host1x_driver *
+to_host1x_driver(struct device_driver *driver)
+{
+	return container_of(driver, struct host1x_driver, driver);
+}
+
+int host1x_driver_register_full(struct host1x_driver *driver,
+				struct module *owner);
+void host1x_driver_unregister(struct host1x_driver *driver);
+
+#define host1x_driver_register(driver) \
+	host1x_driver_register_full(driver, THIS_MODULE)
+
+struct host1x_device {
+	struct host1x_driver *driver;
+	struct list_head list;
+	struct device dev;
+
+	struct mutex subdevs_lock;
+	struct list_head subdevs;
+	struct list_head active;
+
+	struct mutex clients_lock;
+	struct list_head clients;
+
+	bool registered;
+
+	struct device_dma_parameters dma_parms;
+};
+
+static inline struct host1x_device *to_host1x_device(struct device *dev)
+{
+	return container_of(dev, struct host1x_device, dev);
+}
+
+int host1x_device_init(struct host1x_device *device);
+int host1x_device_exit(struct host1x_device *device);
+
+int __host1x_client_register(struct host1x_client *client,
+			     struct lock_class_key *key);
+#define host1x_client_register(class) \
+	({ \
+		static struct lock_class_key __key; \
+		__host1x_client_register(class, &__key); \
+	})
+
+int host1x_client_unregister(struct host1x_client *client);
+
+int host1x_client_suspend(struct host1x_client *client);
+int host1x_client_resume(struct host1x_client *client);
+
+struct tegra_mipi_device;
+
+struct tegra_mipi_device *tegra_mipi_request(struct device *device,
+					     struct device_node *np);
+void tegra_mipi_free(struct tegra_mipi_device *device);
+int tegra_mipi_enable(struct tegra_mipi_device *device);
+int tegra_mipi_disable(struct tegra_mipi_device *device);
+int tegra_mipi_start_calibration(struct tegra_mipi_device *device);
+int tegra_mipi_finish_calibration(struct tegra_mipi_device *device);
+
+#endif
diff --git a/drivers/gpu/host1x/include/uapi/linux/host1x-next.h b/drivers/gpu/host1x/include/uapi/linux/host1x-next.h
new file mode 100644
index 00000000..9c8fb942
--- /dev/null
+++ b/drivers/gpu/host1x/include/uapi/linux/host1x-next.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _UAPI__LINUX_HOST1X_H
+#define _UAPI__LINUX_HOST1X_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+struct host1x_allocate_syncpoint {
+	/**
+	 * @fd: [out]
+	 *
+	 * New file descriptor representing the allocated syncpoint.
+	 */
+	__s32 fd;
+
+	__u32 reserved[3];
+};
+
+struct host1x_syncpoint_info {
+	/**
+	 * @id: [out]
+	 *
+	 * System-global ID of the syncpoint.
+	 */
+	__u32 id;
+
+	__u32 reserved[3];
+};
+
+struct host1x_syncpoint_increment {
+	/**
+	 * @count: [in]
+	 *
+	 * Number of times to increment the syncpoint. The syncpoint can
+	 * be observed at in-between values, but each increment is atomic.
+	 */
+	__u32 count;
+};
+
+struct host1x_read_syncpoint {
+	/**
+	 * @id: [in]
+	 *
+	 * ID of the syncpoint to read.
+	 */
+	__u32 id;
+
+	/**
+	 * @value: [out]
+	 *
+	 * Current value of the syncpoint.
+	 */
+	__u32 value;
+};
+
+struct host1x_create_fence {
+	/**
+	 * @id: [in]
+	 *
+	 * ID of the syncpoint to create a fence for.
+	 */
+	__u32 id;
+
+	/**
+	 * @threshold: [in]
+	 *
+	 * When the syncpoint reaches this value, the fence will be signaled.
+	 * The syncpoint is considered to have reached the threshold when the
+	 * following condition is true:
+	 *
+	 * 	((value - threshold) & 0x80000000U) == 0U
+	 *
+	 */
+	__u32 threshold;
+
+	/**
+	 * @fence_fd: [out]
+	 *
+	 * New sync_file file descriptor containing the created fence.
+	 */
+	__s32 fence_fd;
+
+	__u32 reserved[1];
+};
+
+struct host1x_fence_extract_fence {
+	__u32 id;
+	__u32 threshold;
+};
+
+struct host1x_fence_extract {
+	/**
+	 * @fence_fd: [in]
+	 *
+	 * sync_file file descriptor
+	 */
+	__s32 fence_fd;
+
+	/**
+	 * @num_fences: [in,out]
+	 *
+	 * In: size of the `fences_ptr` array counted in elements.
+	 * Out: required size of the `fences_ptr` array counted in elements.
+	 */
+	__u32 num_fences;
+
+	/**
+	 * @fences_ptr: [in]
+	 *
+	 * Pointer to array of `struct host1x_fence_extract_fence`.
+	 */
+	__u64 fences_ptr;
+
+	__u32 reserved[2];
+};
+
+#define HOST1X_IOCTL_ALLOCATE_SYNCPOINT  _IOWR('X', 0x00, struct host1x_allocate_syncpoint)
+#define HOST1X_IOCTL_READ_SYNCPOINT      _IOR ('X', 0x01, struct host1x_read_syncpoint)
+#define HOST1X_IOCTL_CREATE_FENCE        _IOWR('X', 0x02, struct host1x_create_fence)
+#define HOST1X_IOCTL_SYNCPOINT_INFO      _IOWR('X', 0x03, struct host1x_syncpoint_info)
+#define HOST1X_IOCTL_SYNCPOINT_INCREMENT _IOWR('X', 0x04, struct host1x_syncpoint_increment)
+#define HOST1X_IOCTL_FENCE_EXTRACT       _IOWR('X', 0x05, struct host1x_fence_extract)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif

From 7171a758d3956d91138fea740e16bd1f8e14667d Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Mon, 29 Mar 2021 16:38:36 +0300
Subject: [PATCH 19/73] UPSTREAM: gpu: host1x: Fix Tegra194 syncpt interrupt
 threshold

Syncpoint interrupts are not working as expected on Tegra194. The
problem is that the syncpoint interrupt threshold being used is the
global interrupt threshold and not the virtual interrupt threshold.
Fix this by using the virtual interrupt threshold which aligns with
downstream.

JIRA LS-34

Change-Id: I4a3191c12298d4f0af264fd1f89754171a3829e9
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2498820
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/hw/hw_host1x07_vm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/hw/hw_host1x07_vm.h b/drivers/gpu/host1x/hw/hw_host1x07_vm.h
index 3058b3c9..b766851d 100644
--- a/drivers/gpu/host1x/hw/hw_host1x07_vm.h
+++ b/drivers/gpu/host1x/hw/hw_host1x07_vm.h
@@ -29,6 +29,6 @@
 #define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x)	(0x652c + 4 * (x))
 #define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x)	(0x6590 + 4 * (x))
 #define HOST1X_SYNC_SYNCPT(x)				(0x8080 + 4 * (x))
-#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0x8d00 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0x9980 + 4 * (x))
 #define HOST1X_SYNC_SYNCPT_CH_APP(x)			(0xa604 + 4 * (x))
 #define HOST1X_SYNC_SYNCPT_CH_APP_CH(v)			(((v) & 0x3f) << 8)

From 558557ab0260d11d14a08afa3fa2f33845d54af7 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 13 Apr 2021 14:47:28 +0300
Subject: [PATCH 20/73] drm/tegra: Add NVDEC support

Support NVDEC (both instances on Tegra194).

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I8f6c841a16103fbf37bff3a3440d69059f2644f0
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2532426
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/dev.c                       | 18 ++++++++++++++++++
 drivers/gpu/host1x/include/linux/host1x-next.h |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 8b50fbb2..1aebefc3 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -132,6 +132,12 @@ static const struct host1x_sid_entry tegra186_sid_table[] = {
 		.offset = 0x30,
 		.limit = 0x34
 	},
+	{
+		/* NVDEC */
+		.base = 0x1b00,
+		.offset = 0x30,
+		.limit = 0x34
+	},
 };
 
 static const struct host1x_info host1x06_info = {
@@ -156,6 +162,18 @@ static const struct host1x_sid_entry tegra194_sid_table[] = {
 		.offset = 0x30,
 		.limit = 0x34
 	},
+	{
+		/* NVDEC */
+		.base = 0x1b00,
+		.offset = 0x30,
+		.limit = 0x34
+	},
+	{
+		/* NVDEC1 */
+		.base = 0x1bc0,
+		.offset = 0x30,
+		.limit = 0x34
+	},
 };
 
 static const struct host1x_info host1x07_info = {
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 5890f91d..4b637aa6 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -15,6 +15,8 @@ enum host1x_class {
 	HOST1X_CLASS_GR2D_SB = 0x52,
 	HOST1X_CLASS_VIC = 0x5D,
 	HOST1X_CLASS_GR3D = 0x60,
+	HOST1X_CLASS_NVDEC = 0xF0,
+	HOST1X_CLASS_NVDEC1 = 0xF5,
 };
 
 struct host1x;

From 44c9f6b676dea1df2e3fe8cb759f12997721dbbd Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Mon, 29 Mar 2021 16:38:27 +0300
Subject: [PATCH 21/73] UPSTREAM: gpu:host1x: Use different lock classes for
 each client

To avoid false lockdep warnings, give each client lock a different
lock class, passed from the initialization site by macro.

Change-Id: Iacfb5683a0ed975911459f43258c4704d8e7fd9c
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2545955
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/bus.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 01538262..cc5e30a2 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -705,8 +705,9 @@ void host1x_driver_unregister(struct host1x_driver *driver)
 EXPORT_SYMBOL(host1x_driver_unregister);
 
 /**
- * host1x_client_register() - register a host1x client
+ * __host1x_client_register() - register a host1x client
  * @client: host1x client
+ * @key: lock class key for the client-specific mutex
  *
  * Registers a host1x client with each host1x controller instance. Note that
  * each client will only match their parent host1x controller and will only be
@@ -716,7 +717,7 @@ EXPORT_SYMBOL(host1x_driver_unregister);
  * &host1x_client_ops.init implementation.
  */
 int __host1x_client_register(struct host1x_client *client,
-			   struct lock_class_key *key)
+			     struct lock_class_key *key)
 {
 	struct host1x *host1x;
 	int err;

From f6a7adab82baa528f96dcef494f19a4644050f01 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 26 Mar 2021 15:51:37 +0100
Subject: [PATCH 22/73] UPSTREAM: gpu:host1x: Add early init and late exit
 callbacks

These callbacks can be used by client drivers to run code during early
init and during late exit. Early init callbacks are run prior to the
regular init callbacks while late exit callbacks run after the regular
exit callbacks.

Change-Id: Ic89babe0dc343459ac1e7e7fb705d4091b74a6af
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2545956
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/bus.c                      | 31 +++++++++++++++++++
 .../gpu/host1x/include/linux/host1x-next.h    |  4 +++
 2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index cc5e30a2..b4029371 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -196,6 +196,17 @@ int host1x_device_init(struct host1x_device *device)
 
 	mutex_lock(&device->clients_lock);
 
+	list_for_each_entry(client, &device->clients, list) {
+		if (client->ops && client->ops->early_init) {
+			err = client->ops->early_init(client);
+			if (err < 0) {
+				dev_err(&device->dev, "failed to early initialize %s: %d\n",
+					dev_name(client->dev), err);
+				goto teardown_late;
+			}
+		}
+	}
+
 	list_for_each_entry(client, &device->clients, list) {
 		if (client->ops && client->ops->init) {
 			err = client->ops->init(client);
@@ -217,6 +228,14 @@ teardown:
 		if (client->ops->exit)
 			client->ops->exit(client);
 
+	/* reset client to end of list for late teardown */
+	client = list_entry(&device->clients, struct host1x_client, list);
+
+teardown_late:
+	list_for_each_entry_continue_reverse(client, &device->clients, list)
+		if (client->ops->late_exit)
+			client->ops->late_exit(client);
+
 	mutex_unlock(&device->clients_lock);
 	return err;
 }
@@ -251,6 +270,18 @@ int host1x_device_exit(struct host1x_device *device)
 		}
 	}
 
+	list_for_each_entry_reverse(client, &device->clients, list) {
+		if (client->ops && client->ops->late_exit) {
+			err = client->ops->late_exit(client);
+			if (err < 0) {
+				dev_err(&device->dev, "failed to late cleanup %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&device->clients_lock);
+				return err;
+			}
+		}
+	}
+
 	mutex_unlock(&device->clients_lock);
 
 	return 0;
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 4b637aa6..fc738f83 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -27,14 +27,18 @@ u64 host1x_get_dma_mask(struct host1x *host1x);
 
 /**
  * struct host1x_client_ops - host1x client operations
+ * @early_init: host1x client early initialization code
  * @init: host1x client initialization code
  * @exit: host1x client tear down code
+ * @late_exit: host1x client late tear down code
  * @suspend: host1x client suspend code
  * @resume: host1x client resume code
  */
 struct host1x_client_ops {
+	int (*early_init)(struct host1x_client *client);
 	int (*init)(struct host1x_client *client);
 	int (*exit)(struct host1x_client *client);
+	int (*late_exit)(struct host1x_client *client);
 	int (*suspend)(struct host1x_client *client);
 	int (*resume)(struct host1x_client *client);
 };

From f6e4773a0daf5ca5dd3a0d4ac3468cda7d3d979e Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 1 Apr 2021 17:41:04 +0200
Subject: [PATCH 23/73] UPSTREAM: gpu:host1x: Split up client initalization and
 registration

In some cases we may need to initialize the host1x client first before
registering it. This commit adds a new helper that will do nothing but
the initialization of the data structure.

At the same time, the initialization is removed from the registration
function. Note, however, that for simplicity we explicitly initialize
the client when the host1x_client_register() function is called, as
opposed to the low-level __host1x_client_register() function. This
allows existing callers to remain unchanged.

Change-Id: I504462a1749d8f9602f6b4075f1aec56eefb14e5
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2545957
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/bus.c                      | 30 +++++++++++++++----
 .../gpu/host1x/include/linux/host1x-next.h    | 30 +++++++++++++++----
 2 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index b4029371..2ce40a3e 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -735,6 +735,29 @@ void host1x_driver_unregister(struct host1x_driver *driver)
 }
 EXPORT_SYMBOL(host1x_driver_unregister);
 
+/**
+ * __host1x_client_init() - initialize a host1x client
+ * @client: host1x client
+ * @key: lock class key for the client-specific mutex
+ */
+void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key)
+{
+	INIT_LIST_HEAD(&client->list);
+	__mutex_init(&client->lock, "host1x client lock", key);
+	client->usecount = 0;
+}
+EXPORT_SYMBOL(__host1x_client_init);
+
+/**
+ * host1x_client_exit() - uninitialize a host1x client
+ * @client: host1x client
+ */
+void host1x_client_exit(struct host1x_client *client)
+{
+	mutex_destroy(&client->lock);
+}
+EXPORT_SYMBOL(host1x_client_exit);
+
 /**
  * __host1x_client_register() - register a host1x client
  * @client: host1x client
@@ -747,16 +770,11 @@ EXPORT_SYMBOL(host1x_driver_unregister);
  * device and call host1x_device_init(), which will in turn call each client's
  * &host1x_client_ops.init implementation.
  */
-int __host1x_client_register(struct host1x_client *client,
-			     struct lock_class_key *key)
+int __host1x_client_register(struct host1x_client *client)
 {
 	struct host1x *host1x;
 	int err;
 
-	INIT_LIST_HEAD(&client->list);
-	__mutex_init(&client->lock, "host1x client lock", key);
-	client->usecount = 0;
-
 	mutex_lock(&devices_lock);
 
 	list_for_each_entry(host1x, &devices, list) {
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index fc738f83..7987d219 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -354,12 +354,30 @@ static inline struct host1x_device *to_host1x_device(struct device *dev)
 int host1x_device_init(struct host1x_device *device);
 int host1x_device_exit(struct host1x_device *device);
 
-int __host1x_client_register(struct host1x_client *client,
-			     struct lock_class_key *key);
-#define host1x_client_register(class) \
-	({ \
-		static struct lock_class_key __key; \
-		__host1x_client_register(class, &__key); \
+void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key);
+void host1x_client_exit(struct host1x_client *client);
+
+#define host1x_client_init(client)			\
+	({						\
+		static struct lock_class_key __key;	\
+		__host1x_client_init(client, &__key);	\
+	})
+
+int __host1x_client_register(struct host1x_client *client);
+
+/*
+ * Note that this wrapper calls __host1x_client_init() for compatibility
+ * with existing callers. Callers that want to separately initialize and
+ * register a host1x client must first initialize using either of the
+ * __host1x_client_init() or host1x_client_init() functions and then use
+ * the low-level __host1x_client_register() function to avoid the client
+ * getting reinitialized.
+ */
+#define host1x_client_register(client)			\
+	({						\
+		static struct lock_class_key __key;	\
+		__host1x_client_init(client, &__key);	\
+		__host1x_client_register(client);	\
 	})
 
 int host1x_client_unregister(struct host1x_client *client);

From d42946f160500ce82f80cd4d0f13de9479feda98 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 2 Sep 2021 22:33:09 +0200
Subject: [PATCH 24/73] gpu: host1x: Plug potential memory leak

The memory allocated for a DMA fence could be leaked if the code failed
to allocate the waiter object. Make sure to release the fence allocation
on failure.

Change-Id: Ie5608246225f17883d005b7975bed1ca4742b321
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2604935
(cherry picked from commit 3142088147c0a8b69cd692405c98a9cdb96b6006)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2605432
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/fence.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index e96ad93f..ea4a0c7d 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -149,8 +149,10 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
 		return ERR_PTR(-ENOMEM);
 
 	fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL);
-	if (!fence->waiter)
+	if (!fence->waiter) {
+		kfree(fence);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	fence->sp = sp;
 	fence->threshold = threshold;

From d802ae96a70174bf8f33552501b81c267d204a7d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 9 Sep 2021 15:51:24 +0200
Subject: [PATCH 25/73] drm/tegra: Implement correct DMA-BUF semantics

DMA-BUF requires that each device that accesses a DMA-BUF attaches to it
separately. To do so the host1x_bo_pin() and host1x_bo_unpin() functions
need to be reimplemented so that they can return a mapping, which either
represents an attachment or a map of the driver's own GEM object.

Bug 200768479

Signed-off-by: Thierry Reding <treding@nvidia.com>
Change-Id: Ia380b7dcc371ce47f5f35d44a60fbd6b4ab9d636
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2596398
(cherry picked from commit 28960586000fca025689edfd45645ab28e497bca)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2620137
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 .../gpu/host1x/include/linux/host1x-next.h    |  31 ++--
 drivers/gpu/host1x/job.c                      | 161 +++++++-----------
 drivers/gpu/host1x/job.h                      |   6 +-
 3 files changed, 80 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 7987d219..5198e54b 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -7,6 +7,7 @@
 #define __LINUX_HOST1X_H
 
 #include <linux/device.h>
+#include <linux/dma-direction.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -84,12 +85,24 @@ struct host1x_client {
 struct host1x_bo;
 struct sg_table;
 
+struct host1x_bo_mapping {
+	struct dma_buf_attachment *attach;
+	enum dma_data_direction direction;
+	struct list_head list;
+	struct host1x_bo *bo;
+	struct sg_table *sgt;
+	unsigned int chunks;
+	struct device *dev;
+	dma_addr_t phys;
+	size_t size;
+};
+
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
-	struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo,
-				dma_addr_t *phys);
-	void (*unpin)(struct device *dev, struct sg_table *sgt);
+	struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo,
+					 enum dma_data_direction direction);
+	void (*unpin)(struct host1x_bo_mapping *map);
 	void *(*mmap)(struct host1x_bo *bo);
 	void (*munmap)(struct host1x_bo *bo, void *addr);
 };
@@ -114,17 +127,15 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline struct sg_table *host1x_bo_pin(struct device *dev,
-					     struct host1x_bo *bo,
-					     dma_addr_t *phys)
+static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+						      enum dma_data_direction dir)
 {
-	return bo->ops->pin(dev, bo, phys);
+	return bo->ops->pin(dev, bo, dir);
 }
 
-static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo,
-				   struct sg_table *sgt)
+static inline void host1x_bo_unpin(struct host1x_bo_mapping *map)
 {
-	bo->ops->unpin(dev, sgt);
+	map->bo->ops->unpin(map);
 }
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index ae484d86..3aa8854d 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -125,11 +125,11 @@ EXPORT_SYMBOL(host1x_job_add_wait);
 
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
+	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
 	struct host1x_client *client = job->client;
 	struct device *dev = client->dev;
 	struct host1x_job_gather *g;
 	struct iommu_domain *domain;
-	struct sg_table *sgt;
 	unsigned int i;
 	int err;
 
@@ -138,7 +138,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 
 	for (i = 0; i < job->num_relocs; i++) {
 		struct host1x_reloc *reloc = &job->relocs[i];
-		dma_addr_t phys_addr, *phys;
+		enum dma_data_direction direction;
+		struct host1x_bo_mapping *map;
+		struct host1x_bo *bo;
 
 		reloc->target.bo = host1x_bo_get(reloc->target.bo);
 		if (!reloc->target.bo) {
@@ -146,64 +148,44 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		/*
-		 * If the client device is not attached to an IOMMU, the
-		 * physical address of the buffer object can be used.
-		 *
-		 * Similarly, when an IOMMU domain is shared between all
-		 * host1x clients, the IOVA is already available, so no
-		 * need to map the buffer object again.
-		 *
-		 * XXX Note that this isn't always safe to do because it
-		 * relies on an assumption that no cache maintenance is
-		 * needed on the buffer objects.
-		 */
-		if (!domain || client->group)
-			phys = &phys_addr;
-		else
-			phys = NULL;
+		bo = reloc->target.bo;
 
-		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
-		if (IS_ERR(sgt)) {
-			err = PTR_ERR(sgt);
+		switch (reloc->flags & mask) {
+		case HOST1X_RELOC_READ:
+			direction = DMA_TO_DEVICE;
+			break;
+
+		case HOST1X_RELOC_WRITE:
+			direction = DMA_FROM_DEVICE;
+			break;
+
+		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+			direction = DMA_BIDIRECTIONAL;
+			break;
+
+		default:
+			err = -EINVAL;
 			goto unpin;
 		}
 
-		if (sgt) {
-			unsigned long mask = HOST1X_RELOC_READ |
-					     HOST1X_RELOC_WRITE;
-			enum dma_data_direction dir;
-
-			switch (reloc->flags & mask) {
-			case HOST1X_RELOC_READ:
-				dir = DMA_TO_DEVICE;
-				break;
-
-			case HOST1X_RELOC_WRITE:
-				dir = DMA_FROM_DEVICE;
-				break;
-
-			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
-				dir = DMA_BIDIRECTIONAL;
-				break;
-
-			default:
-				err = -EINVAL;
-				goto unpin;
-			}
-
-			err = dma_map_sgtable(dev, sgt, dir, 0);
-			if (err)
-				goto unpin;
-
-			job->unpins[job->num_unpins].dev = dev;
-			job->unpins[job->num_unpins].dir = dir;
-			phys_addr = sg_dma_address(sgt->sgl);
+		map = host1x_bo_pin(dev, bo, direction);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
+			goto unpin;
 		}
 
-		job->addr_phys[job->num_unpins] = phys_addr;
-		job->unpins[job->num_unpins].bo = reloc->target.bo;
-		job->unpins[job->num_unpins].sgt = sgt;
+		/*
+		 * host1x clients are generally not able to do scatter-gather themselves, so fail
+		 * if the buffer is discontiguous and we fail to map its SG table to a single
+		 * contiguous chunk of I/O virtual memory.
+		 */
+		if (map->chunks > 1) {
+			err = -EINVAL;
+			goto unpin;
+		}
+
+		job->addr_phys[job->num_unpins] = map->phys;
+		job->unpins[job->num_unpins].map = map;
 		job->num_unpins++;
 	}
 
@@ -215,12 +197,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 		return 0;
 
 	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_bo_mapping *map;
 		size_t gather_size = 0;
 		struct scatterlist *sg;
-		dma_addr_t phys_addr;
 		unsigned long shift;
 		struct iova *alloc;
-		dma_addr_t *phys;
 		unsigned int j;
 
 		if (job->cmds[i].is_wait)
@@ -234,25 +215,16 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		/**
-		 * If the host1x is not attached to an IOMMU, there is no need
-		 * to map the buffer object for the host1x, since the physical
-		 * address can simply be used.
-		 */
-		if (!iommu_get_domain_for_dev(host->dev))
-			phys = &phys_addr;
-		else
-			phys = NULL;
-
-		sgt = host1x_bo_pin(host->dev, g->bo, phys);
-		if (IS_ERR(sgt)) {
-			err = PTR_ERR(sgt);
-			goto put;
+		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
+			goto unpin;
 		}
 
 		if (host->domain) {
-			for_each_sgtable_sg(sgt, sg, j)
+			for_each_sgtable_sg(map->sgt, sg, j)
 				gather_size += sg->length;
+
 			gather_size = iova_align(&host->iova, gather_size);
 
 			shift = iova_shift(&host->iova);
@@ -263,33 +235,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 				goto put;
 			}
 
-			err = iommu_map_sgtable(host->domain,
-					iova_dma_addr(&host->iova, alloc),
-					sgt, IOMMU_READ);
+			err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
+						map->sgt, IOMMU_READ);
 			if (err == 0) {
 				__free_iova(&host->iova, alloc);
 				err = -EINVAL;
 				goto put;
 			}
 
-			job->unpins[job->num_unpins].size = gather_size;
-			phys_addr = iova_dma_addr(&host->iova, alloc);
-		} else if (sgt) {
-			err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
-			if (err)
-				goto put;
-
-			job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
-			job->unpins[job->num_unpins].dev = host->dev;
-			phys_addr = sg_dma_address(sgt->sgl);
+			map->phys = iova_dma_addr(&host->iova, alloc);
+			map->size = gather_size;
 		}
 
-		job->addr_phys[job->num_unpins] = phys_addr;
-		job->gather_addr_phys[i] = phys_addr;
-
-		job->unpins[job->num_unpins].bo = g->bo;
-		job->unpins[job->num_unpins].sgt = sgt;
+		job->addr_phys[job->num_unpins] = map->phys;
+		job->unpins[job->num_unpins].map = map;
 		job->num_unpins++;
+
+		job->gather_addr_phys[i] = map->phys;
 	}
 
 	return 0;
@@ -681,23 +643,16 @@ void host1x_job_unpin(struct host1x_job *job)
 	unsigned int i;
 
 	for (i = 0; i < job->num_unpins; i++) {
-		struct host1x_job_unpin_data *unpin = &job->unpins[i];
-		struct device *dev = unpin->dev ?: host->dev;
-		struct sg_table *sgt = unpin->sgt;
+		struct host1x_bo_mapping *map = job->unpins[i].map;
+		struct host1x_bo *bo = map->bo;
 
-		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
-		    unpin->size && host->domain) {
-			iommu_unmap(host->domain, job->addr_phys[i],
-				    unpin->size);
-			free_iova(&host->iova,
-				iova_pfn(&host->iova, job->addr_phys[i]));
+		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && map->size && host->domain) {
+			iommu_unmap(host->domain, job->addr_phys[i], map->size);
+			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
 		}
 
-		if (unpin->dev && sgt)
-			dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);
-
-		host1x_bo_unpin(dev, unpin->bo, sgt);
-		host1x_bo_put(unpin->bo);
+		host1x_bo_unpin(map);
+		host1x_bo_put(bo);
 	}
 
 	job->num_unpins = 0;
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 33adfaed..657e2400 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -33,11 +33,7 @@ struct host1x_job_cmd {
 };
 
 struct host1x_job_unpin_data {
-	struct host1x_bo *bo;
-	struct sg_table *sgt;
-	struct device *dev;
-	size_t size;
-	enum dma_data_direction dir;
+	struct host1x_bo_mapping *map;
 };
 
 /*

From 2a938c39ec31380098e1dcae8a1afb7a98d03be1 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 7 Feb 2020 16:50:52 +0100
Subject: [PATCH 26/73] drm/tegra: Implement buffer object cache

This cache is used to avoid mapping and unmapping buffer objects
unnecessarily. Mappings are cached per client and stay hot until
the buffer object is destroyed.

Bug 200768479

Signed-off-by: Thierry Reding <treding@nvidia.com>
Change-Id: Ia2efe2b77b5043b665bae403d9eba4698e6a5228
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2596399
(cherry picked from commit d23156d94fa9499256e8082250e3bc02559f9c8d)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2620152
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/bus.c                      | 78 +++++++++++++++++++
 drivers/gpu/host1x/dev.c                      |  2 +
 drivers/gpu/host1x/dev.h                      |  1 +
 .../gpu/host1x/include/linux/host1x-next.h    | 52 ++++++++++---
 drivers/gpu/host1x/job.c                      |  4 +-
 5 files changed, 125 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 2ce40a3e..c02360cf 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -742,6 +742,7 @@ EXPORT_SYMBOL(host1x_driver_unregister);
  */
 void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key)
 {
+	host1x_bo_cache_init(&client->cache);
 	INIT_LIST_HEAD(&client->list);
 	__mutex_init(&client->lock, "host1x client lock", key);
 	client->usecount = 0;
@@ -830,6 +831,8 @@ int host1x_client_unregister(struct host1x_client *client)
 
 	mutex_unlock(&clients_lock);
 
+	host1x_bo_cache_destroy(&client->cache);
+
 	return 0;
 }
 EXPORT_SYMBOL(host1x_client_unregister);
@@ -904,3 +907,78 @@ unlock:
 	return err;
 }
 EXPORT_SYMBOL(host1x_client_resume);
+
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+					enum dma_data_direction dir,
+					struct host1x_bo_cache *cache)
+{
+	struct host1x_bo_mapping *mapping;
+
+	if (cache) {
+		mutex_lock(&cache->lock);
+
+		list_for_each_entry(mapping, &cache->mappings, entry) {
+			if (mapping->bo == bo && mapping->direction == dir) {
+				kref_get(&mapping->ref);
+				goto unlock;
+			}
+		}
+	}
+
+	mapping = bo->ops->pin(dev, bo, dir);
+	if (IS_ERR(mapping))
+		goto unlock;
+
+	spin_lock(&mapping->bo->lock);
+	list_add_tail(&mapping->list, &bo->mappings);
+	spin_unlock(&mapping->bo->lock);
+
+	if (cache) {
+		INIT_LIST_HEAD(&mapping->entry);
+		mapping->cache = cache;
+
+		list_add_tail(&mapping->entry, &cache->mappings);
+
+		/* bump reference count to track the copy in the cache */
+		kref_get(&mapping->ref);
+	}
+
+unlock:
+	if (cache)
+		mutex_unlock(&cache->lock);
+
+	return mapping;
+}
+EXPORT_SYMBOL(host1x_bo_pin);
+
+static void __host1x_bo_unpin(struct kref *ref)
+{
+	struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref);
+
+	/*
+	 * When the last reference of the mapping goes away, make sure to remove the mapping from
+	 * the cache.
+	 */
+	if (mapping->cache)
+		list_del(&mapping->entry);
+
+	spin_lock(&mapping->bo->lock);
+	list_del(&mapping->list);
+	spin_unlock(&mapping->bo->lock);
+
+	mapping->bo->ops->unpin(mapping);
+}
+
+void host1x_bo_unpin(struct host1x_bo_mapping *mapping)
+{
+	struct host1x_bo_cache *cache = mapping->cache;
+
+	if (cache)
+		mutex_lock(&cache->lock);
+
+	kref_put(&mapping->ref, __host1x_bo_unpin);
+
+	if (cache)
+		mutex_unlock(&cache->lock);
+}
+EXPORT_SYMBOL(host1x_bo_unpin);
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 1aebefc3..6364f01b 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -404,6 +404,7 @@ static int host1x_probe(struct platform_device *pdev)
 	if (syncpt_irq < 0)
 		return syncpt_irq;
 
+	host1x_bo_cache_init(&host->cache);
 	mutex_init(&host->devices_lock);
 	INIT_LIST_HEAD(&host->devices);
 	INIT_LIST_HEAD(&host->list);
@@ -539,6 +540,7 @@ static int host1x_remove(struct platform_device *pdev)
 	reset_control_assert(host->rst);
 	clk_disable_unprepare(host->clk);
 	host1x_iommu_exit(host);
+	host1x_bo_cache_destroy(&host->cache);
 
 	return 0;
 }
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index e360bc4a..95c78451 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -151,6 +151,7 @@ struct host1x {
 
 	struct device_dma_parameters dma_parms;
 
+	struct host1x_bo_cache cache;
 	struct host1x_uapi uapi;
 };
 
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 5198e54b..1ab8088b 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -8,6 +8,7 @@
 
 #include <linux/device.h>
 #include <linux/dma-direction.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -26,6 +27,28 @@ struct iommu_group;
 
 u64 host1x_get_dma_mask(struct host1x *host1x);
 
+/**
+ * struct host1x_bo_cache - host1x buffer object cache
+ * @mappings: list of mappings
+ * @lock: synchronizes accesses to the list of mappings
+ */
+struct host1x_bo_cache {
+	struct list_head mappings;
+	struct mutex lock;
+};
+
+static inline void host1x_bo_cache_init(struct host1x_bo_cache *cache)
+{
+	INIT_LIST_HEAD(&cache->mappings);
+	mutex_init(&cache->lock);
+}
+
+static inline void host1x_bo_cache_destroy(struct host1x_bo_cache *cache)
+{
+	/* XXX warn if not empty? */
+	mutex_destroy(&cache->lock);
+}
+
 /**
  * struct host1x_client_ops - host1x client operations
  * @early_init: host1x client early initialization code
@@ -76,6 +99,8 @@ struct host1x_client {
 	struct host1x_client *parent;
 	unsigned int usecount;
 	struct mutex lock;
+
+	struct host1x_bo_cache cache;
 };
 
 /*
@@ -86,6 +111,7 @@ struct host1x_bo;
 struct sg_table;
 
 struct host1x_bo_mapping {
+	struct kref ref;
 	struct dma_buf_attachment *attach;
 	enum dma_data_direction direction;
 	struct list_head list;
@@ -95,8 +121,16 @@ struct host1x_bo_mapping {
 	struct device *dev;
 	dma_addr_t phys;
 	size_t size;
+
+	struct host1x_bo_cache *cache;
+	struct list_head entry;
 };
 
+static inline struct host1x_bo_mapping *to_host1x_bo_mapping(struct kref *ref)
+{
+	return container_of(ref, struct host1x_bo_mapping, ref);
+}
+
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
@@ -109,11 +143,15 @@ struct host1x_bo_ops {
 
 struct host1x_bo {
 	const struct host1x_bo_ops *ops;
+	struct list_head mappings;
+	spinlock_t lock;
 };
 
 static inline void host1x_bo_init(struct host1x_bo *bo,
 				  const struct host1x_bo_ops *ops)
 {
+	INIT_LIST_HEAD(&bo->mappings);
+	spin_lock_init(&bo->lock);
 	bo->ops = ops;
 }
 
@@ -127,16 +165,10 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
-						      enum dma_data_direction dir)
-{
-	return bo->ops->pin(dev, bo, dir);
-}
-
-static inline void host1x_bo_unpin(struct host1x_bo_mapping *map)
-{
-	map->bo->ops->unpin(map);
-}
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+					enum dma_data_direction dir,
+					struct host1x_bo_cache *cache);
+void host1x_bo_unpin(struct host1x_bo_mapping *map);
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)
 {
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 3aa8854d..b5896749 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -168,7 +168,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		map = host1x_bo_pin(dev, bo, direction);
+		map = host1x_bo_pin(dev, bo, direction, &client->cache);
 		if (IS_ERR(map)) {
 			err = PTR_ERR(map);
 			goto unpin;
@@ -215,7 +215,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE);
+		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, &host->cache);
 		if (IS_ERR(map)) {
 			err = PTR_ERR(map);
 			goto unpin;

From 7df2aa44f2fa35453577ec0f28696c921fd5438d Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 12 Mar 2021 17:39:49 +0000
Subject: [PATCH 27/73] drm/tegra: Add support for NVENC and NVJPG

Add support for the Host1x NVENC and NVJPG engines.

JIRA LS-411

Change-Id: Ied1c64c17d337e571512c053ef9552642faa430c
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2490297
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/dev.c                      | 30 +++++++++++++++++++
 .../gpu/host1x/include/linux/host1x-next.h    |  3 ++
 2 files changed, 33 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 6364f01b..429efe17 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -132,12 +132,24 @@ static const struct host1x_sid_entry tegra186_sid_table[] = {
 		.offset = 0x30,
 		.limit = 0x34
 	},
+	{
+		/* NVENC */
+		.base = 0x1af8,
+		.offset = 0x30,
+		.limit = 0x34
+	},
 	{
 		/* NVDEC */
 		.base = 0x1b00,
 		.offset = 0x30,
 		.limit = 0x34
 	},
+	{
+		/* NVJPG */
+		.base = 0x1b08,
+		.offset = 0x30,
+		.limit = 0x34
+	},
 };
 
 static const struct host1x_info host1x06_info = {
@@ -162,12 +174,30 @@ static const struct host1x_sid_entry tegra194_sid_table[] = {
 		.offset = 0x30,
 		.limit = 0x34
 	},
+	{
+		/* NVENC */
+		.base = 0x1af8,
+		.offset = 0x30,
+		.limit = 0x34
+	},
 	{
 		/* NVDEC */
 		.base = 0x1b00,
 		.offset = 0x30,
 		.limit = 0x34
 	},
+	{
+		/* NVJPG */
+		.base = 0x1b08,
+		.offset = 0x30,
+		.limit = 0x34
+	},
+	{
+		/* NVENC1 */
+		.base = 0x1bb8,
+		.offset = 0x30,
+		.limit = 0x34
+	},
 	{
 		/* NVDEC1 */
 		.base = 0x1bc0,
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 1ab8088b..45e0a712 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -13,10 +13,13 @@
 
 enum host1x_class {
 	HOST1X_CLASS_HOST1X = 0x1,
+	HOST1X_CLASS_NVENC = 0x21,
+	HOST1X_CLASS_NVENC1 = 0x22,
 	HOST1X_CLASS_GR2D = 0x51,
 	HOST1X_CLASS_GR2D_SB = 0x52,
 	HOST1X_CLASS_VIC = 0x5D,
 	HOST1X_CLASS_GR3D = 0x60,
+	HOST1X_CLASS_NVJPG = 0xC0,
 	HOST1X_CLASS_NVDEC = 0xF0,
 	HOST1X_CLASS_NVDEC1 = 0xF5,
 };

From 174f4e87f659b1c4fd920afdd5d84e1b20b2d286 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Tue, 11 Jan 2022 12:27:45 +0000
Subject: [PATCH 28/73] gpu: host1x: Add SMMU SID information for DLA

Add the SMMU SID information for the DLA devices to the upstream
host1x driver to ensure the SMMU is configured correctly.

JIRA LS-410

Change-Id: I6a7b89a94463aeeaa3532cd0aa4f363b5b809940
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2653098
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 429efe17..a89f2350 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -192,6 +192,18 @@ static const struct host1x_sid_entry tegra194_sid_table[] = {
 		.offset = 0x30,
 		.limit = 0x34
 	},
+	{
+		/* NVDLA */
+		.base = 0x1ba8,
+		.offset = 0x30,
+		.limit = 0x34
+	},
+	{
+		/* NVDLA1 */
+		.base = 0x1bb0,
+		.offset = 0x30,
+		.limit = 0x34
+	},
 	{
 		/* NVENC1 */
 		.base = 0x1bb8,

From 65b76aee13f9aa56d7ba716ad01501b12d913bdd Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 2 Sep 2021 14:52:52 +0300
Subject: [PATCH 29/73] gpu: host1x: Merge upstream changes

Merge upstream changes from linux-next, including merged version
of new UAPI.

JIRA LS-128

Change-Id: I32c6745d2c17902e8216e9b376fd74d38a3b5904
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2653092
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/Makefile                   |   1 -
 drivers/gpu/host1x/dev.c                      |   9 -
 drivers/gpu/host1x/dev.h                      |   2 -
 drivers/gpu/host1x/fence.c                    |  76 +---
 drivers/gpu/host1x/hw/channel_hw.c            |  52 ++-
 drivers/gpu/host1x/hw/debug_hw.c              |  23 +-
 drivers/gpu/host1x/hw/debug_hw_1x01.c         |   8 +-
 drivers/gpu/host1x/hw/debug_hw_1x06.c         |  16 +-
 drivers/gpu/host1x/hw/hw_host1x02_uclass.h    |  12 +
 drivers/gpu/host1x/hw/hw_host1x04_uclass.h    |  12 +
 drivers/gpu/host1x/hw/hw_host1x05_uclass.h    |  12 +
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h    |  12 +
 drivers/gpu/host1x/hw/hw_host1x07_uclass.h    |  12 +
 .../gpu/host1x/include/linux/host1x-next.h    |  13 +-
 .../host1x/include/uapi/linux/host1x-next.h   | 134 -------
 drivers/gpu/host1x/intr.c                     |   2 +-
 drivers/gpu/host1x/job.c                      |  25 +-
 drivers/gpu/host1x/job.h                      |   2 +
 drivers/gpu/host1x/syncpt.c                   |  17 +
 drivers/gpu/host1x/syncpt.h                   |   2 +-
 drivers/gpu/host1x/uapi.c                     | 379 ------------------
 drivers/gpu/host1x/uapi.h                     |  22 -
 22 files changed, 195 insertions(+), 648 deletions(-)
 delete mode 100644 drivers/gpu/host1x/include/uapi/linux/host1x-next.h
 delete mode 100644 drivers/gpu/host1x/uapi.c
 delete mode 100644 drivers/gpu/host1x/uapi.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 5b6bfcae..34344be2 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -14,7 +14,6 @@ host1x-next-y = \
 	job.o \
 	debug.o \
 	mipi.o \
-	uapi.o \
 	fence.o \
 	hw/host1x01.o \
 	hw/host1x02.o \
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index a89f2350..2aed4592 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -528,12 +528,6 @@ static int host1x_probe(struct platform_device *pdev)
 		goto deinit_syncpt;
 	}
 
-	err = host1x_uapi_init(&host->uapi, host);
-	if (err) {
-		dev_err(&pdev->dev, "failed to initialize uapi\n");
-		goto deinit_intr;
-	}
-
 	host1x_debug_init(host);
 
 	if (host->info->has_hypervisor)
@@ -553,8 +547,6 @@ unregister:
 	host1x_unregister(host);
 deinit_debugfs:
 	host1x_debug_deinit(host);
-	host1x_uapi_deinit(&host->uapi);
-deinit_intr:
 	host1x_intr_deinit(host);
 deinit_syncpt:
 	host1x_syncpt_deinit(host);
@@ -576,7 +568,6 @@ static int host1x_remove(struct platform_device *pdev)
 
 	host1x_unregister(host);
 	host1x_debug_deinit(host);
-	host1x_uapi_deinit(&host->uapi);
 	host1x_intr_deinit(host);
 	host1x_syncpt_deinit(host);
 	reset_control_assert(host->rst);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 95c78451..5b7fdea5 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -17,7 +17,6 @@
 #include "intr.h"
 #include "job.h"
 #include "syncpt.h"
-#include "uapi.h"
 
 struct host1x_syncpt;
 struct host1x_syncpt_base;
@@ -152,7 +151,6 @@ struct host1x {
 	struct device_dma_parameters dma_parms;
 
 	struct host1x_bo_cache cache;
-	struct host1x_uapi uapi;
 };
 
 void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index ea4a0c7d..4a8fdbfb 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -31,20 +31,24 @@ struct host1x_syncpt_fence {
 	struct delayed_work timeout_work;
 };
 
-static const char *syncpt_fence_get_driver_name(struct dma_fence *f)
+static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f)
 {
 	return "host1x";
 }
 
-static const char *syncpt_fence_get_timeline_name(struct dma_fence *f)
+static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f)
 {
 	return "syncpoint";
 }
 
-static bool syncpt_fence_enable_signaling(struct dma_fence *f)
+static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f)
 {
-	struct host1x_syncpt_fence *sf =
-		container_of(f, struct host1x_syncpt_fence, base);
+	return container_of(f, struct host1x_syncpt_fence, base);
+}
+
+static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
+{
+	struct host1x_syncpt_fence *sf = to_host1x_fence(f);
 	int err;
 
 	if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
@@ -56,7 +60,7 @@ static bool syncpt_fence_enable_signaling(struct dma_fence *f)
 	 * The dma_fence framework requires the fence driver to keep a
 	 * reference to any fences for which 'enable_signaling' has been
 	 * called (and that have not been signalled).
-	 * 
+	 *
 	 * We provide a userspace API to create arbitrary syncpoint fences,
 	 * so we cannot normally guarantee that all fences get signalled.
 	 * As such, setup a timeout, so that long-lasting fences will get
@@ -85,10 +89,9 @@ static bool syncpt_fence_enable_signaling(struct dma_fence *f)
 	return true;
 }
 
-static void syncpt_fence_release(struct dma_fence *f)
+static void host1x_syncpt_fence_release(struct dma_fence *f)
 {
-	struct host1x_syncpt_fence *sf =
-		container_of(f, struct host1x_syncpt_fence, base);
+	struct host1x_syncpt_fence *sf = to_host1x_fence(f);
 
 	if (sf->waiter)
 		kfree(sf->waiter);
@@ -96,11 +99,11 @@ static void syncpt_fence_release(struct dma_fence *f)
 	dma_fence_free(f);
 }
 
-const struct dma_fence_ops syncpt_fence_ops = {
-	.get_driver_name = syncpt_fence_get_driver_name,
-	.get_timeline_name = syncpt_fence_get_timeline_name,
-	.enable_signaling = syncpt_fence_enable_signaling,
-	.release = syncpt_fence_release,
+const struct dma_fence_ops host1x_syncpt_fence_ops = {
+	.get_driver_name = host1x_syncpt_fence_get_driver_name,
+	.get_timeline_name = host1x_syncpt_fence_get_timeline_name,
+	.enable_signaling = host1x_syncpt_fence_enable_signaling,
+	.release = host1x_syncpt_fence_release,
 };
 
 void host1x_fence_signal(struct host1x_syncpt_fence *f)
@@ -157,7 +160,7 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
 	fence->sp = sp;
 	fence->threshold = threshold;
 
-	dma_fence_init(&fence->base, &syncpt_fence_ops, &lock,
+	dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock,
 		       dma_fence_context_alloc(1), 0);
 
 	INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout);
@@ -165,46 +168,3 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
 	return &fence->base;
 }
 EXPORT_SYMBOL(host1x_fence_create);
-
-int host1x_fence_create_fd(struct host1x_syncpt *sp, u32 threshold)
-{
-	struct sync_file *file;
-	struct dma_fence *f;
-	int fd;
-
-	f = host1x_fence_create(sp, threshold);
-	if (IS_ERR(f))
-		return PTR_ERR(f);
-
-	fd = get_unused_fd_flags(O_CLOEXEC);
-	if (fd < 0) {
-		dma_fence_put(f);
-		return fd;
-	}
-
-	file = sync_file_create(f);
-	dma_fence_put(f);
-	if (!file)
-		return -ENOMEM;
-
-	fd_install(fd, file->file);
-
-	return fd;
-}
-EXPORT_SYMBOL(host1x_fence_create_fd);
-
-int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold)
-{
-	struct host1x_syncpt_fence *f;
-
-	if (fence->ops != &syncpt_fence_ops)
-		return -EINVAL;
-
-	f = container_of(fence, struct host1x_syncpt_fence, base);
-
-	*id = f->sp->id;
-	*threshold = f->threshold;
-
-	return 0;
-}
-EXPORT_SYMBOL(host1x_fence_extract);
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 1917e760..4eda10a3 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -47,27 +47,57 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
 	}
 }
 
-static void submit_gathers(struct host1x_job *job)
+static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
+			u32 next_class)
+{
+#if HOST1X_HW >= 2
+	host1x_cdma_push_wide(cdma,
+		host1x_opcode_setclass(
+			HOST1X_CLASS_HOST1X,
+			HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
+			/* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
+			BIT(0) | BIT(2)
+		),
+		threshold,
+		id,
+		host1x_opcode_setclass(next_class, 0, 0)
+	);
+#else
+	/* TODO add waitchk or use waitbases or other mitigation */
+	host1x_cdma_push(cdma,
+		host1x_opcode_setclass(
+			HOST1X_CLASS_HOST1X,
+			host1x_uclass_wait_syncpt_r(),
+			BIT(0)
+		),
+		host1x_class_host_wait_syncpt(id, threshold)
+	);
+	host1x_cdma_push(cdma,
+		host1x_opcode_setclass(next_class, 0, 0),
+		HOST1X_OPCODE_NOP
+	);
+#endif
+}
+
+static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
 {
 	struct host1x_cdma *cdma = &job->channel->cdma;
 #if HOST1X_HW < 6
 	struct device *dev = job->channel->dev;
 #endif
 	unsigned int i;
+	u32 threshold;
 
 	for (i = 0; i < job->num_cmds; i++) {
 		struct host1x_job_cmd *cmd = &job->cmds[i];
 
 		if (cmd->is_wait) {
-			/* TODO use modern wait */
-			host1x_cdma_push(cdma,
-				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
-					host1x_uclass_wait_syncpt_r(), 1),
-				 host1x_class_host_wait_syncpt(cmd->wait.id,
-					cmd->wait.threshold));
-			host1x_cdma_push(
-				cdma, host1x_opcode_setclass(job->class, 0, 0),
-				HOST1X_OPCODE_NOP);
+			if (cmd->wait.relative)
+				threshold = job_syncpt_base + cmd->wait.threshold;
+			else
+				threshold = cmd->wait.threshold;
+
+			submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class);
 		} else {
 			struct host1x_job_gather *g = &cmd->gather;
 
@@ -196,7 +226,7 @@ static int channel_submit(struct host1x_job *job)
 				 host1x_opcode_setclass(job->class, 0, 0),
 				 HOST1X_OPCODE_NOP);
 
-	submit_gathers(job);
+	submit_gathers(job, syncval - user_syncpt_incrs);
 
 	/* end CDMA submit & stash pinned hMems into sync queue */
 	host1x_cdma_end(&ch->cdma, job);
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 35952fd5..54e31d81 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -156,9 +156,9 @@ static unsigned int show_channel_command(struct output *o, u32 val,
 	}
 }
 
-static void show_gather(struct output *o, phys_addr_t phys_addr,
+static void show_gather(struct output *o, dma_addr_t phys_addr,
 			unsigned int words, struct host1x_cdma *cdma,
-			phys_addr_t pin_addr, u32 *map_addr)
+			dma_addr_t pin_addr, u32 *map_addr)
 {
 	/* Map dmaget cursor to corresponding mem handle */
 	u32 offset = phys_addr - pin_addr;
@@ -176,11 +176,11 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
 	}
 
 	for (i = 0; i < words; i++) {
-		u32 addr = phys_addr + i * 4;
+		dma_addr_t addr = phys_addr + i * 4;
 		u32 val = *(map_addr + offset / 4 + i);
 
 		if (!data_count) {
-			host1x_debug_output(o, "%08x: %08x: ", addr, val);
+			host1x_debug_output(o, "    %pad: %08x: ", &addr, val);
 			data_count = show_channel_command(o, val, &payload);
 		} else {
 			host1x_debug_cont(o, "%08x%s", val,
@@ -195,19 +195,16 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
 	struct push_buffer *pb = &cdma->push_buffer;
 	struct host1x_job *job;
 
-	host1x_debug_output(o, "PUSHBUF at %pad, %u words\n",
-			    &pb->dma, pb->size / 4);
-
-	show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped);
-
 	list_for_each_entry(job, &cdma->sync_queue, list) {
 		unsigned int i;
 
-		host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
-				    job, job->syncpt->id, job->syncpt_end,
-				    job->first_get, job->timeout,
+		host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n",
+				    job->syncpt->id, job->syncpt_end, job->timeout,
 				    job->num_slots, job->num_unpins);
 
+		show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma,
+			    pb->dma + job->first_get, pb->mapped + job->first_get);
+
 		for (i = 0; i < job->num_cmds; i++) {
 			struct host1x_job_gather *g;
 			u32 *mapped;
@@ -227,7 +224,7 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
 				continue;
 			}
 
-			host1x_debug_output(o, "    GATHER at %pad+%#x, %d words\n",
+			host1x_debug_output(o, "  GATHER at %pad+%#x, %d words\n",
 					    &g->base, g->offset, g->words);
 
 			show_gather(o, g->base + g->offset, g->words, cdma,
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 02a93305..85242a59 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -16,10 +16,13 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
 					   struct output *o)
 {
 	struct host1x_cdma *cdma = &ch->cdma;
+	dma_addr_t dmastart, dmaend;
 	u32 dmaput, dmaget, dmactrl;
 	u32 cbstat, cbread;
 	u32 val, base, baseval;
 
+	dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+	dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
 	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
 	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
 	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
@@ -56,9 +59,10 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
 				    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
 				    cbread);
 
-	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+	host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+	host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
 			    dmaput, dmaget, dmactrl);
-	host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
+	host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat);
 
 	show_channel_gathers(o, cdma);
 	host1x_debug_output(o, "\n");
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index 6d1b583a..9d066787 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -16,10 +16,23 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
 					   struct output *o)
 {
 	struct host1x_cdma *cdma = &ch->cdma;
+	dma_addr_t dmastart = 0, dmaend = 0;
 	u32 dmaput, dmaget, dmactrl;
 	u32 offset, class;
 	u32 ch_stat;
 
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+	dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI);
+	dmastart <<= 32;
+#endif
+	dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+	dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI);
+	dmaend <<= 32;
+#endif
+	dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
+
 	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
 	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
 	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
@@ -41,7 +54,8 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
 		host1x_debug_output(o, "active class %02x, offset %04x\n",
 				    class, offset);
 
-	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+	host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+	host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
 			    dmaput, dmaget, dmactrl);
 	host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat);
 
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
index 4fc51f70..0a2ab8f1 100644
--- a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
 	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
index 9e84a4ad..60c692b9 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
 	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
index aee5a4e3..2fcc9a2a 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
 	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
index c4bacdb7..5f831438 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
 	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
index c74070f3..8cd2ef08 100644
--- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
 }
 #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
 	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
 
 #endif
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 45e0a712..c9f47fad 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -218,11 +218,7 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base);
 void host1x_syncpt_release_vblank_reservation(struct host1x_client *client,
 					      u32 syncpt_id);
 
-struct host1x_syncpt *host1x_syncpt_fd_get(int fd);
-
 struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold);
-int host1x_fence_create_fd(struct host1x_syncpt *sp, u32 threshold);
-int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold);
 
 /*
  * host1x channel
@@ -324,13 +320,18 @@ struct host1x_job {
 	/* Callback called when job is freed */
 	void (*release)(struct host1x_job *job);
 	void *user_data;
+
+	/* Whether host1x-side firewall should be ran for this job or not */
+	bool enable_firewall;
 };
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs);
+				    u32 num_cmdbufs, u32 num_relocs,
+				    bool skip_firewall);
 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
 			   unsigned int words, unsigned int offset);
-void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh);
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+			 bool relative, u32 next_class);
 struct host1x_job *host1x_job_get(struct host1x_job *job);
 void host1x_job_put(struct host1x_job *job);
 int host1x_job_pin(struct host1x_job *job, struct device *dev);
diff --git a/drivers/gpu/host1x/include/uapi/linux/host1x-next.h b/drivers/gpu/host1x/include/uapi/linux/host1x-next.h
deleted file mode 100644
index 9c8fb942..00000000
--- a/drivers/gpu/host1x/include/uapi/linux/host1x-next.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/* Copyright (c) 2020 NVIDIA Corporation */
-
-#ifndef _UAPI__LINUX_HOST1X_H
-#define _UAPI__LINUX_HOST1X_H
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-struct host1x_allocate_syncpoint {
-	/**
-	 * @fd: [out]
-	 *
-	 * New file descriptor representing the allocated syncpoint.
-	 */
-	__s32 fd;
-
-	__u32 reserved[3];
-};
-
-struct host1x_syncpoint_info {
-	/**
-	 * @id: [out]
-	 *
-	 * System-global ID of the syncpoint.
-	 */
-	__u32 id;
-
-	__u32 reserved[3];
-};
-
-struct host1x_syncpoint_increment {
-	/**
-	 * @count: [in]
-	 *
-	 * Number of times to increment the syncpoint. The syncpoint can
-	 * be observed at in-between values, but each increment is atomic.
-	 */
-	__u32 count;
-};
-
-struct host1x_read_syncpoint {
-	/**
-	 * @id: [in]
-	 *
-	 * ID of the syncpoint to read.
-	 */
-	__u32 id;
-
-	/**
-	 * @value: [out]
-	 *
-	 * Current value of the syncpoint.
-	 */
-	__u32 value;
-};
-
-struct host1x_create_fence {
-	/**
-	 * @id: [in]
-	 *
-	 * ID of the syncpoint to create a fence for.
-	 */
-	__u32 id;
-
-	/**
-	 * @threshold: [in]
-	 *
-	 * When the syncpoint reaches this value, the fence will be signaled.
-	 * The syncpoint is considered to have reached the threshold when the
-	 * following condition is true:
-	 *
-	 * 	((value - threshold) & 0x80000000U) == 0U
-	 *
-	 */
-	__u32 threshold;
-
-	/**
-	 * @fence_fd: [out]
-	 *
-	 * New sync_file file descriptor containing the created fence.
-	 */
-	__s32 fence_fd;
-
-	__u32 reserved[1];
-};
-
-struct host1x_fence_extract_fence {
-	__u32 id;
-	__u32 threshold;
-};
-
-struct host1x_fence_extract {
-	/**
-	 * @fence_fd: [in]
-	 *
-	 * sync_file file descriptor
-	 */
-	__s32 fence_fd;
-
-	/**
-	 * @num_fences: [in,out]
-	 *
-	 * In: size of the `fences_ptr` array counted in elements.
-	 * Out: required size of the `fences_ptr` array counted in elements.
-	 */
-	__u32 num_fences;
-
-	/**
-	 * @fences_ptr: [in]
-	 *
-	 * Pointer to array of `struct host1x_fence_extract_fence`.
-	 */
-	__u64 fences_ptr;
-
-	__u32 reserved[2];
-};
-
-#define HOST1X_IOCTL_ALLOCATE_SYNCPOINT  _IOWR('X', 0x00, struct host1x_allocate_syncpoint)
-#define HOST1X_IOCTL_READ_SYNCPOINT      _IOR ('X', 0x01, struct host1x_read_syncpoint)
-#define HOST1X_IOCTL_CREATE_FENCE        _IOWR('X', 0x02, struct host1x_create_fence)
-#define HOST1X_IOCTL_SYNCPOINT_INFO      _IOWR('X', 0x03, struct host1x_syncpoint_info)
-#define HOST1X_IOCTL_SYNCPOINT_INCREMENT _IOWR('X', 0x04, struct host1x_syncpoint_increment)
-#define HOST1X_IOCTL_FENCE_EXTRACT       _IOWR('X', 0x05, struct host1x_fence_extract)
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 7a847545..45b6be92 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -273,7 +273,7 @@ void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
 	if (flush) {
 		/* Wait until any concurrently executing handler has finished. */
 		while (atomic_read(&waiter->state) != WLS_HANDLED)
-			cpu_relax();
+			schedule();
 	}
 
 	kref_put(&waiter->refcount, waiter_release);
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index b5896749..ee47735a 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -24,14 +24,18 @@
 #define HOST1X_WAIT_SYNCPT_OFFSET 0x8
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs)
+				    u32 num_cmdbufs, u32 num_relocs,
+				    bool skip_firewall)
 {
 	struct host1x_job *job = NULL;
 	unsigned int num_unpins = num_relocs;
+	bool enable_firewall;
 	u64 total;
 	void *mem;
 
-	if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;
+
+	if (!enable_firewall)
 		num_unpins += num_cmdbufs;
 
 	/* Check that we're not going to overflow */
@@ -48,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 	if (!job)
 		return NULL;
 
+	job->enable_firewall = enable_firewall;
+
 	kref_init(&job->ref);
 	job->channel = ch;
 
@@ -111,13 +117,16 @@ void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
 }
 EXPORT_SYMBOL(host1x_job_add_gather);
 
-void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh)
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+			 bool relative, u32 next_class)
 {
 	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
 
 	cmd->is_wait = true;
 	cmd->wait.id = id;
 	cmd->wait.threshold = thresh;
+	cmd->wait.next_class = next_class;
+	cmd->wait.relative = relative;
 
 	job->num_cmds++;
 }
@@ -193,7 +202,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 	 * We will copy gathers BO content later, so there is no need to
 	 * hold and pin them.
 	 */
-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+	if (job->enable_firewall)
 		return 0;
 
 	for (i = 0; i < job->num_cmds; i++) {
@@ -280,7 +289,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
 		if (cmdbuf != reloc->cmdbuf.bo)
 			continue;
 
-		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+		if (job->enable_firewall) {
 			target = (u32 *)job->gather_copy_mapped +
 					reloc->cmdbuf.offset / sizeof(u32) +
 						g->offset / sizeof(u32);
@@ -593,7 +602,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 	if (err)
 		goto out;
 
-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+	if (job->enable_firewall) {
 		err = copy_gathers(host->dev, job, dev);
 		if (err)
 			goto out;
@@ -612,7 +621,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 			continue;
 
 		/* copy_gathers() sets gathers base if firewall is enabled */
-		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+		if (!job->enable_firewall)
 			g->base = job->gather_addr_phys[i];
 
 		for (j = i + 1; j < job->num_cmds; j++) {
@@ -646,7 +655,7 @@ void host1x_job_unpin(struct host1x_job *job)
 		struct host1x_bo_mapping *map = job->unpins[i].map;
 		struct host1x_bo *bo = map->bo;
 
-		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && map->size && host->domain) {
+		if (!job->enable_firewall && map->size && host->domain) {
 			iommu_unmap(host->domain, job->addr_phys[i], map->size);
 			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
 		}
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 657e2400..dad5a194 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -21,6 +21,8 @@ struct host1x_job_gather {
 struct host1x_job_wait {
 	u32 id;
 	u32 threshold;
+	u32 next_class;
+	bool relative;
 };
 
 struct host1x_job_cmd {
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 100270ac..d198a108 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -42,6 +42,18 @@ static void host1x_syncpt_base_free(struct host1x_syncpt_base *base)
 		base->requested = false;
 }
 
+/**
+ * host1x_syncpt_alloc() - allocate a syncpoint
+ * @host: host1x device data
+ * @flags: bitfield of HOST1X_SYNCPT_* flags
+ * @name: name for the syncpoint for use in debug prints
+ *
+ * Allocates a hardware syncpoint for the caller's use. The caller then has
+ * the sole authority to mutate the syncpoint's value until it is freed again.
+ *
+ * If no free syncpoints are available, or a NULL name was specified, returns
+ * NULL.
+ */
 struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 					  unsigned long flags,
 					  const char *name)
@@ -50,6 +62,9 @@ struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 	char *full_name;
 	unsigned int i;
 
+	if (!name)
+		return NULL;
+
 	mutex_lock(&host->syncpt_mutex);
 
 	for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++)
@@ -391,6 +406,7 @@ static void syncpt_release(struct kref *ref)
 	struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref);
 
 	atomic_set(&sp->max_val, host1x_syncpt_read(sp));
+
 	sp->locked = false;
 
 	mutex_lock(&sp->host->syncpt_mutex);
@@ -560,6 +576,7 @@ static void do_nothing(struct kref *ref)
  *   available for allocation
  *
  * @client: host1x bus client
+ * @syncpt_id: syncpoint ID to make available
  *
  * Makes VBLANK<i> syncpoint available for allocatation if it was
  * reserved at initialization time. This should be called by the display
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index b42b4254..d6a4e3fb 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -41,7 +41,7 @@ struct host1x_syncpt {
 	/* interrupt data */
 	struct host1x_syncpt_intr intr;
 
-	/* 
+	/*
 	 * If a submission incrementing this syncpoint fails, lock it so that
 	 * further submission cannot be made until application has handled the
 	 * failure.
diff --git a/drivers/gpu/host1x/uapi.c b/drivers/gpu/host1x/uapi.c
deleted file mode 100644
index 6a12c183..00000000
--- a/drivers/gpu/host1x/uapi.c
+++ /dev/null
@@ -1,379 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * /dev/host1x syncpoint interface
- *
- * Copyright (c) 2020, NVIDIA Corporation.
- */
-
-#include <linux/anon_inodes.h>
-#include <linux/cdev.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/host1x-next.h>
-#include <linux/nospec.h>
-#include <linux/sync_file.h>
-
-#include "dev.h"
-#include "fence.h"
-#include "syncpt.h"
-#include "uapi.h"
-
-#include <uapi/linux/host1x-next.h>
-
-static int syncpt_file_release(struct inode *inode, struct file *file)
-{
-	struct host1x_syncpt *sp = file->private_data;
-
-	host1x_syncpt_put(sp);
-
-	return 0;
-}
-
-static int syncpt_file_ioctl_info(struct host1x_syncpt *sp, void __user *data)
-{
-	struct host1x_syncpoint_info args;
-	unsigned long copy_err;
-
-	copy_err = copy_from_user(&args, data, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	if (args.reserved[0] || args.reserved[1] || args.reserved[2])
-		return -EINVAL;
-
-	args.id = sp->id;
-
-	copy_err = copy_to_user(data, &args, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	return 0;
-}
-
-static int syncpt_file_ioctl_incr(struct host1x_syncpt *sp, void __user *data)
-{
-	struct host1x_syncpoint_increment args;
-	unsigned long copy_err;
-	u32 i;
-
-	copy_err = copy_from_user(&args, data, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	for (i = 0; i < args.count; i++) {
-		host1x_syncpt_incr(sp);
-		if (signal_pending(current))
-			return -EINTR;
-	}
-
-	return 0;
-}
-
-static long syncpt_file_ioctl(struct file *file, unsigned int cmd,
-			      unsigned long arg)
-{
-	void __user *data = (void __user *)arg;
-	long err;
-
-	switch (cmd) {
-	case HOST1X_IOCTL_SYNCPOINT_INFO:
-		err = syncpt_file_ioctl_info(file->private_data, data);
-		break;
-
-	case HOST1X_IOCTL_SYNCPOINT_INCREMENT:
-		err = syncpt_file_ioctl_incr(file->private_data, data);
-		break;
-
-	default:
-		err = -ENOTTY;
-	}
-
-	return err;
-}
-
-static const struct file_operations syncpt_file_fops = {
-	.owner = THIS_MODULE,
-	.release = syncpt_file_release,
-	.unlocked_ioctl = syncpt_file_ioctl,
-	.compat_ioctl = syncpt_file_ioctl,
-};
-
-struct host1x_syncpt *host1x_syncpt_fd_get(int fd)
-{
-	struct host1x_syncpt *sp;
-	struct file *file = fget(fd);
-
-	if (!file)
-		return ERR_PTR(-EINVAL);
-
-	if (file->f_op != &syncpt_file_fops) {
-		fput(file);
-		return ERR_PTR(-EINVAL);
-	}
-
-	sp = file->private_data;
-
-	host1x_syncpt_get(sp);
-
-	fput(file);
-
-	return sp;
-}
-EXPORT_SYMBOL(host1x_syncpt_fd_get);
-
-static int dev_file_open(struct inode *inode, struct file *file)
-{
-	struct host1x_uapi *uapi =
-		container_of(inode->i_cdev, struct host1x_uapi, cdev);
-
-	file->private_data = container_of(uapi, struct host1x, uapi);
-
-	return 0;
-}
-
-static int dev_file_ioctl_read_syncpoint(struct host1x *host1x,
-					 void __user *data)
-{
-	struct host1x_read_syncpoint args;
-	unsigned long copy_err;
-
-	copy_err = copy_from_user(&args, data, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	if (args.id >= host1x_syncpt_nb_pts(host1x))
-		return -EINVAL;
-
-	args.id = array_index_nospec(args.id, host1x_syncpt_nb_pts(host1x));
-	args.value = host1x_syncpt_read(&host1x->syncpt[args.id]);
-
-	copy_err = copy_to_user(data, &args, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	return 0;
-}
-
-static int dev_file_ioctl_alloc_syncpoint(struct host1x *host1x,
-					  void __user *data)
-{
-	struct host1x_allocate_syncpoint args;
-	struct host1x_syncpt *sp;
-	unsigned long copy_err;
-	int err;
-
-	copy_err = copy_from_user(&args, data, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	if (args.reserved[0] || args.reserved[1] || args.reserved[2])
-		return -EINVAL;
-
-	sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED,
-				 current->comm);
-	if (!sp)
-		return -EBUSY;
-
-	err = anon_inode_getfd("host1x_syncpt", &syncpt_file_fops, sp,
-			       O_CLOEXEC);
-	if (err < 0)
-		goto free_syncpt;
-
-	args.fd = err;
-
-	copy_err = copy_to_user(data, &args, sizeof(args));
-	if (copy_err) {
-		err = -EFAULT;
-		goto put_fd;
-	}
-
-	return 0;
-
-put_fd:
-	put_unused_fd(args.fd);
-free_syncpt:
-	host1x_syncpt_put(sp);
-
-	return err;
-}
-
-static int dev_file_ioctl_create_fence(struct host1x *host1x, void __user *data)
-{
-	struct host1x_create_fence args;
-	unsigned long copy_err;
-	int fd;
-
-	copy_err = copy_from_user(&args, data, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	if (args.reserved[0])
-		return -EINVAL;
-
-	if (args.id >= host1x_syncpt_nb_pts(host1x))
-		return -EINVAL;
-
-	args.id = array_index_nospec(args.id, host1x_syncpt_nb_pts(host1x));
-
-	fd = host1x_fence_create_fd(&host1x->syncpt[args.id], args.threshold);
-	if (fd < 0)
-		return fd;
-
-	args.fence_fd = fd;
-
-	copy_err = copy_to_user(data, &args, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	return 0;
-}
-
-static int dev_file_ioctl_fence_extract(struct host1x *host1x, void __user *data)
-{
-	struct host1x_fence_extract_fence __user *fences_user_ptr;
-	struct dma_fence *fence, **fences;
-	struct host1x_fence_extract args;
-	struct dma_fence_array *array;
-	unsigned int num_fences, i;
-	unsigned long copy_err;
-	int err;
-
-	copy_err = copy_from_user(&args, data, sizeof(args));
-	if (copy_err)
-		return -EFAULT;
-
-	fences_user_ptr = u64_to_user_ptr(args.fences_ptr);
-
-	if (args.reserved[0] || args.reserved[1])
-		return -EINVAL;
-
-	fence = sync_file_get_fence(args.fence_fd);
-	if (!fence)
-		return -EINVAL;
-
-	array = to_dma_fence_array(fence);
-	if (array) {
-		fences = array->fences;
-		num_fences = array->num_fences;
-	} else {
-		fences = &fence;
-		num_fences = 1;
-	}
-
-	for (i = 0; i < min(num_fences, args.num_fences); i++) {
-		struct host1x_fence_extract_fence f;
-
-		err = host1x_fence_extract(fences[i], &f.id, &f.threshold);
-		if (err)
-			goto put_fence;
-
-		copy_err = copy_to_user(fences_user_ptr + i, &f, sizeof(f));
-		if (copy_err) {
-			err = -EFAULT;
-			goto put_fence;
-		}
-	}
-
-	args.num_fences = i+1;
-
-	copy_err = copy_to_user(data, &args, sizeof(args));
-	if (copy_err) {
-		err = -EFAULT;
-		goto put_fence;
-	}
-
-	return 0;
-
-put_fence:
-	dma_fence_put(fence);
-
-	return err;
-}
-
-static long dev_file_ioctl(struct file *file, unsigned int cmd,
-			   unsigned long arg)
-{
-	void __user *data = (void __user *)arg;
-	long err;
-
-	switch (cmd) {
-	case HOST1X_IOCTL_READ_SYNCPOINT:
-		err = dev_file_ioctl_read_syncpoint(file->private_data, data);
-		break;
-
-	case HOST1X_IOCTL_ALLOCATE_SYNCPOINT:
-		err = dev_file_ioctl_alloc_syncpoint(file->private_data, data);
-		break;
-
-	case HOST1X_IOCTL_CREATE_FENCE:
-		err = dev_file_ioctl_create_fence(file->private_data, data);
-		break;
-
-	case HOST1X_IOCTL_FENCE_EXTRACT:
-		err = dev_file_ioctl_fence_extract(file->private_data, data);
-		break;
-
-	default:
-		err = -ENOTTY;
-	}
-
-	return err;
-}
-
-static const struct file_operations dev_file_fops = {
-	.owner = THIS_MODULE,
-	.open = dev_file_open,
-	.unlocked_ioctl = dev_file_ioctl,
-	.compat_ioctl = dev_file_ioctl,
-};
-
-int host1x_uapi_init(struct host1x_uapi *uapi, struct host1x *host1x)
-{
-	int err;
-	dev_t dev_num;
-
-	err = alloc_chrdev_region(&dev_num, 0, 1, "host1x");
-	if (err)
-		return err;
-
-	uapi->class = class_create(THIS_MODULE, "host1x");
-	if (IS_ERR(uapi->class)) {
-		err = PTR_ERR(uapi->class);
-		goto unregister_chrdev_region;
-	}
-
-	cdev_init(&uapi->cdev, &dev_file_fops);
-	err = cdev_add(&uapi->cdev, dev_num, 1);
-	if (err)
-		goto destroy_class;
-
-	uapi->dev = device_create(uapi->class, host1x->dev,
-				  dev_num, NULL, "host1x");
-	if (IS_ERR(uapi->dev)) {
-		err = PTR_ERR(uapi->dev);
-		goto del_cdev;
-	}
-
-	cdev_add(&uapi->cdev, dev_num, 1);
-
-	uapi->dev_num = dev_num;
-
-	return 0;
-
-del_cdev:
-	cdev_del(&uapi->cdev);
-destroy_class:
-	class_destroy(uapi->class);
-unregister_chrdev_region:
-	unregister_chrdev_region(dev_num, 1);
-
-	return err;
-}
-
-void host1x_uapi_deinit(struct host1x_uapi *uapi)
-{
-	device_destroy(uapi->class, uapi->dev_num);
-	cdev_del(&uapi->cdev);
-	class_destroy(uapi->class);
-	unregister_chrdev_region(uapi->dev_num, 1);
-}
diff --git a/drivers/gpu/host1x/uapi.h b/drivers/gpu/host1x/uapi.h
deleted file mode 100644
index 7beb5e44..00000000
--- a/drivers/gpu/host1x/uapi.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2020, NVIDIA Corporation.
- */
-
-#ifndef HOST1X_UAPI_H
-#define HOST1X_UAPI_H
-
-#include <linux/cdev.h>
-
-struct host1x_uapi {
-	struct class *class;
-
-	struct cdev cdev;
-	struct device *dev;
-	dev_t dev_num;
-};
-
-int host1x_uapi_init(struct host1x_uapi *uapi, struct host1x *host1x);
-void host1x_uapi_deinit(struct host1x_uapi *uapi);
-
-#endif

From 45652ce93fc8aebf5fbcece8ce86b97bc2ff0847 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 2 Sep 2021 15:19:41 +0300
Subject: [PATCH 30/73] gpu: host1x: Restore host1x_fence_extract

host1x_fence_extract is currently not in upstream due to no upstream
callers, but nvgpu needs it, so restore it here for now.

JIRA LS-128

Change-Id: Ie2ccf6380e4bf506378a04bc8fd894c7ca7414c6
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2653093
GVS: Gerrit_Virtual_Submit
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
---
 drivers/gpu/host1x/fence.c                     | 16 ++++++++++++++++
 drivers/gpu/host1x/include/linux/host1x-next.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index 4a8fdbfb..2e0e4eae 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -168,3 +168,19 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
 	return &fence->base;
 }
 EXPORT_SYMBOL(host1x_fence_create);
+
+int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold)
+{
+	struct host1x_syncpt_fence *f;
+
+	if (fence->ops != &host1x_syncpt_fence_ops)
+		return -EINVAL;
+
+	f = container_of(fence, struct host1x_syncpt_fence, base);
+
+	*id = f->sp->id;
+	*threshold = f->threshold;
+
+	return 0;
+}
+EXPORT_SYMBOL(host1x_fence_extract);
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index c9f47fad..7d69840b 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -219,6 +219,7 @@ void host1x_syncpt_release_vblank_reservation(struct host1x_client *client,
 					      u32 syncpt_id);
 
 struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold);
+int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold);
 
 /*
  * host1x channel

From 6adab12fb10cfdc45e31fe6d677b8be956b28e1a Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Fri, 3 Sep 2021 11:45:05 +0300
Subject: [PATCH 31/73] gpu: host1x: Always return syncpoint value when waiting

The new TegraDRM UAPI uses syncpoint waiting with timeout set to
zero to indicate reading the syncpoint value. To support that we
need to return the syncpoint value always when waiting.

JIRA LS-128

Change-Id: I9bcb19e71cef30175994447d5409b621b5e58a07
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2653094
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/syncpt.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index d198a108..a89a4081 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -225,27 +225,12 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	void *ref;
 	struct host1x_waitlist *waiter;
 	int err = 0, check_count = 0;
-	u32 val;
 
 	if (value)
-		*value = 0;
-
-	/* first check cache */
-	if (host1x_syncpt_is_expired(sp, thresh)) {
-		if (value)
-			*value = host1x_syncpt_load(sp);
+		*value = host1x_syncpt_load(sp);
 
+	if (host1x_syncpt_is_expired(sp, thresh))
 		return 0;
-	}
-
-	/* try to read from register */
-	val = host1x_hw_syncpt_load(sp->host, sp);
-	if (host1x_syncpt_is_expired(sp, thresh)) {
-		if (value)
-			*value = val;
-
-		goto done;
-	}
 
 	if (!timeout) {
 		err = -EAGAIN;

From dae057752c425d943d90d965c8383438325fa4fd Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Wed, 19 Jan 2022 19:30:43 +0000
Subject: [PATCH 32/73] gpu: host1x: Fix BO caching

BOs pinned by calling host1x_bo_pin() are never being freed as
expected. Fix this by removing the unneeded refcount increment
in host1x_bo_pin() when creating a new mapping.

JIRA LS-128

Change-Id: I948a289e7953621302d75f5509464e7607d72299
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2657439
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/bus.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index c02360cf..72bac3fb 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -938,9 +938,6 @@ struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo
 		mapping->cache = cache;
 
 		list_add_tail(&mapping->entry, &cache->mappings);
-
-		/* bump reference count to track the copy in the cache */
-		kref_get(&mapping->ref);
 	}
 
 unlock:

From 64b7110c1d283bdc624849d5b06f94e83614d9b2 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 1 Apr 2022 17:00:47 +0100
Subject: [PATCH 33/73] gpu: host1x: Revert fix for BO caching

A proper fix has been created for the BO caching for host1x that has
been submitted upstream and so revert this change.

Bug 3556250

Change-Id: Ic712befb2c5f987df348d5406446bcbad8e88a4d
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2693867
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/bus.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 72bac3fb..c02360cf 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -938,6 +938,9 @@ struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo
 		mapping->cache = cache;
 
 		list_add_tail(&mapping->entry, &cache->mappings);
+
+		/* bump reference count to track the copy in the cache */
+		kref_get(&mapping->ref);
 	}
 
 unlock:

From 7d90780d4969abe21719da2e8fe1fcac9dec5528 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 24 Mar 2022 11:30:25 +0100
Subject: [PATCH 34/73] gpu: host1x: Do not use mapping cache for job
 submissions

Buffer mappings used in job submissions are usually small and not
rapidly reused as opposed to framebuffers (which are usually large and
rapidly reused, for example when page-flipping between double-buffered
framebuffers). Avoid going through the mapping cache for these buffers
since the cache would also lead to leaks if nobody is ever releasing
the cache's last reference. For DRM/KMS these last references are
dropped when the framebuffers are removed and therefore no longer
needed.

While at it, also add a note about the need to explicitly remove the
final reference to the mapping in the cache.

Bug 3556250

Change-Id: If6043dec54d6f73d1dc773cf854fa68e25c6556e
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2693869
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/include/linux/host1x-next.h | 5 +++++
 drivers/gpu/host1x/job.c                       | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 7d69840b..406a401b 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -34,6 +34,11 @@ u64 host1x_get_dma_mask(struct host1x *host1x);
  * struct host1x_bo_cache - host1x buffer object cache
  * @mappings: list of mappings
  * @lock: synchronizes accesses to the list of mappings
+ *
+ * Note that entries are not periodically evicted from this cache and instead need to be
+ * explicitly released. This is used primarily for DRM/KMS where the cache's reference is
+ * released when the last reference to a buffer object represented by a mapping in this
+ * cache is dropped.
  */
 struct host1x_bo_cache {
 	struct list_head mappings;
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index ee47735a..117a8f5f 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -177,7 +177,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		map = host1x_bo_pin(dev, bo, direction, &client->cache);
+		map = host1x_bo_pin(dev, bo, direction, NULL);
 		if (IS_ERR(map)) {
 			err = PTR_ERR(map);
 			goto unpin;
@@ -224,7 +224,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, &host->cache);
+		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
 		if (IS_ERR(map)) {
 			err = PTR_ERR(map);
 			goto unpin;

From 0955b8b8a539e42dace0242dbf44287fade1c139 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Wed, 15 Jun 2022 14:42:09 +0300
Subject: [PATCH 35/73] drm/tegra: Add support for secondary syncpoint

Add support for secondary (non-job tracking) syncpoint used for
NVENC slice encoding.

Bug 3589873

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I80204c23486dd476c6b67a3897934f301833c7f5
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2729281
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/host1x/hw/channel_hw.c             | 2 ++
 drivers/gpu/host1x/include/linux/host1x-next.h | 3 +++
 drivers/gpu/host1x/job.c                       | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 4eda10a3..ba556ba1 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -217,6 +217,8 @@ static int channel_submit(struct host1x_job *job)
 	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
 
 	host1x_hw_syncpt_assign_to_channel(host, sp, ch);
+	if (job->secondary_syncpt)
+		host1x_hw_syncpt_assign_to_channel(host, job->secondary_syncpt, ch);
 
 	job->syncpt_end = syncval;
 
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 406a401b..251cd471 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -290,6 +290,9 @@ struct host1x_job {
 	u32 syncpt_incrs;
 	u32 syncpt_end;
 
+	/* Non-job tracking related syncpoint */
+	struct host1x_syncpt *secondary_syncpt;
+
 	/* Completion waiter ref */
 	void *waiter;
 
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 117a8f5f..2754f9ac 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -95,6 +95,9 @@ static void job_free(struct kref *ref)
 	if (job->syncpt)
 		host1x_syncpt_put(job->syncpt);
 
+	if (job->secondary_syncpt)
+		host1x_syncpt_put(job->secondary_syncpt);
+
 	kfree(job);
 }
 

From 9edef092b1a6b8222f64c3c93d41b00979cfc831 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Tue, 28 Jun 2022 14:46:38 +0100
Subject: [PATCH 36/73] gpu: host1x: Update to Linux v5.19-rc1

Update the host1x driver to align with the latest upstream host1x
driver from Linux v5.19-rc1. Please note that the context bus support
is not included, because this needs to be built into the kernel.

Bug 3724727

Change-Id: Ic6fbe001462d160d1bb24f76038dd755c5550690
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2739538
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
---
 drivers/gpu/host1x/bus.c                      |   2 +-
 drivers/gpu/host1x/channel.c                  |   8 +
 drivers/gpu/host1x/debug.c                    |  26 ++-
 drivers/gpu/host1x/dev.c                      | 176 +++++++++++++++---
 drivers/gpu/host1x/dev.h                      |   3 +-
 drivers/gpu/host1x/fence.c                    |   2 +-
 drivers/gpu/host1x/hw/channel_hw.c            |  44 ++---
 .../gpu/host1x/include/linux/host1x-next.h    |   4 +-
 drivers/gpu/host1x/intr.c                     |   3 -
 drivers/gpu/host1x/job.c                      |   2 -
 drivers/gpu/host1x/syncpt.c                   |  21 +--
 11 files changed, 216 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index c02360cf..712ce483 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
 #include <linux/host1x-next.h>
 #include <linux/of.h>
 #include <linux/seq_file.h>
@@ -762,7 +763,6 @@ EXPORT_SYMBOL(host1x_client_exit);
 /**
  * __host1x_client_register() - register a host1x client
  * @client: host1x client
- * @key: lock class key for the client-specific mutex
  *
  * Registers a host1x client with each host1x controller instance. Note that
  * each client will only match their parent host1x controller and will only be
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 4cd212bb..2a9a3a8d 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -75,6 +75,14 @@ struct host1x_channel *host1x_channel_get_index(struct host1x *host,
 	return ch;
 }
 
+void host1x_channel_stop(struct host1x_channel *channel)
+{
+	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+
+	host1x_hw_cdma_stop(host, &channel->cdma);
+}
+EXPORT_SYMBOL(host1x_channel_stop);
+
 static void release_channel(struct kref *kref)
 {
 	struct host1x_channel *channel =
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 8a14880c..34c2e36d 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 
@@ -52,6 +53,11 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
 {
 	struct host1x *m = dev_get_drvdata(ch->dev->parent);
 	struct output *o = data;
+	int err;
+
+	err = pm_runtime_resume_and_get(m->dev);
+	if (err < 0)
+		return err;
 
 	mutex_lock(&ch->cdma.lock);
 	mutex_lock(&debug_lock);
@@ -64,16 +70,23 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
 	mutex_unlock(&debug_lock);
 	mutex_unlock(&ch->cdma.lock);
 
+	pm_runtime_put(m->dev);
+
 	return 0;
 }
 
-static void show_syncpts(struct host1x *m, struct output *o)
+static void show_syncpts(struct host1x *m, struct output *o, bool show_all)
 {
 	struct list_head *pos;
 	unsigned int i;
+	int err;
 
 	host1x_debug_output(o, "---- syncpts ----\n");
 
+	err = pm_runtime_resume_and_get(m->dev);
+	if (err < 0)
+		return;
+
 	for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
 		u32 max = host1x_syncpt_read_max(m->syncpt + i);
 		u32 min = host1x_syncpt_load(m->syncpt + i);
@@ -84,7 +97,10 @@ static void show_syncpts(struct host1x *m, struct output *o)
 			waiters++;
 		spin_unlock(&m->syncpt[i].intr.lock);
 
-		if (!min && !max && !waiters)
+		if (!kref_read(&m->syncpt[i].ref))
+			continue;
+
+		if (!show_all && !min && !max && !waiters)
 			continue;
 
 		host1x_debug_output(o,
@@ -101,6 +117,8 @@ static void show_syncpts(struct host1x *m, struct output *o)
 					    base_val);
 	}
 
+	pm_runtime_put(m->dev);
+
 	host1x_debug_output(o, "\n");
 }
 
@@ -109,7 +127,7 @@ static void show_all(struct host1x *m, struct output *o, bool show_fifo)
 	unsigned int i;
 
 	host1x_hw_show_mlocks(m, o);
-	show_syncpts(m, o);
+	show_syncpts(m, o, true);
 	host1x_debug_output(o, "---- channels ----\n");
 
 	for (i = 0; i < m->info->nb_channels; ++i) {
@@ -226,5 +244,5 @@ void host1x_debug_dump_syncpts(struct host1x *host1x)
 		.fn = write_to_printk
 	};
 
-	show_syncpts(host1x, &o);
+	show_syncpts(host1x, &o, false);
 }
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 2aed4592..8faaf31d 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -6,18 +6,27 @@
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
+#include <linux/version.h>
+
+#include <soc/tegra/common.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/host1x.h>
 #undef CREATE_TRACE_POINTS
 
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#endif
+
 #include "bus.h"
 #include "channel.h"
 #include "debug.h"
@@ -250,6 +259,9 @@ static void host1x_setup_sid_table(struct host1x *host)
 	const struct host1x_info *info = host->info;
 	unsigned int i;
 
+	if (!info->has_hypervisor)
+		return;
+
 	for (i = 0; i < info->num_sid_entries; i++) {
 		const struct host1x_sid_entry *entry = &info->sid_table[i];
 
@@ -298,6 +310,17 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
 	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
 	int err;
 
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+	if (host->dev->archdata.mapping) {
+		struct dma_iommu_mapping *mapping =
+				to_dma_iommu_mapping(host->dev);
+		arm_iommu_detach_device(host->dev);
+		arm_iommu_release_mapping(mapping);
+
+		domain = iommu_get_domain_for_dev(host->dev);
+	}
+#endif
+
 	/*
 	 * We may not always want to enable IOMMU support (for example if the
 	 * host1x firewall is already enabled and we don't support addressing
@@ -407,6 +430,27 @@ static void host1x_iommu_exit(struct host1x *host)
 	}
 }
 
+static int host1x_get_resets(struct host1x *host)
+{
+	int err;
+
+	host->resets[0].id = "mc";
+	host->resets[1].id = "host1x";
+	host->nresets = ARRAY_SIZE(host->resets);
+
+	err = devm_reset_control_bulk_get_optional_exclusive_released(
+				host->dev, host->nresets, host->resets);
+	if (err) {
+		dev_err(host->dev, "failed to get reset: %d\n", err);
+		return err;
+	}
+
+	if (WARN_ON(!host->resets[1].rstc))
+		return -ENOENT;
+
+	return 0;
+}
+
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
@@ -446,7 +490,6 @@ static int host1x_probe(struct platform_device *pdev)
 	if (syncpt_irq < 0)
 		return syncpt_irq;
 
-	host1x_bo_cache_init(&host->cache);
 	mutex_init(&host->devices_lock);
 	INIT_LIST_HEAD(&host->devices);
 	INIT_LIST_HEAD(&host->list);
@@ -484,17 +527,16 @@ static int host1x_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	host->rst = devm_reset_control_get(&pdev->dev, "host1x");
-	if (IS_ERR(host->rst)) {
-		err = PTR_ERR(host->rst);
-		dev_err(&pdev->dev, "failed to get reset: %d\n", err);
+	err = host1x_get_resets(host);
+	if (err)
 		return err;
-	}
+
+	host1x_bo_cache_init(&host->cache);
 
 	err = host1x_iommu_init(host);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
-		return err;
+		goto destroy_cache;
 	}
 
 	err = host1x_channel_list_init(&host->channel_list,
@@ -504,22 +546,10 @@ static int host1x_probe(struct platform_device *pdev)
 		goto iommu_exit;
 	}
 
-	err = clk_prepare_enable(host->clk);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to enable clock\n");
-		goto free_channels;
-	}
-
-	err = reset_control_deassert(host->rst);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to deassert reset: %d\n", err);
-		goto unprepare_disable;
-	}
-
 	err = host1x_syncpt_init(host);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize syncpts\n");
-		goto reset_assert;
+		goto free_channels;
 	}
 
 	err = host1x_intr_init(host, syncpt_irq);
@@ -528,10 +558,20 @@ static int host1x_probe(struct platform_device *pdev)
 		goto deinit_syncpt;
 	}
 
-	host1x_debug_init(host);
+	pm_runtime_enable(&pdev->dev);
 
-	if (host->info->has_hypervisor)
-		host1x_setup_sid_table(host);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0)
+	err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
+	if (err)
+		goto pm_disable;
+#endif
+
+	/* the driver's code isn't ready yet for the dynamic RPM */
+	err = pm_runtime_resume_and_get(&pdev->dev);
+	if (err)
+		goto pm_disable;
+
+	host1x_debug_init(host);
 
 	err = host1x_register(host);
 	if (err < 0)
@@ -547,17 +587,20 @@ unregister:
 	host1x_unregister(host);
 deinit_debugfs:
 	host1x_debug_deinit(host);
+
+	pm_runtime_put_sync_suspend(&pdev->dev);
+pm_disable:
+	pm_runtime_disable(&pdev->dev);
+
 	host1x_intr_deinit(host);
 deinit_syncpt:
 	host1x_syncpt_deinit(host);
-reset_assert:
-	reset_control_assert(host->rst);
-unprepare_disable:
-	clk_disable_unprepare(host->clk);
 free_channels:
 	host1x_channel_list_free(&host->channel_list);
 iommu_exit:
 	host1x_iommu_exit(host);
+destroy_cache:
+	host1x_bo_cache_destroy(&host->cache);
 
 	return err;
 }
@@ -568,20 +611,95 @@ static int host1x_remove(struct platform_device *pdev)
 
 	host1x_unregister(host);
 	host1x_debug_deinit(host);
+
+	pm_runtime_force_suspend(&pdev->dev);
+
 	host1x_intr_deinit(host);
 	host1x_syncpt_deinit(host);
-	reset_control_assert(host->rst);
-	clk_disable_unprepare(host->clk);
+	host1x_channel_list_free(&host->channel_list);
 	host1x_iommu_exit(host);
 	host1x_bo_cache_destroy(&host->cache);
 
 	return 0;
 }
 
+static int __maybe_unused host1x_runtime_suspend(struct device *dev)
+{
+	struct host1x *host = dev_get_drvdata(dev);
+	int err;
+
+	host1x_intr_stop(host);
+	host1x_syncpt_save(host);
+
+	err = reset_control_bulk_assert(host->nresets, host->resets);
+	if (err) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		goto resume_host1x;
+	}
+
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(host->clk);
+	reset_control_bulk_release(host->nresets, host->resets);
+
+	return 0;
+
+resume_host1x:
+	host1x_setup_sid_table(host);
+	host1x_syncpt_restore(host);
+	host1x_intr_start(host);
+
+	return err;
+}
+
+static int __maybe_unused host1x_runtime_resume(struct device *dev)
+{
+	struct host1x *host = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_bulk_acquire(host->nresets, host->resets);
+	if (err) {
+		dev_err(dev, "failed to acquire reset: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(host->clk);
+	if (err) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		goto release_reset;
+	}
+
+	err = reset_control_bulk_deassert(host->nresets, host->resets);
+	if (err < 0) {
+		dev_err(dev, "failed to deassert reset: %d\n", err);
+		goto disable_clk;
+	}
+
+	host1x_setup_sid_table(host);
+	host1x_syncpt_restore(host);
+	host1x_intr_start(host);
+
+	return 0;
+
+disable_clk:
+	clk_disable_unprepare(host->clk);
+release_reset:
+	reset_control_bulk_release(host->nresets, host->resets);
+
+	return err;
+}
+
+static const struct dev_pm_ops host1x_pm_ops = {
+	SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
+			   NULL)
+	/* TODO: add system suspend-resume once driver will be ready for that */
+};
+
 static struct platform_driver tegra_host1x_driver = {
 	.driver = {
 		.name = "tegra-host1x",
 		.of_match_table = host1x_of_match,
+		.pm = &host1x_pm_ops,
 	},
 	.probe = host1x_probe,
 	.remove = host1x_remove,
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 5b7fdea5..ca4b082f 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -118,7 +118,8 @@ struct host1x {
 	struct host1x_syncpt_base *bases;
 	struct device *dev;
 	struct clk *clk;
-	struct reset_control *rst;
+	struct reset_control_bulk_data resets[2];
+	unsigned int nresets;
 
 	struct iommu_group *group;
 	struct iommu_domain *domain;
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index 2e0e4eae..f989f7fd 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -15,7 +15,7 @@
 #include "intr.h"
 #include "syncpt.h"
 
-DEFINE_SPINLOCK(lock);
+static DEFINE_SPINLOCK(lock);
 
 struct host1x_syncpt_fence {
 	struct dma_fence base;
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index ba556ba1..57a21bd3 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -159,6 +159,27 @@ static void host1x_channel_set_streamid(struct host1x_channel *channel)
 #endif
 }
 
+static void host1x_enable_gather_filter(struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+	u32 val;
+
+	if (!host->hv_regs)
+		return;
+
+	val = host1x_hypervisor_readl(
+		host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+	val |= BIT(ch->id % 32);
+	host1x_hypervisor_writel(
+		host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+	host1x_ch_writel(ch,
+			 HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+			 HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
 static int channel_submit(struct host1x_job *job)
 {
 	struct host1x_channel *ch = job->channel;
@@ -190,6 +211,7 @@ static int channel_submit(struct host1x_job *job)
 	}
 
 	host1x_channel_set_streamid(ch);
+	host1x_enable_gather_filter(ch);
 
 	/* begin a CDMA submit */
 	err = host1x_cdma_begin(&ch->cdma, job);
@@ -251,27 +273,6 @@ error:
 	return err;
 }
 
-static void enable_gather_filter(struct host1x *host,
-				 struct host1x_channel *ch)
-{
-#if HOST1X_HW >= 6
-	u32 val;
-
-	if (!host->hv_regs)
-		return;
-
-	val = host1x_hypervisor_readl(
-		host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
-	val |= BIT(ch->id % 32);
-	host1x_hypervisor_writel(
-		host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
-#elif HOST1X_HW >= 4
-	host1x_ch_writel(ch,
-			 HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
-			 HOST1X_CHANNEL_CHANNELCTRL);
-#endif
-}
-
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
 			       unsigned int index)
 {
@@ -280,7 +281,6 @@ static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
 #else
 	ch->regs = dev->regs + index * 0x100;
 #endif
-	enable_gather_filter(dev, ch);
 	return 0;
 }
 
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 251cd471..215802c4 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -89,6 +89,7 @@ struct host1x_client_ops {
  * @parent: pointer to parent structure
  * @usecount: reference count for this structure
  * @lock: mutex for mutually exclusive concurrency
+ * @cache: host1x buffer object cache
  */
 struct host1x_client {
 	struct list_head list;
@@ -143,7 +144,7 @@ struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
 	struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo,
-					 enum dma_data_direction direction);
+					 enum dma_data_direction dir);
 	void (*unpin)(struct host1x_bo_mapping *map);
 	void *(*mmap)(struct host1x_bo *bo);
 	void (*munmap)(struct host1x_bo *bo, void *addr);
@@ -235,6 +236,7 @@ struct host1x_job;
 
 struct host1x_channel *host1x_channel_request(struct host1x_client *client);
 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
+void host1x_channel_stop(struct host1x_channel *channel);
 void host1x_channel_put(struct host1x_channel *channel);
 int host1x_job_submit(struct host1x_job *job);
 
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 45b6be92..965ba218 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -297,14 +297,11 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
 			 "host1x_sp_%02u", id);
 	}
 
-	host1x_intr_start(host);
-
 	return 0;
 }
 
 void host1x_intr_deinit(struct host1x *host)
 {
-	host1x_intr_stop(host);
 }
 
 void host1x_intr_start(struct host1x *host)
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 2754f9ac..dc652481 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -141,11 +141,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 	struct host1x_client *client = job->client;
 	struct device *dev = client->dev;
 	struct host1x_job_gather *g;
-	struct iommu_domain *domain;
 	unsigned int i;
 	int err;
 
-	domain = iommu_get_domain_for_dev(dev);
 	job->num_unpins = 0;
 
 	for (i = 0; i < job->num_relocs; i++) {
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index a89a4081..f87a8705 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -137,12 +137,21 @@ void host1x_syncpt_restore(struct host1x *host)
 	struct host1x_syncpt *sp_base = host->syncpt;
 	unsigned int i;
 
-	for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
+	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+		/*
+		 * Unassign syncpt from channels for purposes of Tegra186
+		 * syncpoint protection. This prevents any channel from
+		 * accessing it until it is reassigned.
+		 */
+		host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL);
 		host1x_hw_syncpt_restore(host, sp_base + i);
+	}
 
 	for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
 		host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
 
+	host1x_hw_syncpt_enable_protection(host);
+
 	wmb();
 }
 
@@ -335,13 +344,6 @@ int host1x_syncpt_init(struct host1x *host)
 	for (i = 0; i < host->info->nb_pts; i++) {
 		syncpt[i].id = i;
 		syncpt[i].host = host;
-
-		/*
-		 * Unassign syncpt from channels for purposes of Tegra186
-		 * syncpoint protection. This prevents any channel from
-		 * accessing it until it is reassigned.
-		 */
-		host1x_hw_syncpt_assign_to_channel(host, &syncpt[i], NULL);
 	}
 
 	for (i = 0; i < host->info->nb_bases; i++)
@@ -351,9 +353,6 @@ int host1x_syncpt_init(struct host1x *host)
 	host->syncpt = syncpt;
 	host->bases = bases;
 
-	host1x_syncpt_restore(host);
-	host1x_hw_syncpt_enable_protection(host);
-
 	/* Allocate sync point to use for clearing waits for expired fences */
 	host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
 	if (!host->nop_sp)

From 7b4206605df8d275dd967a4a4c1d19e7d9d54a48 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 12 Jan 2021 12:40:05 +0200
Subject: [PATCH 37/73] gpu: host1x: Add context device management code

Add code to register context devices from device tree, allocate them
out and manage their refcounts.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I1e50462fc07247faf4cb217f4ab75fb610b10024
---
 drivers/gpu/host1x/Makefile  |   3 +
 drivers/gpu/host1x/context.c | 160 +++++++++++++++++++++++++++++++++++
 drivers/gpu/host1x/context.h |  38 +++++++++
 drivers/gpu/host1x/dev.c     |  12 ++-
 drivers/gpu/host1x/dev.h     |   2 +
 5 files changed, 214 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/host1x/context.c
 create mode 100644 drivers/gpu/host1x/context.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 34344be2..8da51264 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -22,4 +22,7 @@ host1x-next-y = \
 	hw/host1x06.o \
 	hw/host1x07.o
 
+host1x-next-$(CONFIG_IOMMU_API) += \
+	context.o
+
 obj-m := host1x-next.o
diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
new file mode 100644
index 00000000..b08cf11f
--- /dev/null
+++ b/drivers/gpu/host1x/context.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pid.h>
+#include <linux/slab.h>
+
+#include "context.h"
+#include "dev.h"
+
+int host1x_memory_context_list_init(struct host1x *host1x)
+{
+	struct host1x_memory_context_list *cdl = &host1x->context_list;
+	struct device_node *node = host1x->dev->of_node;
+	struct host1x_memory_context *ctx;
+	unsigned int i;
+	int err;
+
+	cdl->devs = NULL;
+	cdl->len = 0;
+	mutex_init(&cdl->lock);
+
+	err = of_property_count_u32_elems(node, "iommu-map");
+	if (err < 0)
+		return 0;
+
+	cdl->devs = kcalloc(err, sizeof(*cdl->devs), GFP_KERNEL);
+	if (!cdl->devs)
+		return -ENOMEM;
+	cdl->len = err / 4;
+
+	for (i = 0; i < cdl->len; i++) {
+		struct iommu_fwspec *fwspec;
+
+		ctx = &cdl->devs[i];
+
+		ctx->host = host1x;
+
+		device_initialize(&ctx->dev);
+
+		/*
+		 * Due to an issue with T194 NVENC, only 38 bits can be used.
+		 * Anyway, 256GiB of IOVA ought to be enough for anyone.
+		 */
+		ctx->dma_mask = DMA_BIT_MASK(38);
+		ctx->dev.dma_mask = &ctx->dma_mask;
+		ctx->dev.coherent_dma_mask = ctx->dma_mask;
+		dev_set_name(&ctx->dev, "host1x-ctx.%d", i);
+		ctx->dev.bus = &host1x_context_device_bus_type;
+		ctx->dev.parent = host1x->dev;
+
+		dma_set_max_seg_size(&ctx->dev, UINT_MAX);
+
+		err = device_add(&ctx->dev);
+		if (err) {
+			dev_err(host1x->dev, "could not add context device %d: %d\n", i, err);
+			goto del_devices;
+		}
+
+		err = of_dma_configure_id(&ctx->dev, node, true, &i);
+		if (err) {
+			dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n",
+				i, err);
+			device_del(&ctx->dev);
+			goto del_devices;
+		}
+
+		fwspec = dev_iommu_fwspec_get(&ctx->dev);
+		if (!fwspec || !device_iommu_mapped(&ctx->dev)) {
+			dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i);
+			device_del(&ctx->dev);
+			goto del_devices;
+		}
+
+		ctx->stream_id = fwspec->ids[0] & 0xffff;
+	}
+
+	return 0;
+
+del_devices:
+	while (i--)
+		device_del(&cdl->devs[i].dev);
+
+	kfree(cdl->devs);
+	cdl->len = 0;
+
+	return err;
+}
+
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+	unsigned int i;
+
+	for (i = 0; i < cdl->len; i++)
+		device_del(&cdl->devs[i].dev);
+
+	kfree(cdl->devs);
+	cdl->len = 0;
+}
+
+struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+							  struct pid *pid)
+{
+	struct host1x_memory_context_list *cdl = &host1x->context_list;
+	struct host1x_memory_context *free = NULL;
+	int i;
+
+	if (!cdl->len)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	mutex_lock(&cdl->lock);
+
+	for (i = 0; i < cdl->len; i++) {
+		struct host1x_memory_context *cd = &cdl->devs[i];
+
+		if (cd->owner == pid) {
+			refcount_inc(&cd->ref);
+			mutex_unlock(&cdl->lock);
+			return cd;
+		} else if (!cd->owner && !free) {
+			free = cd;
+		}
+	}
+
+	if (!free) {
+		mutex_unlock(&cdl->lock);
+		return ERR_PTR(-EBUSY);
+	}
+
+	refcount_set(&free->ref, 1);
+	free->owner = get_pid(pid);
+
+	mutex_unlock(&cdl->lock);
+
+	return free;
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_alloc);
+
+void host1x_memory_context_get(struct host1x_memory_context *cd)
+{
+	refcount_inc(&cd->ref);
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_get);
+
+void host1x_memory_context_put(struct host1x_memory_context *cd)
+{
+	struct host1x_memory_context_list *cdl = &cd->host->context_list;
+
+	if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) {
+		put_pid(cd->owner);
+		cd->owner = NULL;
+		mutex_unlock(&cdl->lock);
+	}
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_put);
diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h
new file mode 100644
index 00000000..3e03bc1d
--- /dev/null
+++ b/drivers/gpu/host1x/context.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x context devices
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_CONTEXT_H
+#define __HOST1X_CONTEXT_H
+
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+
+struct host1x;
+
+extern struct bus_type host1x_context_device_bus_type;
+
+struct host1x_memory_context_list {
+	struct mutex lock;
+	struct host1x_memory_context *devs;
+	unsigned int len;
+};
+
+#ifdef CONFIG_IOMMU_API
+int host1x_memory_context_list_init(struct host1x *host1x);
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl);
+#else
+static inline int host1x_memory_context_list_init(struct host1x *host1x)
+{
+	return 0;
+}
+
+static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 8faaf31d..367897a4 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -29,6 +29,7 @@
 
 #include "bus.h"
 #include "channel.h"
+#include "context.h"
 #include "debug.h"
 #include "dev.h"
 #include "intr.h"
@@ -546,10 +547,16 @@ static int host1x_probe(struct platform_device *pdev)
 		goto iommu_exit;
 	}
 
+	err = host1x_memory_context_list_init(host);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize context list\n");
+		goto free_channels;
+	}
+
 	err = host1x_syncpt_init(host);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize syncpts\n");
-		goto free_channels;
+		goto free_contexts;
 	}
 
 	err = host1x_intr_init(host, syncpt_irq);
@@ -595,6 +602,8 @@ pm_disable:
 	host1x_intr_deinit(host);
 deinit_syncpt:
 	host1x_syncpt_deinit(host);
+free_contexts:
+	host1x_memory_context_list_free(&host->context_list);
 free_channels:
 	host1x_channel_list_free(&host->channel_list);
 iommu_exit:
@@ -616,6 +625,7 @@ static int host1x_remove(struct platform_device *pdev)
 
 	host1x_intr_deinit(host);
 	host1x_syncpt_deinit(host);
+	host1x_memory_context_list_free(&host->context_list);
 	host1x_channel_list_free(&host->channel_list);
 	host1x_iommu_exit(host);
 	host1x_bo_cache_destroy(&host->cache);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index ca4b082f..7552a455 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -14,6 +14,7 @@
 
 #include "cdma.h"
 #include "channel.h"
+#include "context.h"
 #include "intr.h"
 #include "job.h"
 #include "syncpt.h"
@@ -141,6 +142,7 @@ struct host1x {
 	struct mutex syncpt_mutex;
 
 	struct host1x_channel_list channel_list;
+	struct host1x_memory_context_list context_list;
 
 	struct dentry *debugfs;
 

From 933d4c99000ddc23167ed2d61e08227f767b814f Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 12 Jan 2021 12:40:05 +0200
Subject: [PATCH 38/73] gpu: host1x: Add context device management code

Add code to register context devices from device tree, allocate them
out and manage their refcounts.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I5a48ef57c8e9c8bab199dbc2d449535251c7f198
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745961
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 .../gpu/host1x/include/linux/host1x-next.h    | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 215802c4..6e943851 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -453,4 +453,38 @@ int tegra_mipi_disable(struct tegra_mipi_device *device);
 int tegra_mipi_start_calibration(struct tegra_mipi_device *device);
 int tegra_mipi_finish_calibration(struct tegra_mipi_device *device);
 
+/* host1x memory contexts */
+
+struct host1x_memory_context {
+	struct host1x *host;
+
+	refcount_t ref;
+	struct pid *owner;
+
+	struct device dev;
+	u64 dma_mask;
+	u32 stream_id;
+};
+
+#ifdef CONFIG_IOMMU_API
+struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+							  struct pid *pid);
+void host1x_memory_context_get(struct host1x_memory_context *cd);
+void host1x_memory_context_put(struct host1x_memory_context *cd);
+#else
+static inline struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+									struct pid *pid)
+{
+	return NULL;
+}
+
+static inline void host1x_memory_context_get(struct host1x_memory_context *cd)
+{
+}
+
+static inline void host1x_memory_context_put(struct host1x_memory_context *cd)
+{
+}
+#endif
+
 #endif

From 9e4475c80042db4c05b5b2bf6f401158f580957b Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 12 Jan 2021 12:41:22 +0200
Subject: [PATCH 39/73] gpu: host1x: Program context stream ID on submission

Add code to do stream ID switching at the beginning of a job. The
stream ID is switched to the stream ID specified by the context
passed in the job structure.

Before switching the stream ID, an OP_DONE wait is done on the
channel's engine to ensure that there is no residual ongoing
work that might do DMA using the new stream ID.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I7705c33417e27d99bb265db13eb3c19960f5a363
---
 drivers/gpu/host1x/hw/channel_hw.c            | 52 +++++++++++++++++--
 drivers/gpu/host1x/hw/host1x06_hardware.h     | 10 ++++
 drivers/gpu/host1x/hw/host1x07_hardware.h     | 10 ++++
 .../gpu/host1x/include/linux/host1x-next.h    |  8 +++
 4 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 57a21bd3..4e058e4c 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -180,6 +180,45 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch)
 #endif
 }
 
+static void host1x_channel_program_engine_streamid(struct host1x_job *job)
+{
+#if HOST1X_HW >= 6
+	u32 fence;
+
+	if (!job->memory_context)
+		return;
+
+	fence = host1x_syncpt_incr_max(job->syncpt, 1);
+
+	/* First, increment a syncpoint on OP_DONE condition.. */
+
+	host1x_cdma_push(&job->channel->cdma,
+		host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+		HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+			HOST1X_UCLASS_INCR_SYNCPT_COND_F(1));
+
+	/* Wait for syncpoint to increment */
+
+	host1x_cdma_push(&job->channel->cdma,
+		host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+			host1x_uclass_wait_syncpt_r(), 1),
+		host1x_class_host_wait_syncpt(job->syncpt->id, fence));
+
+	/*
+	 * Now that we know the engine is idle, return to class and
+	 * change stream ID.
+	 */
+
+	host1x_cdma_push(&job->channel->cdma,
+		host1x_opcode_setclass(job->class, 0, 0),
+		HOST1X_OPCODE_NOP);
+
+	host1x_cdma_push(&job->channel->cdma,
+		host1x_opcode_setpayload(job->memory_context->stream_id),
+		host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
+#endif
+}
+
 static int channel_submit(struct host1x_job *job)
 {
 	struct host1x_channel *ch = job->channel;
@@ -236,20 +275,25 @@ static int channel_submit(struct host1x_job *job)
 	if (sp->base)
 		synchronize_syncpt_base(job);
 
-	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
-
 	host1x_hw_syncpt_assign_to_channel(host, sp, ch);
 	if (job->secondary_syncpt)
 		host1x_hw_syncpt_assign_to_channel(host, job->secondary_syncpt, ch);
 
-	job->syncpt_end = syncval;
-
 	/* add a setclass for modules that require it */
 	if (job->class)
 		host1x_cdma_push(&ch->cdma,
 				 host1x_opcode_setclass(job->class, 0, 0),
 				 HOST1X_OPCODE_NOP);
 
+	/*
+	 * Ensure engine DMA is idle and set new stream ID. May increment
+	 * syncpt max.
+	 */
+	host1x_channel_program_engine_streamid(job);
+
+	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
+	job->syncpt_end = syncval;
+
 	submit_gathers(job, syncval - user_syncpt_incrs);
 
 	/* end CDMA submit & stash pinned hMems into sync queue */
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
index 01a142a0..5d515745 100644
--- a/drivers/gpu/host1x/hw/host1x06_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -127,6 +127,16 @@ static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
 	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
 }
 
+static inline u32 host1x_opcode_setstreamid(unsigned streamid)
+{
+	return (7 << 28) | streamid;
+}
+
+static inline u32 host1x_opcode_setpayload(unsigned payload)
+{
+	return (9 << 28) | payload;
+}
+
 static inline u32 host1x_opcode_gather_wide(unsigned count)
 {
 	return (12 << 28) | count;
diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h
index e6582172..82c0cc9b 100644
--- a/drivers/gpu/host1x/hw/host1x07_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x07_hardware.h
@@ -127,6 +127,16 @@ static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
 	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
 }
 
+static inline u32 host1x_opcode_setstreamid(unsigned streamid)
+{
+	return (7 << 28) | streamid;
+}
+
+static inline u32 host1x_opcode_setpayload(unsigned payload)
+{
+	return (9 << 28) | payload;
+}
+
 static inline u32 host1x_opcode_gather_wide(unsigned count)
 {
 	return (12 << 28) | count;
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 6e943851..1538cda5 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -334,6 +334,14 @@ struct host1x_job {
 
 	/* Whether host1x-side firewall should be ran for this job or not */
 	bool enable_firewall;
+
+	/* Options for configuring engine data stream ID */
+	/* Context device to use for job */
+	struct host1x_memory_context *memory_context;
+	/* Stream ID to use if context isolation is disabled (!memory_context) */
+	u32 engine_fallback_streamid;
+	/* Engine offset to program stream ID to */
+	u32 engine_streamid_offset;
 };
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,

From 80828001d55176e82741430964d17c3700e807aa Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Fri, 8 Apr 2022 11:35:31 +0300
Subject: [PATCH 40/73] gpu: host1x: Deduplicate hardware headers

Host1x class information and opcodes are unchanged or backwards
compatible across SoCs so let's not duplicate them for each one
but have them in a shared header file.

At the same time, add opcode functions for acquire/release_mlock.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I06e28af3822b11684aef52e6d0776700beff1c69
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745950
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/hw/host1x01_hardware.h | 114 +---------------
 drivers/gpu/host1x/hw/host1x02_hardware.h | 113 +---------------
 drivers/gpu/host1x/hw/host1x04_hardware.h | 113 +---------------
 drivers/gpu/host1x/hw/host1x05_hardware.h | 113 +---------------
 drivers/gpu/host1x/hw/host1x06_hardware.h | 128 +-----------------
 drivers/gpu/host1x/hw/host1x07_hardware.h | 128 +-----------------
 drivers/gpu/host1x/hw/opcodes.h           | 150 ++++++++++++++++++++++
 7 files changed, 156 insertions(+), 703 deletions(-)
 create mode 100644 drivers/gpu/host1x/hw/opcodes.h

diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h
index fe59df1d..cb93d7c1 100644
--- a/drivers/gpu/host1x/hw/host1x01_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x01_hardware.h
@@ -15,118 +15,6 @@
 #include "hw_host1x01_sync.h"
 #include "hw_host1x01_uclass.h"
 
-static inline u32 host1x_class_host_wait_syncpt(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_wait_syncpt_indx_f(indx)
-		| host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-		| host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-	unsigned indx, unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_wait_syncpt_base_indx_f(indx)
-		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-	unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-	unsigned cond, unsigned indx)
-{
-	return host1x_uclass_incr_syncpt_cond_f(cond)
-		| host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indbe_f(0xf)
-		| host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset);
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset)
-		| host1x_uclass_indoff_rwn_read_v();
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-	unsigned class_id, unsigned offset, unsigned mask)
-{
-	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-	return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-	return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-	return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-	return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-		host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-	return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-	return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
 
 #endif
diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h
index af60d7fb..2d1282b9 100644
--- a/drivers/gpu/host1x/hw/host1x02_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x02_hardware.h
@@ -15,117 +15,6 @@
 #include "hw_host1x02_sync.h"
 #include "hw_host1x02_uclass.h"
 
-static inline u32 host1x_class_host_wait_syncpt(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_wait_syncpt_indx_f(indx)
-		| host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-		| host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-	unsigned indx, unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_wait_syncpt_base_indx_f(indx)
-		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-	unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-	unsigned cond, unsigned indx)
-{
-	return host1x_uclass_incr_syncpt_cond_f(cond)
-		| host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indbe_f(0xf)
-		| host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset);
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset)
-		| host1x_uclass_indoff_rwn_read_v();
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-	unsigned class_id, unsigned offset, unsigned mask)
-{
-	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-	return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-	return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-	return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-	return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-		host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-	return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-	return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
 
 #endif
diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h
index 4f9bcddf..84d244e8 100644
--- a/drivers/gpu/host1x/hw/host1x04_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x04_hardware.h
@@ -15,117 +15,6 @@
 #include "hw_host1x04_sync.h"
 #include "hw_host1x04_uclass.h"
 
-static inline u32 host1x_class_host_wait_syncpt(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_wait_syncpt_indx_f(indx)
-		| host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-		| host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-	unsigned indx, unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_wait_syncpt_base_indx_f(indx)
-		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-	unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-	unsigned cond, unsigned indx)
-{
-	return host1x_uclass_incr_syncpt_cond_f(cond)
-		| host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indbe_f(0xf)
-		| host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset);
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset)
-		| host1x_uclass_indoff_rwn_read_v();
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-	unsigned class_id, unsigned offset, unsigned mask)
-{
-	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-	return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-	return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-	return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-	return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-		host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-	return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-	return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
 
 #endif
diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h
index af3ab4b7..1dcde6ec 100644
--- a/drivers/gpu/host1x/hw/host1x05_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x05_hardware.h
@@ -15,117 +15,6 @@
 #include "hw_host1x05_sync.h"
 #include "hw_host1x05_uclass.h"
 
-static inline u32 host1x_class_host_wait_syncpt(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_wait_syncpt_indx_f(indx)
-		| host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-		| host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-	unsigned indx, unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_wait_syncpt_base_indx_f(indx)
-		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-	unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-	unsigned cond, unsigned indx)
-{
-	return host1x_uclass_incr_syncpt_cond_f(cond)
-		| host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indbe_f(0xf)
-		| host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset);
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset)
-		| host1x_uclass_indoff_rwn_read_v();
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-	unsigned class_id, unsigned offset, unsigned mask)
-{
-	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-	return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-	return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-	return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-	return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-		host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-	return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-	return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
 
 #endif
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
index 5d515745..c05cfa7e 100644
--- a/drivers/gpu/host1x/hw/host1x06_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -16,132 +16,6 @@
 #include "hw_host1x06_vm.h"
 #include "hw_host1x06_hypervisor.h"
 
-static inline u32 host1x_class_host_wait_syncpt(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_wait_syncpt_indx_f(indx)
-		| host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-		| host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-	unsigned indx, unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_wait_syncpt_base_indx_f(indx)
-		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-	unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-	unsigned cond, unsigned indx)
-{
-	return host1x_uclass_incr_syncpt_cond_f(cond)
-		| host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indbe_f(0xf)
-		| host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset);
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset)
-		| host1x_uclass_indoff_rwn_read_v();
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-	unsigned class_id, unsigned offset, unsigned mask)
-{
-	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-	return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-	return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-	return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-	return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-		host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-	return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-	return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_setstreamid(unsigned streamid)
-{
-	return (7 << 28) | streamid;
-}
-
-static inline u32 host1x_opcode_setpayload(unsigned payload)
-{
-	return (9 << 28) | payload;
-}
-
-static inline u32 host1x_opcode_gather_wide(unsigned count)
-{
-	return (12 << 28) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
 
 #endif
diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h
index 82c0cc9b..d67364e0 100644
--- a/drivers/gpu/host1x/hw/host1x07_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x07_hardware.h
@@ -16,132 +16,6 @@
 #include "hw_host1x07_vm.h"
 #include "hw_host1x07_hypervisor.h"
 
-static inline u32 host1x_class_host_wait_syncpt(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_wait_syncpt_indx_f(indx)
-		| host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-		| host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-	unsigned indx, unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_wait_syncpt_base_indx_f(indx)
-		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-	unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-	unsigned cond, unsigned indx)
-{
-	return host1x_uclass_incr_syncpt_cond_f(cond)
-		| host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indbe_f(0xf)
-		| host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset);
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset)
-		| host1x_uclass_indoff_rwn_read_v();
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-	unsigned class_id, unsigned offset, unsigned mask)
-{
-	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-	return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-	return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-	return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-	return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-		host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-	return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-	return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_setstreamid(unsigned streamid)
-{
-	return (7 << 28) | streamid;
-}
-
-static inline u32 host1x_opcode_setpayload(unsigned payload)
-{
-	return (9 << 28) | payload;
-}
-
-static inline u32 host1x_opcode_gather_wide(unsigned count)
-{
-	return (12 << 28) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
 
 #endif
diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h
new file mode 100644
index 00000000..64961449
--- /dev/null
+++ b/drivers/gpu/host1x/hw/opcodes.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x opcodes
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_OPCODES_H
+#define __HOST1X_OPCODES_H
+
+#include <linux/types.h>
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+static inline u32 host1x_opcode_setstreamid(unsigned streamid)
+{
+	return (7 << 28) | streamid;
+}
+
+static inline u32 host1x_opcode_setpayload(unsigned payload)
+{
+	return (9 << 28) | payload;
+}
+
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+	return (12 << 28) | count;
+}
+
+static inline u32 host1x_opcode_acquire_mlock(unsigned mlock)
+{
+	return (14 << 28) | (0 << 24) | mlock;
+}
+
+static inline u32 host1x_opcode_release_mlock(unsigned mlock)
+{
+	return (14 << 28) | (1 << 24) | mlock;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif

From d9c7b1421de6dc2f258b2f83532cd3cf1a7d12a9 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Sun, 15 May 2022 12:30:19 +0300
Subject: [PATCH 41/73] gpu: host1x: Simplify register mapping and add common
 aperture

Refactor 'regs' property loading using devm_platform_ioremap_*
and add loading of the 'common' region found on Tegra234.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I4652e4225ca72d1e61b4bdae122e5765eaa2455b
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745951
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 46 +++++++++++++++++-----------------------
 drivers/gpu/host1x/dev.h |  3 +++
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 367897a4..5ab40b5d 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -41,6 +41,11 @@
 #include "hw/host1x06.h"
 #include "hw/host1x07.h"
 
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
+{
+	writel(v, host1x->common_regs + r);
+}
+
 void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
 {
 	writel(v, host1x->hv_regs + r);
@@ -455,7 +460,6 @@ static int host1x_get_resets(struct host1x *host)
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
-	struct resource *regs, *hv_regs = NULL;
 	int syncpt_irq;
 	int err;
 
@@ -466,25 +470,23 @@ static int host1x_probe(struct platform_device *pdev)
 	host->info = of_device_get_match_data(&pdev->dev);
 
 	if (host->info->has_hypervisor) {
-		regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
-		if (!regs) {
-			dev_err(&pdev->dev, "failed to get vm registers\n");
-			return -ENXIO;
-		}
+		host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
+		if (IS_ERR(host->regs))
+			return PTR_ERR(host->regs);
 
-		hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-						       "hypervisor");
-		if (!hv_regs) {
-			dev_err(&pdev->dev,
-				"failed to get hypervisor registers\n");
-			return -ENXIO;
+		host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
+		if (IS_ERR(host->hv_regs))
+			return PTR_ERR(host->hv_regs);
+
+		if (host->info->has_common) {
+			host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
+			if (IS_ERR(host->common_regs))
+				return PTR_ERR(host->common_regs);
 		}
 	} else {
-		regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		if (!regs) {
-			dev_err(&pdev->dev, "failed to get registers\n");
-			return -ENXIO;
-		}
+		host->regs = devm_platform_ioremap_resource(pdev, 0);
+		if (IS_ERR(host->regs))
+			return PTR_ERR(host->regs);
 	}
 
 	syncpt_irq = platform_get_irq(pdev, 0);
@@ -499,16 +501,6 @@ static int host1x_probe(struct platform_device *pdev)
 	/* set common host1x device data */
 	platform_set_drvdata(pdev, host);
 
-	host->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(host->regs))
-		return PTR_ERR(host->regs);
-
-	if (host->info->has_hypervisor) {
-		host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
-		if (IS_ERR(host->hv_regs))
-			return PTR_ERR(host->hv_regs);
-	}
-
 	host->dev->dma_parms = &host->dma_parms;
 	dma_set_max_seg_size(host->dev, UINT_MAX);
 
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 7552a455..85edcc6e 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -100,6 +100,7 @@ struct host1x_info {
 	u64 dma_mask; /* mask of addressable memory */
 	bool has_wide_gather; /* supports GATHER_W opcode */
 	bool has_hypervisor; /* has hypervisor registers */
+	bool has_common; /* has common registers separate from hypervisor */
 	unsigned int num_sid_entries;
 	const struct host1x_sid_entry *sid_table;
 	/*
@@ -115,6 +116,7 @@ struct host1x {
 
 	void __iomem *regs;
 	void __iomem *hv_regs; /* hypervisor region */
+	void __iomem *common_regs;
 	struct host1x_syncpt *syncpt;
 	struct host1x_syncpt_base *bases;
 	struct device *dev;
@@ -156,6 +158,7 @@ struct host1x {
 	struct host1x_bo_cache cache;
 };
 
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r);
 void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
 u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
 void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);

From d801058ffcd6c769c07f66c908d692053d764389 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 8 Mar 2022 12:16:16 +0200
Subject: [PATCH 42/73] gpu: host1x: Program virtualization tables

Program virtualization tables specifying which VMs have access to which
Host1x hardware resources. Programming these has become mandatory in
Tegra234.

For now, since the driver does not operate as a Host1x hypervisor, we
basically allow access to everything to everyone.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I67e09e7d9c55128c40c54c078912cc38c7616eac
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745952
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 21 ++++++++++++++++++---
 drivers/gpu/host1x/dev.h |  8 ++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 5ab40b5d..e4644585 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -260,7 +260,7 @@ static const struct of_device_id host1x_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, host1x_of_match);
 
-static void host1x_setup_sid_table(struct host1x *host)
+static void host1x_setup_virtualization_tables(struct host1x *host)
 {
 	const struct host1x_info *info = host->info;
 	unsigned int i;
@@ -274,6 +274,21 @@ static void host1x_setup_sid_table(struct host1x *host)
 		host1x_hypervisor_writel(host, entry->offset, entry->base);
 		host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
 	}
+
+	for (i = 0; i < info->streamid_vm_table.count; i++) {
+		/* Allow access to all stream IDs to all VMs. */
+		host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i);
+	}
+
+	for (i = 0; i < info->classid_vm_table.count; i++) {
+		/* Allow access to all classes to all VMs. */
+		host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i);
+	}
+
+	for (i = 0; i < info->mmio_vm_table.count; i++) {
+		/* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */
+		host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i);
+	}
 }
 
 static bool host1x_wants_iommu(struct host1x *host1x)
@@ -647,7 +662,7 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev)
 	return 0;
 
 resume_host1x:
-	host1x_setup_sid_table(host);
+	host1x_setup_virtualization_tables(host);
 	host1x_syncpt_restore(host);
 	host1x_intr_start(host);
 
@@ -677,7 +692,7 @@ static int __maybe_unused host1x_runtime_resume(struct device *dev)
 		goto disable_clk;
 	}
 
-	host1x_setup_sid_table(host);
+	host1x_setup_virtualization_tables(host);
 	host1x_syncpt_restore(host);
 	host1x_intr_start(host);
 
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 85edcc6e..920e5548 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -90,6 +90,11 @@ struct host1x_sid_entry {
 	unsigned int limit;
 };
 
+struct host1x_table_desc {
+	unsigned int base;
+	unsigned int count;
+};
+
 struct host1x_info {
 	unsigned int nb_channels; /* host1x: number of channels supported */
 	unsigned int nb_pts; /* host1x: number of syncpoints supported */
@@ -103,6 +108,9 @@ struct host1x_info {
 	bool has_common; /* has common registers separate from hypervisor */
 	unsigned int num_sid_entries;
 	const struct host1x_sid_entry *sid_table;
+	struct host1x_table_desc streamid_vm_table;
+	struct host1x_table_desc classid_vm_table;
+	struct host1x_table_desc mmio_vm_table;
 	/*
 	 * On T20-T148, the boot chain may setup DC to increment syncpoints
 	 * 26/27 on VBLANK. As such we cannot use these syncpoints until

From 3ce86b27400ce5228a1b3c6b4929b0b958dab774 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 8 Mar 2022 12:17:07 +0200
Subject: [PATCH 43/73] gpu: host1x: Allow reset to be missing

Host1x on Tegra234 does not have a software-controllable reset line.
As such, don't bail out if we don't find one in the device tree.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I583492a11c6f6ce358f59728983ba31a06b63d2f
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745953
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index e4644585..e4dcfa1a 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -466,9 +466,6 @@ static int host1x_get_resets(struct host1x *host)
 		return err;
 	}
 
-	if (WARN_ON(!host->resets[1].rstc))
-		return -ENOENT;
-
 	return 0;
 }
 

From 7771d25b9c095f90f311bc4ae4e4e392a6cc94b9 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 8 Mar 2022 12:17:42 +0200
Subject: [PATCH 44/73] gpu: host1x: Program interrupt destinations on Tegra234

On Tegra234, each Host1x VM has 8 interrupt lines. Each syncpoint
can be configured with which interrupt line should be used for
threshold interrupt, allowing for load balancing.

For now, to keep backwards compatibility, just set all syncpoints
to the first interrupt.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I7058f9bd38bc59db83ee92613e4e733813db7a46
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745954
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/hw/intr_hw.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index f56375ee..9acccdb1 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -76,6 +76,17 @@ static void intr_hw_init(struct host1x *host, u32 cpm)
 	/* update host clocks per usec */
 	host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
 #endif
+#if HOST1X_HW >= 8
+	u32 id;
+
+	/*
+	 * Program threshold interrupt destination among 8 lines per VM,
+	 * per syncpoint. For now, just direct all to the first interrupt
+	 * line.
+	 */
+	for (id = 0; id < host->info->nb_pts; id++)
+		host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+#endif
 }
 
 static int

From 994282e7921e00be6a0f5205af339ac1fe67cd90 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 8 Mar 2022 12:15:34 +0200
Subject: [PATCH 45/73] gpu: host1x: Tegra234 device data and headers

Add device data and chip headers for Tegra234.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: Ide1c653abed06fb880b8222cb0a00d65a4886e27
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745955
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/Makefile                   |   3 +-
 drivers/gpu/host1x/dev.c                      |  42 ++++
 drivers/gpu/host1x/hw/host1x08.c              |  33 ++++
 drivers/gpu/host1x/hw/host1x08.h              |  15 ++
 drivers/gpu/host1x/hw/host1x08_hardware.h     |  21 ++
 drivers/gpu/host1x/hw/hw_host1x08_channel.h   |  11 ++
 drivers/gpu/host1x/hw/hw_host1x08_common.h    |   4 +
 .../gpu/host1x/hw/hw_host1x08_hypervisor.h    |   9 +
 drivers/gpu/host1x/hw/hw_host1x08_uclass.h    | 181 ++++++++++++++++++
 drivers/gpu/host1x/hw/hw_host1x08_vm.h        |  36 ++++
 10 files changed, 354 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/host1x/hw/host1x08.c
 create mode 100644 drivers/gpu/host1x/hw/host1x08.h
 create mode 100644 drivers/gpu/host1x/hw/host1x08_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_common.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_vm.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 8da51264..b5e7e933 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -20,7 +20,8 @@ host1x-next-y = \
 	hw/host1x04.o \
 	hw/host1x05.o \
 	hw/host1x06.o \
-	hw/host1x07.o
+	hw/host1x07.o \
+	hw/host1x08.o
 
 host1x-next-$(CONFIG_IOMMU_API) += \
 	context.o
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index e4dcfa1a..b2e8dc77 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -40,6 +40,7 @@
 #include "hw/host1x05.h"
 #include "hw/host1x06.h"
 #include "hw/host1x07.h"
+#include "hw/host1x08.h"
 
 void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
 {
@@ -248,7 +249,48 @@ static const struct host1x_info host1x07_info = {
 	.reserve_vblank_syncpts = false,
 };
 
+/*
+ * Tegra234 has two stream ID protection tables, one for setting stream IDs
+ * through the channel path via SETSTREAMID, and one for setting them via
+ * MMIO. We program each engine's data stream ID in the channel path table
+ * and firmware stream ID in the MMIO path table.
+ */
+static const struct host1x_sid_entry tegra234_sid_table[] = {
+	{
+		/* VIC channel */
+		.base = 0x17b8,
+		.offset = 0x30,
+		.limit = 0x30
+	},
+	{
+		/* VIC MMIO */
+		.base = 0x1688,
+		.offset = 0x34,
+		.limit = 0x34
+	},
+};
+
+static const struct host1x_info host1x08_info = {
+	.nb_channels = 63,
+	.nb_pts = 1024,
+	.nb_mlocks = 24,
+	.nb_bases = 0,
+	.init = host1x08_init,
+	.sync_offset = 0x0,
+	.dma_mask = DMA_BIT_MASK(40),
+	.has_wide_gather = true,
+	.has_hypervisor = true,
+	.has_common = true,
+	.num_sid_entries = ARRAY_SIZE(tegra234_sid_table),
+	.sid_table = tegra234_sid_table,
+	.streamid_vm_table = { 0x1004, 128 },
+	.classid_vm_table = { 0x1404, 25 },
+	.mmio_vm_table = { 0x1504, 25 },
+	.reserve_vblank_syncpts = false,
+};
+
 static const struct of_device_id host1x_of_match[] = {
+	{ .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, },
 	{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
 	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
 	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c
new file mode 100644
index 00000000..754890c3
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x08.h"
+#include "host1x08_hardware.h"
+
+/* include code */
+#define HOST1X_HW 8
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x08_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h
new file mode 100644
index 00000000..a6bad56e
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X08_H
+#define HOST1X_HOST1X08_H
+
+struct host1x;
+
+int host1x08_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h
new file mode 100644
index 00000000..93624306
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08_hardware.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra234
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X08_HARDWARE_H
+#define __HOST1X_HOST1X08_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x08_uclass.h"
+#include "hw_host1x08_vm.h"
+#include "hw_host1x08_hypervisor.h"
+#include "hw_host1x08_common.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
new file mode 100644
index 00000000..c9272d2a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X08_CHANNEL_H
+#define HOST1X_HW_HOST1X08_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h
new file mode 100644
index 00000000..4df28440
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
new file mode 100644
index 00000000..22964324
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN			0x1724
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN			BIT(1)
+#define HOST1X_HV_CH_MLOCK_EN(x)			(0x1700 + (x * 4))
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x)		(0x1710 + (x * 4))
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
new file mode 100644
index 00000000..724cccd7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X08_UCLASS_H
+#define HOST1X_HW_HOST1X08_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
new file mode 100644
index 00000000..1455a467
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART				0x0000
+#define HOST1X_CHANNEL_DMASTART_HI			0x0004
+#define HOST1X_CHANNEL_DMAPUT				0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI			0x000c
+#define HOST1X_CHANNEL_DMAGET				0x0010
+#define HOST1X_CHANNEL_DMAGET_HI			0x0014
+#define HOST1X_CHANNEL_DMAEND				0x0018
+#define HOST1X_CHANNEL_DMAEND_HI			0x001c
+#define HOST1X_CHANNEL_DMACTRL				0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP			BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST		BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET		BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT			0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY		BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA			0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET			0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS			0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT			0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP			0x0048
+#define HOST1X_CHANNEL_TEARDOWN				0x004c
+#define HOST1X_CHANNEL_SMMU_STREAMID			0x0084
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x)			(0x6400 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x)	(0x6600 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INTR_DEST(x)			(0x6684 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x)	(0x770c + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x)	(0x7790 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT(x)				(0x8080 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0xa088 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x)			(0xb090 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v)			(((v) & 0x3f) << 8)

From 4354d2bf9225cced4ec11f9296a97eaffad60639 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 8 Mar 2022 12:18:03 +0200
Subject: [PATCH 46/73] gpu: host1x: Rewrite job opcode sequence

For new (Tegra186+) SoCs, use a new ('full-featured') job opcode
sequence that is compatible with virtualization. In particular,
the Host1x hardware in Tegra234 is more strict regarding the sequence,
requiring ACQUIRE_MLOCK-SETCLASS-SETSTREAMID opcodes to occur in
that sequence without gaps (except for SETPAYLOAD), so let's do it
properly in one go now.

Bug 3724727

Change-Id: Ifae148975457d2d275cfae25fcaf735e6529fbd3
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745964
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/hw/channel_hw.c | 150 +++++++++++++++++------------
 1 file changed, 88 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 4e058e4c..0901dd2c 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -47,10 +47,41 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
 	}
 }
 
-static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
+static void submit_wait(struct host1x_job *job, u32 id, u32 threshold,
 			u32 next_class)
 {
-#if HOST1X_HW >= 2
+	struct host1x_cdma *cdma = &job->channel->cdma;
+
+#if HOST1X_HW >= 6
+	u32 stream_id;
+
+	/*
+	 * If a memory context has been set, use it. Otherwise
+	 * (if context isolation is disabled) use the engine's
+	 * firmware stream ID.
+	 */
+	if (job->memory_context)
+		stream_id = job->memory_context->stream_id;
+	else
+		stream_id = job->engine_fallback_streamid;
+
+	host1x_cdma_push_wide(cdma,
+		host1x_opcode_setclass(
+			HOST1X_CLASS_HOST1X,
+			HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
+			/* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
+			BIT(0) | BIT(2)
+		),
+		threshold,
+		id,
+		HOST1X_OPCODE_NOP
+	);
+	host1x_cdma_push_wide(&job->channel->cdma,
+		host1x_opcode_setclass(job->class, 0, 0),
+		host1x_opcode_setpayload(stream_id),
+		host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
+		HOST1X_OPCODE_NOP);
+#elif HOST1X_HW >= 2
 	host1x_cdma_push_wide(cdma,
 		host1x_opcode_setclass(
 			HOST1X_CLASS_HOST1X,
@@ -97,7 +128,7 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
 			else
 				threshold = cmd->wait.threshold;
 
-			submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class);
+			submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class);
 		} else {
 			struct host1x_job_gather *g = &cmd->gather;
 
@@ -180,42 +211,70 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch)
 #endif
 }
 
-static void host1x_channel_program_engine_streamid(struct host1x_job *job)
+static void channel_program_cdma(struct host1x_job *job)
 {
+	struct host1x_cdma *cdma = &job->channel->cdma;
+	struct host1x_syncpt *sp = job->syncpt;
+
 #if HOST1X_HW >= 6
 	u32 fence;
 
-	if (!job->memory_context)
-		return;
-
-	fence = host1x_syncpt_incr_max(job->syncpt, 1);
-
-	/* First, increment a syncpoint on OP_DONE condition.. */
+	/* Enter engine class with invalid stream ID. */
+	host1x_cdma_push_wide(cdma,
+		host1x_opcode_acquire_mlock(job->class),
+		host1x_opcode_setclass(job->class, 0, 0),
+		host1x_opcode_setpayload(0),
+		host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
 
+	/* Before switching stream ID to real stream ID, ensure engine is idle. */
+	fence = host1x_syncpt_incr_max(sp, 1);
 	host1x_cdma_push(&job->channel->cdma,
 		host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
 		HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
-			HOST1X_UCLASS_INCR_SYNCPT_COND_F(1));
+			HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+	submit_wait(job, job->syncpt->id, fence, job->class);
 
-	/* Wait for syncpoint to increment */
+	/* Submit work. */
+	job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+	submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
 
+	/* Before releasing MLOCK, ensure engine is idle again. */
+	fence = host1x_syncpt_incr_max(sp, 1);
 	host1x_cdma_push(&job->channel->cdma,
-		host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
-			host1x_uclass_wait_syncpt_r(), 1),
-		host1x_class_host_wait_syncpt(job->syncpt->id, fence));
+		host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+		HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+			HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+	submit_wait(job, job->syncpt->id, fence, job->class);
 
-	/*
-	 * Now that we know the engine is idle, return to class and
-	 * change stream ID.
-	 */
+	/* Release MLOCK. */
+	host1x_cdma_push(cdma,
+		HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class));
+#else
+	if (job->serialize) {
+		/*
+		 * Force serialization by inserting a host wait for the
+		 * previous job to finish before this one can commence.
+		 */
+		host1x_cdma_push(cdma,
+				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+					host1x_uclass_wait_syncpt_r(), 1),
+				 host1x_class_host_wait_syncpt(job->syncpt->id,
+					host1x_syncpt_read_max(sp)));
+	}
 
-	host1x_cdma_push(&job->channel->cdma,
-		host1x_opcode_setclass(job->class, 0, 0),
-		HOST1X_OPCODE_NOP);
+	/* Synchronize base register to allow using it for relative waiting */
+	if (sp->base)
+		synchronize_syncpt_base(job);
 
-	host1x_cdma_push(&job->channel->cdma,
-		host1x_opcode_setpayload(job->memory_context->stream_id),
-		host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
+	/* add a setclass for modules that require it */
+	if (job->class)
+		host1x_cdma_push(cdma,
+				 host1x_opcode_setclass(job->class, 0, 0),
+				 HOST1X_OPCODE_NOP);
+
+	job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+
+	submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
 #endif
 }
 
@@ -223,7 +282,6 @@ static int channel_submit(struct host1x_job *job)
 {
 	struct host1x_channel *ch = job->channel;
 	struct host1x_syncpt *sp = job->syncpt;
-	u32 user_syncpt_incrs = job->syncpt_incrs;
 	u32 prev_max = 0;
 	u32 syncval;
 	int err;
@@ -251,6 +309,9 @@ static int channel_submit(struct host1x_job *job)
 
 	host1x_channel_set_streamid(ch);
 	host1x_enable_gather_filter(ch);
+	host1x_hw_syncpt_assign_to_channel(host, sp, ch);
+	if (job->secondary_syncpt)
+		host1x_hw_syncpt_assign_to_channel(host, job->secondary_syncpt, ch);
 
 	/* begin a CDMA submit */
 	err = host1x_cdma_begin(&ch->cdma, job);
@@ -259,42 +320,7 @@ static int channel_submit(struct host1x_job *job)
 		goto error;
 	}
 
-	if (job->serialize) {
-		/*
-		 * Force serialization by inserting a host wait for the
-		 * previous job to finish before this one can commence.
-		 */
-		host1x_cdma_push(&ch->cdma,
-				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
-					host1x_uclass_wait_syncpt_r(), 1),
-				 host1x_class_host_wait_syncpt(job->syncpt->id,
-					host1x_syncpt_read_max(sp)));
-	}
-
-	/* Synchronize base register to allow using it for relative waiting */
-	if (sp->base)
-		synchronize_syncpt_base(job);
-
-	host1x_hw_syncpt_assign_to_channel(host, sp, ch);
-	if (job->secondary_syncpt)
-		host1x_hw_syncpt_assign_to_channel(host, job->secondary_syncpt, ch);
-
-	/* add a setclass for modules that require it */
-	if (job->class)
-		host1x_cdma_push(&ch->cdma,
-				 host1x_opcode_setclass(job->class, 0, 0),
-				 HOST1X_OPCODE_NOP);
-
-	/*
-	 * Ensure engine DMA is idle and set new stream ID. May increment
-	 * syncpt max.
-	 */
-	host1x_channel_program_engine_streamid(job);
-
-	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
-	job->syncpt_end = syncval;
-
-	submit_gathers(job, syncval - user_syncpt_incrs);
+	channel_program_cdma(job);
 
 	/* end CDMA submit & stash pinned hMems into sync queue */
 	host1x_cdma_end(&ch->cdma, job);

From 951a3d1efa968061d64bef198e97e48a20ebac99 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Sun, 15 May 2022 12:31:34 +0300
Subject: [PATCH 47/73] gpu: host1x: Add MLOCK release code on Tegra234

With the full-featured opcode sequence using MLOCKs, we need to also
unlock those MLOCKs in the event of a timeout. However, it turns out
that on Tegra186/Tegra194, by default, we don't need to do this;
furthermore, on Tegra234 it is much simpler to do; so only implement
this on Tegra234 for the time being.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: Icc15ae705844cd26ae3f1d1146ff20f1d9b7a14d
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745956
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/hw/cdma_hw.c            | 34 ++++++++++++++++++++++
 drivers/gpu/host1x/hw/hw_host1x08_common.h |  7 +++++
 2 files changed, 41 insertions(+)

diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index e49cd5b8..1b65a10b 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -238,6 +238,37 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
 	cdma_timeout_restart(cdma, getptr);
 }
 
+static void timeout_release_mlock(struct host1x_cdma *cdma)
+{
+#if HOST1X_HW >= 8
+	/* Tegra186 and Tegra194 require a more complicated MLOCK release
+	 * sequence. Furthermore, those chips by default don't enforce MLOCKs,
+	 * so it turns out that if we don't /actually/ need MLOCKs, we can just
+	 * ignore them.
+	 *
+	 * As such, for now just implement this on Tegra234 where things are
+	 * stricter but also easy to implement.
+	 */
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	u32 offset;
+
+	switch (ch->client->class) {
+	case HOST1X_CLASS_VIC:
+		offset = HOST1X_COMMON_VIC_MLOCK;
+		break;
+	case HOST1X_CLASS_NVDEC:
+		offset = HOST1X_COMMON_NVDEC_MLOCK;
+		break;
+	default:
+		WARN(1, "%s was not updated for class %u", __func__, ch->client->class);
+		return;
+	}
+
+	host1x_common_writel(host1x, 0x0, offset);
+#endif
+}
+
 /*
  * If this timeout fires, it indicates the current sync_queue entry has
  * exceeded its TTL and the userctx should be timed out and remaining
@@ -288,6 +319,9 @@ static void cdma_timeout_handler(struct work_struct *work)
 	/* stop HW, resetting channel/module */
 	host1x_hw_cdma_freeze(host1x, cdma);
 
+	/* release any held MLOCK */
+	timeout_release_mlock(cdma);
+
 	host1x_cdma_update_sync_queue(cdma, ch->dev);
 	mutex_unlock(&cdma->lock);
 }
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h
index 4df28440..8e0c9915 100644
--- a/drivers/gpu/host1x/hw/hw_host1x08_common.h
+++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h
@@ -2,3 +2,10 @@
 /*
  * Copyright (c) 2022 NVIDIA Corporation.
  */
+
+#define HOST1X_COMMON_OFA_MLOCK			0x4050
+#define HOST1X_COMMON_NVJPG1_MLOCK		0x4070
+#define HOST1X_COMMON_VIC_MLOCK			0x4078
+#define HOST1X_COMMON_NVENC_MLOCK		0x407c
+#define HOST1X_COMMON_NVDEC_MLOCK		0x4080
+#define HOST1X_COMMON_NVJPG_MLOCK		0x4084

From da180e1e8b756441d0f12b7d5392a19bda6cec6a Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Mon, 16 May 2022 10:09:42 +0300
Subject: [PATCH 48/73] gpu: host1x: Use RESTART_W to skip timed out jobs on
 Tegra186+

When MLOCK enforcement is enabled, the 0-word write currently done
is rejected by the hardware outside of an MLOCK region. As such,
on these chips, which also have the newer, more convenient RESTART_W
opcode, use that instead to skip over the timed out job.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I9e22eb7ccd17127ca517a034f5dbd32326412f9d
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745957
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/cdma.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 286e2f97..8a632d71 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -457,9 +457,24 @@ syncpt_incr:
 				 * to offset 0xbad. This does nothing but
 				 * has a easily detected signature in debug
 				 * traces.
+				 *
+				 * On systems with MLOCK enforcement enabled,
+				 * the above 0 word writes would fall foul of
+				 * the enforcement. As such, in the first slot
+				 * put a RESTART_W opcode to the beginning
+				 * of the next job. We don't use this for older
+				 * chips since those only support the RESTART
+				 * opcode with inconvenient alignment requirements.
 				 */
-				mapped[2*slot+0] = 0x1bad0000;
-				mapped[2*slot+1] = 0x1bad0000;
+				if (i == 0 && host1x->info->has_wide_gather) {
+					unsigned int next_job = (job->first_get/8 + job->num_slots)
+						% HOST1X_PUSHBUFFER_SLOTS;
+					mapped[2*slot+0] = (0xd << 28) | (next_job * 2);
+					mapped[2*slot+1] = 0x0;
+				} else {
+					mapped[2*slot+0] = 0x1bad0000;
+					mapped[2*slot+1] = 0x1bad0000;
+				}
 			}
 
 			job->cancelled = true;

From 7ef97475c02d37d6aaacd6f16fbc4c8ce3346cb2 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Fri, 8 Jul 2022 18:18:01 +0300
Subject: [PATCH 49/73] gpu: host1x: Initialize syncval in channel_submit

During the refactoring of channel_submit, assignment of syncval
was moved but it is also used in channel_submit. Add this assignment
back to channel_submit as well.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I11acff0a5d28ddaf35648cdb3be0e638b996d5c1
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745958
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/hw/channel_hw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 0901dd2c..49629eab 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -321,6 +321,7 @@ static int channel_submit(struct host1x_job *job)
 	}
 
 	channel_program_cdma(job);
+	syncval = host1x_syncpt_read_max(sp);
 
 	/* end CDMA submit & stash pinned hMems into sync queue */
 	host1x_cdma_end(&ch->cdma, job);

From cc7156e5619035e4dbc98fa25f231d020c04c292 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Fri, 8 Jul 2022 18:18:02 +0300
Subject: [PATCH 50/73] gpu: host1x: Generalize host1x_cdma_push_wide

host1x_cdma_push_wide had the assumptions that the last parameter word
was a NOP opcode, and that NOP opcodes could be used in all situations.

Neither are true with the new job opcode sequence, so adjust the
function to not have these assumptions, and instead place an early
RESTART opcode when necessary to jump back to the beginning of the
pushbuffer.

Bug 3724727

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I88074e838e4f1471471f0848aca9d8d73c7b5f8c
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2745959
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Brad Griffis <bgriffis@nvidia.com>
---
 drivers/gpu/host1x/cdma.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 8a632d71..e408aefb 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -615,8 +615,8 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
 	struct host1x_channel *channel = cdma_to_channel(cdma);
 	struct host1x *host1x = cdma_to_host1x(cdma);
 	struct push_buffer *pb = &cdma->push_buffer;
-	unsigned int needed = 2, extra = 0, i;
 	unsigned int space = cdma->slots_free;
+	unsigned int needed = 2, extra = 0;
 
 	if (host1x_debug_trace_cmdbuf)
 		trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
@@ -634,20 +634,14 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
 	cdma->slots_free = space - needed;
 	cdma->slots_used += needed;
 
-	/*
-	 * Note that we rely on the fact that this is only used to submit wide
-	 * gather opcodes, which consist of 3 words, and they are padded with
-	 * a NOP to avoid having to deal with fractional slots (a slot always
-	 * represents 2 words). The fourth opcode passed to this function will
-	 * therefore always be a NOP.
-	 *
-	 * This works around a slight ambiguity when it comes to opcodes. For
-	 * all current host1x incarnations the NOP opcode uses the exact same
-	 * encoding (0x20000000), so we could hard-code the value here, but a
-	 * new incarnation may change it and break that assumption.
-	 */
-	for (i = 0; i < extra; i++)
-		host1x_pushbuffer_push(pb, op4, op4);
+	if (extra > 0) {
+		/*
+		 * If there isn't enough space at the tail of the pushbuffer,
+		 * insert a RESTART(0) here to go back to the beginning.
+		 * The code above adjusted the indexes appropriately.
+		 */
+		host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000);
+	}
 
 	host1x_pushbuffer_push(pb, op1, op2);
 	host1x_pushbuffer_push(pb, op3, op4);

From ea0f4f6a26b93a7dc03f92016408995de63a69d1 Mon Sep 17 00:00:00 2001
From: Brad Griffis <bgriffis@nvidia.com>
Date: Tue, 2 Aug 2022 18:13:38 +0000
Subject: [PATCH 51/73] gpu: host1x: ADD SID information for Tegra234 DLA

Add the SMMU SID information for the Tegra234 DLA devices to the
upstream host1x driver to ensure the SMMU is configured correctly.

Bug 3724727

Change-Id: I01edf2bc36f2b8a4b83077fc71f42463f9958a3d
Signed-off-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2754962
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index b2e8dc77..8bef2289 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -2,7 +2,7 @@
 /*
  * Tegra host1x driver
  *
- * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * Copyright (c) 2010-2022, NVIDIA Corporation.
  */
 
 #include <linux/clk.h>
@@ -268,6 +268,30 @@ static const struct host1x_sid_entry tegra234_sid_table[] = {
 		.offset = 0x34,
 		.limit = 0x34
 	},
+	{
+		/* NVDLA channel */
+		.base = 0x17e0,
+		.offset = 0x30,
+		.limit = 0x34
+	},
+	{
+		/* NVDLA MMIO */
+		.base = 0x16d8,
+		.offset = 0x0030,
+		.limit = 0x0034
+	},
+	{
+		/* NVDLA1 channel */
+		.base = 0x17e8,
+		.offset = 0x30,
+		.limit = 0x34
+	},
+	{
+		/* NVDLA1 MMIO */
+		.base = 0x16e0,
+		.offset = 0x0030,
+		.limit = 0x0034
+	},
 };
 
 static const struct host1x_info host1x08_info = {

From e1c4c53a8659c372df0255f85de862b8ade6da4c Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Wed, 14 Sep 2022 11:24:11 +0100
Subject: [PATCH 52/73] gpu: host1x: Update to Linux v6.0-rc5

Update the host1x driver to align with the latest upstream driver from
Linux v6.0-rc5. Please note that the context bus support is not
included, because this needs to be built into the kernel.

Bug 3767126

Change-Id: Ib962db616e9c1d72e46adf6c678ec26089e4b610
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2776689
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/channel.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 2a9a3a8d..2d0051d6 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -21,22 +21,18 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist,
 	if (!chlist->channels)
 		return -ENOMEM;
 
-	chlist->allocated_channels =
-		kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long),
-			GFP_KERNEL);
+	chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL);
 	if (!chlist->allocated_channels) {
 		kfree(chlist->channels);
 		return -ENOMEM;
 	}
 
-	bitmap_zero(chlist->allocated_channels, num_channels);
-
 	return 0;
 }
 
 void host1x_channel_list_free(struct host1x_channel_list *chlist)
 {
-	kfree(chlist->allocated_channels);
+	bitmap_free(chlist->allocated_channels);
 	kfree(chlist->channels);
 }
 

From 9cfe33dc6879a615e0cdf0d7ac865bea3449fb4c Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 19 May 2022 16:37:55 +0300
Subject: [PATCH 53/73] gpu: host1x: Add stream ID register data for NVDEC on
 Tegra234

Add entries for NVDEC to the Tegra234 SID table.

Bug 3778105

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: Iff6c33795b74faf5e0aea72751ad6c4b43cf6bb2
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2771764
(cherry picked from commit 3e501a18ff2392292d19bc513db6f19d31f38227)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2768895
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 8bef2289..613f4d0e 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -268,6 +268,18 @@ static const struct host1x_sid_entry tegra234_sid_table[] = {
 		.offset = 0x34,
 		.limit = 0x34
 	},
+	{
+		/* NVDEC channel */
+		.base = 0x17c8,
+		.offset = 0x30,
+		.limit = 0x30,
+	},
+	{
+		/* NVDEC MMIO */
+		.base = 0x1698,
+		.offset = 0x34,
+		.limit = 0x34,
+	},
 	{
 		/* NVDLA channel */
 		.base = 0x17e0,

From 5de571fe8d36074f29042851b6726c63d4d0b153 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Wed, 10 Aug 2022 17:09:16 +0300
Subject: [PATCH 54/73] drm/tegra: Add NVENC/NVJPG support on Tegra234

Add NVENC/NVJPG support on Tegra234. These work similarly to the
ones on Tegra194, so no big changes.

Bug 3778105

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I4a4b12d5625f927c69a6dc1c6b6bade1bca7b171
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2771770
(cherry picked from commit ee6576a1493dae1ddc5c8dc3b415d0987f670390)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2759062
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 613f4d0e..3449aa5f 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -280,6 +280,42 @@ static const struct host1x_sid_entry tegra234_sid_table[] = {
 		.offset = 0x34,
 		.limit = 0x34,
 	},
+	{
+		/* NVENC channel */
+		.base = 0x17c0,
+		.offset = 0x30,
+		.limit = 0x30,
+	},
+	{
+		/* NVENC MMIO */
+		.base = 0x1690,
+		.offset = 0x34,
+		.limit = 0x34,
+	},
+	{
+		/* NVJPG channel */
+		.base = 0x17d0,
+		.offset = 0x30,
+		.limit = 0x30,
+	},
+	{
+		/* NVJPG MMIO */
+		.base = 0x16a0,
+		.offset = 0x34,
+		.limit = 0x34,
+	},
+	{
+		/* NVJPG1 channel */
+		.base = 0x17a8,
+		.offset = 0x30,
+		.limit = 0x30,
+	},
+	{
+		/* NVJPG1 MMIO */
+		.base = 0x16b0,
+		.offset = 0x34,
+		.limit = 0x34,
+	},
 	{
 		/* NVDLA channel */
 		.base = 0x17e0,

From a31d614845ea55c69a8b62dd255b088030dc65b6 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Wed, 7 Sep 2022 13:58:35 +0300
Subject: [PATCH 55/73] gpu: host1x: Add mlock release code for NVENC/NVJPG

Add definitions to allow mlock releasing on recovery for NVENC/NVJPG
on Tegra234.

Bug 3778105

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: Iea33063b0d71add9fe816d93c96401087a12a657
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2772958
(cherry picked from commit e5f17c5842af24bde32e74d6eb2a61b0d90cae3d)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2772920
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/hw/cdma_hw.c                | 9 +++++++++
 drivers/gpu/host1x/include/linux/host1x-next.h | 1 +
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 1b65a10b..96f341ad 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -260,6 +260,15 @@ static void timeout_release_mlock(struct host1x_cdma *cdma)
 	case HOST1X_CLASS_NVDEC:
 		offset = HOST1X_COMMON_NVDEC_MLOCK;
 		break;
+	case HOST1X_CLASS_NVENC:
+		offset = HOST1X_COMMON_NVENC_MLOCK;
+		break;
+	case HOST1X_CLASS_NVJPG:
+		offset = HOST1X_COMMON_NVJPG_MLOCK;
+		break;
+	case HOST1X_CLASS_NVJPG1:
+		offset = HOST1X_COMMON_NVJPG1_MLOCK;
+		break;
 	default:
 		WARN(1, "%s was not updated for class %u", __func__, ch->client->class);
 		return;
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 1538cda5..018ba549 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -13,6 +13,7 @@
 
 enum host1x_class {
 	HOST1X_CLASS_HOST1X = 0x1,
+	HOST1X_CLASS_NVJPG1 = 0x7,
 	HOST1X_CLASS_NVENC = 0x21,
 	HOST1X_CLASS_NVENC1 = 0x22,
 	HOST1X_CLASS_GR2D = 0x51,

From f374e1804ea3bfd7c4c31c5c58df403d9ca60802 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 29 Sep 2022 13:36:50 +0300
Subject: [PATCH 56/73] drm/tegra: Add OFA support

Add support for the optical flow accelerator. Implementation is the
same as for other Falcons except that we omit some legacy things
since the engine only exists from T234 onwards, and the addition
of having to initialize the OFA's safety RAM before boot.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I9612e82a116cc76be492a0c533afce67c42f6a2c
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2784964
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 3449aa5f..acf5130d 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -340,6 +340,18 @@ static const struct host1x_sid_entry tegra234_sid_table[] = {
 		.offset = 0x0030,
 		.limit = 0x0034
 	},
+	{
+		/* OFA channel */
+		.base = 0x1768,
+		.offset = 0x30,
+		.limit = 0x30,
+	},
+	{
+		/* OFA MMIO */
+		.base = 0x16e8,
+		.offset = 0x34,
+		.limit = 0x34,
+	},
 };
 
 static const struct host1x_info host1x08_info = {

From 128385051afa074c491f4bd8132a3613c3f9d24c Mon Sep 17 00:00:00 2001
From: Akhil R <akhilrajeev@nvidia.com>
Date: Tue, 27 Sep 2022 15:54:13 +0530
Subject: [PATCH 57/73] gpu: host1x: Add SE SID entries for Tegra234.

Add Security Engine Stream ID  entries in host1x SID table for
Tegra234.

Bug 3583641

Signed-off-by: Akhil R <akhilrajeev@nvidia.com>
Change-Id: I5d96478aad8d24208bad92e61942a57c0fd5df1a
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2782822
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index acf5130d..80ebf0ef 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -2,7 +2,7 @@
 /*
  * Tegra host1x driver
  *
- * Copyright (c) 2010-2022, NVIDIA Corporation.
+ * Copyright (c) 2010-2022, NVIDIA CORPORATION & AFFILIATES. All Rights Reserved.
  */
 
 #include <linux/clk.h>
@@ -256,6 +256,42 @@ static const struct host1x_info host1x07_info = {
  * and firmware stream ID in the MMIO path table.
  */
 static const struct host1x_sid_entry tegra234_sid_table[] = {
+	{
+		/* SE1 MMIO */
+		.base = 0x1650,
+		.offset = 0x90,
+		.limit = 0x90
+	},
+	{
+		/* SE2 MMIO */
+		.base = 0x1658,
+		.offset = 0x90,
+		.limit = 0x90
+	},
+	{
+		/* SE4 MMIO */
+		.base = 0x1660,
+		.offset = 0x90,
+		.limit = 0x90
+	},
+	{
+		/* SE1 channel */
+		.base = 0x1730,
+		.offset = 0x90,
+		.limit = 0x90
+	},
+	{
+		/* SE2 channel */
+		.base = 0x1738,
+		.offset = 0x90,
+		.limit = 0x90
+	},
+	{
+		/* SE4 channel */
+		.base = 0x1740,
+		.offset = 0x90,
+		.limit = 0x90
+	},
 	{
 		/* VIC channel */
 		.base = 0x17b8,

From 242251a0e1f04bedb762e49347f37fc43c53c4df Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 26 Aug 2021 12:23:26 +0300
Subject: [PATCH 58/73] gpu: host1x: Implement syncpoint wait using DMA fences

In anticipation of removal of the intr API, move host1x_syncpt_wait
to use DMA fences instead. As of this patch, this means that waits
have a 30 second maximum timeout because of the implicit timeout
we have with fences, but that will be lifted in a follow-up patch.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I82a262b73861b35c4031983f4134d4b4006e3b16
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2786634
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/syncpt.c | 96 ++++++++-----------------------------
 1 file changed, 20 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index f87a8705..75f58ec2 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -7,6 +7,7 @@
 
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/dma-fence.h>
 #include <linux/slab.h>
 
 #include <trace/events/host1x.h>
@@ -209,17 +210,6 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp)
 }
 EXPORT_SYMBOL(host1x_syncpt_incr);
 
-/*
- * Updated sync point form hardware, and returns true if syncpoint is expired,
- * false if we may need to wait
- */
-static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
-{
-	host1x_hw_syncpt_load(sp->host, sp);
-
-	return host1x_syncpt_is_expired(sp, thresh);
-}
-
 /**
  * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
  * @sp: host1x syncpoint
@@ -230,10 +220,10 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 		       u32 *value)
 {
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-	void *ref;
-	struct host1x_waitlist *waiter;
-	int err = 0, check_count = 0;
+	struct dma_fence *fence;
+	long wait_err;
+
+	host1x_hw_syncpt_load(sp->host, sp);
 
 	if (value)
 		*value = host1x_syncpt_load(sp);
@@ -241,73 +231,27 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	if (host1x_syncpt_is_expired(sp, thresh))
 		return 0;
 
-	if (!timeout) {
-		err = -EAGAIN;
-		goto done;
-	}
-
-	/* allocate a waiter */
-	waiter = kzalloc(sizeof(*waiter), GFP_KERNEL);
-	if (!waiter) {
-		err = -ENOMEM;
-		goto done;
-	}
-
-	/* schedule a wakeup when the syncpoint value is reached */
-	err = host1x_intr_add_action(sp->host, sp, thresh,
-				     HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
-				     &wq, waiter, &ref);
-	if (err)
-		goto done;
-
-	err = -EAGAIN;
-	/* Caller-specified timeout may be impractically low */
 	if (timeout < 0)
 		timeout = LONG_MAX;
+	else if (timeout == 0)
+		return -EAGAIN;
 
-	/* wait for the syncpoint, or timeout, or signal */
-	while (timeout) {
-		long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
-		int remain;
+	fence = host1x_fence_create(sp, thresh);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
 
-		remain = wait_event_interruptible_timeout(wq,
-				syncpt_load_min_is_expired(sp, thresh),
-				check);
-		if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
-			if (value)
-				*value = host1x_syncpt_load(sp);
+	wait_err = dma_fence_wait_timeout(fence, true, timeout);
+	dma_fence_put(fence);
 
-			err = 0;
+	if (value)
+		*value = host1x_syncpt_load(sp);
 
-			break;
-		}
-
-		if (remain < 0) {
-			err = remain;
-			break;
-		}
-
-		timeout -= check;
-
-		if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
-			dev_warn(sp->host->dev,
-				"%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n",
-				 current->comm, sp->id, sp->name,
-				 thresh, timeout);
-
-			host1x_debug_dump_syncpts(sp->host);
-
-			if (check_count == MAX_STUCK_CHECK_COUNT)
-				host1x_debug_dump(sp->host);
-
-			check_count++;
-		}
-	}
-
-	host1x_intr_put_ref(sp->host, sp->id, ref, true);
-
-done:
-	return err;
+	if (wait_err == 0)
+		return -EAGAIN;
+	else if (wait_err < 0)
+		return wait_err;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(host1x_syncpt_wait);
 

From 303267d828f6fd5a91228fd28e8f90af48677590 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 26 Aug 2021 12:24:59 +0300
Subject: [PATCH 59/73] gpu: host1x: Implement job tracking using DMA fences

In anticipation of removal of the intr API, implement job tracking
using DMA fences instead. The main two things about this are
making cdma_update schedule the work since fence completion can
now be called from interrupt context, and some complication in
ensuring the callback is not running when we free the fence.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I25f7f5a6cad24a00563eed79e0e17b1df1eadcdc
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2786636
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/cdma.c                     | 14 ++++--
 drivers/gpu/host1x/cdma.h                     |  2 +
 drivers/gpu/host1x/hw/channel_hw.c            | 49 +++++++++++--------
 .../gpu/host1x/include/linux/host1x-next.h    |  7 ++-
 drivers/gpu/host1x/job.c                      | 18 +++++--
 5 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index e408aefb..582f0487 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -490,6 +490,15 @@ resume:
 	host1x_hw_cdma_resume(host1x, cdma, restart_addr);
 }
 
+void cdma_update_work(struct work_struct *work)
+{
+	struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work);
+
+	mutex_lock(&cdma->lock);
+	update_cdma_locked(cdma);
+	mutex_unlock(&cdma->lock);
+}
+
 /*
  * Create a cdma
  */
@@ -499,6 +508,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
 
 	mutex_init(&cdma->lock);
 	init_completion(&cdma->complete);
+	INIT_WORK(&cdma->update_work, cdma_update_work);
 
 	INIT_LIST_HEAD(&cdma->sync_queue);
 
@@ -679,7 +689,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
  */
 void host1x_cdma_update(struct host1x_cdma *cdma)
 {
-	mutex_lock(&cdma->lock);
-	update_cdma_locked(cdma);
-	mutex_unlock(&cdma->lock);
+	schedule_work(&cdma->update_work);
 }
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 12c4327c..7fd8168a 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/completion.h>
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
 struct host1x_syncpt;
 struct host1x_userctx_timeout;
@@ -69,6 +70,7 @@ struct host1x_cdma {
 	struct buffer_timeout timeout;	/* channel's timeout state/wq */
 	bool running;
 	bool torndown;
+	struct work_struct update_work;
 };
 
 #define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 49629eab..00afe8fb 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -278,6 +278,15 @@ static void channel_program_cdma(struct host1x_job *job)
 #endif
 }
 
+static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb);
+
+	/* Schedules CDMA update. */
+	host1x_cdma_update(&job->channel->cdma);
+	complete(&job->fence_cb_done);
+}
+
 static int channel_submit(struct host1x_job *job)
 {
 	struct host1x_channel *ch = job->channel;
@@ -285,7 +294,6 @@ static int channel_submit(struct host1x_job *job)
 	u32 prev_max = 0;
 	u32 syncval;
 	int err;
-	struct host1x_waitlist *completed_waiter = NULL;
 	struct host1x *host = dev_get_drvdata(ch->dev->parent);
 
 	trace_host1x_channel_submit(dev_name(ch->dev),
@@ -298,14 +306,7 @@ static int channel_submit(struct host1x_job *job)
 	/* get submit lock */
 	err = mutex_lock_interruptible(&ch->submitlock);
 	if (err)
-		goto error;
-
-	completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
-	if (!completed_waiter) {
-		mutex_unlock(&ch->submitlock);
-		err = -ENOMEM;
-		goto error;
-	}
+		return err;
 
 	host1x_channel_set_streamid(ch);
 	host1x_enable_gather_filter(ch);
@@ -317,31 +318,37 @@ static int channel_submit(struct host1x_job *job)
 	err = host1x_cdma_begin(&ch->cdma, job);
 	if (err) {
 		mutex_unlock(&ch->submitlock);
-		goto error;
+		return err;
 	}
 
 	channel_program_cdma(job);
 	syncval = host1x_syncpt_read_max(sp);
 
+	/*
+	 * Create fence before submitting job to HW to avoid job completing
+	 * before the fence is set up.
+	 */
+	job->fence = host1x_fence_create(sp, syncval);
+	if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) {
+		job->fence = NULL;
+	} else {
+		err = dma_fence_add_callback(job->fence, &job->fence_cb,
+					     job_complete_callback);
+	}
+
 	/* end CDMA submit & stash pinned hMems into sync queue */
 	host1x_cdma_end(&ch->cdma, job);
 
 	trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
 
-	/* schedule a submit complete interrupt */
-	err = host1x_intr_add_action(host, sp, syncval,
-				     HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
-				     completed_waiter, &job->waiter);
-	completed_waiter = NULL;
-	WARN(err, "Failed to set submit complete interrupt");
-
 	mutex_unlock(&ch->submitlock);
 
-	return 0;
+	if (err == -ENOENT)
+		host1x_cdma_update(&ch->cdma);
+	else
+		WARN(err, "Failed to set submit complete interrupt");
 
-error:
-	kfree(completed_waiter);
-	return err;
+	return 0;
 }
 
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 018ba549..f4b986cb 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -8,6 +8,7 @@
 
 #include <linux/device.h>
 #include <linux/dma-direction.h>
+#include <linux/dma-fence.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -296,8 +297,10 @@ struct host1x_job {
 	/* Non-job tracking related syncpoint */
 	struct host1x_syncpt *secondary_syncpt;
 
-	/* Completion waiter ref */
-	void *waiter;
+	/* Completion fence for job tracking */
+	struct dma_fence *fence;
+	struct dma_fence_cb fence_cb;
+	struct completion fence_cb_done;
 
 	/* Maximum time to wait for this job */
 	unsigned int timeout;
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index dc652481..a03aae1a 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -55,6 +55,7 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 	job->enable_firewall = enable_firewall;
 
 	kref_init(&job->ref);
+	init_completion(&job->fence_cb_done);
 	job->channel = ch;
 
 	/* Redistribute memory to the structs  */
@@ -84,13 +85,24 @@ EXPORT_SYMBOL(host1x_job_get);
 static void job_free(struct kref *ref)
 {
 	struct host1x_job *job = container_of(ref, struct host1x_job, ref);
+	bool removed;
 
 	if (job->release)
 		job->release(job);
 
-	if (job->waiter)
-		host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
-				    job->waiter, false);
+	if (job->fence) {
+		removed = dma_fence_remove_callback(job->fence, &job->fence_cb);
+		if (!removed) {
+			/*
+			 * Wait until possible pending callback is no longer
+			 * using the job structure.
+			 */
+
+			wait_for_completion(&job->fence_cb_done);
+		}
+
+		dma_fence_put(job->fence);
+	}
 
 	if (job->syncpt)
 		host1x_syncpt_put(job->syncpt);

From 33a40367c02832ba7e7ee871cb8a1f8c85f2bae2 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 7 Sep 2021 19:15:08 +0300
Subject: [PATCH 60/73] gpu: host1x: Rewrite syncpoint interrupt handling

Move from the old, complex intr handling code to a new implementation
based on dma_fences. While there is a fair bit of churn to get there,
the new implementation is much simpler and likely faster as well due
to allowing signaling directly from interrupt context.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I81c47fa1946679813f90e3fd8e1d1e9d6342143e
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2786635
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/debug.c      |   7 +-
 drivers/gpu/host1x/dev.c        |   4 +-
 drivers/gpu/host1x/dev.h        |  10 +-
 drivers/gpu/host1x/fence.c      |  96 +++------
 drivers/gpu/host1x/fence.h      |  18 +-
 drivers/gpu/host1x/hw/intr_hw.c |  74 ++-----
 drivers/gpu/host1x/intr.c       | 368 +++++++-------------------------
 drivers/gpu/host1x/intr.h       |  83 +------
 drivers/gpu/host1x/syncpt.h     |   3 +-
 9 files changed, 166 insertions(+), 497 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 34c2e36d..b4d6a9e2 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -77,6 +77,7 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
 
 static void show_syncpts(struct host1x *m, struct output *o, bool show_all)
 {
+	unsigned long irqflags;
 	struct list_head *pos;
 	unsigned int i;
 	int err;
@@ -92,10 +93,10 @@ static void show_syncpts(struct host1x *m, struct output *o, bool show_all)
 		u32 min = host1x_syncpt_load(m->syncpt + i);
 		unsigned int waiters = 0;
 
-		spin_lock(&m->syncpt[i].intr.lock);
-		list_for_each(pos, &m->syncpt[i].intr.wait_head)
+		spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags);
+		list_for_each(pos, &m->syncpt[i].fences.list)
 			waiters++;
-		spin_unlock(&m->syncpt[i].intr.lock);
+		spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags);
 
 		if (!kref_read(&m->syncpt[i].ref))
 			continue;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 80ebf0ef..46f143bc 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -663,7 +663,7 @@ static int host1x_probe(struct platform_device *pdev)
 			return PTR_ERR(host->regs);
 	}
 
-	syncpt_irq = platform_get_irq(pdev, 0);
+	host->syncpt_irq = platform_get_irq(pdev, 0);
 	if (syncpt_irq < 0)
 		return syncpt_irq;
 
@@ -725,7 +725,7 @@ static int host1x_probe(struct platform_device *pdev)
 		goto free_contexts;
 	}
 
-	err = host1x_intr_init(host, syncpt_irq);
+	err = host1x_intr_init(host);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize interrupts\n");
 		goto deinit_syncpt;
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 920e5548..75de50fe 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -74,8 +74,7 @@ struct host1x_syncpt_ops {
 };
 
 struct host1x_intr_ops {
-	int (*init_host_sync)(struct host1x *host, u32 cpm,
-		void (*syncpt_thresh_work)(struct work_struct *work));
+	int (*init_host_sync)(struct host1x *host, u32 cpm);
 	void (*set_syncpt_threshold)(
 		struct host1x *host, unsigned int id, u32 thresh);
 	void (*enable_syncpt_intr)(struct host1x *host, unsigned int id);
@@ -125,6 +124,7 @@ struct host1x {
 	void __iomem *regs;
 	void __iomem *hv_regs; /* hypervisor region */
 	void __iomem *common_regs;
+	int syncpt_irq;
 	struct host1x_syncpt *syncpt;
 	struct host1x_syncpt_base *bases;
 	struct device *dev;
@@ -138,7 +138,6 @@ struct host1x {
 	dma_addr_t iova_end;
 
 	struct mutex intr_mutex;
-	int intr_syncpt_irq;
 
 	const struct host1x_syncpt_ops *syncpt_op;
 	const struct host1x_intr_ops *intr_op;
@@ -216,10 +215,9 @@ static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
 	return host->syncpt_op->enable_protection(host);
 }
 
-static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
-			void (*syncpt_thresh_work)(struct work_struct *))
+static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm)
 {
-	return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work);
+	return host->intr_op->init_host_sync(host, cpm);
 }
 
 static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index f989f7fd..c77ce207 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -15,22 +15,6 @@
 #include "intr.h"
 #include "syncpt.h"
 
-static DEFINE_SPINLOCK(lock);
-
-struct host1x_syncpt_fence {
-	struct dma_fence base;
-
-	atomic_t signaling;
-
-	struct host1x_syncpt *sp;
-	u32 threshold;
-
-	struct host1x_waitlist *waiter;
-	void *waiter_ref;
-
-	struct delayed_work timeout_work;
-};
-
 static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f)
 {
 	return "host1x";
@@ -49,11 +33,12 @@ static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f)
 static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
 {
 	struct host1x_syncpt_fence *sf = to_host1x_fence(f);
-	int err;
 
 	if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
 		return false;
 
+	/* One reference for interrupt path, one for timeout path. */
+	dma_fence_get(f);
 	dma_fence_get(f);
 
 	/*
@@ -61,24 +46,13 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
 	 * reference to any fences for which 'enable_signaling' has been
 	 * called (and that have not been signalled).
 	 *
-	 * We provide a userspace API to create arbitrary syncpoint fences,
-	 * so we cannot normally guarantee that all fences get signalled.
+	 * We cannot (for now) normally guarantee that all fences get signalled.
 	 * As such, setup a timeout, so that long-lasting fences will get
 	 * reaped eventually.
 	 */
 	schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
 
-	err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold,
-				     HOST1X_INTR_ACTION_SIGNAL_FENCE, f,
-				     sf->waiter, &sf->waiter_ref);
-	if (err) {
-		cancel_delayed_work_sync(&sf->timeout_work);
-		dma_fence_put(f);
-		return false;
-	}
-
-	/* intr framework takes ownership of waiter */
-	sf->waiter = NULL;
+	host1x_intr_add_fence_locked(sf->sp->host, sf);
 
 	/*
 	 * The fence may get signalled at any time after the above call,
@@ -89,37 +63,32 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
 	return true;
 }
 
-static void host1x_syncpt_fence_release(struct dma_fence *f)
-{
-	struct host1x_syncpt_fence *sf = to_host1x_fence(f);
-
-	if (sf->waiter)
-		kfree(sf->waiter);
-
-	dma_fence_free(f);
-}
-
 const struct dma_fence_ops host1x_syncpt_fence_ops = {
 	.get_driver_name = host1x_syncpt_fence_get_driver_name,
 	.get_timeline_name = host1x_syncpt_fence_get_timeline_name,
 	.enable_signaling = host1x_syncpt_fence_enable_signaling,
-	.release = host1x_syncpt_fence_release,
 };
 
 void host1x_fence_signal(struct host1x_syncpt_fence *f)
 {
-	if (atomic_xchg(&f->signaling, 1))
+	if (atomic_xchg(&f->signaling, 1)) {
+		/*
+		 * Already on timeout path, but we removed the fence before
+		 * timeout path could, so drop interrupt path reference.
+		 */
+		dma_fence_put(&f->base);
 		return;
+	}
 
-	/*
-	 * Cancel pending timeout work - if it races, it will
-	 * not get 'f->signaling' and return.
-	 */
-	cancel_delayed_work_sync(&f->timeout_work);
+	if (cancel_delayed_work(&f->timeout_work)) {
+		/*
+		 * We know that the timeout path will not be entered.
+		 * Safe to drop the timeout path's reference now.
+		 */
+		dma_fence_put(&f->base);
+	}
 
-	host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false);
-
-	dma_fence_signal(&f->base);
+	dma_fence_signal_locked(&f->base);
 	dma_fence_put(&f->base);
 }
 
@@ -129,17 +98,24 @@ static void do_fence_timeout(struct work_struct *work)
 	struct host1x_syncpt_fence *f =
 		container_of(dwork, struct host1x_syncpt_fence, timeout_work);
 
-	if (atomic_xchg(&f->signaling, 1))
+	if (atomic_xchg(&f->signaling, 1)) {
+		/* Already on interrupt path, drop timeout path reference. */
+		dma_fence_put(&f->base);
 		return;
+	}
 
-	/*
-	 * Cancel pending timeout work - if it races, it will
-	 * not get 'f->signaling' and return.
-	 */
-	host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true);
+	if (host1x_intr_remove_fence(f->sp->host, f)) {
+		/*
+		 * Managed to remove fence from queue, so it's safe to drop
+		 * the interrupt path's reference.
+		 */
+		dma_fence_put(&f->base);
+	}
 
 	dma_fence_set_error(&f->base, -ETIMEDOUT);
 	dma_fence_signal(&f->base);
+
+	/* Drop timeout path reference. */
 	dma_fence_put(&f->base);
 }
 
@@ -151,16 +127,10 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
 	if (!fence)
 		return ERR_PTR(-ENOMEM);
 
-	fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL);
-	if (!fence->waiter) {
-		kfree(fence);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	fence->sp = sp;
 	fence->threshold = threshold;
 
-	dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock,
+	dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock,
 		       dma_fence_context_alloc(1), 0);
 
 	INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout);
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
index 70c91de8..4352d046 100644
--- a/drivers/gpu/host1x/fence.h
+++ b/drivers/gpu/host1x/fence.h
@@ -6,7 +6,23 @@
 #ifndef HOST1X_FENCE_H
 #define HOST1X_FENCE_H
 
-struct host1x_syncpt_fence;
+struct host1x_syncpt_fence {
+	struct dma_fence base;
+
+	atomic_t signaling;
+
+	struct host1x_syncpt *sp;
+	u32 threshold;
+
+	struct delayed_work timeout_work;
+
+	struct list_head list;
+};
+
+struct host1x_fence_list {
+	spinlock_t lock;
+	struct list_head list;
+};
 
 void host1x_fence_signal(struct host1x_syncpt_fence *fence);
 
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index 9acccdb1..b915ef7d 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -13,23 +13,6 @@
 #include "../intr.h"
 #include "../dev.h"
 
-/*
- * Sync point threshold interrupt service function
- * Handles sync point threshold triggers, in interrupt context
- */
-static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
-{
-	unsigned int id = syncpt->id;
-	struct host1x *host = syncpt->host;
-
-	host1x_sync_writel(host, BIT(id % 32),
-		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32));
-	host1x_sync_writel(host, BIT(id % 32),
-		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
-
-	schedule_work(&syncpt->intr.work);
-}
-
 static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 {
 	struct host1x *host = dev_id;
@@ -39,17 +22,20 @@ static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
 		reg = host1x_sync_readl(host,
 			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
-		for_each_set_bit(id, &reg, 32) {
-			struct host1x_syncpt *syncpt =
-				host->syncpt + (i * 32 + id);
-			host1x_intr_syncpt_handle(syncpt);
-		}
+
+		host1x_sync_writel(host, reg,
+			HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i));
+		host1x_sync_writel(host, reg,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+
+		for_each_set_bit(id, &reg, 32)
+			host1x_intr_handle_interrupt(host, i * 32 + id);
 	}
 
 	return IRQ_HANDLED;
 }
 
-static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
+static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
 {
 	unsigned int i;
 
@@ -90,45 +76,38 @@ static void intr_hw_init(struct host1x *host, u32 cpm)
 }
 
 static int
-_host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
-			    void (*syncpt_thresh_work)(struct work_struct *))
+host1x_intr_init_host_sync(struct host1x *host, u32 cpm)
 {
-	unsigned int i;
 	int err;
 
 	host1x_hw_intr_disable_all_syncpt_intrs(host);
 
-	for (i = 0; i < host->info->nb_pts; i++)
-		INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work);
-
-	err = devm_request_irq(host->dev, host->intr_syncpt_irq,
+	err = devm_request_irq(host->dev, host->syncpt_irq,
 			       syncpt_thresh_isr, IRQF_SHARED,
 			       "host1x_syncpt", host);
-	if (err < 0) {
-		WARN_ON(1);
+	if (err < 0)
 		return err;
-	}
 
 	intr_hw_init(host, cpm);
 
 	return 0;
 }
 
-static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
+static void host1x_intr_set_syncpt_threshold(struct host1x *host,
 					      unsigned int id,
 					      u32 thresh)
 {
 	host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
 }
 
-static void _host1x_intr_enable_syncpt_intr(struct host1x *host,
+static void host1x_intr_enable_syncpt_intr(struct host1x *host,
 					    unsigned int id)
 {
 	host1x_sync_writel(host, BIT(id % 32),
 		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32));
 }
 
-static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
+static void host1x_intr_disable_syncpt_intr(struct host1x *host,
 					     unsigned int id)
 {
 	host1x_sync_writel(host, BIT(id % 32),
@@ -137,23 +116,10 @@ static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
 		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
 }
 
-static int _host1x_free_syncpt_irq(struct host1x *host)
-{
-	unsigned int i;
-
-	devm_free_irq(host->dev, host->intr_syncpt_irq, host);
-
-	for (i = 0; i < host->info->nb_pts; i++)
-		cancel_work_sync(&host->syncpt[i].intr.work);
-
-	return 0;
-}
-
 static const struct host1x_intr_ops host1x_intr_ops = {
-	.init_host_sync = _host1x_intr_init_host_sync,
-	.set_syncpt_threshold = _host1x_intr_set_syncpt_threshold,
-	.enable_syncpt_intr = _host1x_intr_enable_syncpt_intr,
-	.disable_syncpt_intr = _host1x_intr_disable_syncpt_intr,
-	.disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs,
-	.free_syncpt_irq = _host1x_free_syncpt_irq,
+	.init_host_sync = host1x_intr_init_host_sync,
+	.set_syncpt_threshold = host1x_intr_set_syncpt_threshold,
+	.enable_syncpt_intr = host1x_intr_enable_syncpt_intr,
+	.disable_syncpt_intr = host1x_intr_disable_syncpt_intr,
+	.disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs,
 };
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 965ba218..995bfa98 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -2,299 +2,113 @@
 /*
  * Tegra host1x Interrupt Management
  *
- * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * Copyright (c) 2010-2021, NVIDIA Corporation.
  */
 
 #include <linux/clk.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/irq.h>
 
-#include <trace/events/host1x.h>
-#include "channel.h"
 #include "dev.h"
 #include "fence.h"
 #include "intr.h"
 
-/* Wait list management */
-
-enum waitlist_state {
-	WLS_PENDING,
-	WLS_REMOVED,
-	WLS_CANCELLED,
-	WLS_HANDLED
-};
-
-static void waiter_release(struct kref *kref)
+static void host1x_intr_add_fence_to_list(struct host1x_fence_list *list,
+					  struct host1x_syncpt_fence *fence)
 {
-	kfree(container_of(kref, struct host1x_waitlist, refcount));
+	struct host1x_syncpt_fence *fence_in_list;
+
+	list_for_each_entry_reverse(fence_in_list, &list->list, list) {
+		if ((s32)(fence_in_list->threshold - fence->threshold) <= 0) {
+			/* Fence in list is before us, we can insert here */
+			list_add(&fence->list, &fence_in_list->list);
+			return;
+		}
+	}
+
+	/* Add as first in list */
+	list_add(&fence->list, &list->list);
 }
 
-/*
- * add a waiter to a waiter queue, sorted by threshold
- * returns true if it was added at the head of the queue
- */
-static bool add_waiter_to_queue(struct host1x_waitlist *waiter,
-				struct list_head *queue)
+static void host1x_intr_update_hw_state(struct host1x *host, struct host1x_syncpt *sp)
 {
-	struct host1x_waitlist *pos;
-	u32 thresh = waiter->thresh;
+	struct host1x_syncpt_fence *fence;
 
-	list_for_each_entry_reverse(pos, queue, list)
-		if ((s32)(pos->thresh - thresh) <= 0) {
-			list_add(&waiter->list, &pos->list);
-			return false;
-		}
+	if (!list_empty(&sp->fences.list)) {
+		fence = list_first_entry(&sp->fences.list, struct host1x_syncpt_fence, list);
+
+		host1x_hw_intr_set_syncpt_threshold(host, sp->id, fence->threshold);
+		host1x_hw_intr_enable_syncpt_intr(host, sp->id);
+	} else {
+		host1x_hw_intr_disable_syncpt_intr(host, sp->id);
+	}
+}
+
+void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence)
+{
+	struct host1x_fence_list *fence_list = &fence->sp->fences;
+
+	INIT_LIST_HEAD(&fence->list);
+
+	host1x_intr_add_fence_to_list(fence_list, fence);
+	host1x_intr_update_hw_state(host, fence->sp);
+}
+
+bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence)
+{
+	struct host1x_fence_list *fence_list = &fence->sp->fences;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&fence_list->lock, irqflags);
+
+	if (list_empty(&fence->list)) {
+		spin_unlock_irqrestore(&fence_list->lock, irqflags);
+		return false;
+	}
+
+	list_del_init(&fence->list);
+	host1x_intr_update_hw_state(host, fence->sp);
+
+	spin_unlock_irqrestore(&fence_list->lock, irqflags);
 
-	list_add(&waiter->list, queue);
 	return true;
 }
 
-/*
- * run through a waiter queue for a single sync point ID
- * and gather all completed waiters into lists by actions
- */
-static void remove_completed_waiters(struct list_head *head, u32 sync,
-			struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id)
 {
-	struct list_head *dest;
-	struct host1x_waitlist *waiter, *next, *prev;
+	struct host1x_syncpt *sp = &host->syncpt[id];
+	struct host1x_syncpt_fence *fence, *tmp;
+	unsigned int value;
 
-	list_for_each_entry_safe(waiter, next, head, list) {
-		if ((s32)(waiter->thresh - sync) > 0)
+	value = host1x_syncpt_load(sp);
+
+	spin_lock(&sp->fences.lock);
+
+	list_for_each_entry_safe(fence, tmp, &sp->fences.list, list) {
+		if (((value - fence->threshold) & 0x80000000U) != 0U) {
+			/* Fence is not yet expired, we are done */
 			break;
-
-		dest = completed + waiter->action;
-
-		/* consolidate submit cleanups */
-		if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE &&
-		    !list_empty(dest)) {
-			prev = list_entry(dest->prev,
-					  struct host1x_waitlist, list);
-			if (prev->data == waiter->data) {
-				prev->count++;
-				dest = NULL;
-			}
 		}
 
-		/* PENDING->REMOVED or CANCELLED->HANDLED */
-		if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) {
-			list_del(&waiter->list);
-			kref_put(&waiter->refcount, waiter_release);
-		} else
-			list_move_tail(&waiter->list, dest);
-	}
-}
-
-static void reset_threshold_interrupt(struct host1x *host,
-				      struct list_head *head,
-				      unsigned int id)
-{
-	u32 thresh =
-		list_first_entry(head, struct host1x_waitlist, list)->thresh;
-
-	host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
-	host1x_hw_intr_enable_syncpt_intr(host, id);
-}
-
-static void action_submit_complete(struct host1x_waitlist *waiter)
-{
-	struct host1x_channel *channel = waiter->data;
-
-	host1x_cdma_update(&channel->cdma);
-
-	/*  Add nr_completed to trace */
-	trace_host1x_channel_submit_complete(dev_name(channel->dev),
-					     waiter->count, waiter->thresh);
-}
-
-static void action_wakeup(struct host1x_waitlist *waiter)
-{
-	wait_queue_head_t *wq = waiter->data;
-
-	wake_up(wq);
-}
-
-static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
-{
-	wait_queue_head_t *wq = waiter->data;
-
-	wake_up_interruptible(wq);
-}
-
-static void action_signal_fence(struct host1x_waitlist *waiter)
-{
-	struct host1x_syncpt_fence *f = waiter->data;
-
-	host1x_fence_signal(f);
-}
-
-typedef void (*action_handler)(struct host1x_waitlist *waiter);
-
-static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
-	action_submit_complete,
-	action_wakeup,
-	action_wakeup_interruptible,
-	action_signal_fence,
-};
-
-static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
-{
-	struct list_head *head = completed;
-	unsigned int i;
-
-	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
-		action_handler handler = action_handlers[i];
-		struct host1x_waitlist *waiter, *next;
-
-		list_for_each_entry_safe(waiter, next, head, list) {
-			list_del(&waiter->list);
-			handler(waiter);
-			WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) !=
-				WLS_REMOVED);
-			kref_put(&waiter->refcount, waiter_release);
-		}
-	}
-}
-
-/*
- * Remove & handle all waiters that have completed for the given syncpt
- */
-static int process_wait_list(struct host1x *host,
-			     struct host1x_syncpt *syncpt,
-			     u32 threshold)
-{
-	struct list_head completed[HOST1X_INTR_ACTION_COUNT];
-	unsigned int i;
-	int empty;
-
-	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i)
-		INIT_LIST_HEAD(completed + i);
-
-	spin_lock(&syncpt->intr.lock);
-
-	remove_completed_waiters(&syncpt->intr.wait_head, threshold,
-				 completed);
-
-	empty = list_empty(&syncpt->intr.wait_head);
-	if (empty)
-		host1x_hw_intr_disable_syncpt_intr(host, syncpt->id);
-	else
-		reset_threshold_interrupt(host, &syncpt->intr.wait_head,
-					  syncpt->id);
-
-	spin_unlock(&syncpt->intr.lock);
-
-	run_handlers(completed);
-
-	return empty;
-}
-
-/*
- * Sync point threshold interrupt service thread function
- * Handles sync point threshold triggers, in thread context
- */
-
-static void syncpt_thresh_work(struct work_struct *work)
-{
-	struct host1x_syncpt_intr *syncpt_intr =
-		container_of(work, struct host1x_syncpt_intr, work);
-	struct host1x_syncpt *syncpt =
-		container_of(syncpt_intr, struct host1x_syncpt, intr);
-	unsigned int id = syncpt->id;
-	struct host1x *host = syncpt->host;
-
-	(void)process_wait_list(host, syncpt,
-				host1x_syncpt_load(host->syncpt + id));
-}
-
-int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
-			   u32 thresh, enum host1x_intr_action action,
-			   void *data, struct host1x_waitlist *waiter,
-			   void **ref)
-{
-	int queue_was_empty;
-
-	if (waiter == NULL) {
-		pr_warn("%s: NULL waiter\n", __func__);
-		return -EINVAL;
+		list_del_init(&fence->list);
+		host1x_fence_signal(fence);
 	}
 
-	/* initialize a new waiter */
-	INIT_LIST_HEAD(&waiter->list);
-	kref_init(&waiter->refcount);
-	if (ref)
-		kref_get(&waiter->refcount);
-	waiter->thresh = thresh;
-	waiter->action = action;
-	atomic_set(&waiter->state, WLS_PENDING);
-	waiter->data = data;
-	waiter->count = 1;
+	/* Re-enable interrupt if necessary */
+	host1x_intr_update_hw_state(host, sp);
 
-	spin_lock(&syncpt->intr.lock);
-
-	queue_was_empty = list_empty(&syncpt->intr.wait_head);
-
-	if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) {
-		/* added at head of list - new threshold value */
-		host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh);
-
-		/* added as first waiter - enable interrupt */
-		if (queue_was_empty)
-			host1x_hw_intr_enable_syncpt_intr(host, syncpt->id);
-	}
-
-	if (ref)
-		*ref = waiter;
-
-	spin_unlock(&syncpt->intr.lock);
-
-	return 0;
+	spin_unlock(&sp->fences.lock);
 }
 
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
-			 bool flush)
-{
-	struct host1x_waitlist *waiter = ref;
-	struct host1x_syncpt *syncpt;
-
-	atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
-
-	syncpt = host->syncpt + id;
-
-	spin_lock(&syncpt->intr.lock);
-	if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
-	    WLS_CANCELLED) {
-		list_del(&waiter->list);
-		kref_put(&waiter->refcount, waiter_release);
-	}
-	spin_unlock(&syncpt->intr.lock);
-
-	if (flush) {
-		/* Wait until any concurrently executing handler has finished. */
-		while (atomic_read(&waiter->state) != WLS_HANDLED)
-			schedule();
-	}
-
-	kref_put(&waiter->refcount, waiter_release);
-}
-
-int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
+int host1x_intr_init(struct host1x *host)
 {
 	unsigned int id;
-	u32 nb_pts = host1x_syncpt_nb_pts(host);
 
 	mutex_init(&host->intr_mutex);
-	host->intr_syncpt_irq = irq_sync;
 
-	for (id = 0; id < nb_pts; ++id) {
-		struct host1x_syncpt *syncpt = host->syncpt + id;
+	for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) {
+		struct host1x_syncpt *syncpt = &host->syncpt[id];
 
-		spin_lock_init(&syncpt->intr.lock);
-		INIT_LIST_HEAD(&syncpt->intr.wait_head);
-		snprintf(syncpt->intr.thresh_irq_name,
-			 sizeof(syncpt->intr.thresh_irq_name),
-			 "host1x_sp_%02u", id);
+		spin_lock_init(&syncpt->fences.lock);
+		INIT_LIST_HEAD(&syncpt->fences.list);
 	}
 
 	return 0;
@@ -310,8 +124,7 @@ void host1x_intr_start(struct host1x *host)
 	int err;
 
 	mutex_lock(&host->intr_mutex);
-	err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000),
-					    syncpt_thresh_work);
+	err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000));
 	if (err) {
 		mutex_unlock(&host->intr_mutex);
 		return;
@@ -321,36 +134,5 @@ void host1x_intr_start(struct host1x *host)
 
 void host1x_intr_stop(struct host1x *host)
 {
-	unsigned int id;
-	struct host1x_syncpt *syncpt = host->syncpt;
-	u32 nb_pts = host1x_syncpt_nb_pts(host);
-
-	mutex_lock(&host->intr_mutex);
-
 	host1x_hw_intr_disable_all_syncpt_intrs(host);
-
-	for (id = 0; id < nb_pts; ++id) {
-		struct host1x_waitlist *waiter, *next;
-
-		list_for_each_entry_safe(waiter, next,
-			&syncpt[id].intr.wait_head, list) {
-			if (atomic_cmpxchg(&waiter->state,
-			    WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) {
-				list_del(&waiter->list);
-				kref_put(&waiter->refcount, waiter_release);
-			}
-		}
-
-		if (!list_empty(&syncpt[id].intr.wait_head)) {
-			/* output diagnostics */
-			mutex_unlock(&host->intr_mutex);
-			pr_warn("%s cannot stop syncpt intr id=%u\n",
-				__func__, id);
-			return;
-		}
-	}
-
-	host1x_hw_intr_free_syncpt_irq(host);
-
-	mutex_unlock(&host->intr_mutex);
 }
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index e4c34609..3b5610b5 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -2,87 +2,17 @@
 /*
  * Tegra host1x Interrupt Management
  *
- * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * Copyright (c) 2010-2021, NVIDIA Corporation.
  */
 
 #ifndef __HOST1X_INTR_H
 #define __HOST1X_INTR_H
 
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
-
-struct host1x_syncpt;
 struct host1x;
-
-enum host1x_intr_action {
-	/*
-	 * Perform cleanup after a submit has completed.
-	 * 'data' points to a channel
-	 */
-	HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0,
-
-	/*
-	 * Wake up a  task.
-	 * 'data' points to a wait_queue_head_t
-	 */
-	HOST1X_INTR_ACTION_WAKEUP,
-
-	/*
-	 * Wake up a interruptible task.
-	 * 'data' points to a wait_queue_head_t
-	 */
-	HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
-
-	HOST1X_INTR_ACTION_SIGNAL_FENCE,
-
-	HOST1X_INTR_ACTION_COUNT
-};
-
-struct host1x_syncpt_intr {
-	spinlock_t lock;
-	struct list_head wait_head;
-	char thresh_irq_name[12];
-	struct work_struct work;
-};
-
-struct host1x_waitlist {
-	struct list_head list;
-	struct kref refcount;
-	u32 thresh;
-	enum host1x_intr_action action;
-	atomic_t state;
-	void *data;
-	int count;
-};
-
-/*
- * Schedule an action to be taken when a sync point reaches the given threshold.
- *
- * @id the sync point
- * @thresh the threshold
- * @action the action to take
- * @data a pointer to extra data depending on action, see above
- * @waiter waiter structure - assumes ownership
- * @ref must be passed if cancellation is possible, else NULL
- *
- * This is a non-blocking api.
- */
-int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
-			   u32 thresh, enum host1x_intr_action action,
-			   void *data, struct host1x_waitlist *waiter,
-			   void **ref);
-
-/*
- * Unreference an action submitted to host1x_intr_add_action().
- * You must call this if you passed non-NULL as ref.
- * @ref the ref returned from host1x_intr_add_action()
- * @flush wait until any pending handlers have completed before returning.
- */
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
-			 bool flush);
+struct host1x_syncpt_fence;
 
 /* Initialize host1x sync point interrupt */
-int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
+int host1x_intr_init(struct host1x *host);
 
 /* Deinitialize host1x sync point interrupt */
 void host1x_intr_deinit(struct host1x *host);
@@ -93,5 +23,10 @@ void host1x_intr_start(struct host1x *host);
 /* Disable host1x sync point interrupt */
 void host1x_intr_stop(struct host1x *host);
 
-irqreturn_t host1x_syncpt_thresh_fn(void *dev_id);
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id);
+
+void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence);
+
+bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence);
+
 #endif
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index d6a4e3fb..76e12137 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -14,6 +14,7 @@
 #include <linux/kref.h>
 #include <linux/sched.h>
 
+#include "fence.h"
 #include "intr.h"
 
 struct host1x;
@@ -39,7 +40,7 @@ struct host1x_syncpt {
 	struct host1x_syncpt_base *base;
 
 	/* interrupt data */
-	struct host1x_syncpt_intr intr;
+	struct host1x_fence_list fences;
 
 	/*
 	 * If a submission incrementing this syncpoint fails, lock it so that

From 25f41bc2123b7752d7b9c311de37e9cb3e9e86c7 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Wed, 21 Sep 2022 12:51:55 +0300
Subject: [PATCH 61/73] gpu: host1x: External timeout/cancellation for fences

Currently all fences have a 30 second timeout to ensure they are
cleaned up if the fence never completes otherwise. However, this
one size fits all solution doesn't actually fit in every case,
such as syncpoint waiting where we want to be able to have timeouts
longer than 30 seconds. As such, we want to be able to give control
over fence cancellation to the caller (and maybe eventually get rid
of the internal timeout altogether).

Here we add this cancellation mechanism by essentially adding a
function for entering the timeout path by function call, and changing
the syncpoint wait function to use it.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I4600544afe21efdd3f7d06362bd124130ddec3db
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2786637
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/fence.c                    | 40 +++++++++++++------
 drivers/gpu/host1x/fence.h                    |  1 +
 drivers/gpu/host1x/hw/channel_hw.c            |  2 +-
 .../gpu/host1x/include/linux/host1x-next.h    |  4 +-
 drivers/gpu/host1x/syncpt.c                   |  4 +-
 5 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index c77ce207..ac6e07c1 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -37,8 +37,7 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
 	if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
 		return false;
 
-	/* One reference for interrupt path, one for timeout path. */
-	dma_fence_get(f);
+	/* Reference for interrupt path. */
 	dma_fence_get(f);
 
 	/*
@@ -46,11 +45,15 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
 	 * reference to any fences for which 'enable_signaling' has been
 	 * called (and that have not been signalled).
 	 *
-	 * We cannot (for now) normally guarantee that all fences get signalled.
-	 * As such, setup a timeout, so that long-lasting fences will get
-	 * reaped eventually.
+	 * We cannot currently always guarantee that all fences get signalled
+	 * or cancelled. As such, for such situations, set up a timeout, so
+	 * that long-lasting fences will get reaped eventually.
 	 */
-	schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
+	if (sf->timeout) {
+		/* Reference for timeout path. */
+		dma_fence_get(f);
+		schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
+	}
 
 	host1x_intr_add_fence_locked(sf->sp->host, sf);
 
@@ -80,7 +83,7 @@ void host1x_fence_signal(struct host1x_syncpt_fence *f)
 		return;
 	}
 
-	if (cancel_delayed_work(&f->timeout_work)) {
+	if (f->timeout && cancel_delayed_work(&f->timeout_work)) {
 		/*
 		 * We know that the timeout path will not be entered.
 		 * Safe to drop the timeout path's reference now.
@@ -99,8 +102,9 @@ static void do_fence_timeout(struct work_struct *work)
 		container_of(dwork, struct host1x_syncpt_fence, timeout_work);
 
 	if (atomic_xchg(&f->signaling, 1)) {
-		/* Already on interrupt path, drop timeout path reference. */
-		dma_fence_put(&f->base);
+		/* Already on interrupt path, drop timeout path reference if any. */
+		if (f->timeout)
+			dma_fence_put(&f->base);
 		return;
 	}
 
@@ -114,12 +118,12 @@ static void do_fence_timeout(struct work_struct *work)
 
 	dma_fence_set_error(&f->base, -ETIMEDOUT);
 	dma_fence_signal(&f->base);
-
-	/* Drop timeout path reference. */
-	dma_fence_put(&f->base);
+	if (f->timeout)
+		dma_fence_put(&f->base);
 }
 
-struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold,
+				      bool timeout)
 {
 	struct host1x_syncpt_fence *fence;
 
@@ -129,6 +133,7 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
 
 	fence->sp = sp;
 	fence->threshold = threshold;
+	fence->timeout = timeout;
 
 	dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock,
 		       dma_fence_context_alloc(1), 0);
@@ -154,3 +159,12 @@ int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold)
 	return 0;
 }
 EXPORT_SYMBOL(host1x_fence_extract);
+
+void host1x_fence_cancel(struct dma_fence *f)
+{
+	struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+
+	schedule_delayed_work(&sf->timeout_work, 0);
+	flush_delayed_work(&sf->timeout_work);
+}
+EXPORT_SYMBOL(host1x_fence_cancel);
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
index 4352d046..f3c644c7 100644
--- a/drivers/gpu/host1x/fence.h
+++ b/drivers/gpu/host1x/fence.h
@@ -13,6 +13,7 @@ struct host1x_syncpt_fence {
 
 	struct host1x_syncpt *sp;
 	u32 threshold;
+	bool timeout;
 
 	struct delayed_work timeout_work;
 
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 00afe8fb..d8dff41e 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -328,7 +328,7 @@ static int channel_submit(struct host1x_job *job)
 	 * Create fence before submitting job to HW to avoid job completing
 	 * before the fence is set up.
 	 */
-	job->fence = host1x_fence_create(sp, syncval);
+	job->fence = host1x_fence_create(sp, syncval, true);
 	if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) {
 		job->fence = NULL;
 	} else {
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index f4b986cb..6fc0bb28 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -226,8 +226,10 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base);
 void host1x_syncpt_release_vblank_reservation(struct host1x_client *client,
 					      u32 syncpt_id);
 
-struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold);
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold,
+				      bool timeout);
 int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold);
+void host1x_fence_cancel(struct dma_fence *fence);
 
 /*
  * host1x channel
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 75f58ec2..2d200776 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -236,11 +236,13 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	else if (timeout == 0)
 		return -EAGAIN;
 
-	fence = host1x_fence_create(sp, thresh);
+	fence = host1x_fence_create(sp, thresh, false);
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
 
 	wait_err = dma_fence_wait_timeout(fence, true, timeout);
+	if (wait_err == 0)
+		host1x_fence_cancel(fence);
 	dma_fence_put(fence);
 
 	if (value)

From f6865a3c2d18f3212b0e67cce7226bc7edebbc9f Mon Sep 17 00:00:00 2001
From: Nikesh Oswal <noswal@nvidia.com>
Date: Wed, 2 Nov 2022 12:37:22 +0000
Subject: [PATCH 62/73] gpu: host1x: allow tsec streamid reg access via MMIO

Set Host1X registers to allow both the TSEC StreamID
registers to be programmed via MMIO from the TSEC driver

Bug 3817626

Change-Id: Ic344ecbca557d4c058accc8db1a9f874945e8280
Signed-off-by: Nikesh Oswal <noswal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2801852
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 46f143bc..9b375fff 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -388,6 +388,12 @@ static const struct host1x_sid_entry tegra234_sid_table[] = {
 		.offset = 0x34,
 		.limit = 0x34,
 	},
+	{
+		/* TSEC MMIO */
+		.base = 0x16a8,
+		.offset = 0x30,
+		.limit = 0x34,
+	},
 };
 
 static const struct host1x_info host1x08_info = {

From a351c7681eb3a0db19a7c7ae8f77f39c93549c95 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 1 Nov 2022 12:01:19 +0200
Subject: [PATCH 63/73] gpu: host1x: Timestamp syncpoint wait completions

Collect CLOCK_MONOTONIC timestamps in the interrupt handler when
a syncpoint wait completes, and report it back in dma_fences and
syncpoint wait UAPI.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I9f783833698df7d96c99c9ffef3205aa82adceb5
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2801167
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/fence.c                    |  4 +--
 drivers/gpu/host1x/fence.h                    |  2 +-
 drivers/gpu/host1x/hw/intr_hw.c               |  6 +++-
 .../gpu/host1x/include/linux/host1x-next.h    |  3 ++
 drivers/gpu/host1x/intr.c                     |  4 +--
 drivers/gpu/host1x/intr.h                     |  4 ++-
 drivers/gpu/host1x/syncpt.c                   | 28 ++++++++++++++++---
 7 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index ac6e07c1..4374e249 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -72,7 +72,7 @@ const struct dma_fence_ops host1x_syncpt_fence_ops = {
 	.enable_signaling = host1x_syncpt_fence_enable_signaling,
 };
 
-void host1x_fence_signal(struct host1x_syncpt_fence *f)
+void host1x_fence_signal(struct host1x_syncpt_fence *f, ktime_t ts)
 {
 	if (atomic_xchg(&f->signaling, 1)) {
 		/*
@@ -91,7 +91,7 @@ void host1x_fence_signal(struct host1x_syncpt_fence *f)
 		dma_fence_put(&f->base);
 	}
 
-	dma_fence_signal_locked(&f->base);
+	dma_fence_signal_timestamp_locked(&f->base, ts);
 	dma_fence_put(&f->base);
 }
 
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
index f3c644c7..0a2e6b3f 100644
--- a/drivers/gpu/host1x/fence.h
+++ b/drivers/gpu/host1x/fence.h
@@ -25,6 +25,6 @@ struct host1x_fence_list {
 	struct list_head list;
 };
 
-void host1x_fence_signal(struct host1x_syncpt_fence *fence);
+void host1x_fence_signal(struct host1x_syncpt_fence *fence, ktime_t ts);
 
 #endif
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index b915ef7d..d6cdc489 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -9,6 +9,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/timekeeping.h>
 
 #include "../intr.h"
 #include "../dev.h"
@@ -18,6 +19,9 @@ static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 	struct host1x *host = dev_id;
 	unsigned long reg;
 	unsigned int i, id;
+	ktime_t ts;
+
+	ts = ktime_get();
 
 	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
 		reg = host1x_sync_readl(host,
@@ -29,7 +33,7 @@ static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
 
 		for_each_set_bit(id, &reg, 32)
-			host1x_intr_handle_interrupt(host, i * 32 + id);
+			host1x_intr_handle_interrupt(host, i * 32 + id, ts);
 	}
 
 	return IRQ_HANDLED;
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 6fc0bb28..b9ab6475 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -10,6 +10,7 @@
 #include <linux/dma-direction.h>
 #include <linux/dma-fence.h>
 #include <linux/spinlock.h>
+#include <linux/timekeeping.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -211,6 +212,8 @@ u32 host1x_syncpt_read_max(struct host1x_syncpt *sp);
 u32 host1x_syncpt_read(struct host1x_syncpt *sp);
 int host1x_syncpt_incr(struct host1x_syncpt *sp);
 u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
+int host1x_syncpt_wait_ts(struct host1x_syncpt *sp, u32 thresh, long timeout,
+			  u32 *value, ktime_t *ts);
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 		       u32 *value);
 struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client,
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 995bfa98..6f4b1820 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -72,7 +72,7 @@ bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *f
 	return true;
 }
 
-void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id)
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id, ktime_t ts)
 {
 	struct host1x_syncpt *sp = &host->syncpt[id];
 	struct host1x_syncpt_fence *fence, *tmp;
@@ -89,7 +89,7 @@ void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id)
 		}
 
 		list_del_init(&fence->list);
-		host1x_fence_signal(fence);
+		host1x_fence_signal(fence, ts);
 	}
 
 	/* Re-enable interrupt if necessary */
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 3b5610b5..e4f307ab 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -8,6 +8,8 @@
 #ifndef __HOST1X_INTR_H
 #define __HOST1X_INTR_H
 
+#include <linux/timekeeping.h>
+
 struct host1x;
 struct host1x_syncpt_fence;
 
@@ -23,7 +25,7 @@ void host1x_intr_start(struct host1x *host);
 /* Disable host1x sync point interrupt */
 void host1x_intr_stop(struct host1x *host);
 
-void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id);
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id, ktime_t ts);
 
 void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence);
 
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 2d200776..098ff002 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -9,6 +9,7 @@
 #include <linux/device.h>
 #include <linux/dma-fence.h>
 #include <linux/slab.h>
+#include <linux/timekeeping.h>
 
 #include <trace/events/host1x.h>
 
@@ -211,14 +212,15 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp)
 EXPORT_SYMBOL(host1x_syncpt_incr);
 
 /**
- * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
+ * host1x_syncpt_wait_ts() - wait for a syncpoint to reach a given value
  * @sp: host1x syncpoint
  * @thresh: threshold
  * @timeout: maximum time to wait for the syncpoint to reach the given value
  * @value: return location for the syncpoint value
+ * @ts: return location for completion timestamp
  */
-int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
-		       u32 *value)
+int host1x_syncpt_wait_ts(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value,
+			  ktime_t *ts)
 {
 	struct dma_fence *fence;
 	long wait_err;
@@ -227,6 +229,8 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 
 	if (value)
 		*value = host1x_syncpt_load(sp);
+	if (ts)
+		*ts = ktime_get();
 
 	if (host1x_syncpt_is_expired(sp, thresh))
 		return 0;
@@ -243,10 +247,13 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	wait_err = dma_fence_wait_timeout(fence, true, timeout);
 	if (wait_err == 0)
 		host1x_fence_cancel(fence);
-	dma_fence_put(fence);
 
 	if (value)
 		*value = host1x_syncpt_load(sp);
+	if (ts)
+		*ts = fence->timestamp;
+
+	dma_fence_put(fence);
 
 	if (wait_err == 0)
 		return -EAGAIN;
@@ -255,6 +262,19 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	else
 		return 0;
 }
+EXPORT_SYMBOL(host1x_syncpt_wait_ts);
+
+/**
+ * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
+ * @sp: host1x syncpoint
+ * @thresh: threshold
+ * @timeout: maximum time to wait for the syncpoint to reach the given value
+ * @value: return location for the syncpoint value
+ */
+int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value)
+{
+	return host1x_syncpt_wait_ts(sp, thresh, timeout, value, NULL);
+}
 EXPORT_SYMBOL(host1x_syncpt_wait);
 
 /*

From 283b2ed68bbabb1fedca5a2ef6e99e42995aff6c Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Fri, 11 Nov 2022 13:07:12 +0200
Subject: [PATCH 64/73] gpu: host1x: Add stream ID protection table entries for
 camera engines

Add entries for camera engines (VI,VI2,VI_THI,VI2_THI,ISP_THI) in
the stream ID protection table.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: If65fbbd63f2b4eb2bf56b3a696e570f87c010d87
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2807581
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 9b375fff..dba0763f 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -394,6 +394,42 @@ static const struct host1x_sid_entry tegra234_sid_table[] = {
 		.offset = 0x30,
 		.limit = 0x34,
 	},
+	{
+		/* VI MMIO */
+		.base = 0x16b8,
+		.offset = 0x800,
+		.limit = 0x800,
+	},
+	{
+		/* VI_THI MMIO */
+		.base = 0x16c0,
+		.offset = 0x30,
+		.limit = 0x34,
+	},
+	{
+		/* ISP MMIO */
+		.base = 0x1680,
+		.offset = 0x800,
+		.limit = 0x800,
+	},
+	{
+		/* ISP_THI MMIO */
+		.base = 0x16c8,
+		.offset = 0x30,
+		.limit = 0x34,
+	},
+	{
+		/* VI2 MMIO */
+		.base = 0x16f0,
+		.offset = 0x800,
+		.limit = 0x800,
+	},
+	{
+		/* VI2_THI MMIO */
+		.base = 0x16f8,
+		.offset = 0x30,
+		.limit = 0x34,
+	},
 };
 
 static const struct host1x_info host1x08_info = {

From fb56e97a0ffd567e2c30c63cdf5c590319784131 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Wed, 7 Sep 2022 11:38:42 +0300
Subject: [PATCH 65/73] gpu: host1x: Select context device based on attached
 IOMMU

On Tegra234, engines that are programmed through Host1x channels can
be attached to either the NISO0 or NISO1 SMMU. Because of that, when
selecting a context device to use with an engine, we need to select
one that is also attached to the same SMMU.

Add a parameter to host1x_memory_context_alloc to specify which device
we are allocating a context for, and use it to pick an appropriate
context device.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Change-Id: I32af312c85164b72c14409d816d3b50ad5c7bfe5
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2811836
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/context.c                   | 4 ++++
 drivers/gpu/host1x/include/linux/host1x-next.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
index b08cf11f..c8e7994c 100644
--- a/drivers/gpu/host1x/context.c
+++ b/drivers/gpu/host1x/context.c
@@ -104,6 +104,7 @@ void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
 }
 
 struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+							  struct device *dev,
 							  struct pid *pid)
 {
 	struct host1x_memory_context_list *cdl = &host1x->context_list;
@@ -118,6 +119,9 @@ struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
 	for (i = 0; i < cdl->len; i++) {
 		struct host1x_memory_context *cd = &cdl->devs[i];
 
+		if (cd->dev.iommu->iommu_dev != dev->iommu->iommu_dev)
+			continue;
+
 		if (cd->owner == pid) {
 			refcount_inc(&cd->ref);
 			mutex_unlock(&cdl->lock);
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index b9ab6475..69a4044e 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -485,6 +485,7 @@ struct host1x_memory_context {
 
 #ifdef CONFIG_IOMMU_API
 struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+							  struct device *dev,
 							  struct pid *pid);
 void host1x_memory_context_get(struct host1x_memory_context *cd);
 void host1x_memory_context_put(struct host1x_memory_context *cd);

From e0a90be04b2026097201d7c8428fabf408b7c71a Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Fri, 18 Nov 2022 13:39:12 +0200
Subject: [PATCH 66/73] gpu: host1x: Support for running as guest

Add support for running as a guest system under a hypervisor, using
Host1x HW's virtualization capabilities.

In effect this involves not touching apertures other than the 'vm'
aperture, and channels and syncpoints other than those that are
assigned to the VM.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: Ideec5b0b9a692aa3ee6b4a0240c5755c983cb7bd
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2811837
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/host1x/channel.c    |  6 ++--
 drivers/gpu/host1x/dev.c        | 52 +++++++++++++++++++++++++++++----
 drivers/gpu/host1x/dev.h        |  4 +++
 drivers/gpu/host1x/hw/cdma_hw.c |  7 +++++
 drivers/gpu/host1x/syncpt.c     | 27 ++++++++++++-----
 5 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 2d0051d6..169e0d86 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -101,11 +101,11 @@ EXPORT_SYMBOL(host1x_channel_put);
 static struct host1x_channel *acquire_unused_channel(struct host1x *host)
 {
 	struct host1x_channel_list *chlist = &host->channel_list;
-	unsigned int max_channels = host->info->nb_channels;
 	unsigned int index;
 
-	index = find_first_zero_bit(chlist->allocated_channels, max_channels);
-	if (index >= max_channels) {
+	index = find_next_zero_bit(chlist->allocated_channels,
+		host->num_channels, host->channel_base);
+	if (index >= host->num_channels) {
 		dev_err(host->dev, "failed to find free channel\n");
 		return NULL;
 	}
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index dba0763f..4b90f58f 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -469,7 +469,7 @@ static void host1x_setup_virtualization_tables(struct host1x *host)
 	const struct host1x_info *info = host->info;
 	unsigned int i;
 
-	if (!info->has_hypervisor)
+	if (!host->hv_regs)
 		return;
 
 	for (i = 0; i < info->num_sid_entries; i++) {
@@ -673,6 +673,39 @@ static int host1x_get_resets(struct host1x *host)
 	return 0;
 }
 
+static int host1x_get_assigned_resources(struct host1x *host)
+{
+	struct device_node *np = host->dev->of_node;
+	u32 vals[2];
+	int err;
+
+	err = of_property_read_u32_array(np, "nvidia,channels", vals, 2);
+	if (err == 0) {
+		host->channel_base = vals[0];
+		host->num_channels = vals[1];
+	} else if (err == -EINVAL) {
+		host->channel_base = 0;
+		host->num_channels = host->info->nb_channels;
+	} else {
+		dev_err(host->dev, "invalid nvidia,channels property: %d\n", err);
+		return err;
+	}
+
+	err = of_property_read_u32_array(np, "nvidia,syncpoints", vals, 2);
+	if (err == 0) {
+		host->syncpt_base = vals[0];
+		host->syncpt_end = vals[0] + vals[1];
+	} else if (err == -EINVAL) {
+		host->syncpt_base = 0;
+		host->syncpt_end = host->info->nb_pts;
+	} else {
+		dev_err(host->dev, "invalid nvidia,syncpoints property: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
@@ -686,15 +719,20 @@ static int host1x_probe(struct platform_device *pdev)
 	host->info = of_device_get_match_data(&pdev->dev);
 
 	if (host->info->has_hypervisor) {
+		struct resource *res;
+
 		host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
 		if (IS_ERR(host->regs))
 			return PTR_ERR(host->regs);
 
-		host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
-		if (IS_ERR(host->hv_regs))
-			return PTR_ERR(host->hv_regs);
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "hypervisor");
+		if (res) {
+			host->hv_regs = devm_ioremap_resource(&pdev->dev, res);
+			if (IS_ERR(host->hv_regs))
+				return PTR_ERR(host->hv_regs);
+		}
 
-		if (host->info->has_common) {
+		if (res && host->info->has_common) {
 			host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
 			if (IS_ERR(host->common_regs))
 				return PTR_ERR(host->common_regs);
@@ -726,6 +764,10 @@ static int host1x_probe(struct platform_device *pdev)
 			return err;
 	}
 
+	err = host1x_get_assigned_resources(host);
+	if (err)
+		return err;
+
 	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk)) {
 		err = PTR_ERR(host->clk);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 75de50fe..87e8c273 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -132,6 +132,10 @@ struct host1x {
 	struct reset_control_bulk_data resets[2];
 	unsigned int nresets;
 
+	/* Resources accessible by this VM */
+	unsigned int syncpt_base, syncpt_end;
+	unsigned int channel_base, num_channels;
+
 	struct iommu_group *group;
 	struct iommu_domain *domain;
 	struct iova_domain iova;
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 96f341ad..b681a03d 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -253,6 +253,13 @@ static void timeout_release_mlock(struct host1x_cdma *cdma)
 	struct host1x *host1x = cdma_to_host1x(cdma);
 	u32 offset;
 
+	/*
+	 * On virtualized systems, we rely on the hypervisor to release
+	 * the MLOCK.
+	 */
+	if (!host1x->common_regs)
+		return;
+
 	switch (ch->client->class) {
 	case HOST1X_CLASS_VIC:
 		offset = HOST1X_COMMON_VIC_MLOCK;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 098ff002..26b59b31 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -69,10 +69,10 @@ struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 
 	mutex_lock(&host->syncpt_mutex);
 
-	for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++)
+	for (i = host->syncpt_base; i < host->syncpt_end && kref_read(&sp->ref); i++, sp++)
 		;
 
-	if (i >= host->info->nb_pts)
+	if (i >= host->syncpt_end)
 		goto unlock;
 
 	if (flags & HOST1X_SYNCPT_HAS_BASE) {
@@ -139,7 +139,7 @@ void host1x_syncpt_restore(struct host1x *host)
 	struct host1x_syncpt *sp_base = host->syncpt;
 	unsigned int i;
 
-	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+	for (i = host->syncpt_base; i < host->syncpt_end; i++) {
 		/*
 		 * Unassign syncpt from channels for purposes of Tegra186
 		 * syncpoint protection. This prevents any channel from
@@ -310,6 +310,13 @@ int host1x_syncpt_init(struct host1x *host)
 	for (i = 0; i < host->info->nb_pts; i++) {
 		syncpt[i].id = i;
 		syncpt[i].host = host;
+
+		/*
+		 * Make syncpoints client managed by default, so that
+		 * we don't try to compare to max_val for e.g. syncpoints
+		 * owner by other VMs.
+		 */
+		syncpt[i].client_managed = true;
 	}
 
 	for (i = 0; i < host->info->nb_bases; i++)
@@ -319,10 +326,16 @@ int host1x_syncpt_init(struct host1x *host)
 	host->syncpt = syncpt;
 	host->bases = bases;
 
-	/* Allocate sync point to use for clearing waits for expired fences */
-	host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
-	if (!host->nop_sp)
-		return -ENOMEM;
+	/*
+	 * Allocate sync point to use for clearing waits for expired fences.
+	 * On virtualized systems where syncpt_base is nonzero, we don't need
+	 * this for anything.
+	 */
+	if (host->syncpt_base == 0) {
+		host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
+		if (!host->nop_sp)
+			return -ENOMEM;
+	}
 
 	if (host->info->reserve_vblank_syncpts) {
 		kref_init(&host->syncpt[26].ref);

From de0a5c2c113b518bd22fd8961e93a54f2729ef69 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 8 Dec 2022 12:31:41 +0200
Subject: [PATCH 67/73] gpu: host1x: Support for syncpoint pools

Add support for syncpoint pools. These are configuration-dependent
subsets of syncpoints that can only be allocated by specifying
an allocation specifically from that pool.

In this patch, we add support for the GPU pool. On certain systems,
the GPU is for safety purposes limited to accessing a specific set
of syncpoints. Therefore, syncpoints allocated for GPU use need to
come from this pool.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I385b8bdfdac8573b26f0b1a0feaf05de071148a1
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2826198
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-by: Sanif Veeras <sveeras@nvidia.com>
Reviewed-by: Raghavendra Vishnu Kumar <rvk@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Tested-by: Sanif Veeras <sveeras@nvidia.com>
---
 drivers/gpu/host1x/dev.c                      | 44 +++++++++++++++++++
 drivers/gpu/host1x/dev.h                      | 10 +++++
 .../gpu/host1x/include/linux/host1x-next.h    |  1 +
 drivers/gpu/host1x/syncpt.c                   | 36 ++++++++++++---
 drivers/gpu/host1x/syncpt.h                   |  1 +
 5 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 4b90f58f..424c8a67 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -706,6 +706,46 @@ static int host1x_get_assigned_resources(struct host1x *host)
 	return 0;
 }
 
+static int host1x_get_syncpt_pools(struct host1x *host)
+{
+	struct device_node *np = host->dev->of_node;
+	int err, i;
+
+	err = of_property_count_strings(np, "nvidia,syncpoint-pool-names");
+	if (err == -EINVAL)
+		return 0;
+	if (err < 0) {
+		dev_err(host->dev, "invalid nvidia,syncpoint-pool-names property: %d\n", err);
+		return err;
+	}
+
+	host->num_pools = err;
+	host->pools = devm_kcalloc(host->dev, host->num_pools, sizeof(struct host1x_syncpt_pool),
+				   GFP_KERNEL);
+	if (!host->pools)
+		return -ENOMEM;
+
+	for (i = 0; i < host->num_pools; i++) {
+		struct host1x_syncpt_pool *pool = &host->pools[i];
+
+		err = of_property_read_string_index(np, "nvidia,syncpoint-pool-names", i, &pool->name);
+		if (err)
+			return err;
+
+		err = of_property_read_u32_index(np, "nvidia,syncpoint-pools", i*2+0, &pool->base);
+		if (!err)
+			err = of_property_read_u32_index(np, "nvidia,syncpoint-pools", i*2+1, &pool->end);
+		if (err) {
+			dev_err(host->dev, "invalid nvidia,syncpoint-pools property: %d\n", err);
+			return err;
+		}
+
+		pool->end = pool->base + pool->end;
+	}
+
+	return 0;
+}
+
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
@@ -768,6 +808,10 @@ static int host1x_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
+	err = host1x_get_syncpt_pools(host);
+	if (err)
+		return err;
+
 	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk)) {
 		err = PTR_ERR(host->clk);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 87e8c273..25e3f770 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -118,6 +118,12 @@ struct host1x_info {
 	bool reserve_vblank_syncpts;
 };
 
+struct host1x_syncpt_pool {
+	const char *name;
+	unsigned int base;
+	unsigned int end;
+};
+
 struct host1x {
 	const struct host1x_info *info;
 
@@ -136,6 +142,10 @@ struct host1x {
 	unsigned int syncpt_base, syncpt_end;
 	unsigned int channel_base, num_channels;
 
+	/* Restricted syncpoint pools */
+	struct host1x_syncpt_pool *pools;
+	unsigned int num_pools;
+
 	struct iommu_group *group;
 	struct iommu_domain *domain;
 	struct iova_domain iova;
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 69a4044e..3e67ed56 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -198,6 +198,7 @@ static inline void host1x_bo_munmap(struct host1x_bo *bo, void *addr)
 
 #define HOST1X_SYNCPT_CLIENT_MANAGED	(1 << 0)
 #define HOST1X_SYNCPT_HAS_BASE		(1 << 1)
+#define HOST1X_SYNCPT_GPU		(1 << 2)
 
 struct host1x_syncpt_base;
 struct host1x_syncpt;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 26b59b31..9375000f 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -60,17 +60,34 @@ struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 					  unsigned long flags,
 					  const char *name)
 {
-	struct host1x_syncpt *sp = host->syncpt;
+	struct host1x_syncpt *sp = host->syncpt + host->syncpt_base;
+	struct host1x_syncpt_pool *pool = NULL;
 	char *full_name;
 	unsigned int i;
 
 	if (!name)
 		return NULL;
 
+	if (flags & HOST1X_SYNCPT_GPU) {
+		for (i = 0; i < host->num_pools; i++) {
+			if (!strcmp(host->pools[i].name, "gpu")) {
+				pool = &host->pools[i];
+				break;
+			}
+		}
+
+		/* If no GPU pool configured, any syncpoint is OK. */
+	}
+
 	mutex_lock(&host->syncpt_mutex);
 
-	for (i = host->syncpt_base; i < host->syncpt_end && kref_read(&sp->ref); i++, sp++)
-		;
+	for (i = host->syncpt_base; i < host->syncpt_end; i++, sp++) {
+		if (sp->pool != pool)
+			continue;
+
+		if (kref_read(&sp->ref) == 0)
+			break;
+	}
 
 	if (i >= host->syncpt_end)
 		goto unlock;
@@ -322,6 +339,14 @@ int host1x_syncpt_init(struct host1x *host)
 	for (i = 0; i < host->info->nb_bases; i++)
 		bases[i].id = i;
 
+	for (i = 0; i < host->num_pools; i++) {
+		struct host1x_syncpt_pool *pool = &host->pools[i];
+		unsigned int j;
+
+		for (j = pool->base; j < pool->end; j++)
+			syncpt[j].pool = pool;
+	}
+
 	mutex_init(&host->syncpt_mutex);
 	host->syncpt = syncpt;
 	host->bases = bases;
@@ -332,9 +357,8 @@ int host1x_syncpt_init(struct host1x *host)
 	 * this for anything.
 	 */
 	if (host->syncpt_base == 0) {
-		host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
-		if (!host->nop_sp)
-			return -ENOMEM;
+		kref_init(&syncpt[0].ref);
+		syncpt[0].name = kstrdup("reserved", GFP_KERNEL);
 	}
 
 	if (host->info->reserve_vblank_syncpts) {
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 76e12137..03a686f1 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -38,6 +38,7 @@ struct host1x_syncpt {
 	bool client_managed;
 	struct host1x *host;
 	struct host1x_syncpt_base *base;
+	struct host1x_syncpt_pool *pool;
 
 	/* interrupt data */
 	struct host1x_fence_list fences;

From 684ae650bffd888d62140f937467a49f1af6e049 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 27 Dec 2022 17:16:54 +0200
Subject: [PATCH 68/73] gpu: host1x: Fix mask for syncpoint increment register

On Tegra186+, the syncpoint ID has 10 bits of space. To allow
using more than 256 syncpoints, fix the mask.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I30a220432e6299a91ad667ad4289c444293c1e63
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2833918
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Santosh BS <santoshb@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h | 2 +-
 drivers/gpu/host1x/hw/hw_host1x07_uclass.h | 2 +-
 drivers/gpu/host1x/hw/hw_host1x08_uclass.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
index 5f831438..50c32de4 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
 	host1x_uclass_incr_syncpt_cond_f(v)
 static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
 {
-	return (v & 0xff) << 0;
+	return (v & 0x3ff) << 0;
 }
 #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
 	host1x_uclass_incr_syncpt_indx_f(v)
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
index 8cd2ef08..887b878f 100644
--- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
 	host1x_uclass_incr_syncpt_cond_f(v)
 static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
 {
-	return (v & 0xff) << 0;
+	return (v & 0x3ff) << 0;
 }
 #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
 	host1x_uclass_incr_syncpt_indx_f(v)
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
index 724cccd7..4fb1d090 100644
--- a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
 	host1x_uclass_incr_syncpt_cond_f(v)
 static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
 {
-	return (v & 0xff) << 0;
+	return (v & 0x3ff) << 0;
 }
 #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
 	host1x_uclass_incr_syncpt_indx_f(v)

From ff2271fec339aebede9629573571ec2fdbd6d9fd Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 27 Dec 2022 17:24:24 +0200
Subject: [PATCH 69/73] gpu: host1x: Assign syncpoints to channels on
 virtualized systems

The code to write the syncpoint channel assignment register
incorrectly skips the write if hypervisor registers are not available.

The register, however, is within the guest aperture so remove the
check and assign syncpoints properly on virtualized systems.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I9a0c8ed42a56c445d392e1ed2b670091be6654fb
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2833919
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Santosh BS <santoshb@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/hw/syncpt_hw.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index dd39d67c..8cf35b2e 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,9 +106,6 @@ static void syncpt_assign_to_channel(struct host1x_syncpt *sp,
 #if HOST1X_HW >= 6
 	struct host1x *host = sp->host;
 
-	if (!host->hv_regs)
-		return;
-
 	host1x_sync_writel(host,
 			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
 			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));

From 3d26b00c171a169a269579aae0b55032c7b8ee76 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 27 Jan 2023 23:14:00 +0100
Subject: [PATCH 70/73] UPSTREAM: gpu: host1x: Fix uninitialized variable use

The error handling for platform_get_irq() failing no longer works after
a recent change, clang now points this out with a warning:

drivers/gpu/host1x/dev.c:520:6: error: variable 'syncpt_irq' is uninitialized when used here [-Werror,-Wuninitialized]
        if (syncpt_irq < 0)
            ^~~~~~~~~~

Fix this by removing the variable and checking the correct error status.

Fixes: 625d4ffb438c ("gpu: host1x: Rewrite syncpoint interrupt handling")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I780c935101b8c65070eeba3552f96a1bfb109592
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2853776
Reviewed-by: Santosh BS <santoshb@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 424c8a67..2ea38483 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -749,7 +749,6 @@ static int host1x_get_syncpt_pools(struct host1x *host)
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
-	int syncpt_irq;
 	int err;
 
 	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
@@ -784,8 +783,8 @@ static int host1x_probe(struct platform_device *pdev)
 	}
 
 	host->syncpt_irq = platform_get_irq(pdev, 0);
-	if (syncpt_irq < 0)
-		return syncpt_irq;
+	if (host->syncpt_irq < 0)
+		return host->syncpt_irq;
 
 	mutex_init(&host->devices_lock);
 	INIT_LIST_HEAD(&host->devices);

From 4207a59049ee6abf32253425394467052866be71 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Mon, 13 Feb 2023 16:53:23 +0200
Subject: [PATCH 71/73] gpu: host1x: Don't rely on dma_fence_wait_timeout
 return value

dma_fence_wait_timeout (along with a host of other jiffies-based
timeouting functions) returns zero both in case of timeout and when
the wait completes during the last jiffy before timeout. As such,
we can't rely on it to distinguish between success and timeout.

To prevent confusing callers by returning -EAGAIN before the timeout
period has elapsed, check if the fence got signaled again after
the wait.

Bug 3955201

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: Ib90cbd3d78bac773a724a523925ae5d1b70107c8
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2857405
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/syncpt.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 9375000f..eb59bd92 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -272,7 +272,13 @@ int host1x_syncpt_wait_ts(struct host1x_syncpt *sp, u32 thresh, long timeout, u3
 
 	dma_fence_put(fence);
 
-	if (wait_err == 0)
+	/*
+	 * Don't rely on dma_fence_wait_timeout return value,
+	 * since it returns zero both on timeout and if the
+	 * wait completed with 0 jiffies left.
+	 */
+	host1x_hw_syncpt_load(sp->host, sp);
+	if (wait_err == 0 && !host1x_syncpt_is_expired(sp, thresh))
 		return -EAGAIN;
 	else if (wait_err < 0)
 		return wait_err;

From 41a9f72f78cf51d679be71b54bfafc9abe49695e Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Tue, 28 Feb 2023 11:12:18 +0200
Subject: [PATCH 72/73] gpu: host1x: Add API for reading actmon count

Add API for reading the activity monitor average count for VIC,
NVENC, and NVDEC. There is currently no support for initializing
actmon, so this relies on someone else on a virtualized system
having initialized it (and mapped the actmon region read-only).

Bug 3973633

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: Ia1bfec6a090d4effb288b17cbac4d42bf5d0b4e5
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2864719
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/Makefile                   |  1 +
 drivers/gpu/host1x/actmon.c                   | 36 +++++++++++++++++++
 drivers/gpu/host1x/dev.c                      | 10 ++++--
 drivers/gpu/host1x/dev.h                      |  1 +
 .../gpu/host1x/include/linux/host1x-next.h    |  2 ++
 5 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/host1x/actmon.c

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index b5e7e933..affedf2c 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -15,6 +15,7 @@ host1x-next-y = \
 	debug.o \
 	mipi.o \
 	fence.o \
+	actmon.o \
 	hw/host1x01.o \
 	hw/host1x02.o \
 	hw/host1x04.o \
diff --git a/drivers/gpu/host1x/actmon.c b/drivers/gpu/host1x/actmon.c
new file mode 100644
index 00000000..33bac0db
--- /dev/null
+++ b/drivers/gpu/host1x/actmon.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Tegra host1x activity monitor interfaes
+ *
+ * Copyright (c) 2023, NVIDIA Corporation.
+ */
+
+#include "dev.h"
+
+int host1x_actmon_read_avg_count(struct host1x_client *client)
+{
+	struct host1x *host = dev_get_drvdata(client->host->parent);
+	unsigned int offset;
+
+	if (!host->actmon_regs)
+		return -ENODEV;
+
+	/* FIXME: Only T234 supported */
+
+	switch (client->class) {
+	case HOST1X_CLASS_NVENC:
+		offset = 0x0;
+		break;
+	case HOST1X_CLASS_VIC:
+		offset = 0x10000;
+		break;
+	case HOST1X_CLASS_NVDEC:
+		offset = 0x20000;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return readl(host->actmon_regs + offset + 0xa4);
+}
+EXPORT_SYMBOL(host1x_actmon_read_avg_count);
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 2ea38483..9746969c 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -748,6 +748,7 @@ static int host1x_get_syncpt_pools(struct host1x *host)
 
 static int host1x_probe(struct platform_device *pdev)
 {
+	struct resource *res;
 	struct host1x *host;
 	int err;
 
@@ -758,8 +759,6 @@ static int host1x_probe(struct platform_device *pdev)
 	host->info = of_device_get_match_data(&pdev->dev);
 
 	if (host->info->has_hypervisor) {
-		struct resource *res;
-
 		host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
 		if (IS_ERR(host->regs))
 			return PTR_ERR(host->regs);
@@ -782,6 +781,13 @@ static int host1x_probe(struct platform_device *pdev)
 			return PTR_ERR(host->regs);
 	}
 
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "actmon");
+	if (res) {
+		host->actmon_regs = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(host->actmon_regs))
+			return PTR_ERR(host->actmon_regs);
+	}
+
 	host->syncpt_irq = platform_get_irq(pdev, 0);
 	if (host->syncpt_irq < 0)
 		return host->syncpt_irq;
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 25e3f770..7a3ebf70 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -130,6 +130,7 @@ struct host1x {
 	void __iomem *regs;
 	void __iomem *hv_regs; /* hypervisor region */
 	void __iomem *common_regs;
+	void __iomem *actmon_regs;
 	int syncpt_irq;
 	struct host1x_syncpt *syncpt;
 	struct host1x_syncpt_base *bases;
diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h
index 3e67ed56..0c0134f0 100644
--- a/drivers/gpu/host1x/include/linux/host1x-next.h
+++ b/drivers/gpu/host1x/include/linux/host1x-next.h
@@ -506,4 +506,6 @@ static inline void host1x_memory_context_put(struct host1x_memory_context *cd)
 }
 #endif
 
+int host1x_actmon_read_avg_count(struct host1x_client *client);
+
 #endif

From ee3c149b7f9e65b97c2598805f93863ff20da6e5 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Tue, 28 Feb 2023 17:17:22 +0000
Subject: [PATCH 73/73] gpu: host1x: Fix build for Linux v6.3

Upstream Linux commit ("1369459b2e21 iommu: Add a gfp parameter to
iommu_map()") adds a new parameter to the iommu_map function and this
breaks building the host1x driver with Linux v6.3.

Upstream Linux commit 2a81ada32f0e ("driver core: make struct
bus_type.uevent() take a const *") updates the uevent function pointer
type to make the device structure const which also breaks building the
host1x driver with Linux v6.3.

Address both of these issues to fix building the host1x driver with
Linux v6.3.

Bug 4014315

Change-Id: Ibd27f5e8442cc6970bcaac0dcfb9fc262860aee9
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2867136
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
---
 drivers/gpu/host1x/bus.c  | 5 +++++
 drivers/gpu/host1x/cdma.c | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 712ce483..201750f4 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -11,6 +11,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/of_device.h>
+#include <linux/version.h>
 
 #include "bus.h"
 #include "dev.h"
@@ -338,7 +339,11 @@ static int host1x_device_match(struct device *dev, struct device_driver *drv)
 	return strcmp(dev_name(dev), drv->name) == 0;
 }
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0)
+static int host1x_device_uevent(const struct device *dev,
+#else
 static int host1x_device_uevent(struct device *dev,
+#endif
 				struct kobj_uevent_env *env)
 {
 	struct device_node *np = dev->parent->of_node;
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 582f0487..256fc395 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kfifo.h>
 #include <linux/slab.h>
+#include <linux/version.h>
 #include <trace/events/host1x.h>
 
 #include "cdma.h"
@@ -105,7 +106,11 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
 
 		pb->dma = iova_dma_addr(&host1x->iova, alloc);
 		err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0)
+				IOMMU_READ, GFP_KERNEL);
+#else
 				IOMMU_READ);
+#endif
 		if (err)
 			goto iommu_free_iova;
 	} else {