From 76961fd57b9fe8a05c2cbdc44370fd490fda3ab5 Mon Sep 17 00:00:00 2001
From: omar
Date: Fri, 12 May 2023 21:14:06 +0000
Subject: [PATCH] drivers: add pva driver to nvidia-oot

- copy nvpva driver and headers to nvidia-oot directory.
- remove the file copy operation as part of the build process.

Bug 4097111

Change-Id: If040773833405f3941505cb8a2ec3440e0a84c92
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2903052
Reviewed-by: Omar Nemri
GVS: Gerrit_Virtual_Submit
Tested-by: Omar Nemri
---
 drivers/video/tegra/host/pva/Kconfig | 10 +
 drivers/video/tegra/host/pva/Makefile | 65 +-
 .../video/tegra/host/pva/elf_include_fix.h | 65 +
 .../tegra/host/pva/fw_include/fw_config.h | 127 ++
 .../video/tegra/host/pva/fw_include/pva-bit.h | 103 ++
 .../tegra/host/pva/fw_include/pva-errors.h | 208 +++
 .../host/pva/fw_include/pva-fw-address-map.h | 129 ++
 .../host/pva/fw_include/pva-fw-version.h | 36 +
 .../tegra/host/pva/fw_include/pva-interface.h | 481 +++++
 .../tegra/host/pva/fw_include/pva-packed.h | 24 +
 .../tegra/host/pva/fw_include/pva-sys-dma.h | 336 ++++
 .../host/pva/fw_include/pva-sys-params.h | 119 ++
 .../tegra/host/pva/fw_include/pva-task.h | 421 +++++
 .../tegra/host/pva/fw_include/pva-types.h | 98 ++
 .../pva/fw_include/pva-ucode-header-types.h | 49 +
 .../host/pva/fw_include/pva-ucode-header.h | 105 ++
 .../tegra/host/pva/fw_include/pva-version.h | 32 +
 drivers/video/tegra/host/pva/hw_cfg_pva_v1.h | 125 ++
 drivers/video/tegra/host/pva/hw_cfg_pva_v2.h | 136 ++
 drivers/video/tegra/host/pva/hw_dma_ch_pva.h | 77 +
 .../video/tegra/host/pva/hw_dma_desc_pva.h | 117 ++
 drivers/video/tegra/host/pva/hw_evp_pva.h | 85 +
 drivers/video/tegra/host/pva/hw_hsp_pva.h | 165 ++
 drivers/video/tegra/host/pva/hw_proc_pva.h | 69 +
 drivers/video/tegra/host/pva/hw_sec_pva_v1.h | 85 +
 drivers/video/tegra/host/pva/hw_sec_pva_v2.h | 65 +
 drivers/video/tegra/host/pva/hw_vmem_pva.h | 36 +
 drivers/video/tegra/host/pva/nvpva_buffer.c | 607 +++++++
 drivers/video/tegra/host/pva/nvpva_buffer.h | 224 +++
 drivers/video/tegra/host/pva/nvpva_client.c | 218 +++
 drivers/video/tegra/host/pva/nvpva_client.h | 61 +
 .../video/tegra/host/pva/nvpva_elf_parser.c | 445 +++++
 .../video/tegra/host/pva/nvpva_elf_parser.h | 210 +++
 drivers/video/tegra/host/pva/nvpva_queue.c | 597 +++++++
 drivers/video/tegra/host/pva/nvpva_queue.h | 304 ++++
 drivers/video/tegra/host/pva/nvpva_syncpt.c | 300 ++++
 drivers/video/tegra/host/pva/nvpva_syncpt.h | 39 +
 drivers/video/tegra/host/pva/pva-vpu-perf.h | 31 +
 drivers/video/tegra/host/pva/pva.c | 1484 ++++++++++++++++
 drivers/video/tegra/host/pva/pva.h | 615 +++++++
 drivers/video/tegra/host/pva/pva_abort.c | 105 ++
 .../video/tegra/host/pva/pva_bit_helpers.h | 100 ++
 drivers/video/tegra/host/pva/pva_ccq_t19x.c | 87 +
 drivers/video/tegra/host/pva/pva_ccq_t19x.h | 29 +
 drivers/video/tegra/host/pva/pva_ccq_t23x.c | 234 +++
 drivers/video/tegra/host/pva/pva_ccq_t23x.h | 40 +
 drivers/video/tegra/host/pva/pva_debug.c | 490 ++++++
 drivers/video/tegra/host/pva/pva_dma.c | 1264 +++++++++++++
 drivers/video/tegra/host/pva/pva_dma.h | 96 +
 .../video/tegra/host/pva/pva_fw_carveout.c | 80 +
 .../video/tegra/host/pva/pva_fw_carveout.h | 34 +
 drivers/video/tegra/host/pva/pva_hwseq.h | 68 +
 .../tegra/host/pva/pva_interface_regs_t19x.c | 66 +
 .../tegra/host/pva/pva_interface_regs_t19x.h | 35 +
 .../tegra/host/pva/pva_interface_regs_t23x.c | 141 ++
 .../tegra/host/pva/pva_interface_regs_t23x.h | 74 +
 drivers/video/tegra/host/pva/pva_ioctl.c | 1115 ++++++++++++
 .../tegra/host/pva/pva_iommu_context_dev.c | 237 +++
 .../tegra/host/pva/pva_iommu_context_dev.h | 28 +
 drivers/video/tegra/host/pva/pva_isr.c | 155 ++
 drivers/video/tegra/host/pva/pva_isr_t23x.c | 109 ++
 drivers/video/tegra/host/pva/pva_isr_t23x.h | 25 +
 drivers/video/tegra/host/pva/pva_mailbox.c | 207 +++
 drivers/video/tegra/host/pva/pva_mailbox.h | 134 ++
 .../video/tegra/host/pva/pva_mailbox_t19x.c | 80 +
 .../video/tegra/host/pva/pva_mailbox_t19x.h | 54 +
 .../video/tegra/host/pva/pva_mailbox_t23x.c | 56 +
 .../video/tegra/host/pva/pva_mailbox_t23x.h | 54 +
 drivers/video/tegra/host/pva/pva_nvhost.h | 91 +
 drivers/video/tegra/host/pva/pva_queue.c | 1567 +++++++++++++++++
 drivers/video/tegra/host/pva/pva_queue.h | 287 +++
 drivers/video/tegra/host/pva/pva_regs.h | 205 +++
 drivers/video/tegra/host/pva/pva_sec_ec.c | 65 +
 drivers/video/tegra/host/pva/pva_sec_ec.h | 23 +
 drivers/video/tegra/host/pva/pva_sha256.c | 214 +++
 drivers/video/tegra/host/pva/pva_sha256.h | 93 +
 .../video/tegra/host/pva/pva_status_regs.h | 44 +
 .../tegra/host/pva/pva_system_allow_list.c | 40 +
 .../tegra/host/pva/pva_system_allow_list.h | 19 +
 drivers/video/tegra/host/pva/pva_trace.c | 107 ++
 drivers/video/tegra/host/pva/pva_trace.h | 57 +
 .../tegra/host/pva/pva_version_config_t19x.c | 47 +
 .../tegra/host/pva/pva_version_config_t19x.h | 24 +
 .../tegra/host/pva/pva_version_config_t23x.c | 29 +
 .../tegra/host/pva/pva_version_config_t23x.h | 25 +
 .../video/tegra/host/pva/pva_vpu_app_auth.c | 418 +++++
 .../video/tegra/host/pva/pva_vpu_app_auth.h | 195 ++
 drivers/video/tegra/host/pva/pva_vpu_exe.c | 1172 ++++++++++++
 drivers/video/tegra/host/pva/pva_vpu_exe.h | 354 ++++
 drivers/video/tegra/host/pva/pva_vpu_ocd.c | 104 ++
 drivers/video/tegra/host/pva/pva_vpu_ocd.h | 26 +
 include/trace/events/nvhost_pva.h | 254 +++
 include/uapi/linux/nvpva_ioctl.h | 611 +++++++
 kernel-src-files-copy-list.txt | 4 -
 94 files changed, 19763 insertions(+), 12 deletions(-)
 create mode 100644 drivers/video/tegra/host/pva/Kconfig
 create mode 100644 drivers/video/tegra/host/pva/elf_include_fix.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/fw_config.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-bit.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-errors.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-fw-address-map.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-fw-version.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-interface.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-packed.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-sys-dma.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-sys-params.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-task.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-types.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-ucode-header-types.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-ucode-header.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-version.h
 create mode 100644 drivers/video/tegra/host/pva/hw_cfg_pva_v1.h
 create mode 100644 drivers/video/tegra/host/pva/hw_cfg_pva_v2.h
 create mode 100644 drivers/video/tegra/host/pva/hw_dma_ch_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_dma_desc_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_evp_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_hsp_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_proc_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_sec_pva_v1.h
 create mode 100644 drivers/video/tegra/host/pva/hw_sec_pva_v2.h
 create mode 100644 drivers/video/tegra/host/pva/hw_vmem_pva.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_buffer.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_buffer.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_client.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_client.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_elf_parser.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_elf_parser.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_queue.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_queue.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_syncpt.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_syncpt.h
 create mode 100644 drivers/video/tegra/host/pva/pva-vpu-perf.h
 create mode 100644 drivers/video/tegra/host/pva/pva.c
 create mode 100644 drivers/video/tegra/host/pva/pva.h
 create mode 100644 drivers/video/tegra/host/pva/pva_abort.c
 create mode 100644 drivers/video/tegra/host/pva/pva_bit_helpers.h
 create mode 100644 drivers/video/tegra/host/pva/pva_ccq_t19x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_ccq_t19x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_ccq_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_ccq_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_debug.c
 create mode 100644 drivers/video/tegra/host/pva/pva_dma.c
 create mode 100644 drivers/video/tegra/host/pva/pva_dma.h
 create mode 100644 drivers/video/tegra/host/pva/pva_fw_carveout.c
 create mode 100644 drivers/video/tegra/host/pva/pva_fw_carveout.h
 create mode 100644 drivers/video/tegra/host/pva/pva_hwseq.h
 create mode 100644 drivers/video/tegra/host/pva/pva_interface_regs_t19x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_interface_regs_t19x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_interface_regs_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_interface_regs_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_ioctl.c
 create mode 100644 drivers/video/tegra/host/pva/pva_iommu_context_dev.c
 create mode 100644 drivers/video/tegra/host/pva/pva_iommu_context_dev.h
 create mode 100644 drivers/video/tegra/host/pva/pva_isr.c
 create mode 100644 drivers/video/tegra/host/pva/pva_isr_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_isr_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox.c
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox.h
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox_t19x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox_t19x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_nvhost.h
 create mode 100644 drivers/video/tegra/host/pva/pva_queue.c
 create mode 100644 drivers/video/tegra/host/pva/pva_queue.h
 create mode 100644 drivers/video/tegra/host/pva/pva_regs.h
 create mode 100644 drivers/video/tegra/host/pva/pva_sec_ec.c
 create mode 100644 drivers/video/tegra/host/pva/pva_sec_ec.h
 create mode 100644 drivers/video/tegra/host/pva/pva_sha256.c
 create mode 100644 drivers/video/tegra/host/pva/pva_sha256.h
 create mode 100644 drivers/video/tegra/host/pva/pva_status_regs.h
 create mode 100644 drivers/video/tegra/host/pva/pva_system_allow_list.c
 create mode 100644 drivers/video/tegra/host/pva/pva_system_allow_list.h
 create mode 100644 drivers/video/tegra/host/pva/pva_trace.c
 create mode 100644 drivers/video/tegra/host/pva/pva_trace.h
 create mode 100644 drivers/video/tegra/host/pva/pva_version_config_t19x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_version_config_t19x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_version_config_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_version_config_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_app_auth.c
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_app_auth.h
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_exe.c
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_exe.h
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_ocd.c
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_ocd.h
 create mode 100644 include/trace/events/nvhost_pva.h
 create mode 100644 include/uapi/linux/nvpva_ioctl.h

diff --git a/drivers/video/tegra/host/pva/Kconfig b/drivers/video/tegra/host/pva/Kconfig
new file mode 100644
index 00000000..a02dcbea
--- /dev/null
+++ b/drivers/video/tegra/host/pva/Kconfig
@@ -0,0 +1,10 @@
+if ARCH_TEGRA
+
+config PVA_CO_DISABLED
+	bool "Tegra PVA FW CO disabled"
+	depends on TEGRA_GRHOST
+	default n
+	help
+	  Allow PVA FW to be booted from file
+	  Say n here if not sure.
+endif
diff --git a/drivers/video/tegra/host/pva/Makefile b/drivers/video/tegra/host/pva/Makefile
index 3aab963e..cb1ea5ec 100644
--- a/drivers/video/tegra/host/pva/Makefile
+++ b/drivers/video/tegra/host/pva/Makefile
@@ -1,9 +1,58 @@
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+ifeq ($(CONFIG_TEGRA_OOT_MODULE),m)
+NVPVA_OOT = y
+NVPVA_OBJ = m
+else
+NVPVA_OBJ = $(CONFIG_TEGRA_GRHOST_PVA)
+endif
+
+GCOV_PROFILE := y
+ccflags-y += -I$(srctree.nvidia)/drivers/video/tegra/host/pva
+ccflags-y += -I$(srctree.nvidia)/drivers/video/tegra/host/pva/fw_include
+ccflags-y += -I$(srctree.nvidia)/include
+ccflags-y += -I$(srctree.nvidia)/include/linux
+ccflags-y += -Werror
+
+# When using the upstream host1x driver, the Makefile must define the
+# srctree.host1x path in order to find the necessary header files for
+# the upstream host1x driver.
+ccflags-$(NVPVA_OOT) += -I$(srctree.host1x)/include
+ccflags-$(NVPVA_OOT) += -DCONFIG_TEGRA_HOST1X
+ccflags-$(NVPVA_OOT) += -DCONFIG_TEGRA_FUSE_UPSTREAM
+ccflags-$(NVPVA_OOT) += -DTEGRA_OOT_MODULE
+
+
+nvhost-pva-objs = \
+	pva.o \
+	pva_ioctl.o \
+	pva_mailbox.o \
+	pva_interface_regs_t19x.o \
+	pva_version_config_t19x.o \
+	pva_mailbox_t19x.o \
+	pva_isr.o \
+	pva_queue.o \
+	pva_debug.o \
+	pva_trace.o \
+	pva_abort.o \
+	pva_ccq_t19x.o \
+	nvpva_elf_parser.o \
+	pva_vpu_exe.o \
+	nvpva_client.o \
+	nvpva_queue.o \
+	pva_dma.o \
+	nvpva_buffer.o \
+	pva_vpu_ocd.o \
+	pva_sha256.o \
+	pva_system_allow_list.o \
+	pva_vpu_app_auth.o \
+	pva_iommu_context_dev.o \
+	nvpva_syncpt.o \
+	pva_fw_carveout.o \
+	pva_isr_t23x.o \
+	pva_mailbox_t23x.o \
+	pva_interface_regs_t23x.o \
+	pva_version_config_t23x.o \
+	pva_ccq_t23x.o \
+	pva_sec_ec.o
+
+obj-$(NVPVA_OBJ) += nvhost-pva.o
 
-# NOTE: Do not change or add anything in this makefile.
-# The source code and makefile rules are copied from the
-# kernel/nvidia/drivers/video/tegra/host/pva. This file is
-# just place-holder for empty makefile to avoid any build
-# issue when copy is not done from command line and building
-# the tree independent of source copy.
diff --git a/drivers/video/tegra/host/pva/elf_include_fix.h b/drivers/video/tegra/host/pva/elf_include_fix.h new file mode 100644 index 00000000..b4e4ad9d --- /dev/null +++ b/drivers/video/tegra/host/pva/elf_include_fix.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef ELF_INCLUDE_FIX_H +#undef ELF_INCLUDE_FIX_H + +#include + +#undef SHT_NULL +#undef SHT_PROGBITS +#undef SHT_SYMTAB +#undef SHT_STRTAB +#undef SHT_RELA +#undef SHT_HASH +#undef SHT_DYNAMIC +#undef SHT_NOTE +#undef ELFCLASS32 + +#undef SHT_NOBITS +#undef SHT_REL +#undef SHT_SHLIB +#undef SHT_DYNSYM + +#undef SHT_LOPROC +#undef SHT_HIPROC +#undef SHT_LOUSER +#undef SHT_HIUSER + +#undef SHN_UNDEF + +#undef ELF_ST_BIND +#undef ELF_ST_TYPE + +#undef STT_NOTYPE +#undef STT_OBJECT +#undef STT_FUNC +#undef STT_SECTION +#undef STT_FILE +#undef STT_COMMON + +#undef STB_LOCAL +#undef STB_GLOBAL +#undef STB_WEAK + + +#undef SHN_LORESERVE +#undef SHN_ABS +#undef SHN_COMMON + +#endif // ELF_INCLUDE_FIX_H diff --git a/drivers/video/tegra/host/pva/fw_include/fw_config.h b/drivers/video/tegra/host/pva/fw_include/fw_config.h new file mode 100644 index 00000000..9545c17a --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/fw_config.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_CONFIG_H +#define PVA_CONFIG_H + +/** + * @brief Number of DMA channels for T19x or Xavier. + */ +#define PVA_NUM_DMA_CHANNELS_T19X 14U + +/** + * @brief Number of DMA descriptors. + */ +#define PVA_NUM_DMA_DESCS 64U + +/** + * @brief Number of reserved DMA channels. These channels + * are reserved per DMA for R5 transfers. These channels + * will be used by R5 to transfer data which it needs. + */ +#define PVA_NUM_RESERVED_CHANNELS 1U + +/** + * @brief Number of reserved DMA descriptors. These descriptors + * are reserved per DMA for R5 transfers. These descriptors along + * with channels will be used by R5 to transfer data which it needs. + */ + +#define PVA_NUM_RESERVED_DESCRIPTORS 4U +/** + * @brief Number of dynamic DMA descriptors. These descriptors can be + * used by the VPU application transfer data. These exclude + * the reserved descriptors from total available ones. 
+ */ +#define PVA_NUM_DYNAMIC_DESCS (PVA_NUM_DMA_DESCS - \ + PVA_NUM_RESERVED_DESCRIPTORS) + +/** + * @brief Number of reserved AXI data buffers for T19x. + */ +#define PVA_NUM_RESERVED_ADB_BUFFERS_T19X 8U + +/** + * @brief Number of reserved VMEM data buffers. + */ +#define PVA_NUM_RESERVED_VDB_BUFFERS 0U + +/** + * @brief Total number of VMEM data buffers. + */ +#define PVA_NUM_DMA_VDB_BUFFS 128U + +/** + * @brief Total number of AXI data buffers for T19x. + */ +#define PVA_NUM_DMA_ADB_BUFFS_T19X 256U + +/** + * @brief Number of dynamic AXI data buffers for T19x. + * These exclude the reserved AXI data buffers from total available ones. + */ +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T19X (PVA_NUM_DMA_ADB_BUFFS_T19X - \ + PVA_NUM_RESERVED_ADB_BUFFERS_T19X) + +/** + * @brief Number of dynamic VMEM data buffers for T19x. + * These exclude the reserved VMEM data buffers from total available ones. + */ +#define PVA_NUM_DYNAMIC_VDB_BUFFS (PVA_NUM_DMA_VDB_BUFFS - \ + PVA_NUM_RESERVED_VDB_BUFFERS) + +/** + * @brief The first Reserved DMA descriptor. This is used as a + * starting point to iterate over reserved DMA descriptors. + */ +#define PVA_RESERVED_DESC_START PVA_NUM_DYNAMIC_DESCS + +/** + * @brief The first Reserved AXI data buffers. This is used as a + * starting point to iterate over reserved AXI data buffers. + */ +#define PVA_RESERVED_ADB_BUFF_START PVA_NUM_DYNAMIC_ADB_BUFFS + +/** + * @brief The first Reserved VMEM data buffers. This is used as a + * starting point to iterate over reserved VMEM data buffers. + */ +#define PVA_RESERVED_VDB_BUFF_START PVA_NUM_DYNAMIC_VDB_BUFFS +/** + * @brief Maximum number of DMA channels for T23x. + */ + +#define PVA_NUM_DMA_CHANNELS_T23X 16U + +/** + * @brief Number of reserved AXI data buffers for T23x. + */ +#define PVA_NUM_RESERVED_ADB_BUFFERS_T23X 16U + +/** + * @brief Total number of AXI data buffers for T23x. + */ +#define PVA_NUM_DMA_ADB_BUFFS_T23X 272U + +/** + * @brief Number of dynamic AXI data buffers for T23x. + * These exclude the reserved AXI data buffers from total available ones. + */ + +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T23X (PVA_NUM_DMA_ADB_BUFFS_T23X - \ + PVA_NUM_RESERVED_ADB_BUFFERS_T23X) +/** @} */ +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-bit.h b/drivers/video/tegra/host/pva/fw_include/pva-bit.h new file mode 100644 index 00000000..bf24551b --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-bit.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef PVA_BIT_H +#define PVA_BIT_H + +/* + * Bit manipulation macros + */ + +#define PVA_BITS_PER_BYTE 8UL +/* + * 8-bits + */ +#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu)) + +/* + * 8-bits + */ +#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu)) +#define PVA_MASK8(_msb_, _lsb_) \ + ((uint8_t)((((PVA_BIT8(_msb_) - 1U) | PVA_BIT8(_msb_)) & \ + ~(PVA_BIT8(_lsb_) - 1U)) & \ + 0xff)) +#define PVA_EXTRACT8(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK8((_msb_), (_lsb_))) >> (_lsb_))) +#define PVA_EXTRACT8_RANGE(_x_, _name_, _type_) \ + PVA_EXTRACT8(_x_, (_name_##_MSB), (_name_##_LSB), _type_) +#define PVA_INSERT8(_x_, _msb_, _lsb_) \ + ((((uint8_t)(_x_)) << (_lsb_)) & PVA_MASK8((_msb_), (_lsb_))) + +/* + * 16-bits + */ +#define PVA_BIT16(_b_) ((uint16_t)(((uint16_t)1U << (_b_)) & 0xffffu)) +#define PVA_MASK16(_msb_, _lsb_) \ + ((uint16_t)((((PVA_BIT16(_msb_) - 1U) | PVA_BIT16(_msb_)) & \ + ~(PVA_BIT16(_lsb_) - 1U)) & \ + 0xffff)) +#define PVA_EXTRACT16(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK16((_msb_), (_lsb_))) >> (_lsb_))) +#define PVA_INSERT16(_x_, _msb_, _lsb_) \ + ((((uint16_t)(_x_)) << (_lsb_)) & PVA_MASK16((_msb_), (_lsb_))) + +/* + * 32-bits + */ +#define PVA_BIT(_b_) ((uint32_t)(((uint32_t)1U << (_b_)) & 0xffffffffUL)) +#define PVA_MASK(_msb_, _lsb_) \ + (((PVA_BIT(_msb_) - 1U) | PVA_BIT(_msb_)) & ~(PVA_BIT(_lsb_) - 1U)) +#define PVA_MASK_RANGE(_name_) PVA_MASK((_name_##_MSB), (_name_##_LSB)) +#define PVA_EXTRACT(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK((_msb_), (_lsb_))) >> (_lsb_))) +#define PVA_EXTRACT_RANGE(_x_, _name_, _type_) \ + PVA_EXTRACT(_x_, (_name_##_MSB), (_name_##_LSB), _type_) +#define PVA_INSERT(_x_, _msb_, _lsb_) \ + ((((uint32_t)(_x_)) << (_lsb_)) & (uint32_t)PVA_MASK((_msb_), (_lsb_))) +#define PVA_INSERT_RANGE(_x_, _name_) \ + PVA_INSERT(_x_, (_name_##_MSB), (_name_##_LSB)) + +/* + * 64-bits + */ +#define PVA_BIT64(_b_) \ + ((uint64_t)(((uint64_t)1UL << (_b_)) & ((uint64_t)(0U) - 1U))) +#define PVA_MASK64(_msb_, _lsb_) \ + (((PVA_BIT64(_msb_) - (uint64_t)1U) | PVA_BIT64(_msb_)) & \ + ~(PVA_BIT64(_lsb_) - (uint64_t)1U)) +#define PVA_MASK64_RANGE(_name_) PVA_MASK64((_name_##_MSB), (_name_##_LSB)) +#define PVA_EXTRACT64(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK64((_msb_), (_lsb_))) >> (_lsb_))) +#define PVA_EXTRACT64_RANGE(_x_, _name_, _type_) \ + PVA_EXTRACT64(_x_, (_name_##_MSB), (_name_##_LSB), _type_) +#define PVA_INSERT64(_x_, _msb_, _lsb_) \ + ((((uint64_t)(_x_)) << (_lsb_)) & PVA_MASK64((_msb_), (_lsb_))) +#define PVA_INSERT64_RANGE(_x_, _name_) \ + PVA_INSERT64(_x_, (_name_##_MSB), (_name_##_LSB)) + +#define PVA_PACK64(_l_, _h_) \ + (PVA_INSERT64((_h_), 63U, 32U) | PVA_INSERT64((_l_), 31U, 0U)) + +#define PVA_HI32(_x_) PVA_EXTRACT64((_x_), 63U, 32U, uint32_t) +#define PVA_LOW32(_x_) PVA_EXTRACT64((_x_), 31U, 0U, uint32_t) + +#define PVA_RANGE_LOW(_name_) (_name_##_LSB) +#define PVA_RANGE_HIGH(_name_) (_name_##_MSB) +#define PVA_NUM_IN_RANGE(_n_, _name_) \ + ((PVA_RANGE_LOW(_name_) <= (_n_)) && ((_n_) <= PVA_RANGE_HIGH(_name_))) + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-errors.h b/drivers/video/tegra/host/pva/fw_include/pva-errors.h new file mode 100644 index 00000000..e8ea7aed --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-errors.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_ERRORS_H +#define PVA_ERRORS_H + +#include +/* + * PVA Error codes that will be read from PVA_CCQ_STATUS3 + */ +typedef uint16_t pva_errors_t; + +/* + * General and interface errors + */ +#define PVA_ERR_NO_ERROR 0x0U +#define PVA_ERR_BAD_CMD 0x1U +#define PVA_ERR_BAD_STATUS_ID 0x2U +#define PVA_ERR_BAD_QUEUE_ID 0x3U +#define PVA_ERR_BAD_PVE_ID 0x4U +#define PVA_ERR_BUFF_TOO_SMALL 0x5U +#define PVA_ERR_FEATURE_NOT_SUPPORTED 0x6U +#define PVA_ERR_QUEUE_NOT_SUSPENDED 0x7U +#define PVA_ERR_QUEUE_SUSPENDED 0x8U +#define PVA_ERR_BAD_ADDRESS 0x9U +#define PVA_ERR_BAD_THRESHOLD_ID 0xaU +#define PVA_ERR_BAD_ATTR_ID 0xbU +#define PVA_ERR_BAD_VMEM_ID 0xcU +#define PVA_ERR_BAD_TIME_VALUE 0xdU +#define PVA_ERR_BAD_SCHEDULER_ID 0xeU +#define PVA_ERR_BAD_SCHEDULER_ATTR 0xfU +#define PVA_ERR_BAD_STATUS_REG 0x10U +#define PVA_ERR_BAD_REGION_ID 0x11U +#define PVA_ERR_BAD_RESET_ID 0x12U +#define PVA_ERR_BAD_STAT_ID 0x13U +#define PVA_ERR_BAD_INSTANCE 0x14U +#define PVA_ERR_BAD_TASK 0x15U +#define PVA_ERR_BAD_TASK_ACTION_LIST 0x16U +#define PVA_ERR_BAD_TASK_STATE 0x17U +#define PVA_ERR_TASK_STATUS_MISMATCH 0x18U +#define PVA_ERR_BAD_TASK_OFFSET 0x19U +#define PVA_ERR_BAD_PARAMETERS 0x1aU +#define PVA_ERR_VALUE_MISMATCH 0x1bU +#define PVA_ERR_NO_VPU_HEADER 0x1cU +#define PVA_ERR_BAD_SURFACE_ARRAY 0x1dU +#define PVA_ERR_UNKNOWN_VPU_OP 0x1eU +#define PVA_ERR_BAD_VPU_OP_VER 0x1fU +#define PVA_ERR_UNKNOWN_R5_APP 0x20U +#define PVA_ERR_NO_R5_DATA 0x21U +#define PVA_ERR_NO_R5_HEADER 0x22U +#define PVA_ERR_PVE_TIMEOUT 0x23U +#define PVA_ERR_VPU_RESET 0x24U +#define PVA_ERR_VPU_ERROR_HALT 0x25U +#define PVA_ERR_VPU_ILLEGAL_INSTR 0x26U +#define PVA_ERR_VPU_DIVIDE_BY_0 0x27U +#define PVA_ERR_VPU_BAD_STATE 0x28U +#define PVA_ERR_VPU_DEBUG 0x29U +#define PVA_ERR_VPU_EXIT_ERROR 0x2aU +#define PVA_ERR_PPE_EXIT_ERROR 0x2bU +#define PVA_ERR_PVE_ABORT 0x2dU +#define PVA_ERR_BAD_OVERLAY_SEG 0x2eU +#define PVA_ERR_BAD_SEG_START 0x2fU +#define PVA_ERR_SEGMENTS_OVERLAP 0x30U +#define PVA_ERR_NO_VPU_DATA 0x31U +#define PVA_ERR_VPU_FP_NAN 0x32U +#define PVA_ERR_PPE_ILLEGAL_INSTR 0x33U +#define PVA_ERR_PPE_DIVIDE_BY_0 0x34U +#define PVA_ERR_PPE_FP_NAN 0x35U +#define PVA_ERR_PPE_ILLEGAL_DEBUG 0x36U +#define PVA_ERR_PPE_ILLEGAL_INSTR_ALIGN 0x37U +#define PVA_ERR_BAD_CACHED_DRAM_SEG 0x3aU +#define PVA_ERR_BAD_UNCACHED_DRAM_SEG 0x3bU +#define PVA_ERR_BAD_DRAM_IOVA 0x3cU +#define PVA_ERR_REG_MISMATCH 0x3dU +#define PVA_ERR_UNSUPPORTED_TNSR_TYPE 0x3eU +#define PVA_ERR_AISR_QUEUE_EMPTY 0x3fU +#define PVA_ERR_AISR_QUEUE_FULL 0x40U +#define PVA_ERR_BAD_L2SRAM_PARAMS 0x41U +#define PVA_ERR_BAD_TASK_PARAMS 0x42U + +/* + * DMA errors + */ +#define PVA_ERR_DMA_NO_BPP 0x200U +#define PVA_ERR_DMA_INVALID_WIDTH 0x201U +#define PVA_ERR_DMA_DATA_TOO_LARGE 0x202U +#define PVA_ERR_DMA_BPP_MISMATCH 0x203U +#define PVA_ERR_DMA_TRANSFER_TYPE_INVALID 0x204U +#define PVA_ERR_DMA_TILE_SIZE_MISMATCH 0x205U +#define PVA_ERR_DMA_SIZE_MISMATCH 0x206U +#define 
PVA_ERR_DMA_CHANNEL_TRANSFER 0x207U +#define PVA_ERR_BAD_DMA_DESC_ID 0x208U +#define PVA_ERR_BAD_DMA_CHANNEL_ID 0x209U +#define PVA_ERR_DMA_TOO_MANY_BUFFERS 0x20aU +#define PVA_ERR_DMA_TIMEOUT 0x20bU +#define PVA_ERR_DMA_INSUFFICIENT_SPACE 0x20cU +#define PVA_ERR_DMA_BAD_BLOCK_HEIGHT 0x20dU +#define PVA_ERR_DMA_BAD_LAYOUT 0x20eU +#define PVA_ERR_DMA_BAD_MEMORY 0x20fU +#define PVA_ERR_DMA_UNALIGNED_ADDR 0x210U +#define PVA_ERR_DMA_PRIV_ACCESS 0x211U +#define PVA_ERR_DMA_BAD_CALLBACK 0x212U +#define PVA_ERR_DMA_CALLBACK_REGISTERED 0x213U +#define PVA_ERR_DMA_CHAN_NOT_IN_USE 0x214U +#define PVA_ERR_DMA_INVALID_VDESC_FLAGS 0x215U +#define PVA_ERR_DMA_HWSEQ_BAD_PROGRAM 0x216U +#define PVA_ERR_DMA_HWSEQ_PROGRAM_TOO_LONG 0x217U +#define PVA_ERR_DMA_HWSEQ_FIELD_OVERFLOW 0x218U +#define PVA_ERR_DMA_HWSEQ_BAD_INDEX 0x219U +#define PVA_ERR_DMA_INVALID_CONFIG 0x220U +#define PVA_ERR_DMA_ERROR 0x221U +/* + * MISR errors + */ +#define PVA_ERR_MISR_NOT_RUN 0x280U +#define PVA_ERR_MISR_NOT_DONE 0x281U +#define PVA_ERR_MISR_TIMEOUT 0x282U +#define PVA_ERR_MISR_ADDR 0x283U +#define PVA_ERR_MISR_DATA 0x284U +#define PVA_ERR_MISR_ADDR_DATA 0x285U + +/* + * VPU Errors + */ +#define PVA_ERR_VPU_DMA_TIMEOUT 0x300U +#define PVA_ERR_VPU_PARAMETER_MISMATCH 0x301U +#define PVA_ERR_VPU_BAD_VALUE 0x302U +#define PVA_ERR_VPU_DLUT_CFG 0x303U +#define PVA_ERR_VPU_DLUT_MISS 0x304U +#define PVA_ERR_VPU_CP_ACCESS 0x305U + +/* + * Fast reset errors + */ + +#define PVA_ERR_FAST_RESET_R5_DMA_TIMEOUT 0x400U +#define PVA_ERR_FAST_RESET_TIMEOUT_VPU 0x401U +#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE1 0x402U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH0 0x403U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH1 0x404U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH2 0x405U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH3 0x406U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH4 0x407U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH5 0x408U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH6 0x409U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH7 0x410U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH8 0x411U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH9 0x412U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH10 0x413U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH11 0x414U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH12 0x415U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH13 0x416U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH14 0x417U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH15 0x418U +#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE2 0x419U + +/* + * R5 Application Errors + */ +#define PVA_ERR_R5_APP_ARGS 0x800U +#define PVA_ERR_R5_APP_COPY_NULL 0x801U +#define PVA_ERR_BAD_QUEUE_HANDLE 0x802U +#define PVA_ERR_QUEUE_EMPTY 0x803U +#define PVA_ERR_SYS_QUEUE_ERROR 0x804U +#define PVA_ERR_APP_WAKE_BREAK 0x805U +#define PVA_ERR_TASK_QUEUE_FULL 0x806U +#define PVA_ERR_APP_BAD_CALLBACK 0x807U +#define PVA_ERR_TASK_QUEUE_EMPTY 0x808U +#define PVA_ERR_VPU_RUNNING 0x809U +#define PVA_ERR_VPU_NOT_STARTED 0x80aU +#define PVA_ERR_VPU_BAD_CALLBACK 0x80bU +#define PVA_ERR_APP_ABORT 0x80cU +#define PVA_ERR_APP_ASSERT 0x80dU +#define PVA_ERR_APP_BAD_CONTEXT 0x80eU +#define PVA_ERR_INSUFFICIENT_MEMORY 0x80fU +#define PVA_ERR_INSUFFICIENT_FAST_MEMORY 0x810U +#define PVA_ERR_PARAMETER_MISMATCH 0x811U +#define PVA_ERR_ALLOC_FAILED 0x812U +#define PVA_ERR_FREE_FAILED 0x813U +#define PVA_ERR_SMMU_NOT_WORKING 0x814U + +/* + * Informational errors + */ +#define PVA_ERR_NO_PARM_ARRAY 0x995U +#define PVA_ERR_NOT_FOUND 0x996U +#define PVA_ERR_NO_TASK 0x997U +#define PVA_ERR_MINIMUM_LENGTH 0x998U +#define PVA_ERR_LENGTH_PROVIDED 0x999U +#define PVA_ERR_TRY_AGAIN 0x99AU + +/* Never used */ +#define 
PVA_ERR_MAX_ERR 0xFFFFU + +#endif /* _PVA_ERRORS_H_ */ diff --git a/drivers/video/tegra/host/pva/fw_include/pva-fw-address-map.h b/drivers/video/tegra/host/pva/fw_include/pva-fw-address-map.h new file mode 100644 index 00000000..1ebde4dd --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-fw-address-map.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_FW_ADDRESS_MAP_H +#define PVA_FW_ADDRESS_MAP_H + +/** + * @brief Starting R5 address where FW code and data is placed. + * This address is expected to be programmed in PVA_CFG_AR1PRIV_START by KMD. + * This address is also expected to be used as offset where + * PVA_CFG_R5PRIV_LSEGREG1 and PVA_CFG_R5PRIV_USEGREG1 registers would point. + */ +#define FW_CODE_DATA_START_ADDR 1610612736 //0x60000000 + +/** + * @brief R5 address where FW code and data is expected to end. + * This address is expected to be programmed in PVA_CFG_AR1PRIV_END by KMD. + */ +#define FW_CODE_DATA_END_ADDR 1612840960 //0x60220000 + +/** + * @defgroup PVA_EXCEPTION_VECTORS + * + * @brief Following macros define R5 addresses that are expected to be + * programmed by KMD in EVP registers as is. + * @{ + */ +/** + * @brief R5 address of reset exception vector + */ +#define EVP_RESET_VECTOR (1610877952) //0x60040C00 +/** + * @brief R5 address of undefined instruction exception vector + */ +#define EVP_UNDEFINED_INSTRUCTION_VECTOR (1610878976) //0x60041000 +/** + * @brief R5 address of svc exception vector + */ +#define EVP_SVC_VECTOR (1610880000) //0x60041400 +/** + * @brief R5 address of prefetch abort exception vector + */ +#define EVP_PREFETCH_ABORT_VECTOR (1610881024) //0x60041800 +/** + * @brief R5 address of data abort exception vector + */ +#define EVP_DATA_ABORT_VECTOR (1610882048) //0x60041C00 +/** + * @brief R5 address of reserved exception vector. + * It points to a dummy handler. + */ +#define EVP_RESERVED_VECTOR (1610883072) //0x60042000 +/** + * @brief R5 address of IRQ exception vector + */ +#define EVP_IRQ_VECTOR (1610884096) //0x60042400 +/** + * @brief R5 address of FIQ exception vector + */ +#define EVP_FIQ_VECTOR (1610885120) //0x60042800 +/** @} */ + +/** + * @defgroup PVA_DEBUG_BUFFERS + * + * @brief These buffers are arranged in the following order: + * TRACE_BUFFER followed by CODE_COVERAGE_BUFFER followed by DEBUG_LOG_BUFFER. + * @{ + */ +/** + * @brief Maximum size of trace buffer in bytes. + */ +#define FW_TRACE_BUFFER_SIZE 262144 //0x40000 +/** + * @brief Maximum size of code coverage buffer in bytes. + */ +#define FW_CODE_COVERAGE_BUFFER_SIZE 524288 //0x80000 +/** + * @brief Maximum size of debug log buffer in bytes. + */ +#define FW_DEBUG_LOG_BUFFER_SIZE 262144 //0x40000 +/** @} */ + +/** + * @brief Total size of buffers used for FW debug in bytes. + * TBD: Update this address based on build configuration once KMD changes + * are merged. 
+ */ +#define FW_DEBUG_DATA_TOTAL_SIZE (FW_TRACE_BUFFER_SIZE + \ + FW_DEBUG_LOG_BUFFER_SIZE + \ + FW_CODE_COVERAGE_BUFFER_SIZE) + +/** + * @brief Starting R5 address where FW debug related data is placed. + * This address is expected to be programmed in PVA_CFG_AR2PRIV_START by KMD. + * This address is also expected to be used as offset where + * PVA_CFG_R5PRIV_LSEGREG2 and PVA_CFG_R5PRIV_USEGREG2 registers would point. + */ +#define FW_DEBUG_DATA_START_ADDR 1879048192 //0x70000000 + +/** + * @brief R5 address where FW debug related data is expected to end. + * This address is expected to be programmed in PVA_CFG_AR2PRIV_END by KMD. + */ +#define FW_DEBUG_DATA_END_ADDR (FW_DEBUG_DATA_START_ADDR + \ + FW_DEBUG_DATA_TOTAL_SIZE) + +/** + * @brief Starting R5 address where FW expects shared buffers between KMD and + * FW to be placed. This is to be used as offset when programming + * PVA_CFG_R5USER_LSEGREG and PVA_CFG_R5USER_USEGREG. + */ +#define FW_SHARED_MEMORY_START 2147483648 //0x80000000 + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-fw-version.h b/drivers/video/tegra/host/pva/fw_include/pva-fw-version.h new file mode 100644 index 00000000..dcbcc896 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-fw-version.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_FW_VERSION_H +#define PVA_FW_VERSION_H + +#define VERSION_TYPE \ + (PVA_DEBUG | (SAFETY << 1) | (PVA_TEST_SUPPORT << 2) | \ + (STANDALONE_TESTS << 3)) + +#define PVA_VERSION_MAJOR 0x08 +#define PVA_VERSION_MINOR 0x02 +#define PVA_VERSION_SUBMINOR 0x03 + +#ifndef PVA_VERSION_GCID_REVISION +#define PVA_VERSION_GCID_REVISION 0x00000000 +#endif + +#ifndef PVA_VERSION_BUILT_ON +#define PVA_VERSION_BUILT_ON 0x00000000 +#endif + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-interface.h b/drivers/video/tegra/host/pva/fw_include/pva-interface.h new file mode 100644 index 00000000..05726a18 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-interface.h @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef PVA_INTERFACE_H +#define PVA_INTERFACE_H + +#include +#include +#include +#include +#include + +/* + * Register definition for PVA_SHRD_SMP_STA0 + * + * This is used to communicate various bits of information between the + * OS and the PVA. + */ + +/* + * Bits set by the OS and examined by the R5 + */ +#define PVA_BOOT_INT PVA_BIT(31U) /* OS wants an interrupt */ +#define PVA_OS_PRINT PVA_BIT(30U) /* OS will process print */ +#define PVA_TEST_WAIT PVA_BIT(29U) /* R5 wait to start tests */ +#define PVA_TEST_RUN PVA_BIT(28U) /* Start tests */ +#define PVA_WAIT_DEBUG PVA_BIT(24U) /* Spin-wait early in boot */ +#define PVA_CG_DISABLE PVA_BIT(20U) /* Disable PVA clock gating */ +#define PVA_VMEM_RD_WAR_DISABLE PVA_BIT(19U) /* Disable VMEM RD fail WAR */ +#define PVA_VMEM_MBX_WAR_ENABLE PVA_BIT(18U) /* WAR for Bug 2090939 enabled*/ + +/* + * Bits set by the R5 and examined by the OS + */ +#define PVA_TESTS_STARTED PVA_BIT(10U) /* PVA Tests started */ +#define PVA_TESTS_PASSED PVA_BIT(9U) /* PVA Tests passed */ +#define PVA_TESTS_FAILED PVA_BIT(8U) /* PVA Tests failed */ +#define PVA_HALTED PVA_BIT(2U) /* PVA uCode halted */ +#define PVA_BOOT_DONE PVA_BIT(1U) /* PVA is "ready" */ +#define PVA_TEST_MODE PVA_BIT(0U) /* PVA is in "test mode" */ + +/* + * Symbolic definitions of the mailbox registers (rather than using 0-7) + */ +#define PVA_MBOX_COMMAND 0U +#define PVA_MBOX_ADDR 1U +#define PVA_MBOX_LENGTH 2U +#define PVA_MBOX_ARG 3U +#define PVA_MBOX_SIDE_CHANNEL_HOST_WR 4U +#define PVA_MBOX_AISR 5U +#define PVA_MBOX_SIDE_CHANNEL_HOST_RD 6U +#define PVA_MBOX_ISR 7U + +/* + * For using the mailboxes as a status interface, we overload them + */ +#define PVA_MBOX_STATUS4 1U +#define PVA_MBOX_STATUS5 2U +#define PVA_MBOX_STATUS6 3U +#define PVA_MBOX_STATUS7 4U + +/* + * Mailbox side channel bit definitions + */ +#define PVA_SIDE_CHANNEL_MBOX_BIT 0U +#define PVA_SIDE_CHANNEL_MBOX_BIT_MASK (~(1U << PVA_SIDE_CHANNEL_MBOX_BIT)) + +/* + * Code checking the version of the R5 uCode should check + * the values returned from the R5_VERSION subcommand of + * CMD_GET_STATUS to determine if the version currently + * running on the PVA's R5 is compatible with what the + * driver was compiled against. + */ +#define PVA_R5_VERSION \ + PVA_MAKE_VERSION(0, PVA_VERSION_MAJOR, PVA_VERSION_MINOR, \ + PVA_VERSION_SUBMINOR) + +/* + * PVA interrupt status register contained in PVA_MBOX_ISR. 
+ */ +#define PVA_INT_PENDING PVA_BIT(31U) +#define PVA_READY PVA_BIT(30U) +#define PVA_BUSY PVA_BIT(29U) +#define PVA_CMD_COMPLETE PVA_BIT(28U) +#define PVA_CMD_ERROR PVA_BIT(27U) +#define PVA_VALID_STATUS7 PVA_BIT(26U) +#define PVA_VALID_STATUS6 PVA_BIT(25U) +#define PVA_VALID_STATUS5 PVA_BIT(24U) +#define PVA_VALID_STATUS4 PVA_BIT(23U) +#define PVA_VALID_STATUS3 PVA_BIT(22U) + +#define PVA_VALID_CCQ_ISR PVA_BIT(20U) +#define PVA_VALID_CCQ_AISR PVA_BIT(24U) +#define PVA_CCQ_OVERFLOW PVA_BIT(28U) + +/* + * On T23X we pack the ISR in with the ERR code + */ +#define PVA_STATUS_ISR_MSB 31 +#define PVA_STATUS_ISR_LSB 16 +#define PVA_STATUS_ERR_MSB 15 +#define PVA_STATUS_ERR_LSB 0 + +/* + * PVA interrupt status register contained in PVA_MBOX_AISR + */ +#define PVA_AISR_INT_PENDING PVA_BIT(31U) +#define PVA_AISR_TASK_COMPLETE PVA_BIT(30U) +#define PVA_AISR_TASK_ERROR PVA_BIT(29U) +#define PVA_AISR_ABORT PVA_BIT(0U) + +#define PVA_STATUS_AISR_TASK_ID_MSB (8U) +#define PVA_STATUS_AISR_TASK_ID_LSB (1U) +#define PVA_STATUS_AISR_VPU_ID_MSB (9U) +#define PVA_STATUS_AISR_VPU_ID_LSB (9U) +#define PVA_STATUS_AISR_QUEUE_MSB (12U) +#define PVA_STATUS_AISR_QUEUE_LSB (10U) +#define PVA_STATUS_AISR_ERR_MSB (28U) +#define PVA_STATUS_AISR_ERR_LSB (13U) + +#define PVA_PACK_AISR_STATUS(e, q, v, t) (PVA_INSERT(e, PVA_STATUS_AISR_ERR_MSB,\ + PVA_STATUS_AISR_ERR_LSB) \ + | PVA_INSERT(q, PVA_STATUS_AISR_QUEUE_MSB, \ + PVA_STATUS_AISR_QUEUE_LSB) \ + | PVA_INSERT(v, PVA_STATUS_AISR_VPU_ID_MSB, \ + PVA_STATUS_AISR_VPU_ID_LSB) \ + | PVA_INSERT(t, PVA_STATUS_AISR_TASK_ID_MSB, \ + PVA_STATUS_AISR_TASK_ID_LSB)) +#define PVA_GET_QUEUE_ID_FROM_STATUS(_s_) PVA_EXTRACT((_s_), \ + PVA_STATUS_AISR_QUEUE_MSB, \ + PVA_STATUS_AISR_QUEUE_LSB, \ + uint8_t) +#define PVA_GET_ERROR_FROM_STATUS(_s_) PVA_EXTRACT((_s_), \ + PVA_STATUS_AISR_ERR_MSB, \ + PVA_STATUS_AISR_ERR_LSB, \ + uint16_t) +#define PVA_GET_VPU_ID_FROM_STATUS(_s_) PVA_EXTRACT((_s_), \ + PVA_STATUS_AISR_VPU_ID_MSB, \ + PVA_STATUS_AISR_VPU_ID_LSB, \ + uint8_t) +#define PVA_GET_TASK_ID_FROM_STATUS(_s_) PVA_EXTRACT((_s_), \ + PVA_STATUS_AISR_TASK_ID_MSB, \ + PVA_STATUS_AISR_TASK_ID_LSB, \ + uint8_t) + +#define PVA_GET_ERROR_CODE(_s_) PVA_EXTRACT((_s_), 15U, 0U, pva_errors_t) + +/* + * Commands that can be sent to the PVA through the PVA_SHRD_MBOX + * interface. 
+ */ +typedef uint8_t pva_cmds_t; +#define CMD_GET_STATUS 0U +#define CMD_SUBMIT 1U +#define CMD_ABORT_QUEUE 2U +#define CMD_NOOP 3U +#define CMD_SW_BIST 4U +#define CMD_GET_VPU_STATS 5U +#define CMD_SET_LOGGING 6U +#define CMD_NEXT 7U /* Must be last */ + +/* + * CMD_GET_STATUS subcommands + */ +typedef uint8_t pva_status_cmds_t; +#define R5_VERSION 0U +#define PVA_UPTIME 1U +#define COMPLETED_TASK 2U +#define GET_STATUS_NEXT 3U /* Deleted RUNNING TASKS as it is not used in FW */ + +/* + * CCQ FIFO SUBMIT interface definition + */ +#define PVA_ADDR_LOWER_32BITS_MSB (63U) +#define PVA_ADDR_LOWER_32BITS_LSB (32U) +#define PVA_QUEUE_ID_MSB (28U) +#define PVA_QUEUE_ID_LSB (24U) +#define PVA_BATCH_SIZE_MSB (23U) +#define PVA_BATCH_SIZE_LSB (16U) +#define PVA_ADDR_HIGHER_8BITS_MSB (15U) +#define PVA_ADDR_HIGHER_8BITS_LSB (8U) +#define PVA_CMD_ID_MSB (7U) +#define PVA_CMD_ID_LSB (0U) + +/* + * Macros to indicate LSB and MSB of SUBCOMMAND field in a command + */ +#define PVA_SUB_CMD_ID_MSB (15U) +#define PVA_SUB_CMD_ID_LSB (8U) + +/* + * Macro used to indicate the most significant + * bit to extract higher 8 bits of the 40 bit address + */ +#define PVA_EXTRACT_ADDR_HIGHER_8BITS_MSB 39U +/* + * Macro used to indicate the least significant + * bit to extract higher 8 bits of the 40 bit address + */ +#define PVA_EXTRACT_ADDR_HIGHER_8BITS_LSB 32U + +/** + * Macro used to specify most significant bit + * of the VPU stats enable field in CMD_SET_VPU_STATS_BUFFER command + */ +#define PVA_CMD_VPU_STATS_EN_MSB 23U +/** + * Macro used to specify least significant bit + * of the VPU stats enable field in CMD_SET_VPU_STATS_BUFFER command + */ +#define PVA_CMD_VPU_STATS_EN_LSB 16U + +/* + * SW Bist subcommands + */ +#define PVA_SDL_SUBMIT 0xF1U +#define PVA_SDL_SET_ERROR_INJECT_SDL 0xF2U +#define PVA_SDL_SET_ERROR_INJECT_PANIC 0xF3U + +/* + * Generic fields in a command sent to the PVA through the PVA_SHRD_MBOX + * interface. + */ +#define PVA_CMD_INT_ON_ERR PVA_BIT(30U) +#define PVA_CMD_INT_ON_COMPLETE PVA_BIT(29U) +#define PVA_GET_BATCH_SIZE(_c_, _t_) PVA_EXTRACT(_c_, PVA_BATCH_SIZE_MSB, PVA_BATCH_SIZE_LSB, _t_) +#define PVA_SET_BATCH_SIZE(_c_) PVA_INSERT(_c_, PVA_BATCH_SIZE_MSB, PVA_BATCH_SIZE_LSB) +#define PVA_GET_SUBCOMMAND(_c_, _t_) PVA_EXTRACT(_c_, PVA_SUB_CMD_ID_MSB, PVA_SUB_CMD_ID_LSB, _t_) +#define PVA_SET_SUBCOMMAND(_c_) PVA_INSERT(_c_, PVA_SUB_CMD_ID_MSB, PVA_SUB_CMD_ID_LSB) +#define PVA_GET_COMMAND(_c_) PVA_EXTRACT(_c_, PVA_CMD_ID_MSB, PVA_CMD_ID_LSB, pva_cmds_t) +#define PVA_SET_COMMAND(_c_) PVA_INSERT(_c_, PVA_CMD_ID_MSB, PVA_CMD_ID_LSB) + +/* + * Generic fields in a command sent through the command FIFO interface. + */ +#define PVA_FIFO_GET_COMMAND(_c_) \ + PVA_EXTRACT64_RANGE((_c_), PVA_CCQ_CMD, pva_cmds_t) +#define PVA_CMD_MBOX_TO_FIFO_FLAG_SHIFT 29U +#define PVA_FIFO_INT_ON_ERR PVA_BIT64(1U) +#define PVA_FIFO_INT_ON_COMPLETE PVA_BIT64(0U) + +/* + * Reserved bits in mbox3 used and consumed internally by R5 + */ +#define PVA_MBOX3_RESERVED_SOURCE_INTERFACE_MSB 31 +#define PVA_MBOX3_RESERVED_SOURCE_INTERFACE_LSB 24 + +/* + * On T23X we map 4x32bit pushes to the CCQ to our mailbox command structure + * CCQ is delivered in 64bit chunks. This defines the mapping into each of the + * 64bit chunks. 
+ */ +/* First 64bit write */ +#define PVA_CCQ_FIRST_PUSH_MBOX_0_MSB 31 +#define PVA_CCQ_FIRST_PUSH_MBOX_0_LSB 0 + +#define PVA_CCQ_FIRST_PUSH_MBOX_1_MSB 63 +#define PVA_CCQ_FIRST_PUSH_MBOX_1_LSB 32 +/* Second 64bit write */ +#define PVA_CCQ_SECOND_PUSH_MBOX_2_MSB 31 +#define PVA_CCQ_SECOND_PUSH_MBOX_2_LSB 0 + +#define PVA_CCQ_SECOND_PUSH_MBOX_3_MSB 63 +#define PVA_CCQ_SECOND_PUSH_MBOX_3_LSB 32 + +/* + * Structure for managing commands through PVA_SHRD_MBOX* + */ +struct pva_cmd_s { + uint32_t cmd_field[4]; +}; + +struct pva_vpu_stats_s { + /** + * @brief The accumulated VPU utilization time in the current window. + */ + uint64_t total_utilization_time[2]; + /** + * @brief The timestamp which signifies start of the current window. + */ + uint64_t window_start_time; + /** + * @brief The timestamp of end of the current window. + */ + uint64_t window_end_time; +} __packed; + +/* + * CMD_NOOP command + */ +#define PVA_CMD_FL_NOOP_ECHO PVA_BIT(28U) +#define PVA_CMD_FL_NOOP_ERROR PVA_BIT(27U) + +static inline uint32_t pva_cmd_noop(struct pva_cmd_s *const cmd, + const uint32_t echo_data, + const uint32_t status_reg, + const uint32_t flags) +{ + cmd->cmd_field[0] = flags | PVA_SET_SUBCOMMAND(status_reg) | + PVA_SET_COMMAND(CMD_NOOP); + cmd->cmd_field[1] = echo_data; + + return 2U; +} + +/* + * CMD_GET_STATUS + * Not used directly. + */ +static inline uint32_t pva_cmd_get_status(const pva_status_cmds_t subcommand, + const uint32_t flags) +{ + return flags | PVA_SET_SUBCOMMAND(subcommand) | + PVA_SET_COMMAND(CMD_GET_STATUS); +} + +/* + * R5_VERSION get status command + */ +struct pva_status_R5_version_s { + uint32_t cur_version; + uint32_t oldest_version; + uint32_t change_id; + uint32_t build_date; +}; + +static inline uint32_t pva_cmd_R5_version(struct pva_cmd_s *const cmd, + const uint32_t flags) +{ + cmd->cmd_field[0] = pva_cmd_get_status(R5_VERSION, flags); + return 1U; +} + +/* + * PVA_UPTIME get status command + */ +struct pva_status_pva_uptime_s { + uint32_t uptime_lo; + uint32_t uptime_hi; +}; + +static inline uint32_t pva_cmd_pva_uptime(struct pva_cmd_s *const cmd, + const pva_pve_id_t pve, + const uint32_t flags) +{ + (void)pve; /*For Future use*/ + cmd->cmd_field[0] = pva_cmd_get_status(PVA_UPTIME, flags); + return 1U; +} + +/* + * COMPLETED_TASK get status command + */ +struct pva_status_completed_task_s { + uint32_t task_addr_lo; + uint32_t task_addr_hi; + uint32_t task_error; + uint32_t task_queue_vpu; +}; + +static inline uint32_t pva_cmd_completed_task(struct pva_cmd_s *const cmd, + const uint32_t flags) +{ + cmd->cmd_field[0] = pva_cmd_get_status(COMPLETED_TASK, flags); + return 1U; +} + +/* + * CMD_SET_LOGGING + */ + +#define PVA_CMD_FL_LOG_PVA_ENABLE PVA_BIT(28U) +#define PVA_CMD_FL_LOG_R5_ENABLE PVA_BIT(27U) +#define PVA_CMD_FL_LOG_VPU_ENABLE PVA_BIT(26U) +#define PVA_CMD_FL_LOG_NO_OVERFLOW PVA_BIT(25U) +#define PVA_CMD_FL_LOG_OVERFLOW_INT PVA_BIT(24U) +#define PVA_CMD_FL_PRT_PVA_ENABLE PVA_BIT(23U) +#define PVA_CMD_FL_PRT_R5_ENABLE PVA_BIT(22U) +#define PVA_CMD_FL_PRT_VPU_ENABLE PVA_BIT(21U) +#define PVA_CMD_FL_PRT_NO_OVERFLOW PVA_BIT(20U) +#define PVA_CMD_FL_PRT_OVERFLOW_INT PVA_BIT(19U) + +static inline uint32_t pva_cmd_set_logging_level(struct pva_cmd_s *const cmd, + const uint32_t pva_log_level, + const uint32_t flags) +{ + cmd->cmd_field[0] = flags | PVA_SET_COMMAND(CMD_SET_LOGGING); + cmd->cmd_field[1] = PVA_INSERT(pva_log_level, 31U, 0U); + return 2U; +} + +/* + * CMD_SUBMIT (batch mode) + */ +static inline uint32_t pva_cmd_submit_batch(struct pva_cmd_s *const cmd, + 
const uint8_t queue_id, + const uint64_t addr, + const uint8_t batch_size, + const uint32_t flags) +{ + cmd->cmd_field[0] = + flags | PVA_SET_COMMAND(CMD_SUBMIT) | + PVA_INSERT(batch_size, PVA_BATCH_SIZE_MSB, PVA_BATCH_SIZE_LSB) | + PVA_INSERT(PVA_EXTRACT64(addr, PVA_EXTRACT_ADDR_HIGHER_8BITS_MSB, + PVA_EXTRACT_ADDR_HIGHER_8BITS_LSB, uint32_t), + PVA_ADDR_HIGHER_8BITS_MSB, PVA_ADDR_HIGHER_8BITS_LSB) | + PVA_INSERT(queue_id, PVA_QUEUE_ID_MSB, PVA_QUEUE_ID_LSB); + cmd->cmd_field[1] = PVA_LOW32(addr); + return 2U; +} + +/* + * CMD_SUBMIT (single task) + */ +static inline uint32_t pva_cmd_submit(struct pva_cmd_s *const cmd, + const uint8_t queue_id, + const uint64_t addr, const uint32_t flags) +{ + return pva_cmd_submit_batch(cmd, queue_id, addr, 0U, flags); +} + +/* + * CMD_SW_BIST + */ +static inline uint32_t pva_cmd_sw_bist(struct pva_cmd_s *const cmd, + const uint32_t bist_cmd, + const uint32_t inject_error, + const uint32_t flags) +{ + cmd->cmd_field[0] = flags | PVA_SET_COMMAND(CMD_SW_BIST) | + PVA_SET_SUBCOMMAND(bist_cmd); + cmd->cmd_field[1] = (inject_error == 1) ? 0xAAAAAAAA : 0xBBBBBBBB; + return 2U; +} + +/* + * CMD_ABORT_QUEUE + */ +static inline uint32_t pva_cmd_abort_task(struct pva_cmd_s *const cmd, + const uint8_t queue_id, + const uint32_t flags) +{ + cmd->cmd_field[0] = flags | PVA_SET_COMMAND(CMD_ABORT_QUEUE) | + PVA_SET_SUBCOMMAND(queue_id); + return 1U; +} + +/* + * CMD_SET_VPU_STATS + */ +static inline uint32_t +pva_cmd_get_vpu_stats(struct pva_cmd_s * const cmd, + const uint64_t addr, + const uint32_t flags, + const uint8_t value) +{ + cmd->cmd_field[0] = flags + | PVA_SET_COMMAND(CMD_GET_VPU_STATS) + | PVA_INSERT(PVA_EXTRACT64(addr, PVA_EXTRACT_ADDR_HIGHER_8BITS_MSB, + PVA_EXTRACT_ADDR_HIGHER_8BITS_LSB, uint32_t), + PVA_ADDR_HIGHER_8BITS_MSB, PVA_ADDR_HIGHER_8BITS_LSB) + | PVA_INSERT(value, PVA_CMD_VPU_STATS_EN_MSB, PVA_CMD_VPU_STATS_EN_LSB); + cmd->cmd_field[1] = PVA_LOW32(addr); + + return 2U; +} +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-packed.h b/drivers/video/tegra/host/pva/fw_include/pva-packed.h new file mode 100644 index 00000000..acdd9520 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-packed.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_PACKED_H +#define PVA_PACKED_H +#ifdef __chess__ +#define PVA_PACKED /* TODO: find chess compiler pragma if there is one. */ +#else +#define PVA_PACKED __packed +#endif +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-sys-dma.h b/drivers/video/tegra/host/pva/fw_include/pva-sys-dma.h new file mode 100644 index 00000000..cb7c4d2b --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-sys-dma.h @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * @file pva-sys-dma.h + * + * @brief Types and constants related to PVA DMA setup and DMA + * descriptors. + */ + +#ifndef PVA_SYS_DMA_H +#define PVA_SYS_DMA_H + +#include +#include +#include + +/*** Version number of the current DMA info structure */ +#define PVA_DMA_INFO_VERSION_ID (1U) + +/** @brief DMA channels for a VPU app. + * + * The DMA channel structure contains the set-up of a PVA DMA channel + * used by the VPU app. + */ +struct PVA_PACKED pva_dma_ch_config_s { + /**< HW channel number. Zero if this config is unused. */ + uint32_t ch_number; + /**< DMA CH_CNTL0 register. */ + uint32_t cntl0; + /**< DMA CH_CNTL1 register. */ + uint32_t cntl1; + /**< Boundary pad register. */ + uint32_t boundary_pad; + /**< HWSEQ control register, Ignored on t19x. */ + uint32_t hwseqcntl; + /**< HWSEQ Frame Seq control register, Ignored on t19x and t23x. */ + uint32_t hwseqfscntl; + uint32_t pad_dma_channel0[2]; +}; + +/** Number of dma done masks in DMA info structure. */ +#define PVA_SYS_DMA_NUM_TRIGGERS (9U) +/** Number of DMA channel configurations in DMA info structure. */ +#define PVA_SYS_DMA_NUM_CHANNELS (15U) +/** Maximum number of DMA descriptors allowed. */ +#define PVA_SYS_DMA_MAX_DESCRIPTORS (60U) + +/** @brief DMA info for a VPU app. + * + * The DMA info contains the set-up of a PVA DMA engine for a VPU app. + */ +struct PVA_PACKED pva_dma_info_s { + /**< size of this structure */ + uint16_t dma_info_size; + /**< PVA_DMA_INFO_VERSION_ID */ + uint16_t dma_info_version; + /**< Number of used channels */ + uint8_t num_channels; + /**< Number of used descriptors*/ + uint8_t num_descriptors; +#ifdef SYSTEM_TESTS_ENABLED + uint16_t r5_channel_mask; /**< channel is used by R5*/ +#endif + /**< Number of bytes used in hwseq */ + uint16_t num_hwseq; + + /* + * * . + */ + +/** + * @file pva-sys-params.h + * + * @brief Types and constants related to VPU application parameters. + */ + +#ifndef PVA_SYS_PARAMS_H +#define PVA_SYS_PARAMS_H + +#include +#include + +/** VPU parameter header. + * + * The VPU App parameters contains kernel-user-provided data to be + * copied into the VMEM before executing the VPU app. The parameter + * headers are stored in the parameter_data_iova memory area of + * parameter_info_base field. + * + * The FW can also initialize complex datatypes, which are marked by + * special param_base outside the normal IOVA space. See the structure + * struct pva_vpu_instance_data_s for an example. + */ +struct PVA_PACKED pva_vpu_parameters_s { + pva_iova param_base; /**< I/O address of the parameter data */ + uint32_t addr; /**< Target address (VMEM offset) */ + uint32_t size; /**< Size of the parameter data in bytes */ +}; + +/** + * @brief The structure holds the wrapper information + * for the VMEM parameters that is provided by the user. + */ +struct PVA_PACKED pva_vpu_parameter_info_s { + /** + * @brief The IOVA address of the parameter data. 
+ * This should point to an array of type @ref pva_vpu_parameter_list_t . + * If no parameters are present this should be set to 0 + */ + pva_iova parameter_data_iova; + + /** + * @brief The starting IOVA address of the parameter data whose size + * is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This data needs to be + * memcpied by FW to VMEM and DMA should not be used. If no small + * parameters are present this should be set to 0. + */ + pva_iova small_vpu_param_data_iova; + + /** + * @brief The number of bytes of small VPU parameter data, i.e the + * data whose size is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . If no small + * parameters are present, this should be set to 0 + */ + uint32_t small_vpu_parameter_data_size; + + /** + * @brief The index of the array of type @ref pva_vpu_parameter_list_t from which + * the VPU large parameters are present, i.e the vpu parameters whose size is greater + * than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This value will always point to the index + * immediately after the small parameters. If no large parameter is present, then + * this field value will be same as the value of + * @ref pva_vpu_parameter_info_t.vpu_instance_parameter_list_start_index field + */ + uint32_t large_vpu_parameter_list_start_index; + + /** + * @brief The index of the array of type @ref pva_vpu_parameter_list_t from which + * the VPU instance parameters are present. This value will always point to the index + * immediately after the large parameters if large parameters are present, + * else it will be the same value as + * @ref pva_vpu_parameter_info_t.large_vpu_parameter_list_start_index field. + */ + uint32_t vpu_instance_parameter_list_start_index; +}; + +/** + * @brief The minimuim size of the VPU parameter for it to be considered + * as a large parameter + */ +#define PVA_DMA_VMEM_COPY_THRESHOLD ((uint32_t)(256U)) + +/** Prefix for special param_base markers */ +#define PVA_COMPLEX_IOVA (0xDA7AULL << 48ULL) +/** Versioned param_base marker */ +#define PVA_COMPLEX_IOVA_V(v) (PVA_COMPLEX_IOVA | ((uint64_t)(v) << 32ULL)) + +/** Marker for struct pva_vpu_instance_data_s */ +#define PVA_SYS_INSTANCE_DATA_V1_IOVA (PVA_COMPLEX_IOVA_V(1) | 0x00000001ULL) + +/** ELF symbol for struct pva_vpu_instance_data_s */ +#define PVA_SYS_INSTANCE_DATA_V1_SYMBOL "_sys_instance_data_v1" + +/** FW-provided instance data */ +struct PVA_PACKED pva_vpu_instance_data_s { + uint32_t vpu_id; + uint32_t vmem_base; + uint32_t dma_descriptor_base; + uint32_t l2ram_base; + uint32_t l2ram_size; +}; + +#endif /* PVA_SYS_PARAMS_H */ diff --git a/drivers/video/tegra/host/pva/fw_include/pva-task.h b/drivers/video/tegra/host/pva/fw_include/pva-task.h new file mode 100644 index 00000000..4981cdd8 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-task.h @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef PVA_TASK_H +#define PVA_TASK_H + +#include +#include +#include +#include + +#define TASK_VERSION_ID 0x01U +#define PVA_TASK_VERSION_ID 0x01U +#define PVA_ENGINE_ID 'P' + +#define PVA_MAX_PREACTION_LISTS 26U +#define PVA_MAX_POSTACTION_LISTS 28U + +#define PVA_TASK_POINTER_AUX_SIZE_MASK 0x00ffffffffffffffU +#define PVA_TASK_POINTER_AUX_SIZE_SHIFT 0 +#define PVA_TASK_POINTER_AUX_FLAGS_MASK 0xff00000000000000U +#define PVA_TASK_POINTER_AUX_FLAGS_SHIFT 56 +#define PVA_TASK_POINTER_AUX_FLAGS_CVNAS (1U << 0) + +#define NVPVA_TENSOR_MAX_DIMENSIONS (9u) + +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NHWC 0x00000001U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NCHW 0x00000002U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NCxHWx 0x00000003U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NDHWC 0x00000004U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NCDHW 0x00000005U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_IMPLICIT 0x00000006U + +/* + * Generic task meta-data for the CV pipeline. + */ +typedef uint16_t pva_task_ofs; + +struct PVA_PACKED pva_gen_task_s { + pva_iova next; /* ptr to next task in the list */ + uint8_t versionid; + uint8_t engineid; + pva_task_ofs length; + uint16_t sequence; + uint8_t n_preaction_lists; + uint8_t n_postaction_lists; + pva_task_ofs preaction_lists_p; + pva_task_ofs postaction_lists_p; +}; + +/* + * Structure pointed to by {pre/post}action_lists_p. This points + * to the actual action list. + */ +struct PVA_PACKED pva_action_list_s { + pva_task_ofs offset; + uint16_t length; +}; + +/** @defgroup TASK_ACT PVA Task Action Identifiers. + * + * @{ + */ +#define TASK_ACT_PVA_STATISTICS 0x00U +#define TASK_ACT_PTR_BLK_GTREQL 0x01U +#define TASK_ACT_READ_STATUS 0x02U +#define TASK_ACT_WRITE_STATUS 0x03U +#define TASK_ACT_PTR_WRITE_SOT_V 0x04U +#define TASK_ACT_PTR_WRITE_SOT_R 0x05U +#define TASK_ACT_PTR_WRITE_EOT_V 0x06U +#define TASK_ACT_PTR_WRITE_EOT_R 0x07U +#define TASK_ACT_PTR_WRITE_EOT 0x08U +/** @} */ + +struct PVA_PACKED pva_gen_task_status_s { + uint64_t timestamp; + uint32_t info32; + uint16_t info16; + uint16_t status; +}; + +struct PVA_PACKED pva_task_statistics_s { + uint64_t queued_time; /* Time when the task was queued by KMD */ + uint64_t head_time; /* when task reached head of queue */ + uint64_t input_actions_complete; /* when input actions done */ + uint64_t vpu_assigned_time; /* when task assigned a VPU */ + uint64_t vpu_start_time; /* when VPU started running task */ + uint64_t vpu_complete_time; /* when execution completed */ + uint64_t complete_time; /* when task considered complete */ + uint8_t vpu_assigned; /* which VPU task was assigned */ + uint8_t queue_id; /* ID of the queue the task was submitted on*/ + uint8_t reserved[6]; +}; + +enum pva_task_parameter_type_e { + PVA_PARAM_FIRST = 0U, /* must match first type */ + PVA_PARAM_SCALAR_LIST = 0U, + PVA_PARAM_SURFACE_LIST = 1U, + PVA_PARAM_ROI_LIST = 2U, + PVA_PARAM_2DPOINTS_LIST = 3U, + PVA_PARAM_OPAQUE_DATA = 4U, + PVA_PARAM_LAST = 5U /* must be last! */ +}; + +struct PVA_PACKED pva_task_opaque_data_desc_s { + /* Number of bytes in the primary payload */ + uint16_t primary_payload_size; +}; + +struct PVA_PACKED pva_task_pointer_s { + uint64_t address; + uint64_t aux; +}; + +struct PVA_PACKED pva_task_parameter_array_s { + pva_iova address; + uint32_t size; + uint32_t type; /* type = pva_task_parameter_type_e */ +}; + +/* + * Parameter descriptor (all parameters have the same header) + * the specific data for the parameters immediately follows + * the descriptor. 
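The aux word of pva_task_pointer_s packs the buffer size into its low 56 bits and flags into the top byte, per the PVA_TASK_POINTER_AUX_* masks above. A small sketch of packing and unpacking it (plain C, mask values mirrored from this header):

#include <stdint.h>

#define AUX_SIZE_MASK   0x00ffffffffffffffULL  /* PVA_TASK_POINTER_AUX_SIZE_MASK */
#define AUX_SIZE_SHIFT  0
#define AUX_FLAGS_MASK  0xff00000000000000ULL  /* PVA_TASK_POINTER_AUX_FLAGS_MASK */
#define AUX_FLAGS_SHIFT 56

static uint64_t pack_aux(uint64_t size, uint8_t flags)
{
        return ((size << AUX_SIZE_SHIFT) & AUX_SIZE_MASK) |
               (((uint64_t)flags << AUX_FLAGS_SHIFT) & AUX_FLAGS_MASK);
}

static uint64_t aux_size(uint64_t aux)
{
        return (aux & AUX_SIZE_MASK) >> AUX_SIZE_SHIFT;
}

static uint8_t aux_flags(uint64_t aux)
{
        return (uint8_t)((aux & AUX_FLAGS_MASK) >> AUX_FLAGS_SHIFT);
}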
+ */ +struct PVA_PACKED pva_task_parameter_desc_s { + uint32_t num_parameters; + uint32_t reserved; +}; + +/* + * Individual Region of Interest (ROI) descriptor + */ +struct PVA_PACKED pva_task_roi_desc_s { + uint32_t left; + uint32_t top; + uint32_t right; + uint32_t bottom; +}; + +/* + * Surface descriptor + */ +struct PVA_PACKED pva_task_surface_s { + pva_iova address; + pva_iova roi_addr; + uint32_t roi_size; + uint32_t surface_size; + uint32_t width; + uint32_t height; + uint32_t line_stride; + uint32_t plane_stride; + uint32_t num_planes; + uint8_t layout; + uint8_t block_height_log2; + uint8_t memory; + uint8_t reserved; + uint64_t format; +}; + +/* + * 2-dimensional point descriptor + */ +struct PVA_PACKED pva_task_point2d_s { + uint32_t x; + uint32_t y; +}; + +/* + * Surface Layout. + */ +#define PVA_TASK_SURFACE_LAYOUT_PITCH_LINEAR 0U +#define PVA_TASK_SURFACE_LAYOUT_BLOCK_LINEAR 1U + +/* + * Where the surface is located. + */ +#define PVA_TASK_SURFACE_MEM_FL_CV_SURFACE PVA_BIT(0U) +#define PVA_TASK_SURFACE_MEM_FL_CV_ROI PVA_BIT(1U) + +/** + * @brief Task descriptor for the new architecture. + * + * The runlist of the new task descriptor contains pointer to + * task-specific parameters of the VPU app, pointer to info structure + * describing its binary code, and its dma setup. + */ +struct PVA_PACKED pva_td_s { + /** @brief IOVA pointer to the next task */ + pva_iova next; + /** @brief Version of task descriptor internal to PVA. + * Should hold a value of 2 for safety architecture + */ + uint8_t runlist_version; + /** @brief Number of pre-actions. + * Valid range is 0..PVA_MAX_PREACTION_LISTS - both inclusive + */ + uint8_t num_preactions; + /** @brief Number of post-actions. + * Valid range is 0..PVA_MAX_POSTACTION_LISTS - both inclusive + */ + uint8_t num_postactions; + /** Index of the stream ID assigned to this task */ + uint8_t sid_index; + /** @brief Task configuration flags */ + uint32_t flags; + /** @brief IOVA pointer to an instance of pva_vpu_parameter_info_t */ + pva_iova parameter_info_base; + /** @brief IOVA pointer to a pva_bin_info_t structure */ + pva_iova bin_info; + /** @brief IOVA pointer to a pva_bin_info_t structure */ + pva_iova ppe_bin_info; + /** @brief IOVA pointer to a pva_dma_info_t structure */ + pva_iova dma_info; + /** IOVA pointer to a pva_circular_info_t structure */ + pva_iova stdout_info; + /** @brief IOVA pointer to an array of pva_task_action_t structure */ + pva_iova preactions; + /** @brief IOVA pointer to an array of pva_task_action_t structure */ + pva_iova postactions; + /** @brief Timeout for the VPU algorithm in micro-seconds. 
+ * Valid range is 0..PVA_MAX_TIMEOUT - both inclusive + */ + uint64_t timeout; + /** @brief Variable to hold the queued time of the task */ + uint64_t queued_time; + /** @brief The ID of the batch that this task belongs to */ + uint64_t batch_id; + /** Size of L2SRAM required for the task */ + uint32_t l2sram_size; + /** Number of total tasks with timer resource utilization */ + uint16_t timer_ref_cnt; + /** Number of total tasks with L2SRAM resource utilization */ + uint16_t l2sram_ref_cnt; + /** @brief Number of parameters in parameter array */ + uint16_t num_parameters; + /** @brief Interface on which FW should return status */ + uint8_t status_interface; + /** @brief The ID of this task used to identify it during AISR */ + uint8_t task_id; + /** @note The below two fields are added for backward + * compatibility, will be removed once changes are merged + */ + /** Additional padding to maintain alignement */ + uint8_t pad0[4]; +}; + +/** Runlist version for new task descriptor format */ +#define PVA_RUNLIST_VERSION_ID (0x02U) + +/** @addtogroup PVA_TASK_FL + * @{ + */ +/** Schedule on VPU0 only */ +#define PVA_TASK_FL_VPU0 PVA_BIT(0U) + +/** Schedule on VPU1 only */ +#define PVA_TASK_FL_VPU1 PVA_BIT(1U) + +/** Flag to allow VPU debugger attach for the task */ +#define PVA_TASK_FL_VPU_DEBUG PVA_BIT(2U) + +/** Flag to request masking of illegal instruction error for the task */ +#define PVA_TASK_FL_ERR_MASK_ILLEGAL_INSTR PVA_BIT(3U) + +/** Flag to request masking of divide by zero error for the task */ +#define PVA_TASK_FL_ERR_MASK_DIVIDE_BY_0 PVA_BIT(4U) + +/** Flag to request masking of floating point NAN error for the task */ +#define PVA_TASK_FL_ERR_MASK_FP_NAN PVA_BIT(5U) + +/** Schedule next task in list immediately on this VPU. + * + * Not allowed in the last task of batch list. 
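pva_td_s::flags is a plain OR of the PVA_TASK_FL_* bits. An illustrative composition, assuming PVA_BIT() from pva-bit.h (not shown in this hunk) expands to a single set bit:

#include <stdint.h>

#define PVA_BIT(b) (1U << (b))   /* assumed equivalent of the pva-bit.h helper */

#define PVA_TASK_FL_VPU0                   PVA_BIT(0U)
#define PVA_TASK_FL_VPU_DEBUG              PVA_BIT(2U)
#define PVA_TASK_FL_ERR_MASK_ILLEGAL_INSTR PVA_BIT(3U)

/*
 * Example flags value: pin the task to VPU0, allow a VPU debugger to
 * attach, and mask illegal-instruction errors.  The result would be
 * written to pva_td_s::flags.
 */
static uint32_t example_task_flags(void)
{
        return PVA_TASK_FL_VPU0 |
               PVA_TASK_FL_VPU_DEBUG |
               PVA_TASK_FL_ERR_MASK_ILLEGAL_INSTR;
}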
+ */ +#define PVA_TASK_FL_HOT_VPU PVA_BIT(10U) + +/** @brief Flag to identify a barrier task */ +#define PVA_TASK_FL_SYNC_TASKS PVA_BIT(11U) + +/** @brief Flag to identify L2SRAM is being utilized for + * the task and to decrement l2sram_ref_count after task is done + */ +#define PVA_TASK_FL_DEC_L2SRAM PVA_BIT(12U) + +#define PVA_TASK_FL_DEC_TIMER PVA_BIT(13U) + +/** Flag to indicate special access needed by task */ +#define PVA_TASK_FL_SPECIAL_ACCESS PVA_BIT(15U) + +/** Flag to indicate queued time is needed by task */ +#define PVA_TASK_FL_QUEUED_TS PVA_BIT(16U) + +/** Flag to indicate head time is needed by task */ +#define PVA_TASK_FL_HEAD_TS PVA_BIT(17U) + +/** Flag to indicate ready time is needed by task */ +#define PVA_TASK_FL_READY_TS PVA_BIT(18U) + +/** Flag to indicate R5 start time/vpu assigned time is needed by task */ +#define PVA_TASK_FL_SOT_R_TS PVA_BIT(19U) + +/** Flag to indicate VPU start time is needed by task */ +#define PVA_TASK_FL_SOT_V_TS PVA_BIT(20U) + +/** Flag to indicate VPU done time is needed by task */ +#define PVA_TASK_FL_EOT_V_TS PVA_BIT(21U) + +/** Flag to indicate R5 complete time is needed by task */ +#define PVA_TASK_FL_EOT_R_TS PVA_BIT(22U) + +/** Flag to indicate Golden register check is needed by task */ +#define PVA_TASK_FL_GR_CHECK PVA_BIT(23U) + +/** Flag to indicate that stats are enabled */ +#define PVA_TASK_FL_STATS_ENABLE (PVA_TASK_FL_QUEUED_TS | PVA_TASK_FL_HEAD_TS |\ + PVA_TASK_FL_READY_TS | PVA_TASK_FL_SOT_R_TS |\ + PVA_TASK_FL_SOT_V_TS | PVA_TASK_FL_EOT_V_TS |\ + PVA_TASK_FL_EOT_R_TS) +/** @} */ + +/** Version of the binary info */ +#define PVA_BIN_INFO_VERSION_ID (0x01U) +#define PVA_MAX_VPU_METADATA (4U) + +#define PVA_CODE_SEC_BASE_ADDR_ALIGN (128ULL) +#define PVA_CODE_SEC_SIZE_ALIGN (32U) + +#define PVA_DATA_SEC_BASE_ADDR_ALIGN (64ULL) +#define PVA_DATA_SEC_SIZE_ALIGN (32U) + +struct pva_vpu_data_section_s { + uint32_t offset; /**< Offset from the base source address */ + uint32_t addr; /**< Target address (VMEM offset) */ + uint32_t size; /**< Size of the section in bytes */ +}; + +/** @brief Information of a VPU app binary. + * + * The PVA kernels are implemented as VPU apps, small VPU programs + * executed independently on a VPU. The information structure is used + * by PVA R5 to preload the code in the VPU icache as well as preload + * the data sections into the VPU VMEM. + * + * If PVA has multiple address spaces, the application code, data, and + * metadata may be placed in different address space domains accessed + * using different StreamIDs. The code is accessed by VPU, the data + * sections by PVA DMA, the metadata by R5. + * + * The metadata sections contain the ABI information of the VPU + * app. The metadata is stored as data sections in the ELF executable, + * however, the address of the metadata section is >= 768K (0xC0000). + */ +struct PVA_PACKED pva_bin_info_s { + uint16_t bin_info_size; /**< Size of this structure */ + uint16_t bin_info_version; /**< PVA_BIN_INFO_VERSION_ID */ + + /** Size of the code */ + uint32_t code_size; + /** Base address of the code. Should be aligned at 128. */ + pva_iova code_base; + + /** Base address of the data. Should be aligned at 64. 
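The code and data base addresses referenced by pva_bin_info_s must respect the PVA_*_SEC_*_ALIGN constants above. A loader-side sanity check might look like this (sketch only, constants mirrored from this header):

#include <stdint.h>
#include <stdbool.h>

#define PVA_CODE_SEC_BASE_ADDR_ALIGN 128ULL
#define PVA_CODE_SEC_SIZE_ALIGN      32U
#define PVA_DATA_SEC_BASE_ADDR_ALIGN 64ULL

/* Check the alignment constraints documented for pva_bin_info_s. */
static bool bin_info_aligned(uint64_t code_base, uint32_t code_size,
                             uint64_t data_sec_base)
{
        return (code_base % PVA_CODE_SEC_BASE_ADDR_ALIGN == 0) &&
               (code_size % PVA_CODE_SEC_SIZE_ALIGN == 0) &&
               (data_sec_base % PVA_DATA_SEC_BASE_ADDR_ALIGN == 0);
}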
*/ + /** @brief Holds address of data section info of type + * @ref pva_vpu_data_section_t + */ + pva_iova data_sec_base; + + /** @brief Number of data section info stored @ref data_sec_base */ + uint32_t data_sec_count; + + pva_iova data_base; +}; + +/* + * Status structure that will be return to circular buffer + */ +struct PVA_PACKED pva_task_error_s { + /* IOVA address of task */ + pva_iova addr; + + /* Status of task execution */ + uint16_t error; + + /* Indicates if status is valid */ + uint8_t valid; + + /* VPU id on which the task was scheduled */ + uint8_t vpu; + + /* Queue to which the task belongs */ + uint8_t queue; + + /* Task ID of the task */ + uint8_t task_id; +}; + + +struct PVA_PACKED pva_circular_buffer_info_s { + pva_iova head; + pva_iova tail; + pva_iova err; + pva_iova buffer; + uint32_t buffer_size; +}; + + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-types.h b/drivers/video/tegra/host/pva/fw_include/pva-types.h new file mode 100644 index 00000000..1caff658 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-types.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_TYPES_H +#define PVA_TYPES_H +#if !defined(__KERNEL__) +#define __user +#include +#include +#else +#include +#endif +#include + +typedef uint64_t pva_iova; + +/* + * Queue IDs + */ +enum pva_queue_id_e { + PVA_FW_QUEUE_0, + PVA_FW_QUEUE_1, + PVA_FW_QUEUE_2, + PVA_FW_QUEUE_3, + PVA_FW_QUEUE_4, + PVA_FW_QUEUE_5, + PVA_FW_QUEUE_6, + PVA_FW_QUEUE_7, + PVA_FW_QUEUE_8, /* PVA_SW_BIST_QUEUE_ID0 */ + PVA_FW_QUEUE_9, /* PVA_SW_BIST_QUEUE_ID1 */ + PVA_NUM_QUEUES +}; + +/* + * Hardware FIFO IDs + */ +typedef uint8_t pva_ccq_fifo_id_t; + +/* + * PVE IDs + */ +typedef uint8_t pva_pve_id_t; +#define PVA_PVE_ID_NONE 0xffU + +/* + * VMEM IDs + */ +typedef uint8_t pva_vmem_id_t; + +/* + * DMA Descriptor IDs + */ +typedef uint8_t pva_dma_desc_t; + +/* + * DMA Channel IDs + */ +typedef uint8_t pva_dma_channel_id_t; + +/* + * DMA Channel Mask + */ +typedef uint16_t pva_dma_channel_mask_t; + +/* + * Address range + */ +struct pva_addr_range_s { + uint32_t offset; + uint32_t addr; + uint32_t size; +}; + +/* + * Macro to access size of a member of a struct + */ +#define PVA_MEMBER_SIZEOF(_struct_, _member_) \ + (sizeof(((_struct_ *)0)->_member_)) + +/* + * SID + */ +typedef uint8_t pva_sid_t; + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-ucode-header-types.h b/drivers/video/tegra/host/pva/fw_include/pva-ucode-header-types.h new file mode 100644 index 00000000..71c8d548 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-ucode-header-types.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
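PVA_MEMBER_SIZEOF() evaluates the size of a struct member without needing an instance, because sizeof never dereferences its operand. A trivial usage example (the struct is mirrored from pva-types.h):

#include <stdint.h>
#include <stdio.h>

#define PVA_MEMBER_SIZEOF(_struct_, _member_) \
        (sizeof(((_struct_ *)0)->_member_))

struct pva_addr_range_s {       /* mirrored from pva-types.h */
        uint32_t offset;
        uint32_t addr;
        uint32_t size;
};

int main(void)
{
        /* Prints the size of the 'size' member (4 bytes here). */
        printf("size member: %zu bytes\n",
               PVA_MEMBER_SIZEOF(struct pva_addr_range_s, size));
        return 0;
}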
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_UCODE_HEADER_TYPES_H +#define PVA_UCODE_HEADER_TYPES_H + +/* + * This file is distinct from the other uCode header file because it + * defines constants/values that are used by the linker scripts and therefor + * cannot have C structures (only pre-processor directives). + */ + +/* + * Define the length of a section header to be defined independently than + * the C structure (it will be larger). Picking a value that is easy to + * compute. + */ +#define PVA_UCODE_SEG_HDR_LENGTH 128 + +#define PVA_UCODE_SEG_NONE 0 /* not a segment */ +#define PVA_UCODE_SEG_EVP 1 /* EVP information */ +#define PVA_UCODE_SEG_R5 2 /* R5 code/data */ +#define PVA_UCODE_SEG_CRASHDUMP 3 /* space for crash dump */ +#define PVA_UCODE_SEG_TRACE_LOG 4 /* space for PVA trace logs */ +#define PVA_UCODE_SEG_DRAM_CACHED 5 /* cachable DRAM area */ +#define PVA_UCODE_SEG_CODE_COVERAGE 6 /* space for PVA FW code coverage */ +#define PVA_UCODE_SEG_DEBUG_LOG 7 /* space for PVA debug logs */ +#define PVA_UCODE_SEG_NEXT 8 /* must be last */ + +/* PVA FW binary max segment size used for section alignment */ +#define PVA_BIN_MAX_HEADER_SIZE 0x1000 +#define PVA_BIN_MAX_EVP_SIZE 0x1000 + +#define PVA_HDR_MAGIC 0x31415650 /* PVA1 in little endian */ +#define PVA_HDR_VERSION 0x00010000 /* version 1.0 of the header */ +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-ucode-header.h b/drivers/video/tegra/host/pva/fw_include/pva-ucode-header.h new file mode 100644 index 00000000..bdbd8516 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-ucode-header.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_UCODE_HEADER_H +#define PVA_UCODE_HEADER_H + +#include +#include + +#define MAX_SEGMENT_NAME_LEN 64 + +/* + * PVA uCode Header. + * + * There is a basic header that describes the uCode. Other than the + * validation information (such as versions, checksums (MD5 hash?), etc) + * it describes the various segments of the uCode image. The important + * thing to note is that there are multiple segments for various parts of + * the uCode. + * + * Each segment has: + * - type: this indicates the type of segment it is. + * - id: this gives a uniqueness to the segment when there are multiple + * segments of the same type. It also allows different segments types + * to be related by using the same segment ID (such as relating VPU code, + * R5 application code and parameter data together). 
+ * - name: this is NUL terminated string that is the "name" of the segment + * - size: size of the segment in bytes + * - offset: this is the offset from the start of the binary as to + * where the data contained in the segment is to be placed. + * - address: this is the address of where the data in the segment is + * to be written to. + * - physical address: this is used in some segments to denote where in + * the 40-bit address space the segment is located. This allows for + * setting up some of the segment registers. + * + * A segment can define a region but contain no data. In those cases, the + * file offset would be 0. + * + * In the case of DRAM the load address and size can be used to setup the + * relevant segment registers and DRAM apertures. + * + */ + +/* + * There can be multiple segments of the same type. + */ +struct pva_ucode_seg_s { + uint32_t type; /* type of segment */ + uint32_t id; /* ID of segment */ + uint32_t size; /* size of the segment */ + uint32_t offset; /* offset from header to segment start */ + uint32_t addr; /* load address of segment */ + uint8_t name[MAX_SEGMENT_NAME_LEN]; + uint64_t phys_addr __aligned(8); +}; + +/* + * Ucode header gives information on what kind of images are contained in + * a binary. + * + * nsegments : Number of segments available in pva_ucode_r5_sysfw_info_t. + * + * R5 system image layout used for booting R5. + * +--------------------------------+ + * + Ucode header + + * +--------------------------------+ + * + struct + + * + pva_ucode_r5_sysfw_info_t + + * +--------------------------------+ + * + + + * + pva firwmare data/code + + * +--------------------------------+ + */ +struct __packed pva_ucode_hdr_s { + uint32_t magic; + uint32_t hdr_version; + uint32_t ucode_version; + uint32_t nsegments; +}; + +struct pva_ucode_r5_sysfw_info_s { + struct pva_ucode_seg_s evp __aligned(128); + struct pva_ucode_seg_s dram __aligned(128); + struct pva_ucode_seg_s crash_dump __aligned(128); + struct pva_ucode_seg_s trace_log __aligned(128); + struct pva_ucode_seg_s code_coverage __aligned(128); + struct pva_ucode_seg_s debug_log __aligned(128); + struct pva_ucode_seg_s cached_dram __aligned(128); +}; + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-version.h b/drivers/video/tegra/host/pva/fw_include/pva-version.h new file mode 100644 index 00000000..9f4a84ea --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-version.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
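A consumer of the image would first check the header magic and version from pva-ucode-header-types.h before trusting nsegments. A minimal validation sketch (fields mirrored from pva_ucode_hdr_s; this is not the driver's actual loader code):

#include <stdint.h>
#include <stdbool.h>

#define PVA_HDR_MAGIC   0x31415650   /* "PVA1" in little endian */
#define PVA_HDR_VERSION 0x00010000   /* header version 1.0 */

struct ucode_hdr {               /* mirrors struct pva_ucode_hdr_s */
        uint32_t magic;
        uint32_t hdr_version;
        uint32_t ucode_version;
        uint32_t nsegments;
};

/* Accept the image only if the magic and header version both match. */
static bool ucode_hdr_valid(const struct ucode_hdr *hdr)
{
        return hdr->magic == PVA_HDR_MAGIC &&
               hdr->hdr_version == PVA_HDR_VERSION;
}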
+ */ + +#ifndef PVA_VERSION_H +#define PVA_VERSION_H + +#include +#include +#include + +#define PVA_MAKE_VERSION(_type_, _major_, _minor_, _subminor_) \ + (PVA_INSERT(_type_, 31, 24) | PVA_INSERT(_major_, 23, 16) | \ + PVA_INSERT(_minor_, 15, 8) | PVA_INSERT(_subminor_, 7, 0)) + +#define PVA_VERSION(_type_) \ + PVA_MAKE_VERSION(_type_, PVA_VERSION_MAJOR, PVA_VERSION_MINOR, \ + PVA_VERSION_SUBMINOR) + +#endif diff --git a/drivers/video/tegra/host/pva/hw_cfg_pva_v1.h b/drivers/video/tegra/host/pva/hw_cfg_pva_v1.h new file mode 100644 index 00000000..c81582bb --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_cfg_pva_v1.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
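PVA_MAKE_VERSION() packs a type byte and three version bytes into one 32-bit word. PVA_INSERT() lives in pva-bit.h (not included in this hunk); the sketch below uses an assumed local equivalent that places a value into bits [msb:lsb]:

#include <stdint.h>

/* Assumed stand-in for PVA_INSERT(): place 'v' into bits [msb:lsb]. */
#define INSERT(v, msb, lsb) \
        (((uint32_t)(v) << (lsb)) & \
         (((~0U) >> (31 - (msb))) & ~((1U << (lsb)) - 1U)))

#define MAKE_VERSION(type, major, minor, subminor)        \
        (INSERT(type, 31, 24) | INSERT(major, 23, 16) |   \
         INSERT(minor, 15, 8) | INSERT(subminor, 7, 0))

/* 'R', 5, 2, 0 packs to 0x52050200. */
static const uint32_t example_version = MAKE_VERSION('R', 5, 2, 0);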
+ */ +#ifndef _hw_cfg_pva_v1_h_ +#define _hw_cfg_pva_v1_h_ + +static inline u32 v1_cfg_user_sid_r(void) +{ + return 0x70000; +} +static inline u32 v1_cfg_ccq_r(void) +{ + return 0x71000; +} +static inline u32 v1_cfg_vps0user_lsegreg_r(void) +{ + return 0x71004; +} +static inline u32 v1_cfg_vps1user_lsegreg_r(void) +{ + return 0x71008; +} +static inline u32 v1_cfg_r5user_lsegreg_r(void) +{ + return 0x7100c; +} +static inline u32 v1_cfg_vps0user_usegreg_r(void) +{ + return 0x71010; +} +static inline u32 v1_cfg_vps1user_usegreg_r(void) +{ + return 0x71014; +} +static inline u32 v1_cfg_r5user_usegreg_r(void) +{ + return 0x71018; +} +static inline u32 v1_cfg_ccq_status_r(u32 status_id) +{ + return 0x72000U + 0x4U * status_id; +} +static inline u32 v1_cfg_priv_sid_r(void) +{ + return 0x80000; +} +static inline u32 v1_cfg_priv_ar1_lsegreg_r(void) +{ + return 0x80004; +} +static inline u32 v1_cfg_priv_ar1_usegreg_r(void) +{ + return 0x80008; +} +static inline u32 v1_cfg_priv_ar2_lsegreg_r(void) +{ + return 0x8000c; +} +static inline u32 v1_cfg_priv_ar2_usegreg_r(void) +{ + return 0x80010; +} +static inline u32 v1_cfg_priv_ar1_start_r(void) +{ + return 0x80014; +} +static inline u32 v1_cfg_priv_ar1_end_r(void) +{ + return 0x80018; +} +static inline u32 v1_cfg_priv_ar2_start_r(void) +{ + return 0x8001c; +} +static inline u32 v1_cfg_priv_ar2_end_r(void) +{ + return 0x80020; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_cfg_pva_v2.h b/drivers/video/tegra/host/pva/hw_cfg_pva_v2.h new file mode 100644 index 00000000..98224875 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_cfg_pva_v2.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . 
This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_cfg_pva_gen2_h_ +#define _hw_cfg_pva_gen2_h_ +#include "hw_cfg_pva_v1.h" +#define V2_SID_CONTROL_BASE 0x240000U + +static inline u32 v2_cfg_user_sid_vm_r(u32 idx) +{ + return V2_SID_CONTROL_BASE + 0x4U * idx; +} + +static inline u32 v2_cfg_priv_sid_r(void) +{ + return V2_SID_CONTROL_BASE + 0x20U; +} + +static inline u32 v2_cfg_vps_sid_r(void) +{ + return V2_SID_CONTROL_BASE + 0x24U; +} + +#define V2_ADDRESS_CONTROL_BASE 0x250000U + +static inline u32 v2_cfg_r5user_lsegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x8U; +} + +static inline u32 v2_cfg_priv_ar1_lsegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0xCU; +} + +static inline u32 v2_cfg_priv_ar2_lsegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x10U; +} + +static inline u32 v2_cfg_r5user_usegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x1CU; +} + +static inline u32 v2_cfg_priv_ar1_usegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x20U; +} + +static inline u32 v2_cfg_priv_ar2_usegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x24U; +} + +static inline u32 v2_cfg_priv_ar1_start_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x28U; +} + +static inline u32 v2_cfg_priv_ar1_end_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x2CU; +} + +static inline u32 v2_cfg_priv_ar2_start_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x30U; +} + +static inline u32 v2_cfg_priv_ar2_end_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x34U; +} + +#define V2_CFG_CCQ_BASE 0x260000U +#define V2_CFG_CCQ_SIZE 0x010000U + +static inline u32 v2_cfg_ccq_r(u32 idx) +{ + return V2_CFG_CCQ_BASE + V2_CFG_CCQ_SIZE * idx; +} + +static inline u32 v2_cfg_ccq_status_r(u32 ccq_idx, u32 status_idx) +{ + return V2_CFG_CCQ_BASE + V2_CFG_CCQ_SIZE * ccq_idx + 0x4U + + 0x4U * status_idx; +} + +#endif diff --git a/drivers/video/tegra/host/pva/hw_dma_ch_pva.h b/drivers/video/tegra/host/pva/hw_dma_ch_pva.h new file mode 100644 index 00000000..91616865 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_dma_ch_pva.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . 
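For reference, the gen2 CCQ blocks are V2_CFG_CCQ_SIZE apart and each status register is a further 4 bytes in, which is all v2_cfg_ccq_status_r() computes. The same arithmetic written out as a standalone sketch:

#include <stdint.h>
typedef uint32_t u32;

#define V2_CFG_CCQ_BASE 0x260000U
#define V2_CFG_CCQ_SIZE 0x010000U

/* Offset of status register 'status_idx' of CCQ 'ccq_idx',
 * equivalent to v2_cfg_ccq_status_r() above.
 */
static u32 ccq_status_offset(u32 ccq_idx, u32 status_idx)
{
        return V2_CFG_CCQ_BASE + V2_CFG_CCQ_SIZE * ccq_idx +
               0x4U + 0x4U * status_idx;
}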
This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_dma_ch_pva_h_ +#define _hw_dma_ch_pva_h_ + +static inline u32 dma_ch_base_r(void) +{ + return 0xa0000; +} +static inline u32 dma_ch_size_r(void) +{ + return 0x2000; +} +static inline u32 dma_ch_cntl0_r(void) +{ + return 0x0; +} +static inline u32 dma_ch_cntl0_enable_m(void) +{ + return 0x1 << 31; +} +static inline u32 dma_ch_cntl0_did_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 dma_ch_status0_r(void) +{ + return 0x8; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_dma_desc_pva.h b/drivers/video/tegra/host/pva/hw_dma_desc_pva.h new file mode 100644 index 00000000..9c670a8e --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_dma_desc_pva.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
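The per-channel DMA register blocks start at dma_ch_base_r() and, judging by the size accessor, are laid out dma_ch_size_r() bytes apart (an assumption; the header itself does not spell out the stride). A sketch of forming a channel's CNTL0 offset and composing a CNTL0 value:

#include <stdint.h>
typedef uint32_t u32;

static inline u32 dma_ch_base_r(void)  { return 0xa0000; }
static inline u32 dma_ch_size_r(void)  { return 0x2000;  }
static inline u32 dma_ch_cntl0_r(void) { return 0x0;     }
static inline u32 dma_ch_cntl0_enable_m(void) { return 0x1U << 31; }
static inline u32 dma_ch_cntl0_did_f(u32 v)   { return (v & 0xff) << 0; }

/* MMIO offset of CNTL0 for DMA channel 'ch', assuming consecutive
 * channel blocks of dma_ch_size_r() bytes.
 */
static u32 dma_ch_cntl0_offset(u32 ch)
{
        return dma_ch_base_r() + ch * dma_ch_size_r() + dma_ch_cntl0_r();
}

/* Example CNTL0 value: channel enabled, DID field set to 'did'. */
static u32 dma_ch_cntl0_value(u32 did)
{
        return dma_ch_cntl0_enable_m() | dma_ch_cntl0_did_f(did);
}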
+ */ +#ifndef _hw_dma_desc_pva_h_ +#define _hw_dma_desc_pva_h_ + +static inline u32 dma_desc_base_r(void) +{ + return 0xc1000; +} +static inline u32 dma_desc_size_r(void) +{ + return 0x40; +} +static inline u32 dma_desc_cntl_r(void) +{ + return 0x0; +} +static inline u32 dma_desc_cntl_dstm_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 dma_desc_cntl_dstm_dstm_mc_v(void) +{ + return 0x00000001; +} +static inline u32 dma_desc_cntl_dstm_dstm_tcm_v(void) +{ + return 0x00000004; +} +static inline u32 dma_desc_cntl_ddtm_f(u32 v) +{ + return (v & 0x7) << 4; +} +static inline u32 dma_desc_cntl_ddtm_ddtm_mc_v(void) +{ + return 0x00000001; +} +static inline u32 dma_desc_cntl_ddtm_ddtm_tcm_v(void) +{ + return 0x00000004; +} +static inline u32 dma_desc_cntl_srch_f(u32 v) +{ + return (v & 0xff) << 16; +} +static inline u32 dma_desc_cntl_dsth_f(u32 v) +{ + return (v & 0xff) << 24; +} +static inline u32 dma_desc_srcl_r(void) +{ + return 0x4; +} +static inline u32 dma_desc_dstl_r(void) +{ + return 0x8; +} +static inline u32 dma_desc_tile_cntl_r(void) +{ + return 0xc; +} +static inline u32 dma_desc_tile_cntl_tx_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 dma_desc_tile_cntl_ty_f(u32 v) +{ + return (v & 0xffff) << 16; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_evp_pva.h b/drivers/video/tegra/host/pva/hw_evp_pva.h new file mode 100644 index 00000000..8732c5c3 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_evp_pva.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
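The descriptor CNTL word combines the two transfer-mode fields with the upper address bytes (SRCH/DSTH) that do not fit into the 32-bit SRCL/DSTL registers. Which of DSTM and DDTM names the source mode and which the destination mode is not spelled out in this header, so the pairing in the sketch below is illustrative only:

#include <stdint.h>
typedef uint32_t u32;
typedef uint64_t u64;

static inline u32 dma_desc_cntl_dstm_f(u32 v) { return (v & 0x7) << 0; }
static inline u32 dma_desc_cntl_ddtm_f(u32 v) { return (v & 0x7) << 4; }
static inline u32 dma_desc_cntl_srch_f(u32 v) { return (v & 0xff) << 16; }
static inline u32 dma_desc_cntl_dsth_f(u32 v) { return (v & 0xff) << 24; }

/* Compose a descriptor control word from two transfer-mode values
 * (e.g. the *_mc_v()/*_tcm_v() constants) plus the high address bytes.
 * The DSTM/DDTM pairing with source/destination is assumed.
 */
static u32 desc_cntl(u32 mode_a, u32 mode_b, u64 src, u64 dst)
{
        return dma_desc_cntl_dstm_f(mode_a) |
               dma_desc_cntl_ddtm_f(mode_b) |
               dma_desc_cntl_srch_f((u32)(src >> 32)) |
               dma_desc_cntl_dsth_f((u32)(dst >> 32));
}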
+ */ +#ifndef _hw_evp_pva_h_ +#define _hw_evp_pva_h_ + +static inline u32 evp_reset_addr_r(void) +{ + return 0x20; +} +static inline u32 evp_undef_addr_r(void) +{ + return 0x24; +} +static inline u32 evp_swi_addr_r(void) +{ + return 0x28; +} +static inline u32 evp_prefetch_abort_addr_r(void) +{ + return 0x2c; +} +static inline u32 evp_data_abort_addr_r(void) +{ + return 0x30; +} +static inline u32 evp_rsvd_addr_r(void) +{ + return 0x34; +} +static inline u32 evp_irq_addr_r(void) +{ + return 0x38; +} +static inline u32 evp_fiq_addr_r(void) +{ + return 0x3c; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_hsp_pva.h b/drivers/video/tegra/host/pva/hw_hsp_pva.h new file mode 100644 index 00000000..62e6cab0 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_hsp_pva.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_hsp_pva_h_ +#define _hw_hsp_pva_h_ + +static inline u32 hsp_common_r(void) +{ + return 0x160000; +} +static inline u32 hsp_int_ie0_r(void) +{ + return 0x160100; +} +static inline u32 hsp_int_ie1_r(void) +{ + return 0x160104; +} +static inline u32 hsp_int_ie2_r(void) +{ + return 0x160108; +} +static inline u32 hsp_int_ie3_r(void) +{ + return 0x16010c; +} +static inline u32 hsp_int_ie4_r(void) +{ + return 0x160110; +} +static inline u32 hsp_int_external_r(void) +{ + return 0x160300; +} +static inline u32 hsp_int_internal_r(void) +{ + return 0x160304; +} +static inline u32 hsp_sm0_r(void) +{ + return 0x170000; +} +static inline u32 hsp_sm1_r(void) +{ + return 0x178000; +} +static inline u32 hsp_sm2_r(void) +{ + return 0x180000; +} +static inline u32 hsp_sm3_r(void) +{ + return 0x188000; +} +static inline u32 hsp_sm4_r(void) +{ + return 0x190000; +} +static inline u32 hsp_sm5_r(void) +{ + return 0x198000; +} +static inline u32 hsp_sm6_r(void) +{ + return 0x1a0000; +} +static inline u32 hsp_sm7_r(void) +{ + return 0x1a8000; +} +static inline u32 hsp_ss0_state_r(void) +{ + return 0x1b0000; +} +static inline u32 hsp_ss0_set_r(void) +{ + return 0x1b0004; +} +static inline u32 hsp_ss0_clr_r(void) +{ + return 0x1b0008; +} +static inline u32 hsp_ss1_state_r(void) +{ + return 0x1c0000; +} +static inline u32 hsp_ss1_set_r(void) +{ + return 0x1c0004; +} +static inline u32 hsp_ss1_clr_r(void) +{ + return 0x1c0008; +} +static inline u32 hsp_ss2_state_r(void) +{ + return 0x1d0000; +} +static inline u32 hsp_ss2_set_r(void) +{ + return 0x1d0004; +} +static inline u32 hsp_ss2_clr_r(void) +{ + return 0x1d0008; +} +static inline u32 hsp_ss3_state_r(void) +{ + return 0x1e0000; +} +static inline u32 hsp_ss3_set_r(void) +{ + return 0x1e0004; +} +static inline u32 hsp_ss3_clr_r(void) +{ + return 0x1e0008; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_proc_pva.h b/drivers/video/tegra/host/pva/hw_proc_pva.h new file mode 100644 index 00000000..e4c6145d --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_proc_pva.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . 
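The shared mailboxes and shared semaphores sit at regular strides (0x8000 and 0x10000 respectively, inferred from the offsets above). Generalized accessors, assuming those strides hold for every instance:

#include <stdint.h>
typedef uint32_t u32;

#define HSP_SM0_OFFSET 0x170000U   /* hsp_sm0_r() */
#define HSP_SM_STRIDE  0x008000U   /* hsp_sm1_r() - hsp_sm0_r() */
#define HSP_SS0_OFFSET 0x1b0000U   /* hsp_ss0_state_r() */
#define HSP_SS_STRIDE  0x010000U   /* hsp_ss1_state_r() - hsp_ss0_state_r() */

/* Offset of shared mailbox 'n' (0..7), equivalent to hsp_smN_r(). */
static u32 hsp_sm_r(u32 n)
{
        return HSP_SM0_OFFSET + n * HSP_SM_STRIDE;
}

/* State/set/clear register offsets of shared semaphore 'n' (0..3). */
static u32 hsp_ss_state_r(u32 n) { return HSP_SS0_OFFSET + n * HSP_SS_STRIDE; }
static u32 hsp_ss_set_r(u32 n)   { return hsp_ss_state_r(n) + 0x4; }
static u32 hsp_ss_clr_r(u32 n)   { return hsp_ss_state_r(n) + 0x8; }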
+ * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_proc_pva_h_ +#define _hw_proc_pva_h_ + +static inline u32 proc_cpuhalt_r(void) +{ + return 0x30000; +} +static inline u32 proc_cpuhalt_ncpuhalt_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 proc_cpuhalt_ncpuhalt_halted_v(void) +{ + return 0x00000000; +} +static inline u32 proc_cpuhalt_ncpuhalt_done_v(void) +{ + return 0x00000001; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_sec_pva_v1.h b/drivers/video/tegra/host/pva/hw_sec_pva_v1.h new file mode 100644 index 00000000..36b19893 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_sec_pva_v1.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_sec_pva_v1_h_ +#define _hw_sec_pva_v1_h_ + +static inline u32 v1_sec_lic_intr_enable_r(void) +{ + return 0x2804CU; +} +static inline u32 sec_lic_intr_enable_dma0_f(u32 v) +{ + return (v & 0x1) << 9; +} +static inline u32 sec_lic_intr_enable_dma1_f(u32 v) +{ + return (v & 0x1) << 8; +} +static inline u32 sec_lic_intr_enable_actmon_f(u32 v) +{ + return (v & 0x1) << 7; +} +static inline u32 sec_lic_intr_enable_h1x_f(u32 v) +{ + return (v & 0x7) << 5; +} +static inline u32 sec_lic_intr_enable_hsp_f(u32 v) +{ + return (v & 0xf) << 1; +} +static inline u32 sec_lic_intr_enable_wdt_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 v1_sec_lic_intr_status_r(void) +{ + return 0x28054U; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_sec_pva_v2.h b/drivers/video/tegra/host/pva/hw_sec_pva_v2.h new file mode 100644 index 00000000..826a67a6 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_sec_pva_v2.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_sec_pva_v2_h_ +#define _hw_sec_pva_v2_h_ + +#define SEC_BASE 0x20000U + +static inline u32 v2_sec_lic_intr_enable_r(void) +{ + return SEC_BASE + 0x8064U; +} + +static inline u32 v2_sec_lic_intr_status_r(void) +{ + return SEC_BASE + 0x806CU; +} + +#endif diff --git a/drivers/video/tegra/host/pva/hw_vmem_pva.h b/drivers/video/tegra/host/pva/hw_vmem_pva.h new file mode 100644 index 00000000..561d833b --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_vmem_pva.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
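The v1 LIC interrupt enable register is composed from the field helpers above. For example, enabling the watchdog, all four HSP interrupts and both DMA interrupts while leaving the actmon and h1x sources masked (an illustrative value, not necessarily what the driver programs):

#include <stdint.h>
typedef uint32_t u32;

static inline u32 sec_lic_intr_enable_wdt_f(u32 v)  { return (v & 0x1) << 0; }
static inline u32 sec_lic_intr_enable_hsp_f(u32 v)  { return (v & 0xf) << 1; }
static inline u32 sec_lic_intr_enable_dma1_f(u32 v) { return (v & 0x1) << 8; }
static inline u32 sec_lic_intr_enable_dma0_f(u32 v) { return (v & 0x1) << 9; }

/* Example enable word for v1_sec_lic_intr_enable_r(). */
static u32 example_lic_intr_enable(void)
{
        return sec_lic_intr_enable_wdt_f(1) |
               sec_lic_intr_enable_hsp_f(0xf) |
               sec_lic_intr_enable_dma0_f(1) |
               sec_lic_intr_enable_dma1_f(1);
}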
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _hw_vmem_pva_h_ +#define _hw_vmem_pva_h_ + +#define NUM_HEM_GEN 2U +#define VMEM_REGION_COUNT 3U +#define T19X_VMEM0_START 0x40U +#define T19X_VMEM0_END 0x10000U +#define T19X_VMEM1_START 0x40000U +#define T19X_VMEM1_END 0x50000U +#define T19X_VMEM2_START 0x80000U +#define T19X_VMEM2_END 0x90000U + +#define T23x_VMEM0_START 0x40U +#define T23x_VMEM0_END 0x20000U +#define T23x_VMEM1_START 0x40000U +#define T23x_VMEM1_END 0x60000U +#define T23x_VMEM2_START 0x80000U +#define T23x_VMEM2_END 0xA0000U + +#endif diff --git a/drivers/video/tegra/host/pva/nvpva_buffer.c b/drivers/video/tegra/host/pva/nvpva_buffer.c new file mode 100644 index 00000000..9d9beba5 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_buffer.c @@ -0,0 +1,607 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
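The VMEM windows above bound the VMEM offsets a task may reference; T23x simply widens each window. A sketch of a range check against the T19x table (illustrative helper, not part of this patch):

#include <stdint.h>
#include <stdbool.h>

#define VMEM_REGION_COUNT 3U

struct vmem_region {
        uint32_t start;
        uint32_t end;
};

/* T19x VMEM windows from hw_vmem_pva.h. */
static const struct vmem_region t19x_vmem[VMEM_REGION_COUNT] = {
        { 0x40U,    0x10000U },
        { 0x40000U, 0x50000U },
        { 0x80000U, 0x90000U },
};

/* True if [addr, addr + size) lies entirely inside one VMEM window. */
static bool vmem_range_valid(uint32_t addr, uint32_t size)
{
        uint32_t i;

        for (i = 0; i < VMEM_REGION_COUNT; i++) {
                if (addr >= t19x_vmem[i].start &&
                    addr + size <= t19x_vmem[i].end)
                        return true;
        }
        return false;
}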
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pva.h" +#include "nvpva_buffer.h" + +/** + * nvpva_vm_buffer - Virtual mapping information for a buffer + * + * @attach: Pointer to dma_buf_attachment struct + * @dmabuf: Pointer to dma_buf struct + * @sgt: Pointer to sg_table struct + * @addr: Physical address of the buffer + * @size: Size of the buffer + * @user_map_count: Buffer reference count from user space + * @submit_map_count: Buffer reference count from task submit + * @rb_node: pinned buffer node + * @list_head: List entry + * + */ +struct nvpva_vm_buffer { + struct dma_buf_attachment *attach; + struct dma_buf *dmabuf; + struct sg_table *sgt; + dma_addr_t addr; + size_t size; + enum nvpva_buffers_heap heap; + s32 user_map_count; + s32 submit_map_count; + u32 id; + dma_addr_t user_addr; + u64 user_offset; + u64 user_size; + struct rb_node rb_node; + struct rb_node rb_node_id; + struct list_head list_head; +}; + +static uint32_t get_unique_id(struct nvpva_buffers *nvpva_buffers) +{ + struct nvhost_device_data *pdata = + platform_get_drvdata(nvpva_buffers->pdev); + struct pva *pva = pdata->private_data; + uint32_t id = rmos_find_first_zero_bit(nvpva_buffers->ids, + NVPVA_MAX_NUM_UNIQUE_IDS); + if (id == NVPVA_MAX_NUM_UNIQUE_IDS) { + nvpva_dbg_fn(pva, "No buffer ID available"); + id = 0; + goto out; + } + + rmos_set_bit32((id%NVPVA_ID_SEGMENT_SIZE), + &nvpva_buffers->ids[id/NVPVA_ID_SEGMENT_SIZE]); + + ++(nvpva_buffers->num_assigned_ids); + id |= 0x554c0000; +out: + return id; +} + +static int32_t put_unique_id(struct nvpva_buffers *nvpva_buffers, uint32_t id) +{ + id &= (~0x554c0000); + if (!rmos_test_bit32((id % 32), &nvpva_buffers->ids[id / 32])) + return -1; + + rmos_clear_bit32((id % 32), &nvpva_buffers->ids[id/32]); + --(nvpva_buffers->num_assigned_ids); + + return 0; +} + +#define COMPARE_AND_ASSIGN(a1, a2, b1, b2, c1, c2, curr, n1, n2) \ + do { \ + is_equal = false; \ + if ((a1) > (a2)) \ + (curr) = (n1); \ + else if ((a1) < (a2)) \ + (curr) = (n2); \ + else if ((b1) > (b2)) \ + (curr) = (n1); \ + else if ((b1) < (b2)) \ + (curr) = (n2); \ + else if ((c1) > (c2)) \ + (curr) = (n1); \ + else if ((c1) < (c2)) \ + (curr) = (n2); \ + else \ + is_equal = true; \ + } while (0) + + + +static struct nvpva_vm_buffer * +nvpva_find_map_buffer(struct nvpva_buffers *nvpva_buffers, + u64 offset, + u64 size, + struct dma_buf *dmabuf) +{ + struct rb_root *root = &nvpva_buffers->rb_root; + struct rb_node *node = root->rb_node; + struct nvpva_vm_buffer *vm; + bool is_equal = false; + + /* check in a sorted tree */ + while (node) { + vm = rb_entry(node, struct nvpva_vm_buffer, + rb_node); + COMPARE_AND_ASSIGN(vm->dmabuf, + dmabuf, + vm->user_offset, + offset, + vm->user_size, + size, + node, + node->rb_left, + node->rb_right); + if (is_equal) + return vm; + } + + return NULL; +} + +static struct nvpva_vm_buffer *nvpva_find_map_buffer_id( + struct nvpva_buffers *nvpva_buffers, u32 id) +{ + struct rb_root *root = &nvpva_buffers->rb_root_id; + struct rb_node *node = root->rb_node; + struct nvpva_vm_buffer *vm; + + /* check in a sorted tree */ + while (node) { + vm = rb_entry(node, struct nvpva_vm_buffer, + rb_node_id); + + if (vm->id > id) + node = node->rb_left; + else if (vm->id != id) + node = node->rb_right; + else + return vm; + } + + return NULL; +} +static void nvpva_buffer_insert_map_buffer( + struct nvpva_buffers *nvpva_buffers, + struct nvpva_vm_buffer *new_vm) +{ + struct rb_node **new_node = &(nvpva_buffers->rb_root.rb_node); + struct rb_node 
*parent = NULL; + bool is_equal = false; + + /* Figure out where to put the new node */ + while (*new_node) { + struct nvpva_vm_buffer *vm = + rb_entry(*new_node, struct nvpva_vm_buffer, + rb_node); + parent = *new_node; + + COMPARE_AND_ASSIGN(vm->dmabuf, + new_vm->dmabuf, + vm->user_offset, + new_vm->user_offset, + vm->user_size, + new_vm->user_size, + new_node, + &((*new_node)->rb_left), + &((*new_node)->rb_right)); + if (is_equal) + new_node = &((*new_node)->rb_right); + } + + /* Add new node and rebalance tree */ + rb_link_node(&new_vm->rb_node, parent, new_node); + rb_insert_color(&new_vm->rb_node, &nvpva_buffers->rb_root); + + /* Add the node into a list */ + list_add_tail(&new_vm->list_head, &nvpva_buffers->list_head); +} + +static void nvpva_buffer_insert_map_buffer_id( + struct nvpva_buffers *nvpva_buffers, + struct nvpva_vm_buffer *new_vm) +{ + struct rb_node **new_node = &(nvpva_buffers->rb_root_id.rb_node); + struct rb_node *parent = NULL; + + /* Figure out where to put the new node */ + while (*new_node) { + struct nvpva_vm_buffer *vm = + rb_entry(*new_node, struct nvpva_vm_buffer, + rb_node_id); + parent = *new_node; + + if (vm->id > new_vm->id) + new_node = &((*new_node)->rb_left); + else + new_node = &((*new_node)->rb_right); + } + + /* Add new node and rebalance tree */ + rb_link_node(&new_vm->rb_node_id, parent, new_node); + rb_insert_color(&new_vm->rb_node_id, &nvpva_buffers->rb_root_id); +} + +static int +nvpva_buffer_map(struct platform_device *pdev, + struct platform_device *pdev_priv, + struct platform_device *pdev_user, + struct dma_buf *dmabuf, + u64 offset, + u64 size, + struct nvpva_vm_buffer *vm, + bool is_user) +{ + + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + const dma_addr_t cvnas_begin = nvcvnas_get_cvsram_base(); + const dma_addr_t cvnas_end = cvnas_begin + nvcvnas_get_cvsram_size(); + struct dma_buf_attachment *attach; + struct sg_table *sgt; + dma_addr_t dma_addr; + dma_addr_t phys_addr; + int err = 0; + + nvpva_dbg_fn(pva, ""); + + get_dma_buf(dmabuf); + if (is_user) + attach = dma_buf_attach(dmabuf, &pdev_user->dev); + else + attach = dma_buf_attach(dmabuf, &pdev_priv->dev); + + if (IS_ERR_OR_NULL(attach)) { + err = PTR_ERR(dmabuf); + dev_err(&pdev->dev, "dma_attach failed: %d\n", err); + goto buf_attach_err; + } + + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR_OR_NULL(sgt)) { + err = PTR_ERR(sgt); + dev_err(&pdev->dev, "dma mapping failed: %d\n", err); + goto buf_map_err; + } + + phys_addr = sg_phys(sgt->sgl); + dma_addr = sg_dma_address(sgt->sgl); + + /* Determine the heap */ + if (phys_addr >= cvnas_begin && phys_addr < cvnas_end) + vm->heap = NVPVA_BUFFERS_HEAP_CVNAS; + else + vm->heap = NVPVA_BUFFERS_HEAP_DRAM; + + /* + * If dma address is not available or heap is in CVNAS, use the + * physical address. 
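get_unique_id() above tags every allocated bitmap slot with the 0x554c0000 marker, and put_unique_id() strips it again before indexing the ids[] words by 32-bit segments. A small sketch of that mapping, assuming NVPVA_ID_SEGMENT_SIZE is the 32 implied by the modulus and division in put_unique_id():

#include <stdint.h>

#define NVPVA_ID_MARKER       0x554c0000U
#define NVPVA_ID_SEGMENT_SIZE 32U   /* put_unique_id() divides/mods by 32 */

/* Recover the bitmap word index and bit position from a buffer ID
 * handed out by get_unique_id().
 */
static void id_to_slot(uint32_t id, uint32_t *word, uint32_t *bit)
{
        uint32_t raw = id & ~NVPVA_ID_MARKER;

        *word = raw / NVPVA_ID_SEGMENT_SIZE;
        *bit  = raw % NVPVA_ID_SEGMENT_SIZE;
}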
+ */ + if (!dma_addr || vm->heap == NVPVA_BUFFERS_HEAP_CVNAS) + dma_addr = phys_addr; + + vm->sgt = sgt; + vm->attach = attach; + vm->dmabuf = dmabuf; + vm->addr = dma_addr; + vm->user_addr = dma_addr + offset; + + vm->size = dmabuf->size; + vm->user_offset = offset; + vm->user_size = size; + vm->user_map_count = 1; + + if (is_user) + nvpva_dbg_fn(pva, "mapped user @ base %llx, uaddr %llx, size %llx\n", + (u64) dma_addr, (u64) vm->user_addr, size); + else + nvpva_dbg_fn(pva, "mapped priv @ base %llx, uaddr %llx, size %llx\n", + (u64) dma_addr, (u64) vm->user_addr, size); + + return err; + +buf_map_err: + dma_buf_detach(dmabuf, attach); +buf_attach_err: + dma_buf_put(dmabuf); + return err; +} + +static void nvpva_free_buffers(struct kref *kref) +{ + struct nvpva_buffers *nvpva_buffers = + container_of(kref, struct nvpva_buffers, kref); + + kfree(nvpva_buffers); +} + +static void nvpva_buffer_unmap(struct nvpva_buffers *nvpva_buffers, + struct nvpva_vm_buffer *vm) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(nvpva_buffers->pdev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_fn(pva, ""); + + if ((vm->user_map_count != 0) || (vm->submit_map_count != 0)) + return; + + dma_buf_unmap_attachment(vm->attach, vm->sgt, DMA_BIDIRECTIONAL); + dma_buf_detach(vm->dmabuf, vm->attach); + dma_buf_put(vm->dmabuf); + + rb_erase(&vm->rb_node, &nvpva_buffers->rb_root); + list_del(&vm->list_head); + rb_erase(&vm->rb_node_id, &nvpva_buffers->rb_root_id); + put_unique_id(nvpva_buffers, vm->id); + + kfree(vm); +} + +struct nvpva_buffers +*nvpva_buffer_init(struct platform_device *pdev, + struct platform_device *pdev_priv, + struct platform_device *pdev_user) +{ + struct nvpva_buffers *nvpva_buffers; + int err = 0; + + nvpva_buffers = kzalloc(sizeof(struct nvpva_buffers), GFP_KERNEL); + if (!nvpva_buffers) { + err = -ENOMEM; + goto nvpva_buffer_init_err; + } + + nvpva_buffers->pdev = pdev; + nvpva_buffers->pdev_priv = pdev_priv; + nvpva_buffers->pdev_user = pdev_user; + mutex_init(&nvpva_buffers->mutex); + nvpva_buffers->rb_root = RB_ROOT; + nvpva_buffers->rb_root_id = RB_ROOT; + INIT_LIST_HEAD(&nvpva_buffers->list_head); + kref_init(&nvpva_buffers->kref); + memset(nvpva_buffers->ids, 0, sizeof(nvpva_buffers->ids)); + nvpva_buffers->num_assigned_ids = 0; + + return nvpva_buffers; + +nvpva_buffer_init_err: + return ERR_PTR(err); +} + +int nvpva_buffer_submit_pin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, + u32 count, + struct dma_buf **dmabuf, + dma_addr_t *paddr, + u64 *psize, + enum nvpva_buffers_heap *heap) +{ + struct nvpva_vm_buffer *vm; + int i = 0; + + kref_get(&nvpva_buffers->kref); + + mutex_lock(&nvpva_buffers->mutex); + + for (i = 0; i < count; i++) { + vm = nvpva_find_map_buffer_id(nvpva_buffers, ids[i]); + if (vm == NULL) + goto submit_err; + + vm->submit_map_count++; + paddr[i] = vm->user_addr; + dmabuf[i] = vm->dmabuf; + psize[i] = vm->user_size; + + /* Return heap only if requested */ + if (heap != NULL) + heap[i] = vm->heap; + } + + mutex_unlock(&nvpva_buffers->mutex); + return 0; + +submit_err: + mutex_unlock(&nvpva_buffers->mutex); + + count = i; + + nvpva_buffer_submit_unpin_id(nvpva_buffers, ids, count); + + return -EINVAL; +} + +int nvpva_buffer_pin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, + u64 *offset, + u64 *size, + u32 segment, + u32 count, + u32 *id, + u32 *eerr) +{ + struct nvpva_vm_buffer *vm; + int i = 0; + int err = 0; + + *eerr = 0; + + if (segment >= NVPVA_SEGMENT_MAX) + return -EINVAL; + + mutex_lock(&nvpva_buffers->mutex); + 
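+	/*
+	 * For each handle: validate that offset + size stays within the dmabuf,
+	 * reuse an existing mapping when the same (dmabuf, offset, size) triple
+	 * is already pinned, otherwise allocate a new nvpva_vm_buffer, assign it
+	 * a unique ID and map it.  On failure, buffers pinned earlier in this
+	 * call are unpinned before returning.
+	 */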
+ for (i = 0; i < count; i++) { + u64 limit; + + if (U64_MAX - size[i] < offset[i]) { + err = -EFAULT; + goto unpin; + } else { + limit = (size[i] + offset[i]); + } + + if (dmabufs[i]->size < limit) { + err = -EFAULT; + goto unpin; + } + + vm = nvpva_find_map_buffer(nvpva_buffers, + offset[i], + size[i], + dmabufs[i]); + if (vm) { + vm->user_map_count++; + id[i] = vm->id; + continue; + } + + vm = kzalloc(sizeof(struct nvpva_vm_buffer), GFP_KERNEL); + if (!vm) { + err = -ENOMEM; + goto unpin; + } + + vm->id = get_unique_id(nvpva_buffers); + if (vm->id == 0) { + *eerr = NVPVA_ENOSLOT; + err = -EINVAL; + goto free_vm; + } + + err = nvpva_buffer_map(nvpva_buffers->pdev, + nvpva_buffers->pdev_priv, + nvpva_buffers->pdev_user, + dmabufs[i], + offset[i], + size[i], + vm, + (segment == NVPVA_SEGMENT_USER)); + if (err) { + put_unique_id(nvpva_buffers, vm->id); + goto free_vm; + } + + nvpva_buffer_insert_map_buffer(nvpva_buffers, vm); + nvpva_buffer_insert_map_buffer_id(nvpva_buffers, vm); + id[i] = vm->id; + } + + mutex_unlock(&nvpva_buffers->mutex); + + return err; + +free_vm: + kfree(vm); +unpin: + mutex_unlock(&nvpva_buffers->mutex); + + /* free pinned buffers */ + count = i; + nvpva_buffer_unpin(nvpva_buffers, dmabufs, offset, size, count); + + return err; +} + +void nvpva_buffer_submit_unpin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, u32 count) +{ + struct nvpva_vm_buffer *vm; + int i = 0; + + mutex_lock(&nvpva_buffers->mutex); + + for (i = 0; i < count; i++) { + + vm = nvpva_find_map_buffer_id(nvpva_buffers, ids[i]); + if (vm == NULL) + continue; + + --vm->submit_map_count; + if ((vm->submit_map_count) < 0) + vm->submit_map_count = 0; + + nvpva_buffer_unmap(nvpva_buffers, vm); + } + + mutex_unlock(&nvpva_buffers->mutex); + + kref_put(&nvpva_buffers->kref, nvpva_free_buffers); +} + +void +nvpva_buffer_unpin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, + u64 *offset, + u64 *size, + u32 count) +{ + int i = 0; + + mutex_lock(&nvpva_buffers->mutex); + + for (i = 0; i < count; i++) { + struct nvpva_vm_buffer *vm = NULL; + + vm = nvpva_find_map_buffer(nvpva_buffers, + offset[i], + size[i], + dmabufs[i]); + if (vm == NULL) + continue; + + --vm->user_map_count; + if (vm->user_map_count < 0) + vm->user_map_count = 0; + + nvpva_buffer_unmap(nvpva_buffers, vm); + } + + mutex_unlock(&nvpva_buffers->mutex); +} + +void nvpva_buffer_unpin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, u32 count) +{ + int i = 0; + + mutex_lock(&nvpva_buffers->mutex); + + for (i = 0; i < count; i++) { + struct nvpva_vm_buffer *vm = NULL; + + vm = nvpva_find_map_buffer_id(nvpva_buffers, ids[i]); + if (vm == NULL) + continue; + + --vm->user_map_count; + if (vm->user_map_count < 0) + vm->user_map_count = 0; + + nvpva_buffer_unmap(nvpva_buffers, vm); + } + + mutex_unlock(&nvpva_buffers->mutex); +} + +void nvpva_buffer_release(struct nvpva_buffers *nvpva_buffers) +{ + struct nvpva_vm_buffer *vm, *n; + + /* Go through each entry and remove it safely */ + mutex_lock(&nvpva_buffers->mutex); + list_for_each_entry_safe(vm, n, &nvpva_buffers->list_head, + list_head) { + vm->user_map_count = 0; + nvpva_buffer_unmap(nvpva_buffers, vm); + } + mutex_unlock(&nvpva_buffers->mutex); + + kref_put(&nvpva_buffers->kref, nvpva_free_buffers); +} diff --git a/drivers/video/tegra/host/pva/nvpva_buffer.h b/drivers/video/tegra/host/pva/nvpva_buffer.h new file mode 100644 index 00000000..2a535533 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_buffer.h @@ -0,0 +1,224 @@ +/* + * NVPVA Buffer Management Header + * + 
* Copyright (c) 2016-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVPVA_NVPVA_BUFFER_H__ +#define __NVPVA_NVPVA_BUFFER_H__ + +#include +#include "pva_bit_helpers.h" + +enum nvpva_buffers_heap { + NVPVA_BUFFERS_HEAP_DRAM = 0, + NVPVA_BUFFERS_HEAP_CVNAS +}; + +/** + * @brief Information needed for buffers + * + * pdev Pointer to NVHOST device + * rb_root RB tree root for of all the buffers used by a file pointer + * list List for traversing through all the buffers + * mutex Mutex for the buffer tree and the buffer list + * kref Reference count for the bufferlist + * ids unique ID assigned to a pinned buffer + */ +#define NVPVA_ID_SEGMENT_SIZE 32 +#define NVPVA_MAX_NUM_UNIQUE_IDS (NVPVA_ID_SEGMENT_SIZE * 1024) +#define NVPVA_NUM_ID_SEGMENTS \ + (NVPVA_MAX_NUM_UNIQUE_IDS/NVPVA_ID_SEGMENT_SIZE) +struct nvpva_buffers { + struct platform_device *pdev; + struct platform_device *pdev_priv; + struct platform_device *pdev_user; + struct list_head list_head; + struct rb_root rb_root; + struct rb_root rb_root_id; + struct mutex mutex; + struct kref kref; + uint32_t ids[NVPVA_NUM_ID_SEGMENTS]; + uint32_t num_assigned_ids; +}; + +/** + * @brief Initialize the nvpva_buffer per open request + * + * This function allocates nvpva_buffers struct and init the bufferlist + * and mutex. + * + * @param nvpva_buffers Pointer to nvpva_buffers struct + * @return nvpva_buffers pointer on success + * or negative on error + * + */ +struct nvpva_buffers +*nvpva_buffer_init(struct platform_device *pdev, + struct platform_device *pdev_priv, + struct platform_device *pdev_user); + +/** + * @brief Pin the memhandle using dma_buf functions + * + * This function maps the buffer memhandle list passed from user side + * to device iova. + * + * @param nvpva_buffers Pointer to nvpva_buffers struct + * @param dmabufs Pointer to dmabuffer list + * @param offset pointer to offsets of regions to be pinned + * @param size pointer to sizes of regions to be pinned + * @param count Number of memhandles in the list + * @return 0 on success or negative on error + * + */ +int nvpva_buffer_pin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, + u64 *offset, + u64 *size, + u32 segment, + u32 count, + u32 *id, + u32 *eerr); +/** + * @brief UnPins the mapped address space. + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param dmabufs Pointer to dmabuffer list + * @param count Pointer to offset list + * @param offset Pointer to size list + * @param count Number of memhandles in the list + * @return None + * + */ +void +nvpva_buffer_unpin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, + u64 *offset, + u64 *size, + u32 count); +/** + * @brief UnPins the mapped address space. 
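+ *
+ * Decreases the user reference count of each buffer identified by an ID in
+ * the list; the underlying mapping is released only once neither user space
+ * nor an in-flight task submit holds a reference to it.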
+ * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param ids Pointer to id list + * @param count Number of memhandles in the list + * @param id pointer to variable where assigned + * ID is returned + * @return None + * + */ +void nvpva_buffer_unpin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, + u32 count); + +/** + * @brief Pin the mapped buffer for a task submit + * + * This function increased the reference count for a mapped buffer during + * task submission. + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param dmabufs Pointer to dmabuffer list + * @param count Number of memhandles in the list + * @param paddr Pointer to IOVA list + * @param psize Pointer to size of buffer to return + * @param heap Pointer to a list of heaps. This is + * filled by the routine. + * + * @return 0 on success or negative on error + * + */ +int nvpva_buffer_submit_pin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, u32 count, + dma_addr_t *paddr, size_t *psize, + enum nvpva_buffers_heap *heap); +/** + * @brief Pin the mapped buffer for a task submit + * + * This function increased the reference count for a mapped buffer during + * task submission. + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param ids Pointer to id list + * @param count Number of memhandles in the list + * @param paddr Pointer to IOVA list + * @param psize Pointer to size of buffer to return + * @param heap Pointer to a list of heaps. This is + * filled by the routine. + * + * @return 0 on success or negative on error + * + */ +int nvpva_buffer_submit_pin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, + u32 count, + struct dma_buf **dmabuf, + dma_addr_t *paddr, + u64 *psize, + enum nvpva_buffers_heap *heap); + +/** + * @brief UnPins the mapped address space on task completion. + * + * This function decrease the reference count for a mapped buffer when the + * task get completed or aborted. + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param dmabufs Pointer to dmabuffer list + * @param count Number of memhandles in the list + * @return None + * + */ +void nvpva_buffer_submit_unpin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, u32 count); + +/** + * @brief UnPins the mapped address space on task completion. + * + * This function decrease the reference count for a mapped buffer when the + * task get completed or aborted. 
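+ * The buffer is actually unmapped only when both the user-space and submit
+ * reference counts have dropped to zero.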
+ * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param ids Pointer to dmabuffer list + * @param count Number of memhandles in the list + * @return None + * + */ +void nvpva_buffer_submit_unpin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, u32 count); + +/** + * @brief Drop a user reference to buffer structure + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @return None + * + */ +void nvpva_buffer_release(struct nvpva_buffers *nvpva_buffers); + +/** + * @brief Returns dma buf and dma addr for a given handle + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param dmabuf dma buf pointer to search for + * @param addr dma_addr_t pointer to return + * @return 0 on success or negative on error + * + */ +int nvpva_get_iova_addr(struct nvpva_buffers *nvpva_buffers, + struct dma_buf *dmabuf, dma_addr_t *addr); + +#endif /*__NVPVA_NVPVA_BUFFER_H__ */ diff --git a/drivers/video/tegra/host/pva/nvpva_client.c b/drivers/video/tegra/host/pva/nvpva_client.c new file mode 100644 index 00000000..04537871 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_client.c @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "pva.h" +#include "nvpva_buffer.h" +#include "nvpva_client.h" +#include "pva_iommu_context_dev.h" + +/* Maximum contexts KMD creates per engine */ +#define NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG (MAX_PVA_CLIENTS) + +/* Search if the pid already have a context + * The function does below things; + * 1. loop through each clients in the client array and validates pid. + * 2. 
Also tracks the first free client in the array + */ +static struct nvpva_client_context * +client_context_search_locked(struct platform_device *pdev, + struct pva *dev, + pid_t pid) +{ + struct nvpva_client_context *c_node = NULL; + uint32_t i; + bool shared_cntxt_dev; + + for (i = 0U; i < NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; i++) { + c_node = &dev->clients[i]; + if ((c_node->ref_count != 0U) && (c_node->pid == pid)) + return c_node; + } + + for (i = 0U; i < NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; i++) { + c_node = &dev->clients[i]; + if (c_node->ref_count == 0U) + break; + } + + if (i >= NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG) + return NULL; + + shared_cntxt_dev = i > (NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG - 3); + + c_node->pid = pid; + c_node->pva = dev; + c_node->curr_sema_value = 0; + mutex_init(&c_node->sema_val_lock); + if (dev->version == PVA_HW_GEN2) { + c_node->cntxt_dev = + nvpva_iommu_context_dev_allocate(NULL, + 0, + shared_cntxt_dev); + + if (c_node->cntxt_dev == NULL) + return NULL; + + c_node->sid_index = nvpva_get_id_idx(dev, c_node->cntxt_dev) - 1; + } else { + c_node->cntxt_dev = pdev; + c_node->sid_index = 0; + } + + c_node->elf_ctx.cntxt_dev = c_node->cntxt_dev; + c_node->buffers = nvpva_buffer_init(dev->pdev, dev->aux_pdev, c_node->cntxt_dev); + if (IS_ERR(c_node->buffers)) { + dev_err(&dev->pdev->dev, + "failed to init nvhost buffer for client:%lu", + PTR_ERR(c_node->buffers)); + if (dev->version == PVA_HW_GEN2) + nvpva_iommu_context_dev_release(c_node->cntxt_dev); + c_node = NULL; + } + + return c_node; +} + +/* Allocate a client context from the client array + * The function does below things; + * 1. Search for an existing client context, if not found then a free client + * 2. Allocate a buffer pool if needed + */ +struct nvpva_client_context +*nvpva_client_context_alloc(struct platform_device *pdev, + struct pva *dev, + pid_t pid) +{ + struct nvpva_client_context *client = NULL; + + mutex_lock(&dev->clients_lock); + client = client_context_search_locked(pdev, dev, pid); + if (client != NULL) + client->ref_count += 1; + + mutex_unlock(&dev->clients_lock); + + return client; +} + +void nvpva_client_context_get(struct nvpva_client_context *client) +{ + struct pva *dev = client->pva; + + mutex_lock(&dev->clients_lock); + client->ref_count += 1; + mutex_unlock(&dev->clients_lock); +} + +/* Free a client context from the client array */ +static void +nvpva_client_context_free_locked(struct nvpva_client_context *client) +{ + nvpva_buffer_release(client->buffers); + nvpva_iommu_context_dev_release(client->cntxt_dev); + mutex_destroy(&client->sema_val_lock); + client->buffers = NULL; + client->pva = NULL; + client->pid = 0; + pva_unload_all_apps(&client->elf_ctx); +} + +/* Release the client context + * The function does below things; + * 1. Reduce the active Q count + * 2. Initiate freeing if the count is 0 + */ +void nvpva_client_context_put(struct nvpva_client_context *client) +{ + struct pva *pva = client->pva; + + mutex_lock(&pva->clients_lock); + client->ref_count--; + if (client->ref_count == 0U) + nvpva_client_context_free_locked(client); + + mutex_unlock(&pva->clients_lock); +} + +/* De-initialize the client array for the device + * The function does below things; + * 1. Free all the remaining buffer pools if any + * 2. 
Release the memory + */ +void nvpva_client_context_deinit(struct pva *dev) +{ + struct nvpva_client_context *client; + uint32_t max_clients; + uint32_t i; + + max_clients = NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; + if (dev->clients != NULL) { + mutex_lock(&dev->clients_lock); + for (i = 0U; i < max_clients; i++) { + client = &dev->clients[i]; + pva_vpu_deinit(&client->elf_ctx); + } + mutex_unlock(&dev->clients_lock); + mutex_destroy(&dev->clients_lock); + kfree(dev->clients); + dev->clients = NULL; + } +} + +/* Initialize a set of clients for the device + * The function does below things; + * 1. Allocate memory for maximum number of clients + * 2. Assign stream ID for each client contexts + */ +int nvpva_client_context_init(struct pva *pva) +{ + struct nvpva_client_context *clients = NULL; + uint32_t max_clients; + uint32_t j = 0U; + int err = 0; + + max_clients = NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; + clients = kcalloc(max_clients, sizeof(struct nvpva_client_context), + GFP_KERNEL); + if (clients == NULL) { + err = -ENOMEM; + goto done; + } + mutex_init(&pva->clients_lock); + for (j = 0U; j < NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; j++) { + err = pva_vpu_init(pva, &clients[j].elf_ctx); + if (err < 0) { + dev_err(&pva->pdev->dev, + "No memory for allocating VPU parsing"); + goto vpu_init_fail; + } + } + + pva->clients = clients; + return err; + +vpu_init_fail: + while (j--) + pva_vpu_deinit(&clients[j].elf_ctx); + + kfree(clients); + mutex_destroy(&pva->clients_lock); +done: + return err; +} diff --git a/drivers/video/tegra/host/pva/nvpva_client.h b/drivers/video/tegra/host/pva/nvpva_client.h new file mode 100644 index 00000000..1b217ede --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_client.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef NVPVA_CLIENT_H +#define NVPVA_CLIENT_H + +#include +#include +#include "pva_vpu_exe.h" + +struct pva; + +struct nvpva_client_context { + /* Reference to the device*/ + struct pva *pva; + + /* context device */ + struct platform_device *cntxt_dev; + + /* PID of client process which uses this context */ + pid_t pid; + + /* This tracks active users */ + u32 ref_count; + + u32 sid_index; + + /* Data structure to track pinned buffers for this client */ + struct nvpva_buffers *buffers; + + u32 curr_sema_value; + struct mutex sema_val_lock; + + /* Data structure to track elf context for vpu parsing */ + struct nvpva_elf_context elf_ctx; +}; + +struct pva; +int nvpva_client_context_init(struct pva *pva); +void nvpva_client_context_deinit(struct pva *pva); +void nvpva_client_context_get(struct nvpva_client_context *client); +struct nvpva_client_context +*nvpva_client_context_alloc(struct platform_device *pdev, + struct pva *dev, + pid_t pid); +void nvpva_client_context_put(struct nvpva_client_context *client); + +#endif /* NVPVA_CLIENT_H */ diff --git a/drivers/video/tegra/host/pva/nvpva_elf_parser.c b/drivers/video/tegra/host/pva/nvpva_elf_parser.c new file mode 100644 index 00000000..bb69ba6b --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_elf_parser.c @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#if IS_ENABLED(CONFIG_TEGRA_GRHOST) +#include "stdalign.h" +#else +#define alignof _Alignof /*stdalign.h not found*/ +#endif + +#include "nvpva_elf_parser.h" +#include + +#define UINT_MAX (~0U) + +/* CERT complains about casts from const char*, so do intermediate cast to + * void* + */ +static inline const void *to_void(const char *const p) +{ + return (const void *)p; +} + +bool image_is_elf(const void *const image) +{ + if (image == NULL) + return false; + + /* assume little endian format */ + if (ELFMAGIC_LSB == *(const u32 *)image) + return true; + + return false; +} + +bool elf_is_32bit(const void *e) +{ + if (image_is_elf(e)) { + const struct elf_file_header *efh = + (const struct elf_file_header *)e; + if (efh->oclass == ELFCLASS32) + return true; + } + return false; +} + +static inline size_t get_table_end(u32 num, u16 entsize, size_t off) +{ + /* We need to ensure the off+(num*entsize) doesn't overflow. 
Originally + * num and entsize are ushort, which C converts to int for multiply so + * instead cast them up to u32 or u64 + */ + size_t end; + u32 tablesize = num * entsize; + + if (tablesize < num) + return UZERO; /* wraparound error */ + + end = off + tablesize; + if (end < off) + return UZERO; /* wraparound error */ + + return end; +} + +static const struct elf_file_header *elf_file_header(const void *e) +{ + if (!elf_is_32bit(e)) + return NULL; + + return (const struct elf_file_header *)e; +} + +static inline const struct elf_section_header *elf_section_table(const void *e) +{ + const struct elf_file_header *efh = elf_file_header(e); + const char *p = (const char *)e; + + if ((e == NULL) || (efh == NULL)) + return NULL; + + p = &p[efh->shoff]; + /* proper ELF should always have offsets be aligned, but add check just + * in case. + */ + if (((uintptr_t)(p) % alignof(struct elf_section_header)) != UZERO) + return NULL; /* pointer not aligned */ + + return (const struct elf_section_header *)to_void(p); +} + +static size_t elf_section_size(const void *e, + const struct elf_section_header *esh) +{ + if ((e == NULL) || (esh == NULL)) + return UZERO; + + return (size_t)esh->size; +} + +u32 elf_shnum(const void *e) +{ + const struct elf_file_header *efh = elf_file_header(e); + + if (efh == NULL) + return UZERO; + + if (efh->shnum == UZERO) { + /* get value from size of first (empty) section to avoid + * recursion, don't call elf_section_header(0) + */ + const struct elf_section_header *esh = elf_section_table(e); + size_t size = elf_section_size(e, esh); + + if (size > UINT_MAX) { /* make sure we don't lose precision */ + return UZERO; + } else { + return (u32)size; + } + } + return efh->shnum; +} + +const struct elf_section_header *elf_section_header(const void *e, + unsigned int index) +{ + const struct elf_section_header *esh = elf_section_table(e); + + if (esh == NULL) + return NULL; + + if (index >= elf_shnum(e)) + return NULL; + + esh = &esh[index]; + return esh; +} + +size_t elf_size(const void *e) +{ + /* different elf writers emit elf in different orders, so look for end + * after program headers, section headers, or sections + */ + size_t max_size; + unsigned int i; + const struct elf_file_header *efh = elf_file_header(e); + + if (efh == NULL) + return UZERO; + + if (efh->phoff > efh->shoff) { + max_size = + get_table_end(efh->phnum, efh->phentsize, efh->phoff); + if (max_size == UZERO) + return UZERO; /* wraparound error */ + } else { + max_size = + get_table_end(elf_shnum(e), efh->shentsize, efh->shoff); + if (max_size == UZERO) + return UZERO; /* wraparound error */ + } + for (i = UZERO; i < elf_shnum(e); ++i) { + u32 esh_end; + const struct elf_section_header *esh = elf_section_header(e, i); + + if (esh == NULL) + return UZERO; + + esh_end = esh->offset + esh->size; + if (esh_end < esh->offset) + return UZERO; /* wraparound error */ + + if ((esh->type != SHT_NOBITS) && (esh_end > max_size)) + max_size = esh_end; + } + return max_size; +} + +static u32 elf_shstrndx(const void *e) +{ + const struct elf_file_header *efh = elf_file_header(e); + + if (efh == NULL) + return UZERO; + + if (efh->shstrndx == SHN_XINDEX) { + /* get value from link field of first (empty) section to avoid + * recursion, don't call elf_section_header(0) + */ + const struct elf_section_header *esh = elf_section_table(e); + + if (esh == NULL) + return UZERO; + + return esh->link; + } + return efh->shstrndx; +} + +static const char *elf_string_at_offset(const void *e, + const struct elf_section_header *eshstr, + 
unsigned int offset) +{ + const char *strtab; + u32 stroffset; + + if ((e == NULL) || (eshstr == NULL)) + return NULL; + + if (eshstr->type != SHT_STRTAB) + return NULL; + + if (offset >= eshstr->size) + return NULL; + + strtab = (const char *)e; + stroffset = eshstr->offset + offset; + + if (stroffset < eshstr->offset) + return NULL; + + strtab = &strtab[stroffset]; + return strtab; +} + +const char *elf_section_name(const void *e, + const struct elf_section_header *esh) +{ + const char *name; + /* get section header string table */ + u32 shstrndx = elf_shstrndx(e); + const struct elf_section_header *eshstr = + elf_section_header(e, shstrndx); + if ((esh == NULL) || (eshstr == NULL)) + return NULL; + + name = elf_string_at_offset(e, eshstr, esh->name); + return name; +} + +const struct elf_section_header *elf_named_section_header(const void *e, + const char *name) +{ + const struct elf_section_header *esh; + unsigned int i; + + if (name == NULL) + return NULL; + + esh = elf_section_table(e); + if (esh == NULL) + return NULL; + + /* iterate through sections till find matching name */ + for (i = UZERO; i < elf_shnum(e); ++i) { + const char *secname = elf_section_name(e, esh); + + if (secname != NULL) { + size_t seclen = strlen(secname); + + /* use strncmp to avoid problem with input not being + * null-terminated, but then need to check for false + * partial match + */ + if ((strncmp(secname, name, seclen) == ZERO) && + ((unsigned char)name[seclen]) == UZERO) { + return esh; + } + } + ++esh; + } + return NULL; +} + +static const struct elf_section_header *elf_typed_section_header(const void *e, + u32 type) +{ + unsigned int i; + const struct elf_section_header *esh = elf_section_table(e); + + if (esh == NULL) + return NULL; + + /* iterate through sections till find matching type */ + for (i = UZERO; i < elf_shnum(e); ++i) { + if (esh->type == type) + return esh; + + ++esh; + } + return NULL; +} + +const struct elf_section_header *elf_offset_section_header(const void *e, + u32 offset) +{ + unsigned int i; + const struct elf_section_header *esh = elf_section_table(e); + + if (esh == NULL) + return NULL; + + /* iterate through sections till find matching offset */ + for (i = UZERO; i < elf_shnum(e); ++i) { + if (esh->offset == offset) + return esh; + ++esh; + } + return NULL; +} + +const u8 *elf_section_contents(const void *e, + const struct elf_section_header *esh) +{ + const u8 *p; + + if ((e == NULL) || (esh == NULL)) + return NULL; + + p = (const u8 *)e; + return &p[esh->offset]; +} + +const struct elf_symbol *elf_symbol(const void *e, unsigned int index) +{ + const struct elf_section_header *esh; + const struct elf_symbol *esymtab; + const uint8_t *p = e; + uint8_t align = 0; + /* get symbol table */ + esh = elf_typed_section_header(e, SHT_SYMTAB); + if ((esh == NULL) || (esh->entsize == UZERO)) + return NULL; + + if (index >= (esh->size / esh->entsize)) + return NULL; + + align = esh->addralign; + p = &p[esh->offset]; + if ((align != 0) && (((uintptr_t)(p) % align != UZERO))) + return NULL; + + esymtab = (const struct elf_symbol *)(p); + return &esymtab[index]; +} + +const char *elf_symbol_name(const void *e, const struct elf_section_header *esh, + unsigned int index) +{ + const struct elf_section_header *eshstr; + const struct elf_symbol *esymtab; + const struct elf_symbol *esym; + const char *name; + const char *p; + uint8_t align = 0; + + if ((esh == NULL) || (esh->entsize == UZERO)) + return NULL; + + if (esh->type != SHT_SYMTAB) + return NULL; + + if (index >= (esh->size / 
esh->entsize)) + return NULL; + + /* get string table */ + eshstr = elf_section_header(e, esh->link); + if (eshstr == NULL) + return NULL; + + p = (const char *)e; + align = esh->addralign; + p = &p[esh->offset]; + if ((align != 0) && (((uintptr_t)(p) % align != UZERO))) + return NULL; + + esymtab = (const struct elf_symbol *)to_void(p); + esym = &esymtab[index]; + name = elf_string_at_offset(e, eshstr, esym->name); + return name; +} + +u32 elf_symbol_shndx(const void *e, const struct elf_symbol *esym, + unsigned int index) +{ + if ((e == NULL) || (esym == NULL)) + return UZERO; + + if (esym->shndx == SHN_XINDEX) { + const u8 *contents; + const u32 *shndx_array; + const struct elf_section_header *esh = + elf_typed_section_header(e, SHT_SYMTAB_SHNDX); + if (esh == NULL || esh->entsize == UZERO) + return UZERO; + + contents = elf_section_contents(e, esh); + if (contents == NULL) + return UZERO; + + if (((uintptr_t)(contents) % alignof(u32)) != UZERO) + return UZERO; + + shndx_array = (const u32 *)(contents); + if (index >= (esh->size / esh->entsize)) + return UZERO; + + return shndx_array[index]; + } + return esym->shndx; +} + +const struct elf_program_header *elf_program_header(const void *e, + unsigned int index) +{ + const struct elf_file_header *efh = elf_file_header(e); + const struct elf_program_header *eph; + const char *p = e; + + if (efh == NULL) + return NULL; + + if (index >= efh->phnum) + return NULL; + + p = &p[efh->phoff]; + if (((uintptr_t)(p) % alignof(struct elf_program_header)) != UZERO) + return NULL; + + eph = (const struct elf_program_header *)to_void(p); + eph = &eph[index]; + return eph; +} diff --git a/drivers/video/tegra/host/pva/nvpva_elf_parser.h b/drivers/video/tegra/host/pva/nvpva_elf_parser.h new file mode 100644 index 00000000..c9242a19 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_elf_parser.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef NVPVA_ELF_PARSER_H +#define NVPVA_ELF_PARSER_H +#include +#include "elf_include_fix.h" + +#define ZERO 0 +#define UZERO 0U +#define ULLZERO 0ULL + +/*---------------------------------- Header ----------------------------------*/ + +struct elf_file_header { + u32 magic; /* 0x7f,0x45,0x4c,0x46 */ + u8 oclass; /* Object file class */ + u8 data; /* Data encoding */ + u8 formatVersion; /* Object format version */ + u8 abi; /* OS application binary interface */ + u8 abiVersion; /* Version of abi */ + u8 padd[7]; /* Elf ident padding */ + u16 type; /* Object file type */ + u16 machine; /* Architecture */ + u32 version; /* Object file version */ + u32 entry; /* Entry point virtual address */ + u32 phoff; /* Program header table file offset */ + u32 shoff; /* Section header table file offset */ + u32 flags; /* Processor-specific flags */ + u16 ehsize; /* ELF header size in bytes */ + u16 phentsize; /* Program header table entry size */ + u16 phnum; /* Program header table entry count */ + u16 shentsize; /* Section header table entry size */ + u16 shnum; /* Section header table entry count */ + u16 shstrndx; /* Section header string table index */ +}; + +#define ELFMAGIC 0x7f454c46U /* This is in big endian */ +#define ELFMAGIC_LSB 0x464c457fU /* This is in little endian */ +#define ELFCLASS32 1U /* 32 bit object file */ + +#define EV_NONE 0 /* Invalid version */ +#define EV_CURRENT 1 /* Current version */ + +/*---------------------------------- Section ---------------------------------*/ + +struct elf_section_header { + u32 name; /* Section name, string table index */ + u32 type; /* Type of section */ + u32 flags; /* Miscellaneous section attributes */ + u32 addr; /* Section virtual addr at execution */ + u32 offset; /* Section file offset */ + u32 size; /* Size of section in bytes */ + u32 link; /* Index of another section */ + u32 info; /* Additional section information */ + u32 addralign; /* Section alignment */ + u32 entsize; /* Entry size if section holds table */ +}; + +/* + * Type + */ +#define SHT_NULL 0x00U /* NULL section */ +#define SHT_PROGBITS 0x01U /* Loadable program data */ +#define SHT_SYMTAB 0x02U /* Symbol table */ +#define SHT_STRTAB 0x03U /* String table */ +#define SHT_RELA 0x04U /* Relocation table with addents */ +#define SHT_HASH 0x05U /* Hash table */ +#define SHT_DYNAMIC 0x06U /* Information for dynamic linking */ +#define SHT_NOTE 0x07U /* Information that marks file */ +#define SHT_NOBITS 0x08U /* Section does not have data in file */ +#define SHT_REL 0x09U /* Relocation table without addents */ +#define SHT_SHLIB 0x0aU /* Reserved */ +#define SHT_DYNSYM 0x0bU /* Dynamic linker symbol table */ +#define SHT_INIT_ARRAY 0x0eU /* Array of pointers to init funcs */ +#define SHT_FINI_ARRAY 0x0fU /* Array of function to finish funcs */ +#define SHT_PREINIT_ARRAY 0x10U /* Array of pointers to pre-init functions */ +#define SHT_GROUP 0x11U /* Section group */ +#define SHT_SYMTAB_SHNDX 0x12U /* Table of 32bit symtab shndx */ +#define SHT_LOOS 0x60000000U /* Start OS-specific. 
*/ +#define SHT_HIOS 0x6fffffffU /* End OS-specific type */ +#define SHT_LOPROC 0x70000000U /* Start of processor-specific */ +#define SHT_HIPROC 0x7fffffffU /* End of processor-specific */ +#define SHT_LOUSER 0x80000000U /* Start of application-specific */ +#define SHT_HIUSER 0x8fffffffU /* End of application-specific */ + +/* + * Special section index + */ +#define SHN_UNDEF 0U /* Undefined section */ +#define SHN_LORESERVE 0xff00U /* lower bound of reserved indexes */ +#define SHN_ABS 0xfff1U /* Associated symbol is absolute */ +#define SHN_COMMON 0xfff2U /* Associated symbol is common */ +#define SHN_XINDEX 0xffffU /* Index is in symtab_shndx */ + +/* + * Special section names + */ +#define SHNAME_SHSTRTAB ".shstrtab" /* section string table */ +#define SHNAME_STRTAB ".strtab" /* string table */ +#define SHNAME_SYMTAB ".symtab" /* symbol table */ +#define SHNAME_SYMTAB_SHNDX ".symtab_shndx" /* symbol table shndx array */ +#define SHNAME_TEXT ".text." /* suffix with entry name */ + +/*---------------------------------- Program Segment -------------------------*/ + +struct elf_program_header { + u32 type; /* Identifies program segment type */ + u32 offset; /* Segment file offset */ + u32 vaddr; /* Segment virtual address */ + u32 paddr; /* Segment physical address */ + u32 filesz; /* Segment size in file */ + u32 memsz; /* Segment size in memory */ + u32 flags; /* Segment flags */ + u32 align; /* Segment alignment, file & memory */ +}; + +/*----------------------------------- Symbol ---------------------------------*/ + +struct elf_symbol { + u32 name; /* Symbol name, index in string tbl */ + u32 value; /* Value of the symbol */ + u32 size; /* Associated symbol size */ + u8 info; /* Type and binding attributes */ + u8 other; /* Extra flags */ + u16 shndx; /* Associated section index */ +}; + +#define ELF_ST_BIND(s) ((u32)((s)->info) >> 4) +#define ELF_ST_TYPE(s) ((u32)((s)->info) & 0xfU) +#define ELF_ST_INFO(b, t) (((b) << 4) + ((t)&0xfU)) + +/* + * Type + */ +#define STT_NOTYPE 0U /* No type known */ +#define STT_OBJECT 1U /* Data symbol */ +#define STT_FUNC 2U /* Code symbol */ +#define STT_SECTION 3U /* Section */ +#define STT_FILE 4U /* File */ +#define STT_COMMON 5U /* Common symbol */ +#define STT_LOOS 10U /* Start of OS-specific */ + +/* + * Scope + */ +#define STB_LOCAL 0U /* Symbol not visible outside object */ +#define STB_GLOBAL 1U /* Symbol visible outside object */ +#define STB_WEAK 2U /* Weak symbol */ + +/* + * The following routines that return file/program/section headers + * all return NULL when not found. + */ + +/* + * Typical elf readers create a table of information that is passed + * to the different routines. For simplicity, we're going to just + * keep the image of the whole file and pass that around. Later, if we see + * a need to speed this up, we could consider changing void * to be something + * more complicated. 
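+ *
+ * As a hedged illustration (not part of the original notes), a caller that
+ * holds a complete 32-bit ELF image in memory might look up a section roughly
+ * like this; `image` and `nbytes` are hypothetical names for the mapped image
+ * and its length:
+ *
+ *	if (image_is_elf(image) && elf_is_32bit(image) &&
+ *	    elf_size(image) <= nbytes) {
+ *		const struct elf_section_header *esh =
+ *			elf_named_section_header(image, SHNAME_SYMTAB);
+ *		const u8 *data = esh ? elf_section_contents(image, esh) : NULL;
+ *	}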
+ */ + +bool image_is_elf(const void *const image); + +bool elf_is_32bit(const void *e); + +u32 elf_shnum(const void *e); + +const struct elf_section_header *elf_section_header(const void *e, + unsigned int index); + +const char *elf_section_name(const void *e, + const struct elf_section_header *esh); + +const struct elf_section_header *elf_named_section_header(const void *e, + const char *name); + +const u8 *elf_section_contents(const void *e, + const struct elf_section_header *esh); + +const struct elf_symbol *elf_symbol(const void *e, unsigned int index); + +const char *elf_symbol_name(const void *e, const struct elf_section_header *esh, + unsigned int index); + +const struct elf_program_header *elf_program_header(const void *e, + unsigned int index); + +u32 elf_symbol_shndx(const void *e, const struct elf_symbol *esym, + unsigned int index); + +const struct elf_section_header *elf_offset_section_header(const void *e, + u32 offset); + +size_t elf_size(const void *e); +#endif diff --git a/drivers/video/tegra/host/pva/nvpva_queue.c b/drivers/video/tegra/host/pva/nvpva_queue.c new file mode 100644 index 00000000..f61cd406 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_queue.c @@ -0,0 +1,597 @@ +/* + * NVHOST queue management for T194 + * + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "nvpva_syncpt.h" +#include "nvpva_queue.h" +#include "pva_bit_helpers.h" +#include "pva.h" + +#define CMDBUF_SIZE 4096 + +/** + * @brief Describe a task pool struct + * + * Array of fixed task memory is allocated during queue_alloc call. + * The memory will be shared for various task based on availability + * + * dma_addr Physical address of task memory pool + * aux_dma_addr Physical address of task aux memory pool + * va Virtual address of the task memory pool + * aux_va Virtual address of the task memory pool + * kmem_addr Kernel memory for task struct + * lock Mutex lock for the array access. + * alloc_table Keep track of the index being assigned + * and freed for a task + * max_task_cnt Maximum task count that can be supported. 
+ * + */ +struct nvpva_queue_task_pool { + dma_addr_t dma_addr; + dma_addr_t aux_dma_addr; + void *va; + void *aux_va; + void *kmem_addr[MAX_PVA_SEG_COUNT_PER_QUEUE]; + struct mutex lock; + + unsigned long alloc_table[NUM_POOL_ALLOC_SUB_TABLES]; + unsigned int max_task_cnt; +}; + +static int nvpva_queue_task_pool_alloc(struct platform_device *pdev, + struct platform_device *pprim_dev, + struct platform_device *paux_dev, + struct nvpva_queue *queue, + unsigned int num_tasks) +{ + int err = 0; + unsigned int i; + unsigned int num_segments = num_tasks/MAX_PVA_TASK_COUNT_PER_QUEUE_SEG; + struct nvpva_queue_task_pool *task_pool; + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + u64 mem_size; + + task_pool = queue->task_pool; + memset(task_pool->kmem_addr, 0, sizeof(task_pool->kmem_addr)); + + /* Allocate the kernel memory needed for the task */ + if (queue->task_kmem_size) { + for (i = 0; i < num_segments; i++) { + task_pool->kmem_addr[i] = + kcalloc(MAX_PVA_TASK_COUNT_PER_QUEUE_SEG, + queue->task_kmem_size, GFP_KERNEL); + if (!task_pool->kmem_addr[i]) { + nvpva_err(&pdev->dev, + "failed to allocate " \ + "task_pool->kmem_addr"); + err = -ENOMEM; + goto err_alloc_task_pool; + } + } + } + + mem_size = queue->task_dma_size * num_tasks; + if (queue->task_dma_size != mem_size / num_tasks) { + nvpva_err(&pdev->dev, "mem size too large"); + err = -EINVAL; + goto err_alloc_task_pool; + } + + /* Allocate memory for the task itself */ + task_pool->va = dma_alloc_attrs(&pprim_dev->dev, + mem_size, + &task_pool->dma_addr, GFP_KERNEL, + 0); + + if (task_pool->va == NULL) { + nvpva_err(&pdev->dev, "failed to allocate task_pool->va"); + err = -ENOMEM; + goto err_alloc_task_pool; + } + + mem_size = queue->aux_dma_size * num_tasks; + if (queue->aux_dma_size != mem_size / num_tasks) { + nvpva_err(&pdev->dev, "mem size too large"); + err = -EINVAL; + goto err_alloc_aux_task_pool; + } + + /* Allocate aux memory for the task itself */ + task_pool->aux_va = dma_alloc_attrs(&paux_dev->dev, + mem_size, + &task_pool->aux_dma_addr, GFP_KERNEL, + 0); + + if (task_pool->aux_va == NULL) { + nvpva_err(&pdev->dev, "failed to allocate task_pool->aux_va"); + err = -ENOMEM; + goto err_alloc_aux_task_pool; + } + + nvpva_dbg_info(pva, + "task_pool->dma_addr = %llx, task_pool->auxdma_addr = %llx", + (u64)task_pool->dma_addr, (u64)task_pool->aux_dma_addr); + + task_pool->max_task_cnt = num_tasks; + mutex_init(&task_pool->lock); + + return err; + +err_alloc_aux_task_pool: + dma_free_attrs(&pprim_dev->dev, + queue->task_dma_size * task_pool->max_task_cnt, + task_pool->va, task_pool->dma_addr, + 0); +err_alloc_task_pool: + for (i = 0; i < num_segments; i++) { + if (task_pool->kmem_addr[i] == NULL) + continue; + + kfree(task_pool->kmem_addr[i]); + task_pool->kmem_addr[i] = NULL; + } + + return err; +} + +static void nvpva_queue_task_free_pool(struct platform_device *pdev, + struct nvpva_queue *queue) +{ + unsigned int i; + unsigned int segments; + u64 mem_size; + struct nvpva_queue_task_pool *task_pool = + (struct nvpva_queue_task_pool *)queue->task_pool; + + segments = task_pool->max_task_cnt/MAX_PVA_TASK_COUNT_PER_QUEUE_SEG; + + mem_size = queue->task_dma_size * task_pool->max_task_cnt; + if (queue->task_dma_size != mem_size / task_pool->max_task_cnt) { + nvpva_err(&pdev->dev, "mem size too large"); + return; + } + + dma_free_attrs(&queue->vm_pprim_dev->dev, + mem_size, + task_pool->va, task_pool->dma_addr, + 0); + + mem_size = queue->aux_dma_size * task_pool->max_task_cnt; + 
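+	/* Bail out if the aux pool size multiplication above wrapped around. */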
if (queue->aux_dma_size != mem_size / task_pool->max_task_cnt) { + nvpva_err(&pdev->dev, "mem size too large"); + return; + } + + dma_free_attrs(&queue->vm_paux_dev->dev, + mem_size, + task_pool->aux_va, task_pool->aux_dma_addr, + 0); + for (i = 0; i < segments; i++) + kfree(task_pool->kmem_addr[i]); + + memset(task_pool->alloc_table, 0, sizeof(task_pool->alloc_table)); + task_pool->max_task_cnt = 0U; +} + +static int nvpva_queue_dump(struct nvpva_queue_pool *pool, + struct nvpva_queue *queue, + struct seq_file *s) +{ + if (pool->ops && pool->ops->dump) + pool->ops->dump(queue, s); + + return 0; +} + +static int queue_dump(struct seq_file *s, void *data) +{ + struct nvpva_queue_pool *pool = s->private; + unsigned long queue_id; + u32 i; + + mutex_lock(&pool->queue_lock); + for (i = 0; i < NUM_POOL_ALLOC_SUB_TABLES; i++) + for_each_set_bit(queue_id, + &pool->alloc_table[i], + pool->max_queue_cnt) + nvpva_queue_dump(pool, + &pool->queues[64 * i + queue_id], s); + + mutex_unlock(&pool->queue_lock); + + return 0; +} + +static int queue_expose_open(struct inode *inode, struct file *file) +{ + return single_open(file, queue_dump, inode->i_private); +} + +static const struct file_operations queue_expose_operations = { + .open = queue_expose_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +struct nvpva_queue_pool *nvpva_queue_init(struct platform_device *pdev, + struct platform_device *paux_dev, + struct nvpva_queue_ops *ops, + unsigned int num_queues) +{ + struct nvhost_device_data *pdata; + struct nvpva_queue_pool *pool; + struct nvpva_queue *queues; + struct nvpva_queue *queue; + struct nvpva_queue_task_pool *task_pool; + unsigned int i; + int err; + + pool = kzalloc(sizeof(struct nvpva_queue_pool), GFP_KERNEL); + if (pool == NULL) { + err = -ENOMEM; + goto fail_alloc_pool; + } + + queues = kcalloc(num_queues, sizeof(struct nvpva_queue), GFP_KERNEL); + if (queues == NULL) { + err = -ENOMEM; + goto fail_alloc_queues; + } + + task_pool = kcalloc(num_queues, + sizeof(struct nvpva_queue_task_pool), GFP_KERNEL); + if (task_pool == NULL) { + nvpva_err(&pdev->dev, "failed to allocate task_pool"); + err = -ENOMEM; + goto fail_alloc_task_pool; + } + + pdata = platform_get_drvdata(pdev); + + /* initialize pool and queues */ + pool->pdev = pdev; + pool->pprim_dev = paux_dev; + pool->ops = ops; + pool->queues = queues; + memset(pool->alloc_table, 0, sizeof(pool->alloc_table)); + pool->max_queue_cnt = num_queues; + pool->queue_task_pool = task_pool; + mutex_init(&pool->queue_lock); + + debugfs_create_file("queues", 0444, + pdata->debugfs, pool, + &queue_expose_operations); + + + for (i = 0; i < num_queues; i++) { + queue = &queues[i]; + queue->id = i; + queue->pool = pool; + queue->task_pool = (void *)&task_pool[i]; + queue->batch_id = 0U; + nvpva_queue_get_task_size(queue); + } + + return pool; + +fail_alloc_task_pool: + kfree(pool->queues); +fail_alloc_queues: + kfree(pool); +fail_alloc_pool: + return ERR_PTR(err); +} + +void nvpva_queue_deinit(struct nvpva_queue_pool *pool) +{ + if (!pool) + return; + + kfree(pool->queue_task_pool); + kfree(pool->queues); + kfree(pool); + pool = NULL; +} + +void nvpva_queue_abort_all(struct nvpva_queue_pool *pool) +{ + u32 id; + u32 i; + + mutex_lock(&pool->queue_lock); + for (i = 0; i < NUM_POOL_ALLOC_SUB_TABLES; i++) + for_each_set_bit(id, + &pool->alloc_table[i], + pool->max_queue_cnt) + nvpva_queue_abort(&pool->queues[64 * i + id]); + + mutex_unlock(&pool->queue_lock); +} + +static void nvpva_queue_release(struct kref *ref) +{ + 
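+	/*
+	 * kref release callback: drop the queue syncpt, free the preallocated
+	 * task pools and return the queue slot to the pool's alloc_table.
+	 */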
struct nvpva_queue *queue = container_of(ref, struct nvpva_queue, + kref); + struct nvpva_queue_pool *pool = queue->pool; + + struct nvhost_device_data *pdata = platform_get_drvdata(pool->pdev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_fn(pva, ""); + + /* release allocated resources */ + nvpva_syncpt_put_ref_ext(pool->pdev, queue->syncpt_id); + + /* free the task_pool */ + if (queue->task_dma_size) + nvpva_queue_task_free_pool(pool->pdev, queue); + + /* free the queue mutex */ + mutex_destroy(&queue->tail_lock); + + /* ..and mark the queue free */ + mutex_lock(&pool->queue_lock); + clear_bit(queue->id%64, &pool->alloc_table[queue->id/64]); + mutex_unlock(&pool->queue_lock); +} + +void nvpva_queue_put(struct nvpva_queue *queue) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(queue->pool->pdev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_fn(pva, ""); + kref_put(&queue->kref, nvpva_queue_release); +} + +void nvpva_queue_get(struct nvpva_queue *queue) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(queue->pool->pdev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_fn(pva, ""); + kref_get(&queue->kref); +} + +struct nvpva_queue *nvpva_queue_alloc(struct nvpva_queue_pool *pool, + struct platform_device *paux_dev, + unsigned int num_tasks) +{ + struct platform_device *pdev = pool->pdev; + struct nvpva_queue *queues = pool->queues; + struct nvpva_queue *queue; + int index = 0; + int err = 0; + u32 syncpt_val; + + mutex_lock(&pool->queue_lock); + + index = rmos_find_first_zero_bit((u32 *) pool->alloc_table, + pool->max_queue_cnt); + + /* quit if we found a queue */ + if (index >= pool->max_queue_cnt) { + dev_err(&pdev->dev, "failed to get free Queue\n"); + err = -ENOMEM; + goto err_alloc_queue; + } + + /* reserve the queue */ + queue = &queues[index]; + set_bit(index%64, &pool->alloc_table[index/64]); + + /* allocate a syncpt for the queue */ + queue->syncpt_id = nvpva_get_syncpt_client_managed(pdev, "pva_syncpt"); + if (queue->syncpt_id == 0) { + dev_err(&pdev->dev, "failed to get syncpt\n"); + err = -ENOMEM; + goto err_alloc_syncpt; + } + + if (nvhost_syncpt_read_ext_check(pdev, + queue->syncpt_id, + &syncpt_val) != 0) { + err = -EIO; + goto err_read_syncpt; + } + + atomic_set(&queue->syncpt_maxval, syncpt_val); + + /* initialize queue ref count and sequence*/ + kref_init(&queue->kref); + queue->sequence = 0; + + /* initialize task list */ + INIT_LIST_HEAD(&queue->tasklist); + mutex_init(&queue->list_lock); + + /* initialize task list */ + queue->attr = NULL; + mutex_init(&queue->attr_lock); + + mutex_unlock(&pool->queue_lock); + + queue->vm_pdev = pdev; + queue->vm_pprim_dev = pool->pprim_dev; + + mutex_init(&queue->tail_lock); + queue->vm_paux_dev = paux_dev; + + if (queue->task_dma_size) { + err = nvpva_queue_task_pool_alloc(queue->vm_pdev, + queue->vm_pprim_dev, + queue->vm_paux_dev, + queue, + num_tasks); + if (err < 0) + goto err_alloc_task_pool; + } + + return queue; + +err_alloc_task_pool: + mutex_lock(&pool->queue_lock); +err_read_syncpt: + nvpva_syncpt_put_ref_ext(pool->pdev, queue->syncpt_id); +err_alloc_syncpt: + clear_bit(queue->id%64, &pool->alloc_table[queue->id/64]); +err_alloc_queue: + mutex_unlock(&pool->queue_lock); + return ERR_PTR(err); +} + +int nvpva_queue_abort(struct nvpva_queue *queue) +{ + struct nvpva_queue_pool *pool = queue->pool; + + if (pool->ops && pool->ops->abort) + return pool->ops->abort(queue); + + return 0; +} + +int nvpva_queue_submit(struct nvpva_queue *queue, void *task_arg) +{ + struct 
nvpva_queue_pool *pool = queue->pool; + + if (pool->ops && pool->ops->submit) + return pool->ops->submit(queue, task_arg); + + return 0; +} + +int nvpva_queue_set_attr(struct nvpva_queue *queue, void *arg) +{ + struct nvpva_queue_pool *pool = queue->pool; + + if (pool->ops && pool->ops->set_attribute) + return pool->ops->set_attribute(queue, arg); + + return 0; +} + +struct nvpva_queue_task { + struct platform_device *host1x_pdev; + + struct nvpva_queue *queue; + + dma_addr_t dma_addr; + u32 *cpu_addr; +}; + +int nvpva_queue_get_task_size(struct nvpva_queue *queue) +{ + struct nvpva_queue_pool *pool = queue->pool; + + if (pool->ops && pool->ops->get_task_size) + pool->ops->get_task_size(&queue->task_dma_size, + &queue->task_kmem_size, + &queue->aux_dma_size); + + return 0; +} + +int nvpva_queue_alloc_task_memory( + struct nvpva_queue *queue, + struct nvpva_queue_task_mem_info *task_mem_info) +{ + int err = 0; + unsigned int index; + unsigned int hw_offset; + unsigned int sw_offset; + unsigned int seg_base; + unsigned int seg_index; + size_t aux_hw_offset; + struct platform_device *pdev = queue->pool->pdev; + struct nvpva_queue_task_pool *task_pool = + (struct nvpva_queue_task_pool *)queue->task_pool; + + mutex_lock(&task_pool->lock); + + index = rmos_find_first_zero_bit((u32 *) task_pool->alloc_table, + task_pool->max_task_cnt); + + /* quit if pre-allocated task array is not free */ + if (index >= task_pool->max_task_cnt) { + dev_err(&pdev->dev, + "failed to get Task Pool Memory\n"); + err = -EAGAIN; + goto err_alloc_task_mem; + } + + /* assign the task array */ + seg_index = index%MAX_PVA_TASK_COUNT_PER_QUEUE_SEG; + seg_base = (index/MAX_PVA_TASK_COUNT_PER_QUEUE_SEG); + set_bit(index%64, &task_pool->alloc_table[index/64]); + hw_offset = index * queue->task_dma_size; + aux_hw_offset = index * queue->aux_dma_size; + sw_offset = seg_index * queue->task_kmem_size; + task_mem_info->kmem_addr = + (void *)((u8 *)task_pool->kmem_addr[seg_base] + sw_offset); + task_mem_info->va = (void *)((u8 *)task_pool->va + hw_offset); + task_mem_info->dma_addr = task_pool->dma_addr + hw_offset; + task_mem_info->aux_va = (void *)((u8 *)task_pool->aux_va + aux_hw_offset); + if ((U64_MAX - task_pool->aux_dma_addr) < task_pool->aux_dma_addr) { + err = -EFAULT; + goto err_alloc_task_mem; + } + + task_mem_info->aux_dma_addr = task_pool->aux_dma_addr + aux_hw_offset; + task_mem_info->pool_index = index; + +err_alloc_task_mem: + mutex_unlock(&task_pool->lock); + + return err; +} + +void nvpva_queue_free_task_memory(struct nvpva_queue *queue, int index) +{ + unsigned int hw_offset; + unsigned int sw_offset; + unsigned int seg_index; + unsigned int seg_base; + + u8 *task_kmem, *task_dma_va; + struct nvpva_queue_task_pool *task_pool = + (struct nvpva_queue_task_pool *)queue->task_pool; + + /* clear task kernel and dma virtual memory contents*/ + seg_index = index%MAX_PVA_TASK_COUNT_PER_QUEUE_SEG; + seg_base = (index/MAX_PVA_TASK_COUNT_PER_QUEUE_SEG); + hw_offset = index * queue->task_dma_size; + sw_offset = seg_index * queue->task_kmem_size; + task_kmem = (u8 *)task_pool->kmem_addr[seg_base] + sw_offset; + task_dma_va = (u8 *)task_pool->va + hw_offset; + + memset(task_kmem, 0, queue->task_kmem_size); + memset(task_dma_va, 0, queue->task_dma_size); + + mutex_lock(&task_pool->lock); + clear_bit(index%64, &task_pool->alloc_table[index/64]); + mutex_unlock(&task_pool->lock); +} diff --git a/drivers/video/tegra/host/pva/nvpva_queue.h b/drivers/video/tegra/host/pva/nvpva_queue.h new file mode 100644 index 00000000..c4924f5e 
--- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_queue.h @@ -0,0 +1,304 @@ +/* + * NVPVA Queue management header for T194 and T234 + * + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVPVA_NVPVA_QUEUE_H__ +#define __NVPVA_NVPVA_QUEUE_H__ + +#include +#include +#include + +#define NUM_POOL_ALLOC_SUB_TABLES 4 + +struct nvpva_queue_task_pool; +/** @brief Holds PVA HW task which can be submitted to PVA R5 FW */ +struct pva_hw_task; + +/** + * @brief Describe a allocated task mem struct + * + * kmem_addr Address for the task kernel memory + * dma_addr Physical address of task memory + * aux_dma_addr Physical address of aux task memory + * va Virtual address of the task memory + * aux_va Virtual address of the aux task memory + * pool_index Index to the allocated task memory + * + * This is keep track of the memory details of the task + * struct that is being shared between kernel and firmware. + */ +struct nvpva_queue_task_mem_info { + void *kmem_addr; + dma_addr_t dma_addr; + dma_addr_t aux_dma_addr; + void *va; + void *aux_va; + int pool_index; +}; +/** + * @brief Information needed in a Queue + * + * pool pointer queue pool + * kref struct kref for reference count + * syncpt_id Host1x syncpt id + * id Queue id + * list_lock mutex for tasks lists control + * tasklist Head of tasks list + * sequence monotonically incrementing task id per queue + * task_pool pointer to struct for task memory pool + * task_dma_size dma size used in hardware for a task + * task_kmem_size kernel memory size for a task + * aux_dma_size kernel memory size for a task aux buffer + * attr queue attribute associated with the host module + * + */ +struct nvpva_queue { + struct nvpva_queue_task_pool *task_pool; + struct nvpva_queue_pool *pool; + struct kref kref; + u32 id; + + /*wait list for task mem requester*/ + struct semaphore task_pool_sem; + + /* Host1x resources */ + struct nvhost_channel *channel; + struct platform_device *vm_pdev; + struct platform_device *vm_pprim_dev; + struct platform_device *vm_paux_dev; + u32 syncpt_id; + u32 local_sync_counter; + atomic_t syncpt_maxval; + + size_t task_dma_size; + size_t task_kmem_size; + size_t aux_dma_size; + + u32 sequence; + + struct mutex attr_lock; + void *attr; + + struct mutex list_lock; + struct list_head tasklist; + + /*! Mutex for exclusive access of tail task submit */ + struct mutex tail_lock; + struct pva_hw_task *old_tail; + struct pva_hw_task *hw_task_tail; + + u64 batch_id; +}; + +/** + * @brief hardware specific queue callbacks + * + * dump dump the task information + * abort abort all tasks from a queue + * submit submit the given list of tasks to hardware + * get_task_size get the dma size needed for the task in hw + * and the kernel memory size needed for task. 
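+ * set_attribute apply a module specific attribute to the queue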
+ * + */ +struct nvpva_queue_ops { + void (*dump)(struct nvpva_queue *queue, struct seq_file *s); + int (*abort)(struct nvpva_queue *queue); + int (*submit)(struct nvpva_queue *queue, void *task_arg); + void (*get_task_size)(size_t *dma_size, + size_t *kmem_size, + size_t *aux_dma_size); + int (*set_attribute)(struct nvpva_queue *queue, void *arg); +}; + +/** + * @brief Queue pool data structure to hold queue table + * + * pdev Pointer to the Queue client device + * ops Pointer to hardware specific queue ops + * queues Queues available for the client + * queue_lock Mutex for the bitmap of reserved queues + * alloc_table Bitmap of allocated queues + * max_queue_cnt Max number of queues available for client + * queue_task_pool Pointer to the task memory pool for queues. + * + */ +struct nvpva_queue_pool { + struct platform_device *pdev; + struct platform_device *pprim_dev; + struct nvpva_queue_ops *ops; + struct nvpva_queue *queues; + struct mutex queue_lock; + unsigned long alloc_table[NUM_POOL_ALLOC_SUB_TABLES]; + unsigned int max_queue_cnt; + void *queue_task_pool; +}; + +/** + * @brief Initialize queue structures + * + * This function allocates and initializes queue data structures. + * + * @param pdev Pointer to the Queue client device + * @param paux_dev Pointer to the Queue client aux device + * @param ops Pointer to device specific callbacks + * @param num_queues Max number of queues available for client + * @return pointer to queue pool + * + */ +struct nvpva_queue_pool *nvpva_queue_init(struct platform_device *pdev, + struct platform_device *paux_dev, + struct nvpva_queue_ops *ops, + unsigned int num_queues); + +/** + * @brief De-initialize queue structures + * + * This function frees all queue data structures. + * + * @param pool pointer to queue pool + * @return void + * + */ +void nvpva_queue_deinit(struct nvpva_queue_pool *pool); + +/** + * @brief Release reference of a queue + * + * This function releases a reference to a queue. + * + * @param queue Pointer to an allocated queue. + * @return void + * + */ +void nvpva_queue_put(struct nvpva_queue *queue); + +/** + * @brief Get reference on a queue. + * + * This function is used to get a reference to an already allocated queue. + * + * @param queue Pointer to an allocated queue. + * @return None + * + */ +void nvpva_queue_get(struct nvpva_queue *queue); + +/** + * @brief Allocate a queue for client. + * + * This function allocates a queue from the pool to the client for the user. + * + * @param pool Pointer to a queue pool table + * @param paux_dev pointer to auxiliary dev + * @param num_tasks Max number of tasks per queue + * + * @return Pointer to a queue struct on success + * or negative error on failure. + * + */ +struct nvpva_queue *nvpva_queue_alloc(struct nvpva_queue_pool *pool, + struct platform_device *paux_dev, + unsigned int num_tasks); + +/** + * @brief Abort all active queues + * + * @param pool Pointer to a queue pool table + */ +void nvpva_queue_abort_all(struct nvpva_queue_pool *pool); + +/** + * @brief Abort tasks within a client queue + * + * This function aborts all tasks from the given client queue. If there are no + * active tasks, the function call is a no-op. + * It is expected to be called when an active device fd gets closed. + * + * @param queue Pointer to an allocated queue + * @return None + * + */ +int nvpva_queue_abort(struct nvpva_queue *queue); + +/** + * @brief submits the given list of tasks to hardware + * + * This function submits the given list of tasks to hardware.
+ * The submit structure is updated with the fence values as appropriate. + * + * @param queue Pointer to an allocated queue + * @param submit Submit the given list of tasks to hardware + * @return 0 on success or negative error code on failure. + * + */ +int nvpva_queue_submit(struct nvpva_queue *queue, void *submit); + +/** + * @brief Get the Task Size needed + * + * This function get the needed memory size for the task. This memory is + * shared memory between kernel and firmware + * + * @param queue Pointer to an allocated queue + * @return Size of the task + * + */ +int nvpva_queue_get_task_size(struct nvpva_queue *queue); + +/** + * @brief Allocate a memory from task memory pool + * + * This function helps to assign a task memory from + * the preallocated task memory pool. This memory is shared memory between + * kernel and firmware + * + * @queue Pointer to an allocated queue + * @task_mem_info Pointer to nvpva_queue_task_mem_info struct + * + * @return 0 on success, otherwise a negative error code is returned + * + */ +int nvpva_queue_alloc_task_memory( + struct nvpva_queue *queue, + struct nvpva_queue_task_mem_info *task_mem_info); + +/** + * @brief Free the assigned task memory + * + * This function helps to unset the assigned task memory + * + * @param queue Pointer to an allocated queue + * @param index Index of the assigned task pool memory + * @return void + * + */ +void nvpva_queue_free_task_memory(struct nvpva_queue *queue, int index); + +/** + * @brief Sets the attribute to the queue + * + * This function set the attribute of the queue with the arguments passed + * + * @param queue Pointer to an allocated queue + * @param arg The structure which consists of the id and value + * @return 0 on success or negative error code on failure. + * + */ +int nvpva_queue_set_attr(struct nvpva_queue *queue, void *arg); + +#endif diff --git a/drivers/video/tegra/host/pva/nvpva_syncpt.c b/drivers/video/tegra/host/pva/nvpva_syncpt.c new file mode 100644 index 00000000..5bbd7b78 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_syncpt.c @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "pva.h" + +int nvpva_map_region(struct device *dev, + phys_addr_t start, + size_t size, + dma_addr_t *sp_start, + u32 attr) +{ + /* If IOMMU is enabled, map it into the device memory */ + if (iommu_get_domain_for_dev(dev)) { + *sp_start = dma_map_resource(dev, start, size, attr, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, *sp_start)) + return -ENOMEM; + } else { + *sp_start = start; + } + + return 0; +} + +int nvpva_unmap_region(struct device *dev, + dma_addr_t addr, + size_t size, + u32 attr) +{ + if (iommu_get_domain_for_dev(dev)) { + dma_unmap_resource(dev, addr, size, attr, + DMA_ATTR_SKIP_CPU_SYNC); + } + + return 0; +} + +void nvpva_syncpt_put_ref_ext(struct platform_device *pdev, + u32 id) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + + if (pva->version == PVA_HW_GEN1) { + nvhost_syncpt_put_ref_ext(pdev, id); + return; + } + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + if (pva->syncpts.syncpts_rw[i].id == id) { + pva->syncpts.syncpts_rw[i].assigned = 0; + break; + } + } +} + +u32 nvpva_get_syncpt_client_managed(struct platform_device *pdev, + const char *syncpt_name) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + u32 id = 0; + int i; + + if (pva->version == PVA_HW_GEN1) { + id = nvhost_get_syncpt_client_managed(pdev, "pva_syncpt"); + goto out; + } + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + if (pva->syncpts.syncpts_rw[i].assigned == 0) { + id = pva->syncpts.syncpts_rw[i].id; + pva->syncpts.syncpts_rw[i].assigned = 1; + break; + } + } +out: + return id; +} + +dma_addr_t +nvpva_syncpt_address(struct platform_device *pdev, u32 id, bool rw) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + struct platform_device *host_pdev = pva->syncpts.host_pdev; + dma_addr_t addr = 0; + u32 offset = 0; + int i; + + if (pva->version == PVA_HW_GEN1) { + addr = nvhost_syncpt_address(pdev, id); + goto out; + } + + if (!rw) { + offset = nvhost_syncpt_unit_interface_get_byte_offset_ext(host_pdev, id); + addr = pva->syncpts.syncpt_start_iova_r + (dma_addr_t)offset; + goto out; + } + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + if (pva->syncpts.syncpts_rw[i].id == id) { + addr = pva->syncpts.syncpts_rw[i].addr; + break; + } + } +out: + nvpva_dbg_info(pva, + "syncpt_addr: id: %d addr: %llx offset: %llx\n", + id, + addr, + (u64)offset); + + return addr; +} + +void nvpva_syncpt_unit_interface_deinit(struct platform_device *pdev, + struct platform_device *paux_dev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + + if (!pva->syncpts.syncpts_mapped_r) + goto out; + + if (pva->version == PVA_HW_GEN1) { + pva->syncpts.syncpts_mapped_rw = false; + pva->syncpts.syncpts_mapped_r = false; + goto out; + } + + nvpva_unmap_region(&paux_dev->dev, pva->syncpts.syncpt_start_iova_r, + pva->syncpts.syncpt_range_r, DMA_TO_DEVICE); + pva->syncpts.syncpts_mapped_r = false; + pva->syncpts.syncpt_start_iova_r = 0; + pva->syncpts.syncpt_range_r = 0; + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + if (pva->syncpts.syncpts_rw[i].id == 0) + continue; + + nvpva_unmap_region(&paux_dev->dev, pva->syncpts.syncpts_rw[i].addr, + pva->syncpts.syncpts_rw[i].size, + DMA_BIDIRECTIONAL); + pva->syncpts.syncpts_rw[i].addr = 0; + pva->syncpts.syncpts_rw[i].size = 0; + 
pva->syncpts.syncpts_rw[i].assigned = 0; + nvhost_syncpt_put_ref_ext(pdev, + pva->syncpts.syncpts_rw[i].id); + pva->syncpts.syncpts_rw[i].id = 0; + } + + pva->syncpts.syncpts_mapped_rw = false; +out: + return; +} + +int nvpva_syncpt_unit_interface_init(struct platform_device *pdev, + struct platform_device *paux_dev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + phys_addr_t base; + size_t size; + dma_addr_t syncpt_addr_rw; + u32 syncpt_offset; + int err = 0; + int i; + u32 id = 0; + + if ((pva->syncpts.syncpts_mapped_r) + || (pva->syncpts.syncpts_mapped_rw)) + goto out; + + if (pva->version == PVA_HW_GEN1) { + pva->syncpts.syncpt_start_iova_r = 0; + pva->syncpts.syncpt_range_r = 0; + pva->syncpts.page_size = 0; + pva->syncpts.syncpts_mapped_r = true; + pva->syncpts.syncpts_mapped_rw = true; + pva->syncpts.syncpt_start_iova_rw = 0; + pva->syncpts.syncpt_range_rw = 0; + goto out; + } + + pva->syncpts.host_pdev = to_platform_device(pdev->dev.parent); + err = nvhost_syncpt_unit_interface_get_aperture(pva->syncpts.host_pdev, + &base, + &size); + if (err) { + dev_err(&pdev->dev, "failed to get aperture"); + goto out; + } + + syncpt_offset = + nvhost_syncpt_unit_interface_get_byte_offset_ext(pva->syncpts.host_pdev, 1); + + err = nvpva_map_region(&paux_dev->dev, + base, + size, + &syncpt_addr_rw, + DMA_TO_DEVICE); + if (err) + goto out; + + pva->syncpts.syncpt_start_iova_r = syncpt_addr_rw; + pva->syncpts.syncpt_range_r = size; + pva->syncpts.page_size = syncpt_offset; + pva->syncpts.syncpts_mapped_r = true; + + nvpva_dbg_info(pva, + "syncpt_start_iova %llx, size %llx\n", + pva->syncpts.syncpt_start_iova_rw, + pva->syncpts.syncpt_range_r); + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + id = nvhost_get_syncpt_client_managed(pdev, "pva_syncpt"); + if (id == 0) { + dev_err(&pdev->dev, "failed to get syncpt\n"); + err = -ENOMEM; + goto err_alloc_syncpt; + } + + syncpt_offset = + nvhost_syncpt_unit_interface_get_byte_offset_ext(pva->syncpts.host_pdev, + id); + err = nvpva_map_region(&paux_dev->dev, + (base + syncpt_offset), + pva->syncpts.page_size, + &syncpt_addr_rw, + DMA_BIDIRECTIONAL); + if (err) { + dev_err(&pdev->dev, "failed to map syncpt %d\n", id); + goto err_map_sp; + } + + pva->syncpts.syncpts_rw[i].addr = syncpt_addr_rw; + pva->syncpts.syncpts_rw[i].id = id; + pva->syncpts.syncpts_rw[i].assigned = 0; + nvpva_dbg_info(pva, + "syncpt_addr: id: %d addr: %llx offset: %llx\n", + id, + syncpt_addr_rw, + 0LLU); + } + + pva->syncpts.syncpts_mapped_rw = true; + syncpt_addr_rw = pva->syncpts.syncpts_rw[MAX_PVA_QUEUE_COUNT - 1].addr; + pva->syncpts.syncpt_start_iova_rw = syncpt_addr_rw; + pva->syncpts.syncpt_range_rw = MAX_PVA_QUEUE_COUNT * + (pva->syncpts.syncpts_rw[0].addr - + pva->syncpts.syncpts_rw[1].addr); + + if (pva->version == PVA_HW_GEN1) + goto out; + + if (syncpt_addr_rw % (pva->syncpts.syncpt_range_rw) != 0) { + dev_err(&pdev->dev, "RW sync pts base not aligned to 512k"); + err = -ENOMEM; + goto err_map_sp; + } + + syncpt_addr_rw += (MAX_PVA_QUEUE_COUNT - 1) * pva->syncpts.page_size; + if (syncpt_addr_rw != pva->syncpts.syncpts_rw[0].addr) { + dev_err(&pdev->dev, "RW sync pts not contiguous"); + err = -ENOMEM; + goto err_map_sp; + } + + goto out; + +err_map_sp: +err_alloc_syncpt: + nvpva_syncpt_unit_interface_deinit(pdev, paux_dev); +out: + return err; +} diff --git a/drivers/video/tegra/host/pva/nvpva_syncpt.h b/drivers/video/tegra/host/pva/nvpva_syncpt.h new file mode 100644 index 00000000..d6d2bccb --- /dev/null +++ 
b/drivers/video/tegra/host/pva/nvpva_syncpt.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVPVA_SYNCPT_H__ +#define __NVPVA_SYNCPT_H__ + +void nvpva_syncpt_put_ref_ext(struct platform_device *pdev, + u32 id); +dma_addr_t nvpva_syncpt_address(struct platform_device *pdev, u32 id, + bool rw); +void nvpva_syncpt_unit_interface_deinit(struct platform_device *pdev, + struct platform_device *paux_dev); +int nvpva_syncpt_unit_interface_init(struct platform_device *pdev, + struct platform_device *paux_dev); +u32 nvpva_get_syncpt_client_managed(struct platform_device *pdev, + const char *syncpt_name); +int nvpva_map_region(struct device *dev, + phys_addr_t start, + size_t size, + dma_addr_t *sp_start, + u32 attr); +int nvpva_unmap_region(struct device *dev, + dma_addr_t addr, + size_t size, + u32 attr); +#endif diff --git a/drivers/video/tegra/host/pva/pva-vpu-perf.h b/drivers/video/tegra/host/pva/pva-vpu-perf.h new file mode 100644 index 00000000..b499cb56 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva-vpu-perf.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2018 NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _PVA_VPU_PERF_H_ +#define _PVA_VPU_PERF_H_ + +#define PVA_TASK_VPU_NUM_PERF_COUNTERS 8 + +struct pva_task_vpu_perf_counter { + u32 count; + u32 sum; + u64 sum_squared; + u32 min; + u32 max; +}; + +#endif + diff --git a/drivers/video/tegra/host/pva/pva.c b/drivers/video/tegra/host/pva/pva.c new file mode 100644 index 00000000..f11eb598 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva.c @@ -0,0 +1,1484 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "pva_mailbox.h" +#include +#include "nvpva_client.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#endif +#include + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) +#include +#include +#include +#endif + +#include "pva_mailbox_t23x.h" +#include "pva_interface_regs_t23x.h" +#include "pva_version_config_t23x.h" +#include "pva_ccq_t23x.h" +#include "nvpva_queue.h" +#include "pva_queue.h" +#include "pva.h" +#include "pva_regs.h" +#include "pva_mailbox_t19x.h" +#include "pva_interface_regs_t19x.h" +#include "pva_version_config_t19x.h" +#include "pva_ccq_t19x.h" +#include "pva-ucode-header.h" +#include "pva_system_allow_list.h" +#include "pva_iommu_context_dev.h" +#include "nvpva_syncpt.h" +#include "pva-fw-address-map.h" +#include "pva_sec_ec.h" + +/* + * NO IOMMU set 0x60000000 as start address. + * With IOMMU set 0x80000000(>2GB) as startaddress + */ +#define DRAM_PVA_IOVA_START_ADDRESS 0x80000000 +#define DRAM_PVA_NO_IOMMU_START_ADDRESS 0x60000000 + +extern struct platform_driver nvpva_iommu_context_dev_driver; +static u32 vm_regs_sid_idx_t19x[] = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; +static u32 vm_regs_reg_idx_t19x[] = {0, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; +#ifdef CONFIG_PVA_CO_DISABLED +static u32 vm_regs_sid_idx_t234[] = {1, 2, 3, 4, 5, 6, 7, 7, + 8, 8, 8, 8, 8, 0, 0, 0}; +#else +static u32 vm_regs_sid_idx_t234[] = {1, 2, 3, 4, 5, 6, 7, 7, + 8, 0, 8, 8, 8, 0, 0, 0}; +#endif +static u32 vm_regs_reg_idx_t234[] = {0, 1, 2, 3, 4, 5, 6, 7, + 8, 8, 8, 9, 9, 0, 0, 0}; +static char *aux_dev_name = "16000000.pva0:pva0_niso1_ctx7"; +static u32 aux_dev_name_len = 29; + +struct nvhost_device_data t19_pva1_info = { + .version = PVA_HW_GEN1, + .num_channels = 1, + .clocks = { + {"axi", UINT_MAX,}, + {"vps0", UINT_MAX,}, + {"vps1", UINT_MAX,}, + }, + .ctrl_ops = &tegra_pva_ctrl_ops, + .devfs_name_family = "pva", + .class = NV_PVA1_CLASS_ID, + .autosuspend_delay = 500, + .finalize_poweron = pva_finalize_poweron, + .prepare_poweroff = pva_prepare_poweroff, + .firmware_name = "nvhost_pva010.fw", + .resource_policy = RESOURCE_PER_CHANNEL_INSTANCE, + .vm_regs = { + {0x70000, true, 0}, + {0x80000, false, 0}, + {0x80000, false, 8} + }, + .poweron_reset = true, + .serialize = true, + .push_work_done = true, + .get_reloc_phys_addr = nvhost_t194_get_reloc_phys_addr, + .can_powergate = true, +}; + +struct nvhost_device_data t19_pva0_info = { + .version = PVA_HW_GEN1, + .num_channels = 1, + .clocks = { + {"nafll_pva_vps", UINT_MAX,}, + {"nafll_pva_core", UINT_MAX,}, + {"axi", UINT_MAX,}, + {"vps0", UINT_MAX,}, + {"vps1", UINT_MAX,}, + }, + .ctrl_ops = &tegra_pva_ctrl_ops, + .devfs_name_family = "pva", + .class = NV_PVA0_CLASS_ID, + .autosuspend_delay = 500, + .finalize_poweron = pva_finalize_poweron, + .prepare_poweroff = pva_prepare_poweroff, + .firmware_name = "nvhost_pva010.fw", + .resource_policy = RESOURCE_PER_CHANNEL_INSTANCE, + .vm_regs = { + {0x70000, true, 0}, + {0x80000, false, 0}, + {0x80000, false, 8} + }, + .poweron_reset = true, + .serialize = true, + .get_reloc_phys_addr = nvhost_t194_get_reloc_phys_addr, + .can_powergate = true, +}; + +struct nvhost_device_data t23x_pva0_info = { + .version = PVA_HW_GEN2, + .num_channels = 1, + .clocks = { + {"axi", UINT_MAX,}, + 
{"vps0", UINT_MAX,}, + {"vps1", UINT_MAX,}, + }, + .ctrl_ops = &tegra_pva_ctrl_ops, + .devfs_name_family = "pva", + .class = NV_PVA0_CLASS_ID, + .autosuspend_delay = 500, + .finalize_poweron = pva_finalize_poweron, + .prepare_poweroff = pva_prepare_poweroff, + .firmware_name = "nvhost_pva020.fw", + .resource_policy = RESOURCE_PER_CHANNEL_INSTANCE, + .vm_regs = { + {0x240000, false, 0}, + {0x240004, false, 0}, + {0x240008, false, 0}, + {0x24000c, false, 0}, + {0x240010, false, 0}, + {0x240014, false, 0}, + {0x240018, false, 0}, + {0x24001c, false, 0}, + {0x240020, false, 0}, + {0x240020, false, 8}, + {0x240020, false, 16}, + {0x240024, false, 0}, + {0x240024, false, 8} + }, + .poweron_reset = true, + .serialize = true, + .get_reloc_phys_addr = nvhost_t23x_get_reloc_phys_addr, + .can_powergate = true, +}; + +/* Map PVA-A and PVA-B to respective configuration items in nvhost */ +static struct of_device_id tegra_pva_of_match[] = { + { + .name = "pva0", + .compatible = "nvidia,tegra194-pva", + .data = (struct nvhost_device_data *)&t19_pva0_info }, + { + .name = "pva1", + .compatible = "nvidia,tegra194-pva", + .data = (struct nvhost_device_data *)&t19_pva1_info }, + { + .name = "pva0", + .compatible = "nvidia,tegra234-pva", + .data = (struct nvhost_device_data *)&t23x_pva0_info }, + { + .name = "pva0", + .compatible = "nvidia,tegra234-pva-hv", + .data = (struct nvhost_device_data *)&t23x_pva0_info }, + { }, +}; + +MODULE_DEVICE_TABLE(of, tegra_pva_of_match); + +#define EVP_REG_NUM 8 +static u32 pva_get_evp_reg(u32 index) +{ + u32 evp_reg[EVP_REG_NUM] = { + evp_reset_addr_r(), + evp_undef_addr_r(), + evp_swi_addr_r(), + evp_prefetch_abort_addr_r(), + evp_data_abort_addr_r(), + evp_rsvd_addr_r(), + evp_irq_addr_r(), + evp_fiq_addr_r() + }; + + return evp_reg[index]; +} + +static u32 evp_reg_val[EVP_REG_NUM] = { + EVP_RESET_VECTOR, + EVP_UNDEFINED_INSTRUCTION_VECTOR, + EVP_SVC_VECTOR, + EVP_PREFETCH_ABORT_VECTOR, + EVP_DATA_ABORT_VECTOR, + EVP_RESERVED_VECTOR, + EVP_IRQ_VECTOR, + EVP_FIQ_VECTOR +}; + +/** + * Allocate and set a circular array for FW to provide status info about + * completed tasks from all the PVA R5 queues. + * To avoid possible overwrite of info, the size of circular array needs to be + * sufficient to hold the status info for maximum allowed number of tasks + * across all PVA R5 queues at any time. + * PVA R5 FW shall fill task status info at incremental positions in the array + * while PVA KMD shall read the task status info at incremental positions from + * the array. + * Both PVA R5 FW and PVA KMD shall independently maintain an internal index + * to dictate the current write position and read position respectively. + */ +static int pva_alloc_task_status_buffer(struct pva *pva) +{ + size_t min_size = 0U; + + /* Determine worst case size required for circular array based on + * maximum allowed per PVA engine and maximum allowed number of task + * submissions per PVA queue at any time. 
+ */ + min_size = MAX_PVA_TASK_COUNT * sizeof(struct pva_task_error_s); + + pva->priv_circular_array.size = ALIGN(min_size + 64, 64); + + pva->priv_circular_array.va = + dma_alloc_coherent(&pva->aux_pdev->dev, + pva->priv_circular_array.size, + &pva->priv_circular_array.pa, GFP_KERNEL); + + if (pva->priv_circular_array.va == NULL) { + pr_err("pva: failed to alloc mem for task status info"); + return -ENOMEM; + } + + INIT_WORK(&pva->task_update_work, pva_task_update); + + atomic_set(&pva->n_pending_tasks, 0); + pva->task_status_workqueue = + create_workqueue("pva_task_status_workqueue"); + return 0; +} + +static void pva_reset_task_status_buffer(struct pva *pva) +{ + flush_workqueue(pva->task_status_workqueue); + WARN_ON(atomic_read(&pva->n_pending_tasks) != 0); + atomic_set(&pva->n_pending_tasks, 0); + pva->circular_array_rd_pos = 0U; + pva->circular_array_wr_pos = 0U; +} + +static void pva_free_task_status_buffer(struct pva *pva) +{ + flush_workqueue(pva->task_status_workqueue); + destroy_workqueue(pva->task_status_workqueue); + dma_free_coherent(&pva->aux_pdev->dev, pva->priv_circular_array.size, + pva->priv_circular_array.va, + pva->priv_circular_array.pa); +} + +static int pva_init_fw(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + struct pva_fw *fw_info = &pva->fw_info; + struct pva_dma_alloc_info *priv1_buffer; + struct pva_dma_alloc_info *priv2_buffer; + u32 *ucode_ptr; + int err = 0; + u64 ucode_useg_addr; + u32 sema_value = 0; + u32 dram_base; + uint64_t useg_addr; + u32 i; + + nvpva_dbg_fn(pva, ""); + + priv1_buffer = &fw_info->priv1_buffer; + priv2_buffer = &fw_info->priv2_buffer; + ucode_ptr = priv1_buffer->va; + + /* Set the Ucode Header address for R5 */ + /* Program user seg subtracting the offset */ + ucode_useg_addr = 0; + host1x_writel(pdev, cfg_r5user_lsegreg_r(pva->version), + PVA_LOW32(ucode_useg_addr)); + host1x_writel(pdev, cfg_r5user_usegreg_r(pva->version), + PVA_EXTRACT64(ucode_useg_addr, 39, 32, u32)); + + /* Program the extra memory to be used by R5 */ + ucode_useg_addr = priv2_buffer->pa - fw_info->priv2_reg_offset; + host1x_writel(pdev, cfg_priv_ar2_start_r(pva->version), + fw_info->priv2_reg_offset); + host1x_writel(pdev, cfg_priv_ar2_end_r(pva->version), + fw_info->priv2_reg_offset + priv2_buffer->size); + host1x_writel(pdev, cfg_priv_ar2_lsegreg_r(pva->version), + PVA_LOW32(ucode_useg_addr)); + host1x_writel(pdev, cfg_priv_ar2_usegreg_r(pva->version), + PVA_EXTRACT64(ucode_useg_addr, 39, 32, u32)); + + /* Write EVP registers */ + for (i = 0; i < EVP_REG_NUM; i++) + host1x_writel(pdev, pva_get_evp_reg(i), evp_reg_val[i]); + + host1x_writel(pdev, + cfg_priv_ar1_start_r(pva->version), + FW_CODE_DATA_START_ADDR); + host1x_writel(pdev, + cfg_priv_ar1_end_r(pva->version), + FW_CODE_DATA_END_ADDR); + useg_addr = priv1_buffer->pa - FW_CODE_DATA_START_ADDR; + host1x_writel(pdev, + cfg_priv_ar1_lsegreg_r(pva->version), + PVA_LOW32(useg_addr)); + host1x_writel(pdev, + cfg_priv_ar1_usegreg_r(pva->version), + PVA_EXTRACT64((useg_addr), 39, 32, u32)); + + /* Indicate the OS is waiting for PVA ready Interrupt */ + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_WFI; + + if (pva->r5_dbg_wait) { + sema_value = PVA_WAIT_DEBUG; + pva->timeout_enabled = false; + } + + if (pva->slcg_disable) + sema_value |= PVA_CG_DISABLE; + + if (pva->vmem_war_disable) + sema_value |= PVA_VMEM_RD_WAR_DISABLE; + + sema_value |= (PVA_BOOT_INT | PVA_TEST_WAIT | PVA_VMEM_MBX_WAR_ENABLE); + 
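/* Publish the assembled boot and debug configuration flags to firmware through HSP shared semaphore 0. */ +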
host1x_writel(pdev, hsp_ss0_set_r(), sema_value); + + if (pva->version == PVA_HW_GEN1) { + host1x_writel(pdev, hsp_ss2_set_r(), 0xFFFFFFFF); + host1x_writel(pdev, hsp_ss3_set_r(), 0xFFFFFFFF); + } else { + if (pva->syncpts.syncpt_start_iova_r > 0xFBFFFFFF) { + dev_err(&pdev->dev, + "rd sema base greater than 32 bit "); + err = -EINVAL; + goto out; + } + + sema_value = (u32)pva->syncpts.syncpt_start_iova_r; + if (iommu_get_domain_for_dev(&pdev->dev)) + dram_base = DRAM_PVA_IOVA_START_ADDRESS; + else + dram_base = DRAM_PVA_NO_IOMMU_START_ADDRESS; + + if (sema_value < dram_base) { + dev_err(&pdev->dev, + "rd sema base less than dram base"); + err = -EINVAL; + goto out; + } + + sema_value -= dram_base; + + host1x_writel(pdev, hsp_ss2_clr_r(), 0xFFFFFFFF); + host1x_writel(pdev, hsp_ss2_set_r(), sema_value); + + if (pva->syncpts.syncpt_start_iova_rw > 0xFFF7FFFF) { + dev_err(&pdev->dev, + "rw sema base greater than 32 bit "); + err = -EINVAL; + goto out; + } + + sema_value = (u32)pva->syncpts.syncpt_start_iova_rw; + if (sema_value < dram_base) { + dev_err(&pdev->dev, + "rw sema base less than dram base"); + err = -EINVAL; + goto out; + } + + sema_value -= dram_base; + + host1x_writel(pdev, hsp_ss3_clr_r(), 0xFFFFFFFF); + host1x_writel(pdev, hsp_ss3_set_r(), sema_value); + } + + /* Take R5 out of reset */ + host1x_writel(pdev, proc_cpuhalt_r(), + proc_cpuhalt_ncpuhalt_f(proc_cpuhalt_ncpuhalt_done_v())); + + nvpva_dbg_fn(pva, "Waiting for PVA to be READY"); + + /* Wait PVA to report itself as ready */ + err = pva_mailbox_wait_event(pva, 60000); + if (err) { + dev_err(&pdev->dev, "mbox timedout boot sema=%x\n", + (host1x_readl(pdev, hsp_ss0_state_r()))); + goto wait_timeout; + } + + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_INVALID; + + nvpva_dbg_fn(pva, "PVA boot returned: %d", err); + + pva_reset_task_status_buffer(pva); + (void)memset(pva->priv_circular_array.va, 0, + pva->priv_circular_array.size); +wait_timeout: +out: + return err; +} + +static int pva_free_fw(struct platform_device *pdev, struct pva *pva) +{ + struct pva_fw *fw_info = &pva->fw_info; + + if (pva->boot_from_file) { + if (pva->priv1_dma.va) + dma_free_coherent(&pva->aux_pdev->dev, pva->priv1_dma.size, + pva->priv1_dma.va, pva->priv1_dma.pa); + } else { + if (pva->map_co_needed && (pva->priv1_dma.pa != 0)) { + nvpva_unmap_region(&pdev->dev, + pva->priv1_dma.pa, + pva->co->size, + DMA_BIDIRECTIONAL); + } + + pva->co->base_pa = 0; + pva->co->base_va = 0; + } + + pva->priv1_dma.pa = 0; + if (pva->priv2_dma.va) { + dma_free_coherent(&pva->aux_pdev->dev, pva->priv2_dma.size, + pva->priv2_dma.va, pva->priv2_dma.pa); + pva->priv2_dma.va = 0; + pva->priv2_dma.pa = 0; + } + + memset(fw_info, 0, sizeof(struct pva_fw)); + + return 0; +} + +int nvpva_request_firmware(struct platform_device *pdev, const char *fw_name, + const struct firmware **ucode_fw) +{ + int err = 0; + +#if IS_ENABLED(CONFIG_TEGRA_GRHOST) + *ucode_fw = nvhost_client_request_firmware(pdev, fw_name, true); + if (*ucode_fw == NULL) + err = -ENOENT; +#else + err = request_firmware(ucode_fw, fw_name, &pdev->dev); +#endif + return err; +} + +static int +pva_read_ucode_file(struct platform_device *pdev, + const char *fw_name, + struct pva *pva) +{ + int err = 0; + struct pva_fw *fw_info = &pva->fw_info; + int w; + u32 *ucode_ptr; + const struct firmware *ucode_fw = NULL; + + err = nvpva_request_firmware(pva->pdev, fw_name, &ucode_fw); + if (err != 0) { + dev_err(&pdev->dev, "Failed to load the %s firmware\n", + fw_name); + return err; + } + + fw_info->priv1_buffer.size 
= ucode_fw->size; + pva->priv1_dma.size = FW_CODE_DATA_END_ADDR - FW_CODE_DATA_START_ADDR; + pva->priv1_dma.size = ALIGN(pva->priv1_dma.size + SZ_4K, SZ_4K); + /* Allocate memory to R5 for app code, data or to log information */ + pva->priv1_dma.va = dma_alloc_coherent(&pdev->dev, pva->priv1_dma.size, + &pva->priv1_dma.pa, GFP_KERNEL); + if (!pva->priv1_dma.va) { + err = -ENOMEM; + goto clean_up; + } + + fw_info->priv1_buffer.va = pva->priv1_dma.va; + fw_info->priv1_buffer.pa = pva->priv1_dma.pa; + ucode_ptr = fw_info->priv1_buffer.va; + + /* copy the whole thing taking into account endianness */ + for (w = 0; w < ucode_fw->size / sizeof(u32); w++) + ucode_ptr[w] = le32_to_cpu(((__le32 *)ucode_fw->data)[w]); +clean_up: + release_firmware(ucode_fw); + + return err; +} + +static int pva_read_ucode_co(struct platform_device *pdev, + struct pva *pva) +{ + int err = 0; + struct pva_fw *fw_info = &pva->fw_info; + + if (pva->map_co_needed) { + err = nvpva_map_region(&pdev->dev, + pva->co->base, + pva->co->size, + &pva->priv1_dma.pa, + DMA_BIDIRECTIONAL); + if (err) { + err = -ENOMEM; + goto out; + } + } else { + pva->priv1_dma.pa = pva->co->base; + pva->priv1_dma.va = 0; + } + + fw_info->priv1_buffer.va = pva->priv1_dma.va; + fw_info->priv1_buffer.pa = pva->priv1_dma.pa; + fw_info->priv1_buffer.size = pva->co->size; + pva->priv1_dma.size = pva->co->size; + +out: + return err; +} + +static int pva_read_ucode(struct platform_device *pdev, const char *fw_name, + struct pva *pva) +{ + int err = 0; + struct pva_fw *fw_info = &pva->fw_info; + + if (pva->boot_from_file) + err = pva_read_ucode_file(pdev, fw_name, pva); + else + err = pva_read_ucode_co(pdev, pva); + + nvpva_dbg_fn(pva, "co iova = %llx\n", pva->priv1_dma.pa); + + fw_info->priv2_buffer.size = FW_DEBUG_DATA_TOTAL_SIZE; + + /* Make sure the address is aligned to 4K */ + pva->priv2_dma.size = ALIGN(fw_info->priv2_buffer.size, SZ_4K); + + /* Allocate memory to R5 for app code, data or to log information */ + pva->priv2_dma.va = dma_alloc_coherent(&pva->aux_pdev->dev, pva->priv2_dma.size, + &pva->priv2_dma.pa, GFP_KERNEL); + if (!pva->priv2_dma.va) { + err = -ENOMEM; + goto out; + } + + fw_info->priv2_buffer.pa = pva->priv2_dma.pa; + fw_info->priv2_buffer.va = pva->priv2_dma.va; + fw_info->priv2_reg_offset = FW_DEBUG_DATA_START_ADDR; + + /* setup trace buffer */ + fw_info->trace_buffer_size = FW_TRACE_BUFFER_SIZE; + pva->pva_trace.addr = fw_info->priv2_buffer.va; + pva->pva_trace.size = FW_TRACE_BUFFER_SIZE; + pva->pva_trace.offset = 0L; + + /* setup FW debug log buffer */ + pva->fw_debug_log.addr = fw_info->priv2_buffer.va + + FW_TRACE_BUFFER_SIZE + + FW_CODE_COVERAGE_BUFFER_SIZE; +out: + return err; +} + +static int pva_load_fw(struct platform_device *pdev, struct pva *pva) +{ + int err = 0; + struct nvhost_device_data *pdata = platform_get_drvdata(pva->pdev); + + nvpva_dbg_fn(pva, ""); + + err = pva_read_ucode(pdev, pdata->firmware_name, pva); + if (err < 0) + goto load_fw_err; + + return err; + +load_fw_err: + pva_free_fw(pdev, pva); + + return err; +} + +int pva_get_firmware_version(struct pva *pva, struct pva_version_info *info) +{ + uint32_t flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status; + struct pva_cmd_s cmd; + int err = 0; + u32 nregs; + + nregs = pva_cmd_R5_version(&cmd, flags); + + /* Submit request to PVA and wait for response */ + err = pva_mailbox_send_cmd_sync(pva, &cmd, nregs, &status); + if (err < 0) { + nvpva_warn(&pva->pdev->dev, + "mbox get firmware version cmd failed: %d\n", err); 
+ + return err; + } + + info->pva_r5_version = status.status[PVA_CMD_STATUS4_INDEX]; + info->pva_compat_version = status.status[PVA_CMD_STATUS5_INDEX]; + info->pva_revision = status.status[PVA_CMD_STATUS6_INDEX]; + info->pva_built_on = status.status[PVA_CMD_STATUS7_INDEX]; + + return err; +} + +int pva_boot_kpi(struct pva *pva, u64 *r5_boot_time) +{ + uint32_t flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status; + struct pva_cmd_s cmd; + int err = 0; + u32 nregs; + + nregs = pva_cmd_pva_uptime(&cmd, 255, flags); + + /* Submit request to PVA and wait for response */ + err = pva_mailbox_send_cmd_sync(pva, &cmd, nregs, &status); + if (err < 0) { + nvpva_warn(&pva->pdev->dev, "mbox get uptime cmd failed: %d\n", + err); + return err; + } + *r5_boot_time = status.status[PVA_CMD_STATUS7_INDEX]; + *r5_boot_time = ((*r5_boot_time) << 32); + *r5_boot_time = (*r5_boot_time) | status.status[PVA_CMD_STATUS6_INDEX]; + + return err; +} + +int pva_set_log_level(struct pva *pva, u32 log_level, bool mailbox_locked) +{ + uint32_t flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status; + struct pva_cmd_s cmd; + int err = 0; + u32 nregs; + + nregs = pva_cmd_set_logging_level(&cmd, log_level, flags); + + if (mailbox_locked) + pva_mailbox_send_cmd_sync_locked(pva, &cmd, nregs, &status); + else + pva_mailbox_send_cmd_sync(pva, &cmd, nregs, &status); + + if (err < 0) + nvpva_warn(&pva->pdev->dev, "mbox set log level failed: %d\n", + err); + + return err; +} + +u32 nvpva_get_id_idx(struct pva *dev, struct platform_device *pdev) +{ + s32 sid; + u32 i; + + if (pdev == NULL) + return 0; + + sid = nvpva_get_device_hwid(pdev, 0); + if (sid < 0) + return UINT_MAX; + + for (i = 0; i < dev->sid_count; i++) + if (dev->sids[i] == sid) + return i; + + return UINT_MAX; +} + +int nvpva_get_device_hwid(struct platform_device *pdev, + unsigned int id) +{ + struct device *dev = &pdev->dev; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0) + struct iommu_fwspec *fwspec = dev->iommu_fwspec; +#else + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); +#endif + + if (!fwspec) + return -EINVAL; + + if (id >= fwspec->num_ids) + return -EINVAL; + + return fwspec->ids[id] & 0xffff; +} + +static int nvpva_write_hwid(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + u32 *id_idx; + u32 *reg_idx; + int *streamids = pva->sids; + u32 reg_array[16] = {0}; + + if (pva->version == PVA_HW_GEN1) { + id_idx = vm_regs_sid_idx_t19x; + reg_idx = vm_regs_reg_idx_t19x; + } else { + id_idx = vm_regs_sid_idx_t234; + reg_idx = vm_regs_reg_idx_t234; + } + + /* Go through the StreamIDs and assemble register values */ + for (i = 0; i < ARRAY_SIZE(pdata->vm_regs); i++) { + u64 addr = pdata->vm_regs[i].addr; + u32 shift = pdata->vm_regs[i].shift; + u32 val; + + /* Break if this was the last StreamID */ + if (!addr) + break; + + /* Update the StreamID value */ + val = ((streamids[id_idx[i]] & 0x000000FF) << shift); + reg_array[reg_idx[i]] |= val; + } + + /*write register values */ + for (i = 0; i < ARRAY_SIZE(pdata->vm_regs); i++) { + u64 addr = pdata->vm_regs[i].addr; + u32 val; + + /* Break if this was the last StreamID */ + if (!addr) + break; + + val = reg_array[reg_idx[i]]; + nvpva_dbg_fn(pva, "i= %d, reg_idx[i] = %d, val = %d\n", + i, reg_idx[i], val); + host1x_writel(pdev, addr, val); + } + + return 0; +} + +int pva_finalize_poweron(struct platform_device *pdev) +{ + struct 
nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int err = 0; + int i; + u64 timestamp; + u64 timestamp2; + + timestamp = nvpva_get_tsc_stamp(); + + nvpva_dbg_fn(pva, ""); + if (!pva->boot_from_file) { + nvpva_dbg_fn(pva, "boot from co"); + pva->co = pva_fw_co_get_info(pva); + + if (pva->co == NULL) { + nvpva_dbg_fn(pva, "failed to get carveout"); + err = -ENOMEM; + goto err_poweron; + } + + nvpva_dbg_fn(pva, "CO base = %llx, CO size = %llu\n", + (u64)pva->co->base, (u64)pva->co->size); + } + + /* Enable LIC_INTERRUPT line for HSP1, H1X and WDT */ + if (pva->version == PVA_HW_GEN1) { + host1x_writel(pva->pdev, sec_lic_intr_enable_r(pva->version), + sec_lic_intr_enable_hsp_f(SEC_LIC_INTR_HSP1) | + sec_lic_intr_enable_h1x_f(SEC_LIC_INTR_H1X_ALL_19) | + sec_lic_intr_enable_wdt_f(SEC_LIC_INTR_WDT)); + } else { + host1x_writel(pva->pdev, sec_lic_intr_enable_r(pva->version), + sec_lic_intr_enable_hsp_f(SEC_LIC_INTR_HSP1) | + sec_lic_intr_enable_h1x_f(SEC_LIC_INTR_H1X_ALL_23) | + sec_lic_intr_enable_wdt_f(SEC_LIC_INTR_WDT)); + + } + + nvpva_write_hwid(pdev); + if (!pva->boot_from_file) + err = pva_load_fw(pdev, pva); + else + err = pva_load_fw(pva->aux_pdev, pva); + + if (err < 0) { + nvpva_err(&pdev->dev, " pva fw failed to load\n"); + goto err_poweron; + } + + for (i = 0; i < pva->version_config->irq_count; i++) + enable_irq(pva->irq[i]); + + err = pva_init_fw(pdev); + if (err < 0) { + nvpva_err(&pdev->dev, " pva fw failed to init\n"); + goto err_poweron; + } + + timestamp2 = nvpva_get_tsc_stamp() - timestamp; + + pva_set_log_level(pva, pva->log_level, true); + pva->booted = true; + + timestamp = nvpva_get_tsc_stamp() - timestamp; + + nvpva_dbg_prof(pva, "Power on took %lld us, without log level%lld\n", + (32 * timestamp)/1000, (32 * timestamp2)/1000); + + return err; + +err_poweron: + for (i = 0; i < pva->version_config->irq_count; i++) + disable_irq(pva->irq[i]); + return err; +} + +void save_fw_debug_log(struct pva *pva) +{ + if (pva->fw_debug_log.saved_log != NULL && + pva->fw_debug_log.addr != NULL) { + mutex_lock(&pva->fw_debug_log.saved_log_lock); + memcpy(pva->fw_debug_log.saved_log, pva->fw_debug_log.addr, + pva->fw_debug_log.size); + mutex_unlock(&pva->fw_debug_log.saved_log_lock); + } +} + +int pva_prepare_poweroff(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + + /* + * Disable IRQs. Interrupt handler won't be under execution after the + * call returns. + */ + for (i = 0; i < pva->version_config->irq_count; i++) + disable_irq(pva->irq[i]); + + /* disable error reporting to HSM*/ + pva_disable_ec_err_reporting(pva); + + /* Put PVA to reset to ensure that the firmware doesn't get accessed */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + reset_control_acquire(pdata->reset_control); +#endif + reset_control_assert(pdata->reset_control); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + reset_control_release(pdata->reset_control); +#endif + save_fw_debug_log(pva); + pva->booted = false; + pva_free_fw(pdev, pva); + + return 0; +} + +#ifdef CONFIG_TEGRA_SOC_HWPM +int pva_hwpm_ip_pm(void *ip_dev, bool disable) +{ + int err = 0; + struct platform_device *dev = (struct platform_device *)ip_dev; + + struct nvhost_device_data *pdata = platform_get_drvdata(dev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_info(pva, "ip power management %s", + disable ? 
"disable" : "enable"); + + if (disable) { + err = nvhost_module_busy(ip_dev); + if (err < 0) + dev_err(&dev->dev, "nvhost_module_busy failed"); + } else { + nvhost_module_idle(ip_dev); + } + + return err; +} + +int pva_hwpm_ip_reg_op(void *ip_dev, enum tegra_soc_hwpm_ip_reg_op reg_op, + u32 inst_element_index, u64 reg_offset, u32 *reg_data) +{ + struct platform_device *dev = (struct platform_device *)ip_dev; + struct nvhost_device_data *pdata = platform_get_drvdata(dev); + struct pva *pva = pdata->private_data; + + if (reg_offset > UINT_MAX) + return -EINVAL; + + nvpva_dbg_info(pva, "reg_op %d reg_offset %llu", reg_op, reg_offset); + + if (reg_op == TEGRA_SOC_HWPM_IP_REG_OP_READ) + *reg_data = host1x_readl(dev, + (hwpm_get_offset() + (unsigned int)reg_offset)); + else if (reg_op == TEGRA_SOC_HWPM_IP_REG_OP_WRITE) + host1x_writel(dev, + (hwpm_get_offset() + (unsigned int)reg_offset), + *reg_data); + + return 0; +} +#endif + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) +static ssize_t clk_cap_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct nvhost_device_data *pdata = + container_of(kobj, struct nvhost_device_data, clk_cap_kobj); + /* i is indeed 'index' here after type conversion */ + int ret, i = attr - pdata->clk_cap_attrs; + struct clk_bulk_data *clks = &pdata->clks[i]; + struct clk *clk = clks->clk; + unsigned long freq_cap; + long freq_cap_signed; + + ret = kstrtoul(buf, 0, &freq_cap); + if (ret) + return -EINVAL; + /* Remove previous freq cap to get correct rounted rate for new cap */ + ret = clk_set_max_rate(clk, UINT_MAX); + if (ret < 0) + return ret; + + freq_cap_signed = clk_round_rate(clk, freq_cap); + if (freq_cap_signed < 0) + return -EINVAL; + freq_cap = (unsigned long)freq_cap_signed; + /* Apply new freq cap */ + ret = clk_set_max_rate(clk, freq_cap); + if (ret < 0) + return ret; + + /* Update the clock rate */ + clk_set_rate(clks->clk, freq_cap); + if (ret < 0) + return ret; + + return count; +} + +static ssize_t clk_cap_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct nvhost_device_data *pdata = + container_of(kobj, struct nvhost_device_data, clk_cap_kobj); + /* i is indeed 'index' here after type conversion */ + int i = attr - pdata->clk_cap_attrs; + struct clk_bulk_data *clks = &pdata->clks[i]; + struct clk *clk = clks->clk; + long max_rate; + + max_rate = clk_round_rate(clk, UINT_MAX); + if (max_rate < 0) + return max_rate; + + return snprintf(buf, PAGE_SIZE, "%ld\n", max_rate); +} + +static struct kobj_type nvpva_kobj_ktype = { + .sysfs_ops = &kobj_sysfs_ops, +}; + +#endif + +static int pva_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct nvhost_device_data *pdata; + const struct of_device_id *match; + struct pva *pva; + int err = 0; + size_t i; + +#ifdef CONFIG_TEGRA_SOC_HWPM + u32 offset; +#endif + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) + struct kobj_attribute *attr = NULL; + int j = 0; + struct clk_bulk_data *clks; + struct clk *c; +#endif + + match = of_match_device(tegra_pva_of_match, dev); + if (!match) { + dev_err(dev, "no match for pva dev\n"); + err = -ENODATA; + goto err_get_pdata; + } + + pdata = (struct nvhost_device_data *)match->data; + + WARN_ON(!pdata); + if (!pdata) { + dev_info(dev, "no platform data\n"); + err = -ENODATA; + goto err_get_pdata; + } +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) + of_platform_default_populate(dev->of_node, NULL, dev); +#endif + + if ((pdata->version != PVA_HW_GEN1) + && !is_cntxt_initialized()) { + dev_warn(&pdev->dev, 
+ "nvpva cntxt was not initialized, deferring probe."); + return -EPROBE_DEFER; + } + + if (pdata->version == PVA_HW_GEN1 && + tegra_get_sku_id() == 0x9E) { + dev_err(dev, "PVA IP is disabled in SKU\n"); + err = -ENODEV; + goto err_no_ip; + } + + if (pdata->version == PVA_HW_GEN1 && + tegra_get_sku_id() == 0x9F && pdata->class == NV_PVA1_CLASS_ID) { + dev_err(dev, "PVA1 IP is disabled in SKU\n"); + err = -ENODEV; + goto err_no_ip; + } + + pva = devm_kzalloc(dev, sizeof(*pva), GFP_KERNEL); + if (!pva) { + err = -ENOMEM; + goto err_alloc_pva; + } + + /* Initialize PVA private data */ + if (pdata->version == PVA_HW_GEN2) { + pva->version = PVA_HW_GEN2; + pdata->firmware_name = "nvpva_020.fw"; + pdata->firmware_not_in_subdir = true; + pva->submit_cmd_mode = PVA_SUBMIT_MODE_MMIO_CCQ; + pva->version_config = &pva_t23x_config; + } else { + pva->version = PVA_HW_GEN1; + pdata->firmware_name = "nvpva_010.fw"; + pdata->firmware_not_in_subdir = true; + pva->submit_cmd_mode = PVA_SUBMIT_MODE_MAILBOX; + pva->version_config = &pva_t19x_config; + } + + pva->pdev = pdev; + + /* Enable powergating and timeout only on silicon */ + if (!tegra_platform_is_silicon()) { + pdata->can_powergate = false; + pva->timeout_enabled = false; + } else { + pva->timeout_enabled = true; + } + + /* Initialize nvhost specific data */ + pdata->pdev = pdev; + mutex_init(&pdata->lock); + pdata->private_data = pva; + platform_set_drvdata(pdev, pdata); + mutex_init(&pva->mailbox_mutex); + mutex_init(&pva->ccq_mutex); + pva->submit_task_mode = PVA_SUBMIT_MODE_MMIO_CCQ; + pva->slcg_disable = 0; + pva->vmem_war_disable = 0; + pva->vpu_printf_enabled = true; + pva->vpu_debug_enabled = true; + pva->driver_log_mask = NVPVA_DEFAULT_DBG_MASK; + pva->profiling_level = 0; + pva->stats_enabled = false; + memset(&pva->vpu_util_info, 0, sizeof(pva->vpu_util_info)); + pva->syncpts.syncpts_mapped_r = false; + pva->syncpts.syncpts_mapped_rw = false; + nvpva_dbg_fn(pva, "match. 
compatible = %s", match->compatible); + if (is_tegra_hypervisor_mode()) + pva->map_co_needed = false; + else + pva->map_co_needed = true; + +#ifdef CONFIG_PVA_CO_DISABLED + pva->boot_from_file = true; +#else + if (pdata->version == PVA_HW_GEN1) + pva->boot_from_file = true; + else + pva->boot_from_file = false; +#endif + +#ifdef __linux__ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE + if (tegra_chip_get_revision() != TEGRA194_REVISION_A01) +#else + if (pdata->version != PVA_HW_GEN1) +#endif + pva->vmem_war_disable = 1; +#endif +#endif + + /* Map MMIO range to kernel space */ + err = nvhost_client_device_get_resources(pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "nvhost_client_device_get_resources failed\n"); + goto err_get_resources; + } + + /* Get clocks */ + err = nvhost_module_init(pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "nvhost_module_init failed\n"); + goto err_module_init; + } + + /* + * Add this to nvhost device list, initialize scaling, + * setup memory management for the device, create dev nodes + */ + err = nvhost_client_device_init(pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "nvhost_client_device_init failed\n"); + goto err_client_device_init; + } + + if (pdata->version != PVA_HW_GEN1) { + pva->aux_pdev = + nvpva_iommu_context_dev_allocate(aux_dev_name, + aux_dev_name_len, + false); + if (pva->aux_pdev == NULL) { + dev_err(&pva->pdev->dev, + "failed to allocate aux device"); + goto err_context_alloc; + } + } else { + pva->aux_pdev = pva->pdev; + } + + pva->pool = nvpva_queue_init(pdev, pva->aux_pdev, &pva_queue_ops, + MAX_PVA_QUEUE_COUNT); + if (IS_ERR(pva->pool)) { + err = PTR_ERR(pva->pool); + goto err_queue_init; + } + + err = pva_alloc_task_status_buffer(pva); + if (err) { + dev_err(&pva->pdev->dev, "failed to init task status buffer"); + goto err_status_init; + } + + err = nvpva_client_context_init(pva); + if (err) { + dev_err(&pva->pdev->dev, "failed to init client context"); + goto err_client_ctx_init; + } + + err = pva_register_isr(pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "failed to register isr"); + goto err_isr_init; + } + + for (i = 0; i < pva->version_config->irq_count; i++) + init_waitqueue_head(&pva->cmd_waitqueue[i]); + + pva_abort_init(pva); + + err = nvhost_syncpt_unit_interface_init(pdev); + if (err) + goto err_mss_init; + + err = nvpva_syncpt_unit_interface_init(pdev, pva->aux_pdev); + if (err) + goto err_syncpt_xface_init; + + mutex_init(&pva->pva_auth.allow_list_lock); + mutex_init(&pva->pva_auth_sys.allow_list_lock); + pva->pva_auth.pva_auth_enable = true; + pva->pva_auth_sys.pva_auth_enable = true; + +#ifdef CONFIG_DEBUG_FS + pva_debugfs_init(pdev); +#endif + + pva->sid_count = 0; + err = nvpva_iommu_context_dev_get_sids(&pva->sids[1], + &pva->sid_count, + NVPVA_USER_VM_COUNT); + if (err) + goto err_iommu_ctxt_init; + + pva->sids[0] = nvpva_get_device_hwid(pdev, 0); + if (pva->sids[0] < 0) { + err = pva->sids[0]; + goto err_iommu_ctxt_init; + } + + ++(pva->sid_count); + +#ifdef CONFIG_TEGRA_SOC_HWPM + offset = hwpm_get_offset(); + + if ((UINT_MAX - offset) < pdev->resource[0].start) { + err = -ENODEV; + goto err_mss_init; + } + + nvpva_dbg_info(pva, "hwpm ip %s register", pdev->name); + pva->hwpm_ip_ops.ip_dev = (void *)pdev; + pva->hwpm_ip_ops.ip_base_address = (pdev->resource[0].start + offset); + pva->hwpm_ip_ops.resource_enum = TEGRA_SOC_HWPM_RESOURCE_PVA; + pva->hwpm_ip_ops.hwpm_ip_pm = &pva_hwpm_ip_pm; + pva->hwpm_ip_ops.hwpm_ip_reg_op = &pva_hwpm_ip_reg_op; + 
tegra_soc_hwpm_ip_register(&pva->hwpm_ip_ops); +#endif + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) + if (pdata->num_clks > 0) { + err = kobject_init_and_add(&pdata->clk_cap_kobj, &nvpva_kobj_ktype, + &pdev->dev.kobj, "%s", "clk_cap"); + if (err) { + dev_err(dev, "Could not add dir 'clk_cap'\n"); + goto err_iommu_ctxt_init; + } + + pdata->clk_cap_attrs = devm_kcalloc(dev, pdata->num_clks, + sizeof(*attr), GFP_KERNEL); + if (!pdata->clk_cap_attrs) + goto err_cleanup_sysfs; + + for (j = 0; j < pdata->num_clks; ++j) { + clks = &pdata->clks[j]; + c = clks->clk; + if (!c) + continue; + + attr = &pdata->clk_cap_attrs[j]; + attr->attr.name = __clk_get_name(c); + /* octal permission is preferred nowadays */ + attr->attr.mode = 0644; + attr->show = clk_cap_show; + attr->store = clk_cap_store; + sysfs_attr_init(&attr->attr); + if (sysfs_create_file(&pdata->clk_cap_kobj, &attr->attr)) { + dev_err(dev, "Could not create sysfs attribute %s\n", + __clk_get_name(c)); + err = -EIO; + goto err_cleanup_sysfs; + } + } + } +#endif + + return 0; + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) +err_cleanup_sysfs: + /* kobj of nvpva_kobj_ktype cleans up sysfs entries automatically */ + kobject_put(&pdata->clk_cap_kobj); +#endif +err_iommu_ctxt_init: + nvpva_syncpt_unit_interface_deinit(pdev, pva->aux_pdev); +err_syncpt_xface_init: +err_mss_init: +err_isr_init: + nvpva_client_context_deinit(pva); +err_client_ctx_init: + pva_free_task_status_buffer(pva); +err_status_init: + nvpva_queue_deinit(pva->pool); +err_queue_init: + if (pdata->version != PVA_HW_GEN1) + nvpva_iommu_context_dev_release(pva->aux_pdev); +err_context_alloc: + nvhost_client_device_release(pdev); +err_client_device_init: + nvhost_module_deinit(pdev); +err_module_init: +err_get_resources: + devm_kfree(dev, pva); +err_alloc_pva: +err_no_ip: +err_get_pdata: + + return err; +} + +static int __exit pva_remove(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) + struct kobj_attribute *attr = NULL; + + if (&pdata->clk_cap_kobj) { + for (i = 0; i < pdata->num_clks; i++) { + attr = &pdata->clk_cap_attrs[i]; + sysfs_remove_file(&pdata->clk_cap_kobj, &attr->attr); + } + + kobject_put(&pdata->clk_cap_kobj); + } +#endif + +#ifdef CONFIG_TEGRA_SOC_HWPM + tegra_soc_hwpm_ip_unregister(&pva->hwpm_ip_ops); +#endif + +#ifdef CONFIG_DEBUG_FS + pva_debugfs_deinit(pva); +#endif + if (pdata->version != PVA_HW_GEN1) + nvpva_iommu_context_dev_release(pva->aux_pdev); + + pva_auth_allow_list_destroy(&pva->pva_auth_sys); + pva_auth_allow_list_destroy(&pva->pva_auth); + pva_free_task_status_buffer(pva); + nvpva_syncpt_unit_interface_deinit(pdev, pva->aux_pdev); + nvpva_client_context_deinit(pva); + nvpva_queue_deinit(pva->pool); + nvhost_client_device_release(pdev); + for (i = 0; i < pva->version_config->irq_count; i++) + free_irq(pva->irq[i], pva); + + nvhost_module_deinit(pdev); + mutex_destroy(&pdata->lock); + mutex_destroy(&pva->mailbox_mutex); + mutex_destroy(&pva->ccq_mutex); + mutex_destroy(&pva->pva_auth.allow_list_lock); + mutex_destroy(&pva->pva_auth_sys.allow_list_lock); + + return 0; +} + +static struct platform_driver pva_driver = { + .probe = pva_probe, + .remove = __exit_p(pva_remove), + .driver = { + .owner = THIS_MODULE, + .name = "pva", +#ifdef CONFIG_OF + .of_match_table = tegra_pva_of_match, +#endif +#ifdef CONFIG_PM + .pm = &nvhost_module_pm_ops, +#endif + }, +}; +#if IS_ENABLED(CONFIG_TEGRA_GRHOST) +static int __init 
nvpva_init(void) +{ + int err; + + err = platform_driver_register(&nvpva_iommu_context_dev_driver); + if (err < 0) + return err; + + err = platform_driver_register(&pva_driver); + if (err < 0) + platform_driver_unregister(&nvpva_iommu_context_dev_driver); + + return err; +} +module_init(nvpva_init); +static void __exit nvpva_exit(void) +{ + platform_driver_unregister(&pva_driver); + platform_driver_unregister(&nvpva_iommu_context_dev_driver); +} +module_exit(nvpva_exit); +#else +static struct host1x_driver host1x_nvpva_driver = { + .driver = { + .name = "host1x-nvpva", + }, + .subdevs = tegra_pva_of_match, +}; +static int __init nvpva_init(void) +{ + int err; + + err = host1x_driver_register(&host1x_nvpva_driver); + if (err < 0) + goto out; + + err = platform_driver_register(&nvpva_iommu_context_dev_driver); + if (err < 0) + goto ctx_failed; + + err = platform_driver_register(&pva_driver); + if (err) + goto pva_failed; + + return err; + +pva_failed: + platform_driver_unregister(&nvpva_iommu_context_dev_driver); +ctx_failed: + host1x_driver_unregister(&host1x_nvpva_driver); +out: + return err; +} + +module_init(nvpva_init); +static void __exit nvpva_exit(void) +{ + platform_driver_unregister(&pva_driver); + platform_driver_unregister(&nvpva_iommu_context_dev_driver); + host1x_driver_unregister(&host1x_nvpva_driver); +} + +module_exit(nvpva_exit); +#endif + +#if KERNEL_VERSION(5, 16, 0) <= LINUX_VERSION_CODE +MODULE_IMPORT_NS(DMA_BUF); +#endif +MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/tegra/host/pva/pva.h b/drivers/video/tegra/host/pva/pva.h new file mode 100644 index 00000000..7e55c8c7 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva.h @@ -0,0 +1,615 @@ +/* + * drivers/video/tegra/host/pva/pva.h + * + * Tegra PVA header + * + * Copyright (c) 2016-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVHOST_PVA_H__ +#define __NVHOST_PVA_H__ + +#include +#include +#include +#include + +#include "nvpva_queue.h" +#include "pva_regs.h" +#include "pva_nvhost.h" +#include "pva-ucode-header.h" +#include "pva_vpu_app_auth.h" +#include "pva_fw_carveout.h" + +#ifdef CONFIG_TEGRA_SOC_HWPM +#include +#endif + +/** + * PVA Host1x class IDs + */ +enum { + NV_PVA0_CLASS_ID = 0xF1, + NV_PVA1_CLASS_ID = 0xF2, +}; + +struct nvpva_client_context; + +enum pva_submit_mode { + PVA_SUBMIT_MODE_MAILBOX = 0, + PVA_SUBMIT_MODE_MMIO_CCQ = 1, +}; + +struct pva_version_info { + u32 pva_r5_version; + u32 pva_compat_version; + u32 pva_revision; + u32 pva_built_on; +}; + +/** + * Queue count of 8 is maintained per PVA. 
+ */ +#define MAX_PVA_QUEUE_COUNT 8 +#define MAX_PVA_CLIENTS 8 +#define MAX_PVA_TASK_COUNT_PER_QUEUE 256U +#define MAX_PVA_SEG_COUNT_PER_QUEUE 4U +#define MAX_PVA_TASK_COUNT_PER_QUEUE_SEG \ + (MAX_PVA_TASK_COUNT_PER_QUEUE/MAX_PVA_SEG_COUNT_PER_QUEUE) + +#define NVPVA_USER_VM_COUNT MAX_PVA_CLIENTS + +/** + * Maximum task count that a PVA engine can support + */ +#define MAX_PVA_TASK_COUNT \ + ((MAX_PVA_QUEUE_COUNT) * (MAX_PVA_TASK_COUNT_PER_QUEUE)) + +/** + * Minium PVA frequency (10MHz) + */ +#define MIN_PVA_FREQUENCY 10000000 + +/** + * Maximum number of IRQS to be serviced by the driver. Gen1 has a single IRQ, + * Gen2 has 9. + */ +#define MAX_PVA_IRQS 9 +#define MAX_PVA_INTERFACE 9 +#define PVA_MAILBOX_INDEX 0 +#define PVA_CCQ0_INDEX 1 +#define PVA_CCQ1_INDEX 2 +#define PVA_CCQ2_INDEX 3 +#define PVA_CCQ3_INDEX 4 +#define PVA_CCQ4_INDEX 5 +#define PVA_CCQ5_INDEX 6 +#define PVA_CCQ6_INDEX 7 +#define PVA_CCQ7_INDEX 8 + + +/** + * Number of VPUs for each PVA + */ +#define NUM_VPU_BLOCKS 2 + +/** + * nvpva_dbg_* macros provide wrappers around kernel print functions + * that use a debug mask configurable at runtime to provide control over + * the level of detail that gets printed. + */ +#ifdef CONFIG_DEBUG_FS + /* debug info, default is compiled-in but effectively disabled (0 mask) */ + #define NVPVA_DEBUG + /*e.g: echo 1 > /d/pva0/driver_dbg_mask */ + #define NVPVA_DEFAULT_DBG_MASK 0 +#else + /* manually enable and turn on the mask */ + #define NVPVA_DEFAULT_DBG_MASK (pva_dbg_info) +#endif + +enum nvpva_dbg_categories { + pva_dbg_info = BIT(0), /* slightly verbose info */ + pva_dbg_fn = BIT(2), /* fn name tracing */ + pva_dbg_reg = BIT(3), /* register accesses, very verbose */ + pva_dbg_prof = BIT(7), /* profiling info */ + pva_dbg_mem = BIT(31), /* memory accesses, very verbose */ +}; + +#if defined(NVPVA_DEBUG) +#define nvpva_dbg(pva, dbg_mask, format, arg...) \ + do { \ + if (unlikely((dbg_mask)&pva->driver_log_mask)) { \ + pr_info("nvpva %s: " format "\n", __func__, ##arg); \ + } \ + } while (0) + +#else /* NVPVA_DEBUG */ +#define nvpva_dbg(pva, dbg_mask, format, arg...) \ + do { \ + if (0) { \ + (void) pva; /* unused variable */ \ + pr_info("nvhost %s: " format "\n", __func__, ##arg); \ + } \ + } while (0) + +#endif + +/* convenience,shorter err/fn/dbg_info */ +#define nvpva_err(d, fmt, arg...) \ + dev_err(d, "%s: " fmt "\n", __func__, ##arg) + +#define nvpva_err_ratelimited(d, fmt, arg...) \ + dev_err_ratelimited(d, "%s: " fmt "\n", __func__, ##arg) + +#define nvpva_warn(d, fmt, arg...) \ + dev_warn(d, "%s: " fmt "\n", __func__, ##arg) + +#define nvpva_dbg_fn(pva, fmt, arg...) \ + nvpva_dbg(pva, pva_dbg_fn, fmt, ##arg) + +#define nvpva_dbg_info(pva, fmt, arg...) \ + nvpva_dbg(pva, pva_dbg_info, fmt, ##arg) + +#define nvpva_dbg_prof(pva, fmt, arg...) 
\ + nvpva_dbg(pva, pva_dbg_prof, fmt, ##arg) + +/** + * @brief struct to hold the segment details + * + * addr: virtual addr of the segment from PRIV2 address base + * size: segment size + * offset: offset of the addr from priv2 base + * + */ +struct pva_seg_info { + void *addr; + u32 size; + u32 offset; +}; + +/** + * @breif struct to hold the segment details for debug purpose + * + * pva Pointer to pva struct + * seg_info pva_seg_info struct + * + */ +struct pva_crashdump_debugfs_entry { + struct pva *pva; + struct pva_seg_info seg_info; +}; + +/** + * @brief struct to handle dma alloc memory info + * + * size size allocated + * phys_addr physical address + * va virtual address + * + */ +struct pva_dma_alloc_info { + size_t size; + dma_addr_t pa; + void *va; +}; + +/** + * @brief struct to handle the PVA firmware information + * + * hdr pointer to the pva_code_hdr struct + * priv1_buffer pva_dma_alloc_info for priv1_buffer + * priv2_buffer pva_dma_alloc_info for priv2_buffer + * priv2_reg_offset priv2 register offset from uCode + * trace_buffer_size buffer size for trace log + * + */ +struct pva_fw { + struct pva_ucode_hdr_s *hdr; + + struct pva_dma_alloc_info priv1_buffer; + struct pva_dma_alloc_info priv2_buffer; + u32 priv2_reg_offset; + + u32 trace_buffer_size; +}; + +/* + * @brief store trace log segment's address and size + * + * addr Pointer to the pva trace log segment + * size Size of pva trace log segment + * offset Offset in bytes for trace log segment + * + */ +struct pva_trace_log { + void *addr; + u32 size; + u32 offset; +}; + +struct pva_fw_debug_log { + void *addr; + u32 size; + struct mutex saved_log_lock; + u8 *saved_log; +}; +void save_fw_debug_log(struct pva *pva); + +/* + * @brief stores address and other attributes of the vpu function table + * + * addr The pointer to start of the VPU function table + * size Table size of the function table + * handle The IOVA address of the function table + * entries The total number of entries in the function table + * + */ +struct pva_func_table { + struct vpu_func *addr; + uint32_t size; + dma_addr_t handle; + uint32_t entries; +}; + +struct pva_status_interface_registers { + uint32_t registers[5]; +}; + +#define PVA_HW_GEN1 1 +#define PVA_HW_GEN2 2 + +/** + * @brief HW version specific configuration and functions + * read_mailbox Function to read from mailbox based on PVA revision + * write_mailbox Function to write to mailbox based on PVA revision + * ccq_send_task Function to submit task to ccq based on PVA revision + * submit_cmd_sync_locked + * Function to submit command to PVA based on PVA revision + * Should be called only if appropriate locks have been + * acquired + * + * submit_cmd_sync Function to submit command to PVA based on PVA revision + * irq_count Number of IRQs associated with this PVA revision + * + */ + +struct pva_version_config { + u32 (*read_mailbox)(struct platform_device *pdev, u32 mbox_id); + void (*write_mailbox)(struct platform_device *pdev, u32 mbox_id, + u32 value); + void (*read_status_interface)(struct pva *pva, uint32_t interface_id, + u32 isr_status, + struct pva_cmd_status_regs *status_out); + int (*ccq_send_task)(struct pva *pva, u32 queue_id, + dma_addr_t task_addr, u8 batchsize, u32 flags); + int (*submit_cmd_sync_locked)(struct pva *pva, struct pva_cmd_s *cmd, + u32 nregs, u32 queue_id, + struct pva_cmd_status_regs *status_regs); + + int (*submit_cmd_sync)(struct pva *pva, struct pva_cmd_s *cmd, + u32 nregs, u32 queue_id, + struct pva_cmd_status_regs *status_regs); + int irq_count; +}; + 
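+/*
+ * Illustrative sketch only, not part of this patch: a helper like the one
+ * below shows the intended use of the per-generation ops table, so that
+ * callers never deal with mailbox vs. MMIO CCQ differences directly. The
+ * helper name is hypothetical; it assumes a pva_version_config that was
+ * bound at probe time.
+ */
+static inline int
+pva_cfg_submit_sync_sketch(struct pva_version_config *cfg, struct pva *pva,
+			   struct pva_cmd_s *cmd, u32 nregs, u32 queue_id,
+			   struct pva_cmd_status_regs *status_regs)
+{
+	/* Dispatch through the generation-specific hook */
+	return cfg->submit_cmd_sync(pva, cmd, nregs, queue_id, status_regs);
+}
+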
+/** + * @brief Describe a VPU hardware debug block + * vbase Address mapped to virtual space + */ +struct pva_vpu_dbg_block { + void __iomem *vbase; +}; + +/** + * @brief VPU utilization information + * + * start_stamp time stamp when measurment started + * end_stamp time stamp when measurment is to end + * vpu_stats avaraged vpu utilization stats + * stats_fw_buffer_iova + * stats_fw_buffer_va + */ +struct pva_vpu_util_info { + u64 start_stamp; + u64 end_stamp; + u64 vpu_stats[2]; + dma_addr_t stats_fw_buffer_iova; + struct pva_vpu_stats_s *stats_fw_buffer_va; +}; + +struct scatterlist; +struct nvpva_syncpt_desc { + dma_addr_t addr; + size_t size; + u32 id; + u32 assigned; +}; + +struct nvpva_syncpts_desc { + struct platform_device *host_pdev; + struct nvpva_syncpt_desc syncpts_rw[MAX_PVA_QUEUE_COUNT]; + dma_addr_t syncpt_start_iova_r; + dma_addr_t syncpt_range_r; + dma_addr_t syncpt_start_iova_rw; + dma_addr_t syncpt_range_rw; + uint32_t page_size; + bool syncpts_mapped_r; + bool syncpts_mapped_rw; +}; + +/** + * @brief Driver private data, shared with all applications + * + * version pva version; 1 or 2 + * pdev Pointer to the PVA device + * pool Pointer to Queue table available for the PVA + * fw_info firmware information struct + * irq IRQ number obtained on registering the module + * cmd_waitqueue Command Waitqueue for response waiters + * for syncronous commands + * cmd_status_regs Response to commands is stored into this + * structure temporarily + * cmd_status Status of the command interface + * mailbox_mutex Mutex to avoid concurrent mailbox accesses + * debugfs_entry_r5 debugfs segment information for r5 + * debugfs_entry_vpu0 debugfs segment information for vpu0 + * debugfs_entry_vpu1 debugfs segment information for vpu1 + * priv1_dma struct pva_dma_alloc_info for priv1_dma + * priv2_dma struct pva_dma_alloc_info for priv2_dma + * pva_trace struct for pva_trace_log + * submit_mode Select the task submit mode + * dbg_vpu_app_id Set the vpu_app id to debug + * r5_dbg_wait Set the r5 debugger to wait + * timeout_enabled Set pva timeout enabled based on debug + * slcg_disable Second level Clock Gating control variable + * vpu_printf_enabled + * vpu_debug_enabled + * log_level controls the level of detail printed by FW + * debug statements + * profiling_level + * driver_log_mask controls the level of detail printed by kernel + * debug statements + */ + +struct pva { + int version; + struct pva_version_config *version_config; + struct platform_device *pdev; + struct platform_device *aux_pdev; + struct nvpva_queue_pool *pool; + struct pva_fw fw_info; + struct nvpva_carveout_info *co; + struct nvpva_carveout_info fw_carveout; + struct pva_vpu_auth_s pva_auth; + struct pva_vpu_auth_s pva_auth_sys; + struct nvpva_syncpts_desc syncpts; + + int irq[MAX_PVA_IRQS]; + s32 sids[16]; + u32 sid_count; + u32 ec_state[8]; + + wait_queue_head_t cmd_waitqueue[MAX_PVA_INTERFACE]; + struct pva_cmd_status_regs cmd_status_regs[MAX_PVA_INTERFACE]; + enum pva_cmd_status cmd_status[MAX_PVA_INTERFACE]; + struct mutex mailbox_mutex; + + struct mutex ccq_mutex; + + struct pva_crashdump_debugfs_entry debugfs_entry_r5; + struct pva_crashdump_debugfs_entry debugfs_entry_vpu0; + struct pva_crashdump_debugfs_entry debugfs_entry_vpu1; + + struct pva_dma_alloc_info priv1_dma; + struct pva_dma_alloc_info priv2_dma; + /* Circular array to share with PVA R5 FW for task status info */ + struct pva_dma_alloc_info priv_circular_array; + /* Current position to read task status buffer from the circular + * array + */ + u32 
circular_array_rd_pos; + /* Current position to write task status buffer from the circular + * array + */ + u32 circular_array_wr_pos; + struct work_struct task_update_work; + atomic_t n_pending_tasks; + struct workqueue_struct *task_status_workqueue; + struct pva_trace_log pva_trace; + struct pva_fw_debug_log fw_debug_log; + u32 submit_task_mode; + u32 submit_cmd_mode; + + u32 r5_dbg_wait; + bool timeout_enabled; + u32 slcg_disable; + u32 vmem_war_disable; + bool vpu_printf_enabled; + bool vpu_debug_enabled; + bool stats_enabled; + bool map_co_needed; + bool boot_from_file; + struct pva_vpu_util_info vpu_util_info; + u32 profiling_level; + + struct work_struct pva_abort_handler_work; + bool booted; + u32 log_level; + u32 driver_log_mask; + struct nvpva_client_context *clients; + struct mutex clients_lock; + + struct pva_vpu_dbg_block vpu_dbg_blocks[NUM_VPU_BLOCKS]; + +#ifdef CONFIG_TEGRA_SOC_HWPM + struct tegra_soc_hwpm_ip_ops hwpm_ip_ops; +#endif +}; + +/** + * @brief Copy traces to kernel trace buffer. + * + * When mailbox interrupt for copying ucode trace buffer to + * kernel-ucode shared trace buffer is arrived it copies the kernel-ucode + * shared trace buffer to kernel ftrace buffer + * + * @pva Pointer to pva structure + * + */ +void pva_trace_copy_to_ftrace(struct pva *pva); + +/** + * @brief Register PVA ISR + * + * This function called from driver to register the + * PVA ISR with IRQ. + * + * @param pdev Pointer to PVA device + * @return 0 on Success or negative error code + * + */ +int pva_register_isr(struct platform_device *dev); + +/** + * @brief deInitiallze pva debug utils + * + * @param pva Pointer to PVA device + * @return none + * + */ +void pva_debugfs_deinit(struct pva *pva); + +/** + * @brief Initiallze pva debug utils + * + * @param pdev Pointer to PVA device + * @return none + * + */ +void pva_debugfs_init(struct platform_device *pdev); + +/** + * @brief Initiallze PVA abort handler + * + * @param pva Pointer to PVA structure + * @return none + * + */ +void pva_abort_init(struct pva *pva); + +/** + * @brief Recover PVA back into working state + * + * @param pva Pointer to PVA structure + * @return none + * + */ +void pva_abort(struct pva *pva); + +/** + * @brief Run the ucode selftests + * + * This function is invoked if the ucode is in selftest mode. + * The function will do the static memory allocation for the + * ucode self test to run. + * + * @param pdev Pointer to PVA device + * @return 0 on Success or negative error code + * + */ +int pva_run_ucode_selftest(struct platform_device *pdev); + +/** + * @brief Allocate and populate the function table to the memory + * + * This function is called when the vpu table needs to be populated. + * The function also allocates the memory required for the vpu table. + * + * @param pva Pointer to PVA device + * @param pva_func_table Pointer to the function table which contains + * the address, table size and number of entries + * @return 0 on Success or negative error code + * + */ +int pva_alloc_and_populate_function_table(struct pva *pva, + struct pva_func_table *fn_table); + +/** + * @brief Deallocate the memory of the function table + * + * This function is called once the allocated memory for vpu table needs to + * be freed. 
+ * + * @param pva Pointer to PVA device + * @param pva_func_table Pointer to the function table which contains + * the address, table size and number of entries + * + */ +void pva_dealloc_vpu_function_table(struct pva *pva, + struct pva_func_table *fn_table); + +/** + * @brief Get PVA version information + * + * @param pva Pointer to a PVA device node + * @param info Pointer to an information structure to be filled + * + * @return 0 on success, otherwise a negative error code + */ +int pva_get_firmware_version(struct pva *pva, struct pva_version_info *info); + +/** + * @brief Set trace log level of PVA + * + * @param pva Pointer to a PVA device node + * @param log_level 32-bit mask for logs that we want to receive + * + * @return 0 on success, otherwise a negative error code + */ + +/** + * @brief Get PVA Boot KPI + * + * @param pva Pointer to a PVA device node + * @param r5_boot_time Pointer to a variable, where r5 boot time will be filled + * + * @return 0 on success, otherwise a negative error code + */ +int pva_boot_kpi(struct pva *pva, u64 *r5_boot_time); + +int pva_set_log_level(struct pva *pva, u32 log_level, bool mailbox_locked); + +int nvpva_request_firmware(struct platform_device *pdev, const char *fw_name, + const struct firmware **ucode_fw); + +int nvpva_get_device_hwid(struct platform_device *pdev, + unsigned int id); + +u32 nvpva_get_id_idx(struct pva *dev, struct platform_device *pdev); + +void pva_push_aisr_status(struct pva *pva, uint32_t aisr_status); + +static inline u64 nvpva_get_tsc_stamp(void) +{ + u64 timestamp; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) + timestamp = arch_timer_read_counter(); +#else + timestamp = arch_counter_get_cntvct(); +#endif + return timestamp; +} +#endif diff --git a/drivers/video/tegra/host/pva/pva_abort.c b/drivers/video/tegra/host/pva/pva_abort.c new file mode 100644 index 00000000..a15e73b5 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_abort.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "pva.h" +#include "pva_sec_ec.h" + +static void pva_abort_handler(struct work_struct *work) +{ + struct pva *pva = container_of(work, struct pva, + pva_abort_handler_work); + struct platform_device *pdev = pva->pdev; + int i; + + /* Dump nvhost state to show the pending jobs */ + nvhost_debug_dump_device(pdev); + + + /*wake up sync cmd waiters*/ + for (i = 0; i < pva->version_config->irq_count; i++) { + if (pva->cmd_status[i] == PVA_CMD_STATUS_WFI) { + pva->cmd_status[i] = PVA_CMD_STATUS_ABORTED; + wake_up(&pva->cmd_waitqueue[i]); + schedule(); + } + } + + /* lock mailbox mutex to avoid synchronous communication. */ + do { + schedule(); + } while (mutex_trylock(&pva->mailbox_mutex) == false); + + /* There is no ongoing activity anymore. 
Update mailbox status */ + for (i = 0; i < pva->version_config->irq_count; i++) { + pva->cmd_status[i] = PVA_CMD_STATUS_INVALID; + } + + /* Lock CCQ mutex to avoid asynchornous communication */ + mutex_lock(&pva->ccq_mutex); + + /* + * If boot was still on-going, skip over recovery and let boot-up + * routine handle the failure + */ + if (!pva->booted) { + nvpva_warn(&pdev->dev, "Recovery skipped: PVA is not booted"); + goto skip_recovery; + } + + /* disable error reporting to hsm*/ + pva_disable_ec_err_reporting(pva); + + /* Reset the PVA and reload firmware */ + nvhost_module_reset(pdev, true); + + /* enable error reporting to hsm*/ + pva_enable_ec_err_reporting(pva); + + /* Remove pending tasks from the queue */ + nvpva_queue_abort_all(pva->pool); + + nvpva_warn(&pdev->dev, "Recovery finished"); + +skip_recovery: + mutex_unlock(&pva->ccq_mutex); + mutex_unlock(&pva->mailbox_mutex); +} + +void pva_abort(struct pva *pva) +{ + struct platform_device *pdev = pva->pdev; + size_t i; + /* For selftest mode to finish the test */ + if (host1x_readl(pdev, hsp_ss0_state_r()) + & PVA_TEST_MODE) { + for (i = 0; i < pva->version_config->irq_count; i++) { + pva->cmd_status[i] = PVA_CMD_STATUS_DONE; + wake_up(&pva->cmd_waitqueue[i]); + } + return; + } + + WARN(true, "Attempting to recover the engine"); + schedule_work(&pva->pva_abort_handler_work); +} + +void pva_abort_init(struct pva *pva) +{ + INIT_WORK(&pva->pva_abort_handler_work, pva_abort_handler); +} diff --git a/drivers/video/tegra/host/pva/pva_bit_helpers.h b/drivers/video/tegra/host/pva/pva_bit_helpers.h new file mode 100644 index 00000000..fc93b211 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_bit_helpers.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_BIT_HELPERS_H_ +#define PVA_BIT_HELPERS_H_ + +#include +#include + +#define RMOS_BYTES_PER_WORD (sizeof(unsigned int)) +#define RMOS_BITS_PER_WORD (RMOS_BYTES_PER_WORD * 8U) + +static inline uint32_t rmos_get_first_set_bit(uint32_t val) +{ + uint32_t index = 0U; + + for (index = 0U; index < 32U; index++) { + if (1U == (val & 1U)) + break; + + val = val >> 1U; + } + + return index; +} + +static inline uint32_t rmos_get_first_zero_bit(uint32_t val) +{ + if ((~(uint32_t)0U) == val) + return RMOS_BITS_PER_WORD; + + return rmos_get_first_set_bit(~val); +} + +static inline uint32_t rmos_find_first_zero_bit(uint32_t *addr, uint32_t size) +{ + const uint32_t *p = addr; + uint32_t result = 0U; + uint32_t tmp; + uint32_t first_zero_bit; + + while (size >= RMOS_BITS_PER_WORD) { + tmp = *(p++); + if (0U != ~tmp) { + first_zero_bit = rmos_get_first_zero_bit(tmp); + + /* + * Result will not wrap around in any case as the + * Maximum possible return value is the 'size' itself. 
+ */ + return result + first_zero_bit; + } + result += RMOS_BITS_PER_WORD; + size -= RMOS_BITS_PER_WORD; + } + + if (size == 0U) + return result; + + tmp = (*p) | (~0U << size); + tmp = rmos_get_first_zero_bit(tmp); + if (tmp == 32U) { + if ((U32_MAX - result) < size) + return size; + else + return result + size; + } + + return result + tmp; +} + +static inline void rmos_set_bit32(unsigned int nr, unsigned int *addr) +{ + *addr |= (1U << nr); +} + +static inline void rmos_clear_bit32(unsigned int nr, unsigned int *addr) +{ + *addr &= ~(1U << nr); +} + +static inline bool rmos_test_bit32(unsigned int nr, const unsigned int *addr) +{ + return (*addr & (1 << nr)) != 0U; +} + +#endif diff --git a/drivers/video/tegra/host/pva/pva_ccq_t19x.c b/drivers/video/tegra/host/pva/pva_ccq_t19x.c new file mode 100644 index 00000000..998fb268 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ccq_t19x.c @@ -0,0 +1,87 @@ +/* + * PVA Command Queue Interface handling + * + * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "pva-interface.h" +#include +#include +#include + +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif + +#include "pva.h" +#include "pva_ccq_t19x.h" + +#include "pva_regs.h" +#include "pva-interface.h" + +#define MAX_CCQ_ELEMENTS 6 + +static int pva_ccq_wait(struct pva *pva, int timeout) +{ + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + /* + * Wait until there is free room in the CCQ. Otherwise the writes + * could stall the CPU. Ignore the timeout in simulation. + */ + + while (time_before(jiffies, end_jiffies) || + (pva->timeout_enabled == false)) { + u32 val = host1x_readl(pva->pdev, + cfg_ccq_status_r(pva->version, 0, + PVA_CCQ_STATUS2_INDEX)); + if (val <= MAX_CCQ_ELEMENTS) + return 0; + + usleep_range(5, 10); + } + + return -ETIMEDOUT; +} + +int pva_ccq_send_task_t19x(struct pva *pva, u32 queue_id, dma_addr_t task_addr, + u8 batchsize, u32 flags) +{ + int err = 0; + struct pva_cmd_s cmd = {0}; + + (void)pva_cmd_submit_batch(&cmd, queue_id, task_addr, batchsize, flags); + + mutex_lock(&pva->ccq_mutex); + err = pva_ccq_wait(pva, 100); + if (err < 0) + goto err_wait_ccq; + + /* Make the writes to CCQ */ + host1x_writel(pva->pdev, cfg_ccq_r(pva->version, 0), cmd.cmd_field[1]); + host1x_writel(pva->pdev, cfg_ccq_r(pva->version, 0), cmd.cmd_field[0]); + + mutex_unlock(&pva->ccq_mutex); + + return err; + +err_wait_ccq: + mutex_unlock(&pva->ccq_mutex); + pva_abort(pva); + + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_ccq_t19x.h b/drivers/video/tegra/host/pva/pva_ccq_t19x.h new file mode 100644 index 00000000..61c5a78b --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ccq_t19x.h @@ -0,0 +1,29 @@ +/* + * PVA Command Queue Interface handling + * + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_CCQ_T19X_H +#define PVA_CCQ_T19X_H + +#include + +#include "pva.h" + +int pva_ccq_send_task_t19x(struct pva *pva, u32 queue_id, dma_addr_t task_addr, + u8 batchsize, u32 flags); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_ccq_t23x.c b/drivers/video/tegra/host/pva/pva_ccq_t23x.c new file mode 100644 index 00000000..9122f9c0 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ccq_t23x.c @@ -0,0 +1,234 @@ +/* + * PVA Command Queue Interface handling + * + * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +#include +#else +#include +#endif + +#include "pva.h" +#include "pva_mailbox.h" +#include "pva_ccq_t23x.h" + +#include "pva_regs.h" + +#define MAX_CCQ_ELEMENTS 6 + +static int pva_ccq_wait(struct pva *pva, int timeout, unsigned int queue_id) +{ + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + /* + * Wait until there is free room in the CCQ. Otherwise the writes + * could stall the CPU. Ignore the timeout in simulation. 
+ */ + + while (time_before(jiffies, end_jiffies) || + (pva->timeout_enabled == false)) { + u32 val = PVA_EXTRACT( + host1x_readl(pva->pdev, + cfg_ccq_status_r(pva->version, queue_id, + PVA_CCQ_STATUS2_INDEX)), + 4, 0, u32); + if (val <= MAX_CCQ_ELEMENTS) + return 0; + + usleep_range(5, 10); + } + + return -ETIMEDOUT; +} + +static int pva_ccq_send_cmd(struct pva *pva, u32 queue_id, + struct pva_cmd_s *cmd) +{ + int err = 0; + err = pva_ccq_wait(pva, 100, queue_id); + if (err < 0) + goto err_wait_ccq; + + /* Make the writes to CCQ */ + host1x_writel(pva->pdev, cfg_ccq_r(pva->version, queue_id), + cmd->cmd_field[1]); + host1x_writel(pva->pdev, cfg_ccq_r(pva->version, queue_id), + cmd->cmd_field[0]); + return err; + +err_wait_ccq: + pva_abort(pva); + return err; +} + +int pva_ccq_send_task_t23x(struct pva *pva, u32 queue_id, dma_addr_t task_addr, + u8 batchsize, u32 flags) +{ + int err = 0; + struct pva_cmd_s cmd = { 0 }; + + (void)pva_cmd_submit_batch(&cmd, queue_id, task_addr, batchsize, flags); + + err = pva_ccq_send_cmd(pva, queue_id, &cmd); + return err; +} + +void pva_ccq_isr_handler(struct pva *pva, unsigned int queue_id) +{ + struct platform_device *pdev = pva->pdev; + u32 int_status; + unsigned int cmd_status_index = queue_id + PVA_CCQ0_INDEX; + int_status = + host1x_readl(pdev, cfg_ccq_status_r(pva->version, queue_id, + PVA_CCQ_STATUS7_INDEX)); + if (pva->cmd_status[cmd_status_index] != PVA_CMD_STATUS_WFI) { + nvpva_warn(&pdev->dev, "No ISR for CCQ %u", queue_id); + return; + } + /* Save the current command and subcommand for later processing */ + + pva->version_config->read_status_interface( + pva, cmd_status_index, int_status, + &pva->cmd_status_regs[cmd_status_index]); + /* Clear the mailbox interrupt status */ + + /* Wake up the waiters */ + pva->cmd_status[cmd_status_index] = PVA_CMD_STATUS_DONE; + wake_up(&pva->cmd_waitqueue[cmd_status_index]); +} + +int pva_ccq_wait_event(struct pva *pva, unsigned int queue_id, int wait_time) +{ + int timeout = 1; + int err; + u32 interface = queue_id + 1; + /* Wait for the event being triggered in ISR */ + if (pva->timeout_enabled == true) { + timeout = wait_event_timeout( + pva->cmd_waitqueue[interface], + pva->cmd_status[interface] == PVA_CMD_STATUS_DONE || + pva->cmd_status[interface] == + PVA_CMD_STATUS_ABORTED, + msecs_to_jiffies(wait_time)); + } else { + wait_event(pva->cmd_waitqueue[interface], + pva->cmd_status[interface] == PVA_CMD_STATUS_DONE || + pva->cmd_status[interface] == + PVA_CMD_STATUS_ABORTED); + } + if (timeout <= 0) { + err = -ETIMEDOUT; + pva_abort(pva); + } else if (pva->cmd_status[interface] == PVA_CMD_STATUS_ABORTED) + err = -EIO; + else + err = 0; + return err; +} + +int pva_ccq_send_cmd_sync(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + u32 interface = queue_id + 1U; + + if (status_regs == NULL) { + err = -EINVAL; + goto err_invalid_parameter; + } + + if (queue_id >= MAX_PVA_QUEUE_COUNT) { + err = -EINVAL; + goto err_invalid_parameter; + } + + /* Ensure that mailbox state is sane */ + if (WARN_ON(pva->cmd_status[interface] != PVA_CMD_STATUS_INVALID)) { + err = -EIO; + goto err_check_status; + } + + /* Mark that we are waiting for an interrupt */ + pva->cmd_status[interface] = PVA_CMD_STATUS_WFI; + memset(&pva->cmd_status_regs[interface], 0, + sizeof(struct pva_cmd_status_regs)); + + /* Submit command to PVA */ + err = pva_ccq_send_cmd(pva, queue_id, cmd); + if (err < 0) + goto err_send_command; + + err = pva_ccq_wait_event(pva, 
queue_id, 100); + if (err < 0) + goto err_wait_response; + /* Return interrupt status back to caller */ + memcpy(status_regs, &pva->cmd_status_regs[interface], + sizeof(struct pva_cmd_status_regs)); + + pva->cmd_status[interface] = PVA_CMD_STATUS_INVALID; + + return err; + +err_wait_response: +err_send_command: + pva->cmd_status[interface] = PVA_CMD_STATUS_INVALID; +err_check_status: +err_invalid_parameter: + return err; +} + +int pva_send_cmd_sync(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + + switch (pva->submit_cmd_mode) { + case PVA_SUBMIT_MODE_MAILBOX: + err = pva_mailbox_send_cmd_sync(pva, cmd, nregs, status_regs); + break; + case PVA_SUBMIT_MODE_MMIO_CCQ: + err = pva_ccq_send_cmd_sync(pva, cmd, nregs, queue_id, + status_regs); + break; + } + + return err; +} + +int pva_send_cmd_sync_locked(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + + switch (pva->submit_cmd_mode) { + case PVA_SUBMIT_MODE_MAILBOX: + err = pva_mailbox_send_cmd_sync_locked(pva, cmd, nregs, + status_regs); + break; + case PVA_SUBMIT_MODE_MMIO_CCQ: + err = pva_ccq_send_cmd_sync(pva, cmd, nregs, queue_id, + status_regs); + break; + } + + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_ccq_t23x.h b/drivers/video/tegra/host/pva/pva_ccq_t23x.h new file mode 100644 index 00000000..87c94fa7 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ccq_t23x.h @@ -0,0 +1,40 @@ +/* + * PVA Command Queue Interface handling + * + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_CCQ_T23X_H +#define PVA_CCQ_T23X_H + +#include + +#include "pva.h" +#include "pva_status_regs.h" + +int pva_ccq_send_task_t23x(struct pva *pva, u32 queue_id, dma_addr_t task_addr, + u8 batchsize, u32 flags); +void pva_ccq_isr_handler(struct pva *pva, unsigned int queue_id); +int pva_ccq_send_cmd_sync(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *ccq_status_regs); +int pva_send_cmd_sync(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *ccq_status_regs); +int pva_send_cmd_sync_locked(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *ccq_status_regs); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_debug.c b/drivers/video/tegra/host/pva/pva_debug.c new file mode 100644 index 00000000..618b027d --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_debug.c @@ -0,0 +1,490 @@ +/* + * PVA Debug Information file + * + * Copyright (c) 2017-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pva.h" +#include +#include "pva_vpu_ocd.h" +#include "pva-fw-address-map.h" + +static void pva_read_crashdump(struct seq_file *s, struct pva_seg_info *seg_info) +{ + int i = 0; + u32 *seg_addr = (u32 *) seg_info->addr; + + if (!seg_addr) + return; + + for (i = 0; i < (seg_info->size >> 4);) { + seq_printf(s, "0x%x 0x%x 0x%x 0x%x\n", + seg_addr[i], seg_addr[i+1], + seg_addr[i+2], seg_addr[i+3]); + i = i + 4; + } +} + +static int pva_crashdump(struct seq_file *s, void *data) +{ + int err = 0; + struct pva_crashdump_debugfs_entry *entry = + (struct pva_crashdump_debugfs_entry *)s->private; + struct pva *pva = entry->pva; + + err = nvhost_module_busy(pva->pdev); + if (err) { + nvpva_dbg_info(pva, "err in powering up pva\n"); + goto err_poweron; + } + + pva_read_crashdump(s, &entry->seg_info); + + nvhost_module_idle(pva->pdev); + +err_poweron: + return err; +} + +static int crashdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, pva_crashdump, inode->i_private); +} + +static const struct file_operations pva_crashdump_fops = { + .open = crashdump_open, + .read = seq_read, + .release = single_release, +}; + +struct pva_fw_debug_log_iter { + struct pva *pva; + u8 *buffer; + loff_t pos; + size_t size; +}; + +static void *log_seq_start(struct seq_file *s, loff_t *pos) +{ + struct pva_fw_debug_log_iter *iter; + + iter = s->private; + if (*pos >= iter->size) + return NULL; + + iter->pos = *pos; + return iter; +} + +static void log_seq_stop(struct seq_file *s, void *v) +{ +} + +static void *log_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct pva_fw_debug_log_iter *iter = v; + + iter->pos += 1; + *pos = iter->pos; + + if (iter->pos >= iter->size) + return NULL; + + return iter; +} + +static int log_seq_show(struct seq_file *s, void *v) +{ + struct pva_fw_debug_log_iter *iter = v; + + seq_putc(s, iter->buffer[iter->pos]); + return 0; +} + +static struct seq_operations const log_seq_ops = { .start = log_seq_start, + .stop = log_seq_stop, + .next = log_seq_next, + .show = log_seq_show }; + +static int fw_debug_log_open(struct inode *inode, struct file *file) +{ + struct pva_fw_debug_log_iter *iter = + __seq_open_private(file, &log_seq_ops, sizeof(*iter)); + int err = 0; + struct pva *pva = inode->i_private; + + if (IS_ERR_OR_NULL(iter)) { + err = -ENOMEM; + goto err_out; + } + + iter->pva = pva; + + if (pva->booted) { + err = nvhost_module_busy(pva->pdev); + if (err) { + nvpva_err(&pva->pdev->dev, "err in powering up pva"); + err = -EIO; + goto free_iter; + } + + save_fw_debug_log(pva); + + nvhost_module_idle(pva->pdev); + } + + iter->buffer = pva->fw_debug_log.saved_log; + iter->size = + strnlen(pva->fw_debug_log.saved_log, pva->fw_debug_log.size); + iter->pos = 0; + + return 0; +free_iter: + kfree(iter); +err_out: + return err; +} + +static const struct file_operations pva_fw_debug_log_fops = { + .open = fw_debug_log_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private +}; + +static inline void print_version(struct seq_file *s, + const char *version_str, + 
const u32 version) +{ + const char type = PVA_EXTRACT(version, 31, 24, u8); + const u32 major = PVA_EXTRACT(version, 23, 16, u32); + const u32 minor = PVA_EXTRACT(version, 15, 8, u32); + const u32 subminor = PVA_EXTRACT(version, 7, 0, u32); + + seq_printf(s, "%s: %c.%02u.%02u.%02u\n", version_str, + type, major, minor, subminor); +} + +static int print_firmware_versions(struct seq_file *s, void *data) +{ + struct pva *pva = s->private; + struct pva_version_info info; + int ret = 0; + + ret = nvhost_module_busy(pva->pdev); + if (ret < 0) + goto err_poweron; + + ret = pva_get_firmware_version(pva, &info); + if (ret < 0) + goto err_get_firmware_version; + + nvhost_module_idle(pva->pdev); + + print_version(s, "pva_r5_version", info.pva_r5_version); + print_version(s, "pva_compat_version", info.pva_compat_version); + seq_printf(s, "pva_revision: %x\n", info.pva_revision); + seq_printf(s, "pva_built_on: %u\n", info.pva_built_on); + + return 0; + +err_get_firmware_version: + nvhost_module_idle(pva->pdev); +err_poweron: + return ret; +} + +static int print_version_open(struct inode *inode, struct file *file) +{ + return single_open(file, print_firmware_versions, inode->i_private); +} + +static const struct file_operations print_version_fops = { + .open = print_version_open, + .read = seq_read, + .release = single_release, +}; + +static int get_log_level(void *data, u64 *val) +{ + struct pva *pva = (struct pva *) data; + + *val = pva->log_level; + return 0; +} + +static int set_log_level(void *data, u64 val) +{ + struct pva *pva = (struct pva *) data; + + pva->log_level = val; + if (pva->booted) + return pva_set_log_level(pva, val, false); + else + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(log_level_fops, get_log_level, set_log_level, "%llu"); + +static void update_vpu_stats(struct pva *pva, bool stats_enabled) +{ + u32 flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status = {}; + struct pva_cmd_s cmd = {}; + int err = 0; + u32 nregs; + u64 duration = 0; + struct pva_vpu_stats_s *stats_buf = + pva->vpu_util_info.stats_fw_buffer_va; + u64 *vpu_stats = pva->vpu_util_info.vpu_stats; + + if (vpu_stats == 0) + goto err_out; + + err = nvhost_module_busy(pva->pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "error in powering up pva %d", + err); + vpu_stats[0] = 0; + vpu_stats[1] = 0; + return; + } + + nregs = pva_cmd_get_vpu_stats(&cmd, + pva->vpu_util_info.stats_fw_buffer_iova, + flags, stats_enabled); + err = pva_mailbox_send_cmd_sync(pva, &cmd, nregs, &status); + if (err < 0) { + nvpva_warn(&pva->pdev->dev, "get vpu stats cmd failed: %d\n", + err); + goto err_out; + } + + if (stats_enabled == false) + goto err_out; + + duration = stats_buf->window_end_time - stats_buf->window_start_time; + if (duration == 0) + goto err_out; + + vpu_stats[0] = + (10000ULL * stats_buf->total_utilization_time[0]) / duration; + vpu_stats[1] = + (10000ULL * stats_buf->total_utilization_time[1]) / duration; + pva->vpu_util_info.start_stamp = stats_buf->window_start_time; + pva->vpu_util_info.end_stamp = stats_buf->window_end_time; + goto out; +err_out: + vpu_stats[0] = 0; + vpu_stats[1] = 0; +out: + nvhost_module_idle(pva->pdev); +} + +static int print_vpu_stats(struct seq_file *s, void *data) +{ + struct pva *pva = s->private; + + update_vpu_stats(pva, pva->stats_enabled); + seq_printf(s, "%llu\n%llu\n%llu\n%llu\n", + pva->vpu_util_info.start_stamp, + pva->vpu_util_info.end_stamp, + pva->vpu_util_info.vpu_stats[0], + pva->vpu_util_info.vpu_stats[1]); + + return 0; +} + +static int 
pva_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, print_vpu_stats, inode->i_private); +} + +static const struct file_operations pva_stats_fops = { + .open = pva_stats_open, + .read = seq_read, + .release = single_release, +}; + +static int get_authentication(void *data, u64 *val) +{ + struct pva *pva = (struct pva *) data; + + *val = pva->pva_auth.pva_auth_enable ? 1 : 0; + + return 0; +} + +static int set_authentication(void *data, u64 val) +{ + struct pva *pva = (struct pva *) data; + + pva->pva_auth.pva_auth_enable = (val == 1) ? true : false; + + if (pva->pva_auth.pva_auth_enable) + pva->pva_auth.pva_auth_allow_list_parsed = false; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(pva_auth_fops, get_authentication, set_authentication, "%llu"); + +static long vpu_ocd_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + struct pva_vpu_dbg_block *dbg_block = f->f_inode->i_private; + int err = 0; + + switch (cmd) { + case PVA_OCD_IOCTL_VPU_IO: { + struct pva_ocd_ioctl_vpu_io_param io_param; + + if (copy_from_user(&io_param, (void __user *)arg, + sizeof(io_param))) { + pr_err("failed copy ioctl buffer from user; size: %u", + _IOC_SIZE(cmd)); + err = -EFAULT; + goto out; + } + err = pva_vpu_ocd_io(dbg_block, io_param.instr, + &io_param.data[0], io_param.n_write, + &io_param.data[0], io_param.n_read); + if (err) + goto out; + + err = copy_to_user((void __user *)arg, &io_param, + sizeof(io_param)); + if (err) + goto out; + + break; + } + default: + err = -ENOIOCTLCMD; + break; + } + +out: + return err; +} + +static const struct file_operations pva_vpu_ocd_fops = { + .unlocked_ioctl = vpu_ocd_ioctl +}; + +void pva_debugfs_deinit(struct pva *pva) +{ + if (pva->vpu_util_info.stats_fw_buffer_va != NULL) { + dma_free_coherent(&pva->aux_pdev->dev, + sizeof(struct pva_vpu_stats_s), + pva->vpu_util_info.stats_fw_buffer_va, + pva->vpu_util_info.stats_fw_buffer_iova); + pva->vpu_util_info.stats_fw_buffer_va = 0; + pva->vpu_util_info.stats_fw_buffer_iova = 0; + } + + if (pva->fw_debug_log.saved_log != NULL) { + mutex_destroy(&pva->fw_debug_log.saved_log_lock); + kfree(pva->fw_debug_log.saved_log); + } +} + +void pva_debugfs_init(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + struct dentry *de = pdata->debugfs; + static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0", + "ocd_vpu1" }; + int i, err; + + if (!de) + return; + + pva->debugfs_entry_r5.pva = pva; + pva->debugfs_entry_vpu0.pva = pva; + pva->debugfs_entry_vpu1.pva = pva; + + debugfs_create_file("r5_crashdump", S_IRUGO, de, + &pva->debugfs_entry_r5, &pva_crashdump_fops); + debugfs_create_file("vpu0_crashdump", S_IRUGO, de, + &pva->debugfs_entry_vpu0, &pva_crashdump_fops); + debugfs_create_file("vpu1_crashdump", S_IRUGO, de, + &pva->debugfs_entry_vpu1, &pva_crashdump_fops); + debugfs_create_u32("submit_task_mode", S_IRUGO | S_IWUSR, de, + &pva->submit_task_mode); + debugfs_create_bool("vpu_debug", 0644, de, + &pva->vpu_debug_enabled); + debugfs_create_u32("r5_dbg_wait", 0644, de, + &pva->r5_dbg_wait); + debugfs_create_bool("r5_timeout_enable", 0644, de, + &pva->timeout_enabled); + debugfs_create_file("firmware_version", S_IRUGO, de, pva, + &print_version_fops); + debugfs_create_u32("cg_disable", 0644, de, &pva->slcg_disable); + debugfs_create_bool("vpu_printf_enabled", 0644, de, + &pva->vpu_printf_enabled); + debugfs_create_file("fw_log_level", 0644, de, pva, &log_level_fops); + 
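+	/*
+	 * Note: fw_log_level selects firmware-side trace verbosity and is
+	 * forwarded to the R5 through pva_set_log_level() once the firmware
+	 * has booted, while driver_log_mask below only gates the kernel-side
+	 * nvpva_dbg() categories declared in pva.h.
+	 */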
debugfs_create_u32("driver_log_mask", 0644, de, &pva->driver_log_mask); + debugfs_create_file("vpu_app_authentication", 0644, de, pva, + &pva_auth_fops); + debugfs_create_u32("profiling_level", 0644, de, &pva->profiling_level); + debugfs_create_bool("stats_enabled", 0644, de, &pva->stats_enabled); + debugfs_create_file("vpu_stats", 0644, de, pva, &pva_stats_fops); + + mutex_init(&pva->fw_debug_log.saved_log_lock); + pva->fw_debug_log.size = FW_DEBUG_LOG_BUFFER_SIZE; + pva->fw_debug_log.saved_log = + kzalloc(FW_DEBUG_LOG_BUFFER_SIZE, GFP_KERNEL); + if (IS_ERR_OR_NULL(pva->fw_debug_log.saved_log)) { + dev_err(&pva->pdev->dev, + "failed to allocate memory for saving debug log"); + pva->fw_debug_log.saved_log = NULL; + mutex_destroy(&pva->fw_debug_log.saved_log_lock); + } else { + debugfs_create_file("fw_debug_log", 0444, de, pva, + &pva_fw_debug_log_fops); + } + + pva->vpu_util_info.stats_fw_buffer_va = dma_alloc_coherent( + &pva->aux_pdev->dev, sizeof(struct pva_vpu_stats_s), + &pva->vpu_util_info.stats_fw_buffer_iova, GFP_KERNEL); + if (IS_ERR_OR_NULL(pva->vpu_util_info.stats_fw_buffer_va)) { + err = PTR_ERR(pva->vpu_util_info.stats_fw_buffer_va); + dev_err(&pva->pdev->dev, + "err = %d. failed to allocate stats buffer\n", err); + pva->vpu_util_info.stats_fw_buffer_va = 0; + pva->vpu_util_info.stats_fw_buffer_iova = 0; + } + + err = pva_vpu_ocd_init(pva); + if (err == 0) { + for (i = 0; i < NUM_VPU_BLOCKS; i++) + debugfs_create_file(vpu_ocd_names[i], 0644, de, + &pva->vpu_dbg_blocks[i], + &pva_vpu_ocd_fops); + } else { + dev_err(&pva->pdev->dev, "VPU OCD initialization failed\n"); + } +} diff --git a/drivers/video/tegra/host/pva/pva_dma.c b/drivers/video/tegra/host/pva/pva_dma.c new file mode 100644 index 00000000..d648e0f2 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_dma.c @@ -0,0 +1,1264 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include "pva_dma.h" +#include "pva_queue.h" +#include "pva-sys-dma.h" +#include "pva.h" +#include "pva_vpu_exe.h" +#include "nvpva_client.h" +#include "pva-bit.h" +#include "fw_config.h" +#include "pva_hwseq.h" + +static int32_t check_address_range(struct nvpva_dma_descriptor const *desc, + uint64_t max_size, + uint64_t max_size2, + bool src_dst, + bool dst2) +{ + int32_t err = 0; + int64_t start = 0; + int64_t end = 0; + int64_t offset = 0; + int64_t offset2 = 0; + uint32_t i; + int64_t bppSize = ((int64_t)desc->bytePerPixel == 0) ? 1 : + ((int64_t)desc->bytePerPixel == 1) ? 
2 : 4; + int64_t s[5] = {}; // max 5 dimension loop for DMA + int64_t last_tx = (int64_t)desc->tx - 1; + int64_t last_ty = (int64_t)desc->ty - 1; + + /** dummy transfer mode with no data transfer */ + if (desc->tx == 0U) + return err; + + /** ty = 0 is not allowed */ + if (desc->ty == 0U) + return -EINVAL; + + /** Source transfer mode take care padding */ + if (src_dst == false) { + last_tx -= (int64_t)desc->px; + last_ty -= (int64_t)desc->py; + } + + /* 1st dimension */ + s[0] = last_tx; + start = min((s[0]*bppSize), 0LL); + end = max(((s[0]*bppSize) + (bppSize - 1)), 0LL); + if (src_dst) { + /* 2nd destination dim */ + s[1] = (int64_t)desc->dstLinePitch * last_ty; + if (desc->dstCbEnable == 1U) { + /* ((DLP_ADV * (Ty-1)) + Tx) * BPP <= DB_SIZE */ + if (((s[1] + last_tx + 1) * bppSize) <= + (int64_t)desc->dstCbSize) + return 0; + + pr_err("invalid dst cb advance"); + return -EINVAL; + } + + offset = (int64_t)desc->dst_offset; + offset2 = (int64_t)desc->dst2Offset; + /* 3rd destination dim */ + s[2] = ((int64_t)desc->dstAdv1 * (int64_t)desc->dstRpt1); + /* 4th destination dim */ + s[3] = ((int64_t)desc->dstAdv2 * (int64_t)desc->dstRpt2); + /* 5th destination dim */ + s[4] = ((int64_t)desc->dstAdv3 * (int64_t)desc->dstRpt3); + } else { + /* 2nd source dim */ + s[1] = (int64_t)desc->srcLinePitch * last_ty; + if (desc->srcCbEnable == 1U) { + /* ((SLP_ADV * (Ty-1)) + Tx) * BPP <= SB_SIZE */ + if (((s[1] + last_tx + 1) * bppSize) <= + (int64_t)desc->srcCbSize) + return 0; + pr_err("invalid src cb"); + return -EINVAL; + } + + offset = (int64_t)desc->src_offset; + /* 3rd source dim */ + s[2] = ((int64_t)desc->srcAdv1 * (int64_t)desc->srcRpt1); + /* 4th source dim */ + s[3] = ((int64_t)desc->srcAdv2 * (int64_t)desc->srcRpt2); + /* 5th source dim */ + s[4] = ((int64_t)desc->srcAdv3 * (int64_t)desc->srcRpt3); + } + + for (i = 1U; i < 5U; i++) { + start += min(s[i]*bppSize, 0LL); + end += max(s[i]*bppSize, 0LL); + } + + /* check for out of range access */ + if (((int64_t) max_size) < 0) { + pr_err("max_size too large"); + err = -EINVAL; + goto out; + } + + if (!(((offset + start) >= 0) + && ((offset + end) < (int64_t)max_size))) { + pr_err("ERROR: Out of range detected"); + err = -EINVAL; + } + + if (dst2) { + if ((max_size2 > UINT_MAX) || !(((offset2 + start) >= 0) + && ((offset2 + end) < (int64_t)max_size2))) { + pr_err("ERROR: Out of range detected"); + err = -EINVAL; + } + } +out: + return err; +} + +static int32_t +patch_dma_desc_address(struct pva_submit_task *task, + struct nvpva_dma_descriptor *umd_dma_desc, + struct pva_dtd_s *dma_desc, u8 desc_id, bool is_misr) +{ + int32_t err = 0; + uint64_t addr_base = 0; + + switch (umd_dma_desc->srcTransferMode) { + case DMA_DESC_SRC_XFER_L2RAM: + /* + * PVA_HW_GEN1 has CVNAS RAM PVA_HW_GEN2 has L2SRAM CVNAS RAM + * memory is pinned and needs conversion from pin ID -> IOVA + * L2SRAM has memory offset which does not need conversion. 
The + * same conversion is applied for dst + */ + if (task->pva->version == PVA_HW_GEN1) { + struct pva_pinned_memory *mem = + pva_task_pin_mem(task, umd_dma_desc->srcPtr); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, + "invalid memory handle in" + " descriptor for SRC CVSRAM"); + goto out; + } + + addr_base = mem->dma_addr; + err = check_address_range(umd_dma_desc, + mem->size, + 0, + false, + false); + } else { + addr_base = 0; + if ((task->desc_hwseq_frm & (1ULL << desc_id)) == 0ULL) + err = check_address_range(umd_dma_desc, + task->l2_alloc_size, + 0, + false, + false); + } + + if (err) + goto out; + + break; + case DMA_DESC_SRC_XFER_VMEM:{ + /* calculate symbol address */ + u32 addr = 0; + u32 size = 0; + + if (umd_dma_desc->src_offset > U32_MAX) { + err = -EINVAL; + goto out; + } + + err = pva_get_sym_offset(&task->client->elf_ctx, task->exe_id, + umd_dma_desc->srcPtr, &addr, &size); + if (err) { + err = -EINVAL; + task_err( + task, + "invalid symbol id in descriptor for src VMEM"); + goto out; + } + + err = check_address_range(umd_dma_desc, + size, + 0, + false, + false); + if (err) { + err = -EINVAL; + task_err( + task, "ERROR: Invalid offset or address"); + goto out; + } + + addr_base = addr; + break; + } + case DMA_DESC_SRC_XFER_VPU_CONFIG: { + u32 addr = 0; + u32 size = 0; + + /* dest must be null*/ + if ((umd_dma_desc->dstPtr != NVPVA_INVALID_SYMBOL_ID) + || (umd_dma_desc->dst2Ptr != NVPVA_INVALID_SYMBOL_ID) + || (umd_dma_desc->src_offset > U32_MAX)) { + task_err(task, "ERROR: Invalid VPUC"); + err = -EINVAL; + goto out; + } + + /* calculate symbol address */ + /* TODO: check VPUC handling in ELF segment */ + err = pva_get_sym_offset(&task->client->elf_ctx, task->exe_id, + umd_dma_desc->srcPtr, &addr, &size); + if (err) { + task_err(task, "ERROR: Invalid offset or address"); + err = -EINVAL; + goto out; + } + + if (err) { + task_err(task, "ERROR: Invalid offset or address"); + goto out; + } + + addr_base = addr; + break; + } + case DMA_DESC_SRC_XFER_MC: { + struct pva_pinned_memory *mem = + pva_task_pin_mem(task, umd_dma_desc->srcPtr); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err( + task, + "invalid memory handle: descriptor: src MC"); + goto out; + } + if ((task->desc_hwseq_frm & (1ULL << desc_id)) == 0ULL) + err = check_address_range(umd_dma_desc, + mem->size, + 0, + false, + false); + + if (err) { + err = -EINVAL; + task_err(task, "ERROR: address"); + goto out; + } + + addr_base = mem->dma_addr; + task->src_surf_base_addr = addr_base; + + /** If BL format selected, set addr bit 39 to indicate */ + /* XBAR_RAW swizzling is required */ + addr_base |= (u64)umd_dma_desc->srcFormat << 39U; + + break; + } + case DMA_DESC_SRC_XFER_R5TCM: + if (!task->is_system_app) { + err = -EFAULT; + goto out; + } else { + task->special_access = 1; + addr_base = 0; + break; + } + case DMA_DESC_SRC_XFER_MMIO: + case DMA_DESC_SRC_XFER_INVAL: + case DMA_DESC_SRC_XFER_RSVD: + task_err(task, "invalid src mode %d", + umd_dma_desc->srcTransferMode); + err = -EINVAL; + goto out; + default: + err = -EFAULT; + goto out; + } + + addr_base += umd_dma_desc->src_offset; + dma_desc->src_adr0 = (uint32_t)(addr_base & 0xFFFFFFFFLL); + dma_desc->src_adr1 = (uint8_t)((addr_base >> 32U) & 0xFF); + if (umd_dma_desc->srcTransferMode == + (uint8_t)DMA_DESC_SRC_XFER_VPU_CONFIG) + goto out; + + addr_base = 0; + if (is_misr) { + if (umd_dma_desc->dstTransferMode == DMA_DESC_DST_XFER_L2RAM + || umd_dma_desc->dstTransferMode == DMA_DESC_DST_XFER_MC) { + addr_base = umd_dma_desc->dstPtr; + goto done; 
+ } else { + err = -EINVAL; + task_err( + task, + "invalid dst transfer mode for MISR descriptor"); + goto out; + } + } + + switch (umd_dma_desc->dstTransferMode) { + case DMA_DESC_DST_XFER_L2RAM: + if (task->pva->version == PVA_HW_GEN1) { + struct pva_pinned_memory *mem = + pva_task_pin_mem(task, umd_dma_desc->dstPtr); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, + "invalid memory handle in" + " descriptor for dst CVSRAM"); + goto out; + } + + addr_base = mem->dma_addr; + err = check_address_range(umd_dma_desc, + mem->size, + 0, + true, + false); + } else { + addr_base = 0; + err = check_address_range(umd_dma_desc, + task->l2_alloc_size, + 0, + true, + false); + } + + if (err) { + task_err(task, "ERROR: Invalid offset or address"); + err = -EINVAL; + goto out; + } + + break; + case DMA_DESC_DST_XFER_VMEM: { + /* calculate symbol address */ + u32 addr = 0; + u32 size = 0; + u32 addr2 = 0; + u32 size2 = 0; + bool check_size2 = false; + + if ((umd_dma_desc->dst_offset > U32_MAX) + || (umd_dma_desc->dst2Offset > U32_MAX)) { + err = -EINVAL; + goto out; + } + + err = pva_get_sym_offset(&task->client->elf_ctx, task->exe_id, + umd_dma_desc->dstPtr, &addr, &size); + if (err) { + err = -EINVAL; + task_err( + task, + "invalid symbol id in descriptor for dst VMEM"); + goto out; + } + + if (umd_dma_desc->dst2Ptr != NVPVA_INVALID_SYMBOL_ID) { + err = pva_get_sym_offset(&task->client->elf_ctx, + task->exe_id, + umd_dma_desc->dst2Ptr, + &addr2, + &size2); + + if (err) { + err = -EINVAL; + task_err( + task, + "invalid symbol id in descriptor " + "for dst2 VMEM"); + goto out; + } + + if ((addr2 + umd_dma_desc->dst2Offset) & 0x3F) { + task_err(task, + "ERR: dst2Ptr/Offset not aligned"); + err = -EINVAL; + goto out; + } + + check_size2 = true; + } + + err = check_address_range(umd_dma_desc, + size, + size2, + true, + check_size2); + if (err) { + err = -EINVAL; + task_err( + task, "ERROR: Invalid offset or address"); + goto out; + } + + addr_base = addr; + break; + } + case DMA_DESC_DST_XFER_MC: { + struct pva_pinned_memory *mem = + pva_task_pin_mem(task, umd_dma_desc->dstPtr); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err( + task, + "invalid memory handle: descriptor: dst MC"); + goto out; + } + + err = check_address_range(umd_dma_desc, + mem->size, + 0, + true, + false); + if (err) { + err = -EINVAL; + task_err(task, "ERROR: address"); + goto out; + } + + addr_base = mem->dma_addr; + task->dst_surf_base_addr = addr_base; + + /* If BL format selected, set addr bit 39 to indicate */ + /* XBAR_RAW swizzling is required */ + addr_base |= (u64)umd_dma_desc->dstFormat << 39U; + break; + } + case DMA_DESC_DST_XFER_R5TCM: + if (!task->is_system_app) { + err = -EFAULT; + goto out; + } else { + task->special_access = 1; + addr_base = 0; + break; + } + case DMA_DESC_DST_XFER_MMIO: + case DMA_DESC_DST_XFER_INVAL: + case DMA_DESC_DST_XFER_RSVD1: + case DMA_DESC_DST_XFER_RSVD2: + task_err(task, "invalid dst mode %d", + umd_dma_desc->dstTransferMode); + err = -EINVAL; + goto out; + default: + err = -EFAULT; + goto out; + } +done: + addr_base += umd_dma_desc->dst_offset; + dma_desc->dst_adr0 = (uint32_t)(addr_base & 0xFFFFFFFFLL); + dma_desc->dst_adr1 = (uint8_t)((addr_base >> 32U) & 0xFF); +out: + return err; +} + +static bool +is_valid_vpu_trigger_mode(const struct nvpva_dma_descriptor *desc, + u32 trigger_mode) +{ + bool valid = true; + + if (desc->trigEventMode == 0U) + return valid; + + switch ((enum nvpva_task_dma_trig_vpu_hw_events) + desc->trigVpuEvents) { + case TRIG_VPU_NO_TRIGGER: + if 
(trigger_mode != NVPVA_HWSEQTM_DMATRIG) + valid = false; + + break; + case TRIG_VPU_CONFIG_START: + /** If trig = VPU configuration trigger, + * the DSTM should be VPU configuration + * mode (0x7) + */ + if (desc->srcTransferMode != + (uint8_t) DMA_DESC_SRC_XFER_VPU_CONFIG) { + valid = false; + } + break; + case TRIG_VPU_DMA_READ0_START: + case TRIG_VPU_DMA_READ1_START: + case TRIG_VPU_DMA_READ2_START: + case TRIG_VPU_DMA_READ3_START: + case TRIG_VPU_DMA_READ4_START: + case TRIG_VPU_DMA_READ5_START: + case TRIG_VPU_DMA_READ6_START: + /* should be either vpu config or write to VMEM */ + valid = ((desc->srcTransferMode == + (uint8_t)DMA_DESC_SRC_XFER_VPU_CONFIG) + || (desc->dstTransferMode == + (uint8_t)DMA_DESC_DST_XFER_VMEM)); + break; + case TRIG_VPU_DMA_STORE0_START: + case TRIG_VPU_DMA_STORE1_START: + case TRIG_VPU_DMA_STORE2_START: + case TRIG_VPU_DMA_STORE3_START: + case TRIG_VPU_DMA_STORE4_START: + case TRIG_VPU_DMA_STORE5_START: + case TRIG_VPU_DMA_STORE6_START: + //should be either vpu config or read from VMEM + valid = ((desc->srcTransferMode == + (uint8_t)DMA_DESC_SRC_XFER_VPU_CONFIG) + || (desc->srcTransferMode == + (uint8_t) DMA_DESC_SRC_XFER_VMEM)); + break; + default: + valid = false; + break; + } + + return valid; +} + +static int32_t +validate_descriptor(const struct nvpva_dma_descriptor *desc, + u32 trigger_mode) +{ + uint32_t ret = 0; + int32_t retval = 0; + + /** padding related validation */ + if (desc->dstTransferMode == (uint8_t) DMA_DESC_DST_XFER_VMEM) { + ret |= ((desc->px != 0U) && + (desc->px >= desc->tx)) ? 1UL : 0UL; + + ret |= ((desc->py != 0U) && + (desc->py >= desc->ty)) ? 1UL : 0UL; + } + + /** Validate VPU trigger event config */ + ret |= (is_valid_vpu_trigger_mode(desc, trigger_mode)) ? 0UL : 1UL; + + /** Check src/dstADV values with respect to ECET bits */ + ret |= ( + (desc->trigEventMode == (uint8_t) TRIG_EVENT_MODE_DIM4) + && ((desc->srcRpt1 == 0U) || (desc->srcRpt2 == 0U) + || (desc->dstRpt1 == 0U) || + (desc->dstRpt2 == 0U))) ? 1UL : 0UL; + + ret |= (((desc->trigEventMode) == ((uint8_t)TRIG_EVENT_MODE_DIM3)) && + ((desc->srcRpt1 == 0U) || (desc->dstRpt1 == 0U))) ? 1UL : 0UL; + + /** BL format should be associated with MC only */ + if (desc->srcFormat == 1U) { + ret |= (!(desc->srcTransferMode == + (uint8_t) DMA_DESC_SRC_XFER_MC)) ? 1UL : 0UL; + } + + if (desc->dstFormat == 1U) { + ret |= (!(desc->dstTransferMode == + (uint8_t) DMA_DESC_DST_XFER_MC)) ? 
1UL : 0UL; + } + + if (ret != 0U) + retval = -EINVAL; + + return retval; +} +/* User to FW DMA descriptor structure mapping helper */ +/* TODO: Need to handle DMA descriptor like dst2ptr and dst2Offset */ +static int32_t nvpva_task_dma_desc_mapping(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + struct nvpva_dma_descriptor *umd_dma_desc = NULL; + struct pva_dtd_s *dma_desc = NULL; + int32_t err = 0; + unsigned int desc_num; + uint32_t addr = 0U; + uint32_t size = 0; + bool is_misr; + + task->special_access = 0; + + for (desc_num = 0U; desc_num < task->num_dma_descriptors; desc_num++) { + umd_dma_desc = &task->dma_descriptors[desc_num]; + dma_desc = &hw_task->dma_desc[desc_num]; + is_misr = !((task->dma_misr_config.descriptor_mask + & PVA_BIT64(desc_num)) == 0U); + is_misr = is_misr && (task->dma_misr_config.enable != 0U); + + err = validate_descriptor(umd_dma_desc, + task->hwseq_config.hwseqTrigMode); + if (err) { + task_err( + task, + "DMA descriptor validation falied"); + goto out; + } + + err = patch_dma_desc_address(task, umd_dma_desc, dma_desc, + desc_num, is_misr); + if (err) + goto out; + + /* DMA_DESC_TRANS CNTL0 */ + dma_desc->transfer_control0 = + umd_dma_desc->srcTransferMode | + (umd_dma_desc->srcFormat << 3U) | + (umd_dma_desc->dstTransferMode << 4U) | + (umd_dma_desc->dstFormat << 7U); + /* DMA_DESC_TRANS CNTL1 */ + dma_desc->transfer_control1 = + umd_dma_desc->bytePerPixel | + (umd_dma_desc->pxDirection << 2U) | + (umd_dma_desc->pyDirection << 3U) | + (umd_dma_desc->boundaryPixelExtension << 4U) | + (umd_dma_desc->transTrueCompletion << 7U); + /* DMA_DESC_TRANS CNTL2 */ + if (umd_dma_desc->prefetchEnable && + (umd_dma_desc->tx == 0 || umd_dma_desc->ty == 0 || + umd_dma_desc->srcTransferMode != DMA_DESC_SRC_XFER_MC || + umd_dma_desc->dstTransferMode != DMA_DESC_DST_XFER_VMEM)) { + /* also ECET must be non zero */ + task_err(task, " Invalid criteria to enable Prefetch"); + return -EINVAL; + } + dma_desc->transfer_control2 = + umd_dma_desc->prefetchEnable | + (umd_dma_desc->dstCbEnable << 1U) | + (umd_dma_desc->srcCbEnable << 2U); + + /*! + * Block-linear surface offset. Only the surface in dram + * can be block-linear. 
+ * BLBaseAddress = translate(srcPtr / dstPtr) + surfBLOffset; + * transfer_control2.bit[3:7] = BLBaseAddress[1].bit[1:5] + * GOB offset in BL mode and corresponds to surface address + * bits [13:9] + */ + if ((umd_dma_desc->srcFormat == 1U) + && (umd_dma_desc->srcTransferMode == + DMA_DESC_SRC_XFER_MC)) { + task->src_surf_base_addr += umd_dma_desc->surfBLOffset; + dma_desc->transfer_control2 |= + (u8)((task->src_surf_base_addr & 0x3E00) >> 6U); + } else if ((umd_dma_desc->dstFormat == 1U) && + (umd_dma_desc->dstTransferMode == + DMA_DESC_DST_XFER_MC)) { + task->dst_surf_base_addr += umd_dma_desc->surfBLOffset; + dma_desc->transfer_control2 |= + (u8)((task->dst_surf_base_addr & 0x3E00) >> 6U); + } + + if (umd_dma_desc->linkDescId > task->num_dma_descriptors) { + task_err(task, "invalid link ID"); + return -EINVAL; + } + + dma_desc->link_did = umd_dma_desc->linkDescId; + + /* DMA_DESC_TX */ + dma_desc->tx = umd_dma_desc->tx; + /* DMA_DESC_TY */ + dma_desc->ty = umd_dma_desc->ty; + /* DMA_DESC_DLP_ADV */ + dma_desc->dlp_adv = umd_dma_desc->dstLinePitch; + /* DMA_DESC_SLP_ADV */ + dma_desc->slp_adv = umd_dma_desc->srcLinePitch; + /* DMA_DESC_DB_START */ + dma_desc->db_start = umd_dma_desc->dstCbStart; + /* DMA_DESC_DB_SIZE */ + dma_desc->db_size = umd_dma_desc->dstCbSize; + /* DMA_DESC_SB_START */ + dma_desc->sb_start = umd_dma_desc->srcCbStart; + /* DMA_DESC_SB_SIZE */ + dma_desc->sb_size = umd_dma_desc->srcCbSize; + /* DMA_DESC_TRIG_CH */ + /* TODO: Need to handle this parameter */ + dma_desc->trig_ch_events = 0U; + /* DMA_DESC_HW_SW_TRIG */ + dma_desc->hw_sw_trig_events = + umd_dma_desc->trigEventMode | + (umd_dma_desc->trigVpuEvents << 2U) | + (umd_dma_desc->descReloadEnable << (8U + 4U)); + /* DMA_DESC_PX */ + dma_desc->px = (uint8_t)umd_dma_desc->px; + /* DMA_DESC_PY */ + dma_desc->py = (uint8_t)umd_dma_desc->py; + /* DMA_DESC_FRDA */ + if (umd_dma_desc->dst2Ptr != NVPVA_INVALID_SYMBOL_ID) { + err = pva_get_sym_offset(&task->client->elf_ctx, + task->exe_id, + umd_dma_desc->dst2Ptr, + &addr, + &size); + if (err) { + task_err(task, + "invalid symbol id in descriptor"); + goto out; + } + + addr = addr + umd_dma_desc->dst2Offset; + dma_desc->frda |= ((addr >> 6U) & 0x3FFF); + } + + /* DMA_DESC_NDTM_CNTL0 */ + dma_desc->cb_ext = (((umd_dma_desc->srcCbStart >> 16) & 0x1) << 0) + | (((umd_dma_desc->dstCbStart >> 16) & 0x1) << 2) + | (((umd_dma_desc->srcCbSize >> 16) & 0x1) << 4) + | (((umd_dma_desc->dstCbSize >> 16) & 0x1) << 6); + /* DMA_DESC_NS1_ADV & DMA_DESC_ST1_ADV */ + dma_desc->srcpt1_cntl = + (((umd_dma_desc->srcRpt1 & 0xFF) << 24U) | + (umd_dma_desc->srcAdv1 & 0xFFFFFF)); + /* DMA_DESC_ND1_ADV & DMA_DESC_DT1_ADV */ + dma_desc->dstpt1_cntl = + (((umd_dma_desc->dstRpt1 & 0xFF) << 24U) | + (umd_dma_desc->dstAdv1 & 0xFFFFFF)); + /* DMA_DESC_NS2_ADV & DMA_DESC_ST2_ADV */ + dma_desc->srcpt2_cntl = + (((umd_dma_desc->srcRpt2 & 0xFF) << 24U) | + (umd_dma_desc->srcAdv2 & 0xFFFFFF)); + /* DMA_DESC_ND2_ADV & DMA_DESC_DT2_ADV */ + dma_desc->dstpt2_cntl = + (((umd_dma_desc->dstRpt2 & 0xFF) << 24U) | + (umd_dma_desc->dstAdv2 & 0xFFFFFF)); + /* DMA_DESC_NS3_ADV & DMA_DESC_ST3_ADV */ + dma_desc->srcpt3_cntl = + (((umd_dma_desc->srcRpt3 & 0xFF) << 24U) | + (umd_dma_desc->srcAdv3 & 0xFFFFFF)); + /* DMA_DESC_ND3_ADV & DMA_DESC_DT3_ADV */ + dma_desc->dstpt3_cntl = + (((umd_dma_desc->dstRpt3 & 0xFF) << 24U) | + (umd_dma_desc->dstAdv3 & 0xFFFFFF)); + } +out: + return err; +} + +static int +verify_dma_desc_hwseq(struct pva_submit_task *task, + struct nvpva_dma_channel *user_ch, + struct pva_hw_sweq_blob_s 
*blob, + u32 did) +{ + int err = 0; + u64 *desc_hwseq_frm = &task->desc_hwseq_frm; + struct nvpva_dma_descriptor *desc; + + if ((did == 0U) + || (did >= NVPVA_TASK_MAX_DMA_DESCRIPTORS)) { + pr_err("invalid descriptor ID"); + err = -EINVAL; + goto out; + } + + did = array_index_nospec((did - 1), + NVPVA_TASK_MAX_DMA_DESCRIPTORS); + + if ((*desc_hwseq_frm & (1ULL << did)) != 0ULL) + goto out; + + *desc_hwseq_frm |= (1ULL << did); + + desc = &task->dma_descriptors[did]; + + if ((desc->px != 0U) + || (desc->py != 0U) + || (desc->descReloadEnable != 0U)) { + pr_err("invalid descriptor padding"); + err = -EINVAL; + goto out; + } + + switch (desc->srcTransferMode) { + case DMA_DESC_SRC_XFER_VMEM: + if (((desc->dstTransferMode != DMA_DESC_DST_XFER_MC) + && (desc->dstTransferMode != DMA_DESC_DST_XFER_L2RAM)) + || (desc->dstCbEnable == 1U)) { + pr_err("invalid dst transfer mode"); + err = -EINVAL; + } + break; + case DMA_DESC_SRC_XFER_L2RAM: + case DMA_DESC_SRC_XFER_MC: + if ((desc->dstTransferMode != DMA_DESC_DST_XFER_VMEM) + || (desc->srcCbEnable == 1U)) { + pr_err("invalid src transfer mode"); + err = -EINVAL; + } + break; + case DMA_DESC_SRC_XFER_MMIO: + case DMA_DESC_SRC_XFER_INVAL: + case DMA_DESC_SRC_XFER_R5TCM: + case DMA_DESC_SRC_XFER_RSVD: + default: + pr_err("invalid dma desc transfer mode"); + err = -EINVAL; + break; + } + + if (err) + goto out; + + if (user_ch->hwseqTxSelect != 1U) + goto out; + + if (((desc->srcFormat == 1U) + || (desc->dstFormat == 1U)) + && (blob->f_header.to == 0)) { + pr_err("invalid tile offset"); + err = -EINVAL; + goto out; + } + + if (user_ch->hwseqTraversalOrder == 0) { + if (((uint32_t)((uint32_t)desc->tx + + (uint32_t)blob->f_header.pad_l) > 0xFFFFU) + || ((uint32_t)((uint32_t)desc->tx + + (uint32_t)blob->f_header.pad_r) > 0xFFFFU)) { + pr_err("invalid tx + pad x"); + err = -EINVAL; + } + } else if (user_ch->hwseqTraversalOrder == 1) { + if (((uint32_t)((uint32_t)desc->ty + + (uint32_t)blob->f_header.pad_t) > 0xFFFFU) + || ((uint32_t)((uint32_t)desc->ty + + (uint32_t)blob->f_header.pad_b) > 0xFFFFU)) { + pr_err("invalid ty + pad y"); + err = -EINVAL; + } + } else { + pr_err("invalid traversal order"); + err = -EINVAL; + } +out: + return err; +} + +static int +verify_hwseq_blob(struct pva_submit_task *task, + struct nvpva_dma_channel *user_ch, + struct nvpva_dma_descriptor *decriptors, + uint8_t *hwseqbuf_cpuva, + int8_t ch_num) + +{ + struct pva_hw_sweq_blob_s *blob; + struct pva_hwseq_desc_header_s *blob_desc; + struct pva_hwseq_cr_header_s *cr_header; + struct pva_hwseq_cr_header_s *end_addr; + u32 end = user_ch->hwseqEnd * 4; + u32 start = user_ch->hwseqStart * 4; + int err = 0; + u32 i; + u32 j; + u32 k; + u32 cr_count = 0; + u32 entry_size; + uintptr_t tmp_addr; + + blob = (struct pva_hw_sweq_blob_s *)&hwseqbuf_cpuva[start]; + end_addr = (struct pva_hwseq_cr_header_s *)&hwseqbuf_cpuva[end + 4]; + cr_header = &blob->cr_header; + blob_desc = &blob->desc_header; + + if ((end <= start) + || (((end - start + 4U) < sizeof(*blob)))) { + pr_err("invalid size of HW sequencer blob"); + err = -EINVAL; + goto out; + } + + if (end > task->hwseq_config.hwseqBuf.size) { + pr_err("blob end greater than buffer size"); + err = -EINVAL; + goto out; + } + + if (is_desc_mode(blob->f_header.fid)) { + if (task->hwseq_config.hwseqTrigMode == NVPVA_HWSEQTM_DMATRIG) { + pr_err("dma master not allowed"); + err = -EINVAL; + } + + goto out; + } + + if (!is_frame_mode(blob->f_header.fid)) { + pr_err("invalid addressing mode"); + err = -EINVAL; + goto out; + } + + cr_count =
(blob->f_header.no_cr + 1U); + start += sizeof(blob->f_header); + end += 4; + for (i = 0; i < cr_count; i++) { + u32 num_descriptors = cr_header->dec + 1; + u32 num_desc_entries = (cr_header->dec + 2) / 2; + + entry_size = num_desc_entries; + entry_size *= sizeof(struct pva_hwseq_desc_header_s); + entry_size += sizeof(struct pva_hwseq_cr_header_s); + if ((start + entry_size) > end) { + pr_err("row/column entries larger than blob"); + err = -EINVAL; + goto out; + } + + for (j = 0, k = 0; j < num_desc_entries; j++) { + err = verify_dma_desc_hwseq(task, + user_ch, + blob, + blob_desc->did1); + if (err) { + pr_err("seq descriptor 1 verification failed"); + goto out; + } + + ++k; + if (k >= num_descriptors) { + ++blob_desc; + break; + } + + err = verify_dma_desc_hwseq(task, + user_ch, + blob, + blob_desc->did2); + if (err) { + pr_err("seq descriptor 2 verification failed"); + goto out; + } + + ++blob_desc; + } + + start += entry_size; + cr_header = (struct pva_hwseq_cr_header_s *)blob_desc; + tmp_addr = (uintptr_t)blob_desc + sizeof(*cr_header); + blob_desc = (struct pva_hwseq_desc_header_s *)tmp_addr; + if (cr_header > end_addr) { + pr_err("blob size smaller than entries"); + err = -EINVAL; + goto out; + } + } +out: + return err; +} +/* User to FW mapping for DMA channel */ +static int +nvpva_task_dma_channel_mapping(struct pva_submit_task *task, + struct pva_dma_ch_config_s *ch, + u8 *hwseqbuf_cpuva, + int8_t ch_num, + int32_t hwgen, + bool hwseq_in_use) + +{ + struct nvpva_dma_channel *user_ch = &task->dma_channels[ch_num - 1]; + struct nvpva_dma_descriptor *decriptors = task->dma_descriptors; + u32 adb_limit; + int err = 0; + + if (((user_ch->descIndex > PVA_NUM_DYNAMIC_DESCS) || + ((user_ch->vdbSize + user_ch->vdbOffset) > + PVA_NUM_DYNAMIC_VDB_BUFFS))) { + pr_err("ERR: Invalid Channel control data"); + err = -EINVAL; + goto out; + } + + if (hwgen == PVA_HW_GEN1) + adb_limit = PVA_NUM_DYNAMIC_ADB_BUFFS_T19X; + else + adb_limit = PVA_NUM_DYNAMIC_ADB_BUFFS_T23X; + + if ((user_ch->adbSize + user_ch->adbOffset) > adb_limit) { + pr_err("ERR: Invalid ADB Buff size or offset"); + err = -EINVAL; + goto out; + } + + /* DMA_CHANNEL_CNTL0_CHSDID: DMA_CHANNEL_CNTL0[0] = descIndex + 1;*/ + ch->cntl0 = (((user_ch->descIndex + 1U) & 0xFFU) << 0U); + + /* DMA_CHANNEL_CNTL0_CHVMEMOREQ */ + ch->cntl0 |= ((user_ch->vdbSize & 0xFFU) << 8U); + + /* DMA_CHANNEL_CNTL0_CHBH */ + ch->cntl0 |= ((user_ch->adbSize & 0x1FFU) << 16U); + + /* DMA_CHANNEL_CNTL0_CHAXIOREQ */ + ch->cntl0 |= ((user_ch->blockHeight & 7U) << 25U); + + /* DMA_CHANNEL_CNTL0_CHPREF */ + ch->cntl0 |= ((user_ch->prefetchEnable & 1U) << 30U); + + /* Enable DMA channel */ + ch->cntl0 |= (0x1U << 31U); + + /* DMA_CHANNEL_CNTL1_CHPWT */ + ch->cntl1 = ((user_ch->reqPerGrant & 0x7U) << 2U); + + /* DMA_CHANNEL_CNTL1_CHVDBSTART */ + ch->cntl1 |= ((user_ch->vdbOffset & 0x7FU) << 16U); + + /* DMA_CHANNEL_CNTL1_CHADBSTART */ + if (hwgen == PVA_HW_GEN1) + ch->cntl1 |= ((user_ch->adbOffset & 0xFFU) << 24U); + else + ch->cntl1 |= ((user_ch->adbOffset & 0x1FFU) << 23U); + + ch->boundary_pad = user_ch->padValue; + if (hwgen == PVA_HW_GEN1) + goto out; + + /* Applicable only for T23x */ + + /* DMA_CHANNEL_CNTL1_CHREP */ + if ((user_ch->chRepFactor) && (user_ch->chRepFactor != 6)) { + pr_err("ERR: Invalid replication factor"); + err = -EINVAL; + goto out; + } + + ch->cntl1 |= ((user_ch->chRepFactor & 0x7U) << 8U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQSTART */ + ch->hwseqcntl = ((user_ch->hwseqStart & 0xFFU) << 0U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEND */ + 
ch->hwseqcntl |= ((user_ch->hwseqEnd & 0xFFU) << 12U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTD */ + ch->hwseqcntl |= ((user_ch->hwseqTriggerDone & 0x3U) << 24U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTS */ + ch->hwseqcntl |= ((user_ch->hwseqTxSelect & 0x1U) << 27U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTO */ + ch->hwseqcntl |= ((user_ch->hwseqTraversalOrder & 0x1U) << 30U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEN */ + ch->hwseqcntl |= ((user_ch->hwseqEnable & 0x1U) << 31U); + + if ((user_ch->hwseqEnable & 0x1U) && hwseq_in_use) + err = verify_hwseq_blob(task, + user_ch, + decriptors, + hwseqbuf_cpuva, + ch_num); + +out: + return err; +} + +int pva_task_write_dma_info(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + int err = 0; + u8 ch_num = 0L; + int hwgen = task->pva->version; + bool is_hwseq_mode = false; + struct pva_pinned_memory *mem; + u8 *hwseqbuf_cpuva = NULL; + u32 i; + u32 j; + u32 mask; + struct pva_dma_info_s *hw_task_dma_info; + + hw_task_dma_info = &hw_task->dma_info_and_params_list.dma_info; + + if (task->num_dma_descriptors == 0L || task->num_dma_channels == 0L) { + nvpva_dbg_info(task->pva, "pva: no DMA resources: NOOP mode"); + goto out; + } + + if (task->hwseq_config.hwseqBuf.pin_id != 0U) { + if (hwgen != PVA_HW_GEN2) { + /* HW sequencer is supported only in HW_GEN2 */ + err = -EINVAL; + goto out; + } + + /* Ensure that HWSeq blob size is valid and within the + * acceptable range, i.e. up to 1KB, as per HW Sequencer RAM + * size from T23x DMA IAS doc. + */ + if ((task->hwseq_config.hwseqBuf.size == 0U) || + (task->hwseq_config.hwseqBuf.size > 1024U)) { + err = -EINVAL; + goto out; + } + + is_hwseq_mode = true; + + /* Configure HWSeq trigger mode selection in DMA Configuration + * Register + */ + hw_task_dma_info->dma_common_config |= + (task->hwseq_config.hwseqTrigMode & 0x1U) << 12U; + + mem = pva_task_pin_mem(task, + task->hwseq_config.hwseqBuf.pin_id); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, "failed to pin hwseq buffer"); + goto out; + } + + hwseqbuf_cpuva = pva_dmabuf_vmap(mem->dmabuf) + + task->hwseq_config.hwseqBuf.offset; + hw_task_dma_info->dma_hwseq_base = mem->dma_addr + + task->hwseq_config.hwseqBuf.offset; + hw_task_dma_info->num_hwseq = + task->hwseq_config.hwseqBuf.size; + } + + /* write dma channel info */ + hw_task_dma_info->num_channels = task->num_dma_channels; + hw_task_dma_info->num_descriptors = task->num_dma_descriptors; + hw_task_dma_info->descriptor_id = 1U; /* PVA_DMA_DESC0 */ + task->desc_hwseq_frm = 0ULL; + + for (i = 0; i < task->num_dma_channels; i++) { + ch_num = i + 1; /* Channel 0 can't use */ + err = nvpva_task_dma_channel_mapping( + task, + &hw_task_dma_info->dma_channels[i], + hwseqbuf_cpuva, + ch_num, + hwgen, + is_hwseq_mode); + if (err) { + task_err(task, "failed to map DMA channel info"); + goto out; + } + + /* Ensure that HWSEQCNTRL is zero for all dma channels in SW + * mode + */ + if (!is_hwseq_mode && + (hw_task_dma_info->dma_channels[i].hwseqcntl != 0U)) { + task_err(task, "invalid HWSeq config in SW mode"); + err = -EINVAL; + goto out; + } + + hw_task_dma_info->dma_channels[i].ch_number = ch_num; + mask = task->dma_channels[i].outputEnableMask; + for (j = 0; j < 7; j++) { + u32 *trig = &(hw_task_dma_info->dma_triggers[j]); + + (*trig) |= (((mask >> 2*j) & 1U) << ch_num); + (*trig) |= (((mask >> (2*j + 1)) & 1U) << (ch_num + 16U)); + } + + hw_task_dma_info->dma_triggers[7] |= + (((mask >> 14) & 1U) << ch_num); + if (hwgen == PVA_HW_GEN2) { + u32 *trig = &(hw_task_dma_info->dma_triggers[8]); + + 
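+ /* Sketch of the intended mapping, inferred from the statements + * below and the pva_dma.h signal enum: outputEnableMask bits 15 + * and 16 carry PVA_HWSEQ_VPUREAD_START and PVA_HWSEQ_VPUWRITE_START, + * and on T23x they land in this ninth trigger word with read-start + * in bit ch_num and write-start in bit (ch_num + 16). + */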
(*trig) |= (((mask >> 15) & 1U) << ch_num); + (*trig) |= (((mask >> 16) & 1U) << (ch_num + 16U)); + } + } + + err = nvpva_task_dma_desc_mapping(task, hw_task); + if (err) { + task_err(task, "failed to map DMA desc info"); + goto out; + } + + hw_task->task.dma_info = + task->dma_addr + offsetof(struct pva_hw_task, dma_info_and_params_list) + + offsetof(struct pva_dma_info_and_params_list_s, dma_info); + hw_task_dma_info->dma_descriptor_base = + task->dma_addr + offsetof(struct pva_hw_task, dma_desc); + + hw_task_dma_info->dma_info_version = PVA_DMA_INFO_VERSION_ID; + hw_task_dma_info->dma_info_size = sizeof(struct pva_dma_info_s); +out: + if (hwseqbuf_cpuva != NULL) + pva_dmabuf_vunmap(mem->dmabuf, hwseqbuf_cpuva); + + return err; +} + +int pva_task_write_dma_misr_info(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + struct pva_dma_info_s *hw_task_dma_info; + uint32_t common_config; + // MISR channel mask bits in DMA COMMON CONFIG + uint32_t common_config_ch_mask = PVA_MASK(31, 16); + // AXI output enable bit in DMA COMMON CONFIG + uint32_t common_config_ao_enable_mask = PVA_BIT(15U); + // SW Event select bit in DMA COMMON CONFIG + uint32_t common_config_sw_event0 = PVA_BIT(5U); + // MISR TO interrupt enable bit in DMA COMMON CONFIG + uint32_t common_config_misr_to_enable_mask = PVA_BIT(0U); + + hw_task_dma_info = &hw_task->dma_info_and_params_list.dma_info; + common_config = hw_task_dma_info->dma_common_config; + + hw_task_dma_info->dma_misr_base = 0U; + if (task->dma_misr_config.enable != 0U) { + hw_task->dma_misr_config.ref_addr = + task->dma_misr_config.ref_addr; + hw_task->dma_misr_config.seed_crc0 = + task->dma_misr_config.seed_crc0; + hw_task->dma_misr_config.ref_data_1 = + task->dma_misr_config.ref_data_1; + hw_task->dma_misr_config.seed_crc1 = + task->dma_misr_config.seed_crc1; + hw_task->dma_misr_config.ref_data_2 = + task->dma_misr_config.ref_data_2; + hw_task->dma_misr_config.misr_timeout = + task->dma_misr_config.misr_timeout; + + hw_task_dma_info->dma_misr_base = task->dma_addr + + offsetof(struct pva_hw_task, dma_misr_config); + + /* Prepare data to be written to DMA COMMON CONFIG register */ + + // Select channels that will participate in MISR computation + common_config = ((common_config & ~common_config_ch_mask) + | (~task->dma_misr_config.channel_mask << 16U)); + // Set SW_EVENT0 bit to 0 + common_config = (common_config & ~common_config_sw_event0); + // Disable AXI output + common_config = common_config & ~common_config_ao_enable_mask; + // common_config = common_config | common_config_ao_enable_mask; + // Enable MISR TO interrupts + common_config = common_config | common_config_misr_to_enable_mask; + + hw_task_dma_info->dma_common_config = common_config; + } + + return 0; +} diff --git a/drivers/video/tegra/host/pva/pva_dma.h b/drivers/video/tegra/host/pva/pva_dma.h new file mode 100644 index 00000000..1fe589b4 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_dma.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_DMA_H +#define PVA_DMA_H + +#include "pva_queue.h" + +enum nvpva_task_dma_trig_vpu_hw_events { + TRIG_VPU_NO_TRIGGER, + TRIG_VPU_DMA_READ0_START, + TRIG_VPU_DMA_STORE0_START, + TRIG_VPU_CONFIG_START, + TRIG_VPU_DMA_READ1_START, + TRIG_VPU_DMA_STORE1_START, + TRIG_VPU_DMA_READ2_START, + TRIG_VPU_DMA_STORE2_START, + TRIG_VPU_DMA_READ3_START, + TRIG_VPU_DMA_STORE3_START, + TRIG_VPU_DMA_READ4_START, + TRIG_VPU_DMA_STORE4_START, + TRIG_VPU_DMA_READ5_START, + TRIG_VPU_DMA_STORE5_START, + TRIG_VPU_DMA_READ6_START, + TRIG_VPU_DMA_STORE6_START +}; + +enum nvpva_dma_trig_event_mode { + TRIG_EVENT_MODE_DISABLED, + TRIG_EVENT_MODE_DIM4, + TRIG_EVENT_MODE_DIM3, + TRIG_EVENT_MODE_TILE +}; + +enum nvpva_task_dma_src_xfer_mode { + DMA_DESC_SRC_XFER_INVAL = 0U, + DMA_DESC_SRC_XFER_MC = 1U, + DMA_DESC_SRC_XFER_VMEM = 2U, + DMA_DESC_SRC_XFER_L2RAM = 3U, + DMA_DESC_SRC_XFER_R5TCM = 4U, + DMA_DESC_SRC_XFER_MMIO = 5U, + DMA_DESC_SRC_XFER_RSVD = 6U, + DMA_DESC_SRC_XFER_VPU_CONFIG = 7U, +}; + +enum nvpva_task_dma_dst_xfer_mode { + DMA_DESC_DST_XFER_INVAL = 0U, + DMA_DESC_DST_XFER_MC = 1U, + DMA_DESC_DST_XFER_VMEM = 2U, + DMA_DESC_DST_XFER_L2RAM = 3U, + DMA_DESC_DST_XFER_R5TCM = 4U, + DMA_DESC_DST_XFER_MMIO = 5U, + DMA_DESC_DST_XFER_RSVD1 = 6U, + DMA_DESC_DST_XFER_RSVD2 = 7U, +}; + +/* signals generated by channel */ +enum pva_dma_chan_sig { + PVA_DMA_READ0 = 0x0001, + PVA_DMA_STORE0 = 0x0002, + PVA_DMA_READ1 = 0x0004, + PVA_DMA_STORE1 = 0x0008, + PVA_DMA_READ2 = 0x0010, + PVA_DMA_STORE2 = 0x0020, + PVA_DMA_READ3 = 0x0040, + PVA_DMA_STORE3 = 0x0080, + PVA_DMA_READ4 = 0x0100, + PVA_DMA_STORE4 = 0x0200, + PVA_DMA_READ5 = 0x0400, + PVA_DMA_STORE5 = 0x0800, + PVA_DMA_READ6 = 0x1000, + PVA_DMA_STORE6 = 0x2000, + PVA_VPUCONFIG = 0x4000, + PVA_HWSEQ_VPUREAD_START = 0x8000, + PVA_HWSEQ_VPUWRITE_START = 0x10000 +}; + +int pva_task_write_dma_info(struct pva_submit_task *task, + struct pva_hw_task *hw_task); + +int pva_task_write_dma_misr_info(struct pva_submit_task *task, + struct pva_hw_task *hw_task); +#endif diff --git a/drivers/video/tegra/host/pva/pva_fw_carveout.c b/drivers/video/tegra/host/pva/pva_fw_carveout.c new file mode 100644 index 00000000..3949bd2e --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_fw_carveout.c @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PVA carveout handling + * + * Copyright (c) 2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pva.h" +#include "pva_fw_carveout.h" + +struct nvpva_carveout_info *pva_fw_co_get_info(struct pva *pva) +{ + struct device_node *np; + const char *status = NULL; + u32 reg[4] = {0}; + + np = of_find_compatible_node(NULL, NULL, "nvidia,pva-carveout"); + if (np == NULL) { + dev_err(&pva->pdev->dev, "find node failed\n"); + goto err_out; + } + + if (of_property_read_string(np, "status", &status)) { + dev_err(&pva->pdev->dev, "read status failed\n"); + goto err_out; + } + + if (strcmp(status, "okay")) { + dev_err(&pva->pdev->dev, "status %s compare failed\n", status); + goto err_out; + } + + if (of_property_read_u32_array(np, "reg", reg, 4)) { + dev_err(&pva->pdev->dev, "reaf_32_array failed\n"); + goto err_out; + } + + pva->fw_carveout.base = ((u64)reg[0] << 32 | (u64)reg[1]); + pva->fw_carveout.size = ((u64)reg[2] << 32 | (u64)reg[3]); + pva->fw_carveout.base_va = 0; + pva->fw_carveout.base_pa = 0; + pva->fw_carveout.initialized = true; + + nvpva_dbg_fn(pva, "get co success\n"); + + return &pva->fw_carveout; +err_out: + dev_err(&pva->pdev->dev, "get co fail\n"); + pva->fw_carveout.initialized = false; + + return NULL; +} + +bool pva_fw_co_initialized(struct pva *pva) +{ + return pva->fw_carveout.initialized; +} diff --git a/drivers/video/tegra/host/pva/pva_fw_carveout.h b/drivers/video/tegra/host/pva/pva_fw_carveout.h new file mode 100644 index 00000000..860a7ccb --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_fw_carveout.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PVA carveout handling + * + * Copyright (c) 2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_FW_CO_H +#define PVA_FW_CO_H + +struct nvpva_carveout_info { + dma_addr_t base; + dma_addr_t base_pa; + void *base_va; + size_t size; + bool initialized; +}; + +struct nvpva_carveout_info *pva_fw_co_get_info(struct pva *pva); +bool pva_fw_co_initialized(struct pva *pva); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_hwseq.h b/drivers/video/tegra/host/pva/pva_hwseq.h new file mode 100644 index 00000000..f8fc60df --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_hwseq.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef PVA_HWSEQ_H +#define PVA_HWSEQ_H + +#include +#include +#include + +#define PVA_HWSEQ_FRAME_ADDR 0xC0DE +#define PVA_HWSEQ_DESC_ADDR 0xDEAD + +struct pva_hwseq_frame_header_s { + u16 fid; + u8 fr; + u8 no_cr; + u16 to; + u16 fo; + u8 pad_r; + u8 pad_t; + u8 pad_l; + u8 pad_b; +} __packed; + +struct pva_hwseq_cr_header_s { + u8 dec; + u8 crr; + u16 cro; +} __packed; + +struct pva_hwseq_desc_header_s { + u8 did1; + u8 dr1; + u8 did2; + u8 dr2; +} __packed; + +struct pva_hw_sweq_blob_s { + struct pva_hwseq_frame_header_s f_header; + struct pva_hwseq_cr_header_s cr_header; + struct pva_hwseq_desc_header_s desc_header; +} __packed; + +static inline bool is_frame_mode(u16 id) +{ + return (id == PVA_HWSEQ_FRAME_ADDR); +} + +static inline bool is_desc_mode(u16 id) +{ + return (id == PVA_HWSEQ_DESC_ADDR); +} +#endif + diff --git a/drivers/video/tegra/host/pva/pva_interface_regs_t19x.c b/drivers/video/tegra/host/pva/pva_interface_regs_t19x.c new file mode 100644 index 00000000..6ba68c4f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_interface_regs_t19x.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif +#include +#include + +#include "pva.h" +#include "pva_mailbox.h" +#include "pva_interface_regs_t19x.h" + +static struct pva_status_interface_registers t19x_status_regs[NUM_INTERFACES_T19X] = { + { + { + PVA_CCQ_STATUS3_REG, + PVA_CCQ_STATUS4_REG, + PVA_CCQ_STATUS5_REG, + PVA_CCQ_STATUS6_REG, + PVA_CCQ_STATUS7_REG + } + }, +}; + +void read_status_interface_t19x(struct pva *pva, + uint32_t interface_id, u32 isr_status, + struct pva_cmd_status_regs *status_output) +{ + int i; + uint32_t *status_registers; + + status_registers = t19x_status_regs[interface_id].registers; + + for (i = 0; i < PVA_CMD_STATUS_REGS; i++) { + if (isr_status & (PVA_VALID_STATUS3 << i)) { + status_output->status[i] = host1x_readl(pva->pdev, + status_registers[i]); + if ((i == 0) && (isr_status & PVA_CMD_ERROR)) { + status_output->error = + PVA_GET_ERROR_CODE( + status_output->status[i]); + } + } + } +} diff --git a/drivers/video/tegra/host/pva/pva_interface_regs_t19x.h b/drivers/video/tegra/host/pva/pva_interface_regs_t19x.h new file mode 100644 index 00000000..2d887697 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_interface_regs_t19x.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_INTERFACE_REGS_T19X_H__ +#define __PVA_INTERFACE_REGS_T19X_H__ + +#include "pva.h" +#include "pva_mailbox.h" + +#define NUM_INTERFACES_T19X 1 + +#define PVA_CCQ_STATUS3_REG 0x7200c +#define PVA_CCQ_STATUS4_REG 0x72010 +#define PVA_CCQ_STATUS5_REG 0x72014 +#define PVA_CCQ_STATUS6_REG 0x72018 +#define PVA_CCQ_STATUS7_REG 0x7201c + +void read_status_interface_t19x(struct pva *pva, + uint32_t interface_id, u32 isr_status, + struct pva_cmd_status_regs *status_output); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_interface_regs_t23x.c b/drivers/video/tegra/host/pva/pva_interface_regs_t23x.c new file mode 100644 index 00000000..af072343 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_interface_regs_t23x.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +#include +#else +#include +#endif +#include +#include + +#include "pva.h" +#include "pva_mailbox.h" +#include "pva_interface_regs_t23x.h" + +static struct pva_status_interface_registers t23x_status_regs[NUM_INTERFACES_T23X] = { + { + { + PVA_EMPTY_STATUS_REG, + PVA_MBOX_STATUS4_REG, + PVA_MBOX_STATUS5_REG, + PVA_MBOX_STATUS6_REG, + PVA_MBOX_STATUS7_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ0_STATUS3_REG, + PVA_CCQ0_STATUS4_REG, + PVA_CCQ0_STATUS5_REG, + PVA_CCQ0_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ1_STATUS3_REG, + PVA_CCQ1_STATUS4_REG, + PVA_CCQ1_STATUS5_REG, + PVA_CCQ1_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ2_STATUS3_REG, + PVA_CCQ2_STATUS4_REG, + PVA_CCQ2_STATUS5_REG, + PVA_CCQ2_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ3_STATUS3_REG, + PVA_CCQ3_STATUS4_REG, + PVA_CCQ3_STATUS5_REG, + PVA_CCQ3_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ4_STATUS3_REG, + PVA_CCQ4_STATUS4_REG, + PVA_CCQ4_STATUS5_REG, + PVA_CCQ4_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ5_STATUS3_REG, + PVA_CCQ5_STATUS4_REG, + PVA_CCQ5_STATUS5_REG, + PVA_CCQ5_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ6_STATUS3_REG, + PVA_CCQ6_STATUS4_REG, + PVA_CCQ6_STATUS5_REG, + PVA_CCQ6_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ7_STATUS3_REG, + PVA_CCQ7_STATUS4_REG, + PVA_CCQ7_STATUS5_REG, + PVA_CCQ7_STATUS6_REG + } + } +}; + + +void read_status_interface_t23x(struct pva *pva, + uint32_t interface_id, u32 isr_status, + struct pva_cmd_status_regs *status_output) +{ + int i; + u32 valid_status = PVA_VALID_STATUS3; + uint32_t *status_registers; + status_registers = t23x_status_regs[interface_id].registers; + if (isr_status & PVA_CMD_ERROR) { + status_output->error = PVA_GET_ERROR_CODE(isr_status); + } + if (isr_status & 
PVA_VALID_STATUS3) { + status_output->status[0] = PVA_GET_ERROR_CODE(isr_status); + } + for (i = 1; i < PVA_CMD_STATUS_REGS; i++) { + valid_status = valid_status << 1; + if (isr_status & valid_status) { + status_output->status[i] = host1x_readl(pva->pdev, + status_registers[i]); + } + } + +} diff --git a/drivers/video/tegra/host/pva/pva_interface_regs_t23x.h b/drivers/video/tegra/host/pva/pva_interface_regs_t23x.h new file mode 100644 index 00000000..5ac6562f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_interface_regs_t23x.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_INTERFACE_REGS_T23X_H__ +#define __PVA_INTERFACE_REGS_T23X_H__ + +#include "pva.h" + +#define NUM_INTERFACES_T23X 9 + +#define PVA_EMPTY_STATUS_REG 0 + +#define PVA_MBOX_STATUS4_REG 0x178000 +#define PVA_MBOX_STATUS5_REG 0x180000 +#define PVA_MBOX_STATUS6_REG 0x188000 +#define PVA_MBOX_STATUS7_REG 0x190000 + +#define PVA_CCQ0_STATUS3_REG 0x260010 +#define PVA_CCQ0_STATUS4_REG 0x260014 +#define PVA_CCQ0_STATUS5_REG 0x260018 +#define PVA_CCQ0_STATUS6_REG 0x26001c + +#define PVA_CCQ1_STATUS3_REG 0x270010 +#define PVA_CCQ1_STATUS4_REG 0x270014 +#define PVA_CCQ1_STATUS5_REG 0x270018 +#define PVA_CCQ1_STATUS6_REG 0x27001c + +#define PVA_CCQ2_STATUS3_REG 0x280010 +#define PVA_CCQ2_STATUS4_REG 0x280014 +#define PVA_CCQ2_STATUS5_REG 0x280018 +#define PVA_CCQ2_STATUS6_REG 0x28001c + +#define PVA_CCQ3_STATUS3_REG 0x290010 +#define PVA_CCQ3_STATUS4_REG 0x290014 +#define PVA_CCQ3_STATUS5_REG 0x290018 +#define PVA_CCQ3_STATUS6_REG 0x29001c + +#define PVA_CCQ4_STATUS3_REG 0x2a0010 +#define PVA_CCQ4_STATUS4_REG 0x2a0014 +#define PVA_CCQ4_STATUS5_REG 0x2a0018 +#define PVA_CCQ4_STATUS6_REG 0x2a001c + +#define PVA_CCQ5_STATUS3_REG 0x2b0010 +#define PVA_CCQ5_STATUS4_REG 0x2b0014 +#define PVA_CCQ5_STATUS5_REG 0x2b0018 +#define PVA_CCQ5_STATUS6_REG 0x2b001c + +#define PVA_CCQ6_STATUS3_REG 0x2c0010 +#define PVA_CCQ6_STATUS4_REG 0x2c0014 +#define PVA_CCQ6_STATUS5_REG 0x2c0018 +#define PVA_CCQ6_STATUS6_REG 0x2c001c + +#define PVA_CCQ7_STATUS3_REG 0x2d0010 +#define PVA_CCQ7_STATUS4_REG 0x2d0014 +#define PVA_CCQ7_STATUS5_REG 0x2d0018 +#define PVA_CCQ7_STATUS6_REG 0x2d001c + +void read_status_interface_t23x(struct pva *pva, + uint32_t interface_id, u32 isr_status, + struct pva_cmd_status_regs *status_output); +#endif diff --git a/drivers/video/tegra/host/pva/pva_ioctl.c b/drivers/video/tegra/host/pva/pva_ioctl.c new file mode 100644 index 00000000..52e59e57 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ioctl.c @@ -0,0 +1,1115 @@ +/* + * Copyright (c) 2016-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pva.h" +#include "pva_queue.h" +#include "nvpva_buffer.h" +#include "pva_vpu_exe.h" +#include "pva_vpu_app_auth.h" +#include "pva_system_allow_list.h" +#include "nvpva_client.h" +/** + * @brief pva_private - Per-fd specific data + * + * pdev Pointer the pva device + * queue Pointer the struct nvpva_queue + * buffer Pointer to the struct nvpva_buffer + */ +struct pva_private { + struct pva *pva; + struct nvpva_queue *queue; + struct pva_cb *vpu_print_buffer; + struct nvpva_client_context *client; +}; + +static int copy_part_from_user(void *kbuffer, size_t kbuffer_size, + struct nvpva_ioctl_part part) +{ + int err = 0; + int copy_ret; + + if (part.size == 0) + goto out; + + if (kbuffer_size < part.size) { + pr_err("pva: failed to copy from user due to size too large: %llu > %lu", + part.size, kbuffer_size); + err = -EINVAL; + goto out; + } + copy_ret = + copy_from_user(kbuffer, (void __user *)part.addr, part.size); + if (copy_ret) { + err = -EFAULT; + goto out; + } +out: + return err; +} + +static struct pva_cb *pva_alloc_cb(struct device *dev, uint32_t size) +{ + int err; + struct pva_cb *cb; + + if ((size == 0) || (((size - 1) & size) != 0)) { + dev_err(dev, "invalid circular buffer size: %u; it must be 2^N.", size); + err = -EINVAL; + goto out; + } + + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (IS_ERR_OR_NULL(cb)) { + err = PTR_ERR(cb); + goto out; + } + + cb->size = size; + cb->buffer_va = + dma_alloc_coherent(dev, cb->size, &cb->buffer_addr, GFP_KERNEL); + + if (IS_ERR_OR_NULL(cb->buffer_va)) { + err = PTR_ERR(cb->buffer_va); + goto free_mem; + } + + cb->head_va = dma_alloc_coherent(dev, sizeof(uint32_t), &cb->head_addr, + GFP_KERNEL); + if (IS_ERR_OR_NULL(cb->head_va)) { + err = PTR_ERR(cb->head_va); + goto free_buffer; + } + + cb->tail_va = dma_alloc_coherent(dev, sizeof(uint32_t), &cb->tail_addr, + GFP_KERNEL); + if (IS_ERR_OR_NULL(cb->tail_va)) { + err = PTR_ERR(cb->tail_va); + goto free_head; + } + + cb->err_va = dma_alloc_coherent(dev, sizeof(uint32_t), &cb->err_addr, + GFP_KERNEL); + if (IS_ERR_OR_NULL(cb->err_va)) { + err = PTR_ERR(cb->err_va); + goto free_tail; + } + + *cb->head_va = 0; + cb->tail = 0; + *cb->tail_va = cb->tail; + *cb->err_va = 0; + return cb; + +free_tail: + dma_free_coherent(dev, sizeof(uint32_t), cb->tail_va, cb->tail_addr); +free_head: + dma_free_coherent(dev, sizeof(uint32_t), cb->head_va, cb->head_addr); +free_buffer: + dma_free_coherent(dev, cb->size, cb->buffer_va, cb->buffer_addr); +free_mem: + kfree(cb); +out: + return ERR_PTR(err); +} + +static void pva_free_cb(struct device *dev, struct pva_cb *cb) +{ + dma_free_coherent(dev, sizeof(uint32_t), cb->tail_va, cb->tail_addr); + dma_free_coherent(dev, sizeof(uint32_t), cb->head_va, cb->head_addr); + dma_free_coherent(dev, sizeof(uint32_t), cb->err_va, cb->err_addr); + dma_free_coherent(dev, cb->size, cb->buffer_va, cb->buffer_addr); + kfree(cb); +} + + +/** + * @brief Copy a single task from userspace to kernel space + * + * This function copies 
fields from ioctl_task and performs a deep copy + * of the task to kernel memory. At the same time, input values shall + * be validated. This allows using all the fields without manually performing + * copies of the structure and performing checks later. + * + * @param ioctl_task Pointer to a userspace task that is copied + * to kernel memory + * @param task Pointer to a task that should be created + * @return 0 on Success or negative error code + * + */ +static int pva_copy_task(struct nvpva_ioctl_task *ioctl_task, + struct pva_submit_task *task) +{ + int err = 0; + u32 i; + struct pva_elf_image *image = NULL; + + nvpva_dbg_fn(task->pva, ""); + /* + * These fields are clear-text in the task descriptor. Just + * copy them. + */ + task->exe_id = ioctl_task->exe_id; + task->l2_alloc_size = ioctl_task->l2_alloc_size; + task->symbol_payload_size = ioctl_task->symbol_payload.size; + task->flags = ioctl_task->flags; + if (task->exe_id < NVPVA_NOOP_EXE_ID) + image = get_elf_image(&task->client->elf_ctx, task->exe_id); + + task->is_system_app = (image != NULL) && image->is_system_app; + +#define IOCTL_ARRAY_SIZE(field_name) \ + (ioctl_task->field_name.size / sizeof(task->field_name[0])) + + task->num_prefences = IOCTL_ARRAY_SIZE(prefences); + task->num_user_fence_actions = IOCTL_ARRAY_SIZE(user_fence_actions); + task->num_input_task_status = IOCTL_ARRAY_SIZE(input_task_status); + task->num_output_task_status = IOCTL_ARRAY_SIZE(output_task_status); + task->num_dma_descriptors = IOCTL_ARRAY_SIZE(dma_descriptors); + task->num_dma_channels = IOCTL_ARRAY_SIZE(dma_channels); + task->num_symbols = IOCTL_ARRAY_SIZE(symbols); + +#undef IOCTL_ARRAY_SIZE + + err = copy_part_from_user(&task->prefences, sizeof(task->prefences), + ioctl_task->prefences); + if (err) + goto out; + + err = copy_part_from_user(&task->user_fence_actions, + sizeof(task->user_fence_actions), + ioctl_task->user_fence_actions); + if (err) + goto out; + + err = copy_part_from_user(&task->input_task_status, + sizeof(task->input_task_status), + ioctl_task->input_task_status); + if (err) + goto out; + + err = copy_part_from_user(&task->output_task_status, + sizeof(task->output_task_status), + ioctl_task->output_task_status); + if (err) + goto out; + + err = copy_part_from_user(&task->dma_descriptors, + sizeof(task->dma_descriptors), + ioctl_task->dma_descriptors); + if (err) + goto out; + + err = copy_part_from_user(&task->dma_channels, + sizeof(task->dma_channels), + ioctl_task->dma_channels); + if (err) + goto out; + + if (task->is_system_app) + err = copy_part_from_user(&task->dma_misr_config, + sizeof(task->dma_misr_config), + ioctl_task->dma_misr_config); + else + task->dma_misr_config.enable = 0; + + if (err) + goto out; + + err = copy_part_from_user(&task->hwseq_config, + sizeof(task->hwseq_config), + ioctl_task->hwseq_config); + if (err) + goto out; + + err = copy_part_from_user(&task->symbols, sizeof(task->symbols), + ioctl_task->symbols); + if (err) + goto out; + + err = copy_part_from_user(&task->symbol_payload, + sizeof(task->symbol_payload), + ioctl_task->symbol_payload); + if (err) + goto out; + + /* Parse each postfence provided by user in 1D array and store into + * internal 2D array representation wrt type of fence and number of + * fences of each type for further processing + */ + for (i = 0; i < task->num_user_fence_actions; i++) { + struct nvpva_fence_action *fence = &task->user_fence_actions[i]; + enum nvpva_fence_action_type fence_type = fence->type; + u8 num_fence; + + if ((fence_type == 0U) || + (fence_type >= 
NVPVA_MAX_FENCE_TYPES)) { + task_err(task, "invalid fence type at index: %u", i); + err = -EINVAL; + goto out; + } + + /* Ensure that the number of postfences for each type are within + * limit + */ + num_fence = task->num_pva_fence_actions[fence_type]; + if (num_fence >= NVPVA_TASK_MAX_FENCEACTIONS) { + task_err(task, "too many fences for type: %u", + fence_type); + err = -EINVAL; + goto out; + } + + task->pva_fence_actions[fence_type][num_fence] = *fence; + task->num_pva_fence_actions[fence_type] += 1; + } + + /* Check for valid HWSeq trigger mode */ + if ((task->hwseq_config.hwseqTrigMode != NVPVA_HWSEQTM_VPUTRIG) && + (task->hwseq_config.hwseqTrigMode != NVPVA_HWSEQTM_DMATRIG)) { + task_err(task, "invalid hwseq trigger mode: %d", + task->hwseq_config.hwseqTrigMode); + err = -EINVAL; + goto out; + } + +#undef COPY_FIELD + +out: + return err; +} + +/** + * @brief Submit a task to PVA + * + * This function takes the given list of tasks, converts + * them into kernel internal representation and submits + * them to the task queue. On success, it populates + * the post-fence structures in userspace and returns 0. + * + * @param priv PVA Private data + * @param arg ioctl data + * @return 0 on Success or negative error code + * + */ +static int pva_submit(struct pva_private *priv, void *arg) +{ + struct nvpva_ioctl_submit_in_arg *ioctl_tasks_header = + (struct nvpva_ioctl_submit_in_arg *)arg; + struct nvpva_ioctl_task *ioctl_tasks = NULL; + struct pva_submit_tasks *tasks_header; + int err = 0; + unsigned long rest; + int i, j; + uint32_t num_tasks; + + num_tasks = ioctl_tasks_header->tasks.size / sizeof(*ioctl_tasks); + /* Sanity checks for the task heaader */ + if (num_tasks > NVPVA_SUBMIT_MAX_TASKS) { + err = -EINVAL; + dev_err(&priv->pva->pdev->dev, + "exceeds maximum number of tasks: %u > %u", num_tasks, + NVPVA_SUBMIT_MAX_TASKS); + goto out; + } + + num_tasks = array_index_nospec(num_tasks, NVPVA_SUBMIT_MAX_TASKS + 1); + if (ioctl_tasks_header->version > 0) { + err = -ENOSYS; + goto out; + } + + + /* Allocate memory for the UMD representation of the tasks */ + ioctl_tasks = kzalloc(ioctl_tasks_header->tasks.size, GFP_KERNEL); + if (ioctl_tasks == NULL) { + pr_err("pva: submit: allocation for tasks failed"); + err = -ENOMEM; + goto out; + } + + tasks_header = kzalloc(sizeof(struct pva_submit_tasks), GFP_KERNEL); + if (tasks_header == NULL) { + pr_err("pva: submit: allocation for tasks_header failed"); + kfree(ioctl_tasks); + err = -ENOMEM; + goto out; + } + + /* Copy the tasks from userspace */ + rest = copy_from_user(ioctl_tasks, + (void __user *)ioctl_tasks_header->tasks.addr, + ioctl_tasks_header->tasks.size); + + if (rest > 0) { + err = -EFAULT; + pr_err("pva: failed to copy tasks"); + goto free_ioctl_tasks; + } + + tasks_header->num_tasks = 0; + + /* Go through the tasks and make a KMD representation of them */ + for (i = 0; i < num_tasks; i++) { + struct pva_submit_task *task; + struct nvpva_queue_task_mem_info task_mem_info; + long timeout_jiffies = usecs_to_jiffies( + ioctl_tasks_header->submission_timeout_us); + + /* Allocate memory for the task and dma */ + err = down_timeout(&priv->queue->task_pool_sem, + timeout_jiffies); + if (err) { + pr_err("pva: timeout when allocating task buffer"); + /* UMD expects this error code */ + err = -EAGAIN; + goto free_tasks; + } + err = nvpva_queue_alloc_task_memory(priv->queue, + &task_mem_info); + task = task_mem_info.kmem_addr; + + WARN_ON((err < 0) || !task); + + /* initialize memory to 0 */ + (void)memset(task_mem_info.kmem_addr, 0, + 
priv->queue->task_kmem_size); + (void)memset(task_mem_info.va, 0, priv->queue->task_dma_size); + + /* Obtain an initial reference */ + kref_init(&task->ref); + INIT_LIST_HEAD(&task->node); + + tasks_header->tasks[i] = task; + tasks_header->num_tasks += 1; + + task->dma_addr = task_mem_info.dma_addr; + task->aux_dma_addr = task_mem_info.aux_dma_addr; + task->va = task_mem_info.va; + task->aux_va = task_mem_info.aux_va; + task->pool_index = task_mem_info.pool_index; + + task->pva = priv->pva; + task->queue = priv->queue; + task->client = priv->client; + + /* setup ownership */ + err = nvhost_module_busy(task->pva->pdev); + if (err) + goto free_tasks; + + nvpva_client_context_get(task->client); + + err = pva_copy_task(ioctl_tasks + i, task); + if (err) + goto free_tasks; + + if (priv->pva->vpu_printf_enabled) + task->stdout = priv->vpu_print_buffer; + } + + /* Populate header structure */ + tasks_header->execution_timeout_us = + ioctl_tasks_header->execution_timeout_us; + + /* TODO: submission timeout */ + /* ..and submit them */ + err = nvpva_queue_submit(priv->queue, tasks_header); + + if (err < 0) + goto free_tasks; + + /* Copy fences back to userspace */ + for (i = 0; i < tasks_header->num_tasks; i++) { + struct pva_submit_task *task = tasks_header->tasks[i]; + u32 n_copied[NVPVA_MAX_FENCE_TYPES] = {}; + struct nvpva_fence_action __user *action_fences = + (struct nvpva_fence_action __user *)ioctl_tasks[i] + .user_fence_actions.addr; + + /* Copy return postfences in the same order as that provided in + * input + */ + for (j = 0; j < task->num_user_fence_actions; j++) { + struct nvpva_fence_action *fence = + &task->user_fence_actions[j]; + enum nvpva_fence_action_type fence_type = fence->type; + + *fence = task->pva_fence_actions[fence_type] + [n_copied[fence_type]]; + n_copied[fence_type] += 1; + } + + rest = copy_to_user(action_fences, task->user_fence_actions, + ioctl_tasks[i].user_fence_actions.size); + + if (rest) { + nvpva_warn(&priv->pva->pdev->dev, + "Failed to copy pva fences to userspace"); + err = -EFAULT; + goto free_tasks; + } + } + +free_tasks: + + for (i = 0; i < tasks_header->num_tasks; i++) { + struct pva_submit_task *task = tasks_header->tasks[i]; + /* Drop the reference */ + kref_put(&task->ref, pva_task_free); + } + +free_ioctl_tasks: + + kfree(ioctl_tasks); + kfree(tasks_header); + +out: + return err; +} + +static int pva_pin(struct pva_private *priv, void *arg) +{ + int err = 0; + struct dma_buf *dmabuf[1]; + struct nvpva_pin_in_arg *in_arg = (struct nvpva_pin_in_arg *)arg; + struct nvpva_pin_out_arg *out_arg = (struct nvpva_pin_out_arg *)arg; + + dmabuf[0] = dma_buf_get(in_arg->pin.handle); + if (IS_ERR_OR_NULL(dmabuf[0])) { + dev_err(&priv->pva->pdev->dev, "invalid handle to pin: %u", + in_arg->pin.handle); + err = -EFAULT; + goto out; + } + + err = nvpva_buffer_pin(priv->client->buffers, + &dmabuf[0], + &in_arg->pin.offset, + &in_arg->pin.size, + in_arg->pin.segment, + 1, + &out_arg->pin_id, + &out_arg->error_code); + dma_buf_put(dmabuf[0]); +out: + return err; +} + +static int pva_unpin(struct pva_private *priv, void *arg) +{ + int err = 0; + struct nvpva_unpin_in_arg *in_arg = (struct nvpva_unpin_in_arg *)arg; + + nvpva_buffer_unpin_id(priv->client->buffers, &in_arg->pin_id, 1); + + return err; +} + +static int +pva_authenticate_vpu_app(struct pva *pva, + struct pva_vpu_auth_s *auth, + uint8_t *data, + u32 size, + bool is_sys) +{ + int err = 0; + + if (!auth->pva_auth_enable) + goto out; + + mutex_lock(&auth->allow_list_lock); + if 
(!auth->pva_auth_allow_list_parsed) { + if (is_sys) + err = pva_auth_allow_list_parse_buf(pva->pdev, + auth, pva_auth_allow_list_sys, + pva_auth_allow_list_sys_len); + else + err = pva_auth_allow_list_parse(pva->pdev, auth); + + if (err) { + nvpva_warn(&pva->pdev->dev, + "allow list parse failed"); + mutex_unlock(&auth->allow_list_lock); + goto out; + } + } + + mutex_unlock(&auth->allow_list_lock); + err = pva_vpu_check_sha256_key(pva, + auth->vpu_hash_keys, + data, + size); + if (err != 0) + nvpva_dbg_fn(pva, "app authentication failed"); +out: + return err; +} + +static int pva_register_vpu_exec(struct pva_private *priv, void *arg) +{ + struct nvpva_vpu_exe_register_in_arg *reg_in = + (struct nvpva_vpu_exe_register_in_arg *)arg; + struct nvpva_vpu_exe_register_out_arg *reg_out = + (struct nvpva_vpu_exe_register_out_arg *)arg; + struct pva_elf_image *image; + void *exec_data = NULL; + uint16_t exe_id; + bool is_system = false; + uint64_t data_size; + int err = 0; + + data_size = reg_in->exe_data.size; + exec_data = kmalloc(data_size, GFP_KERNEL); + if (exec_data == NULL) { + nvpva_err(&priv->pva->pdev->dev, + "failed to allocate memory for elf"); + err = -ENOMEM; + goto out; + } + + err = copy_part_from_user(exec_data, data_size, + reg_in->exe_data); + if (err) { + nvpva_err(&priv->pva->pdev->dev, + "failed to copy vpu exe data"); + goto free_mem; + } + + err = pva_authenticate_vpu_app(priv->pva, + &priv->pva->pva_auth, + (uint8_t *)exec_data, + data_size, + false); + if (err != 0) { + err = pva_authenticate_vpu_app(priv->pva, + &priv->pva->pva_auth_sys, + (uint8_t *)exec_data, + data_size, + true); + if (err != 0) + goto free_mem; + + is_system = true; + } + + err = pva_load_vpu_app(&priv->client->elf_ctx, exec_data, + data_size, &exe_id, + is_system, + priv->pva->version); + + if (err) { + nvpva_err(&priv->pva->pdev->dev, + "failed to register vpu app"); + goto free_mem; + } + + reg_out->exe_id = exe_id; + image = get_elf_image(&priv->client->elf_ctx, exe_id); + reg_out->num_of_symbols = image->num_symbols - + image->num_sys_symbols; + reg_out->symbol_size_total = image->symbol_size_total; + +free_mem: + + if (exec_data != NULL) + kfree(exec_data); +out: + return err; +} + +static int pva_unregister_vpu_exec(struct pva_private *priv, void *arg) +{ + struct nvpva_vpu_exe_unregister_in_arg *unreg_in = + (struct nvpva_vpu_exe_unregister_in_arg *)arg; + return pva_release_vpu_app(&priv->client->elf_ctx, + unreg_in->exe_id, false); +} + +static int pva_get_symbol_id(struct pva_private *priv, void *arg) +{ + struct nvpva_get_symbol_in_arg *symbol_in = + (struct nvpva_get_symbol_in_arg *)arg; + struct nvpva_get_symbol_out_arg *symbol_out = + (struct nvpva_get_symbol_out_arg *)arg; + char *symbol_buffer; + int err = 0; + uint64_t name_size = symbol_in->name.size; + struct pva_elf_symbol symbol = {0}; + + if (name_size > ELF_MAX_SYMBOL_LENGTH) { + nvpva_warn(&priv->pva->pdev->dev, "symbol size too large:%llu", + symbol_in->name.size); + name_size = ELF_MAX_SYMBOL_LENGTH; + } + + symbol_buffer = kmalloc(name_size, GFP_KERNEL); + if (symbol_buffer == NULL) { + err = -ENOMEM; + goto out; + } + + err = copy_from_user(symbol_buffer, + (void __user *)symbol_in->name.addr, + name_size); + if (err) { + nvpva_err(&priv->pva->pdev->dev, + "failed to copy all name from user"); + goto free_mem; + } + + if (symbol_buffer[name_size - 1] != '\0') { + nvpva_warn(&priv->pva->pdev->dev, + "symbol name not terminated with NULL"); + symbol_buffer[name_size - 1] = '\0'; + } + + err = 
pva_get_sym_info(&priv->client->elf_ctx, symbol_in->exe_id, + symbol_buffer, &symbol); + if (err) { + goto free_mem; + } + + symbol_out->symbol.id = symbol.symbolID; + symbol_out->symbol.size = symbol.size; + symbol_out->symbol.isPointer = + (symbol.type == (uint32_t)VMEM_TYPE_POINTER) ? 1U : 0U; +free_mem: + kfree(symbol_buffer); +out: + return err; +} + +static int pva_get_symtab(struct pva_private *priv, void *arg) +{ + struct nvpva_get_sym_tab_in_arg *sym_tab_in = + (struct nvpva_get_sym_tab_in_arg *)arg; + + int err = 0; + struct nvpva_sym_info *sym_tab_buffer; + u64 tab_size; + + err = pva_get_sym_tab_size(&priv->client->elf_ctx, + sym_tab_in->exe_id, + &tab_size); + if (err) + goto out; + + if (sym_tab_in->tab.size < tab_size) { + nvpva_err(&priv->pva->pdev->dev, + "symbol table size smaller than needed:%llu", + sym_tab_in->tab.size); + err = -EINVAL; + goto out; + } + + sym_tab_buffer = kmalloc(tab_size, GFP_KERNEL); + if (sym_tab_buffer == NULL) { + err = -ENOMEM; + goto out; + } + + err = pva_get_sym_tab(&priv->client->elf_ctx, + sym_tab_in->exe_id, + sym_tab_buffer); + if (err) + goto free_mem; + + err = copy_to_user((void __user *)sym_tab_in->tab.addr, + sym_tab_buffer, + tab_size); + +free_mem: + kfree(sym_tab_buffer); +out: + return err; +} + +/* Maximum VPU print buffer size is 16M */ +#define MAX_VPU_PRINT_BUFFER_SIZE (16 * (1 << 20)) +static int pva_set_vpu_print_buffer_size(struct pva_private *priv, void *arg) +{ + union nvpva_set_vpu_print_buffer_size_args *in_arg = + (union nvpva_set_vpu_print_buffer_size_args *)arg; + uint32_t buffer_size = in_arg->in.size; + struct device *dev = &priv->pva->aux_pdev->dev; + int err = 0; + + if (buffer_size > MAX_VPU_PRINT_BUFFER_SIZE) { + dev_err(&priv->pva->pdev->dev, + "requested VPU print buffer too large: %u > %u\n", + buffer_size, MAX_VPU_PRINT_BUFFER_SIZE); + err = -EINVAL; + goto out; + } + + mutex_lock(&priv->queue->list_lock); + if (!list_empty(&priv->queue->tasklist)) { + dev_err(&priv->pva->pdev->dev, + "can't set VPU print buffer size when there's unfinished tasks\n"); + err = -EAGAIN; + goto unlock; + } + + if (priv->vpu_print_buffer != NULL) { + pva_free_cb(dev, priv->vpu_print_buffer); + priv->vpu_print_buffer = NULL; + } + + if (buffer_size == 0) + goto unlock; + + priv->vpu_print_buffer = pva_alloc_cb(dev, buffer_size); + + if (IS_ERR(priv->vpu_print_buffer)) { + err = PTR_ERR(priv->vpu_print_buffer); + priv->vpu_print_buffer = NULL; + } + +unlock: + mutex_unlock(&priv->queue->list_lock); +out: + return err; +} + +static ssize_t pva_read_cb(struct pva_cb *cb, u8 __user *buffer, + size_t buffer_size) +{ + const u32 tail = cb->tail; + const u32 head = *cb->head_va; + const u32 size = cb->size; + ssize_t ret = 0; + u32 transfer1_size; + u32 transfer2_size; + + /* + * Check if overflow happened, and if so, report it. 
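+ * After that check, the available bytes are copied out in at most two + * chunks because the circular buffer may wrap around. With hypothetical + * values size = 16, tail = 14 and head = 4, CIRC_CNT_TO_END() yields 2 + * bytes from the tail up to the end of the buffer, and the remaining 4 + * bytes are copied from the start of the buffer.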
+ */ + if (*cb->err_va != 0) { + pr_warn("pva: VPU print buffer overflowed!\n"); + ret = -ENOSPC; + goto out; + } + + transfer1_size = CIRC_CNT_TO_END(head, tail, size); + if (transfer1_size <= buffer_size) { + buffer_size -= transfer1_size; + } else { + transfer1_size = buffer_size; + buffer_size = 0; + } + + transfer2_size = + CIRC_CNT(head, tail, size) - CIRC_CNT_TO_END(head, tail, size); + if (transfer2_size <= buffer_size) { + buffer_size -= transfer2_size; + } else { + transfer2_size = buffer_size; + buffer_size = 0; + } + + if (transfer1_size > 0) { + unsigned long failed_count; + + failed_count = copy_to_user(buffer, cb->buffer_va + tail, + transfer1_size); + if (failed_count > 0) { + pr_err("pva: VPU print buffer: write to user buffer 1 failed\n"); + ret = -EFAULT; + goto out; + } + } + + if (transfer2_size > 0) { + unsigned long failed_count; + + failed_count = copy_to_user(&buffer[transfer1_size], + cb->buffer_va, transfer2_size); + if (failed_count > 0) { + pr_err("pva: VPU print buffer: write to user buffer 2 failed\n"); + ret = -EFAULT; + goto out; + } + } + + cb->tail = + (cb->tail + transfer1_size + transfer2_size) & (cb->size - 1); + + /* + * Update tail so that firmware knows the content is consumed; Memory + * barrier is needed here because the update should only be visible to + * firmware after the content is read. + */ + mb(); + *cb->tail_va = cb->tail; + ret = transfer1_size + transfer2_size; + +out: + return ret; +} + +static long pva_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pva_private *priv = file->private_data; + u8 buf[NVPVA_IOCTL_MAX_SIZE] __aligned(sizeof(u64)); + int err = 0; + int err2 = 0; + + nvpva_dbg_fn(priv->pva, ""); + + if ((_IOC_TYPE(cmd) != NVPVA_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVPVA_IOCTL_NUMBER_MAX) || + (_IOC_SIZE(cmd) > sizeof(buf))) + return -ENOIOCTLCMD; + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) { + dev_err(&priv->pva->pdev->dev, + "failed copy ioctl buffer from user; size: %u", + _IOC_SIZE(cmd)); + return -EFAULT; + } + } + + switch (cmd) { + case NVPVA_IOCTL_GET_SYMBOL_ID: + err = pva_get_symbol_id(priv, buf); + break; + case NVPVA_IOCTL_GET_SYM_TAB: + err = pva_get_symtab(priv, buf); + break; + case NVPVA_IOCTL_REGISTER_VPU_EXEC: + err = pva_register_vpu_exec(priv, buf); + break; + case NVPVA_IOCTL_UNREGISTER_VPU_EXEC: + err = pva_unregister_vpu_exec(priv, buf); + break; + case NVPVA_IOCTL_PIN: + err = pva_pin(priv, buf); + break; + case NVPVA_IOCTL_UNPIN: + err = pva_unpin(priv, buf); + break; + case NVPVA_IOCTL_SUBMIT: + err = pva_submit(priv, buf); + break; + case NVPVA_IOCTL_SET_VPU_PRINT_BUFFER_SIZE: + err = pva_set_vpu_print_buffer_size(priv, buf); + break; + default: + err2 = -ENOIOCTLCMD; + break; + } + + if ((err2 == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err2 = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + err = (err == 0) ? 
err2 : err; + + return err; +} + +static int pva_open(struct inode *inode, struct file *file) +{ + struct nvhost_device_data *pdata = container_of( + inode->i_cdev, struct nvhost_device_data, ctrl_cdev); + struct platform_device *pdev = pdata->pdev; + struct pva *pva = pdata->private_data; + struct pva_private *priv; + int err = 0; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv == NULL) { + err = -ENOMEM; + goto err_alloc_priv; + } + + file->private_data = priv; + priv->pva = pva; + priv->client = nvpva_client_context_alloc(pdev, pva, current->pid); + if (priv->client == NULL) { + err = -ENOMEM; + dev_err(&pdev->dev, "failed to allocate client context"); + goto err_alloc_context; + } + + priv->queue = nvpva_queue_alloc(pva->pool, + priv->client->cntxt_dev, + MAX_PVA_TASK_COUNT_PER_QUEUE); + + if (IS_ERR(priv->queue)) { + err = PTR_ERR(priv->queue); + goto err_alloc_queue; + } + + sema_init(&priv->queue->task_pool_sem, MAX_PVA_TASK_COUNT_PER_QUEUE); + err = nvhost_module_busy(pva->pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "error in powering up pva %d", + err); + goto err_device_busy; + } + + return nonseekable_open(inode, file); + +err_device_busy: + nvpva_queue_put(priv->queue); +err_alloc_queue: + nvpva_client_context_put(priv->client); +err_alloc_context: + nvhost_module_remove_client(pdev, priv); + kfree(priv); +err_alloc_priv: + return err; +} + +static void pva_queue_flush(struct pva *pva, struct nvpva_queue *queue) +{ + u32 flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status = {}; + struct pva_cmd_s cmd = {}; + int err = 0; + u32 nregs; + + nregs = pva_cmd_abort_task(&cmd, queue->id, flags); + err = nvhost_module_busy(pva->pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "error in powering up pva %d", + err); + goto err_out; + } + + err = pva->version_config->submit_cmd_sync(pva, &cmd, nregs, queue->id, + &status); + nvhost_module_idle(pva->pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "failed to issue FW abort command: %d", + err); + goto err_out; + } + /* Ensure that response is valid */ + if (status.error != PVA_ERR_NO_ERROR) { + dev_err(&pva->pdev->dev, "PVA FW Abort rejected: %d", + status.error); + } + +err_out: + return; +} + +static int pva_release(struct inode *inode, struct file *file) +{ + struct pva_private *priv = file->private_data; + bool queue_empty; + int i; + + flush_workqueue(priv->pva->task_status_workqueue); + mutex_lock(&priv->queue->list_lock); + queue_empty = list_empty(&priv->queue->tasklist); + mutex_unlock(&priv->queue->list_lock); + if (!queue_empty) { + /* Cancel remaining tasks */ + nvpva_dbg_info(priv->pva, "cancel remaining tasks"); + pva_queue_flush(priv->pva, priv->queue); + } + + /* make sure all tasks have been finished */ + for (i = 0; i < MAX_PVA_TASK_COUNT_PER_QUEUE; i++) { + if (down_killable(&priv->queue->task_pool_sem) != 0) { + nvpva_err( + &priv->pva->pdev->dev, + "interrupted while waiting %d tasks\n", + MAX_PVA_TASK_COUNT_PER_QUEUE - i); + pva_abort(priv->pva); + break; + } + } + + nvhost_module_idle(priv->pva->pdev); + + /* Release reference to client */ + nvpva_client_context_put(priv->client); + + /* + * Release handle to the queue (on-going tasks have their + * own references to the queue + */ + nvpva_queue_put(priv->queue); + + /* Free VPU print buffer if allocated */ + if (priv->vpu_print_buffer != NULL) { + pva_free_cb(&priv->pva->pdev->dev, priv->vpu_print_buffer); + priv->vpu_print_buffer = NULL; + } + + /* Finally, release the private data */ + kfree(priv); + + 
return 0; +} + +static ssize_t pva_read_vpu_print_buffer(struct file *file, + char __user *user_buffer, + size_t buffer_size, loff_t *off) +{ + struct pva_private *priv = file->private_data; + ssize_t ret; + + mutex_lock(&priv->queue->list_lock); + + if (priv->vpu_print_buffer != NULL) { + ret = pva_read_cb(priv->vpu_print_buffer, user_buffer, + buffer_size); + } else { + pr_warn("pva: VPU print buffer size needs to be specified\n"); + ret = -EIO; + } + + mutex_unlock(&priv->queue->list_lock); + + return ret; +} + +const struct file_operations tegra_pva_ctrl_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = pva_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pva_ioctl, +#endif + .open = pva_open, + .release = pva_release, + .read = pva_read_vpu_print_buffer, +}; diff --git a/drivers/video/tegra/host/pva/pva_iommu_context_dev.c b/drivers/video/tegra/host/pva/pva_iommu_context_dev.c new file mode 100644 index 00000000..4dcf306f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_iommu_context_dev.c @@ -0,0 +1,237 @@ +/* + * PVA Application Specific Virtual Memory + * + * Copyright (c) 2022-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pva_iommu_context_dev.h" +#include "pva.h" + +static u32 cntxt_dev_count; +static char *dev_names[] = { + "pva0_niso1_ctx0", + "pva0_niso1_ctx1", + "pva0_niso1_ctx2", + "pva0_niso1_ctx3", + "pva0_niso1_ctx4", + "pva0_niso1_ctx5", + "pva0_niso1_ctx6", + "pva0_niso1_ctx7", +}; + +static const struct of_device_id pva_iommu_context_dev_of_match[] = { + {.compatible = "nvidia,pva-tegra186-iommu-context"}, + {}, +}; + +struct pva_iommu_ctx { + struct platform_device *pdev; + struct list_head list; + struct device_dma_parameters dma_parms; + u32 ref_count; + bool allocated; + bool shared; +}; + +static LIST_HEAD(pva_iommu_ctx_list); +static DEFINE_MUTEX(pva_iommu_ctx_list_mutex); + +bool is_cntxt_initialized(void) +{ + return (cntxt_dev_count == 8); +} + +int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt) +{ + struct pva_iommu_ctx *ctx; + int err = 0; + int i; + + *count = 0; + mutex_lock(&pva_iommu_ctx_list_mutex); + for (i = 0; i < max_cnt; i++) { + list_for_each_entry(ctx, &pva_iommu_ctx_list, list) { + if (strnstr(ctx->pdev->name, dev_names[i], 29) != NULL) { + hwids[*count] = nvpva_get_device_hwid(ctx->pdev, 0); + if (hwids[*count] < 0) { + err = hwids[*count]; + break; + } + + ++(*count); + if (*count >= max_cnt) + break; + } + } + } + + mutex_unlock(&pva_iommu_ctx_list_mutex); + + return err; +} + +struct platform_device +*nvpva_iommu_context_dev_allocate(char *identifier, size_t len, bool shared) +{ + struct pva_iommu_ctx *ctx; + struct pva_iommu_ctx *ctx_new = NULL; + + mutex_lock(&pva_iommu_ctx_list_mutex); + + if (identifier == NULL) { + list_for_each_entry(ctx, &pva_iommu_ctx_list, list) + if (!ctx->allocated && 
!ctx_new) + ctx_new = ctx; + if (!ctx_new && shared) + list_for_each_entry(ctx, &pva_iommu_ctx_list, list) + if ((!ctx->allocated || ctx->shared) && !ctx_new) + ctx_new = ctx; + } else { + list_for_each_entry(ctx, &pva_iommu_ctx_list, list) + if (!ctx_new + && (strncmp(ctx->pdev->name, identifier, len) == 0)) + ctx_new = ctx; + + if (ctx_new && !shared && ctx_new->allocated) + ctx_new = NULL; + + if (ctx_new && shared && (ctx_new->allocated && !ctx_new->shared)) + ctx_new = NULL; + } + + if (ctx_new) { +#ifdef CONFIG_NVMAP + /* + * Ensure that all stashed mappings are removed from this context device + * before this context device gets reassigned to some other process + */ + dma_buf_release_stash(&ctx_new->pdev->dev); +#endif + ctx_new->allocated = true; + ctx_new->shared = shared; + ctx_new->ref_count += 1; + mutex_unlock(&pva_iommu_ctx_list_mutex); + return ctx_new->pdev; + } + + mutex_unlock(&pva_iommu_ctx_list_mutex); + + return NULL; +} + +void nvpva_iommu_context_dev_release(struct platform_device *pdev) +{ + struct pva_iommu_ctx *ctx; + + if (pdev == NULL) + return; + + ctx = platform_get_drvdata(pdev); + mutex_lock(&pva_iommu_ctx_list_mutex); + ctx->ref_count -= 1; + if (ctx->ref_count == 0) { + ctx->allocated = false; + ctx->shared = false; + } + + mutex_unlock(&pva_iommu_ctx_list_mutex); +} + +static int pva_iommu_context_dev_probe(struct platform_device *pdev) +{ + struct pva_iommu_ctx *ctx; + + if (!iommu_get_domain_for_dev(&pdev->dev)) { + dev_err(&pdev->dev, + "iommu is not enabled for context device. aborting."); + return -ENOSYS; + } + + ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + dev_err(&pdev->dev, + "%s: could not allocate iommu ctx\n", __func__); + return -ENOMEM; + } + + if (strnstr(pdev->name, dev_names[7], 29) != NULL) + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + else + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); + + INIT_LIST_HEAD(&ctx->list); + ctx->pdev = pdev; + + mutex_lock(&pva_iommu_ctx_list_mutex); + list_add_tail(&ctx->list, &pva_iommu_ctx_list); + cntxt_dev_count++; + mutex_unlock(&pva_iommu_ctx_list_mutex); + + platform_set_drvdata(pdev, ctx); + + pdev->dev.dma_parms = &ctx->dma_parms; + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + +#ifdef CONFIG_NVMAP + /* flag required to handle stashings in context devices */ + pdev->dev.context_dev = true; +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION(5, 0, 0) + dev_info(&pdev->dev, "initialized (streamid=%d, iommu=%s)", + nvpva_get_device_hwid(pdev, 0), + dev_name(pdev->dev.iommu->iommu_dev->dev)); +#else + dev_info(&pdev->dev, "initialized (streamid=%d)", + nvpva_get_device_hwid(pdev, 0)); +#endif + return 0; +} + +static int __exit pva_iommu_context_dev_remove(struct platform_device *pdev) +{ + struct pva_iommu_ctx *ctx = platform_get_drvdata(pdev); + + mutex_lock(&pva_iommu_ctx_list_mutex); + list_del(&ctx->list); + mutex_unlock(&pva_iommu_ctx_list_mutex); + + return 0; +} + +struct platform_driver nvpva_iommu_context_dev_driver = { + .probe = pva_iommu_context_dev_probe, + .remove = __exit_p(pva_iommu_context_dev_remove), + .driver = { + .owner = THIS_MODULE, + .name = "pva_iommu_context_dev", +#ifdef CONFIG_OF + .of_match_table = pva_iommu_context_dev_of_match, +#endif + }, +}; + diff --git a/drivers/video/tegra/host/pva/pva_iommu_context_dev.h b/drivers/video/tegra/host/pva/pva_iommu_context_dev.h new file mode 100644 index 00000000..fe4d95f2 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_iommu_context_dev.h @@ -0,0 +1,28 @@ +/* + * Host1x 
Application Specific Virtual Memory + * + * Copyright (c) 2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef IOMMU_CONTEXT_DEV_H +#define IOMMU_CONTEXT_DEV_H + +struct platform_device +*nvpva_iommu_context_dev_allocate(char *identifier, size_t len, bool shared); +void nvpva_iommu_context_dev_release(struct platform_device *pdev); +int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt); +bool is_cntxt_initialized(void); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_isr.c b/drivers/video/tegra/host/pva/pva_isr.c new file mode 100644 index 00000000..40a89da0 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_isr.c @@ -0,0 +1,155 @@ +/* + * PVA ISR code + * + * Copyright (c) 2016-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#define PVA_MASK_LOW_16BITS 0xff + +#include "pva-interface.h" +#include +#include +#include +#include +#include +#include "pva_regs.h" +#include "pva.h" +#include "pva_isr_t23x.h" + +void pva_push_aisr_status(struct pva *pva, uint32_t aisr_status) +{ + struct pva_task_error_s *err_array = pva->priv_circular_array.va; + struct pva_task_error_s *src_va = &err_array[pva->circular_array_wr_pos]; + + src_va->queue = PVA_GET_QUEUE_ID_FROM_STATUS(aisr_status); + src_va->vpu = PVA_GET_VPU_ID_FROM_STATUS(aisr_status); + src_va->error = PVA_GET_ERROR_FROM_STATUS(aisr_status); + src_va->task_id = PVA_GET_TASK_ID_FROM_STATUS(aisr_status); + src_va->valid = 1U; + + if (pva->circular_array_wr_pos == (MAX_PVA_TASK_COUNT-1)) + pva->circular_array_wr_pos = 0; + else + pva->circular_array_wr_pos += 1; +} + +static irqreturn_t pva_system_isr(int irq, void *dev_id) +{ + struct pva *pva = dev_id; + struct platform_device *pdev = pva->pdev; + u32 checkpoint = host1x_readl(pdev, + cfg_ccq_status_r(pva->version, 0, 8)); + u32 status7 = pva->version_config->read_mailbox(pdev, PVA_MBOX_ISR); + u32 status5 = pva->version_config->read_mailbox(pdev, PVA_MBOX_AISR); + u32 lic_int_status = host1x_readl(pdev, + sec_lic_intr_status_r(pva->version)); + u32 h1xflgs; + bool recover = false; + + if (status5 & PVA_AISR_INT_PENDING) { + nvpva_dbg_info(pva, "PVA AISR (%x)", status5); + + if (status5 & (PVA_AISR_TASK_COMPLETE | PVA_AISR_TASK_ERROR)) { + atomic_add(1, &pva->n_pending_tasks); + queue_work(pva->task_status_workqueue, + &pva->task_update_work); + if ((status5 & PVA_AISR_ABORT) == 0U) + pva_push_aisr_status(pva, status5); + } + + /* For now, just log the errors */ + if (status5 & PVA_AISR_TASK_ERROR) + nvpva_warn(&pdev->dev, "PVA AISR: PVA_AISR_TASK_ERROR"); + if (status5 & PVA_AISR_ABORT) { + nvpva_warn(&pdev->dev, "PVA AISR: PVA_AISR_ABORT"); + nvpva_warn(&pdev->dev, "Checkpoint value: 0x%08x", + checkpoint); + recover = true; + } + + pva->version_config->write_mailbox(pdev, PVA_MBOX_AISR, 0x0); + } + + if (status7 & PVA_INT_PENDING) { + nvpva_dbg_info(pva, "PVA ISR (%x)", status7); + + pva_mailbox_isr(pva); + } + + + /* Check for watchdog timer interrupt */ + if (lic_int_status & sec_lic_intr_enable_wdt_f(SEC_LIC_INTR_WDT)) { + nvpva_warn(&pdev->dev, "WatchDog Timer"); + recover = true; + } + + /* Check for host1x errors*/ + if (pva->version == PVA_HW_GEN1) { + h1xflgs = sec_lic_intr_enable_h1x_f(SEC_LIC_INTR_H1X_ALL_19); + } else { + h1xflgs = sec_lic_intr_enable_h1x_f(SEC_LIC_INTR_H1X_ALL_23); + } + if (lic_int_status & h1xflgs) { + nvpva_warn(&pdev->dev, "Pva Host1x errors (0x%x)", + lic_int_status); + + /* Clear the interrupt */ + host1x_writel(pva->pdev, sec_lic_intr_status_r(pva->version), + (lic_int_status & h1xflgs)); + recover = true; + } + + /* Copy trace points to ftrace buffer */ + pva_trace_copy_to_ftrace(pva); + + if (recover) + pva_abort(pva); + + return IRQ_HANDLED; +} + +int pva_register_isr(struct platform_device *dev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(dev); + struct pva *pva = pdata->private_data; + int err; + int i; + irq_handler_t irq_handler; + + for (i = 0; i < pva->version_config->irq_count; i++) { + pva->irq[i] = platform_get_irq(dev, i); + if (pva->irq[i] <= 0) { + dev_err(&dev->dev, "no irq %d\n", i); + err = -ENOENT; + break; + } + + /* IRQ0 is for mailbox/h1x/watchdog */ + if (i == 0) + irq_handler = pva_system_isr; + else + irq_handler = pva_ccq_isr; + + err = request_threaded_irq(pva->irq[i], NULL, irq_handler, + IRQF_ONESHOT, "pva-isr", pva); + if 
(err) { + pr_err("%s: request_irq(%d) failed(%d)\n", __func__, + pva->irq[i], err); + break; + } + disable_irq(pva->irq[i]); + } + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_isr_t23x.c b/drivers/video/tegra/host/pva/pva_isr_t23x.c new file mode 100644 index 00000000..688c46db --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_isr_t23x.c @@ -0,0 +1,109 @@ +/* + * PVA ISR code for T23X + * + * Copyright (c) 2019-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "pva_isr_t23x.h" + +#include +#include + +#include "pva_regs.h" +#include "pva.h" +#include "pva_ccq_t23x.h" + +#define PVA_MASK_LOW_16BITS 0xff + +irqreturn_t pva_ccq_isr(int irq, void *dev_id) +{ + uint32_t int_status = 0, isr_status = 0, aisr_status = 0; + unsigned int queue_id = MAX_PVA_QUEUE_COUNT + 1; + int i; + struct pva *pva = dev_id; + struct platform_device *pdev = pva->pdev; + bool recover = false; + + for (i = 1; i < MAX_PVA_IRQS; i++) { + if (pva->irq[i] == irq) { + queue_id = i - 1; + break; + } + } + if (queue_id == MAX_PVA_QUEUE_COUNT + 1) { + printk("Invalid IRQ received. Returning from ISR"); + return IRQ_HANDLED; + } + nvpva_dbg_info(pva, "Received ISR from CCQ block, IRQ: %d", irq); + int_status = host1x_readl(pdev, cfg_ccq_status_r(pva->version, + queue_id, PVA_CCQ_STATUS2_INDEX)) + & ~PVA_MASK_LOW_16BITS; + + if (int_status != 0x0) { + nvpva_dbg_info(pva, "Clear CCQ interrupt for %d, \ + current status: 0x%x", + queue_id, int_status); + host1x_writel(pdev, + cfg_ccq_status_r(pva->version, queue_id, + PVA_CCQ_STATUS2_INDEX), + int_status); + } + + if (int_status & PVA_VALID_CCQ_ISR) { + isr_status = host1x_readl(pdev, cfg_ccq_status_r(pva->version, + queue_id, PVA_CCQ_STATUS7_INDEX)); + } + if (int_status & PVA_VALID_CCQ_AISR) { + aisr_status = host1x_readl(pdev, cfg_ccq_status_r(pva->version, + queue_id, PVA_CCQ_STATUS8_INDEX)); + } + if (aisr_status & PVA_AISR_INT_PENDING) { + nvpva_dbg_info(pva, "PVA CCQ AISR (%x)", aisr_status); + if (aisr_status & + (PVA_AISR_TASK_COMPLETE | PVA_AISR_TASK_ERROR)) { + atomic_add(1, &pva->n_pending_tasks); + queue_work(pva->task_status_workqueue, + &pva->task_update_work); + if ((aisr_status & PVA_AISR_ABORT) == 0U) + pva_push_aisr_status(pva, aisr_status); + } + + /* For now, just log the errors */ + + if (aisr_status & PVA_AISR_TASK_ERROR) + nvpva_warn(&pdev->dev, + "PVA AISR: \ + PVA_AISR_TASK_ERROR for queue id = %d", + queue_id); + if (aisr_status & PVA_AISR_ABORT) { + nvpva_warn(&pdev->dev, "PVA AISR: \ + PVA_AISR_ABORT for queue id = %d", + queue_id); + nvpva_warn(&pdev->dev, "Checkpoint value: 0x%08x", + aisr_status); + recover = true; + } + /* Acknowledge AISR by writing status 1 */ + host1x_writel(pdev, cfg_ccq_status_r(pva->version, queue_id, + PVA_CCQ_STATUS1_INDEX), 0x01U); + } + if (isr_status & PVA_INT_PENDING) { + pva_ccq_isr_handler(pva, queue_id); + } + if (recover) + pva_abort(pva); + + return IRQ_HANDLED; +} diff --git 
a/drivers/video/tegra/host/pva/pva_isr_t23x.h b/drivers/video/tegra/host/pva/pva_isr_t23x.h new file mode 100644 index 00000000..6c9a491a --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_isr_t23x.h @@ -0,0 +1,25 @@ +/* + * PVA ISR interface for T23X + * + * Copyright (c) 2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __NVHOST_PVA_ISR_T23X_H__ +#define __NVHOST_PVA_ISR_T23X_H__ + +#include + +irqreturn_t pva_ccq_isr(int irq, void *dev_id); + +#endif \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/pva_mailbox.c b/drivers/video/tegra/host/pva/pva_mailbox.c new file mode 100644 index 00000000..41b89c65 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox.c @@ -0,0 +1,207 @@ +/* + * PVA mailbox code + * + * Copyright (c) 2016-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif +#include + +#include "pva.h" +#include "pva_mailbox.h" +#include "pva-interface.h" + +static u32 pva_get_mb_reg_id(u32 i) +{ + u32 mb_reg_id[VALID_MB_INPUT_REGS] = { + 0, + 1, + 2, + 3 + }; + + return mb_reg_id[i]; +} + +static int pva_mailbox_send_cmd(struct pva *pva, struct pva_cmd_s *cmd, + u32 nregs) +{ + struct platform_device *pdev = pva->pdev; + u32 reg, status; + int i; + + if (nregs > VALID_MB_INPUT_REGS) { + pr_err("%s nregs %d more than expected\n", __func__, nregs); + return -EINVAL; + } + + /* Make sure the state is what we expect it to be. */ + status = pva->version_config->read_mailbox(pdev, PVA_MBOX_ISR); + + WARN_ON((status & PVA_INT_PENDING)); + WARN_ON((status & PVA_READY) == 0); + WARN_ON((status & PVA_BUSY)); + + /*set MSB of mailbox 0 to trigger FW interrupt*/ + cmd->cmd_field[0] |= PVA_BIT(31); + /* Write all of the other command mailbox + * registers before writing mailbox 0. 
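+ * Writing mailbox 0 last matters: bit 31 was set above, so the write to + * mailbox 0 is what raises the interrupt towards firmware, and the other + * command words must already be in place when it lands.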
+ */ + for (i = (nregs - 1); i >= 0; i--) { + reg = pva_get_mb_reg_id(i); + pva->version_config->write_mailbox(pdev, reg, cmd->cmd_field[i]); + } + + return 0; +} + +int pva_mailbox_wait_event(struct pva *pva, int wait_time) +{ + int timeout = 1; + int err; + /* Wait for the event being triggered in ISR */ + if (pva->timeout_enabled == true) + timeout = wait_event_timeout( + pva->cmd_waitqueue[PVA_MAILBOX_INDEX], + pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_DONE || + pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_ABORTED, + msecs_to_jiffies(wait_time)); + else + wait_event(pva->cmd_waitqueue[PVA_MAILBOX_INDEX], + pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_DONE || + pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_ABORTED); + + if (timeout <= 0) { + err = -ETIMEDOUT; + pva_abort(pva); + } else if (pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_ABORTED) + err = -EIO; + else + err = 0; + + return err; +} + +void pva_mailbox_isr(struct pva *pva) +{ + struct platform_device *pdev = pva->pdev; + u32 int_status = pva->version_config->read_mailbox(pdev, PVA_MBOX_ISR); + if (pva->cmd_status[PVA_MAILBOX_INDEX] != PVA_CMD_STATUS_WFI) { + nvpva_warn(&pdev->dev, "Unexpected PVA ISR (%x)", int_status); + return; + } + + /* Save the current command and subcommand for later processing */ + pva->cmd_status_regs[PVA_MAILBOX_INDEX].cmd = + pva->version_config->read_mailbox(pdev, + PVA_MBOX_COMMAND); + pva->version_config->read_status_interface(pva, + PVA_MAILBOX_INDEX, int_status, + &pva->cmd_status_regs[PVA_MAILBOX_INDEX]); + /* Clear the mailbox interrupt status */ + int_status = int_status & PVA_READY; + pva->version_config->write_mailbox(pdev, PVA_MBOX_ISR, int_status); + + /* Wake up the waiters */ + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_DONE; + wake_up(&pva->cmd_waitqueue[PVA_MAILBOX_INDEX]); +} + +int pva_mailbox_send_cmd_sync_locked(struct pva *pva, + struct pva_cmd_s *cmd, u32 nregs, + struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + + if (status_regs == NULL) { + err = -EINVAL; + goto err_invalid_parameter; + } + + /* Ensure that mailbox state is sane */ + if (WARN_ON(pva->cmd_status[PVA_MAILBOX_INDEX] != + PVA_CMD_STATUS_INVALID)) { + err = -EIO; + goto err_check_status; + } + + /* Mark that we are waiting for an interrupt */ + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_WFI; + memset(&pva->cmd_status_regs, 0, sizeof(pva->cmd_status_regs)); + + /* Submit command to PVA */ + err = pva_mailbox_send_cmd(pva, cmd, nregs); + if (err < 0) + goto err_send_command; + + err = pva_mailbox_wait_event(pva, 100); + if (err < 0) + goto err_wait_response; + + /* Return interrupt status back to caller */ + memcpy(status_regs, &pva->cmd_status_regs, + sizeof(struct pva_cmd_status_regs)); + + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_INVALID; + return err; + +err_wait_response: +err_send_command: + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_INVALID; +err_check_status: +err_invalid_parameter: + return err; +} + +int pva_mailbox_send_cmd_sync(struct pva *pva, + struct pva_cmd_s *cmd, u32 nregs, + struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + + if (status_regs == NULL) { + err = -EINVAL; + goto err_invalid_parameter; + } + + mutex_lock(&pva->mailbox_mutex); + err = pva_mailbox_send_cmd_sync_locked(pva, + cmd, + nregs, + status_regs); + mutex_unlock(&pva->mailbox_mutex); + + return err; + +err_invalid_parameter: + return err; +} + + +EXPORT_SYMBOL(pva_mailbox_send_cmd_sync); diff --git 
a/drivers/video/tegra/host/pva/pva_mailbox.h b/drivers/video/tegra/host/pva/pva_mailbox.h new file mode 100644 index 00000000..577e0b1f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox.h @@ -0,0 +1,134 @@ +/* + * PVA mailbox header + * + * Copyright (c) 2016-2021, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_MAILBOX_H__ +#define __PVA_MAILBOX_H__ + +#include + +#include "pva-interface.h" +#include "pva_status_regs.h" + +/* Total CCQ status registers */ +#define PVA_CCQ_STATUS_REGS 9 + +/* Symbolic definitions of the CCQ status registers */ +#define PVA_CCQ_STATUS0_INDEX 0 +#define PVA_CCQ_STATUS1_INDEX 1 +#define PVA_CCQ_STATUS2_INDEX 2 +#define PVA_CCQ_STATUS3_INDEX 3 +#define PVA_CCQ_STATUS4_INDEX 4 +#define PVA_CCQ_STATUS5_INDEX 5 +#define PVA_CCQ_STATUS6_INDEX 6 +#define PVA_CCQ_STATUS7_INDEX 7 +#define PVA_CCQ_STATUS8_INDEX 8 + + +/* Number of valid MBOX registers used for sending commands */ +#define VALID_MB_INPUT_REGS 4 +/* Number of valid MBOX registers */ +#define VALID_MB_INPUT_REGS_EX 8 +struct pva; + +/** + * enum pva_mailbox_status - PVA mailbox status indication + * + * These enumerations reflect the state of PVA interrupt handler + */ +enum pva_mailbox_status { + PVA_MBOX_STATUS_INVALID = 0, + PVA_MBOX_STATUS_WFI = 1, + PVA_MBOX_STATUS_DONE = 2, + PVA_MBOX_STATUS_ABORTED = 3, +}; + +/** + * struct pva_mailbox_status_regs - Handle the MBOX status based on ISR + * + * @cmd: Holds the current MBOX command + * @error: Holds the any error shown through ISR + * @status: Holds the status of all CCQ registers + * + */ +struct pva_mailbox_status_regs { + uint32_t status[PVA_CCQ_STATUS_REGS]; + uint32_t error; + uint32_t cmd; +}; + +/** + * + * pva_mailbox_send_cmd_sync() - Send a command and wait for response + * + * @pva: Pointer to PVA structure + * @pva_cmd: Pointer to the pva command struct + * @nregs: Number of valid mailbox registers for the command + * @status_regs: Pointer to pva_cmd_status_regs struct + * + * This function called by OS to pass the mailbox commands to + * the PVA uCode. The function returns the output status from PVA + * firmware once the task is completed. + * + * The caller is responsible to ensure that PVA has been powered + * up through nvhost_module_busy() API prior calling this function. + */ + +int pva_mailbox_send_cmd_sync(struct pva *pva, + struct pva_cmd_s *cmd, u32 nregs, + struct pva_cmd_status_regs *status_regs); +/** + * + * pva_mailbox_send_cmd_sync_locked() - Send a command and wait for response + * + * @pva: Pointer to PVA structure + * @pva_cmd: Pointer to the pva command struct + * @nregs: Number of valid mailbox registers for the command + * @status_regs: Pointer to pva_cmd_status_regs struct + * + * This function called by OS to pass the mailbox commands to + * the PVA uCode. The function returns the output status from PVA + * firmware once the task is completed. 
This function must not be + * used during runtime without holding the mailbox mutex (i.e. + * the function can be called during PVA boot-up). + */ +int pva_mailbox_send_cmd_sync_locked(struct pva *pva, + struct pva_cmd_s *cmd, u32 nregs, + struct pva_cmd_status_regs *status_regs); + +/** + * pva_mailbox_isr() - Handle interrupt for PVA ISR + * + * @pva: Pointer to PVA structure + * + * This function is used to read the CCQ status registers based on + * the status set in mailbox7 by the PVA uCode. + */ +void pva_mailbox_isr(struct pva *pva); + +/** + * pva_mailbox_wait_event() - mailbox wait event + * + * @pva:» Pointer to PVA structure + * @wait_time» WaitTime Interval for the event + * + * This function do the wait until the mailbox isr get invoked based on + * the mailbox register set by the ucode. + */ +int pva_mailbox_wait_event(struct pva *pva, int wait_time); + +#endif /*__PVA_MAINBOX_H__*/ diff --git a/drivers/video/tegra/host/pva/pva_mailbox_t19x.c b/drivers/video/tegra/host/pva/pva_mailbox_t19x.c new file mode 100644 index 00000000..415ec489 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox_t19x.c @@ -0,0 +1,80 @@ +/* + * PVA mailbox code + * + * Copyright (c) 2016-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif +#include +#include + +#include "pva_mailbox.h" +#include "pva_mailbox_t19x.h" +#include "pva_regs.h" + +static u32 pva_get_mb_reg_ex(u32 i) +{ + u32 mb_reg[VALID_MB_INPUT_REGS_EX] = { + hsp_sm0_r(), + hsp_sm1_r(), + hsp_sm2_r(), + hsp_sm3_r(), + hsp_sm4_r(), + hsp_sm5_r(), + hsp_sm6_r(), + hsp_sm7_r() + }; + + return mb_reg[i]; +} + +u32 pva_read_mailbox_t19x(struct platform_device *pdev, u32 mbox_id) +{ + u32 side_bits = 0; + u32 mbox_value = 0; + u32 side_channel_addr = + pva_get_mb_reg_ex(PVA_MBOX_SIDE_CHANNEL_HOST_RD); + + side_bits = host1x_readl(pdev, side_channel_addr); + mbox_value = host1x_readl(pdev, pva_get_mb_reg_ex(mbox_id)); + side_bits = ((side_bits >> mbox_id) & 0x1) << PVA_SIDE_CHANNEL_MBOX_BIT; + mbox_value = (mbox_value & PVA_SIDE_CHANNEL_MBOX_BIT_MASK) | side_bits; + + return mbox_value; +} + +void pva_write_mailbox_t19x(struct platform_device *pdev, + u32 mbox_id, u32 value) +{ + u32 side_bits = 0; + u32 side_channel_addr = + pva_get_mb_reg_ex(PVA_MBOX_SIDE_CHANNEL_HOST_WR); + + side_bits = host1x_readl(pdev, side_channel_addr); + side_bits &= ~(1 << mbox_id); + side_bits |= ((value >> PVA_SIDE_CHANNEL_MBOX_BIT) & 0x1) << mbox_id; + value = (value & PVA_SIDE_CHANNEL_MBOX_BIT_MASK); + host1x_writel(pdev, side_channel_addr, side_bits); + host1x_writel(pdev, pva_get_mb_reg_ex(mbox_id), value); +} diff --git a/drivers/video/tegra/host/pva/pva_mailbox_t19x.h b/drivers/video/tegra/host/pva/pva_mailbox_t19x.h new file mode 100644 index 00000000..62ce1d89 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox_t19x.h @@ -0,0 +1,54 @@ +/* + * PVA mailbox header + * + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_MAILBOX_T19X_H__ +#define __PVA_MAILBOX_T19X_H__ + +#include + +#include "pva-interface.h" +/** + * pva_read_mailbox() - read a mailbox register + * + * @pva: Pointer to PVA structure + * @mbox: mailbox register to be written + * + * This function will read the indicated mailbox register and return its + * contents. it uses side channel B as host would. + * + * Return Value: + * contents of the indicated mailbox register + */ +u32 pva_read_mailbox_t19x(struct platform_device *pdev, u32 mbox_id); + +/** + * pva_write_mailbox() - write to a mailbox register + * + * @pva: Pointer to PVA structure + * @mbox: mailbox register to be written + * @value: value to be written into the mailbox register + * + * This function will write a value into the indicated mailbox register. 
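+ * On T19x one bit of the value (PVA_SIDE_CHANNEL_MBOX_BIT) is carried via the + * shared side-channel register rather than the mailbox register itself, so + * callers should use this helper instead of a raw host1x_writel().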
+ * + * Return Value: + * none + */ +void pva_write_mailbox_t19x(struct platform_device *pdev, + u32 mbox_id, u32 value); + +#endif /*__PVA_MAINBOX_T19X_H__*/ diff --git a/drivers/video/tegra/host/pva/pva_mailbox_t23x.c b/drivers/video/tegra/host/pva/pva_mailbox_t23x.c new file mode 100644 index 00000000..8e3b94d9 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox_t23x.c @@ -0,0 +1,56 @@ +/* + * PVA mailbox code for T23x + * + * Copyright (c) 2016-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "pva_mailbox.h" +#include "pva_mailbox_t23x.h" +#include "pva_regs.h" + + +static u32 pva_get_mb_reg_ex(u32 i) +{ + u32 mb_reg[VALID_MB_INPUT_REGS_EX] = { + hsp_sm0_r(), + hsp_sm1_r(), + hsp_sm2_r(), + hsp_sm3_r(), + hsp_sm4_r(), + hsp_sm5_r(), + hsp_sm6_r(), + hsp_sm7_r() + }; + + return mb_reg[i]; +} + +u32 pva_read_mailbox_t23x(struct platform_device *pdev, u32 mbox_id) +{ + return host1x_readl(pdev, pva_get_mb_reg_ex(mbox_id)); +} + +void pva_write_mailbox_t23x(struct platform_device *pdev, u32 mbox_id, u32 value) +{ + host1x_writel(pdev, pva_get_mb_reg_ex(mbox_id), value); +} + diff --git a/drivers/video/tegra/host/pva/pva_mailbox_t23x.h b/drivers/video/tegra/host/pva/pva_mailbox_t23x.h new file mode 100644 index 00000000..536e23fc --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox_t23x.h @@ -0,0 +1,54 @@ +/* + * PVA mailbox header + * + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_MAILBOX_T23X_H__ +#define __PVA_MAILBOX_T23X_H__ + +#include + +#include "pva-interface.h" + +/** + * pva_read_mailbox() - read a mailbox register + * + * @pva: Pointer to PVA structure + * @mbox: mailbox register to be written + * + * This function will read the indicated mailbox register and return its + * contents. it uses side channel B as host would. + * + * Return Value: + * contents of the indicated mailbox register + */ +u32 pva_read_mailbox_t23x(struct platform_device *pdev, u32 mbox_id); + +/** + * pva_write_mailbox() - write to a mailbox register + * + * @pva: Pointer to PVA structure + * @mbox: mailbox register to be written + * @value: value to be written into the mailbox register + * + * This function will write a value into the indicated mailbox register. 
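+ * On T23x the value is written directly to the mailbox register; there is + * no side-channel handling as on T19x.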
+ * + * Return Value: + * none + */ +void pva_write_mailbox_t23x(struct platform_device *pdev, u32 mbox_id, u32 value); + +#endif /*__PVA_MAINBOX_T23X_H__*/ diff --git a/drivers/video/tegra/host/pva/pva_nvhost.h b/drivers/video/tegra/host/pva/pva_nvhost.h new file mode 100644 index 00000000..9abed27c --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_nvhost.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2021-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_NVHOST_H +#define PVA_NVHOST_H + +#include +#include + +extern const struct file_operations tegra_pva_ctrl_ops; + +/** + * @brief Finalize the PVA Power-on-Sequence. + * + * This function called from host subsystem driver after the PVA + * partition has been brought up, clocks enabled and reset deasserted. + * In production mode, the function needs to wait until the ready bit + * within the PVA aperture has been set. After that enable the PVA IRQ. + * Register the queue priorities on the PVA. + * + * @param pdev Pointer to PVA device + * @return: 0 on Success or negative error code + * + */ +int pva_finalize_poweron(struct platform_device *pdev); + +/** + * @brief Prepare PVA poweroff. + * + * This function called from host subsystem driver before turning off + * the PVA. The function should turn off the PVA IRQ. + * + * @param pdev Pointer to PVA device + * @return 0 on Success or negative error code + * + */ +int pva_prepare_poweroff(struct platform_device *pdev); + +#ifdef CONFIG_TEGRA_SOC_HWPM + +enum tegra_soc_hwpm_ip_reg_op; + +/** + * @brief pva_hwpm_ip_pm + * + * This function called from Tegra HWPM driver to + * poweron/off pva device. + * + * @param ip_dev Pointer to PVA device + * @param disable disable/enable power management. PVA is + * powered on when false. + * @param reg_offset offset of register relative to PVA HWP base + * @return 0 on Success or negative error code + * + */ +int pva_hwpm_ip_pm(void *ip_dev, bool disable); + +/** + * @brief pva_hwpm_ip_reg_op + * + * This function called from Tegra HWPM driver to + * access PVA HWPM registers. + * + * @param ip_dev Pointer to PVA device + * @param reg_op access operation and can be one of + * TEGRA_SOC_HWPM_IP_REG_OP_READ + * TEGRA_SOC_HWPM_IP_REG_OP_WRITE + * @param inst_element_index element index within PVA instance + * @param reg_offset offset of register relative to PVA HWP base + * @param reg_data pointer to where data is to be placed or read. + * @return 0 on Success or negative error code + * + */ +int pva_hwpm_ip_reg_op(void *ip_dev, + enum tegra_soc_hwpm_ip_reg_op reg_op, + u32 inst_element_index, u64 reg_offset, + u32 *reg_data); +#endif +#endif diff --git a/drivers/video/tegra/host/pva/pva_queue.c b/drivers/video/tegra/host/pva/pva_queue.c new file mode 100644 index 00000000..49ec67c0 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_queue.c @@ -0,0 +1,1567 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include "pva_dma.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_EVENTLIB +#include +#include +#include +#endif + +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif + +#include +#include +#include + +#include "pva.h" +#include "nvpva_buffer.h" +#include "nvpva_queue.h" +#include "pva_mailbox.h" +#include "pva_queue.h" +#include "pva_regs.h" + +#include "pva-vpu-perf.h" +#include "pva-interface.h" +#include "pva_vpu_exe.h" +#include "nvpva_client.h" +#include "nvpva_syncpt.h" + +void *pva_dmabuf_vmap(struct dma_buf *dmabuf) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) + struct iosys_map map = {0}; +#else + struct dma_buf_map map = {0}; +#endif + /* Linux v5.11 and later kernels */ + if (dma_buf_vmap(dmabuf, &map)) + return NULL; + + return map.vaddr; +#else + /* Linux v5.10 and earlier kernels */ + return dma_buf_vmap(dmabuf); +#endif +} + +void pva_dmabuf_vunmap(struct dma_buf *dmabuf, void *addr) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) + struct iosys_map map = IOSYS_MAP_INIT_VADDR(addr); +#else + struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(addr); +#endif + /* Linux v5.11 and later kernels */ + dma_buf_vunmap(dmabuf, &map); +#else + /* Linux v5.10 and earlier kernels */ + dma_buf_vunmap(dmabuf, addr); +#endif +} + +static void pva_task_dump(struct pva_submit_task *task) +{ + int i; + + nvpva_dbg_info(task->pva, "task=%p, exe_id=%u", task, task->exe_id); + + for (i = 0; i < task->num_input_task_status; i++) + nvpva_dbg_info(task->pva, "input task status %d: pin_id=%u, offset=%u", i, + task->input_task_status[i].pin_id, + task->input_task_status[i].offset); + + for (i = 0; i < task->num_output_task_status; i++) + nvpva_dbg_info(task->pva, "output task status %d: pin_id=%u, offset=%u", + i, task->output_task_status[i].pin_id, + task->output_task_status[i].offset); + + for (i = 0; i < task->num_user_fence_actions; i++) + nvpva_dbg_info(task->pva, "fence action %d: type=%u", i, + task->user_fence_actions[i].type); +} + +static void pva_task_get_memsize(size_t *dma_size, + size_t *kmem_size, + size_t *aux_dma_size) +{ + /* Align task addr to 64bytes boundary for DMA use*/ + *dma_size = ALIGN(sizeof(struct pva_hw_task) + 64, 64); + *kmem_size = sizeof(struct pva_submit_task); + *aux_dma_size = NVPVA_TASK_MAX_PAYLOAD_SIZE; +} + +static inline void nvpva_fetch_task_status_info(struct pva *pva, + struct pva_task_error_s *info) +{ + struct pva_task_error_s *err_array = pva->priv_circular_array.va; + struct pva_task_error_s *src_va = + &err_array[pva->circular_array_rd_pos]; + const u32 len = MAX_PVA_TASK_COUNT; + + pva->circular_array_rd_pos += 1; + WARN_ON(pva->circular_array_rd_pos > len); + if (pva->circular_array_rd_pos >= len) + 
pva->circular_array_rd_pos = 0; + + /* Cache coherency is guaranteed by DMA API */ + (void)memcpy(info, src_va, sizeof(struct pva_task_error_s)); + /* clear it for debugging */ + (void)memset(src_va, 0, sizeof(struct pva_task_error_s)); +} + +static void pva_task_unpin_mem(struct pva_submit_task *task) +{ + u32 i; + + for (i = 0; i < task->num_pinned; i++) { + struct pva_pinned_memory *mem = &task->pinned_memory[i]; + + nvpva_buffer_submit_unpin_id(task->client->buffers, + &mem->id, 1); + } + + task->num_pinned = 0; +} + +struct pva_pinned_memory *pva_task_pin_mem(struct pva_submit_task *task, + u32 id) +{ + int err; + struct pva_pinned_memory *mem; + + if (task->num_pinned >= ARRAY_SIZE(task->pinned_memory)) { + task_err(task, "too many objects to pin"); + err = -ENOMEM; + goto err_out; + } + + if (id == 0) { + task_err(task, "pin id is 0"); + err = -EFAULT; + goto err_out; + } + + mem = &task->pinned_memory[task->num_pinned]; + mem->id = id; + err = nvpva_buffer_submit_pin_id(task->client->buffers, &mem->id, 1, + &mem->dmabuf, &mem->dma_addr, + &mem->size, &mem->heap); + if (err) { + task_err(task, "submit pin failed; Is the handled pinned?"); + goto err_out; + } + + task->num_pinned += 1; + + return mem; +err_out: + return ERR_PTR(err); +} + +/* pin fence and return its dma addr */ +static int +pva_task_pin_fence(struct pva_submit_task *task, + struct nvpva_submit_fence *fence, + dma_addr_t *addr) +{ + int err = 0; + + switch (fence->type) { + case NVPVA_FENCE_OBJ_SEM: { + struct pva_pinned_memory *mem; + + mem = pva_task_pin_mem(task, fence->obj.sem.mem.pin_id); + if (IS_ERR(mem)) { + task_err(task, "sempahore submit pin failed"); + err = PTR_ERR(mem); + } else + *addr = mem->dma_addr + fence->obj.sem.mem.offset; + break; + } + case NVPVA_FENCE_OBJ_SYNCPT: { + dma_addr_t syncpt_addr = nvpva_syncpt_address( + task->queue->vm_pdev, fence->obj.syncpt.id, + false); + nvpva_dbg_info(task->pva, + "id = %d, syncpt addr = %llx", + fence->obj.syncpt.id, + syncpt_addr); + + if (syncpt_addr) { + *addr = syncpt_addr; + } else { + task_err(task, + "%s: can't get syncpoint address", + __func__); + err = -EINVAL; + } + break; + } + default: + err = -EINVAL; + task_err(task, "%s: unsupported fence type: %d", + __func__, fence->type); + break; + } + return err; +} + +static int +get_fence_value(struct nvpva_submit_fence *fence, u32 *val) +{ + int err = 0; + + switch (fence->type) { + case NVPVA_FENCE_OBJ_SYNCPT: + *val = fence->obj.syncpt.value; + break; + case NVPVA_FENCE_OBJ_SEM: + *val = fence->obj.sem.value; + break; + default: + err = -EINVAL; + pr_err("%s: unsupported fence type: %d", + __func__, fence->type); + break; + } + return err; +} + +static inline void +pva_task_write_fence_action_op(struct pva_task_action_s *op, + uint8_t action, + uint64_t fence_addr, + uint32_t val, + uint64_t time_stamp_addr) +{ + op->action = action; + op->args.ptr.p = fence_addr; + op->args.ptr.v = val; + op->args.ptr.t = time_stamp_addr; +} + +static inline void +pva_task_write_status_action_op(struct pva_task_action_s *op, + uint8_t action, + uint64_t addr, + uint16_t val) +{ + op->action = action; + op->args.status.p = addr; + op->args.status.status = val; +} + +static inline void +pva_task_write_stats_action_op(struct pva_task_action_s *op, + uint8_t action, + uint64_t addr) +{ + op->action = action; + op->args.statistics.p = addr; +} + +static int +pva_task_process_fence_actions(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + int err = 0; + u32 i; + u32 fence_type; + u32 ts_flag = 0; + u8 
*action_counter; + u8 action_code; + struct pva_task_action_s *fw_actions; + struct pva_task_action_s *current_fw_actions; + + for (fence_type = NVPVA_FENCE_SOT_R5; + fence_type < NVPVA_MAX_FENCE_TYPES; fence_type++) { + switch (fence_type) { + case NVPVA_FENCE_SOT_R5: + fw_actions = &hw_task->preactions[0]; + action_code = TASK_ACT_PTR_WRITE_SOT_R; + action_counter = &hw_task->task.num_preactions; + ts_flag = PVA_TASK_FL_SOT_R_TS; + break; + case NVPVA_FENCE_SOT_VPU: + fw_actions = &hw_task->preactions[0]; + action_code = TASK_ACT_PTR_WRITE_SOT_V; + action_counter = &hw_task->task.num_preactions; + ts_flag = PVA_TASK_FL_SOT_V_TS; + break; + case NVPVA_FENCE_EOT_R5: + fw_actions = &hw_task->postactions[0]; + action_code = TASK_ACT_PTR_WRITE_EOT_R; + action_counter = &hw_task->task.num_postactions; + ts_flag = PVA_TASK_FL_EOT_R_TS; + break; + case NVPVA_FENCE_EOT_VPU: + fw_actions = &hw_task->postactions[0]; + action_code = TASK_ACT_PTR_WRITE_EOT_V; + action_counter = &hw_task->task.num_postactions; + ts_flag = PVA_TASK_FL_EOT_V_TS; + break; + case NVPVA_FENCE_POST: + fw_actions = &hw_task->postactions[0]; + action_code = TASK_ACT_PTR_WRITE_EOT; + action_counter = &hw_task->task.num_postactions; + ts_flag = 0; + break; + default: + task_err(task, "unknown fence action type"); + err = -EINVAL; + goto out; + } + + for (i = 0; i < task->num_pva_fence_actions[fence_type]; i++) { + struct nvpva_fence_action *fence_action = + &task->pva_fence_actions[fence_type][i]; + dma_addr_t fence_addr = 0; + u32 fence_value; + dma_addr_t timestamp_addr; + switch (fence_action->fence.type) { + case NVPVA_FENCE_OBJ_SYNCPT: + { + u32 id = task->queue->syncpt_id; + fence_action->fence.obj.syncpt.id = id; + fence_addr = nvpva_syncpt_address( + task->queue->vm_pdev, id, true); + nvpva_dbg_info(task->pva, + "id = %d, fence_addr = %llx ", + task->queue->syncpt_id, + fence_addr); + + if (fence_addr == 0) { + err = -EFAULT; + goto out; + } + task->fence_num += 1; + task->syncpt_thresh += 1; + fence_value = 1; + fence_action->fence.obj.syncpt.value = + task->syncpt_thresh; + break; + } + case NVPVA_FENCE_OBJ_SEM: + { + err = pva_task_pin_fence(task, + &fence_action->fence, + &fence_addr); + if (err) + goto out; + task->sem_num += 1; + task->sem_thresh += 1; + fence_value = task->sem_thresh; + fence_action->fence.obj.sem.value = fence_value; + break; + } + default: + task_err(task, "unknown fence action object"); + err = -EINVAL; + goto out; + } + + if (fence_action->timestamp_buf.pin_id) { + struct pva_pinned_memory *mem; + mem = pva_task_pin_mem( + task, + fence_action->timestamp_buf.pin_id); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err( + task, + "failed to pin timestamp buffer"); + goto out; + } + timestamp_addr = + mem->dma_addr + + fence_action->timestamp_buf.offset; + hw_task->task.flags |= ts_flag; + } else { + timestamp_addr = 0; + } + + current_fw_actions = &fw_actions[*action_counter]; + pva_task_write_fence_action_op(current_fw_actions, + action_code, fence_addr, + fence_value, + timestamp_addr); + *action_counter = *action_counter + 1; + } + } +out: + return err; +} + +static int pva_task_process_prefences(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + u32 i; + int err; + struct pva_task_action_s *fw_preactions = NULL; + for (i = 0; i < task->num_prefences; i++) { + struct nvpva_submit_fence *fence = &task->prefences[i]; + dma_addr_t fence_addr = 0; + u32 fence_val; + + err = pva_task_pin_fence(task, fence, &fence_addr); + if (err) + goto out; + + if (fence_addr == 0) { + err = 
-EINVAL; + goto out; + } + + err = get_fence_value(fence, &fence_val); + if (err) + goto out; + + fw_preactions = + &hw_task->preactions[hw_task->task.num_preactions]; + pva_task_write_fence_action_op(fw_preactions, + TASK_ACT_PTR_BLK_GTREQL, + fence_addr, fence_val, 0); + ++hw_task->task.num_preactions; + } +out: + return err; +} + +static int pva_task_process_input_status(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + u8 i; + int err = 0; + struct pva_task_action_s *fw_preactions = NULL; + + for (i = 0; i < task->num_input_task_status; i++) { + struct nvpva_mem *status; + struct pva_pinned_memory *mem; + dma_addr_t status_addr; + + status = &task->input_task_status[i]; + mem = pva_task_pin_mem(task, status->pin_id); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + goto out; + } + + status_addr = mem->dma_addr + status->offset; + + fw_preactions = + &hw_task->preactions[hw_task->task.num_preactions]; + pva_task_write_status_action_op(fw_preactions, + (uint8_t)TASK_ACT_READ_STATUS, + status_addr, 0U); + ++hw_task->task.num_preactions; + } +out: + return err; +} + +static int pva_task_process_output_status(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + u32 i; + int err = 0; + dma_addr_t stats_addr; + struct pva_task_action_s *fw_postactions = NULL; + + for (i = 0; i < task->num_output_task_status; i++) { + dma_addr_t status_addr; + struct nvpva_mem *status = &task->output_task_status[i]; + struct pva_pinned_memory *mem; + + mem = pva_task_pin_mem(task, status->pin_id); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + goto out; + } + + status_addr = mem->dma_addr + status->offset; + fw_postactions = &hw_task->postactions[hw_task->task.num_postactions]; + pva_task_write_status_action_op(fw_postactions, + (uint8_t)TASK_ACT_WRITE_STATUS, + status_addr, + 1U /* PVA task error code */); + ++hw_task->task.num_postactions; + } + + stats_addr = task->dma_addr + offsetof(struct pva_hw_task, statistics); + fw_postactions = &hw_task->postactions[hw_task->task.num_postactions]; + if ((task->pva->stats_enabled) + || (task->pva->profiling_level > 0)) { + pva_task_write_stats_action_op(fw_postactions, + (uint8_t)TASK_ACT_PVA_STATISTICS, + stats_addr); + hw_task->task.flags |= PVA_TASK_FL_STATS_ENABLE; + ++hw_task->task.num_postactions; + } +out: + return err; +} +static int +pva_task_write_vpu_parameter(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + int err = 0; + struct pva_elf_image *elf = NULL; + struct nvpva_pointer_symbol *sym_ptr = NULL; + struct nvpva_pointer_symbol_ex *sym_ptr_ex = NULL; + u32 symbolId = 0U; + dma_addr_t symbol_payload = 0U; + u32 size = 0U; + u32 i; + u32 index = 0; + + u32 head_index = 0U; + u8 *headPtr = NULL; + u32 head_size = 0U; + u32 head_count = 0U; + + u32 tail_index = 0U; + u8 *tailPtr = NULL; + u32 tail_count = 0U; + struct pva_vpu_parameters_s *hw_task_param_list; + + if ((task->exe_id == NVPVA_NOOP_EXE_ID) || (task->num_symbols == 0U)) + goto out; + + tail_index = ((u32)task->num_symbols - 1U); + elf = get_elf_image(&task->client->elf_ctx, task->exe_id); + if (task->num_symbols > elf->num_symbols) { + task_err(task, "invalid number of symbols"); + err = -EINVAL; + goto out; + } + + if (task->symbol_payload_size == 0U) { + task_err(task, "Empty Symbol payload"); + err = -EINVAL; + goto out; + } + + symbol_payload = task->aux_dma_addr; + + headPtr = (u8 *)(task->aux_va); + tailPtr = (u8 *)(task->aux_va + task->symbol_payload_size); + hw_task_param_list = hw_task->dma_info_and_params_list.param_list; + + for (i = 0U; i < 
task->num_symbols; i++) { + symbolId = task->symbols[i].symbol.id; + size = elf->sym[symbolId].size; + if (task->symbols[i].symbol.size != size) { + task_err(task, "size does not match symbol:%s", + elf->sym[symbolId].symbol_name); + err = -EINVAL; + goto out; + } + + if (task->symbols[i].config == NVPVA_SYMBOL_POINTER) { + struct pva_pinned_memory *mem; + + memcpy(headPtr, (task->symbol_payload + task->symbols[i].offset), + sizeof(struct nvpva_pointer_symbol)); + sym_ptr = (struct nvpva_pointer_symbol *)(headPtr); + mem = pva_task_pin_mem(task, PVA_LOW32(sym_ptr->base)); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, "failed to pin symbol pointer"); + err = -EINVAL; + goto out; + } + + sym_ptr->base = mem->dma_addr; + sym_ptr->size = mem->size; + size = sizeof(struct nvpva_pointer_symbol); + } else if (task->symbols[i].config == NVPVA_SYMBOL_POINTER_EX) { + struct pva_pinned_memory *mem; + + memcpy(headPtr, (task->symbol_payload + task->symbols[i].offset), + sizeof(struct nvpva_pointer_symbol_ex)); + sym_ptr_ex = (struct nvpva_pointer_symbol_ex *)(headPtr); + mem = pva_task_pin_mem(task, PVA_LOW32(sym_ptr_ex->base)); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, "failed to pin symbol pointer"); + err = -EINVAL; + goto out; + } + + sym_ptr_ex->base = mem->dma_addr; + sym_ptr_ex->size = mem->size; + size = sizeof(struct nvpva_pointer_symbol_ex); + } else if (size < PVA_DMA_VMEM_COPY_THRESHOLD) { + (void)memcpy(headPtr, + (task->symbol_payload + task->symbols[i].offset), + size); + } else if ((uintptr_t)(tailPtr) < ((uintptr_t)(headPtr) + size)) { + task_err(task, "Symbol payload overflow"); + err = -EINVAL; + goto out; + } else { + tailPtr = (tailPtr - size); + (void)memcpy(tailPtr, + (task->symbol_payload + task->symbols[i].offset), + size); + hw_task_param_list[tail_index].param_base = + (pva_iova)(symbol_payload + + ((uintptr_t)(tailPtr) - + (uintptr_t)(task->aux_va))); + index = tail_index; + tail_index--; + tail_count++; + hw_task_param_list[index].addr = + elf->sym[symbolId].addr; + hw_task_param_list[index].size = size; + continue; + } + + hw_task_param_list[head_index].param_base = + (pva_iova)(symbol_payload + + ((uintptr_t)(headPtr) - + (uintptr_t)(task->aux_va))); + index = head_index; + if ((uintptr_t)(headPtr) > ((uintptr_t)(tailPtr) - size)) { + task_err(task, "Symbol payload overflow"); + err = -EINVAL; + goto out; + } else { + headPtr = (headPtr + size); + head_index++; + head_size += size; + head_count++; + hw_task_param_list[index].addr = + elf->sym[symbolId].addr; + hw_task_param_list[index].size = size; + } + } + + /* Write info for VPU instance data parameter, if available in elf */ + for (i = 0U; i < elf->num_symbols; i++) { + if (elf->sym[i].is_sys) { + hw_task_param_list[task->num_symbols].addr = + elf->sym[i].addr; + hw_task_param_list[task->num_symbols].size = + elf->sym[i].size; + hw_task_param_list[task->num_symbols].param_base = + PVA_SYS_INSTANCE_DATA_V1_IOVA; + ++task->num_symbols; + } + } + + hw_task->param_info.small_vpu_param_data_iova = + (head_size != 0U) ? 
symbol_payload : 0UL; + + hw_task->param_info.small_vpu_parameter_data_size = head_size; + + hw_task->param_info.large_vpu_parameter_list_start_index = head_count; + hw_task->param_info.vpu_instance_parameter_list_start_index = + (head_count + tail_count); + + hw_task->param_info.parameter_data_iova = task->dma_addr + + offsetof(struct pva_hw_task, dma_info_and_params_list) + + offsetof(struct pva_dma_info_and_params_list_s, param_list); + + hw_task->task.num_parameters = task->num_symbols; + + hw_task->task.parameter_info_base = task->dma_addr + + offsetof(struct pva_hw_task, param_info); + + err = pva_task_acquire_ref_vpu_app(&task->client->elf_ctx, + task->exe_id); + if (err) { + task_err(task, + "unable to acquire ref count for app with id = %u", + task->exe_id); + } + + task->pinned_app = true; +out: + return err; +} + +static int set_flags(struct pva_submit_task *task, struct pva_hw_task *hw_task) +{ + int err = 0; + uint32_t flags = task->flags; + + if (flags & NVPVA_PRE_BARRIER_TASK_TRUE) + hw_task->task.flags |= PVA_TASK_FL_SYNC_TASKS; + if (flags & NVPVA_GR_CHECK_EXE_FLAG) + hw_task->task.flags |= PVA_TASK_FL_GR_CHECK; + if (flags & NVPVA_AFFINITY_VPU0) + hw_task->task.flags |= PVA_TASK_FL_VPU0; + if (flags & NVPVA_AFFINITY_VPU1) + hw_task->task.flags |= PVA_TASK_FL_VPU1; + if ((flags & NVPVA_AFFINITY_VPU_ANY) == 0) { + err = -EINVAL; + task_err(task, "incorrect vpu affinity"); + goto out; + } + if (task->pva->vpu_debug_enabled) + hw_task->task.flags |= PVA_TASK_FL_VPU_DEBUG; + + if (task->special_access) + hw_task->task.flags |= PVA_TASK_FL_SPECIAL_ACCESS; + if (flags & NVPVA_ERR_MASK_ILLEGAL_INSTR) + hw_task->task.flags |= PVA_TASK_FL_ERR_MASK_ILLEGAL_INSTR; + if (flags & NVPVA_ERR_MASK_DIVIDE_BY_0) + hw_task->task.flags |= PVA_TASK_FL_ERR_MASK_DIVIDE_BY_0; + if (flags & NVPVA_ERR_MASK_FP_NAN) + hw_task->task.flags |= PVA_TASK_FL_ERR_MASK_FP_NAN; +out: + return err; +} + +static int pva_task_write(struct pva_submit_task *task) +{ + struct pva_hw_task *hw_task; + u32 pre_ptr, post_ptr; + int err = 0; + + if (!pva_vpu_elf_is_registered(&task->client->elf_ctx, task->exe_id) && + (task->exe_id != NVPVA_NOOP_EXE_ID)) { + task_err(task, "invalid exe id: %d", task->exe_id); + return -EINVAL; + } + /* Task start from the memory base */ + hw_task = task->va; + pre_ptr = 0; + post_ptr = 0; + /* process pre & post actions */ + err = pva_task_process_prefences(task, hw_task); + if (err) + goto out; + + err = pva_task_process_input_status(task, hw_task); + if (err) + goto out; + + err = pva_task_process_output_status(task, hw_task); + if (err) + goto out; + + err = pva_task_process_fence_actions(task, hw_task); + if (err) + goto out; + + err = pva_task_write_dma_info(task, hw_task); + if (err) + goto out; + + err = pva_task_write_dma_misr_info(task, hw_task); + if (err) + goto out; + + err = pva_task_write_vpu_parameter(task, hw_task); + if (err) + goto out; + + hw_task->task.next = 0U; + hw_task->task.preactions = task->dma_addr + offsetof(struct pva_hw_task, + preactions); + hw_task->task.postactions = task->dma_addr + offsetof(struct pva_hw_task, + postactions); + hw_task->task.runlist_version = PVA_RUNLIST_VERSION_ID; + hw_task->task.sid_index = task->client->sid_index; + err = set_flags(task, hw_task); + if (err) + goto out; + + hw_task->task.bin_info = + phys_get_bin_info(&task->client->elf_ctx, task->exe_id); + + if (task->stdout) { + hw_task->stdout_cb_info.buffer = task->stdout->buffer_addr; + hw_task->stdout_cb_info.head = task->stdout->head_addr; + hw_task->stdout_cb_info.tail = 
task->stdout->tail_addr; + hw_task->stdout_cb_info.err = task->stdout->err_addr; + hw_task->stdout_cb_info.buffer_size = task->stdout->size; + hw_task->task.stdout_info = + task->dma_addr + + offsetof(struct pva_hw_task, stdout_cb_info); + } else + hw_task->task.stdout_info = 0; + +out: + + return err; +} +#ifdef CONFIG_EVENTLIB + +static void +pva_eventlib_fill_fence(struct nvdev_fence *dst_fence, + struct nvpva_submit_fence *src_fence) +{ + static u32 obj_type[] = {NVDEV_FENCE_TYPE_SYNCPT, + NVDEV_FENCE_TYPE_SEMAPHORE, + NVDEV_FENCE_TYPE_SEMAPHORE_TS, + NVDEV_FENCE_TYPE_SYNC_FD}; + + memset(dst_fence, 0, sizeof(struct nvdev_fence)); + dst_fence->type = obj_type[src_fence->type]; + switch (src_fence->type) { + case NVPVA_FENCE_OBJ_SYNCPT: + dst_fence->syncpoint_index = src_fence->obj.syncpt.id; + dst_fence->syncpoint_value = src_fence->obj.syncpt.value; + break; + case NVPVA_FENCE_OBJ_SEM: + case NVPVA_FENCE_OBJ_SEMAPHORE_TS: + dst_fence->semaphore_handle = src_fence->obj.sem.mem.pin_id; + dst_fence->semaphore_offset = src_fence->obj.sem.mem.offset; + dst_fence->semaphore_value = src_fence->obj.sem.value; + break; + case NVPVA_FENCE_OBJ_SYNC_FD: + break; + default: + break; + } +} + +static void +pva_eventlib_record_r5_states(struct platform_device *pdev, + u32 syncpt_id, + u32 syncpt_thresh, + struct pva_task_statistics_s *stats, + struct pva_submit_task *task) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct nvhost_pva_task_state state; + struct nvdev_fence post_fence; + struct nvpva_submit_fence *fence; + u8 i; + + if ((task->pva->profiling_level == 0) || (!pdata->eventlib_id)) + return; + + /* Record task postfences */ + for (i = 0 ; i < task->num_pva_fence_actions[NVPVA_FENCE_POST]; i++) { + fence = &(task->pva_fence_actions[NVPVA_FENCE_POST][i].fence); + pva_eventlib_fill_fence(&post_fence, fence); + nvhost_eventlib_log_fences(pdev, + syncpt_id, + syncpt_thresh, + &post_fence, + 1, + NVDEV_FENCE_KIND_POST, + stats->complete_time); + } + + state.class_id = pdata->class; + state.syncpt_id = syncpt_id; + state.syncpt_thresh = syncpt_thresh; + state.vpu_id = stats->vpu_assigned; + state.queue_id = stats->queue_id; + state.iova = task->dma_addr; + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_BEGIN + : NVHOST_PVA_VPU1_BEGIN, + stats->vpu_start_time); + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + stats->vpu_assigned == 0 ? 
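+ /*
+  * Timeline reconstructed from the firmware statistics block: VPUx_BEGIN
+  * and PREPARE_END are logged at vpu_start_time, VPUx_END and POST_BEGIN
+  * at vpu_complete_time, and, at profiling level >= 2, QUEUE_BEGIN at
+  * queued_time, QUEUE_END and PREPARE_BEGIN at vpu_assigned_time, and
+  * POST_END at complete_time.
+  */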
NVHOST_PVA_VPU0_END + : NVHOST_PVA_VPU1_END, + stats->vpu_complete_time); + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_PREPARE_END, + stats->vpu_start_time); + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_POST_BEGIN, + stats->vpu_complete_time); + + if (task->pva->profiling_level >= 2) { + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_QUEUE_BEGIN, + stats->queued_time); + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_QUEUE_END, + stats->vpu_assigned_time); + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_PREPARE_BEGIN, + stats->vpu_assigned_time); + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_POST_END, + stats->complete_time); + } +} +#else +static void +pva_eventlib_fill_fence(struct nvdev_fence *dst_fence, + struct nvpva_submit_fence *src_fence) +{ +} +static void +pva_eventlib_record_r5_states(struct platform_device *pdev, + u32 syncpt_id, + u32 syncpt_thresh, + struct pva_task_statistics_s *stats, + struct pva_submit_task *task) +{ +} +#endif + +void pva_task_free(struct kref *ref) +{ + struct pva_submit_task *task = + container_of(ref, struct pva_submit_task, ref); + struct nvpva_queue *my_queue = task->queue; + + mutex_lock(&my_queue->tail_lock); + if (my_queue->hw_task_tail == task->va) + my_queue->hw_task_tail = NULL; + + if (my_queue->old_tail == task->va) + my_queue->old_tail = NULL; + + mutex_unlock(&my_queue->tail_lock); + + pva_task_unpin_mem(task); + if (task->pinned_app) + pva_task_release_ref_vpu_app(&task->client->elf_ctx, + task->exe_id); + + nvhost_module_idle(task->pva->pdev); + nvpva_client_context_put(task->client); + /* Release memory that was allocated for the task */ + nvpva_queue_free_task_memory(task->queue, task->pool_index); + up(&my_queue->task_pool_sem); +} + +static void update_one_task(struct pva *pva) +{ + struct platform_device *pdev = pva->pdev; + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct nvpva_queue *queue; + struct pva_task_error_s task_info; + struct pva_submit_task *task; + struct pva_hw_task *hw_task; + struct pva_task_statistics_s *stats; + bool found; + u64 vpu_time = 0u; + u64 r5_overhead = 0u; + const u32 tsc_ticks_to_us = 31; + u32 vpu_assigned = 0; + + nvpva_fetch_task_status_info(pva, &task_info); + WARN_ON(!task_info.valid); + WARN_ON(task_info.queue >= MAX_PVA_QUEUE_COUNT); + queue = &pva->pool->queues[task_info.queue]; + + /* find the finished task; since two tasks can be scheduled at the same + * time, the finished one is not necessarily the first one + */ + found = false; + mutex_lock(&queue->list_lock); + + /* since we are only taking one entry out, we don't need to use the safe + * version + */ + list_for_each_entry(task, &queue->tasklist, node) { + if (task->pool_index == task_info.task_id) { + list_del(&task->node); + found = true; + break; + } + } + + mutex_unlock(&queue->list_lock); + if (!found) { + pr_err("pva: unexpected task: queue:%u, valid:%u, error:%u, vpu:%u", + task_info.queue, task_info.valid, task_info.error, + task_info.vpu); + return; + } + + WARN_ON(task_info.error == PVA_ERR_BAD_TASK || + task_info.error == PVA_ERR_BAD_TASK_ACTION_LIST); + hw_task = (struct pva_hw_task *)task->va; + stats = &hw_task->statistics; + if (!task->pva->stats_enabled) + goto prof; + + vpu_assigned = (stats->vpu_assigned & 0x1); + vpu_time = (stats->vpu_complete_time - stats->vpu_start_time); + r5_overhead = 
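+ /*
+  * R5 overhead is the time from queueing to completion minus the time
+  * spent executing on the VPU, converted from TSC ticks to microseconds
+  * with the tsc_ticks_to_us factor above.
+  */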
((stats->complete_time - stats->queued_time) - vpu_time); + r5_overhead = r5_overhead / tsc_ticks_to_us; + + trace_nvhost_pva_task_timestamp(dev_name(&pdev->dev), + pdata->class, + queue->syncpt_id, + task->local_sync_counter, + stats->vpu_assigned_time, + stats->complete_time); + nvpva_dbg_info(pva, "Completed task %p (0x%llx), " + "start_time=%llu, " + "end_time=%llu", + task, + (u64)task->dma_addr, + stats->vpu_assigned_time, + stats->complete_time); + trace_nvhost_pva_task_stats(pdev->name, + stats->queued_time, + stats->head_time, + stats->input_actions_complete, + stats->vpu_assigned_time, + stats->vpu_start_time, + stats->vpu_complete_time, + stats->complete_time, + stats->vpu_assigned, + r5_overhead); +prof: + if (task->pva->profiling_level == 0) + goto out; + + nvhost_eventlib_log_task(pdev, + queue->syncpt_id, + task->local_sync_counter, + stats->vpu_assigned_time, + stats->complete_time); + pva_eventlib_record_r5_states(pdev, + queue->syncpt_id, + task->local_sync_counter, + stats, + task); +out: + /* Not linked anymore so drop the reference */ + kref_put(&task->ref, pva_task_free); +} + +void pva_task_update(struct work_struct *work) +{ + struct pva *pva = container_of(work, struct pva, task_update_work); + int n_tasks = atomic_read(&pva->n_pending_tasks); + int i; + + atomic_sub(n_tasks, &pva->n_pending_tasks); + for (i = 0; i < n_tasks; i++) + update_one_task(pva); +} +static void +pva_queue_dump(struct nvpva_queue *queue, struct seq_file *s) +{ + struct pva_submit_task *task; + int i = 0; + + seq_printf(s, "Queue %u, Tasks\n", queue->id); + mutex_lock(&queue->list_lock); + list_for_each_entry(task, &queue->tasklist, node) { + seq_printf(s, " #%u: exe_id = %u\n", i++, task->exe_id); + } + + mutex_unlock(&queue->list_lock); +} +static int pva_task_submit_mmio_ccq(struct pva_submit_task *task, u8 batchsize) +{ + u32 flags = PVA_CMD_INT_ON_ERR; + int err = 0; + + /* Construct submit command */ + err = task->pva->version_config->ccq_send_task( + task->pva, task->queue->id, task->dma_addr, batchsize, flags); + return err; +} +static int pva_task_submit_mailbox(struct pva_submit_task *task, u8 batchsize) +{ + struct nvpva_queue *queue = task->queue; + struct pva_cmd_status_regs status; + struct pva_cmd_s cmd; + u32 flags, nregs; + int err = 0; + + /* Construct submit command */ + flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + nregs = pva_cmd_submit_batch(&cmd, queue->id, task->dma_addr, batchsize, + flags); + /* Submit request to PVA and wait for response */ + err = pva_mailbox_send_cmd_sync(task->pva, &cmd, nregs, &status); + if (err < 0) { + nvpva_warn(&task->pva->pdev->dev, "Failed to submit task: %d", + err); + goto out; + } + + if (status.error != PVA_ERR_NO_ERROR) { + nvpva_warn(&task->pva->pdev->dev, "PVA task rejected: %u", + status.error); + err = -EINVAL; + goto out; + } + +out: + + return err; +} + +static void nvpva_syncpt_dec_max(struct nvpva_queue *queue, u32 val) +{ + atomic_sub(val, &queue->syncpt_maxval); +} + +static void nvpva_syncpt_incr_max(struct nvpva_queue *queue, u32 val) +{ + atomic_add(val, &queue->syncpt_maxval); +} + +static u32 nvpva_syncpt_read_max(struct nvpva_queue *queue) +{ + return (u32)atomic_read(&queue->syncpt_maxval); +} + +static int pva_task_submit(const struct pva_submit_tasks *task_header) +{ + struct pva_submit_task *first_task = task_header->tasks[0]; + struct nvpva_queue *queue = first_task->queue; + u64 timestamp; + int err = 0; + u32 i; + u8 batchsize = task_header->num_tasks - 1U; + nvpva_dbg_info(first_task->pva, 
"submitting %u tasks; batchsize: %u", + task_header->num_tasks, batchsize); + + /* + * TSC timestamp is same as CNTVCT. Task statistics are being + * reported in TSC ticks. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) + timestamp = arch_timer_read_counter(); +#else + timestamp = arch_counter_get_cntvct(); +#endif + for (i = 0; i < task_header->num_tasks; i++) { + struct pva_submit_task *task = task_header->tasks[i]; + struct pva_hw_task *hw_task = task->va; + + /* take the reference until task is finished */ + kref_get(&task->ref); + + nvpva_syncpt_incr_max(queue, task->fence_num); + task->client->curr_sema_value += task->sem_num; + + mutex_lock(&queue->list_lock); + list_add_tail(&task->node, &queue->tasklist); + mutex_unlock(&queue->list_lock); + + hw_task->task.queued_time = timestamp; + } + + /* + * TSC timestamp is same as CNTVCT. Task statistics are being + * reported in TSC ticks. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) + timestamp = arch_timer_read_counter(); +#else + timestamp = arch_counter_get_cntvct(); +#endif + + /* Choose the submit policy based on the mode */ + switch (first_task->pva->submit_task_mode) { + case PVA_SUBMIT_MODE_MAILBOX: + err = pva_task_submit_mailbox(first_task, batchsize); + break; + + case PVA_SUBMIT_MODE_MMIO_CCQ: + err = pva_task_submit_mmio_ccq(first_task, batchsize); + break; + } + + if (err) { + /* assume no task has been submitted to firmware from now on */ + pr_err("pva: failed to submit %u tasks", + task_header->num_tasks); + goto remove_tasks; + } + + if (first_task->pva->profiling_level == 0) + goto out; + + for (i = 0; i < task_header->num_tasks; i++) { + u32 j; + struct nvdev_fence pre_fence; + struct pva_submit_task *task = task_header->tasks[i]; + + for (j = 0; j < task->num_prefences; j++) { + pva_eventlib_fill_fence(&pre_fence, + &task->prefences[j]); + nvhost_eventlib_log_fences(task->pva->pdev, + queue->syncpt_id, + task->local_sync_counter, + &pre_fence, + 1, + NVDEV_FENCE_KIND_PRE, + timestamp); + } + + nvhost_eventlib_log_submit(task->pva->pdev, + queue->syncpt_id, + task->local_sync_counter, + timestamp); + } +out: + return 0; + +remove_tasks: + for (i = 0; i < task_header->num_tasks; i++) { + struct pva_submit_task *task = task_header->tasks[i]; + + mutex_lock(&queue->list_lock); + list_del(&task->node); + mutex_unlock(&queue->list_lock); + + nvpva_syncpt_dec_max(queue, task->fence_num); + task->client->curr_sema_value -= task->sem_num; + + kref_put(&task->ref, pva_task_free); + } + + return err; +} + +static void +set_task_parameters(const struct pva_submit_tasks *task_header) +{ + struct pva_submit_task *task = task_header->tasks[0]; + struct pva_hw_task *hw_task = task->va; + struct nvpva_queue *queue = task->queue; + + u8 status_interface = 0U; + u32 flag = 0; + u64 batch_id; + u16 idx; + + /* Storing to local variable to update in task + * Increment the batch ID and let it overflow + * after it reached U8_MAX + */ + batch_id = (queue->batch_id++); + + if (task_header->execution_timeout_us > 0U) { + hw_task = task_header->tasks[0]->va; + hw_task->task.timer_ref_cnt = task_header->num_tasks; + hw_task->task.timeout = task_header->execution_timeout_us; + flag = PVA_TASK_FL_DEC_TIMER; + } + + /* In T19x, there is only 1 CCQ, so the response should come there + * irrespective of the queue ID. 
In T23x, there are 8 CCQ FIFO's + * thus the response should come in the correct CCQ + */ + if ((task->pva->submit_task_mode == PVA_SUBMIT_MODE_MMIO_CCQ) + && (task_header->tasks[0]->pva->version == PVA_HW_GEN2)) + status_interface = (task->queue->id + 1U); + + for (idx = 0U; idx < task_header->num_tasks; idx++) { + task = task_header->tasks[idx]; + hw_task = task->va; + WARN_ON(task->pool_index > 0xFF); + hw_task->task.task_id = task->pool_index; + hw_task->task.status_interface = status_interface; + hw_task->task.batch_id = batch_id; + + hw_task->task.flags |= flag; + } + +} + +static void +nvpva_task_config_l2sram_window(const struct pva_submit_tasks *task_header, + u32 start_index, u32 end_index, + u32 size) +{ + struct pva_hw_task *hw_task = NULL; + u32 task_num; + + hw_task = task_header->tasks[start_index]->va; + hw_task->task.l2sram_ref_cnt = (end_index - start_index) + 1U; + for (task_num = start_index; task_num <= end_index; task_num++) { + hw_task = task_header->tasks[task_num]->va; + hw_task->task.l2sram_size = size; + hw_task->task.flags |= PVA_TASK_FL_DEC_L2SRAM; + } +} + +static void +update_batch_tasks(const struct pva_submit_tasks *task_header) +{ + struct pva_submit_task *task = NULL; + u32 task_num; + u32 l2s_start_index, l2s_end_index; + u32 l2sram_max_size = 0U; + u32 invalid_index = task_header->num_tasks + 1U; + + l2s_start_index = invalid_index; + l2s_end_index = invalid_index; + + for (task_num = 0; task_num < task_header->num_tasks; task_num++) { + task = task_header->tasks[task_num]; + if (task->l2_alloc_size > 0) { + if (l2s_start_index == invalid_index) + l2s_start_index = task_num; + + l2s_end_index = task_num; + + if (l2sram_max_size < task->l2_alloc_size) + l2sram_max_size = task->l2_alloc_size; + + } else if (l2s_end_index != invalid_index) { + /* An L2SRAM window is found within the batch which + * needs to be sanitized + */ + nvpva_task_config_l2sram_window(task_header, + l2s_start_index, + l2s_end_index, + l2sram_max_size); + l2s_start_index = invalid_index; + l2s_end_index = invalid_index; + l2sram_max_size = 0; + } + } + + /* Last L2SRAM window in batch may need to be sanitized */ + if (l2s_end_index != invalid_index) { + nvpva_task_config_l2sram_window(task_header, + l2s_start_index, + l2s_end_index, + l2sram_max_size); + } +} + +static int pva_queue_submit(struct nvpva_queue *queue, void *args) +{ + const struct pva_submit_tasks *task_header = args; + int err = 0; + int i; + uint32_t thresh, sem_thresh; + struct pva_hw_task *prev_hw_task = NULL; + struct nvpva_client_context *client = task_header->tasks[0]->client; + + mutex_lock(&client->sema_val_lock); + thresh = nvpva_syncpt_read_max(queue); + sem_thresh = client->curr_sema_value; + for (i = 0; i < task_header->num_tasks; i++) { + struct pva_submit_task *task = task_header->tasks[i]; + task->fence_num = 0; + task->syncpt_thresh = thresh; + + task->sem_num = 0; + task->sem_thresh = sem_thresh; + + /* First, dump the task that we are submitting */ + pva_task_dump(task); + + /* Write the task data */ + err = pva_task_write(task); + if (err) + goto unlock; + + thresh = task->syncpt_thresh; + sem_thresh = task->sem_thresh; + queue->local_sync_counter += (1 + task->fence_num); + task->local_sync_counter = queue->local_sync_counter; + if (prev_hw_task) + prev_hw_task->task.next = task->dma_addr; + + prev_hw_task = task->va; + } + + set_task_parameters(task_header); + + /* Update L2SRAM flags for generations T23x and after */ + if (task_header->tasks[0]->pva->version != PVA_HW_GEN1) + 
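+ /*
+  * update_batch_tasks() scans the batch for runs of consecutive tasks
+  * that request L2SRAM and tags each run with a shared reference count
+  * on its first task and the largest L2SRAM size requested in the run.
+  */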
update_batch_tasks(task_header); + + mutex_lock(&queue->tail_lock); + + /* Once batch is ready, link it to the FW queue*/ + if (queue->hw_task_tail) + queue->hw_task_tail->task.next = task_header->tasks[0]->dma_addr; + + /* Hold a reference to old tail in case submission fails*/ + queue->old_tail = queue->hw_task_tail; + + queue->hw_task_tail = prev_hw_task; + mutex_unlock(&queue->tail_lock); + + err = pva_task_submit(task_header); + if (err) { + dev_err(&queue->vm_pdev->dev, "failed to submit task"); + mutex_lock(&queue->tail_lock); + queue->hw_task_tail = queue->old_tail; + mutex_unlock(&queue->tail_lock); + } +unlock: + mutex_unlock(&client->sema_val_lock); + return err; +} + +static struct pva_pinned_memory *find_pinned_mem(struct pva_submit_task *task, + int id) +{ + u32 i; + + for (i = 0; i < task->num_pinned; i++) + if (task->pinned_memory[i].id == id) + return &task->pinned_memory[i]; + return NULL; +} + +static void pva_queue_cleanup_semaphore(struct pva_submit_task *task, + struct nvpva_submit_fence *fence) +{ + u8 *dmabuf_cpuva; + u32 *fence_cpuva; + struct pva_pinned_memory *mem; + + if (fence->type != NVPVA_FENCE_OBJ_SEM) + goto out; + + WARN_ON((fence->obj.sem.mem.offset % 4) != 0); + + mem = find_pinned_mem(task, fence->obj.sem.mem.pin_id); + if (mem == NULL) { + task_err(task, "can't find pinned semaphore for cleanup"); + goto out; + } + + dmabuf_cpuva = pva_dmabuf_vmap(mem->dmabuf); + + if (!dmabuf_cpuva) + goto out; + + fence_cpuva = (void *)&dmabuf_cpuva[fence->obj.sem.mem.offset]; + *fence_cpuva = fence->obj.sem.value; + + pva_dmabuf_vunmap(mem->dmabuf, dmabuf_cpuva); +out: + return; +} + +static void pva_queue_cleanup_status(struct pva_submit_task *task, + struct nvpva_mem *status_h) +{ + struct pva_pinned_memory *mem; + u8 *dmabuf_cpuva; + struct pva_gen_task_status_s *status_ptr; + + mem = find_pinned_mem(task, status_h->pin_id); + if (mem == NULL) { + task_err(task, "can't find pinned status for cleanup"); + goto out; + } + + dmabuf_cpuva = pva_dmabuf_vmap(mem->dmabuf); + if (!dmabuf_cpuva) + goto out; + + status_ptr = (void *)&dmabuf_cpuva[status_h->offset]; + status_ptr->status = PVA_ERR_BAD_TASK_STATE; + status_ptr->info32 = PVA_ERR_VPU_BAD_STATE; + + pva_dmabuf_vunmap(mem->dmabuf, dmabuf_cpuva); +out: + return; +} + +static void pva_queue_cleanup(struct nvpva_queue *queue, + struct pva_submit_task *task) +{ + unsigned int i, fence_type; + + /* Write task status first */ + for (i = 0; i < task->num_output_task_status; i++) + pva_queue_cleanup_status(task, &task->output_task_status[i]); + + /* Finish up non-syncpoint fences */ + for (fence_type = NVPVA_FENCE_SOT_R5; + fence_type < NVPVA_MAX_FENCE_TYPES; fence_type++) { + for (i = 0; i < task->num_pva_fence_actions[fence_type]; i++) + pva_queue_cleanup_semaphore( + task, + &task->pva_fence_actions[fence_type][i].fence); + } +} + +static int pva_queue_abort(struct nvpva_queue *queue) +{ + struct pva_submit_task *task, *n; + + mutex_lock(&queue->list_lock); + + list_for_each_entry_safe(task, n, &queue->tasklist, node) { + pva_queue_cleanup(queue, task); + list_del(&task->node); + kref_put(&task->ref, pva_task_free); + } + + /* Finish syncpoint increments to release waiters */ + nvhost_syncpt_set_min_update(queue->vm_pdev, queue->syncpt_id, + atomic_read(&queue->syncpt_maxval)); + mutex_unlock(&queue->list_lock); + + return 0; +} + +struct nvpva_queue_ops pva_queue_ops = { + .abort = pva_queue_abort, + .submit = pva_queue_submit, + .get_task_size = pva_task_get_memsize, + .dump = pva_queue_dump, + .set_attribute = 
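+ /* attribute setting is not implemented for PVA queues */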
NULL, +}; diff --git a/drivers/video/tegra/host/pva/pva_queue.h b/drivers/video/tegra/host/pva/pva_queue.h new file mode 100644 index 00000000..8130072a --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_queue.h @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_QUEUE_H +#define PVA_QUEUE_H + +#include +#include "nvpva_queue.h" +#include "nvpva_buffer.h" +#include "pva-sys-params.h" +#include "pva-interface.h" +#include "pva-task.h" + +#define task_err(task, fmt, ...) \ + dev_err(&task->pva->pdev->dev, fmt, ##__VA_ARGS__) + +struct dma_buf; + +extern struct nvpva_queue_ops pva_queue_ops; + +struct pva_pinned_memory { + u64 size; + dma_addr_t dma_addr; + struct dma_buf *dmabuf; + int id; + enum nvpva_buffers_heap heap; +}; + +struct pva_cb { + dma_addr_t head_addr; + uint32_t *head_va; + dma_addr_t tail_addr; + uint32_t *tail_va; + dma_addr_t err_addr; + uint32_t *err_va; + dma_addr_t buffer_addr; + uint8_t *buffer_va; + uint32_t tail; + uint32_t size; +}; + +/** + * @brief Describe a task for PVA + * + * This is an internal representation of the task structure. All + * pointers refer to kernel memory. + * + * pva Pointer to struct pva + * buffers Pointer to struct nvpva_buffers + * queue Pointer to struct nvpva_queue + * node Used to build queue task list + * kref Used to manage allocation and freeing + * dma_addr task dma_addr + * aux_dma_addr task auxdma_addr + * va task virtual address + * aux_va task aux virtual address + * pool_index task pool index + * postfence_va postfence virtual address + * num_prefences Number of pre-fences in this task + * num_postfences Number of post-fences in this task + * num_input_surfaces Number of input surfaces + * num_output_surfaces Number of output surfaces + * num_input_task_status Number of input task status structures + * num_output_task_status Number of output task status structures + * operation task operation + * timeout Latest Unix time when the task must complete or + * 0 if disabled. 
+ * prefences Pre-fence structures + * postfences Post-fence structures + * input_surfaces Input surfaces structures + * input_scalars Information for input scalars + * output_surfaces Output surfaces + * output_scalars Information for output scalars + * input_task_status Input status structure + * output_task_status Output status structure + * + */ +struct pva_submit_task { + struct pva *pva; + struct nvpva_queue *queue; + struct nvpva_client_context *client; + + struct list_head node; + struct kref ref; + + dma_addr_t dma_addr; + dma_addr_t aux_dma_addr; + void *va; + void *aux_va; + int pool_index; + + bool pinned_app; + u32 exe_id; + + u32 l2_alloc_size; /* Not applicable for Xavier */ + struct pva_cb *stdout; + u32 symbol_payload_size; + + u32 flags; + u8 num_prefences; + u8 num_user_fence_actions; + u8 num_input_task_status; + u8 num_output_task_status; + u8 num_dma_descriptors; + u8 num_dma_channels; + u8 num_symbols; + u8 special_access; + + u64 timeout; + u64 desc_hwseq_frm; + u32 syncpt_thresh; + u32 fence_num; + u32 local_sync_counter; + + u32 sem_thresh; + u32 sem_num; + + /* Data provided by userspace "as is" */ + struct nvpva_submit_fence prefences[NVPVA_TASK_MAX_PREFENCES]; + struct nvpva_fence_action + user_fence_actions[NVPVA_MAX_FENCE_TYPES * + NVPVA_TASK_MAX_FENCEACTIONS]; + struct nvpva_mem input_task_status[NVPVA_TASK_MAX_INPUT_STATUS]; + struct nvpva_mem output_task_status[NVPVA_TASK_MAX_OUTPUT_STATUS]; + struct nvpva_dma_descriptor + dma_descriptors[NVPVA_TASK_MAX_DMA_DESCRIPTORS]; + struct nvpva_dma_channel dma_channels + [NVPVA_TASK_MAX_DMA_CHANNELS_T23X]; /* max of T19x & T23x */ + struct nvpva_dma_misr dma_misr_config; + struct nvpva_hwseq_config hwseq_config; + struct nvpva_symbol_param symbols[NVPVA_TASK_MAX_SYMBOLS]; + u8 symbol_payload[NVPVA_TASK_MAX_PAYLOAD_SIZE]; + + struct pva_pinned_memory pinned_memory[256]; + u32 num_pinned; + u8 num_pva_fence_actions[NVPVA_MAX_FENCE_TYPES]; + struct nvpva_fence_action + pva_fence_actions[NVPVA_MAX_FENCE_TYPES] + [NVPVA_TASK_MAX_FENCEACTIONS]; + /** Store Suface base address */ + u64 src_surf_base_addr; + u64 dst_surf_base_addr; + bool is_system_app; +}; + +struct pva_submit_tasks { + struct pva_submit_task *tasks[NVPVA_SUBMIT_MAX_TASKS]; + u32 task_thresh[NVPVA_SUBMIT_MAX_TASKS]; + u16 num_tasks; + u64 execution_timeout_us; +}; + +#define ACTION_LIST_FENCE_SIZE 21U +#define ACTION_LIST_STATUS_OPERATION_SIZE 11U +#define ACTION_LIST_TERMINATION_SIZE 1U +#define ACTION_LIST_STATS_SIZE 9U +#define PVA_TSC_TICKS_TO_US_FACTOR (0.032f) + +/* + * The worst-case input action buffer size: + * - Prefences trigger a word memory operation (size 13 bytes) + * - Input status reads trigger a half-word memory operation (size 11 bytes) + * - The action list is terminated by a null action (1 byte) + */ +#define INPUT_ACTION_BUFFER_SIZE \ + ALIGN(((NVPVA_TASK_MAX_PREFENCES * ACTION_LIST_FENCE_SIZE) + \ + ((NVPVA_TASK_MAX_FENCEACTIONS * 2U) * ACTION_LIST_FENCE_SIZE) + \ + NVPVA_TASK_MAX_INPUT_STATUS * \ + ACTION_LIST_STATUS_OPERATION_SIZE + \ + ACTION_LIST_TERMINATION_SIZE), \ + 256) + +/** + * Ensure that sufficient preactions per task are supported by FW/KMD interface. 
+ * Maximum possible number of preactions can be determined by adding below + * limits: + * - Maximum number of prefences allowed per task + * - Maximum number of SOT_R and SOT_V fences allowed per task + * - Maximum number of input status buffers allowed per task + */ +#if ((PVA_MAX_PREACTION_LISTS) < \ + ( \ + (NVPVA_TASK_MAX_PREFENCES) + \ + (NVPVA_TASK_MAX_FENCEACTIONS * 2U) + \ + (NVPVA_TASK_MAX_INPUT_STATUS) \ + ) \ + ) +#error "Insufficient preactions supported by FW/KMD interface" +#endif + +/** + * Ensure that sufficient postactions per task are supported by FW/KMD interface. + * Maximum possible number of postactions can be determined by adding below + * limits: + * - Maximum number of EOT_V, EOT_R and EOT fences allowed per task + * - Maximum number of output status buffers allowed per task + * - Maximum one postaction for statistics + */ +#if ((PVA_MAX_POSTACTION_LISTS) < \ + ( \ + (NVPVA_TASK_MAX_FENCEACTIONS * 3U) + \ + (NVPVA_TASK_MAX_OUTPUT_STATUS) + \ + (1U) \ + ) \ + ) +#error "Insufficient postactions supported by FW/KMD interface" +#endif + +struct PVA_PACKED pva_task_action_ptr_s { + /* IOVA Pointer to update Sync Point Value */ + pva_iova p; + /* Value to be written to Sync Point */ + uint32_t v; + /* Pointer to write timestamp */ + pva_iova t; +}; + +struct PVA_PACKED pva_task_action_status_s { + /* IOVA to pva_gen_task_status_t struct */ + pva_iova p; + uint16_t status; + /* Padding to ensure that structure is 4byte aligned for FW perf optimization */ + uint8_t pad[2]; +}; + +struct PVA_PACKED pva_task_action_statistics_s { + /* IOVA to pva_task_statistics_t struct */ + pva_iova p; +}; +struct PVA_PACKED pva_task_action_s { + uint8_t action; + /* Padding to ensure that structure is 4byte aligned for FW perf optimization */ + uint8_t pad[3]; + union { + struct pva_task_action_ptr_s ptr; + struct pva_task_action_status_s status; + struct pva_task_action_statistics_s statistics; + } args; +}; + +/* This structure is created to ensure dma_info and params_list is always + * stored in contiguous memory within the HW task structure. This is done as a perf + * optimization so that a single dma copy can be triggered by R5 FW for copying both + * the dma_info and param_list. + */ +struct pva_dma_info_and_params_list_s { + struct pva_dma_info_s dma_info; + struct pva_vpu_parameters_s param_list[NVPVA_TASK_MAX_SYMBOLS]; +}; + +struct pva_hw_task { + struct pva_td_s task; + struct pva_task_action_s preactions[PVA_MAX_PREACTION_LISTS]; + struct pva_task_action_s postactions[PVA_MAX_POSTACTION_LISTS]; + struct pva_dma_info_and_params_list_s dma_info_and_params_list; + struct pva_dma_misr_config_s dma_misr_config; + struct pva_dtd_s dma_desc[NVPVA_TASK_MAX_DMA_DESCRIPTORS]; + struct pva_vpu_parameter_info_s param_info; + struct pva_task_statistics_s statistics; + struct pva_circular_buffer_info_s stdout_cb_info; +}; + +void pva_task_remove(struct pva_submit_task *task); +void pva_task_free(struct kref *ref); + +void pva_task_update(struct work_struct *work); + +struct pva_pinned_memory *pva_task_pin_mem(struct pva_submit_task *task, + u32 id); + +void pva_dmabuf_vunmap(struct dma_buf *dmabuf, void *addr); +void *pva_dmabuf_vmap(struct dma_buf *dmabuf); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_regs.h b/drivers/video/tegra/host/pva/pva_regs.h new file mode 100644 index 00000000..4a3e1659 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_regs.h @@ -0,0 +1,205 @@ +/* + * + * Copyright (c) 2016-2023 NVIDIA Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef _PVA_REGS_H_ +#define _PVA_REGS_H_ + +#include "pva-bit.h" +#include "hw_cfg_pva_v1.h" +#include "hw_cfg_pva_v2.h" +#include "hw_dma_ch_pva.h" +#include "hw_dma_desc_pva.h" +#include "hw_proc_pva.h" +#include "hw_hsp_pva.h" +#include "hw_sec_pva_v1.h" +#include "hw_sec_pva_v2.h" +#include "hw_evp_pva.h" +#include "pva-interface.h" +#include "pva_mailbox.h" +#include "pva-ucode-header.h" + +/* Definition for LIC_INTR_ENABLE bits */ +#define SEC_LIC_INTR_HSP1 0x1 +#define SEC_LIC_INTR_HSP2 0x2 +#define SEC_LIC_INTR_HSP3 0x4 +#define SEC_LIC_INTR_HSP4 0x8 +#define SEC_LIC_INTR_HSP_ALL 0xF +#define SEC_LIC_INTR_H1X_ALL_23 0x3 +#define SEC_LIC_INTR_H1X_ALL_19 0x7 + +/* Watchdog support */ +#define SEC_LIC_INTR_WDT 0x1 + +#define SEC_BASE_COMMON 0x20000U + +/* unified register interface for both v1 and v2 */ +static inline u32 sec_lic_intr_status_r(int version) +{ + if (version == 1) + return v1_sec_lic_intr_status_r(); + else + return v2_sec_lic_intr_status_r(); +} + +static inline u32 cfg_ccq_status_r(int version, u32 ccq_idx, u32 status_idx) +{ + if (version == 1) + return v1_cfg_ccq_status_r(status_idx); + else + return v2_cfg_ccq_status_r(ccq_idx, status_idx); +} + +static inline u32 cfg_ccq_r(int version, u32 ccq_idx) +{ + if (version == 1) + return v1_cfg_ccq_r(); + else + return v2_cfg_ccq_r(ccq_idx); +} + +static inline u32 cfg_r5user_lsegreg_r(int version) +{ + if (version == 1) + return v1_cfg_r5user_lsegreg_r(); + else + return v2_cfg_r5user_lsegreg_r(); +} + +static inline u32 cfg_priv_ar1_lsegreg_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar1_lsegreg_r(); + else + return v2_cfg_priv_ar1_lsegreg_r(); +} + +static inline u32 cfg_priv_ar2_lsegreg_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar2_lsegreg_r(); + else + return v2_cfg_priv_ar2_lsegreg_r(); +} + +static inline u32 cfg_r5user_usegreg_r(int version) +{ + if (version == 1) + return v1_cfg_r5user_usegreg_r(); + else + return v2_cfg_r5user_usegreg_r(); +} + +static inline u32 cfg_priv_ar1_usegreg_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar1_usegreg_r(); + else + return v2_cfg_priv_ar1_usegreg_r(); +} + +static inline u32 cfg_priv_ar2_usegreg_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar2_usegreg_r(); + else + return v2_cfg_priv_ar2_usegreg_r(); +} + +static inline u32 cfg_priv_ar1_start_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar1_start_r(); + else + return v2_cfg_priv_ar1_start_r(); +} + +static inline u32 cfg_priv_ar1_end_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar1_end_r(); + else + return v2_cfg_priv_ar1_end_r(); +} + +static inline u32 cfg_priv_ar2_start_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar2_start_r(); + else + return v2_cfg_priv_ar2_start_r(); +} + +static inline u32 cfg_priv_ar2_end_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar2_end_r(); + else + return v2_cfg_priv_ar2_end_r(); +} + +static inline u32 
sec_lic_intr_enable_r(int version) +{ + if (version == 1) + return v1_sec_lic_intr_enable_r(); + else + return v2_sec_lic_intr_enable_r(); +} + +static inline u32 hwpm_get_offset(void) +{ + return 0x200000; +} + +static inline u32 sec_ec_errslice0_missionerr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x30U); +} + +static inline u32 sec_ec_errslice1_missionerr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x60U); +} + +static inline u32 sec_ec_errslice2_missionerr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x90U); +} + +static inline u32 sec_ec_errslice3_missionerr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0xC0U); +} + +static inline u32 sec_ec_errslice0_latenterr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x40U); +} + +static inline u32 sec_ec_errslice1_latenterr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x70U); +} + +static inline u32 sec_ec_errslice2_latenterr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0xA0U); +} + +static inline u32 sec_ec_errslice3_latenterr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0xD0U); +} +#endif diff --git a/drivers/video/tegra/host/pva/pva_sec_ec.c b/drivers/video/tegra/host/pva/pva_sec_ec.c new file mode 100644 index 00000000..b32163b9 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_sec_ec.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include "pva_regs.h" +#include "pva.h" + +static u32 pva_get_sec_ec_addrs(u32 index) +{ + u32 sec_ec_miss_addrs[] = { + sec_ec_errslice0_missionerr_enable_r(), + sec_ec_errslice0_latenterr_enable_r(), + sec_ec_errslice1_missionerr_enable_r(), + sec_ec_errslice1_latenterr_enable_r(), + sec_ec_errslice2_missionerr_enable_r(), + sec_ec_errslice2_latenterr_enable_r(), + sec_ec_errslice3_missionerr_enable_r(), + sec_ec_errslice3_latenterr_enable_r() + }; + + return sec_ec_miss_addrs[index]; +}; + +void pva_disable_ec_err_reporting(struct pva *pva) +{ + + u32 n_regs = (pva->version != PVA_HW_GEN1) ? 8 : 4; + u32 i; + + /* save current state */ + for (i = 0; i < n_regs; i++) + pva->ec_state[i] = host1x_readl(pva->pdev, + pva_get_sec_ec_addrs(i)); + + /* disable reporting */ + for (i = 0; i < n_regs; i++) + host1x_writel(pva->pdev, pva_get_sec_ec_addrs(i), 0); +} + +void pva_enable_ec_err_reporting(struct pva *pva) +{ + + u32 n_regs = (pva->version != PVA_HW_GEN1) ? 8 : 4; + u32 i; + + /* enable reporting */ + for (i = 0; i < n_regs; i++) + host1x_writel(pva->pdev, + pva_get_sec_ec_addrs(i), + pva->ec_state[i]); +} diff --git a/drivers/video/tegra/host/pva/pva_sec_ec.h b/drivers/video/tegra/host/pva/pva_sec_ec.h new file mode 100644 index 00000000..c779d97f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_sec_ec.h @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023, NVIDIA Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _PVA_SEC_EC_H_ +#define _PVA_SEC_EC_H_ + +void pva_disable_ec_err_reporting(struct pva *pva); +void pva_enable_ec_err_reporting(struct pva *pva); +#endif diff --git a/drivers/video/tegra/host/pva/pva_sha256.c b/drivers/video/tegra/host/pva/pva_sha256.c new file mode 100644 index 00000000..35bb5900 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_sha256.c @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include "pva_sha256.h" + +#define ROTLEFT(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) +#define ROTRIGHT(a, b) (((a) >> (b)) | ((a) << (32 - (b)))) + +#define CH(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define SHA_EP0(x) (ROTRIGHT(x, 2) ^ ROTRIGHT(x, 13) ^ ROTRIGHT(x, 22)) +#define SHA_EP1(x) (ROTRIGHT(x, 6) ^ ROTRIGHT(x, 11) ^ ROTRIGHT(x, 25)) +#define SIG0(x) (ROTRIGHT(x, 7) ^ ROTRIGHT(x, 18) ^ ((x) >> 3)) +#define SIG1(x) (ROTRIGHT(x, 17) ^ ROTRIGHT(x, 19) ^ ((x) >> 10)) + +#define SWAP32(x) __builtin_bswap32(x) +#define SWAP64(x) __builtin_bswap64(x) + +/** + * This variable is used internally by \ref sha256_transform() + */ +static const uint32_t k[64] = { + U32(0x428a2f98U), U32(0x71374491U), U32(0xb5c0fbcfU), U32(0xe9b5dba5U), + U32(0x3956c25bU), U32(0x59f111f1U), U32(0x923f82a4U), U32(0xab1c5ed5U), + U32(0xd807aa98U), U32(0x12835b01U), U32(0x243185beU), U32(0x550c7dc3U), + U32(0x72be5d74U), U32(0x80deb1feU), U32(0x9bdc06a7U), U32(0xc19bf174U), + U32(0xe49b69c1U), U32(0xefbe4786U), U32(0x0fc19dc6U), U32(0x240ca1ccU), + U32(0x2de92c6fU), U32(0x4a7484aaU), U32(0x5cb0a9dcU), U32(0x76f988daU), + U32(0x983e5152U), U32(0xa831c66dU), U32(0xb00327c8U), U32(0xbf597fc7U), + U32(0xc6e00bf3U), U32(0xd5a79147U), U32(0x06ca6351U), U32(0x14292967U), + U32(0x27b70a85U), U32(0x2e1b2138U), U32(0x4d2c6dfcU), U32(0x53380d13U), + U32(0x650a7354U), U32(0x766a0abbU), U32(0x81c2c92eU), U32(0x92722c85U), + U32(0xa2bfe8a1U), U32(0xa81a664bU), U32(0xc24b8b70U), U32(0xc76c51a3U), + U32(0xd192e819U), U32(0xd6990624U), U32(0xf40e3585U), U32(0x106aa070U), + U32(0x19a4c116U), U32(0x1e376c08U), U32(0x2748774cU), U32(0x34b0bcb5U), + U32(0x391c0cb3U), U32(0x4ed8aa4aU), U32(0x5b9cca4fU), U32(0x682e6ff3U), + U32(0x748f82eeU), U32(0x78a5636fU), U32(0x84c87814U), U32(0x8cc70208U), + U32(0x90befffaU), U32(0xa4506cebU), 
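+ /* last pair of the 64 standard SHA-256 round constants (first 32 bits of
+  * the fractional parts of the cube roots of the first 64 primes) */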
U32(0xbef9a3f7U), U32(0xc67178f2U) +}; + +/** + * \brief + * This function is a helper function used by \ref pva_sha256_update + * to hash 512-bit blocks and forms the core of the algorithm. + * Use \ref sha256_init(), \ref pva_sha256_update(), and + * \ref sha256_finalize() instead of calling sha256_transform() directly. + * \param[in] ctx pointer of struct sha256_ctx_s context. + * \param[in] data_in pointer to the data block to be hashed. + * \return Void + */ +static void +sha256_transform(struct sha256_ctx_s *ctx, + const void *data_in) +{ + uint32_t a, b, c, d, e, f, g, h, t1, t2, m[64]; + const uint32_t * const data = data_in; + size_t i; + + for (i = 0; i < U32(16); i++) + m[i] = SWAP32(data[i]); + + for (i = 0; i < U32(64) - U32(16); ++i) + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + m[i + U32(16)] = SIG1(m[U32(14) + i]) + m[U32(9) + i] + + SIG0(m[U32(1) + i]) + m[i]; + + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + f = ctx->state[5]; + g = ctx->state[6]; + h = ctx->state[7]; + + for (i = 0; i < U32(64); ++i) { + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + t1 = h + SHA_EP1(e) + CH(e, f, g) + k[i] + m[i]; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + t2 = SHA_EP0(a) + MAJ(a, b, c); + h = g; + g = f; + f = e; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + e = d + t1; + d = c; + c = b; + b = a; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + a = t1 + t2; + } + + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[0] += a; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[1] += b; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[2] += c; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[3] += d; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[4] += e; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[5] += f; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[6] += g; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[7] += h; +} + +void +sha256_init(struct sha256_ctx_s *ctx) +{ + ctx->bitlen = 0; + ctx->state[0] = U32(0x6a09e667); + ctx->state[1] = U32(0xbb67ae85); + ctx->state[2] = U32(0x3c6ef372); + ctx->state[3] = U32(0xa54ff53a); + ctx->state[4] = U32(0x510e527f); + ctx->state[5] = U32(0x9b05688c); + ctx->state[6] = U32(0x1f83d9ab); + ctx->state[7] = U32(0x5be0cd19); +} + +void +pva_sha256_update(struct sha256_ctx_s *ctx, + const void *data, + size_t len) +{ + uint i; + + /*assert(len % 64 == 0); */ + + for (i = 0; i < len; i += U32(64)) { + ctx->bitlen &= U32(0xffffffff); + sha256_transform(ctx, ((const uint8_t *)data) + i); + ctx->bitlen += U32(512); + } +} + +void +sha256_copy(const struct sha256_ctx_s *ctx_in, + struct sha256_ctx_s *ctx_out) +{ + *ctx_out = *ctx_in; +} + +void +sha256_finalize(struct sha256_ctx_s *ctx, + const void *input, + size_t input_size, + uint32_t out[8]) +{ + uint8_t data[64]; + void *p = data; + uint32_t t; + + input_size &= U32(0xffffffff); + ctx->bitlen &= U32(0xffffffff); + + /* the false of this condition is illegal for this API agreement */ + /* this check is here only for Coverity INT30-C */ + ctx->bitlen += input_size * U32(8); + (void)memcpy(p, input, input_size); + data[input_size] = 
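+ /*
+  * Standard SHA-256 padding: append a single 0x80 byte (a 1 bit), zero
+  * fill, and store the byte-swapped message bit length at the end of the
+  * final 64-byte block before the closing transform.
+  */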
0x80; + + /* can we fit an 8-byte counter? */ + if (input_size < U32(56)) { + /* Pad whatever data is left in the buffer. */ + (void)memset(data + input_size + U32(1), 0, + U32(56) - input_size - U32(1)); + } else { + /* Go into another block. We are here only for message hashing */ + if (input_size + U32(1) < U32(64)) + (void)memset(data + input_size + U32(1), 0, + U32(64) - input_size - U32(1)); + + sha256_transform(ctx, data); + (void)memset(data, 0, 56); + } + + t = ctx->bitlen_low; + + *(uint32_t *)(void *)(data + 56) = 0; + *(uint32_t *)(void *)(data + 60) = SWAP32(t); + + sha256_transform(ctx, data); + + out[0] = SWAP32(ctx->state[0]); + out[1] = SWAP32(ctx->state[1]); + out[2] = SWAP32(ctx->state[2]); + out[3] = SWAP32(ctx->state[3]); + out[4] = SWAP32(ctx->state[4]); + out[5] = SWAP32(ctx->state[5]); + out[6] = SWAP32(ctx->state[6]); + out[7] = SWAP32(ctx->state[7]); +} diff --git a/drivers/video/tegra/host/pva/pva_sha256.h b/drivers/video/tegra/host/pva/pva_sha256.h new file mode 100644 index 00000000..62b9880d --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_sha256.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_SHA256_H +#define PVA_SHA256_H + +#define U32(x) ((uint32_t)(x)) + +struct sha256_ctx_s { + /* + * On bitlen: + * + * While we don't exceed 2^32 bit (2^29 byte) length for the input buffer, + * size_t is more efficient at least on RISC-V. This particular + * structure is needed to make Coverity happy. + */ + union { + size_t bitlen; + uint32_t bitlen_low; + }; + uint32_t state[8]; +}; + +/** + * Initializes struct sha256_ctx_s + * + * \param[in] ctx pointer of struct sha256_ctx_s context + * + * \return void + */ +void sha256_init(struct sha256_ctx_s *ctx); + +/** + * \brief + * Hash full blocks, in units of 64 bytes + * can be called repeatedly with chunks of the message + * to be hashed (len bytes at data). + * + * \param[in] ctx pointer of struct sha256_ctx_s context + * \param[in] data pointer to the data block to be hashed + * \param[in] len length (in units of 64 bytes) of the data to be hashed. + * + * \return void + */ +void +pva_sha256_update(struct sha256_ctx_s *ctx, + const void *data, + size_t len); + +/** + * \brief + * Finalize the hash and keep the calcualted hash in out. + * Required: input_size < 64. Call pva_sha256_update() first otherwise. + * + * \param[in] ctx pointer of struct sha256_ctx_s context + * \param[in] input pointer to the data block + * (left over from \ref pva_sha256_update) to be hashed + * \param[in] input_size size of the data block to hashed + * (left over from \ref pva_sha256_update to be hashed) + * \param[out] out places the calcuated sha256 key in out. 
+ * + * \return void + */ +void +sha256_finalize(struct sha256_ctx_s *ctx, + const void *input, + size_t input_size, + uint32_t out[8]); + +/** + * \brief + * copy state information to ctx_out from ctx_in + * \param[in] ctx_in input struct sha256_ctx_s + * \param[out] ctx_out output struct sha256_ctx_s + * \return void + */ +void sha256_copy(const struct sha256_ctx_s *ctx_in, + struct sha256_ctx_s *ctx_out); + +#endif /* PVA_SHA256_H */ diff --git a/drivers/video/tegra/host/pva/pva_status_regs.h b/drivers/video/tegra/host/pva/pva_status_regs.h new file mode 100644 index 00000000..6d7da9dc --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_status_regs.h @@ -0,0 +1,44 @@ +/* + * PVA Command header + * + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef __PVA_STATUS_REGS_H__ +#define __PVA_STATUS_REGS_H__ + +#define PVA_CMD_STATUS_REGS 5 + +#define PVA_CMD_STATUS3_INDEX 0u +#define PVA_CMD_STATUS4_INDEX 1u +#define PVA_CMD_STATUS5_INDEX 2u +#define PVA_CMD_STATUS6_INDEX 3u +#define PVA_CMD_STATUS7_INDEX 4u + +enum pva_cmd_status { + PVA_CMD_STATUS_INVALID = 0, + PVA_CMD_STATUS_WFI = 1, + PVA_CMD_STATUS_DONE = 2, + PVA_CMD_STATUS_ABORTED = 3, +}; + +struct pva_cmd_status_regs { + uint32_t status[PVA_CMD_STATUS_REGS]; + uint32_t error; + uint32_t cmd; +}; + +#endif diff --git a/drivers/video/tegra/host/pva/pva_system_allow_list.c b/drivers/video/tegra/host/pva/pva_system_allow_list.c new file mode 100644 index 00000000..28451b0c --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_system_allow_list.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +unsigned char pva_auth_allow_list_sys[] = { +0x08, 0x00, 0x00, 0x00, 0xcd, 0xdb, 0x32, 0x3b, 0xf3, 0x07, 0x1c, 0x33, 0x53, 0x86, 0xfa, 0x41, +0x5e, 0x9b, 0xab, 0x9a, 0x54, 0x0b, 0x8f, 0x24, 0xa3, 0x79, 0xb2, 0x5d, 0xdf, 0xbf, 0x4a, 0x10, +0xfa, 0x08, 0xd3, 0x7c, 0xca, 0xe7, 0x78, 0xb8, 0x19, 0xb1, 0x60, 0xdc, 0xd1, 0xd3, 0xd9, 0x83, +0x10, 0xaa, 0x49, 0xb1, 0x0b, 0x95, 0x28, 0xff, 0x00, 0x0d, 0x9a, 0x50, 0x4d, 0x9b, 0x26, 0x56, +0x85, 0x05, 0x73, 0xac, 0x4f, 0x06, 0xde, 0x93, 0x27, 0xb5, 0xec, 0x16, 0x4e, 0x6d, 0xb4, 0x86, +0x46, 0xac, 0x48, 0xb2, 0x69, 0xdd, 0x45, 0xdd, 0x9b, 0x7c, 0xbe, 0x9d, 0x86, 0xea, 0x29, 0xda, +0x58, 0x7e, 0x62, 0x66, 0x8d, 0x6e, 0xef, 0x80, 0x25, 0xef, 0xbc, 0x46, 0xa5, 0x86, 0x12, 0x2f, +0x97, 0x0b, 0xcc, 0xe5, 0xfa, 0xb8, 0xa4, 0x1d, 0x4d, 0x0f, 0x89, 0xd8, 0xc1, 0xa0, 0xe1, 0x5f, +0xae, 0x41, 0xce, 0x58, 0xe3, 0x70, 0x04, 0xf2, 0x35, 0x9c, 0x06, 0xc5, 0x9d, 0x8b, 0x51, 0x14, +0x8c, 0x4a, 0x18, 0x72, 0xc9, 0xdb, 0xa4, 0x84, 0xd4, 0xeb, 0xed, 0xa8, 0x74, 0xa1, 0x3f, 0x9d, +0x80, 0xae, 0xb3, 0xc6, 0xac, 0x96, 0xb6, 0xb6, 0x8e, 0x39, 0xd4, 0x86, 0xf6, 0x5f, 0xfa, 0x15, +0x16, 0x90, 0x45, 0x5f, 0xfd, 0x2d, 0x70, 0xf5, 0x5a, 0xa7, 0xe9, 0x10, 0x85, 0x10, 0x6a, 0xa1, +0x3f, 0x3f, 0x0e, 0x85, 0x47, 0x76, 0xd5, 0xf4, 0xcf, 0xa1, 0xa3, 0xe2, 0x29, 0xad, 0x07, 0x0f, +0xda, 0x60, 0xdd, 0x6c, 0x42, 0x95, 0xf3, 0xd3, 0x5a, 0xbf, 0xc4, 0x2b, 0x5b, 0x30, 0x73, 0x1f, +0x0b, 0x58, 0xab, 0x9d, 0x88, 0xf5, 0x8f, 0x90, 0x43, 0xb2, 0xe6, 0x99, 0xe3, 0x27, 0xaf, 0xf3, +0x29, 0x60, 0xda, 0xc3, 0x35, 0xe2, 0x33, 0x5e, 0x5a, 0xe3, 0xf5, 0x9c, 0xe5, 0x21, 0xb0, 0xd7, +0x71, 0xd7, 0x5d, 0x89, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x9a, 0xc6, 0x68, 0x10, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xaa, 0x81, 0xa0, 0x39, +0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xe2, 0xae, 0x3b, 0x6f, 0x01, 0x00, 0x00, 0x00, +0x03, 0x00, 0x00, 0x00, 0x03, 0xfc, 0xc3, 0x9b, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, +0x61, 0x00, 0x27, 0xa1, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xde, 0xdb, 0xd2, 0xc1, +0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0xfd, 0x45, 0x08, 0xc5, 0x01, 0x00, 0x00, 0x00, +0x07, 0x00, 0x00, 0x00, 0x05, 0xc0, 0xf1, 0xd7 +}; + +unsigned int pva_auth_allow_list_sys_len = 360; diff --git a/drivers/video/tegra/host/pva/pva_system_allow_list.h b/drivers/video/tegra/host/pva/pva_system_allow_list.h new file mode 100644 index 00000000..b9c21bc2 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_system_allow_list.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef PVA_ALLOW_LIST_SYS_H +#define PVA_ALLOW_LIST_SYS_H + +extern unsigned char pva_auth_allow_list_sys[]; +extern unsigned int pva_auth_allow_list_sys_len; +#endif diff --git a/drivers/video/tegra/host/pva/pva_trace.c b/drivers/video/tegra/host/pva/pva_trace.c new file mode 100644 index 00000000..eaca3220 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_trace.c @@ -0,0 +1,107 @@ +/* + * PVA trace log + * + * Copyright (c) 2017-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define CREATE_TRACE_POINTS +#include + +#include "pva.h" +#include "pva_trace.h" + +static void read_linear(struct pva *pva, struct pva_trace_log *trace, u32 toff) +{ + struct pva_trace_header *th = NULL; + struct pva_trace_block_hdr *bh = NULL; + struct pva_trace_point *tp = NULL; + u64 dt; + u32 i; + + const char *name = pva->pdev->name; + + th = (struct pva_trace_header *)trace->addr; + bh = (struct pva_trace_block_hdr *)((u8 *)th + th->head_offset); + while (th->head_offset < toff) { + tp = (struct pva_trace_point *) ((u8 *)bh + sizeof(*bh)); + dt = bh->start_time; + for (i = 0 ; i < bh->n_entries ; i++) { + dt = dt + tp->delta_time; + nvpva_dbg_info(pva, "delta_time: %llu\t %s\t major: %u\t" + "minor: %u\t flags: %u\tsequence: %u\targ1:" + " %u\targ2: %u\n", + dt, name, tp->major, tp->minor, tp->flags, + tp->sequence, tp->arg1, tp->arg2); + + trace_nvhost_pva_write(dt, name, tp->major, + tp->minor, tp->flags, tp->sequence, + tp->arg1, tp->arg2); + tp = tp + 1; + } + + th->head_offset += th->block_size; + + /* head reached end of trace log buffer, break */ + if (th->head_offset >= trace->size) { + th->head_offset = sizeof(*th); + break; + } + bh = (struct pva_trace_block_hdr *) ((u8 *)th + + th->head_offset); + } +} + +/* Read trace points from head to tail pointer */ +void pva_trace_copy_to_ftrace(struct pva *pva) +{ + struct pva_trace_log *trace; + struct pva_trace_header *th; + u32 toff; + + trace = &pva->pva_trace; + th = (struct pva_trace_header *)trace->addr; + + /* + * Read from current head to tail offset. Though tail offset might + * get change in background by FW. Read till current tail ONLY. + */ + if ((th == NULL) || !th->block_size || !th->head_offset + || !th->tail_offset) + return; + + nvpva_dbg_info(pva, "th->block_size: %u\tth->head_offset: %u\tth->tail_offset: %u\n", + th->block_size, th->head_offset, th->tail_offset); + + /* + * If head_offset and tail_offset are same, nothing to read. 
+ */ + if (th->head_offset == th->tail_offset) + return; + + toff = th->tail_offset; + + if (th->head_offset < toff) { + /* No circular read */ + read_linear(pva, trace, toff); + } else { + /* + * Circular read + * Read from head to trace_log buffer size + */ + read_linear(pva, trace, trace->size); + /* Read from head to tail */ + read_linear(pva, trace, toff); + } +} diff --git a/drivers/video/tegra/host/pva/pva_trace.h b/drivers/video/tegra/host/pva/pva_trace.h new file mode 100644 index 00000000..2965263f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_trace.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _PVA_TRACE_H_ +#define _PVA_TRACE_H_ + +/* + * Individual Trace point + * + * The delta time recorded in each trace point is the time from the previous + * trace point. The first trace point in a block of trace points will have + * a delta time of 0 (it is referencing the absolute time of the block). + */ +struct pva_trace_point { + u32 delta_time; + u8 major; + u8 minor; + u8 flags; + u8 sequence; + u32 arg1; + u32 arg2; +}; + +/* + * Trace block header that is written to DRAM, the indicated number of + * trace points immediately follows the header. + */ +struct pva_trace_block_hdr { + u64 start_time; + u16 n_entries; + u16 reserved_1; + u32 reserved_2; + u8 align[48]; +}; + +struct pva_trace_header { + u32 block_size; + u32 head_offset; + u32 tail_offset; + u8 align[52]; + +}; + +#endif diff --git a/drivers/video/tegra/host/pva/pva_version_config_t19x.c b/drivers/video/tegra/host/pva/pva_version_config_t19x.c new file mode 100644 index 00000000..101288e1 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_version_config_t19x.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016-2021, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "pva_mailbox_t19x.h" +#include "pva_interface_regs_t19x.h" +#include "pva_version_config_t19x.h" +#include "pva_ccq_t19x.h" + +static int submit_sync_t19x(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *cmd_status_out) +{ + (void)queue_id; + return pva_mailbox_send_cmd_sync(pva, cmd, nregs, cmd_status_out); +} + +static int submit_sync_locked_t19x(struct pva *pva, struct pva_cmd_s *cmd, + u32 nregs, u32 queue_id, + struct pva_cmd_status_regs *cmd_status_out) +{ + (void)queue_id; + return pva_mailbox_send_cmd_sync_locked(pva, cmd, nregs, + cmd_status_out); +} + +struct pva_version_config pva_t19x_config = { + .read_mailbox = pva_read_mailbox_t19x, + .write_mailbox = pva_write_mailbox_t19x, + .read_status_interface = read_status_interface_t19x, + .ccq_send_task = pva_ccq_send_task_t19x, + .submit_cmd_sync_locked = submit_sync_locked_t19x, + .submit_cmd_sync = submit_sync_t19x, + .irq_count = 1, +}; diff --git a/drivers/video/tegra/host/pva/pva_version_config_t19x.h b/drivers/video/tegra/host/pva/pva_version_config_t19x.h new file mode 100644 index 00000000..0d85b882 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_version_config_t19x.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_VERSION_CONFIG_T19x_H__ +#define __PVA_VERSION_CONFIG_T19x_H__ + +#include "pva.h" + +extern struct pva_version_config pva_t19x_config; + +#endif diff --git a/drivers/video/tegra/host/pva/pva_version_config_t23x.c b/drivers/video/tegra/host/pva/pva_version_config_t23x.c new file mode 100644 index 00000000..825d7794 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_version_config_t23x.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "pva_mailbox_t23x.h" +#include "pva_interface_regs_t23x.h" +#include "pva_ccq_t23x.h" + +struct pva_version_config pva_t23x_config = { + .read_mailbox = pva_read_mailbox_t23x, + .write_mailbox = pva_write_mailbox_t23x, + .read_status_interface = read_status_interface_t23x, + .ccq_send_task = pva_ccq_send_task_t23x, + .submit_cmd_sync_locked = pva_send_cmd_sync_locked, + .submit_cmd_sync = pva_send_cmd_sync, + .irq_count = 9, +}; diff --git a/drivers/video/tegra/host/pva/pva_version_config_t23x.h b/drivers/video/tegra/host/pva/pva_version_config_t23x.h new file mode 100644 index 00000000..dada8520 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_version_config_t23x.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_VERSION_CONFIG_T23X_H__ +#define __PVA_VERSION_CONFIG_T23X_H__ + + +#include "pva.h" + +extern struct pva_version_config pva_t23x_config; + +#endif diff --git a/drivers/video/tegra/host/pva/pva_vpu_app_auth.c b/drivers/video/tegra/host/pva/pva_vpu_app_auth.c new file mode 100644 index 00000000..9c60e2e3 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_app_auth.c @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "pva.h" +#include "pva_bit_helpers.h" +#include "pva_vpu_app_auth.h" +#include "pva_sha256.h" + +struct pva_buff_s { + const uint8_t *buff; + uint32_t pos; + uint32_t size; +}; + +s32 read_buff(struct pva_buff_s *src_buf, void *dst, u32 size) +{ + u32 pos = src_buf->pos + size; + + if (pos > src_buf->size) + return -1; + + memcpy(dst, (src_buf->buff + src_buf->pos), size); + src_buf->pos = pos; + + return size; +} + +static int +pva_auth_allow_list_parse_pva_buff(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth, + struct pva_buff_s *auth_list_buf) +{ + int err = 0; + + ssize_t read_bytes = 0; + struct vpu_hash_key_pair_s *vhashk; + size_t vkey_size = 0; + size_t vhash_size = 0; + + //Destroy previously parsed allowlist data + pva_auth_allow_list_destroy(pva_auth); + vhashk = kzalloc(sizeof(struct vpu_hash_key_pair_s), GFP_KERNEL); + if (vhashk == NULL) { + nvpva_warn(&pdev->dev, "ERROR: Unable to allocate memory"); + err = -ENOMEM; + goto out; + } + + read_bytes = read_buff(auth_list_buf, + &(vhashk->num_keys), + sizeof(vhashk->num_keys)); + if (read_bytes != (ssize_t)(sizeof(vhashk->num_keys))) { + nvpva_warn(&pdev->dev, "ERROR: read failed"); + err = -EINVAL; + goto free_vhashk; + } + + vkey_size = sizeof(struct shakey_s)*(vhashk->num_keys); + vhashk->psha_key = kzalloc(vkey_size, GFP_KERNEL); + if (vhashk->psha_key == NULL) { + nvpva_warn(&pdev->dev, "ERROR: Unable to allocate memory"); + err = -ENOMEM; + goto free_vhashk; + } + + read_bytes = read_buff(auth_list_buf, vhashk->psha_key, vkey_size); + if (read_bytes != (ssize_t)vkey_size) { + err = -EINVAL; + goto free_shakeys; + } + + read_bytes = read_buff(auth_list_buf, + &(vhashk->num_hashes), + sizeof(vhashk->num_hashes)); + if (read_bytes != (ssize_t)(sizeof(vhashk->num_hashes))) { + nvpva_warn(&pdev->dev, "ERROR: read failed"); + err = -EINVAL; + goto free_shakeys; + } + + vhash_size = sizeof(struct vpu_hash_vector_s)*(vhashk->num_hashes); + vhashk->pvpu_hash_vector = kzalloc(vhash_size, GFP_KERNEL); + if (vhashk->pvpu_hash_vector == NULL) { + nvpva_warn(&pdev->dev, "ERROR: read failed"); + err = -ENOMEM; + goto free_shakeys; + } + + read_bytes = read_buff(auth_list_buf, + vhashk->pvpu_hash_vector, + vhash_size); + if (read_bytes != (ssize_t)vhash_size) { + nvpva_warn(&pdev->dev, "ERROR: read failed"); + err = -EINVAL; + goto free_hashes; + } + + pva_auth->pva_auth_allow_list_parsed = true; + pva_auth->pva_auth_enable = true; + pva_auth->vpu_hash_keys = vhashk; + goto out; + +free_hashes: + kfree(vhashk->pvpu_hash_vector); + vhashk->pvpu_hash_vector = NULL; + +free_shakeys: + kfree(vhashk->psha_key); + vhashk->psha_key = NULL; + +free_vhashk: + + kfree(vhashk); + vhashk = NULL; + +out: + return err; +} + +int +pva_auth_allow_list_parse_buf(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth, + u8 *buffer, + u32 length) +{ + int err = 0; + struct pva_buff_s auth_list_buf = {0}; + + auth_list_buf.buff = buffer; + auth_list_buf.size = length; + auth_list_buf.pos = 0; + + err = pva_auth_allow_list_parse_pva_buff(pdev, + pva_auth, + &auth_list_buf); + return err; +} + +int +pva_auth_allow_list_parse(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + const struct firmware *pallow_list; + struct pva_buff_s auth_list_buf = {0}; + int err = 0; + + err = nvpva_request_firmware(pdev, + PVA_AUTH_ALLOW_LIST_DEFAULT, 
&pallow_list); + + if (err) { + nvpva_dbg_fn(pva, "pva allow list request failed"); + nvpva_warn(&pdev->dev, + "Failed to load the allow list\n"); + err = -ENOENT; + goto out; + } + + auth_list_buf.buff = pallow_list->data; + auth_list_buf.size = pallow_list->size; + auth_list_buf.pos = 0; + err = pva_auth_allow_list_parse_pva_buff(pdev, + pva_auth, + &auth_list_buf); + release_firmware(pallow_list); +out: + return err; +} + +void +pva_auth_allow_list_destroy(struct pva_vpu_auth_s *pva_auth) +{ + if (pva_auth->vpu_hash_keys == NULL) + return; + + kfree(pva_auth->vpu_hash_keys->pvpu_hash_vector); + kfree(pva_auth->vpu_hash_keys->psha_key); + kfree(pva_auth->vpu_hash_keys); + pva_auth->vpu_hash_keys = NULL; +} + +/** + * \brief + * is_key_match calculates the sha256 key of ELF and checks if it matches with key. + * \param[in] dataptr Pointer to the data to which sha256 to ba calculated + * \param[in] size length in bytes of the data to which sha256 to be calculated. + * \param[in] key the key with which calculated key would be compared for match. + * \return The completion status of the operation. Possible values are: + * \ref 0 Success. Passed in key matched wth calculated key. + * \ref -EINVAL. Passed in Key doesn't match with calcualted key. + */ +static int32_t +is_key_match(uint8_t *dataptr, + size_t size, + struct shakey_s key) +{ + int32_t err = 0; + uint32_t calc_key[8]; + size_t off; + struct sha256_ctx_s ctx1; + struct sha256_ctx_s ctx2; + + sha256_init(&ctx1); + off = (size / 64U) * 64U; + if (off > 0U) + pva_sha256_update(&ctx1, dataptr, off); + + /* clone */ + sha256_copy(&ctx1, &ctx2); + + /* finalize with leftover, if any */ + sha256_finalize(&ctx2, dataptr + off, size % 64U, calc_key); + + err = memcmp((void *)&(key.sha_key), + (void *)calc_key, + NVPVA_SHA256_DIGEST_SIZE); + if (err != 0) + err = -EINVAL; + + return err; +} + +/** + * \brief + * Keeps checking all the keys accociated with match_hash + * against the calculated sha256 key for dataptr, until it finds a match. + * \param[in] pva Pointer to PVA driver context structure struct \ref nvpva_drv_ctx + * \param[in] dataptr pointer to ELF data + * \param[in] size length (in bytes) of ELF data + * \param[in] match_hash pointer to matching hash structure, \ref struct vpu_hash_vector_s. + * \return Matching status of the calculated key + * against the keys asscociated with match_hash. possible values: + * - 0 Success, one of the keys associated with match_hash + * matches with the calculated sha256 key. + * - -EINVAL, None matches. + */ +static int +check_all_keys_for_match(struct shakey_s *pallkeys, + uint8_t *dataptr, + size_t size, + const struct vpu_hash_vector_s *match_hash) +{ + int32_t err = 0; + uint32_t idx; + uint32_t count; + struct shakey_s key; + uint32_t i; + + idx = match_hash->index; + count = match_hash->count; + if (idx > UINT_MAX - count) { + err = -ERANGE; + goto fail; + } + + for (i = 0; i < count; i++) { + key = pallkeys[idx+i]; + err = is_key_match(dataptr, size, key); + if (err == 0) + break; + } +fail: + return err; +} +/** + * @brief + * Helper function for \ref binary_search. + * Uses a specific field in @ref pkey to compare with the same filed in @ref pbase. + * @param[in] pkey pointer to the object that needs to be compared. + * @param[in] pbase pointer to the starting element of the array. + * @retval + * - -1 when @ref pkey is less than starting element of array pointed to by @ref pbase. + * - 1 when @ref pkey is greater than starting element of array pointed to by @ref pbase. 
+ * - 0 when @ref pkey is equal to starting element of array pointed to by @ref pbase. + */ +static int +compare_hash_value(const void *pkey, + const void *pbase) +{ + int ret; + + if ((((const struct vpu_hash_vector_s *)pkey)->crc32_hash) < + (((const struct vpu_hash_vector_s *)pbase)->crc32_hash)) + ret = -1; + else if ((((const struct vpu_hash_vector_s *)pkey)->crc32_hash) > + (((const struct vpu_hash_vector_s *)pbase)->crc32_hash)) + ret = 1; + else + ret = 0; + + return ret; +} + +/** + * @brief + * calculates crc32. + * @param[in] crc initial crc value. usually 0. + * @param[in] buf pointer to the buffer whose crc32 to be calculated. + * @param[in] len length (in bytes) of data at @ref buf. + * @retval value of calculated crc32. + */ +static uint32_t +pva_crc32(uint32_t crc, + unsigned char *buf, + size_t len) +{ + int k; + + crc = ~crc; + while (len != 0U) { + crc ^= *buf++; + for (k = 0; k < 8; k++) + crc = ((crc & 1U) == 1U) ? + (crc >> 1U) ^ 0xedb88320U : crc >> 1U; + + len--; + } + + return ~crc; +} + +const void +*binary_search(const void *key, + const void *base, + size_t num_elems, + size_t size, + int (*compare)(const void *pkey, const void *pbase)) +{ + size_t low = 0U; + size_t high; + + if (num_elems == 0U || size == 0U) + return NULL; + + high = num_elems - 1U; + for (;;) { + const void *mid_elem; + int r; + size_t mid = low + ((high - low) / 2U); + + /* coverity CERT INT30-C Unsigned integer */ + /* operation mid * size may wrap. */ + if (mid > UINT_MAX/size) + return NULL; + + mid_elem = ((const unsigned char *) base) + + mid * size; + r = compare(key, mid_elem); + + if (r < 0) { + if (mid == 0U) + return NULL; + + high = mid - 1U; + } else if (r > 0) { + low = mid + 1U; + if (low < mid || low > high) + return NULL; + } else { + return mid_elem; + } + } +} + +int +pva_vpu_check_sha256_key(struct pva *pva, + struct vpu_hash_key_pair_s *vpu_hash_keys, + uint8_t *dataptr, + size_t size) +{ + int err = 0; + struct vpu_hash_vector_s cal_Hash; + const struct vpu_hash_vector_s *match_Hash; + + cal_Hash.crc32_hash = pva_crc32(0L, dataptr, size); + + match_Hash = (const struct vpu_hash_vector_s *) + binary_search(&cal_Hash, + vpu_hash_keys->pvpu_hash_vector, + vpu_hash_keys->num_hashes, + sizeof(struct vpu_hash_vector_s), + compare_hash_value); + if (match_Hash == NULL) { + nvpva_dbg_info(pva, "ERROR: No Hash Match Found"); + err = -EINVAL; + goto fail; + } + + err = check_all_keys_for_match(vpu_hash_keys->psha_key, + dataptr, + size, + match_Hash); + if (err != 0) + nvpva_dbg_info(pva, "Error: Match key not found"); +fail: + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_vpu_app_auth.h b/drivers/video/tegra/host/pva/pva_vpu_app_auth.h new file mode 100644 index 00000000..e2804a49 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_app_auth.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
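pva_vpu_check_sha256_key() therefore verifies an ELF in two stages: a cheap CRC32 over the whole image selects a bucket by binary-searching the crc32_hash-sorted vector, and only the SHA-256 keys of that bucket, psha_key[index] through psha_key[index + count - 1], are compared against the full digest. In the built-in pva_auth_allow_list_sys blob every bucket has count = 1 and index = 0..7, so each CRC32 maps to exactly one key. A minimal sketch of the bucket-to-key mapping (the helper is illustrative):

/* Illustrative: the SHA-256 keys tried for one matched hash bucket. */
static const struct shakey_s *
bucket_keys(const struct shakey_s *pallkeys,
	    const struct vpu_hash_vector_s *bucket, uint32_t *nkeys)
{
	*nkeys = bucket->count;
	return &pallkeys[bucket->index];
}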
+ */ + +#ifndef NVPVA_VPU_HASH_H +#define NVPVA_VPU_HASH_H + +#include "pva_vpu_exe.h" + +/** + * Size of sha256 keys in bytes. + */ +#define NVPVA_SHA256_DIGEST_SIZE 32U +/** + * Maximum length of allowlist file path + */ +#define ALLOWLIST_FILE_LEN 128U +/** + * Default path (including filename) of pva vpu elf authentication allowlist file + */ +#define PVA_AUTH_ALLOW_LIST_DEFAULT "pva_auth_allowlist" +/** + * Array of all VPU Hash'es + */ +struct vpu_hash_vector_s { + /*! Number of Keys for this crc32_hash */ + uint32_t count; + /*! Starting Index into Keys Array */ + uint32_t index; + /*! CRC32 hash value */ + uint32_t crc32_hash; +}; + +/** + * Stores sha256 key + */ +struct shakey_s { + /** 256-bit (32 Bytes) SHA Key */ + uint8_t sha_key[NVPVA_SHA256_DIGEST_SIZE]; +}; + +/** + * Stores Hash Vector and Keys vector + */ +struct vpu_hash_key_pair_s { + /*! Total number of Keys in binary file */ + uint32_t num_keys; + /*! pointer to SHA keys Array. */ + struct shakey_s *psha_key; + /*! Total number of Hashes in binary file */ + uint32_t num_hashes; + /*! pointer to Array of Hash'es */ + struct vpu_hash_vector_s *pvpu_hash_vector; +}; + +/** + * Stores all the information related to pva vpu elf authentication. + */ +struct pva_vpu_auth_s { + /** Stores crc32-sha256 of ELFs */ + struct vpu_hash_key_pair_s *vpu_hash_keys; + struct mutex allow_list_lock; + /** Flag to check if allowlist is enabled */ + bool pva_auth_enable; + /** Flag to track if the allow list is already parsed */ + bool pva_auth_allow_list_parsed; +}; + +struct nvpva_drv_ctx; + +/** + * \brief checks if the sha256 key of ELF has a match in allowlist. + * + * It first checks if the allowlist is available. + * If its not available it returns error code. + * If allowlist is available, then it first calculates the crc32 hash of the elf + * and compares the calculated hash with the available hashes in allowlist. + * If it doesn't find a match of hash in allowlist it returns error code. + * If it finds a matched hash, then it goes ahead and calculates the sha256 key of elf + * and compares it with the keys asscociated with the hash in the allowlist file. + * If there is a key match then it returns successfully. Else it returs error code. + * + * \param[in] vpu_hash_keys Pointer to PVA vpu elf sha256 authentication + * keys structure \ref struct vpu_hash_key_pair_s + * \param[in] dataptr data pointer of ELF to be validate SHA + * \param[in] size 32-bit unsigned int ELF size in number of bytes + * + * \return The completion status of the operation. Possible values are: + * - 0 when there exists a match key for the elf data pointed by dataptr. + * - -EINVAL when allowlist file doesn't exists OR + * when the hash of ELF has no match in allowlist file OR + * when the sha256 key has no match in the list of keys + * associated with the hash of ELF + */ +int pva_vpu_check_sha256_key(struct pva *pva, + struct vpu_hash_key_pair_s *vpu_hash_keys, + uint8_t *dataptr, + size_t size); + + +/** + * Parse binary file containing authentication list stored in firmware dir + * This binary file has + 32-bit num_hashes, + 32-bit num_keys, + Array of {32-bit(4 byte) CRC32 as Hash, 32-bit index into Array of keys, + 32-bit count of keys for this hash} + Array of 256-bit keys. + * Allocate memory for all the fileds and Store them. + * Parse Hash Array and Store in memory + * Parse Keys Array and Store in memory. 
+ * + * \param[in] pva_auth Pointer to PVA vpu elf authentication data struct \ref pva_vpu_auth + * \return + * - 0, if everything is successful. + * - -ENOENT, if allowlist file is not found at /proc/boot/ + * - negative of error code from fstat() if fstat fails. + * - -ERANGE, if file size is less than 0 or greater than NVPVA_VPU_ELF_MAX_SZ. + * - -ENOMEM, if any memory allocation fails. + * - negative of error code return from read() + * - -EINVAL, if read() doesn't read expected number of bytes from the file. + */ +int +pva_auth_allow_list_parse(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth); + +/** + * Parse allow list stored in memory + * This binary file has + 32-bit num_hashes, + 32-bit num_keys, + Array of {32-bit(4 byte) CRC32 as Hash, 32-bit index into Array of keys, + 32-bit count of keys for this hash} + Array of 256-bit keys. + * Allocate memory for all the fileds and Store them. + * Parse Hash Array and Store in memory + * Parse Keys Array and Store in memory. + * + * \param[in] pva_auth Pointer to PVA vpu elf authentication data struct \ref pva_vpu_auth + * \return + * - 0, if everything is successful. + * - -ENOENT, if allowlist file is not found at /proc/boot/ + * - negative of error code from fstat() if fstat fails. + * - -ERANGE, if file size is less than 0 or greater than NVPVA_VPU_ELF_MAX_SZ. + * - -ENOMEM, if any memory allocation fails. + * - negative of error code return from read() + * - -EINVAL, if read() doesn't read expected number of bytes from the file. + */ +int pva_auth_allow_list_parse_buf(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth, + u8 *buffer, + u32 length); + +/** + * @brief Frees all the memory utilized for storing elf authentication data. + * @param[in] pva_auth Pointer to PVA vpu elf authentication data struct \ref pva_vpu_auth + */ +void pva_auth_allow_list_destroy(struct pva_vpu_auth_s *pva_auth); + +/** + * The binary_search() function performs a binary search + * on the sorted array of num elements pointed to by base, + * for an item that matches the object pointed to by key. + * + * \param[in] key The object to search for. + * \param[in] base A pointer to the first element in the array + * \param[in] num_elems The number of elements in the array. + * \param[in] size The size of an element, in bytes. + * \param[in] compare A pointer to a user-supplied function + * that lfind() calls to compare an array element with the key. + * \param[in] pkey the same pointer as key + * \param[in] pbase a pointer to an element in the array. + * + * \return A pointer to a matching member of the array, + * or NULL if a matching object couldn't be found + */ +const void *binary_search(const void *key, + const void *base, + size_t num_elems, + size_t size, + int (*compare)(const void *pkey, + const void *pbase)); +#endif diff --git a/drivers/video/tegra/host/pva/pva_vpu_exe.c b/drivers/video/tegra/host/pva/pva_vpu_exe.c new file mode 100644 index 00000000..e3ec28f3 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_exe.c @@ -0,0 +1,1172 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include "nvpva_elf_parser.h" +#include "pva_bit_helpers.h" +#include "pva.h" +#include "hw_vmem_pva.h" +#include "pva_vpu_exe.h" + +#define ELF_MAXIMUM_SECTION_NAME 64 +#define ELF_EXPORTS_SECTION "EXPORTS" +#define DATA_SECTION_ALIGNMENT 32 +#define CODE_SECTION_ALIGNMENT 32 + +#define MAX_VPU_SEGMENTS 4 + +#define SWAP_DATA(OUT, IN) \ + do { \ + OUT = PVA_INSERT(PVA_EXTRACT(IN, 31, 24, uint32_t), 7, 0); \ + OUT |= PVA_INSERT(PVA_EXTRACT(IN, 23, 16, uint32_t), 15, 8); \ + OUT |= PVA_INSERT(PVA_EXTRACT(IN, 15, 8, uint32_t), 23, 16); \ + OUT |= PVA_INSERT(PVA_EXTRACT(IN, 7, 0, uint32_t), 31, 24); \ + } while (0) + +/* + * Define mapping from VPU data, rodata and program sections into + * corresponding segment types. + */ + +static const struct pack_rule { + const char *elf_section_name; + int32_t pva_type; +} pack_rules[] = { + { + .elf_section_name = ".data", + .pva_type = PVA_SEG_VPU_DATA, + }, { + .elf_section_name = ".rodata", + .pva_type = PVA_SEG_VPU_DATA, + }, { + .elf_section_name = ".text", + .pva_type = PVA_SEG_VPU_CODE, + } +}; + +static int32_t find_pva_ucode_segment_type(const char *section_name, uint32_t addr) +{ + uint32_t i; + int32_t ret = PVA_SEG_VPU_MAX_TYPE; + + for (i = 0; i < ARRAY_SIZE(pack_rules); i += 1) { + /* Ignore the suffix of the section name */ + if (strncmp(section_name, pack_rules[i].elf_section_name, + strlen(pack_rules[i].elf_section_name)) == 0) { + ret = pack_rules[i].pva_type; + break; + } + } + if (ret == PVA_SEG_VPU_DATA) { + int section_name_len = + strnlen(section_name, ELF_MAXIMUM_SECTION_NAME); + int exports_section_name_len = + strnlen(ELF_EXPORTS_SECTION, ELF_MAXIMUM_SECTION_NAME); + if (section_name_len >= exports_section_name_len && + strncmp((section_name + + (section_name_len - exports_section_name_len)), + ELF_EXPORTS_SECTION, + exports_section_name_len) == 0) { + ret = PVA_SEG_VPU_IN_PARAMS; + } else if (addr == 0xc0000U) { + ret = PVA_SEG_VPU_IN_PARAMS; + } + } + + return ret; +} + +void print_segments_info(struct pva_elf_image *elf_img) +{ + pr_info("PVA_SEG_VPU_CODE =%d", PVA_SEG_VPU_CODE); + pr_info("PVA_SEG_VPU_DATA =%d", PVA_SEG_VPU_DATA); + pr_info("Code Buffer"); + pr_info("vpu_segments_buffer[PVA_SEG_VPU_CODE]"); + pr_info("code_size = %u", + elf_img->vpu_segments_buffer[PVA_SEG_VPU_CODE].localsize); + pr_info("vpu_segments_buffer[PVA_SEG_VPU_DATA]"); + pr_info("data_size = %u", + elf_img->vpu_segments_buffer[PVA_SEG_VPU_DATA].localsize); +} + +int32_t pva_get_sym_offset(struct nvpva_elf_context *d, uint16_t exe_id, + uint32_t sym_id, uint32_t *addr, uint32_t *size) +{ + if ((!pva_vpu_elf_is_registered(d, exe_id)) + || (addr == NULL) + || (size == NULL) + || (sym_id >= get_elf_image(d, exe_id)->num_symbols) + || (sym_id == NVPVA_INVALID_SYMBOL_ID)) + return -EINVAL; + + *addr = get_elf_image(d, exe_id)->sym[sym_id].addr; + *size = get_elf_image(d, exe_id)->sym[sym_id].size; + + return 0; +} + +dma_addr_t phys_get_bin_info(struct nvpva_elf_context *d, uint16_t exe_id) +{ + dma_addr_t addr = 0LL; + + if (pva_vpu_elf_is_registered(d, exe_id)) + addr = get_elf_image(d, exe_id)->vpu_bin_buffer.pa; + + return addr; +} + +static int32_t pva_vpu_elf_alloc_mem(struct pva *pva, + struct platform_device *pdev, + struct pva_elf_buffer *buffer, size_t size) +{ + dma_addr_t pa = 0U; + void *va = NULL; + + va = 
dma_alloc_coherent(&pdev->dev, size, &pa, GFP_KERNEL); + if (va == NULL) + goto fail; + + nvpva_dbg_info(pva, "vpu app addr = %llx", (u64)pa); + + buffer->size = size; + buffer->va = va; + buffer->pa = pa; + + buffer->alloc_size = size; + buffer->alloc_va = va; + buffer->alloc_pa = pa; + + return 0; +fail: + return -ENOMEM; +} + +static int32_t pva_vpu_bin_info_allocate(struct pva *dev, + struct pva_elf_image *elf_img) +{ + int32_t ret = 0; + size_t aligned_size; + size_t size = sizeof(struct pva_bin_info_s); + + aligned_size = ALIGN(size + 128, 128); + + ret = pva_vpu_elf_alloc_mem(dev, + dev->aux_pdev, + &elf_img->vpu_bin_buffer, + aligned_size); + if (ret) { + pr_err("Memory allocation failed"); + goto fail; + } + + elf_img->vpu_bin_buffer.va = + (void *)ALIGN((uintptr_t)elf_img->vpu_bin_buffer.va, 128); + elf_img->vpu_bin_buffer.pa = ALIGN(elf_img->vpu_bin_buffer.pa, 128); + + (void)memcpy(elf_img->vpu_bin_buffer.va, (void *)&elf_img->info, size); + +fail: + + return ret; +} + +static int32_t pva_vpu_allocate_segment_memory(struct nvpva_elf_context *d, + struct pva_elf_image *elf_img) +{ + int32_t err = 0; + int32_t i; + uint32_t segment_size = 0; + + for (i = 0; i < PVA_SEG_VPU_MAX_TYPE; i++) { + if (i == PVA_SEG_VPU_IN_PARAMS) + continue; + + segment_size = elf_img->vpu_segments_buffer[i].localsize; + if (i == PVA_SEG_VPU_CODE) { + const u32 cache_size = (d->dev->version == PVA_HW_GEN1) ? + (8 * 1024) : + (16 * 1024); + + segment_size += cache_size; + } + segment_size = ALIGN(segment_size + 128, 128); + if (segment_size == 0) + continue; + + err = pva_vpu_elf_alloc_mem(d->dev, + d->cntxt_dev, + &elf_img->vpu_segments_buffer[i], + segment_size); + if (err) { + pr_err("Memory allocation failed"); + break; + } + elf_img->vpu_segments_buffer[i].va = (void *)ALIGN( + (uintptr_t)elf_img->vpu_segments_buffer[i].va, 128); + + elf_img->vpu_segments_buffer[i].pa = + ALIGN(elf_img->vpu_segments_buffer[i].pa, 128); + + memcpy(elf_img->vpu_segments_buffer[i].va, + elf_img->vpu_segments_buffer[i].localbuffer, + elf_img->vpu_segments_buffer[i].localsize); + + kfree(elf_img->vpu_segments_buffer[i].localbuffer); + elf_img->vpu_segments_buffer[i].localbuffer = NULL; + elf_img->vpu_segments_buffer[i].localsize = 0; + } + + return err; +} + +static int32_t +pva_allocate_data_section_info(struct pva *dev, + struct pva_elf_image *elf_img) +{ + int32_t err = 0; + + if (elf_img->vpu_data_segment_info.localsize == 0U) + goto out; + + err = pva_vpu_elf_alloc_mem(dev, + dev->aux_pdev, + &elf_img->vpu_data_segment_info, + elf_img->vpu_data_segment_info.localsize); + if (err != 0) { + pr_err("Failed to allocate data segment info memory"); + goto out; + } + + (void)memset(elf_img->vpu_data_segment_info.va, 0, + elf_img->vpu_data_segment_info.size); + + (void)memcpy(elf_img->vpu_data_segment_info.va, + (void *)elf_img->vpu_data_segment_info.localbuffer, + elf_img->vpu_data_segment_info.localsize); + + kfree(elf_img->vpu_data_segment_info.localbuffer); + elf_img->vpu_data_segment_info.localbuffer = NULL; + elf_img->vpu_data_segment_info.localsize = 0; + +out: + + return err; + +} + +static int32_t write_bin_info(struct nvpva_elf_context *d, + struct pva_elf_image *elf_img) +{ + struct pva_bin_info_s *curr_bin_info; + int32_t err = 0; + + err = pva_vpu_allocate_segment_memory(d, elf_img); + if (err < 0) { + pr_err("pva: failed to allocate segment memory"); + goto fail; + } + + err = pva_allocate_data_section_info(d->dev, elf_img); + if (err < 0) { + pr_err("Failed to allocate data segment info memory"); + goto fail; 
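The allocation idiom used in this and the surrounding helpers is: ask dma_alloc_coherent() for ALIGN(size + 128, 128) bytes, remember the raw return values in alloc_va/alloc_pa/alloc_size (pva_elf_free_buffer() later hands exactly those back to dma_free_coherent()), then round the working va/pa up to the next 128-byte boundary. A compact sketch of the same pattern, assuming only the struct pva_elf_buffer fields from pva_vpu_exe.h:

/* Illustrative: over-allocate by 128 B, then align the usable window. */
static int alloc_buf_aligned_128(struct device *dev,
				 struct pva_elf_buffer *buf, size_t payload)
{
	size_t alloc_size = ALIGN(payload + 128, 128);
	dma_addr_t pa;
	void *va = dma_alloc_coherent(dev, alloc_size, &pa, GFP_KERNEL);

	if (va == NULL)
		return -ENOMEM;

	buf->alloc_va = va;		/* kept for dma_free_coherent() */
	buf->alloc_pa = pa;
	buf->alloc_size = alloc_size;

	buf->va = (void *)ALIGN((uintptr_t)va, 128);
	buf->pa = ALIGN(pa, 128);
	buf->size = alloc_size;

	return 0;
}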
+ } + + curr_bin_info = &elf_img->info; + + curr_bin_info->bin_info_size = sizeof(struct pva_bin_info_s); + curr_bin_info->bin_info_version = PVA_BIN_INFO_VERSION_ID; + curr_bin_info->code_base = + elf_img->vpu_segments_buffer[PVA_SEG_VPU_CODE].pa; + curr_bin_info->data_sec_base = + elf_img->vpu_data_segment_info.pa; + curr_bin_info->data_sec_count = + elf_img->vpu_data_segment_info.num_segments; + curr_bin_info->data_base = + elf_img->vpu_segments_buffer[PVA_SEG_VPU_DATA].pa; + +fail: + + return err; +} + +static int32_t copy_to_elf_buffer_code(struct pva_elf_buffer *buffer, + const void *src, size_t src_size, + uint32_t addr) +{ + uint32_t addr_bytes = addr * 4; + uint32_t *dst_size = NULL; + + dst_size = &buffer->localsize; + if (addr_bytes + src_size > *dst_size) { + size_t aligned_size = addr_bytes + src_size; + + if (aligned_size % DATA_SECTION_ALIGNMENT) + aligned_size += + (DATA_SECTION_ALIGNMENT - + (aligned_size % DATA_SECTION_ALIGNMENT)); + + if (buffer->localbuffer == NULL) { + /* First .text section must load at 0 */ + if (addr_bytes != 0) { + pr_err("First .text section does not start at 0"); + return -EINVAL; + } + buffer->localbuffer = kzalloc(aligned_size, GFP_KERNEL); + if (buffer->localbuffer == NULL) + return -ENOMEM; + + } else { + uint8_t *new_buffer = kzalloc(aligned_size, GFP_KERNEL); + + if (new_buffer == NULL) + return -ENOMEM; + + memcpy(new_buffer, buffer->localbuffer, *dst_size); + kfree(buffer->localbuffer); + buffer->localbuffer = new_buffer; + } + *dst_size = aligned_size; + } + memcpy((void *)((uintptr_t)buffer->localbuffer + addr_bytes), src, + src_size); + return 0; +} + +static int32_t copy_to_elf_buffer(struct pva_elf_buffer *buffer, + const void *src, size_t src_size) +{ + uint8_t *resize_buffer = NULL; + uint32_t *dst_size = NULL; + + dst_size = &buffer->localsize; + if (src != NULL) { + size_t aligned_size = src_size; + + if (src_size % DATA_SECTION_ALIGNMENT) + aligned_size += (DATA_SECTION_ALIGNMENT - + (src_size % DATA_SECTION_ALIGNMENT)); + + if (buffer->localbuffer == NULL) { + buffer->localbuffer = kzalloc(aligned_size, GFP_KERNEL); + if (buffer->localbuffer == NULL) + return -ENOMEM; + + } else { + resize_buffer = kzalloc(*dst_size, GFP_KERNEL); + if (resize_buffer == NULL) + return -ENOMEM; + + memcpy(resize_buffer, buffer->localbuffer, *dst_size); + kfree(buffer->localbuffer); + buffer->localbuffer = NULL; + buffer->localbuffer = + kzalloc((*dst_size) + aligned_size, GFP_KERNEL); + if (buffer->localbuffer == NULL) { + kfree(resize_buffer); + return -ENOMEM; + } + memcpy(buffer->localbuffer, resize_buffer, *dst_size); + kfree(resize_buffer); + } + memcpy((void *)((uintptr_t)buffer->localbuffer + *dst_size), + src, src_size); + *dst_size += aligned_size; + } else { + return -EINVAL; + } + + return 0; +} + +static int32_t +copy_to_elf_data_sec_buffer(struct pva_elf_buffer *buffer, + const void *src, + uint32_t src_size) +{ + uint8_t *resize_buffer = NULL; + uint32_t *dst_size = NULL; + + dst_size = &buffer->localsize; + + if ((src == NULL) || (src_size == 0U)) + return -EINVAL; + + if (buffer->localbuffer == NULL) { + buffer->localbuffer = kzalloc(src_size, GFP_KERNEL); + if (buffer->localbuffer == NULL) + return -ENOMEM; + } else { + resize_buffer = kzalloc((*dst_size) + src_size, GFP_KERNEL); + if (resize_buffer == NULL) + return -ENOMEM; + + (void) memcpy(resize_buffer, buffer->localbuffer, *dst_size); + kfree(buffer->localbuffer); + buffer->localbuffer = resize_buffer; + } + + (void) memcpy((void *)(buffer->localbuffer + *dst_size), src, 
src_size); + if ((UINT_MAX - *dst_size) < src_size) + return -EINVAL; + + *dst_size += src_size; + + return 0; +} + +static int32_t +copy_segments(void *elf, struct pva_elf_image *elf_img, + const struct elf_section_header *section_header, + const char *section_name, int hw_gen) +{ + int32_t segment_type = 0U; + int32_t ret = 0; + + const u8 *elf_data; + uint32_t *data; + uint32_t sw_data; + uint32_t dst_buffer_size_old = 0; + struct pva_bin_info_s *bin_info = NULL; + struct pva_elf_buffer *buffer = NULL; + struct pva_vpu_data_section_s data_sec_info = {0}; + + if ((section_header == NULL) || (section_name == NULL)) { + ret = -EINVAL; + goto out; + } + + segment_type = find_pva_ucode_segment_type(section_name, + section_header->addr); + + bin_info = &elf_img->info; + if (!(segment_type == PVA_SEG_VPU_DATA) && + !(segment_type == PVA_SEG_VPU_CODE)) { + ret = 0; + goto out; + } + + buffer = &elf_img->vpu_segments_buffer[segment_type]; + dst_buffer_size_old = buffer->localsize; + elf_data = elf_section_contents(elf, section_header); + if (elf_data == NULL) + goto inc_num_segments; + + + + if (segment_type == (int32_t)PVA_SEG_VPU_CODE) { + unsigned int idx; + + for (idx = 0; idx < (section_header->size / 4); idx++) { + data = ((uint32_t *)elf_data) + idx; + SWAP_DATA(sw_data, *data); + *data = sw_data; + } + + ret = copy_to_elf_buffer_code(buffer, + elf_data, + section_header->size, + section_header->addr); + if (ret != 0) + goto out; + + bin_info->code_size = buffer->localsize; + + } else { + ret = copy_to_elf_buffer(buffer, + elf_data, + section_header->size); + if (ret != 0) + goto out; + } + + if (segment_type == (int32_t)PVA_SEG_VPU_DATA) { + struct pva_vpu_data_section_s *pdsec; + struct pva_elf_buffer *buffer_temp; + u32 size_temp; + + pdsec = &data_sec_info; + pdsec->offset = dst_buffer_size_old; + pdsec->addr = section_header->addr; + if (buffer->localsize < dst_buffer_size_old) { + pr_err("Invalid buffer size"); + ret = -EINVAL; + goto out; + } + + pdsec->size = (buffer->localsize - dst_buffer_size_old); + ret = nvpva_validate_vmem_offset(pdsec->addr, + pdsec->size, + hw_gen); + if (ret != 0) + goto out; + + buffer_temp = &elf_img->vpu_data_segment_info; + size_temp = (uint32_t)sizeof(struct pva_vpu_data_section_s); + ret = copy_to_elf_data_sec_buffer(buffer_temp, + &data_sec_info, + size_temp); + if (ret != 0) + goto out; + + if (buffer_temp->num_segments >= (UINT_MAX - 1U)) { + ret = -EINVAL; + pr_err("Number of data segments exceeds UINT_MAX"); + goto out; + } + + buffer_temp->num_segments++; + } + +inc_num_segments: + + buffer->num_segments++; + +out: + return ret; +} + + +static int32_t +populate_segments(void *elf, struct pva_elf_image *elf_img, + int hw_gen) +{ + const struct elf_section_header *section_header; + int32_t ret = 0; + uint32_t index = 0; + const char *section_name; + const u32 sectionCount = elf_shnum(elf); + + for (index = 0; index < sectionCount; index++) { + section_header = elf_section_header(elf, index); + if (section_header == NULL) { + ret = -EINVAL; + goto out; + } + section_name = elf_section_name(elf, section_header); + if (section_header->type == SHT_PROGBITS) { + ret = copy_segments(elf, elf_img, section_header, + section_name, hw_gen); + if (ret) + goto out; + } + } +out: + return ret; +} + +/** + * Data about symbol information in EXPORTS sections is present as follows + * typedef struct { + * uint32_t type; From VMEM_TYPE enums + * uint32_t addr_offset; Offset from VMEM base + * uint32_t size; Size of VMEM region in bytes + * } 
vmem_symbol_metadata_t; + */ +static int32_t update_exports_symbol(void *elf, const struct elf_section_header *section_header, + struct pva_elf_symbol *symID) +{ + const u8 *data; + const char *section_name; + int32_t section_type; + + section_name = elf_section_name(elf, section_header); + if (section_name == NULL) + return -EINVAL; + + section_type = find_pva_ucode_segment_type(section_name, section_header->addr); + if (section_type == PVA_SEG_VPU_IN_PARAMS) { + uint32_t symOffset = symID->addr - section_header->addr; + data = elf_section_contents(elf, section_header); + if (data == NULL) + return -EINVAL; + symID->type = *(uint32_t *)&data[symOffset]; + if ((symID->type == (uint8_t)VMEM_TYPE_INVALID) + || (symID->type >= (uint8_t)VMEM_TYPE_MAX)) + return -EINVAL; + symID->addr = *(uint32_t *)&data[symOffset + sizeof(uint32_t)]; + symID->size = *(uint32_t *)&data[symOffset + (2UL * sizeof(uint32_t))]; + } + + return 0; +} + +static int32_t +populate_symtab(void *elf, struct nvpva_elf_context *d, + uint16_t exe_id, int hw_gen) +{ + const struct elf_section_header *section_header; + const struct elf_section_header *sym_scn; + int32_t ret = 0; + const struct elf_symbol *sym; + uint32_t i, count; + struct pva_elf_symbol *symID; + uint32_t num_symbols = 0; + uint32_t num_sys_symbols = 0; + uint32_t total_sym_size = 0; + const char *symname = NULL; + const char *section_name; + uint32_t stringsize = 0; + int32_t sec_type; + struct pva_elf_image *image; + + section_header = + (const struct elf_section_header *)elf_named_section_header( + elf, ".symtab"); + if (section_header == NULL) + goto update_elf_info; + + count = section_header->size / section_header->entsize; + for (i = 0; i < count; i++) { + if (num_symbols >= NVPVA_TASK_MAX_SYMBOLS) { + ret = -EINVAL; + goto fail; + } + + sym = elf_symbol(elf, i); + if ((sym == NULL) + || (ELF_ST_BIND(sym) != STB_GLOBAL) + || (ELF_ST_TYPE(sym) == STT_FUNC) + || sym->size <= 0) + continue; + + sym_scn = elf_section_header(elf, sym->shndx); + if (sym_scn == NULL) { + ret = -EINVAL; + goto fail; + } + + section_name = elf_section_name(elf, sym_scn); + if (section_name == NULL) { + ret = -EINVAL; + goto fail; + } + + sec_type = find_pva_ucode_segment_type(section_name, + sym_scn->addr); + if (sec_type != PVA_SEG_VPU_IN_PARAMS) + continue; + + symname = elf_symbol_name(elf, section_header, i); + if (symname == NULL) { + ret = -EINVAL; + goto fail; + } + + stringsize = strnlen(symname, (ELF_MAX_SYMBOL_LENGTH - 1)); + symID = &get_elf_image(d, exe_id)->sym[num_symbols]; + symID->symbol_name = + kcalloc(ELF_MAX_SYMBOL_LENGTH, + sizeof(char), GFP_KERNEL); + if (symID->symbol_name == NULL) { + ret = -ENOMEM; + goto fail; + } + + (void)strncpy(symID->symbol_name, symname, stringsize); + symID->symbol_name[stringsize] = '\0'; + if (strncmp(symID->symbol_name, + PVA_SYS_INSTANCE_DATA_V1_SYMBOL, + ELF_MAX_SYMBOL_LENGTH) == 0) { + ++num_sys_symbols; + symID->is_sys = true; + } else + symID->is_sys = false; + + symID->symbolID = num_symbols; + symID->size = sym->size; + symID->addr = sym->value; + ret = update_exports_symbol(elf, sym_scn, symID); + if (ret != 0) { + kfree(symID->symbol_name); + goto fail; + } + + num_symbols++; + total_sym_size += symID->size; + ret = nvpva_validate_vmem_offset(symID->addr, + symID->size, + hw_gen); + if (ret != 0) + goto fail; + } + +update_elf_info: + get_elf_image(d, exe_id)->num_symbols = num_symbols; + get_elf_image(d, exe_id)->num_sys_symbols = num_sys_symbols; + get_elf_image(d, exe_id)->symbol_size_total = total_sym_size; + + 
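For symbols that resolve into an EXPORTS section, the ELF symbol's own value and size are replaced by the three 32-bit words stored at the symbol's offset inside that section, i.e. the vmem_symbol_metadata_t triple described in the comment above. A minimal sketch of that decode (field order as documented; the helper is illustrative):

/* Illustrative: decode one EXPORTS entry {type, addr_offset, size}. */
static void decode_exports_entry(const u8 *section_data, u32 sym_offset,
				 struct pva_elf_symbol *sym)
{
	const u32 *meta = (const u32 *)&section_data[sym_offset];

	sym->type = meta[0];	/* one of the VMEM_TYPE_* values    */
	sym->addr = meta[1];	/* offset from the VMEM base        */
	sym->size = meta[2];	/* size of the VMEM region in bytes */
}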
return ret; +fail: + image = get_elf_image(d, exe_id); + for (i = 0; i < image->num_symbols; i++) { + kfree(image->sym[i].symbol_name); + image->sym[i].symbolID = 0; + image->sym[i].size = 0; + image->sym[i].offset = 0; + } + + return ret; +} + +/** + * + * Validate if elf file passed is valid + * + * @param elf Buffer containing elf file + * @param size Size of buffer containing elf file + * + * @return 0 if everything is correct else return error + */ + +static int32_t validate_vpu(const void *elf, size_t size) +{ + int32_t err = 0; + + if (!image_is_elf(elf) || !elf_is_32bit(elf)) { + pr_err("pva: Invalid 32 bit VPU ELF"); + err = -EINVAL; + } + return err; +} + +static void pva_elf_free_buffer(struct platform_device *pdev, + struct pva_elf_buffer *buf) +{ + if (buf->localbuffer != NULL) { + kfree(buf->localbuffer); + buf->localbuffer = NULL; + buf->localsize = 0; + buf->num_segments = 0; + } + if (buf->pa != 0U) { + dma_free_coherent(&pdev->dev, + buf->alloc_size, buf->alloc_va, + buf->alloc_pa); + } +} + +static void +vpu_bin_clean(struct nvpva_elf_context *d, + struct pva_elf_image *elf_img) +{ + size_t i; + + if (elf_img == NULL) + return; + + /* Initialize vpu_bin_buffer */ + pva_elf_free_buffer(d->dev->aux_pdev, &elf_img->vpu_bin_buffer); + + pva_elf_free_buffer(d->dev->aux_pdev, &elf_img->vpu_data_segment_info); + + /* Initiaize VPU segments buffer */ + for (i = 0; i < PVA_SEG_VPU_MAX_TYPE; i++) + pva_elf_free_buffer(d->cntxt_dev, + &elf_img->vpu_segments_buffer[i]); + + /* clean up symbols */ + for (i = 0; i < elf_img->num_symbols; i++) + kfree(elf_img->sym[i].symbol_name); + + /* Clean elf img and set everything to 0 */ + memset(elf_img, 0, sizeof(struct pva_elf_image)); +} + +static int32_t pva_get_vpu_app_id(struct nvpva_elf_context *d, + uint16_t *exe_id, + bool is_system_app) +{ + int32_t ret = 0; + uint16_t index = 0; + struct pva_elf_images *images; + struct pva_elf_image **image; + int32_t alloc_size; + + mutex_lock(&d->elf_mutex); + images = d->elf_images; + image = &images->elf_img[images->num_allocated / ALOC_SEGMENT_SIZE]; + + if (images->num_assigned >= MAX_NUM_VPU_EXE) { + pr_err("No space for more VPU binaries"); + ret = -ENOMEM; + goto out; + } + + if (images->num_assigned >= images->num_allocated) { + alloc_size = ALOC_SEGMENT_SIZE * sizeof(struct pva_elf_image); + *image = kzalloc(alloc_size, GFP_KERNEL); + if (*image == NULL) { + ret = -ENOMEM; + goto out; + } + + images->num_allocated += ALOC_SEGMENT_SIZE; + } + + + index = rmos_find_first_zero_bit(d->elf_images->alloctable, + MAX_NUM_VPU_EXE); + if (index == MAX_NUM_VPU_EXE) { + pr_err("No space for more VPU binaries"); + ret = -ENOMEM; + goto out; + } + + *exe_id = index; + rmos_set_bit32((index%32), &d->elf_images->alloctable[index/32U]); + ++(images->num_assigned); + get_elf_image(d, *exe_id)->elf_id = *exe_id; + get_elf_image(d, *exe_id)->is_system_app = is_system_app; +out: + mutex_unlock(&d->elf_mutex); + return ret; +} + +int32_t +pva_unload_vpu_app(struct nvpva_elf_context *d, uint16_t exe_id, bool locked) +{ + int32_t err = 0; + struct pva_elf_images *images; + + images = d->elf_images; + + if (!locked) + mutex_lock(&d->elf_mutex); + + if (exe_id >= MAX_NUM_VPU_EXE) { + err = -EINVAL; + goto out; + } + + if (!rmos_test_bit32((exe_id%32), &images->alloctable[exe_id/32])) { + err = -EINVAL; + goto out; + } + + vpu_bin_clean(d, get_elf_image(d, exe_id)); + rmos_clear_bit32((exe_id%32), &images->alloctable[exe_id/32]); + --(images->num_assigned); +out: + if (!locked) + mutex_unlock(&d->elf_mutex); + + 
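Executable slots are tracked two ways at once: the alloctable bitmap (32 IDs per u32 word) records which exe_ids are in use, and the backing struct pva_elf_image storage grows lazily in ALOC_SEGMENT_SIZE-sized arrays, so an exe_id maps to (segment, slot) = (id / 32, id % 32); get_elf_image() in pva_vpu_exe.h performs exactly this lookup. A small worked example (the exe_id value is arbitrary):

/* Illustrative: exe_id 70 -> image segment 2, slot 6; bitmap word 2, bit 6. */
static bool exe_id_in_use(struct nvpva_elf_context *d, u16 exe_id)
{
	u32 word = exe_id / 32U;	/* 70 / 32 = 2 */
	u32 bit  = exe_id % 32U;	/* 70 % 32 = 6 */

	return rmos_test_bit32(bit, &d->elf_images->alloctable[word]);
}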
return err; +} + +int32_t +pva_get_sym_tab_size(struct nvpva_elf_context *d, + uint16_t exe_id, + u64 *tab_size) +{ + struct pva_elf_image *image; + u32 number_of_symbols; + + image = get_elf_image(d, exe_id); + if (image == NULL) + return -EINVAL; + + number_of_symbols = image->num_symbols - image->num_sys_symbols; + *tab_size = number_of_symbols * sizeof(struct nvpva_sym_info); + + return 0; +} + +int32_t +pva_get_sym_tab(struct nvpva_elf_context *d, + uint16_t exe_id, + struct nvpva_sym_info *sym_tab) + +{ + u32 i; + struct pva_elf_image *image; + + image = get_elf_image(d, exe_id); + if (image == NULL) + return -EINVAL; + + for (i = 0; i < image->num_symbols; i++) { + if (image->sym[i].is_sys) + continue; + memcpy(sym_tab->sym_name, + image->sym[i].symbol_name, + NVPVA_SYM_NAME_MAX_LEN); + sym_tab->sym_size = image->sym[i].size; + sym_tab->sym_type = image->sym[i].type; + sym_tab->sym_id = image->sym[i].symbolID; + ++sym_tab; + } + + return 0; +} +int32_t pva_get_sym_info(struct nvpva_elf_context *d, uint16_t vpu_exe_id, + const char *sym_name, struct pva_elf_symbol *symbol) +{ + struct pva_elf_image *elf; + uint32_t i; + int32_t err = 0; + size_t strSize = strnlen(sym_name, ELF_MAX_SYMBOL_LENGTH); + + elf = get_elf_image(d, vpu_exe_id); + for (i = 0; i < elf->num_symbols; i++) { + if (strncmp(sym_name, elf->sym[i].symbol_name, strSize) == 0) { + symbol->symbolID = elf->sym[i].symbolID; + symbol->size = elf->sym[i].size; + symbol->type = elf->sym[i].type; + break; + } + } + if (i == elf->num_symbols) + err = -EINVAL; + + return err; +} + +int32_t +pva_release_vpu_app(struct nvpva_elf_context *d, uint16_t exe_id, bool locked) +{ + int32_t err = 0; + struct pva_elf_image *image = NULL; + + image = get_elf_image(d, exe_id); + if (image != NULL && image->user_registered == true) { + image->user_registered = false; + if (atomic_read(&image->submit_refcount) <= 0) + (void)pva_unload_vpu_app(d, exe_id, locked); + } else { + err = -EINVAL; + } + + return err; +} + +int32_t pva_task_release_ref_vpu_app(struct nvpva_elf_context *d, + uint16_t exe_id) +{ + int32_t err = 0; + struct pva_elf_image *image = NULL; + + if (exe_id == NVPVA_NOOP_EXE_ID) + goto out; + + image = get_elf_image(d, exe_id); + if (image == NULL) { + err = -EINVAL; + goto out_err; + } + + atomic_sub(1, &image->submit_refcount); + if ((atomic_read(&image->submit_refcount) <= 0) && + (image->user_registered == false)) + (void)pva_unload_vpu_app(d, exe_id, false); +out_err: +out: + return err; +} + +int32_t pva_task_acquire_ref_vpu_app(struct nvpva_elf_context *d, + uint16_t exe_id) +{ + int32_t err = 0; + struct pva_elf_image *image = get_elf_image(d, exe_id); + + if (image != NULL) + (void)atomic_add(1, &image->submit_refcount); + else + err = -EINVAL; + + return err; +} + +int32_t pva_load_vpu_app(struct nvpva_elf_context *d, uint8_t *buffer, + size_t size, uint16_t *exe_id, + bool is_system_app, int hw_gen) +{ + void *elf = NULL; + int32_t err = 0; + uint16_t assigned_exe_id = 0; + struct pva_elf_image *image = NULL; + struct pva *pva = d->dev; + struct device *dev = &pva->pdev->dev; + + err = validate_vpu((void *)buffer, size); + if (err < 0) { + dev_err(dev, "Not valid elf or null elf"); + goto out; + } + err = pva_get_vpu_app_id(d, &assigned_exe_id, is_system_app); + if (err) { + dev_err(dev, "Unable to get valid VPU id"); + goto out; + } + elf = (void *)buffer; + image = get_elf_image(d, assigned_exe_id); + err = populate_symtab(elf, d, assigned_exe_id, pva->version); + if (err) { + dev_err(dev, "Populating symbol table 
failed"); + err = -EINVAL; + goto out_elf_end; + } + err = populate_segments(elf, image, hw_gen); + if (err) { + dev_err(dev, "Populating segments failed"); + err = -EINVAL; + goto out_elf_end; + } + err = write_bin_info(d, image); + if (err) { + dev_err(dev, "Writing bin_info failed"); + err = -EINVAL; + goto out_elf_end; + } + err = pva_vpu_bin_info_allocate(pva, image); + if (err) { + dev_err(dev, "Allocating bin info failed"); + err = -EINVAL; + goto out_elf_end; + } + *exe_id = assigned_exe_id; + image->user_registered = true; + (void)atomic_set(&image->submit_refcount, 0); +out_elf_end: + if (err) + pva_unload_vpu_app(d, assigned_exe_id, false); + +out: + return err; +} + +void pva_unload_all_apps(struct nvpva_elf_context *d) +{ + uint32_t elf_alloc_table = 0U; + uint32_t id = 0U; + uint32_t i; + + mutex_lock(&d->elf_mutex); + for (i = 0; i < NUM_ALLOC_SEGMENTS; i++) { + elf_alloc_table = d->elf_images->alloctable[i]; + while (elf_alloc_table != 0U) { + id = rmos_get_first_set_bit(elf_alloc_table); + (void)pva_release_vpu_app(d, (i * 32 + id), true); + rmos_clear_bit32(id, &elf_alloc_table); + } + + d->elf_images->alloctable[i] = 0; + } + mutex_unlock(&d->elf_mutex); +} + +void pva_vpu_deinit(struct nvpva_elf_context *d) +{ + int32_t i; + int32_t allocated_segments; + struct pva_elf_images *images = d->elf_images; + + if (d->elf_images == NULL) + goto out; + + pva_unload_all_apps(d); + allocated_segments = (images->num_allocated/ALOC_SEGMENT_SIZE); + for (i = 0; i < allocated_segments; i++) { + if (images->elf_img[i] != NULL) { + kfree(images->elf_img[i]); + images->elf_img[i] = NULL; + } + } + + d->elf_images->num_allocated = 0; + d->elf_images->num_assigned = 0; + + kfree(d->elf_images); + d->elf_images = NULL; + mutex_destroy(&d->elf_mutex); +out: + return; +} + +int32_t pva_vpu_init(struct pva *dev, struct nvpva_elf_context *d) +{ + int32_t err = 0; + int32_t alloc_size; + + d->dev = dev; + d->elf_images = kzalloc(sizeof(struct pva_elf_images), GFP_KERNEL); + if (d->elf_images == NULL) { + err = -ENOMEM; + goto fail_elf_img_init; + } + + d->elf_images->num_allocated = 0; + d->elf_images->num_assigned = 0; + memset(d->elf_images->elf_img, 0, sizeof(d->elf_images->elf_img)); + alloc_size = ALOC_SEGMENT_SIZE * sizeof(struct pva_elf_image); + d->elf_images->elf_img[0] = kzalloc(alloc_size, GFP_KERNEL); + if (d->elf_images->elf_img[0] == NULL) { + err = -ENOMEM; + kfree(d->elf_images); + goto fail_elf_img_init; + } + + d->elf_images->num_allocated = ALOC_SEGMENT_SIZE; + mutex_init(&d->elf_mutex); + +fail_elf_img_init: + + return err; +} +struct vmem_region { + uint32_t start; + uint32_t end; +}; + +struct vmem_region vmem_regions_tab[NUM_HEM_GEN + 1][VMEM_REGION_COUNT] = { + {{.start = 0, .end = 0}, + {.start = 0, .end = 0}, + {.start = 0, .end = 0}}, + {{.start = T19X_VMEM0_START, .end = T19X_VMEM0_END}, + {.start = T19X_VMEM1_START, .end = T19X_VMEM1_END}, + {.start = T19X_VMEM2_START, .end = T19X_VMEM2_END}}, + {{.start = T23x_VMEM0_START, .end = T23x_VMEM0_END}, + {.start = T23x_VMEM1_START, .end = T23x_VMEM1_END}, + {.start = T23x_VMEM2_START, .end = T23x_VMEM2_END}}, +}; + +int32_t +nvpva_validate_vmem_offset(const uint32_t vmem_offset, + const uint32_t size, + const int hw_gen) +{ + + int i; + int32_t err = -EINVAL; + + if (hw_gen < 0 || hw_gen > NUM_HEM_GEN) { + pr_err("invalid hw_gen index: %d", hw_gen); + return err; + } + + for (i = VMEM_REGION_COUNT; i > 0; i--) { + if (vmem_offset >= vmem_regions_tab[hw_gen][i-1].start) + break; + } + + if ((i > 0) && ((vmem_offset + 
size) <= vmem_regions_tab[hw_gen][i-1].end)) + err = 0; + + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_vpu_exe.h b/drivers/video/tegra/host/pva/pva_vpu_exe.h new file mode 100644 index 00000000..2ef06734 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_exe.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef NVPVA_VPU_APP_H +#define NVPVA_VPU_APP_H +#include +#include "pva-ucode-header.h" +#include "pva-sys-params.h" +#include "pva-task.h" +#include +#include +#include "pva-bit.h" + +#define ELF_MAX_SYMBOL_LENGTH 64 +#define MAX_NUM_VPU_EXE 65535U +#define ALOC_SEGMENT_SIZE 32U +#define NUM_ALLOC_SEGMENTS ((MAX_NUM_VPU_EXE + 1)/ALOC_SEGMENT_SIZE) + +/** + * enum to identify different types of symbols + */ +enum pva_elf_symbol_type { + /**< Symbol type Invalid */ + VMEM_TYPE_INVALID, + /**< Symbol type Data */ + VMEM_TYPE_DATA, + /**< Symbol type VPU Config Table */ + VMEM_TYPE_VPUC_TABLE, + /**< Symbol type Pointer */ + VMEM_TYPE_POINTER, + /**< Symbol type System */ + VMEM_TYPE_SYSTEM, + /** Symbol type Pointer Extension */ + VMEM_TYPE_POINTER_EX, + /** Symbol type Invalid */ + VMEM_TYPE_MAX +}; + +/** + * enum to identify different segments of VPU ELF + */ +enum pva_elf_seg_type { + /**< Code segment in VPU ELF */ + PVA_SEG_VPU_CODE = 0U, + /**< DATA segment in VPU ELF */ + PVA_SEG_VPU_DATA, + /**< DATA segment in VPU ELF containing symbol information*/ + PVA_SEG_VPU_IN_PARAMS, + /**< Not a valid segment in VPU ELF */ + PVA_SEG_VPU_MAX_TYPE +}; + +/** + * Structure that holds buffer and handles shared with FW + */ +struct pva_elf_buffer { + /**< Aligned size of allocated buffer */ + size_t size; + /**< IOVA address if allocated buffer */ + dma_addr_t pa; + /**< Virtual address of allocated buffer */ + void *va; + + /*original value came out of allocator*/ + size_t alloc_size; + dma_addr_t alloc_pa; + void *alloc_va; + + /*< Local buffer holding data to be copied to allocated buffer. 
+ * May undergo resizing + */ + uint8_t *localbuffer; + /**< Unaligned size of local buffer */ + uint32_t localsize; + /**< Number of segments */ + uint32_t num_segments; +}; + +/* + * Store elf symbols information + */ +struct pva_elf_symbol { + char *symbol_name; + /**elf_images->alloctable[(exe_id/32)] >> (exe_id%32)) & 1U); +} + +static inline +struct pva_elf_image *get_elf_image(struct nvpva_elf_context *d, + uint16_t exe_id) +{ + struct pva_elf_image *image = NULL; + u32 segment; + u32 index; + + segment = exe_id / ALOC_SEGMENT_SIZE; + index = exe_id % ALOC_SEGMENT_SIZE; + + if ((d->elf_images->elf_img[segment] != NULL) + && (pva_vpu_elf_is_registered(d, exe_id))) + image = &d->elf_images->elf_img[segment][index]; + + return image; +} + +/** + * Load VPU APP elf file + * + * @param d Pointer to the Elf Context + * @param *buffer Buffer containing the VPU APP elf file + * @param size Size of the VPU APP elf file + * @param *exe_id ID assigned to the VPU APP in KMD filled + * by this function + * @param hwid HWID associated with the VPU APP used for + * allocation + * + * @return 0 if everything is valid and VPU APP is + * loaded successfully + */ +int32_t +pva_load_vpu_app(struct nvpva_elf_context *d, uint8_t *buffer, + size_t size, uint16_t *exe_id, + bool is_system_app, int hw_gen); + +/** + * Unload VPU APP elf file + * + * @param d Pointer to the Elf Context + * @param exe_id Unique ID of VPU APP to be unloaded + * + * @return 0 if successful + */ +int32_t +pva_unload_vpu_app(struct nvpva_elf_context *d, uint16_t exe_id, bool locked); + +/** + * Unload all VPU APP elf files associated with the given ELF context + * + * @param d Pointer to the Elf Context + * + */ +void pva_unload_all_apps(struct nvpva_elf_context *d); + +/** + * Get reference to a vpu app for task + * + * @param d Pointer to the Elf Context + * @param exe_id Unique ID of VPU APP to be referenced + * + * @return 0 if successful + */ +int32_t pva_task_acquire_ref_vpu_app(struct nvpva_elf_context *d, + uint16_t exe_id); + +/** + * Unref VPU APP elf file from user side + * + * @param d Pointer to the Elf Context + * @param exe_id Unique ID of VPU APP to be unreferenced + * + * @return 0 if successful + */ +int32_t +pva_release_vpu_app(struct nvpva_elf_context *d, uint16_t exe_id, bool locked); + +/** + * Unref VPU APP elf file from task side + * + * @param d Pointer to the Elf Context + * @param exe_id Unique ID of VPU APP to be unreferenced + * + * @return 0 if successful + */ +int32_t pva_task_release_ref_vpu_app(struct nvpva_elf_context *d, + uint16_t exe_id); + +/** + * Deinitialize and Deallocate memory for VPU parsing + * + * @param Pointer to the Elf Context + * + * @return Void + */ +void pva_vpu_deinit(struct nvpva_elf_context *d); + +/** + * Initialize memory for VPU Parsing + * + * @param Pointer to the Elf Context + * + * @return 0 if no errors encountered + */ +int32_t pva_vpu_init(struct pva *dev, struct nvpva_elf_context *d); + +void print_segments_info(struct pva_elf_image *elf_img); + +int32_t +nvpva_validate_vmem_offset(const uint32_t vmem_offset, + const uint32_t size, + const int hw_gen); +int32_t +pva_get_sym_tab_size(struct nvpva_elf_context *d, + uint16_t exe_id, + u64 *tab_size); +int32_t +pva_get_sym_tab(struct nvpva_elf_context *d, + uint16_t exe_id, + struct nvpva_sym_info *sym_tab); +#endif diff --git a/drivers/video/tegra/host/pva/pva_vpu_ocd.c b/drivers/video/tegra/host/pva/pva_vpu_ocd.c new file mode 100644 index 00000000..7eeb7e32 --- /dev/null +++ 
b/drivers/video/tegra/host/pva/pva_vpu_ocd.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include "pva.h" +#include "pva_vpu_ocd.h" + +#define PVA_DEBUG_APERTURE_INDEX 1 +#define VPU_OCD_MAX_NUM_DATA_ACCESS 7 + +static void block_writel(struct pva_vpu_dbg_block *block, u32 offset, u32 val) +{ + void __iomem *addr = block->vbase + offset; + + writel(val, addr); +} + +static u32 block_readl(struct pva_vpu_dbg_block *block, u32 offset) +{ + void __iomem *addr = block->vbase + offset; + + return readl(addr); +} + +static int init_vpu_dbg_block(struct pva *pva, struct pva_vpu_dbg_block *block, + u32 offset) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pva->pdev); + void __iomem *aperture = pdata->aperture[PVA_DEBUG_APERTURE_INDEX]; + + if (aperture == NULL) + return -EINVAL; + + block->vbase = aperture + offset; + return 0; +} + +int pva_vpu_ocd_init(struct pva *pva) +{ + u32 i; + int err; + const phys_addr_t vpu_dbg_offsets[NUM_VPU_BLOCKS] = { 0x00050000, + 0x00070000 }; + + for (i = 0; i < NUM_VPU_BLOCKS; i++) { + err = init_vpu_dbg_block(pva, &pva->vpu_dbg_blocks[i], + vpu_dbg_offsets[i]); + if (err != 0) + return err; + } + return 0; +} + +int pva_vpu_ocd_io(struct pva_vpu_dbg_block *block, u32 instr, const u32 *wdata, + u32 nw, u32 *rdata, u32 nr) +{ + u32 i; + + if ((nr > VPU_OCD_MAX_NUM_DATA_ACCESS) || + (nw > VPU_OCD_MAX_NUM_DATA_ACCESS)) { + pr_err("pva: too many vpu dbg reg read (%u) or write (%u)\n", + nr, nw); + return -EINVAL; + } + + /* write instruction first */ + block_writel(block, 0, instr); + + /* + * write data + * if there's 1 word, write to addr 0x4, + * if there's 2 words, write to addr 2 * 0x4, + * ... + */ + for (i = 0; i < nw; i++) + block_writel(block, nw * sizeof(u32), wdata[i]); + + /* + * read data + * if there's 1 word, read from addr 0x4, + * if there's 2 words, read from addr 2 * 0x4, + * ... + */ + for (i = 0; i < nr; i++) + rdata[i] = block_readl(block, nr * sizeof(u32)); + + return 0; +} diff --git a/drivers/video/tegra/host/pva/pva_vpu_ocd.h b/drivers/video/tegra/host/pva/pva_vpu_ocd.h new file mode 100644 index 00000000..99d18ab4 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_ocd.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
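As an aside (illustrative only, not part of the patch): the addressing scheme described in the comments of pva_vpu_ocd_io() above can be exercised with a small driver-side helper. In the sketch below the instruction word 0x1, the payload value, and the choice of VPU debug block 0 are placeholders, not values taken from the driver.

#include "pva.h"
#include "pva_vpu_ocd.h"

/*
 * Illustrative sketch: issue one OCD instruction with a single data word
 * and read one word back through a debug block mapped by pva_vpu_ocd_init().
 */
static int example_ocd_roundtrip(struct pva *pva)
{
	struct pva_vpu_dbg_block *blk = &pva->vpu_dbg_blocks[0];
	u32 wdata = 0xdeadbeef;	/* placeholder payload for the write phase */
	u32 rdata = 0;		/* filled by the read phase */
	int err;

	/* one instruction word, one data write, one data read */
	err = pva_vpu_ocd_io(blk, 0x1, &wdata, 1, &rdata, 1);
	if (err)
		return err;

	pr_info("pva: ocd readback 0x%08x\n", rdata);
	return 0;
}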
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_VPU_OCD_H +#define PVA_VPU_OCD_H +#include +#include "pva.h" + +int pva_vpu_ocd_init(struct pva *pva); +int pva_vpu_ocd_io(struct pva_vpu_dbg_block *block, u32 instr, const u32 *wdata, + u32 nw, u32 *rdata, u32 nr); +#endif // PVA_VPU_OCD_H diff --git a/include/trace/events/nvhost_pva.h b/include/trace/events/nvhost_pva.h new file mode 100644 index 00000000..3a243dd5 --- /dev/null +++ b/include/trace/events/nvhost_pva.h @@ -0,0 +1,254 @@ +/* + * Nvhost event logging to ftrace. + * + * Copyright (c) 2017-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nvhost_pva + +#if !defined(_TRACE_NVHOST_PVA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NVHOST_PVA_H + +#include + + +TRACE_EVENT(nvhost_pva_write, + + TP_PROTO( + u64 delta_time, + const char *name, + u8 major, + u8 minor, + u8 flags, + u8 sequence, + u32 arg1, + u32 arg2 + ), + + TP_ARGS( + delta_time, + name, + major, + minor, + flags, + sequence, + arg1, + arg2 + ), + + TP_STRUCT__entry( + __field(u64, delta_time) + __field(const char *, name) + __field(u8, major) + __field(u8, minor) + __field(u8, flags) + __field(u8, sequence) + __field(u32, arg1) + __field(u32, arg2) + ), + + TP_fast_assign( + __entry->delta_time = delta_time; + __entry->name = name; + __entry->major = major; + __entry->minor = minor; + __entry->flags = flags; + __entry->sequence = sequence; + __entry->arg1 = arg1; + __entry->arg2 = arg2; + ), + + TP_printk("time: %llu\t %s\t major: 0x%x\tminor: 0x%x\tflags: 0x%x\t" + "sequence: 0x%x\targ1: %u\targ2: %u", + __entry->delta_time, __entry->name, __entry->major, + __entry->minor, __entry->flags, __entry->sequence, + __entry->arg1, __entry->arg2) +); + +TRACE_EVENT(nvhost_pva_task_stats, + + TP_PROTO( + const char *name, + u64 queued_time, + u64 head_time, + u64 input_actions_complete, + u64 vpu_assigned_time, + u64 vpu_start_time, + u64 vpu_complete_time, + u64 complete_time, + u8 vpu_assigned, + u64 r5_overhead + ), + + TP_ARGS( + name, + queued_time, + head_time, + input_actions_complete, + vpu_assigned_time, + vpu_start_time, + vpu_complete_time, + complete_time, + vpu_assigned, + r5_overhead + ), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u64, queued_time) + __field(u64, head_time) + __field(u64, input_actions_complete) + __field(u64, vpu_assigned_time) + __field(u64, vpu_start_time) + __field(u64, vpu_complete_time) + __field(u64, complete_time) + __field(u8, vpu_assigned) + __field(u64, r5_overhead) + ), + + TP_fast_assign( + __entry->name = name; + __entry->queued_time = queued_time; + __entry->head_time = head_time; + __entry->input_actions_complete = input_actions_complete; + 
__entry->vpu_assigned_time = vpu_assigned_time; + __entry->vpu_start_time = vpu_start_time; + __entry->vpu_complete_time = vpu_complete_time; + __entry->complete_time = complete_time; + __entry->vpu_assigned = vpu_assigned; + __entry->r5_overhead = r5_overhead; + ), + + TP_printk("%s\tqueued_time: %llu\thead_time: %llu\t" + "input_actions_complete: %llu\tvpu_assigned_time: %llu\t" + "vpu_start_time: %llu\tvpu_complete_time: %llu\t" + "complete_time: %llu\tvpu_assigned: %d\t" + "r5_overhead: %llu us", + __entry->name, __entry->queued_time, __entry->head_time, + __entry->input_actions_complete, __entry->vpu_assigned_time, + __entry->vpu_start_time, __entry->vpu_complete_time, + __entry->complete_time, __entry->vpu_assigned, + __entry->r5_overhead) +); + +TRACE_EVENT(nvhost_pva_task_vpu_perf, + + TP_PROTO( + const char *name, + u32 index, + u32 count, + u32 sum, + u64 sum_squared, + u32 min, + u32 max + ), + + TP_ARGS( + name, + index, + count, + sum, + sum_squared, + min, + max + ), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, index) + __field(u32, count) + __field(u32, sum) + __field(u64, sum_squared) + __field(u32, min) + __field(u32, max) + ), + + TP_fast_assign( + __entry->name = name; + __entry->index = index; + __entry->count = count; + __entry->sum = sum; + __entry->sum_squared = sum_squared; + __entry->min = min; + __entry->max = max; + ), + + TP_printk("%s\tindex: %u\tcount: %u\taverage: %u\t" + "variance: %llu\tminimum: %u\t" + "maximum: %u", + __entry->name, __entry->index, __entry->count, + __entry->sum / __entry->count, + ((u64)__entry->count * __entry->sum_squared - + (u64)__entry->sum * (u64)__entry->sum) + / __entry->count / __entry->count, + __entry->min, __entry->max) +); + +TRACE_EVENT(nvhost_pva_task_timestamp, + + TP_PROTO( + const char *name, + u32 class, + u32 syncpoint_id, + u32 syncpoint_thresh, + u64 start_time, + u64 end_time + ), + + TP_ARGS( + name, + class, + syncpoint_id, + syncpoint_thresh, + start_time, + end_time + ), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, class) + __field(u32, syncpoint_id) + __field(u32, syncpoint_thresh) + __field(u64, start_time) + __field(u64, end_time) + ), + + TP_fast_assign( + __entry->name = name; + __entry->class = class; + __entry->syncpoint_id = syncpoint_id; + __entry->syncpoint_thresh = syncpoint_thresh; + __entry->start_time = start_time; + __entry->end_time = end_time; + ), + + TP_printk("name=%s, class=0x%02x, syncpoint_id=%u, syncpoint_thresh=%u, start_time=%llu, end_time=%llu", + __entry->name, __entry->class, __entry->syncpoint_id, __entry->syncpoint_thresh, + __entry->start_time, __entry->end_time) +); + +#endif /* _TRACE_NVHOST_PVA_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../include/trace/events + +#define TRACE_INCLUDE_FILE nvhost_pva +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/nvpva_ioctl.h b/include/uapi/linux/nvpva_ioctl.h new file mode 100644 index 00000000..6e53abee --- /dev/null +++ b/include/uapi/linux/nvpva_ioctl.h @@ -0,0 +1,611 @@ +/* + * Tegra PVA Driver ioctls + * + * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
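A note on the arithmetic in the nvhost_pva_task_vpu_perf event above (illustrative, not part of the patch): the printed variance is the population variance recovered from running sums, var = (count * sum_squared - sum^2) / count^2, evaluated with the same integer-division order as the TP_printk(). For example, samples {2, 4, 6} give count = 3, sum = 12, sum_squared = 56, so 3 * 56 - 144 = 24 and 24 / 3 / 3 = 2 after truncation (the exact value is 8/3). The helper below simply mirrors that computation.

#include <linux/types.h>

/*
 * Illustrative only -- mirrors the arithmetic used by the TP_printk() of
 * nvhost_pva_task_vpu_perf: population variance from running sums, with
 * the same integer-division order as the trace output.
 */
static inline u64 pva_perf_variance(u32 count, u32 sum, u64 sum_squared)
{
	if (count == 0)
		return 0;	/* the trace macro assumes a non-zero count */

	return ((u64)count * sum_squared - (u64)sum * (u64)sum)
		/ count / count;
}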
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; If not, see . + */ + +#ifndef __NVPVA_IOCTL_H__ +#define __NVPVA_IOCTL_H__ + +#include +#include + +#define NVPVA_DEVICE_NODE "/dev/nvhost-ctrl-pva" +/** + * Maximum length of the name of a symbol in a VPU ELF + */ +#define NVPVA_SYM_NAME_MAX_LEN 64U + +/** + * Invalid symbol ID + */ +#define NVPVA_INVALID_SYMBOL_ID 0xFFFF + +/* + * PVA specific error code + */ + +#define NVPVA_ENOSLOT 102 + +struct nvpva_ioctl_part { + uint64_t addr; + uint64_t size; +}; + +/* + * VPU REGISTER UNREGISTER command details + */ + +struct nvpva_vpu_exe_register_in_arg { + struct nvpva_ioctl_part exe_data; +}; + +/* IOCTL magic number - seen available in ioctl-number.txt */ +struct nvpva_vpu_exe_register_out_arg { + /* Exe id assigned by KMD for the executable */ + uint16_t exe_id; + /* Number of symbols */ + uint32_t num_of_symbols; + /* Total size of symbols in executable */ + uint32_t symbol_size_total; +}; + +union nvpva_vpu_exe_register_args { + struct nvpva_vpu_exe_register_in_arg in; + struct nvpva_vpu_exe_register_out_arg out; +}; + +struct nvpva_vpu_exe_unregister_in_arg { + /* Exe id assigned by KMD for the executable */ + uint16_t exe_id; +}; + +union nvpva_vpu_exe_unregister_args { + struct nvpva_vpu_exe_unregister_in_arg in; +}; + +enum nvpva_vpu_elf_symbol_type_e { + /** Symbol type Invalid */ + NVPVA_SYMBOL_TYPE_INVALID = 0U, + /** Symbol type Data */ + NVPVA_SYMBOL_TYPE_DATA = 1U, + /** Symbol type VPU Config Table */ + NVPVA_SYMBOL_TYPE_VPUC_TABLE = 2U, + /** Symbol type Pointer */ + NVPVA_SYMBOL_TYPE_POINTER = 3U, + /** Symbol type System */ + NVPVA_SYMBOL_TYPE_SYSTEM = 4U, + /** Symbol type Pointer which uses extended address apace */ + NVPVA_SYMBOL_TYPE_POINTER_EX = 5U, + /** Symbol type upper limit */ + NVPVA_SYMBOL_TYPE_MAX = 6U +}; +/* + * VPU SYMBOL command details + */ + +struct nvpva_symbol { + uint32_t size; + uint16_t id; + /* 1 = true; 0 = false */ + uint8_t isPointer; +}; + +struct nvpva_sym_info { + /** Null-terminated string indicating the name of the symbol */ + char sym_name[NVPVA_SYM_NAME_MAX_LEN]; + /** Size (in bytes) of the symbol */ + uint32_t sym_size; + /** Registered ID of the symbol*/ + uint16_t sym_id; + /** Type of the symbol */ + uint8_t sym_type; +}; + +struct nvpva_get_symbol_in_arg { + uint16_t exe_id; + struct nvpva_ioctl_part name; /*size including null*/ +}; + +struct nvpva_get_symbol_out_arg { + struct nvpva_symbol symbol; +}; + +union nvpva_get_symbol_args { + struct nvpva_get_symbol_in_arg in; + struct nvpva_get_symbol_out_arg out; +}; + +struct nvpva_get_sym_tab_in_arg { + uint16_t exe_id; + struct nvpva_ioctl_part tab; +}; + +union nvpva_get_sym_tab_args { + struct nvpva_get_sym_tab_in_arg in; +}; + +/* + * PIN UNPIN command details + */ + +enum nvpva_pin_segment { + NVPVA_SEGMENT_PRIV = 1U, + NVPVA_SEGMENT_USER = 2U, + NVPVA_SEGMENT_CVSRAM = 3U, + NVPVA_SEGMENT_MAX +}; + +enum nvpva_pin_buf { + NVPVA_BUFFER_GEN = 0U, + NVPVA_BUFFER_SEM = 1U, +}; + +enum nvpva_pin_access { + NVPVA_ACCESS_RD = 1U, + NVPVA_ACCESS_WR = 2U, + NVPVA_ACCESS_RW = 3U, +}; + +struct nvpva_pin_handle { + uint64_t offset; + uint64_t size; + int32_t handle; + uint32_t access; + uint32_t segment; + uint32_t type; +}; + +struct 
nvpva_pin_in_arg { + struct nvpva_pin_handle pin; +}; + +struct nvpva_pin_out_arg { + uint32_t pin_id; /* Unique ID assigned by KMD for the Pin */ + uint32_t error_code; +}; + +union nvpva_pin_args { + struct nvpva_pin_in_arg in; + struct nvpva_pin_out_arg out; +}; + +struct nvpva_unpin_in_arg { + uint32_t pin_id; +}; + +union nvpva_unpin_args { + struct nvpva_unpin_in_arg in; +}; + +/* + * TASK SUBMIT command details + */ + +enum nvpva_flags { + NVPVA_AFFINITY_VPU0 = 1U, + NVPVA_AFFINITY_VPU1 = 1U << 1U, + NVPVA_AFFINITY_VPU_ANY = NVPVA_AFFINITY_VPU0 | NVPVA_AFFINITY_VPU1, + NVPVA_PRE_BARRIER_TASK_TRUE = 1U << 2U, + NVPVA_ERR_MASK_ILLEGAL_INSTR = 1U << 3U, + NVPVA_ERR_MASK_DIVIDE_BY_0 = 1U << 4U, + NVPVA_ERR_MASK_FP_NAN = 1U << 5U, + NVPVA_GR_CHECK_EXE_FLAG = 1U << 6U +}; + +enum nvpva_fence_action_type { + NVPVA_FENCE_PRE = 1U, + NVPVA_FENCE_SOT_R5 = 2U, + NVPVA_FENCE_SOT_VPU = 3U, + NVPVA_FENCE_EOT_VPU = 4U, + NVPVA_FENCE_EOT_R5 = 5U, + NVPVA_FENCE_POST = 6U, + NVPVA_MAX_FENCE_TYPES = 7U, +}; + +enum nvpva_fence_obj_type { + NVPVA_FENCE_OBJ_SYNCPT = 0U, + NVPVA_FENCE_OBJ_SEM = 1U, + /* Below types are not being used in QNX KMD for now */ + NVPVA_FENCE_OBJ_SEMAPHORE_TS = 2U, + NVPVA_FENCE_OBJ_SYNC_FD = 3U, +}; + +enum nvpva_symbol_config { + NVPVA_SYMBOL_PARAM = 0U, + NVPVA_SYMBOL_POINTER = 1U, + NVPVA_SYMBOL_POINTER_EX = 2U, +}; + +enum nvpva_hwseq_trigger_mode { + NVPVA_HWSEQTM_VPUTRIG = 0U, + NVPVA_HWSEQTM_DMATRIG = 1U, +}; + +enum nvpva_system_test_id { + NVPVA_STRESS_POWER = 0U, + NVPVA_STRESS_POWER_DIDT = 1U, + NVPVA_STRESS_TIMING = 2U, + NVPVA_MAX_TEST_ID = 2U, +}; + +#define NVPVA_MEM_REGISTERED_SIZE (0U) +struct nvpva_mem { + uint32_t pin_id; + uint32_t offset; + /* size=NVPVA_MEM_REGISTERED_SIZE + *considered as entire pinned area + */ + uint32_t size; +}; + +struct nvpva_fence_obj_syncpt { + uint32_t id; + uint32_t value; +}; + +struct nvpva_fence_obj_sem { + struct nvpva_mem mem; + uint32_t value; +}; + +struct nvpva_fence_obj_syncfd { + uint32_t fd; +}; + +union nvpva_fence_obj { + struct nvpva_fence_obj_syncpt syncpt; + struct nvpva_fence_obj_sem sem; + struct nvpva_fence_obj_syncfd syncfd; +}; + +struct nvpva_submit_fence { + uint32_t type; + uint32_t reserved; + union nvpva_fence_obj obj; +}; + +struct nvpva_fence_action { + uint32_t type; + uint32_t reserved; + /* For syncpt, ID is the per-queue ID allocated by KMD */ + struct nvpva_submit_fence fence; + /* Buffer to capture event timestamp */ + struct nvpva_mem timestamp_buf; +}; + +struct nvpva_pointer_symbol { + /* Base address of pinned area, where + * lower 32bits filled with pin_id by UMD and + * at KMD will replace it with actual base address. + */ + uint64_t base; + /* Offset in pinned area */ + uint32_t offset; + /* Size of pinned area, filled by KMD */ + uint32_t size; +}; + +struct nvpva_pointer_symbol_ex { + /* Base address of pinned area, where + * lower 32bits filled with pin_id by UMD and + * at KMD will replace it with actual base address. + */ + uint64_t base; + /* Offset in pinned area */ + uint64_t offset; + /* Size of pinned area, filled by KMD */ + uint64_t size; +}; + +/* Used to pass both param and pointer type symbols. + * Based on nvpva_symbol_config selection the data in payload + * pointed by offset will differ. + * For NVPVA_SYMBOL_PARAM, payload data is raw data. + * For NVPVA_SYMBOL_POINTER, data is of type nvpva_pointer_symbol. 
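To make the base/pin_id convention described above concrete, here is a hedged user-mode sketch (not part of the patch): the UMD places the pin ID returned by NVPVA_IOCTL_PIN in the lower 32 bits of nvpva_pointer_symbol::base, and the KMD is expected to substitute the real pinned base address and size at submit time. The helper name and the assumption that the size is left for the KMD to fill are illustrative.

#include <stdint.h>
#include <linux/nvpva_ioctl.h>	/* assumed installed UAPI header path */

/* Illustrative sketch of filling a pointer symbol from a pin ID. */
static void fill_pointer_symbol(struct nvpva_pointer_symbol *sym,
				uint32_t pin_id, uint32_t offset_in_pin)
{
	sym->base = (uint64_t)pin_id;	/* low 32 bits carry the pin ID */
	sym->offset = offset_in_pin;	/* offset within the pinned area */
	sym->size = 0;			/* actual size is filled in by the KMD */
}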
+ */ +struct nvpva_symbol_param { + uint32_t config; /* Type of symbol configuration */ + uint32_t offset; /* Offset of symbol data in payload */ + struct nvpva_symbol symbol; /* Symbol to be configured */ +}; + +/* NOTE: Redefining the user side structure here + * This is done to allow UMD to pass the descriptor as it is and + * to handle the (user struct -> hw struct) coversion at KMD side. + * KMD needs redefinition to avoid circular dependency. + * + * An update in user structure would need corresponding change here + */ +struct nvpva_dma_descriptor { + uint32_t srcPtr; + uint32_t dstPtr; + uint32_t dst2Ptr; + uint64_t src_offset; + uint64_t dst_offset; + uint64_t dst2Offset; + uint64_t surfBLOffset; + uint16_t tx; + uint16_t ty; + uint16_t srcLinePitch; + uint16_t dstLinePitch; + int32_t srcAdv1; + int32_t dstAdv1; + int32_t srcAdv2; + int32_t dstAdv2; + int32_t srcAdv3; + int32_t dstAdv3; + uint8_t srcRpt1; + uint8_t dstRpt1; + uint8_t srcRpt2; + uint8_t dstRpt2; + uint8_t srcRpt3; + uint8_t dstRpt3; + uint8_t linkDescId; + uint8_t px; + uint32_t py; + uint8_t srcCbEnable; + uint8_t dstCbEnable; + uint32_t srcCbStart; + uint32_t dstCbStart; + uint32_t srcCbSize; + uint32_t dstCbSize; + uint8_t trigEventMode; + uint8_t trigVpuEvents; + uint8_t descReloadEnable; + uint8_t srcTransferMode; + uint8_t dstTransferMode; + uint8_t srcFormat; + uint8_t dstFormat; + uint8_t bytePerPixel; + uint8_t pxDirection; + uint8_t pyDirection; + uint8_t boundaryPixelExtension; + uint8_t transTrueCompletion; + uint8_t prefetchEnable; +}; + +/* NOTE: Redefining the user side structure here + * This is done to allow UMD to pass the channel info as it is and + * to handle the (user struct -> hw struct) coversion at KMD side. + * KMD needs redefinition to avoid circular dependency. + * + * An update in user structure would need corresponding change here + */ +struct nvpva_dma_channel { + uint8_t descIndex; + uint8_t blockHeight; + uint16_t adbSize; + uint8_t vdbSize; + uint16_t adbOffset; + uint8_t vdbOffset; + uint32_t outputEnableMask; + uint32_t padValue; + uint8_t reqPerGrant; + uint8_t prefetchEnable; + uint8_t chRepFactor; + uint8_t hwseqStart; + uint8_t hwseqEnd; + uint8_t hwseqEnable; + uint8_t hwseqTraversalOrder; + uint8_t hwseqTxSelect; + uint8_t hwseqTriggerDone; +}; + +/** + * + * @brief DMA MISR configuration information. This information is used by R5 + * to program MISR registers if a task requests MISR computation on its + * output DMA channels. + * + */ +struct nvpva_dma_misr { + /* Enable flag for MISR. Set to 0 if MISR check + * is not needed for the task, non-zero otherwise + */ + uint32_t enable; + /* Reference value for CRC computed on write + * addresses, i.e., MISR 1 + */ + uint32_t ref_addr; + /* Seed value for address CRC */ + uint32_t seed_crc0; + /* Reference value for CRC computed on first + * 256-bits of AXI write data + */ + uint32_t ref_data_1; + /* Seed value for write data CRC*/ + uint32_t seed_crc1; + /* Reference value for CRC computed on + * second 256-bits of AXI write data + */ + uint32_t ref_data_2; + /* Bitmap indicating channels participating + * in MISR checks + */ + uint32_t channel_mask; + /* Bitmap indicating descriptors participating + * in MISR checks. These are the descriptors on + * channels identified by the channel_mask field + * that perform write through AXI interface to + * MC or L2SRAM + */ + uint64_t descriptor_mask; + /* + * MISR timeout value configured in DMA common register + * @ref PVA_DMA_COMMON_MISR_ENABLE. 
Timeout is caclutated as + * number of AXI clock cycles. + */ + uint32_t misr_timeout; +}; + +/** + * Used to pass config for Hardware Sequencer (HWSeq). + * For HWSeq operations, all DMA channels will be configured + * based on the selection of hardware sequencer trigger mode. + * For NVPVA_HWSEQTM_VPUTRIG, VPU trigger mode will be used. + * For NVPVA_HWSEQTM_DMATRIG, DMA trigger mode will be used. + */ +struct nvpva_hwseq_config { + uint32_t hwseqTrigMode; + uint32_t reserved; + struct nvpva_mem hwseqBuf; +}; + +struct nvpva_ioctl_task { + uint16_t exe_id; + uint32_t flags; + uint32_t l2_alloc_size; /* Not applicable for Xavier */ + struct nvpva_ioctl_part prefences; + struct nvpva_ioctl_part user_fence_actions; + struct nvpva_ioctl_part input_task_status; + struct nvpva_ioctl_part output_task_status; + struct nvpva_ioctl_part dma_descriptors; + struct nvpva_ioctl_part dma_channels; + struct nvpva_ioctl_part dma_misr_config; + struct nvpva_ioctl_part hwseq_config; + struct nvpva_ioctl_part symbols; + struct nvpva_ioctl_part symbol_payload; +}; + +struct nvpva_ioctl_submit_in_arg { + uint32_t version; + uint64_t submission_timeout_us; + uint64_t execution_timeout_us; + struct nvpva_ioctl_part tasks; +}; + +struct nvpva_submit_in_arg_s { + uint32_t version; + uint16_t num_tasks; + uint64_t submission_timeout_us; + uint64_t execution_timeout_us; +}; + +union nvpva_ioctl_submit_args { + struct nvpva_ioctl_submit_in_arg in; +}; + +struct nvpva_set_vpu_print_buffer_size_in_arg { + uint32_t size; +}; + +union nvpva_set_vpu_print_buffer_size_args { + struct nvpva_set_vpu_print_buffer_size_in_arg in; +}; + +/* There are 64 DMA descriptors in T19x and T23x. But R5 FW reserves + * 4 DMA descriptors for internal use. + */ +#define NVPVA_TASK_MAX_DMA_DESCRIPTORS (60U) +/*TODO: Remove NVPVA_TASK_MAX_DMA_CHANNELS */ +/*There are 14 DMA channels in T19x and 16 DMA channels in T23X. + * R5 FW reserves one DMA channel for internal use. + */ +#define NVPVA_TASK_MAX_DMA_CHANNELS 16U +#define NVPVA_TASK_MAX_DMA_CHANNELS_T19X (13U) +#define NVPVA_TASK_MAX_DMA_CHANNELS_T23X (15U) +#define NVPVA_NOOP_EXE_ID 65535 +#define NVPVA_SUBMIT_MAX_TASKS 256U + +#define NVPVA_IOCTL_MAGIC 'Q' + +#define NVPVA_IOCTL_REGISTER_VPU_EXEC \ + _IOWR(NVPVA_IOCTL_MAGIC, 1, union nvpva_vpu_exe_register_args) + +#define NVPVA_IOCTL_UNREGISTER_VPU_EXEC \ + _IOW(NVPVA_IOCTL_MAGIC, 2, union nvpva_vpu_exe_unregister_args) + +#define NVPVA_IOCTL_GET_SYMBOL_ID \ + _IOWR(NVPVA_IOCTL_MAGIC, 3, union nvpva_get_symbol_args) + +#define NVPVA_IOCTL_PIN \ + _IOWR(NVPVA_IOCTL_MAGIC, 4, union nvpva_pin_args) + +#define NVPVA_IOCTL_UNPIN \ + _IOW(NVPVA_IOCTL_MAGIC, 5, union nvpva_unpin_args) + +#define NVPVA_IOCTL_SUBMIT \ + _IOW(NVPVA_IOCTL_MAGIC, 6, union nvpva_ioctl_submit_args) + +#define NVPVA_IOCTL_NOP \ + _IOW(NVPVA_IOCTL_MAGIC, 7) + +#define NVPVA_IOCTL_ACQUIRE_QUEUE \ + _IOW(NVPVA_IOCTL_MAGIC, 8) + +#define NVPVA_IOCTL_RELEASE_QUEUE \ + _IOW(NVPVA_IOCTL_MAGIC, 9) + +#define NVPVA_IOCTL_GET_SYM_TAB \ + _IOWR(NVPVA_IOCTL_MAGIC, 10, union nvpva_get_sym_tab_args) + +#define NVPVA_IOCTL_SET_VPU_PRINT_BUFFER_SIZE \ + _IOW(NVPVA_IOCTL_MAGIC, 11, union nvpva_set_vpu_print_buffer_size_args) + +#define NVPVA_IOCTL_NUMBER_MAX 11 + +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) +#define NVPVA_IOCTL_MAX_SIZE \ + MAX(sizeof(union nvpva_vpu_exe_register_args), \ + MAX(sizeof(union nvpva_vpu_exe_unregister_args), \ + MAX(sizeof(union nvpva_get_symbol_args), \ + MAX(sizeof(union nvpva_pin_args), \ + MAX(sizeof(union nvpva_unpin_args), \ + MAX(sizeof(union nvpva_ioctl_submit_args), \ + MAX(sizeof(union nvpva_get_sym_tab_args), \ + MAX(sizeof(union nvpva_set_vpu_print_buffer_size_args), \ + 0)))))))) + +/* NvPva Task param limits */ +#define NVPVA_TASK_MAX_PREFENCES 8U +#define NVPVA_TASK_MAX_FENCEACTIONS 4U +#define NVPVA_TASK_MAX_INPUT_STATUS 8U +#define NVPVA_TASK_MAX_OUTPUT_STATUS 8U +#define NVPVA_TASK_MAX_SYMBOLS 128U +/* VMEM configurable size */ +#define NVPVA_TASK_MAX_PAYLOAD_SIZE 8192U +#define NVPVA_TASK_MAX_SIZE \ + (sizeof(struct nvpva_submit_task_header) + \ + NVPVA_TASK_MAX_PREFENCES * sizeof(struct nvpva_submit_fence) + \ + NVPVA_TASK_MAX_FENCEACTIONS * \ + NVPVA_MAX_FENCE_TYPES * sizeof(struct nvpva_fence_action) + \ + NVPVA_TASK_MAX_INPUT_STATUS * sizeof(struct nvpva_mem) + \ + NVPVA_TASK_MAX_OUTPUT_STATUS * sizeof(struct nvpva_mem) + \ + NVPVA_TASK_MAX_DMA_DESCRIPTORS * \ + sizeof(struct nvpva_dma_descriptor) + \ + NVPVA_TASK_MAX_DMA_CHANNELS * sizeof(struct nvpva_dma_channel) + \ + sizeof(struct nvpva_hwseq_config) + \ + NVPVA_TASK_MAX_SYMBOLS * sizeof(struct nvpva_symbol_param) + \ + NVPVA_TASK_MAX_PAYLOAD_SIZE) + +/* NvPva submit param limits */ +#define NVPVA_SUBMIT_MAX_SIZE \ + (NVPVA_SUBMIT_MAX_TASKS * NVPVA_TASK_MAX_SIZE + \ + sizeof(struct nvpva_submit_in_arg_s)) + +struct pva_ocd_ioctl_vpu_io_param { + uint32_t instr; + uint32_t n_write; + uint32_t n_read; + uint32_t data[7]; +}; + +#define PVA_OCD_MAGIC 'V' + +#define PVA_OCD_IOCTL_VPU_IO \ + _IOWR(PVA_OCD_MAGIC, 1, struct pva_ocd_ioctl_vpu_io_param) + +#endif /* __NVPVA_IOCTL_H__ */ diff --git a/kernel-src-files-copy-list.txt b/kernel-src-files-copy-list.txt index 01be0fa5..3aefe972 100644 --- a/kernel-src-files-copy-list.txt +++ b/kernel-src-files-copy-list.txt @@ -2,7 +2,3 @@ nvidia/drivers/platform/tegra/cvnas drivers/platform/tegra nvidia/include/linux/cvnas.h include/linux/cvnas.h -# Files/directories for NVPVA -nvidia/drivers/video/tegra/host/pva drivers/video/tegra/host -nvidia/include/trace/events/nvhost_pva.h include/trace/events/nvhost_pva.h -nvidia/include/uapi/linux/nvpva_ioctl.h include/uapi/linux/nvpva_ioctl.h
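For completeness, a hedged user-space sketch of the executable registration flow defined by this UAPI header (illustrative only, not part of the patch; it assumes the header is installed as <linux/nvpva_ioctl.h> and that the VPU ELF image is already in memory):

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvpva_ioctl.h>

/* Register a VPU executable with the PVA KMD, then unregister it again. */
static int example_register_vpu_exe(const void *elf, uint64_t elf_size)
{
	union nvpva_vpu_exe_register_args reg;
	union nvpva_vpu_exe_unregister_args unreg;
	int fd, err;

	fd = open(NVPVA_DEVICE_NODE, O_RDWR);
	if (fd < 0)
		return -1;

	memset(&reg, 0, sizeof(reg));
	reg.in.exe_data.addr = (uintptr_t)elf;	/* user VA of the ELF image */
	reg.in.exe_data.size = elf_size;

	err = ioctl(fd, NVPVA_IOCTL_REGISTER_VPU_EXEC, &reg);
	if (err == 0) {
		uint16_t exe_id = reg.out.exe_id;	/* ID assigned by KMD */

		/* ... pin buffers and submit tasks referencing exe_id ... */

		memset(&unreg, 0, sizeof(unreg));
		unreg.in.exe_id = exe_id;
		err = ioctl(fd, NVPVA_IOCTL_UNREGISTER_VPU_EXEC, &unreg);
	}

	close(fd);
	return err;
}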