pva: deploy V3 KMD

Jira PVAAS-17785

Change-Id: I8ebc4c49aec209c5f82c6725605b62742402500a
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3299880
Tested-by: Nan Wang <nanwa@nvidia.com>
Reviewed-by: Vishwas M <vishwasm@nvidia.com>
Reviewed-by: Mohnish Jain <mohnishj@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Nan Wang <nanwa@nvidia.com>
Nan Wang
2025-02-10 14:20:57 -08:00
committed by Jon Hunter
parent b5d768302a
commit b63a822a1b
113 changed files with 22508 additions and 0 deletions

View File

@@ -0,0 +1,104 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: GPL-2.0-only
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
ifndef CONFIG_TEGRA_SYSTEM_TYPE_ACK
ifeq ($(CONFIG_TEGRA_OOT_MODULE),m)
ifeq ($(findstring ack_src,$(NV_BUILD_KERNEL_OPTIONS)),)
obj-m := nvhost-pva.o
PVA_SYS_DIR := .
PVA_SYS_ABSDIR := $(srctree.nvidia-oot)/drivers/video/tegra/host/pva
###### Begin generated section ######
pva_objs += \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_block_allocator.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_cmdbuf.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_context.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_debugfs.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_device.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_binding.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_validate.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_write.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_fw_debug.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_fw_profiler.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_hwseq_validate.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_msg.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_op_handler.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_pm.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_queue.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_resource_table.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_sha256.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_boot.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_elf_parser.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_executable.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_hwpm.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_isr.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_misc.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_submitter.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_t23x.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_t26x.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_tegra_stats.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_utils.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_vpu_app_auth.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_vpu_ocd.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_debugfs.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_device.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_device_memory.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_driver.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_ioctl.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_isr.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_misc.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_smmu.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_vpu_app_auth.o \
pva_inc_flags += \
-I$(PVA_SYS_ABSDIR)/src/fw/baremetal/include \
-I$(PVA_SYS_ABSDIR)/src/fw/include \
-I$(PVA_SYS_ABSDIR)/src/include \
-I$(PVA_SYS_ABSDIR)/src/kmd/common \
-I$(PVA_SYS_ABSDIR)/src/kmd/common/shim \
-I$(PVA_SYS_ABSDIR)/src/kmd/include \
-I$(PVA_SYS_ABSDIR)/src/kmd/linux/include \
-I$(PVA_SYS_ABSDIR)/src/libs/pva/include \
pva_def_flags += \
-DPVA_BUILD_MODE=PVA_BUILD_MODE_L4T \
-DPVA_BUILD_MODE_BAREMETAL=5 \
-DPVA_BUILD_MODE_L4T=3 \
-DPVA_BUILD_MODE_NATIVE=1 \
-DPVA_BUILD_MODE_QNX=2 \
-DPVA_BUILD_MODE_SIM=4 \
-DPVA_DEV_MAIN_COMPATIBLE=1 \
-DPVA_ENABLE_CUDA=1 \
-DPVA_IS_DEBUG=0 \
-DPVA_SAFETY=0 \
-DPVA_SKIP_SYMBOL_TYPE_CHECK \
-DPVA_SUPPORT_XBAR_RAW=1 \
-Dpva_kmd_linux_dummy_EXPORTS \
###### End generated section ######
nvhost-pva-objs += $(pva_objs)
ccflags-y += $(pva_inc_flags)
ccflags-y += $(pva_def_flags)
ccflags-y += -std=gnu11
endif
endif
endif

View File

@@ -0,0 +1,196 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_BIT_H
#define PVA_BIT_H
/*
* Bit manipulation macros
*/
/**
* @brief Number of bits per byte.
*/
#define PVA_BITS_PER_BYTE (8UL)
/**
* @defgroup PVA_BIT8_HELPER
*
* @brief Bit manipulation macros for numbers of type uint8_t.
* Parameters that convey a bit position must be in the range
* 0 to 7 inclusive.
* MSB and LSB parameters must both be in the range 0 to 7
* inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a variable of type uint8_t.
*/
#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu))
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Macro used to generate a bit-mask from MSB to LSB in a uint8_t variable.
* This macro sets all the bits from MSB to LSB.
*/
#define PVA_MASK8(_msb_, _lsb_) \
((uint8_t)((((PVA_BIT8(_msb_) - 1U) | PVA_BIT8(_msb_)) & \
~(PVA_BIT8(_lsb_) - 1U)) & \
0xffu))
//! @endcond
/** @} */
/**
* @defgroup PVA_BIT16_HELPER
*
* @brief Bit manipulation macros for numbers of type uint16_t.
* Parameters that convey a bit position must be in the range
* 0 to 15 inclusive.
* MSB and LSB parameters must both be in the range 0 to 15
* inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 16 bit number.
*/
#define PVA_BIT16(_b_) ((uint16_t)(((uint16_t)1U << (_b_)) & 0xffffu))
/**
* @brief Macro to mask a range (MSB to LSB) of bit positions in a 16 bit number.
* This will set all the bit positions in the specified range.
*/
#define PVA_MASK16(_msb_, _lsb_) \
((uint16_t)((((PVA_BIT16(_msb_) - 1U) | PVA_BIT16(_msb_)) & \
~(PVA_BIT16(_lsb_) - 1U)) & \
0xffffu))
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Macro to extract bits from a 16 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT16(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK16((_msb_), (_lsb_))) >> (_lsb_)))
//! @endcond
/**
* @brief Macro to insert a value into a range of bits (MSB to LSB) of a
* 16 bit number. Bits outside the specified range are cleared.
*/
#define PVA_INSERT16(_x_, _msb_, _lsb_) \
((((uint16_t)(_x_)) << (_lsb_)) & PVA_MASK16((_msb_), (_lsb_)))
/** @} */
/**
* @defgroup PVA_BIT32_HELPER
*
* @brief Bit manipulation macros for numbers of type uint32_t.
* Parameters that convey a bit position must be in the range
* 0 to 31 inclusive.
* MSB and LSB parameters must both be in the range 0 to 31
* inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 32 bit number.
*/
#define PVA_BIT(_b_) ((uint32_t)(((uint32_t)1U << (_b_)) & 0xffffffffu))
/**
* @brief Macro to mask a range (MSB to LSB) of bit positions in a 32 bit number.
* This will set all the bit positions in the specified range.
*/
#define PVA_MASK(_msb_, _lsb_) \
(((PVA_BIT(_msb_) - 1U) | PVA_BIT(_msb_)) & ~(PVA_BIT(_lsb_) - 1U))
/**
* @brief Macro to extract bits from a 32 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK((_msb_), (_lsb_))) >> (_lsb_)))
/**
* @brief Macro to insert a value into a range of bits (MSB to LSB) of a
* 32 bit number. Bits outside the specified range are cleared.
*/
#define PVA_INSERT(_x_, _msb_, _lsb_) \
((((uint32_t)(_x_)) << (_lsb_)) & PVA_MASK((_msb_), (_lsb_)))
/** @} */
/**
* @defgroup PVA_BIT64_HELPER
*
* @brief Bit manipulation macros for numbers of type uint64_t.
* Parameters that convey a bit position must be in the range
* 0 to 63 inclusive.
* MSB and LSB parameters must both be in the range 0 to 63
* inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 64 bit number.
*/
#define PVA_BIT64(_b_) \
((uint64_t)(((uint64_t)1UL << (_b_)) & 0xffffffffffffffffu))
/**
* @brief Macro used to generate a bit-mask from (MSB to LSB) in a uint64_t variable.
* This macro sets all the bits from MSB to LSB.
*/
#define PVA_MASK64(_msb_, _lsb_) \
(((PVA_BIT64(_msb_) - (uint64_t)1U) | PVA_BIT64(_msb_)) & \
~(PVA_BIT64(_lsb_) - (uint64_t)1U))
/**
* @brief Macro to extract bits from a 64 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT64(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK64((_msb_), (_lsb_))) >> (_lsb_)))
/**
* @brief Macro to insert a range of bits into a 64 bit number.
* The bits are derived from the number passed as argument.
*/
#define PVA_INSERT64(_x_, _msb_, _lsb_) \
((((uint64_t)(_x_)) << (_lsb_)) & PVA_MASK64((_msb_), (_lsb_)))
/**
* @brief Macro to pack two 32 bit values into a 64 bit number.
* The upper 32 bits of the result are taken from the second
* argument (_h_) and the lower 32 bits from the first
* argument (_l_).
*/
#define PVA_PACK64(_l_, _h_) \
(PVA_INSERT64((_h_), 63U, 32U) | PVA_INSERT64((_l_), 31U, 0U))
/**
* @brief Macro to extract the higher 32 bits from a 64 bit number.
*/
#define PVA_HI32(_x_) ((uint32_t)(((_x_) >> 32U) & 0xFFFFFFFFU))
/**
* @brief Macro to extract the lower 32 bits from a 64 bit number.
*/
#define PVA_LOW32(_x_) ((uint32_t)((_x_)&0xFFFFFFFFU))
/** @} */
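/*
 * Illustrative usage sketch (not part of the original interface): composing
 * and decomposing values with the helpers above. The variable names and
 * literal values below are examples only.
 *
 * @code
 * uint32_t lo     = 0x89ABCDEFU;
 * uint32_t hi     = 0x01234567U;
 * uint64_t packed = PVA_PACK64(lo, hi);                          // 0x0123456789ABCDEF
 * uint32_t field  = PVA_EXTRACT(0xDEADBEEFU, 15U, 8U, uint32_t); // 0xBE
 * uint64_t word   = PVA_INSERT64(0x3FULL, 37U, 32U);             // bits [37:32] = 0x3F
 * uint32_t high   = PVA_HI32(packed);                            // 0x01234567
 * uint32_t low    = PVA_LOW32(packed);                           // 0x89ABCDEF
 * @endcode
 */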
#endif

View File

@@ -0,0 +1,316 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_CHECKPOINT_H
#define PVA_CHECKPOINT_H
/**
* @file pva-checkpoint.h
* @brief Defines macros to create a checkpoint
*/
/**
* @defgroup PVA_CHECKPOINT_MACROS Macros to define a checkpoint
*
* @brief Checkpoints are the 32-bit status values that can be written to status
* register during R5's execution. The 32-bit value is divided into four 8-bit values.
* These are:
* - major code: major aspect (usually a unit) of the uCode. Bit Position: [31:24]
* Valid values are defined at @ref PVA_CHECKPOINT_MAJOR_CODES.
* - minor code: minor aspect (usually a function) of the uCode. The interpretation of the
* minor value is determined by the major value. Bit Position: [23:16]
* - flags: flags indicating type of the checkpoint such as error checkpoint,
* performance checkpoint, checkpoint indicating start of an operation,
* checkpoint indicating end of an operation etc. Bit Position: [15:8]
* Valid values are defined at @ref PVA_CHECKPOINT_FLAGS.
* - sequence: disambiguates multiple checkpoints within a minor code or conveys additional
* information. The interpretation of the sequence value is determined by both the
* major and minor values. Bit Position: [7:0]
* Valid values are any values from 0 to UINT8_MAX
* @{
*/
/**
* @defgroup PVA_CHECKPOINT_MAJOR_CODES
* @brief Macros to define the major code field of the checkpoint @ingroup PVA_CHECKPOINT_MACROS
* @{
*/
/*
* Operational major codes
*/
/**
* @brief Major code for PVA during Boot.
*/
#define PVA_CHK_MAIN (0x01U)
//! @endcond
/**
* @brief Major code for error and abort conditions.
*/
#define PVA_CHK_ABORT (0xFFU)
/** @} */
/**
* @defgroup PVA_CHECKPOINT_HW_STATE_MINOR_CODES
* @brief Macros to define the minor code field of the checkpoints with major code PVA_CHK_HW_STATE
* @ingroup PVA_CHECKPOINT_MACROS
*
* @{
*/
/**
* @brief Minor code while doing a MMIO HW state check.
*/
#define PVA_CHK_HW_STATE_MMIO (0x01U)
/**
* @brief Minor code while doing a VIC HW state check.
*/
#define PVA_CHK_HW_STATE_VIC (0x02U)
/**
* @brief Minor code while doing a ARM register HW state check.
*/
#define PVA_CHK_HW_STATE_ARM (0x03U)
/**
* @brief Minor code while doing a MPU HW state check.
*/
#define PVA_CHK_HW_STATE_MPU (0x04U)
/**
* @brief Minor code while doing a DMA HW state check.
*/
#define PVA_CHK_HW_STATE_DMA (0x05U)
/**
* @brief Minor code while doing a golden register HW state check.
*/
#define PVA_CHK_HW_STATE_GOLDEN (0x06U)
/** @} */
/** @} */
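/*
 * Illustrative sketch (not part of the original interface): composing a
 * checkpoint word from its four 8-bit fields with the PVA_INSERT() helper
 * from pva-bit.h, assuming that helper is available in the including unit.
 * The minor code, flags and sequence values below are placeholders.
 *
 * @code
 * uint32_t checkpoint = PVA_INSERT(PVA_CHK_MAIN, 31U, 24U) | // major code
 *                       PVA_INSERT(0x02U, 23U, 16U) |        // minor code (placeholder)
 *                       PVA_INSERT(0x01U, 15U, 8U) |         // flags (placeholder)
 *                       PVA_INSERT(0x00U, 7U, 0U);           // sequence (placeholder)
 * @endcode
 */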
/**
* @defgroup PVA_ABORT_REASONS
*
* @brief Macros to define the abort reasons
* @{
*/
/**
* @brief Minor code for abort due to assert.
*/
#define PVA_ABORT_ASSERT (0x01U)
/**
* @brief Minor code for abort in case pva main call fails.
*/
#define PVA_ABORT_FALLTHRU (0x02U)
/**
* @brief Minor code for abort in case of fatal IRQ.
*/
#define PVA_ABORT_IRQ (0x05U)
/**
* @brief Minor code for abort in case of MPU failure.
*/
#define PVA_ABORT_MPU (0x06U)
/**
* @brief Minor code for abort in case of ARM exception.
*/
#define PVA_ABORT_EXCEPTION (0x07U)
/**
* @brief Minor code for abort in case of un-supported SID read.
*/
#define PVA_ABORT_UNSUPPORTED (0x09U)
/**
* @brief Minor code for abort in case of DMA failures.
*/
#define PVA_ABORT_DMA_TASK (0x0cU)
/**
* @brief Minor code for abort in case of WDT failures.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_WATCHDOG (0x0eU)
//! @endcond
/**
* @brief Minor code for abort in case of VPU init failures.
*/
#define PVA_ABORT_VPU (0x0fU)
/**
* @brief Minor code for abort in case of DMA MISR setup failures.
*/
#define PVA_ABORT_DMA (0x10U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code for abort in case of Mbox errors.
* Note: This is used only in T19x
*/
#define PVA_ABORT_MBOX_WAR (0x12U)
//! @endcond
/**
* @brief Minor code for abort in case of AISR errors.
*/
#define PVA_ABORT_AISR_QUEUE (0x14U)
/**
* @brief Minor code for abort in case of bad task.
*/
#define PVA_ABORT_BAD_TASK (0x15U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code for abort in case of PPE init failures.
* Note: This is only used in T26x
*/
#define PVA_ABORT_PPE (0x16U)
//! @endcond
/**
* @brief Minor code for abort in case of RAMIC failures.
*/
#define PVA_ABORT_RAMIC (0x20U)
/**
* @brief Minor code for SEC safety errors.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_SEC_SERR (0x21U)
/**
* @brief Minor code for SEC functional errors.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_SEC_FERR (0x22U)
/**
* @brief Minor code for abort in case of firewall decode error.
*/
#define PVA_ABORT_L2SRAM_FWDEC (0x23U)
/**
* @brief Minor code for abort in case of kernel panic.
*/
#define PVA_ABORT_KERNEL_PANIC (0x30U)
/**
* @brief Minor code for abort in case of Batch Timeout.
*/
#define PVA_ABORT_BATCH_TIMEOUT (0x40U)
/**
* @brief Minor code for abort in case of a DMA transfer timeout
* (while in the launch phase for the VPU).
*/
#define PVA_ABORT_DMA_SETUP_TIMEOUT (0x41U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code used when NOC BIST is run.
* Note: This is only used in T19x
*/
#define PVA_ABORT_NOC_BIST (0xfcU)
//! @endcond
/** @} */
/**
* @defgroup PVA_ABORT_ARGUMENTS Macros to define the argument for pva_abort operation
*
* @brief Argument of pva_abort operation is updated in status register
*
*/
/**
* @defgroup PVA_ABORT_ARGUMENTS_MPU
* @brief Argument to pva_abort() from MPU operations
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor code when there is an error while configuring MPU.
*/
#define PVA_ABORT_MPU_CONFIG (0xE001U)
/**
* @brief Minor code when there is an error while initializing MPU.
*/
#define PVA_ABORT_MPU_INIT (0xE002U)
/** @} */
/**
* @defgroup PVA_ABORT_ARGUMENTS_VPU
* @brief Argument to pva_abort() from VPU operations
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor code when VPU is in debug state.
*/
#define PVA_ABORT_VPU_DEBUG (0xE001U)
/** @} */
/**
* @defgroup PVA_ABORT_ARGUMENTS_PPE
* @brief Argument to pva_abort() from PPE operations
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor code when PPE is in debug state.
*/
#define PVA_ABORT_PPE_DEBUG (0xE002U)
/** @} */
/**
* @brief Minor Code when DMA state is not idle to perform
* DMA MISR setup.
*/
#define PVA_ABORT_DMA_MISR_BUSY (0xE001U)
/**
* @brief Minor Code in DMA when MISR has timed out
*/
#define PVA_ABORT_DMA_MISR_TIMEOUT (0xE002U)
/**
* @defgroup PVA_ABORT_ARGUMENTS_IRQ Argument to pva_abort() from IRQs
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor Code for Command FIFO used by Interrupt Handler.
*/
#define PVA_ABORT_IRQ_CMD_FIFO (0xE001U)
#if (0 == DOXYGEN_DOCUMENTATION)
#define PVA_ABORT_IRQ_TEST_HOST (0xE002U)
#endif
/** @} */
#endif

View File

@@ -0,0 +1,231 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_CONFIG_H
#define PVA_CONFIG_H
#include <pva-types.h>
#include "pva_fw_constants.h"
/**
* @defgroup PVA_CONFIG_PARAMS
*
* @brief PVA Configuration parameters.
* @{
*/
/**
* @brief Queue id for queue0.
*/
#define PVA_FW_QUEUE_0 (0U)
/**
* @brief Total number of queues that are present
* for communication between KMD and FW.
*/
#define PVA_NUM_QUEUES (8U)
/**
* @brief Maximum queue id value in PVA System.
*/
#define PVA_MAX_QUEUE_ID (PVA_NUM_QUEUES - 1U)
/**
* @brief Maximum number of tasks that a queue supports.
*/
#define MAX_QUEUE_DEPTH (256U)
/**
* @brief Number of Hardware Semaphore registers in PVA System.
*/
#define PVA_NUM_SEMA_REGS (4U)
/**
* @brief Number of Hardware Mailbox registers in PVA System.
*/
#define PVA_NUM_MBOX_REGS (8U)
/**
* @brief Maximum number of Pre-Actions for a task.
*/
#define PVA_MAX_PREACTIONS (26U)
/**
* @brief Maximum number of Post-Actions for a task.
*/
#define PVA_MAX_POSTACTIONS (28U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Maximum number of DMA channels for T26x.
*/
#define PVA_NUM_DMA_CHANNELS_T26X (8U)
/**
* @brief Total number of AXI data buffers for T26x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T26X (304U)
/**
* @brief Number of reserved AXI data buffers for T26x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T26X (16U)
/**
* @brief Number of dynamic AXI data buffers for T26x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T26X \
(PVA_NUM_DMA_ADB_BUFFS_T26X - PVA_NUM_RESERVED_ADB_BUFFERS_T26X)
/**
* @brief Maximum number of DMA channels for T23x.
*/
#define PVA_NUM_DMA_CHANNELS_T23X (16U)
//! @endcond
/**
* @brief Number of DMA descriptors for T19x.
*/
#define PVA_NUM_DMA_DESCS_T19X (64U)
/**
* @brief Number of DMA descriptors for T23x.
*/
#define PVA_NUM_DMA_DESCS_T23X (64U)
/**
* @brief Number of DMA descriptors for T26x.
*/
#define PVA_NUM_DMA_DESCS_T26X (96U)
/**
* @brief Number of reserved DMA channels per DMA engine. These channels
* are reserved for R5 transfers and are used by the R5 to move
* the data it needs.
*/
#define PVA_NUM_RESERVED_CHANNELS (1U)
/**
* @brief Number of dynamic DMA descriptors for T19x. These descriptors can be
* used by the VPU application to transfer data. They exclude
* the reserved descriptors from the total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T19X \
(PVA_NUM_DMA_DESCS_T19X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* @brief Number of dynamic DMA descriptors for T23x. These descriptors can be
* used by the VPU application to transfer data. They exclude
* the reserved descriptors from the total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T23X \
(PVA_NUM_DMA_DESCS_T23X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* @brief Number of dynamic DMA descriptors for T26x. These descriptors can be
* used by the VPU application to transfer data. They exclude
* the reserved descriptors from the total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T26X \
(PVA_NUM_DMA_DESCS_T26X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* Note: T26x will be brought up first on Linux, and then on QNX. To support this,
* the following macro is needed so that the QNX driver can build without requiring
* any changes.
*/
#define PVA_NUM_DYNAMIC_DESCS (PVA_NUM_DYNAMIC_DESCS_T23X)
/**
* @brief Number of reserved AXI data buffers for T23x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T23X (16U)
/**
* @brief Number of reserved VMEM data buffers.
*/
#define PVA_NUM_RESERVED_VDB_BUFFERS (0U)
/**
* @brief Total number of VMEM data buffers.
*/
#define PVA_NUM_DMA_VDB_BUFFS (128U)
/**
* @brief Total number of AXI data buffers for T23x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T23X (272U)
/**
* @brief Number of dynamic AXI data buffers for T23x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T23X \
(PVA_NUM_DMA_ADB_BUFFS_T23X - PVA_NUM_RESERVED_ADB_BUFFERS_T23X)
/**
* @brief Number of dynamic VMEM data buffers for T23x.
* These exclude the reserved VMEM data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_VDB_BUFFS \
(PVA_NUM_DMA_VDB_BUFFS - PVA_NUM_RESERVED_VDB_BUFFERS)
/**
* @brief The first Reserved DMA descriptor. This is used as a
* starting point to iterate over reserved DMA descriptors.
*/
#define PVA_RESERVED_DESC_START (60U)
/**
* @brief The first reserved AXI data buffer. This is used as a
* starting point to iterate over reserved AXI data buffers.
*/
#define PVA_RESERVED_ADB_BUFF_START PVA_NUM_DYNAMIC_ADB_BUFFS
/**
* @brief This macro has the value to be set by KMD in the shared semaphores
* @ref PVA_PREFENCE_SYNCPT_REGION_IOVA_SEM or @ref PVA_POSTFENCE_SYNCPT_REGION_IOVA_SEM
* if the syncpoint reserved region must not be configured as uncached
* in R5 MPU.
*/
#define PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET (0xFFFFFFFFU)
/** @} */
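/*
 * Illustrative sketch (not part of the original interface): the dynamic and
 * reserved AXI data buffer counts partition the total buffer range, so a
 * compile-time sanity check could look like the following. The use of a
 * static assertion here is an assumption for illustration only.
 *
 * @code
 * _Static_assert(PVA_NUM_DYNAMIC_ADB_BUFFS_T23X +
 *                        PVA_NUM_RESERVED_ADB_BUFFERS_T23X ==
 *                PVA_NUM_DMA_ADB_BUFFS_T23X,
 *                "T23x ADB buffer partitioning must cover the full range");
 * @endcode
 */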
/**
* @defgroup PVA_CONFIG_PARAMS_T19X
*
* @brief PVA Configuration parameters exclusively for T19X.
* @{
*/
/**
* @brief Number of DMA channels for T19x or Xavier.
*/
#define PVA_NUM_DMA_CHANNELS_T19X (14U)
/**
* @brief Number of reserved AXI data buffers for T19x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T19X (8U)
/**
* @brief Total number of AXI data buffers for T19x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T19X (256U)
/**
* @brief Number of dynamic AXI data buffers for T19x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T19X \
(PVA_NUM_DMA_ADB_BUFFS_T19X - PVA_NUM_RESERVED_ADB_BUFFERS_T19X)
/** @} */
#endif

View File

@@ -0,0 +1,428 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_ERRORS_H
#define PVA_ERRORS_H
#include <stdint.h>
#include <pva-packed.h>
/**
* @brief PVA Error codes
*/
typedef uint16_t pva_errors_t;
/**
* @defgroup PVA_ERRORS
*
* @brief General and interface errors of PVA.
* @{
*/
/**
* @brief In case of no Error.
*/
#define PVA_ERR_NO_ERROR (0x0U)
/**
* @brief Error in case of an illegal command
* PVA FW executes commands that are found
* in the command look up table. If a command
* is not part of supported commands, this
* error will be returned. Valid commands can be
* referred at @ref pva_cmd_lookup_t.
*
*/
#define PVA_ERR_BAD_CMD (0x1U)
/**
* @brief Error in case of a bad queue id, i.e.,
* the requested queue id is not available.
*/
#define PVA_ERR_BAD_QUEUE_ID (0x3U)
/**
* @brief Error in case of invalid pve-id. This
* error is generated if PVE id is greater
* than @ref PVA_NUM_PVE.
*/
#define PVA_ERR_BAD_PVE_ID (0x4U)
/**
* @brief Error in case when number of pre-actions
* are more than what can be accommodated.
*/
#define PVA_ERR_BUFF_TOO_SMALL (0x5U)
/**
* @brief Error in case when requested feature can not be satisfied.
* This error arises in scenarios where certain actions are
* not supported during execution of pre-actions or post-actions.
* For instance, @ref TASK_ACT_WRITE_STATUS is not supported in
* executing pre-actions of task.
*/
#define PVA_ERR_FEATURE_NOT_SUPPORTED (0x6U)
/**
* @brief Error in case when the address generated or translated does not
* meet the constraints like alignment or non-null.
*/
#define PVA_ERR_BAD_ADDRESS (0x9U)
/**
* @brief Error in case when timestamp is requested on un-supported action.
*/
#define PVA_ERR_BAD_TIME_VALUE (0xdU)
#if PVA_SAFETY == 0
/**
* @brief Error in case when the register provided to update
* the status is invalid.
*/
#define PVA_ERR_BAD_STATUS_REG (0x10U)
#endif
//! @endcond
/**
* @brief Error in case of a bad task.
* This error is returned when the task does not meet
* necessary criteria such as being non-zero or
* 64 byte aligned.
*/
#define PVA_ERR_BAD_TASK (0x15U)
/**
* @brief Error in case of invalid task action list. Invalid
* action list arises in scenarios like number of
* pre and post actions not being zero but actual
* pre or post action to be performed being NULL.
*/
#define PVA_ERR_BAD_TASK_ACTION_LIST (0x16U)
/**
* @brief Error when the internal state of a task is not as expected.
* A task transitions through various states while
* executing. When a state is not coherent with the
* action being performed, this error is returned.
* For example, a task cannot be in a running state
* while tear-down is being performed.
*/
#define PVA_ERR_BAD_TASK_STATE (0x17U)
/**
* @brief Error when the input status does not match the actual status.
* This error occurs when the status from @ref pva_gen_task_status_t does not match
* the actual status populated by FW during task execution.
*/
#define PVA_ERR_TASK_INPUT_STATUS_MISMATCH (0x18U)
/**
* @brief Error in case of invalid parameters. This error occurs when
* the parameters passed are invalid; it applies to both task parameters
* and DMA parameters.
*/
#define PVA_ERR_BAD_PARAMETERS (0x1aU)
/**
* @brief Error in case a timeout occurred for a batch of tasks.
*/
#define PVA_ERR_PVE_TIMEOUT (0x23U)
/**
* @brief Error when VPU has halted or turned off.
*/
#define PVA_ERR_VPU_ERROR_HALT (0x25U)
/**
* @brief Error reported after FW sends an abort signal to KMD; KMD writes this
* status into the status buffers of pending tasks.
*/
#define PVA_ERR_VPU_BAD_STATE (0x28U)
/**
* @brief Error in case of exiting VPU.
*/
#define PVA_ERR_VPU_EXIT_ERROR (0x2aU)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Error in case of exiting PPE.
*/
#define PVA_ERR_PPE_EXIT_ERROR (0x2bU)
//! @endcond
/**
* @brief Error when a task running on PVE caused abort on PVE.
*/
#define PVA_ERR_PVE_ABORT (0x2dU)
/**
* @brief Error in case of Floating point NAN.
*/
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Error in case of a PPE divide by zero.
*/
#define PVA_ERR_PPE_DIVIDE_BY_0 (0x34U)
/**
* @brief Error in case of a PPE illegal debug condition.
*/
#define PVA_ERR_PPE_ILLEGAL_DEBUG (0x36U)
/**
* @brief Error in case of PPE illegal instruction alignment.
*/
#define PVA_ERR_PPE_ILLEGAL_INSTR_ALIGN (0x37U)
/**
* @brief Error in case of Bad cached DRAM segment.
*/
#define PVA_ERR_BAD_CACHED_DRAM_SEG (0x3aU)
/**
* @brief Error in case of Bad DRAM IOVA.
*/
#define PVA_ERR_BAD_DRAM_IOVA (0x3cU)
//! @endcond
/**
* @brief Error in case of Register mis-match.
*/
#define PVA_ERR_REG_MISMATCH (0x3dU)
/**
* @brief Error in case of AISR queue empty.
*/
#define PVA_ERR_AISR_INPUT_QUEUE_EMPTY (0x3fU)
/**
* @brief Error in case of AISR queue full.
*/
#define PVA_ERR_AISR_OUTPUT_QUEUE_FULL (0x40U)
#if (PVA_HAS_L2SRAM == 1)
/**
* @brief Error in case of L2SRAM allocation failed due to invalid parameters.
*/
#define PVA_ERR_BAD_L2SRAM_PARAMS (0x41U)
#endif
/**
* @brief Error in case of bad or invalid task parameters.
*/
#define PVA_ERR_BAD_TASK_PARAMS (0x42U)
/**
* @brief Error in case of invalid VPU system call.
*/
#define PVA_ERR_VPU_SYS_ERROR (0x43U)
/**
* @brief Error in case of HW Watchdog timer timeout
*/
#define PVA_ERR_WDT_TIMEOUT_ERROR (0x44U)
/**
* @brief Error in case Golden register check value mismatch.
*/
#define PVA_ERR_GR_REG_MISMATCH (0x45U)
/**
* @brief Error in case Critical register check value mismatch.
*/
#define PVA_ERR_CRIT_REG_MISMATCH (0x46U)
/** @} */
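/*
 * Illustrative sketch (not part of the original interface): translating a few
 * of the general error codes above into log strings on the KMD side. The
 * helper name is hypothetical and only a subset of codes is shown.
 *
 * @code
 * static const char *pva_err_str(pva_errors_t err)
 * {
 *     switch (err) {
 *     case PVA_ERR_NO_ERROR:     return "no error";
 *     case PVA_ERR_BAD_CMD:      return "illegal command";
 *     case PVA_ERR_BAD_QUEUE_ID: return "bad queue id";
 *     case PVA_ERR_BAD_TASK:     return "bad task";
 *     default:                   return "unrecognized PVA error";
 *     }
 * }
 * @endcode
 */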
/**
* @defgroup PVA_DMA_ERRORS
*
* @brief DMA ERROR codes used across PVA.
* @{
*/
/**
* @brief Error when DMA transfer mode in DMA descriptor is invalid.
*/
#define PVA_ERR_DMA_TRANSFER_TYPE_INVALID (0x204U)
/**
* @brief Error when DMA transfer was not successful.
*/
#define PVA_ERR_DMA_CHANNEL_TRANSFER (0x207U)
/**
* @brief Error in case of BAD DMA descriptor.
*/
#define PVA_ERR_BAD_DMA_DESC_ID (0x208U)
/**
* @brief Error in case of BAD DMA channel ID.
*/
#define PVA_ERR_BAD_DMA_CHANNEL_ID (0x209U)
/**
* @brief Error in case of DMA timeout.
*/
#define PVA_ERR_DMA_TIMEOUT (0x20bU)
/**
* @brief Error when a program tries to use a channel that is already active.
*/
#define PVA_ERR_DMA_INVALID_CONFIG (0x220U)
/**
* @brief Error in case DMA transfer was not successful.
*/
#define PVA_ERR_DMA_ERROR (0x221U)
/**
* @brief Error when number of bytes of HW Seq data copy is
* not a multiple of 4.
*/
#define PVA_ERR_DMA_HWSEQ_BAD_PROGRAM (0x216U)
/**
* @brief Error when number of bytes of HW Seq data copy is
* more than HW Seq RAM size.
*/
#define PVA_ERR_DMA_HWSEQ_PROGRAM_TOO_LONG (0x217U)
/** @} */
/**
* @defgroup PVA_MISR_ERRORS
*
* @brief MISR error codes used across PVA.
* @{
*/
/**
* @brief Error status when DMA MISR test is not run.
*/
#define PVA_ERR_MISR_NOT_RUN (0x280U)
/**
* @brief Error status when DMA MISR test did not complete.
*/
#define PVA_ERR_MISR_NOT_DONE (0x281U)
/**
* @brief Error status when DMA MISR test timed out.
*/
#define PVA_ERR_MISR_TIMEOUT (0x282U)
/**
* @brief Error status in case of DMA MISR test address failure.
*/
#define PVA_ERR_MISR_ADDR (0x283U)
/**
* @brief Error status in case of DMA MISR test data failure.
*/
#define PVA_ERR_MISR_DATA (0x284U)
/**
* @brief Error status in case of DMA MISR test data and address failure.
*/
#define PVA_ERR_MISR_ADDR_DATA (0x285U)
/** @} */
/**
* @defgroup PVA_VPU_ISR_ERRORS
*
* @brief VPU ISR error codes used across PVA.
* @{
*/
/**
* @defgroup PVA_FAST_RESET_ERRORS
*
* @brief Fast reset error codes used across PVA.
* @{
*/
/**
* @brief Error when VPU is not in idle state for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_VPU (0x401U)
/**
* @brief Error if VPU I-Cache is busy before checking DMA engine for idle state.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE1 (0x402U)
/**
* @brief Error if DMA channel is busy for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_CH0 (0x403U)
/**
* @brief Error if VPU I-Cache is busy after checking DMA engine for idle state.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE2 (0x419U)
#if (PVA_CHIP_ID == CHIP_ID_T26X)
/**
* @brief Error when PPE is not in idle state for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_PPE (0x420U)
#endif
/** @} */
/**
* @defgroup PVA_L2SRAM_ERRORS
*
* @brief L2SRAM memory error codes used across PVA.
* @{
*/
/**
* @brief Error if l2sram memory allocation failed because of insufficient l2sram memory or
* if 2 chunks of memory are already allocated.
*/
#define PVA_ERR_ALLOC_FAILED (0x812U)
/**
* @brief Error if the l2sram address given for clearing/freeing is not a valid L2SRAM address.
*/
#define PVA_ERR_FREE_FAILED (0x813U)
/** @} */
/**
* @defgroup PVA_INFO_ERRORS
*
* @brief Informational error codes.
* @{
*/
/**
* @brief Error when there is no task.
*/
#define PVA_ERR_NO_TASK (0x997U)
/**
* @brief Error when CCQ IRQ line enable on VIC fails
*/
#define PVA_ERR_CCQ_IRQ_ENABLE_FAILED (0x998U)
/**
* @brief Error when Mailbox IRQ line enable on VIC fails
*/
#define PVA_ERR_MBOX_IRQ_ENABLE_FAILED (0x999U)
/**
* @brief Error when L2SRAM IRQ line enable on VIC fails
*/
#define PVA_ERR_L2SRAM_IRQ_ENABLE_FAILED (0x99AU)
/**
* @brief Error when DMA0 IRQ line enable on VIC fails
*/
#define PVA_ERR_DMA0_IRQ_ENABLE_FAILED (0x99BU)
/**
* @brief Error when DMA1 IRQ line enable on VIC fails
*/
#define PVA_ERR_DMA1_IRQ_ENABLE_FAILED (0x99CU)
/**
* @brief Error when VPU IRQ line enable on VIC fails
*/
#define PVA_ERR_VPU_IRQ_ENABLE_FAILED (0x99DU)
/**
* @brief Error when SEC IRQ line enable on VIC fails
*/
#define PVA_ERR_SEC_IRQ_ENABLE_FAILED (0x99EU)
/**
* @brief Error when RAMIC IRQ line enable on VIC fails
*/
#define PVA_ERR_RAMIC_IRQ_ENABLE_FAILED (0x99FU)
/**
* @brief Error in case to try again.
* @note This error is internal to FW only.
*/
#define PVA_ERR_TRY_AGAIN (0x9A0U)
/** @} */
/* Never used */
#define PVA_ERR_MAX_ERR (0xFFFFU)
#endif /* PVA_ERRORS_H */

View File

@@ -0,0 +1,189 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2022 NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* Unit: Host Interface Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_FW_VERSION_H
#define PVA_FW_VERSION_H
#include <pva-bit.h>
/*
* Note: Below are doxygen comments with the @def command.
* This allows the comment to be physically distant from the define
* being documented, and allows for a single general comment
* regardless of the value being assigned to the macro.
*/
/**
* @defgroup PVA_VERSION_TYPE_FLAGS VERSION_TYPE Bit Flags
*
* @brief The bit flags that indicate the qualities of the Built Firmware.
* e.g: Debug, Safety, Test Features, etc.
*
* @see VERSION_TYPE
* @{
*/
/**
* @def VERSION_CODE_DEBUG
* @brief Set or Clear the 'debug' bit for the FW version type value. For a safety
* build the value of this define will be zero.
*
* @details This bit is set if the macro @ref PVA_DEBUG is defined.
* @see PVA_DEBUG
*/
#if PVA_DEBUG == 1
#define VERSION_CODE_DEBUG PVA_BIT(0)
#else
#define VERSION_CODE_DEBUG (0U)
#endif
/**
* @def VERSION_CODE_SAFETY
* @brief Set or Clear the 'safety' bit for the FW version type value. For a safety
* build the value of this define will be non-zero.
*
* @details This bit is set if the macro @ref PVA_SAFETY is defined.
* Building for Safety disables certain functions that are used for debug, testing,
* or would otherwise pose a risk to system conforming to safety protocols such as ISO-26262 or
* ASPICE.
*
* @see PVA_SAFETY
*/
#if PVA_SAFETY == 1
#define VERSION_CODE_SAFETY PVA_BIT(1)
#else
#define VERSION_CODE_SAFETY (0U)
#endif
/**
* @def VERSION_CODE_PVA_TEST_SUPPORT
* @brief Set or Clear the 'test support' bit for the FW version type value.
*
* @details This bit is set if the macro @ref TEST_TASK is defined.
* This bit is expected to be unset during a safety build.
*
* Building with test support enabled may add additional commands that
* can be processed by the FW to aid in testing of the system code. Often code of this
* nature can change the processing, memory, or timing characteristics of the system,
* and should only be enabled when explicitly needed.
*
*
* @see TEST_TASK
*/
#if TEST_TASK == 1
#define VERSION_CODE_PVA_TEST_SUPPORT PVA_BIT(2)
#else
#define VERSION_CODE_PVA_TEST_SUPPORT (0U)
#endif
/**
* @def VERSION_CODE_STANDALONE_TESTS
* @brief Set or Clear the 'standalone tests' bit for the FW version type value.
*
* @details This bit is set if the macro @ref TEST_TASK is defined.
* This bit is expected to be unset during a safety build.
*
* @see TEST_TASK
*
*/
#if TEST_TASK == 1
#define VERSION_CODE_STANDALONE_TESTS PVA_BIT(3)
#else
#define VERSION_CODE_STANDALONE_TESTS (0U)
#endif
/** @} */
/**
* @defgroup PVA_VERSION_MACROS PVA version macros used to calculate the PVA
* FW binary version.
* @{
*/
/**
* @brief An 8-bit bit field that describes which conditionally compiled facets of the Firmware
* have been enabled.
*
* @details The value of this macro is used when constructing a 32-bit Firmware Version identifier.
*
@verbatim
| Bit | Structure Field Name | Condition for Enabling |
|:-----:|:----------------------:|:------------------------:|
| 0 | VERSION_CODE_DEBUG | This bit is set when the Firmware is built with @ref PVA_DEBUG defined as 1. |
| 1 | VERSION_CODE_SAFETY | This bit is set when the Firmware is built with @ref PVA_SAFETY defined as 1. |
| 2 | VERSION_CODE_PVA_TEST_SUPPORT | This bit is set when the Firmware is built with @ref TEST_TASK defined as 1. |
| 3 | VERSION_CODE_STANDALONE_TESTS | This bit is set when the Firmware is built with @ref TEST_TASK defined as 1. |
| 4-7 | Reserved | The remaining bits of the bitfield are undefined. |
@endverbatim
* @see PVA_VERSION_TYPE_FLAGS
*/
#define VERSION_TYPE \
(uint32_t) VERSION_CODE_DEBUG | (uint32_t)VERSION_CODE_SAFETY | \
(uint32_t)VERSION_CODE_PVA_TEST_SUPPORT | \
(uint32_t)VERSION_CODE_STANDALONE_TESTS
/** @} */
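/*
 * Illustrative sketch (not part of the original interface): testing an
 * individual VERSION_TYPE flag. Assigning VERSION_TYPE to a variable first
 * avoids any operator-precedence surprises from the macro expansion.
 *
 * @code
 * uint32_t version_type = VERSION_TYPE;
 *
 * if ((version_type & (uint32_t)VERSION_CODE_SAFETY) != 0U) {
 *     // Safety build: debug and test-only facets are compiled out.
 * }
 * @endcode
 */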
/**
* @defgroup PVA_VERSION_VALUES PVA Major, Minor, and Subminor Version Values
*
* @brief The values listed below are applied to the corresponding fields when
* the PVA_VERSION macro is used.
*
* @see PVA_VERSION, PVA_MAKE_VERSION
* @{
*/
/**
* @brief The Major version of the Firmware
*/
#define PVA_VERSION_MAJOR 0x08
/**
* @brief The Minor version of the Firmware
*/
#define PVA_VERSION_MINOR 0x02
/**
* @brief The sub-minor version of the Firmware.
*/
#define PVA_VERSION_SUBMINOR 0x03
/** @} */
/**
* @def PVA_VERSION_GCID_REVISION
* @brief The GCID Revision of the Firmware.
*
* @details If this version is not otherwise defined during build time, this fallback value is used.
*/
#ifndef PVA_VERSION_GCID_REVISION
/**
* @brief GCID revision of PVA FW binary.
*/
#define PVA_VERSION_GCID_REVISION 0x00000000
#endif
/**
* @def PVA_VERSION_BUILT_ON
* @brief The date and time the version of software was built, expressed as the number
* of seconds since the Epoch (00:00:00 UTC, January 1, 1970).
*
* @details If this version is not otherwise defined during build time, this fallback value is used.
*/
#ifndef PVA_VERSION_BUILT_ON
#define PVA_VERSION_BUILT_ON 0x00000000
#endif
/** @} */
#endif

View File

@@ -0,0 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_PACKED_H
#define PVA_PACKED_H
/**
* @brief Packed attribute that prevents the compiler from adding any padding.
* The compiler implicitly adds padding between structure members to
* align them; the packed attribute avoids this.
* Packed is used for structures shared between KMD and FW:
* without it, the layout would depend on whatever padding each compiler adds.
* Since KMD and FW are compiled by two different compilers, we need to
* ensure that the offset of each member of the structure is the same in
* both KMD and FW. To ensure this we pack the structure.
*/
#define PVA_PACKED __attribute__((packed))
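/*
 * Illustrative sketch (not part of the original interface): a hypothetical
 * KMD/FW-shared structure declared with PVA_PACKED so that both compilers
 * lay it out identically (uint8_t/uint32_t from <stdint.h> assumed).
 *
 * @code
 * typedef struct PVA_PACKED {
 *     uint8_t  version;  // offset 0 on both KMD and FW builds
 *     uint32_t payload;  // offset 1 on both builds; no implicit padding
 * } example_shared_msg_t;
 * @endcode
 */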
#endif // PVA_PACKED_H

View File

@@ -0,0 +1,486 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2023 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Direct Memory Access Driver Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
/**
* @file pva-sys-dma.h
*
* @brief Types and constants related to PVA DMA setup and DMA
* descriptors.
*/
#ifndef PVA_SYS_DMA_H
#define PVA_SYS_DMA_H
#include <stdint.h>
#include <pva-bit.h>
#include <pva-packed.h>
#include "pva_fw_dma_hw_interface.h"
/**
* @brief The version number of the current DMA info structure.
* This is used for detecting the DMA info updates for future
* HW releases.
*/
#define PVA_DMA_INFO_VERSION_ID (1U)
/**
* @brief Number of DMA done masks in DMA info structure,
* corresponding to the number of DMA_COMMON_DMA_OUTPUT_ENABLEx
* registers in the HW.
*/
#define PVA_SYS_DMA_NUM_TRIGGERS (9U)
/* NOTE: This must be kept at 15 for the build to be
* successful, because pva_fw_test configures 15
* channels, but internally we only check that the
* number of channels requested is less than the
* maximum number of available channels. */
/**
* @brief Maximum Number of DMA channel configurations
* in DMA info structure.
*/
#define PVA_SYS_DMA_NUM_CHANNELS (15U)
/**
* @brief Maximum number of DMA descriptors allowed
* for use for VPU for T23x
*/
#define PVA_SYS_DMA_MAX_DESCRIPTORS_T23X (60U)
/**
* @brief Maximum number of DMA descriptors allowed
* for use for VPU for T26x
*/
#define PVA_SYS_DMA_MAX_DESCRIPTORS_T26X (92U)
/**
* @brief DMA registers for VPU0 and VPU1 which are primarily
* used by DMA config and R5 initialization.
*
* For more information refer to section 3.4 in PVA Cluster IAS
* document (Document 11 in Supporting Documentation and References)
*/
/**
* @brief DMA channel base register for VPU0.
*/
#define PVA_DMA0_REG_CH_0 PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_REG_CH_0_BASE)
/**
* @brief DMA common base register for VPU0.
*/
#define PVA_DMA0_COMMON PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_COMMON_BASE)
/**
* @brief DMA DESCRAM base register for VPU0.
*/
#define PVA_DMA0_DESCRAM PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_DESCRAM_BASE)
/**
* @brief DMA channel base register for VPU1.
*/
#define PVA_DMA1_REG_CH_0 PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_REG_CH_0_BASE)
/**
* @brief DMA common base register for VPU1.
*/
#define PVA_DMA1_COMMON PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_COMMON_BASE)
/**
* @brief DMA DESCRAM base register for VPU1.
*/
#define PVA_DMA1_DESCRAM PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_DESCRAM_BASE)
/** @} */
/**
*
* @brief DMA channel configuration for a user task.
*
* The DMA channel structure contains the set-up of a
* PVA DMA channel used for the VPU app.
*
* This VPU app should configure the channel information
* in this format
*
* @note : For more information on channel configuration, refer section 4.1.2 and 6.4 in
* the DMA IAS document (Document 6 in Supporting Documentation and References)
*/
typedef struct PVA_PACKED {
/**
* @brief HW DMA channel number from 1 to @ref PVA_NUM_DMA_CHANNELS.
*/
uint8_t ch_number;
/**
* @brief Three padding bytes added to align the next
* 4-byte field.
*/
uint8_t pad_dma_channel1[3];
/**
* @brief The value to be written to DMA channel
* control 0 register
*/
uint32_t cntl0;
/**
* @brief The value to be written to DMA channel
* control 1 register
*/
uint32_t cntl1;
/**
* @brief The value to be written to DMA channel
* boundary pad register
*/
uint32_t boundary_pad;
/**
* @brief This value to be written to DMA HW sequence
* control register.
*/
uint32_t hwseqcntl;
/**
* @brief This field is unused in t19x and T23x.
* It contains the value to be written to DMA
* channel HWSEQFSCNTL register.
*/
uint32_t hwseqfscntl;
/**
* @brief Output enable mask
*/
uint32_t outputEnableMask;
/**
* @brief Padding to align the whole structure
* to a 32 byte boundary.
*/
uint32_t pad_dma_channel0[1];
} pva_dma_ch_config_t;
/**
*
* @brief DMA info for an application. The app may be a VPU app which
* runs an algorithm on the VPU, or a DMA app which just has a DMA
* configuration to move certain data. In both cases the application
* should configure the DMA information in this structure format.
*
*/
typedef struct PVA_PACKED {
/**
* @brief The size of the dma_info structure.
* Should be populated with value sizeof(pva_dma_info_t)
* This is used to validate that the DRAM location populated
* by KMD is valid
*/
uint16_t dma_info_size;
/**
* @brief This field is used to populate the DMA Info version
* In case we need to create a new
* DMA version structure then the FW can distinguish the DMA
* info structure. Currently it should be populated with value
* @ref PVA_DMA_INFO_VERSION_ID
*/
uint16_t dma_info_version;
/**
* @brief The number of used channels. This field can
* be populated with values from 0 to
* @ref PVA_NUM_DMA_CHANNELS both inclusive.
*/
uint8_t num_channels;
/**
* @brief Number of used descriptors.
*
* Note: In generations of PVA where the reserved descriptor range lies
* in the middle of the entire descriptor range, when the range of
* descriptors requested by the user crosses over the reserved descriptor
* range, 'num_descriptors' will include the number of the reserved
* descriptors as well.
* E.g., if reserved descriptors are at indices 60-63 and the user application
* needs 70 descriptors, 'num_descriptors' will equal 74. However,
* if the user application needs 30 descriptors, 'num_descriptors' will be 30.
*
* On T19x and T23x, the field can be populated
* with values from 0 inclusive to less than
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS
*
* On T26x, the field can be populated with values from 0 inclusive to
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS + @ref PVA_NUM_RESERVED_DESCRIPTORS
*/
uint8_t num_descriptors;
/**
* @brief The number of bytes used in HW sequencer
*/
uint16_t num_hwseq;
/**
* @brief The First HW descriptor ID used.
*
* On T19x and T23x, the field can be populated
* with values from 0 inclusive to less than
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS
*
* On T26x, the field can be populated with values from 0 inclusive to
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS + @ref PVA_NUM_RESERVED_DESCRIPTORS
*/
uint8_t descriptor_id;
/**
* @brief Padding for alignment of next element
*/
uint8_t pva_dma_info_pad_0[3];
/**
* @brief DMA done triggers used by the VPU app.
* Correspond to COMMON_DMA_OUTPUT_ENABLE registers.
*/
uint32_t dma_triggers[PVA_SYS_DMA_NUM_TRIGGERS];
/**
* @brief DMA channel config used by the VPU app.
* One app can have up to @ref PVA_NUM_DMA_CHANNELS
* DMA channel configurations. The size of the array
* is @ref PVA_SYS_DMA_NUM_CHANNELS to allow for additional
* configuration required by future products.
*/
pva_dma_ch_config_t dma_channels[PVA_SYS_DMA_NUM_CHANNELS];
/**
* @brief Value to be set in DMA common configuration register.
*/
uint32_t dma_common_config;
/**
* @brief IOVA to an array of @ref pva_dtd_t, aligned at 64 bytes
* which holds the DMA descriptors used by the VPU app
*/
pva_iova dma_descriptor_base;
/**
* @brief HW sequencer configuration base address.
*/
pva_iova dma_hwseq_base;
/**
* @brief IOVA to a structure of @ref pva_dma_misr_config_t,
* location where DMA MISR configuration information is stored.
*/
pva_iova dma_misr_base;
} pva_dma_info_t;
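/*
 * Illustrative sketch (not part of the original interface): minimal population
 * of a pva_dma_info_t for a hypothetical app that uses one DMA channel and two
 * descriptors. 'descriptor_iova' is a placeholder for the IOVA of a 64-byte
 * aligned pva_dtd_t array; all other values are examples only.
 *
 * @code
 * pva_dma_info_t info = {0};
 *
 * info.dma_info_size       = (uint16_t)sizeof(pva_dma_info_t);
 * info.dma_info_version    = (uint16_t)PVA_DMA_INFO_VERSION_ID;
 * info.num_channels        = 1U;
 * info.num_descriptors     = 2U;
 * info.descriptor_id       = 0U;
 * info.dma_channels[0].ch_number = 1U;
 * info.dma_descriptor_base = descriptor_iova;
 * @endcode
 */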
/**
* @brief DMA descriptor.
*
* PVA DMA Descriptor in packed HW format.
* The individual fields can be found from
* the DMA IAS document (Document 6 in Supporting Documentation and References)
* section 4.1.3.2
*/
typedef struct PVA_PACKED {
/** @brief TRANSFER_CONTROL0 byte has DSTM in lower 2 bits, SRC_TF in 3rd bit,
* DDTM in 4th to 6th bit, DST_TF in 7th bit */
uint8_t transfer_control0;
/** @brief Next descriptor ID to be executed*/
uint8_t link_did;
/** @brief Highest 8 bits of the 40 bit source address*/
uint8_t src_adr1;
/** @brief Highest 8 bits of the 40 bit destination address*/
uint8_t dst_adr1;
/** @brief Lower 32 bits of the 40 bit source address*/
uint32_t src_adr0;
/** @brief Lower 32 bits of the 40 bit destination address*/
uint32_t dst_adr0;
/** @brief Length of tile line*/
uint16_t tx;
/** @brief Number of tile lines*/
uint16_t ty;
/** @brief Source Line pitch to advance to every line of 2D tile.*/
uint16_t slp_adv;
/** @brief Destination Line Pitch to advance to every line of 2D tile.*/
uint16_t dlp_adv;
/** @brief SRC PT1 CNTL has st1_adv in low 24 bits and ns1_adv in high 8 bits. */
uint32_t srcpt1_cntl;
/** @brief DST PT1 CNTL has dt1_adv in low 24 bits and nd1_adv in high 8 bits. */
uint32_t dstpt1_cntl;
/** @brief SRC PT2 CNTL has st2_adv in low 24 bits and ns2_adv in high 8 bits. */
uint32_t srcpt2_cntl;
/** @brief DST PT2 CNTL has dt2_adv in low 24 bits and nd2_adv in high 8 bits. */
uint32_t dstpt2_cntl;
/** @brief SRC PT3 CNTL has st3_adv in low 24 bits and ns3_adv in high 8 bits. */
uint32_t srcpt3_cntl;
/** @brief DST PT3 CNTL has dt3_adv in low 24 bits and nd3_adv in high 8 bits. */
uint32_t dstpt3_cntl;
/** @brief Source circular buffer Start address offset */
uint16_t sb_start;
/** @brief Destination circular buffer Start address offset*/
uint16_t db_start;
/** @brief Source buffer size in bytes for circular buffer mode from Source address.*/
uint16_t sb_size;
/** @brief Destination buffer size in bytes for circular buffer mode from destination address.*/
uint16_t db_size;
/** @brief currently reserved*/
uint16_t trig_ch_events;
/** @brief SW or HW events used for triggering the channel*/
uint16_t hw_sw_trig_events;
/** @brief Tile x coordinates, for boundary padding in pixels*/
uint8_t px;
/** @brief Tile y coordinates, for boundary padding in pixels*/
uint8_t py;
/** @brief Transfer control byte has lower 2 bits as BPP data, bit 2 with PXDIR, bit 3 as PYDIR,
* bit 4 as BPE, bit 5 as TTS, bit 6 RSVD, Bit 7 ITC.
*/
uint8_t transfer_control1;
/** @brief Transfer control 2 has bit 0 as PREFEN, bit 1 as DCBM, bit 2 as SCBM, and bit 3 as SBADR. */
uint8_t transfer_control2;
/** @brief Circular buffer upper bits for start address and size*/
uint8_t cb_ext;
/** @brief Reserved*/
uint8_t rsvd;
/** @brief Full replicated destination base address in VMEM aligned to 64 byte atom*/
uint16_t frda;
} pva_dtd_t;
/**
*
* @brief DMA MISR configuration information. This information is used by R5
* to program MISR registers if a task requests MISR computation on its
* output DMA channels.
*
*/
typedef struct PVA_PACKED {
/** @brief Reference value for CRC computed on write addresses, i.e., MISR 1 */
uint32_t ref_addr;
/** @brief Seed value for address CRC*/
uint32_t seed_crc0;
/** @brief Reference value for CRC computed on first 256-bits of AXI write data */
uint32_t ref_data_1;
/** @brief Seed value for write data CRC*/
uint32_t seed_crc1;
/** @brief Reference value for CRC computed on second 256-bits of AXI write data */
uint32_t ref_data_2;
/**
* @brief MISR timeout value configured in DMA common register
* @ref PVA_DMA_COMMON_MISR_ENABLE. Timeout is calculated as
* number of AXI clock cycles.
*/
uint32_t misr_timeout;
} pva_dma_misr_config_t;
/**
* @defgroup PVA_DMA_TC0_BITS
*
* @brief PVA Transfer Control 0 Bitfields
*
* @{
*/
/**
* @brief The shift value for extracting DSTM field
*/
#define PVA_DMA_TC0_DSTM_SHIFT (0U)
/**
* @brief The mask to be used to extract DSTM field
*/
#define PVA_DMA_TC0_DSTM_MASK (7U)
/**
* @brief The shift value for extracting DDTM field
*/
#define PVA_DMA_TC0_DDTM_SHIFT (4U)
/**
* @brief The mask to be used to extract DDTM field
*/
#define PVA_DMA_TC0_DDTM_MASK (7U)
/** @} */
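/*
 * Illustrative sketch (not part of the original interface): decoding the
 * source (DSTM) and destination (DDTM) transfer modes from a descriptor's
 * transfer_control0 byte using the shift and mask values above. 'desc' is a
 * placeholder pointer to a pva_dtd_t.
 *
 * @code
 * uint8_t tc0  = desc->transfer_control0;
 * uint8_t dstm = (uint8_t)((tc0 >> PVA_DMA_TC0_DSTM_SHIFT) & PVA_DMA_TC0_DSTM_MASK);
 * uint8_t ddtm = (uint8_t)((tc0 >> PVA_DMA_TC0_DDTM_SHIFT) & PVA_DMA_TC0_DDTM_MASK);
 * @endcode
 */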
/**
* @defgroup PVA_DMA_TM
*
* @brief DMA Transfer Modes. These can be used for both
* Source (DSTM) and Destination (DDTM) transfer modes
*
* @note : For more information on transfer modes, refer section 4.1.3.1 in
* the DMA IAS document (Document 6 in Supporting Documentation and References)
*
* @{
*/
/**
* @brief To indicate invalid transfer mode
*/
#define PVA_DMA_TM_INVALID (0U)
/**
* @brief To indicate MC transfer mode
*/
#define PVA_DMA_TM_MC (1U)
/**
* @brief To indicate VMEM transfer mode
*/
#define PVA_DMA_TM_VMEM (2U)
#if ENABLE_UNUSED == 1U
#define PVA_DMA_TM_CVNAS (3U)
#endif
/**
* @brief To indicate L2SRAM transfer mode
*/
#define PVA_DMA_TM_L2RAM (3U)
/**
* @brief To indicate TCM transfer mode
*/
#define PVA_DMA_TM_TCM (4U)
/**
* @brief To indicate MMIO transfer mode
*/
#define PVA_DMA_TM_MMIO (5U)
/**
* @brief To indicate Reserved transfer mode
*/
#define PVA_DMA_TM_RSVD (6U)
/**
* @brief To indicate VPU configuration transfer mode.
* This is only available in Source transfer mode or
* (DSTM). In Destination transfer mode, this value is
* reserved.
*/
#define PVA_DMA_TM_VPU (7U)
/** @} */
#if (ENABLE_UNUSED == 1U)
/**
* @brief The macro defines the number of
* bits to shift right to get the PXDIR field
* in Transfer Control 1 register in DMA
* Descriptor
*/
#define PVA_DMA_TC1_PXDIR_SHIFT (2U)
/**
* @brief The macro defines the number of
* bits to shift right to get the PYDIR field
* in Transfer Control 1 register in DMA
* Descriptor
*/
#define PVA_DMA_TC1_PYDIR_SHIFT (3U)
#endif
/**
* @defgroup PVA_DMA_BPP
*
* @brief PVA DMA Bits per Pixel
*
* @{
*/
/**
* @brief To indicate that the size of pixel data
* is 1 byte
*/
#define PVA_DMA_BPP_INT8 (0U)
#if ENABLE_UNUSED == 1U
#define PVA_DMA_BPP_INT16 (1U)
#endif
/** @} */
/**
* @brief PVA DMA Pad X direction set to right
*/
#define PVA_DMA_PXDIR_RIGHT (1U)
/**
* @brief PVA DMA Pad Y direction set to bottom
*/
#define PVA_DMA_PYDIR_BOT (1U)
#endif /* PVA_SYS_DMA_H */

View File

@@ -0,0 +1,150 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2023 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Task Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
/**
* @file pva-sys-params.h
*
* @brief Types and constants related to VPU application parameters.
*/
#ifndef PVA_SYS_PARAMS_H
#define PVA_SYS_PARAMS_H
#include <stdint.h>
#include <pva-packed.h>
#include <pva-types.h>
/** @brief VPU app parameters provided by the kernel-user, which are to be copied to
* VMEM at runtime.
*
* The VPU app parameters contain kernel-user-provided data to be
* copied into the VMEM before executing the VPU app. The parameter
* headers are stored in the IOVA address stored in the param_base
* member of this structure.
*
* The FW can also initialize complex datatypes, which are marked by
* special param_base outside the normal IOVA space. See the structure
* pva_vpu_instance_data_t for an example.
*/
typedef struct PVA_PACKED {
/** @brief IOVA address of the parameter data */
pva_iova param_base;
/** @brief VMEM offset where parameter data is to be copied */
uint32_t addr;
/** @brief Size of the parameter data in bytes */
uint32_t size;
} pva_vpu_parameter_list_t;
/**
 * @brief This structure holds information about the various
 * VMEM parameters that are submitted with the task.
*/
typedef struct PVA_PACKED {
/**
* @brief The IOVA address of the parameter data.
* This should point to an array of type @ref pva_vpu_parameter_list_t .
* If no parameters are present this should be set to 0
*/
pva_iova parameter_data_iova;
/**
* @brief The starting IOVA address of the parameter data whose size
* is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This data is copied
* from DRAM to TCM using DMA, and then memcopied to VMEM.
* If no small parameters are present this should be set to 0.
*/
pva_iova small_vpu_param_data_iova;
/**
	 * @brief The number of bytes of small VPU parameter data, i.e., the
	 * data whose size is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . If no small
	 * parameters are present, this should be set to 0
*/
uint32_t small_vpu_parameter_data_size;
/**
	 * @brief The index in the array of type @ref pva_vpu_parameter_list_t at which
	 * the large VPU parameters start, i.e., the VPU parameters whose size is greater
	 * than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This value always points to the index
	 * immediately after the small parameters. If no large parameter is present, this
	 * field will have the same value as the
	 * @ref pva_vpu_parameter_info_t.vpu_instance_parameter_list_start_index field
*/
uint32_t large_vpu_parameter_list_start_index;
/**
	 * @brief The index in the array of type @ref pva_vpu_parameter_list_t at which
	 * the VPU instance parameters start. This value always points to the index
	 * immediately after the large parameters if large parameters are present;
	 * otherwise it has the same value as the
	 * @ref pva_vpu_parameter_info_t.large_vpu_parameter_list_start_index field.
*/
uint32_t vpu_instance_parameter_list_start_index;
} pva_vpu_parameter_info_t;
/** @brief Special marker for the IOVA address of a task's parameter data, used to
 * distinguish whether the parameter data specified in the task should be used or
 * whether FW should create a supported parameter data instance. If the IOVA address
 * of the parameter data is less than this special marker, the parameter data
 * specified in the task is used; otherwise FW creates the parameter data.
 */
#define PVA_COMPLEX_IOVA (0xDA7AULL << 48ULL)
/** @brief Macro used to create new parameter base markers
* from the special marker address @ref PVA_COMPLEX_IOVA
*/
#define PVA_COMPLEX_IOVA_V(v) (PVA_COMPLEX_IOVA | ((uint64_t)(v) << 32ULL))
/** @brief Special Marker for @ref pva_vpu_instance_data_t */
#define PVA_SYS_INSTANCE_DATA_V1_IOVA (PVA_COMPLEX_IOVA_V(1) | 0x00000001ULL)
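/*
 * Illustrative sketch (not part of this interface): a task-side check of the
 * marker scheme above could look as follows; the helper name is hypothetical.
 *
 *   static inline bool pva_param_base_is_complex(pva_iova param_base)
 *   {
 *       // Bases at or above the marker request FW-generated (complex) data.
 *       return param_base >= PVA_COMPLEX_IOVA;
 *   }
 *
 *   // Requesting FW-provided V1 instance data for one parameter entry:
 *   //   list[i].param_base = PVA_SYS_INSTANCE_DATA_V1_IOVA;
 */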
/**
 * @brief The minimum size of a VPU parameter for it to be considered
 * a large parameter
*/
#define PVA_DMA_VMEM_COPY_THRESHOLD (uint32_t)(256U)
/**
* @brief The maximum combined size of all VMEM parameters
* that will be supported by PVA
*/
#define VMEM_PARAMETER_BUFFER_MAX_SIZE (uint32_t)(8192U)
/**
* @brief The maximum number of symbols that will be supported
* for one task
*/
#define TASK_VMEM_PARAMETER_MAX_SYMBOLS (uint32_t)(128U)
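/*
 * Illustrative sketch (an assumed layout, derived from the field descriptions
 * above): a task with three small parameters, two large parameters and one
 * instance parameter would populate pva_vpu_parameter_info_t roughly as:
 *
 *   entries [0..2] of the pva_vpu_parameter_list_t array: small parameters
 *   entries [3..4]: large parameters  -> large_vpu_parameter_list_start_index = 3
 *   entry   [5]: instance parameter   -> vpu_instance_parameter_list_start_index = 5
 *
 *   small_vpu_param_data_iova points at the packed small-parameter bytes and
 *   small_vpu_parameter_data_size is their combined size (each parameter being
 *   below PVA_DMA_VMEM_COPY_THRESHOLD).
 */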
/**
* @brief Information of the VPU instance data passed to VPU kernel.
*/
typedef struct PVA_PACKED {
/** @brief ID of the VPU assigned to the task */
uint16_t pve_id;
	/** @brief Flag to indicate whether a PPE task was launched or not */
uint16_t ppe_task_launched;
/** @brief Base of the VMEM memory */
uint32_t vmem_base;
/** @brief Base of the DMA descriptor SRAM memory */
uint32_t dma_descriptor_base;
/** @brief Base of L2SRAM allocated for the task executed */
uint32_t l2ram_base;
/** @brief Size of L2SRAM allocated for the task executed */
uint32_t l2ram_size;
} pva_vpu_instance_data_t;
#endif /* PVA_SYS_PARAMS_H */

View File

@@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_TYPES_H
#define PVA_TYPES_H
#include <stdint.h>
/**
* @brief Used to represent address (IOVA) in PVA system.
*/
typedef uint64_t pva_iova;
/**
 * @brief Used to store queue IDs, which represent the
 * actual hardware queue ID shared between FW and KMD.
*/
typedef uint8_t pva_queue_id_t;
/**
 * @brief Used to store a PVE ID, which identifies the
 * PVE being referred to.
*/
typedef uint8_t pva_pve_id_t;
/**
 * @brief Used to store a status interface ID, which identifies
 * the interface through which status needs to be written.
*/
typedef uint8_t pva_status_interface_id_t;
#endif

View File

@@ -0,0 +1,69 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2021 NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* Unit: Host Interface Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_VERSION_H
#define PVA_VERSION_H
#include <stdint.h>
#include <pva-bit.h>
#include <pva-fw-version.h>
/**
* @brief Calculate a 32-bit build version with @ref PVA_VERSION_SUBMINOR,
* @ref PVA_VERSION_MINOR, @ref PVA_VERSION_MAJOR and @ref VERSION_TYPE macros.
*
* @param [in] \_type\_ an 8-bit bitfield containing flags indicating which compilation
* features were enabled when the firmware was compiled.
*
* @param [in] \_major\_ an unsigned, 8-bit value containing the major version of the
* compiled firmware.
*
* @param [in] \_minor\_ an unsigned, 8-bit value containing the minor version of the
* compiled firmware.
*
* @param [in] \_subminor\_ an unsigned, 8-bit value containing the sub-minor version
* of the compiled firmware.
@verbatim
| ------------- | ---------------------|
| Bit Ranges | Function |
| ------------- | ---------------------|
| 7-0 | subminor version |
| 15-8 | minor version |
| 23-16 | major version |
| 31-24 | version type |
----------------------------------------
@endverbatim
*/
#define PVA_MAKE_VERSION(_type_, _major_, _minor_, _subminor_) \
(PVA_INSERT(_type_, 31, 24) | PVA_INSERT(_major_, 23, 16) | \
PVA_INSERT(_minor_, 15, 8) | PVA_INSERT(_subminor_, 7, 0))
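/*
 * Example (illustrative): with the bit layout shown above,
 *   PVA_MAKE_VERSION(0x01U, 2U, 7U, 0U)
 * evaluates to 0x01020700U (type 0x01, major 2, minor 7, subminor 0).
 */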
/**
* @brief Calculate PVA R5 FW binary version by calling @ref PVA_MAKE_VERSION macro.
*
* @param [in] \_type\_ an 8-bit bitfield containing flags indicating which compilation
* features were enabled when the firmware was compiled.
*
* @see VERSION_TYPE For details on how to construct the @p \_type\_ field.
*
* @see PVA_VERSION_MAJOR, PVA_VERSION_MINOR, PVA_VERSION_SUBMINOR for details
* on the values used at the time this documentation was produced.
*/
#define PVA_VERSION(_type_) \
PVA_MAKE_VERSION(_type_, PVA_VERSION_MAJOR, PVA_VERSION_MINOR, \
PVA_VERSION_SUBMINOR)
#endif

View File

@@ -0,0 +1,309 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* Unit: VPU Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
/**
* @file pva-vpu-syscall-interface.h
*
* @brief Syscall command specification
*
* VPU uses syscall commands to request services from R5. A syscall command is a
 * 32-bit value that consists of an 8-bit syscall ID and a 24-bit parameter. If more
* information needs to be passed to R5, the parameter field will be a pointer
* to a VMEM location.
*/
#ifndef PVA_VPU_SYSCALL_INTERFACE_H
#define PVA_VPU_SYSCALL_INTERFACE_H
#include <stdint.h>
/**
* @defgroup PVA_VPU_SYSCALL
*
 * @brief PVA VPU syscall IDs for each type of
 * syscall.
* @{
*/
//! @cond DISABLE_DOCUMENTATION
/**
* @brief VPU Syscall id for vpu printf write.
*/
#define PVA_FW_PE_SYSCALL_ID_WRITE (1U)
//! @endcond
/**
* @brief VPU Syscall id for Icache prefetch.
*/
#define PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH (2U)
/**
* @brief VPU Syscall id for masking exceptions.
*/
#define PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION (3U)
/**
* @brief VPU Syscall id for unmasking exceptions.
*/
#define PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION (4U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief VPU Syscall id for sampling VPU performance counters
*/
#define PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE (5U)
//! @endcond
/** @} */
/**
* @defgroup PVA_VPU_SYSCALL_WRITE_PARAM_GROUP
*
* @brief Parameter specification for syscall write
*/
/**
* @defgroup PVA_VPU_SYSCALL_COMMAND_FIELDS_GROUP
*
* @brief The command format to be used while issuing vpu syscall command from VPU kernel to R5.
 * The fields mentioned in this group are used for submitting the command
* through the Signal_R5 interface from VPU kernel.
*
* @{
*/
/**
* @brief The most significant bit of the vpu syscall ID field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_ID_MSB (31U)
/**
* @brief The least significant bit of the vpu syscall ID field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_ID_LSB (24U)
/**
* @brief The most significant bit of the vpu syscall parameter field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PARAM_MSB (23U)
/**
* @brief The least significant bit of the vpu syscall parameter field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PARAM_LSB (0U)
/** @} */
/**
* @defgroup PVA_VPU_SYSCALL_ICACHE_PREFETCH_PARAM_FIELDS_GROUP
*
* @brief The parameter format to be used while issuing vpu syscall command from VPU kernel to R5 for syscall icache prefetch.
 * The fields mentioned in this group are used for submitting the icache prefetch command
* through the Signal_R5 interface from VPU kernel.
*
* @{
*/
/**
* @brief The most significant bit of the prefetch cache line count field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_MSB (23U)
/**
* @brief The least significant bit of the prefetch cache line count field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_LSB (16U)
/**
* @brief The most significant bit of the prefetch address field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_ADDR_MSB (15U)
/**
* @brief The least significant bit of the prefetch address field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_ADDR_LSB (0U)
/** @} */
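/*
 * Illustrative sketch: a VPU kernel could assemble an icache prefetch syscall
 * command from the fields above. The local variable names are hypothetical;
 * only the macros come from this header.
 *
 *   uint32_t line_count = 4U;         // number of cache lines to prefetch
 *   uint32_t prefetch_addr = 0x1234U; // address field payload
 *   uint32_t cmd =
 *       ((uint32_t)PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH << PVA_FW_PE_SYSCALL_ID_LSB) |
 *       (line_count << PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_LSB) |
 *       (prefetch_addr << PVA_FW_PE_SYSCALL_PREFETCH_ADDR_LSB);
 *   // 'cmd' is then issued to R5 through the Signal_R5 interface.
 */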
/**
* @defgroup PVA_VPU_SYSCALL_MASK_UNMASK_PARAM_FIELDS_GROUP
*
* @brief The parameter format to be used while issuing vpu syscall command from VPU kernel
* to R5 for masking or unmasking FP NaN Exception.
 * The fields mentioned in this group are used for submitting the mask and unmask FP NaN exception command
* through the Signal_R5 interface from VPU kernel.
*
* @{
*/
/**
* @brief Parameter specification for syscall mask/unmask exceptions
*/
#define PVA_FW_PE_MASK_FP_INV_NAN (1U << 2U)
/** @} */
/**
 * @brief Write syscall parameter will be a pointer to this struct
* @{
*/
typedef union {
struct {
uint32_t addr;
uint32_t size;
} in;
struct {
uint32_t written_size;
} out;
} pva_fw_pe_syscall_write;
/** @} */
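/*
 * Illustrative sketch: the write syscall parameter points to this union in
 * VMEM. 'msg_vmem_addr' and 'msg_len' are hypothetical values.
 *
 *   pva_fw_pe_syscall_write req;
 *   req.in.addr = msg_vmem_addr;   // VMEM address of the bytes to write
 *   req.in.size = msg_len;         // number of bytes to write
 *   // Issue PVA_FW_PE_SYSCALL_ID_WRITE with a pointer to 'req'; on return,
 *   // req.out.written_size holds the number of bytes actually written.
 */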
/**
* @defgroup PVA_VPU_SYSCALL_PERFMON_SAMPLE_PARAM_GROUP
*
* @brief Parameter specification for syscall perfmon_sample
*
* @{
*/
/**
* @brief Perfmon sample syscall parameter will be a pointer to this struct
*/
typedef struct {
/** counter_mask[0] is for ID: 0-31; counter_mask[1] is for ID: 32-63 */
uint32_t counter_mask[2];
uint32_t output_addr;
} pva_fw_pe_syscall_perfmon_sample;
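/*
 * Illustrative sketch: selecting two counters by ID (IDs 0-31 map to bits in
 * counter_mask[0], IDs 32-63 to counter_mask[1]). The counter ID macros are
 * defined below; 'sample_buf_vmem_addr' is hypothetical.
 *
 *   pva_fw_pe_syscall_perfmon_sample req = { { 0U, 0U }, 0U };
 *   req.counter_mask[0] = (1U << PERFMON_COUNTER_ID_VPS_ID_VALID_T26X) |
 *                         (1U << PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X);
 *   req.output_addr = sample_buf_vmem_addr;
 *   // Issue PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE with a pointer to 'req'.
 */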
/**
* @brief Index for t26x performance counters for VPU
*/
#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T26X (0U)
#define PERFMON_COUNTER_ID_VPS_ID_VALID_T26X (1U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T26X (2U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T26X (3U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T26X (4U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T26X (5U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T26X (6U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T26X (7U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T26X (8U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T26X (9U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T26X (10U)
#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T26X (11U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T26X (12U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T26X (13U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T26X (14U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T26X (15U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T26X (16U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T26X (17U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T26X (18U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T26X (19U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_FETCH_REQ_T26X (20U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X (21U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREEMPT_T26X (22U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_LINES_T26X (23U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_DUR_T26X (24U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_DUR_T26X (25U)
#define PERFMON_COUNTER_ID_DLUT_BUSY_T26X (26U)
#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T26X (27U)
#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T26X (28U)
#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T26X (29U)
#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T26X (30U)
#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T26X (31U)
#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T26X (32U)
#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T26X (33U)
#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T26X (34U)
#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T26X (35U)
#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T26X (36U)
/**
* @brief Index for t23x performance counters
*/
#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T23X (0U)
#define PERFMON_COUNTER_ID_VPS_ID_VALID_T23X (1U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T23X (2U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T23X (3U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T23X (4U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T23X (5U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T23X (6U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T23X (7U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T23X (8U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T23X (9U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T23X (10U)
#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T23X (11U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T23X (12U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T23X (13U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T23X (14U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T23X (15U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T23X (16U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T23X (17U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T23X (18U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T23X (19U)
#define PERFMON_COUNTER_ID_ICACHE_FETCH_REQ_T23X (20U)
#define PERFMON_COUNTER_ID_ICACHE_MISS_T23X (21U)
#define PERFMON_COUNTER_ID_ICACHE_PREEMP_T23X (22U)
#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_LINES_T23X (23U)
#define PERFMON_COUNTER_ID_ICACHE_MISS_DUR_T23X (24U)
#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_DUR_T23X (25U)
#define PERFMON_COUNTER_ID_DLUT_BUSY_T23X (26U)
#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T23X (27U)
#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T23X (28U)
#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T23X (29U)
#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T23X (30U)
#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T23X (31U)
#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T23X (32U)
#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T23X (33U)
#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T23X (34U)
#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T23X (35U)
#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T23X (36U)
/**
* @brief Index for t26x performance counters for PPE
*/
#define PERFMON_COUNTER_ID_PPS_STALL_ID_NO_VAL_INSTR_T26X (0U)
#define PERFMON_COUNTER_ID_PPS_ID_VALID_T26X (1U)
#define PERFMON_COUNTER_ID_PPS_STALL_ID_REG_DEPEND_T26X (2U)
#define PERFMON_COUNTER_ID_PPS_STALL_ID_ONLY_T26X (3U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX1_ONLY_T26X (4U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_LD_DEPENDENCY_T26X (5U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_ST_DEPENDENCY_T26X (6U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_DEPENDENCY_T26X (7U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STRM_STORE_FLUSH_T26X (8U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_STORE_FLUSH_T26X (9U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STORE_FLUSH_T26X (10U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_LD_T26X (11U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_ST_T26X (12U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_T26X (13U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LD_T26X (14U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_ST_T26X (15U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LDST_T26X (16U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_PUSHBACK_T26X (17U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STQ_PUSHBACK_T26X (18U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_FLUSH_T26X (19U)
#define PERFMON_COUNTER_ID_PPS_WFE_GPI_EX_STATE_T26X (20U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_FETCH_REQ_T26X (21U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_T26X (22U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREEMPT_T26X (23U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_LINES_T26X (24U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_DUR_T26X (25U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_DUR_T26X (26U)
/** @} */
#endif /*PVA_VPU_SYSCALL_INTERFACE_H*/

View File

@@ -0,0 +1,295 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_FW_H
#define PVA_FW_H
#include "pva_api.h"
#include "pva_bit.h"
#include "pva_constants.h"
#include "pva_fw_address_map.h"
#include "pva_math_utils.h"
/* The sizes of these structs must be explicitly padded to align to 4 bytes */
struct pva_fw_prefence {
uint8_t offset_hi;
uint8_t pad0[3];
uint32_t offset_lo;
uint32_t resource_id;
uint32_t value;
};
struct pva_fw_postfence {
uint8_t offset_hi;
uint8_t ts_offset_hi;
/** A privileged user queue may need to trigger a fence that exists in the user's own
 * resource table. Set this flag to tell FW to use the user's resource table when
 * writing this post fence. This also applies to the timestamp resource ID. */
#define PVA_FW_POSTFENCE_FLAGS_USER_FENCE (1 << 0)
uint8_t flags;
uint8_t pad0;
uint32_t offset_lo;
uint32_t resource_id;
uint32_t value;
/* Timestamp part */
uint32_t ts_resource_id;
uint32_t ts_offset_lo;
};
struct pva_fw_memory_addr {
uint8_t offset_hi;
uint8_t pad0[3];
uint32_t resource_id;
uint32_t offset_lo;
};
struct pva_fw_cmdbuf_submit_info {
uint8_t num_prefence;
uint8_t num_postfence;
uint8_t num_input_status;
uint8_t num_output_status;
#define PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_MSB (1)
#define PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_LSB (0)
uint8_t flags;
uint8_t first_chunk_offset_hi;
/** First chunk size*/
uint16_t first_chunk_size;
struct pva_fw_prefence prefences[PVA_MAX_NUM_PREFENCES];
struct pva_fw_memory_addr input_statuses[PVA_MAX_NUM_INPUT_STATUS];
/** Resource ID of the first chunk */
uint32_t first_chunk_resource_id;
/** First chunk offset within the resource*/
uint32_t first_chunk_offset_lo;
/** Execution Timeout */
uint32_t execution_timeout_ms;
struct pva_fw_memory_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS];
struct pva_fw_postfence postfences[PVA_MAX_NUM_POSTFENCES];
};
/* This is the header of the circular buffer */
struct pva_fw_submit_queue_header {
/**
* Head index of the circular buffer. Updated by R5, read by CCPLEX
* (UMD/KMD).
*/
volatile uint32_t cb_head;
/**
* Tail index of the circular buffer. Updated by CCPLEX.
*
	 * CCPLEX informs R5 of the tail index through CCQ. In case KMD needs to
	 * flush the queue, it may need to read the tail from here.
*/
volatile uint32_t cb_tail;
/* Immediately followed by an array of struct pva_cmdbuf_submit_info */
};
static inline uint32_t pva_fw_queue_count(uint32_t head, uint32_t tail,
uint32_t size)
{
if (tail >= head) {
return safe_subu32(tail, head);
} else {
return safe_addu32(safe_subu32(size, head), tail);
}
}
static inline uint32_t pva_fw_queue_space(uint32_t head, uint32_t tail,
uint32_t size)
{
return safe_subu32(
safe_subu32(size, pva_fw_queue_count(head, tail, size)), 1u);
}
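/*
 * Illustrative sketch: before queuing a new submission into a circular buffer
 * with 'n_entries' slots, CCPLEX can check the free space. 'hdr' and
 * 'n_entries' are hypothetical.
 *
 *   uint32_t head = hdr->cb_head;
 *   uint32_t tail = hdr->cb_tail;
 *   if (pva_fw_queue_space(head, tail, n_entries) > 0U) {
 *       // Write the next pva_fw_cmdbuf_submit_info at index 'tail', then
 *       // advance the tail modulo n_entries and inform R5 through CCQ.
 *   }
 */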
/* CCQ commands: KMD -> R5, through CCQ FIFO */
/*
* Most CCQ commands are meant to be used at init time.
* During runtime, only use PVA_FW_CCQ_OP_UPDATE_TAIL
*/
#define PVA_FW_CCQ_OPCODE_MSB 63
#define PVA_FW_CCQ_OPCODE_LSB 60
/*
* tail value bit field: 31 - 0
* queue id bit field: 40 - 32
*/
#define PVA_FW_CCQ_OP_UPDATE_TAIL 0
#define PVA_FW_CCQ_TAIL_MSB 31
#define PVA_FW_CCQ_TAIL_LSB 0
#define PVA_FW_CCQ_QUEUE_ID_MSB 40
#define PVA_FW_CCQ_QUEUE_ID_LSB 32
/*
* resource table IOVA addr bit field: 39 - 0
* resource table number of entries bit field: 59 - 40
*/
#define PVA_FW_CCQ_OP_SET_RESOURCE_TABLE 1
#define PVA_FW_CCQ_RESOURCE_TABLE_ADDR_MSB 39
#define PVA_FW_CCQ_RESOURCE_TABLE_ADDR_LSB 0
#define PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_MSB 59
#define PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_LSB 40
/*
* submission queue IOVA addr bit field: 39 - 0
* submission queue number of entries bit field: 59 - 40
*/
#define PVA_FW_CCQ_OP_SET_SUBMISSION_QUEUE 2
#define PVA_FW_CCQ_QUEUE_ADDR_MSB 39
#define PVA_FW_CCQ_QUEUE_ADDR_LSB 0
#define PVA_FW_CCQ_QUEUE_N_ENTRIES_MSB 59
#define PVA_FW_CCQ_QUEUE_N_ENTRIES_LSB 40
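/*
 * Illustrative sketch: composing a 64-bit UPDATE_TAIL CCQ word with plain
 * shifts (an equivalent bit-insert helper may be used instead). 'queue_id'
 * and 'new_tail' are hypothetical.
 *
 *   uint64_t ccq_cmd =
 *       ((uint64_t)PVA_FW_CCQ_OP_UPDATE_TAIL << PVA_FW_CCQ_OPCODE_LSB) |
 *       ((uint64_t)queue_id << PVA_FW_CCQ_QUEUE_ID_LSB) |
 *       ((uint64_t)new_tail << PVA_FW_CCQ_TAIL_LSB);
 *   // KMD pushes 'ccq_cmd' into the CCQ FIFO.
 */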
/* KMD and FW communicate using messages.
*
* Message can contain up to 6 uint32_t.
*
* The first uint32_t is the header that contains message type and length.
*/
#define PVA_FW_MSG_MAX_LEN 6
/* KMD sends messages to R5 using the CCQ FIFO. The message length is always 64 bits. */
/* When R5 sends messages to KMD using CCQ statuses, we use statuses 3 - 8
*
* msg[0] = STATUS8 -> generate interrupt to KMD
* msg[1] = STATUS3
* msg[2] = STATUS4
* msg[3] = STATUS5
* msg[4] = STATUS6
* msg[5] = STATUS7
*/
#define PVA_FW_MSG_STATUS_BASE 3
#define PVA_FW_MSG_STATUS_LAST 8
#define PVA_FW_MSG_TYPE_MSB 30
#define PVA_FW_MSG_TYPE_LSB 25
#define PVA_FW_MSG_LEN_MSB 24
#define PVA_FW_MSG_LEN_LSB 22
/* The remaining bits (0 - 21) of msg[0] can be used for message specific
* payload */
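/*
 * Illustrative sketch: extracting the type and length from a received message
 * header with plain shifts and masks. 'msg0' (the first word of the message)
 * is hypothetical.
 *
 *   uint32_t type = (msg0 >> PVA_FW_MSG_TYPE_LSB) &
 *                   ((1U << (PVA_FW_MSG_TYPE_MSB - PVA_FW_MSG_TYPE_LSB + 1U)) - 1U);
 *   uint32_t len  = (msg0 >> PVA_FW_MSG_LEN_LSB) &
 *                   ((1U << (PVA_FW_MSG_LEN_MSB - PVA_FW_MSG_LEN_LSB + 1U)) - 1U);
 */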
/* Message types: R5 -> CCPLEX */
#define PVA_FW_MSG_TYPE_ABORT 1
#define PVA_FW_MSG_TYPE_BOOT_DONE 2
#define PVA_FW_MSG_TYPE_FLUSH_PRINT 3
#define PVA_FW_MSG_TYPE_RESOURCE_UNREGISTER 3
/* Message types: CCPLEX -> R5 */
#define PVA_FW_MSG_TYPE_UPDATE_TAIL 32
/* Parameters for message ABORT
* ABORT message contains a short string (up to 22 chars).
 * The first two characters are in the message header (bits 15 - 0).
*/
#define PVA_FW_MSG_ABORT_STR_MAX_LEN 22
/* Parameters for message BOOT_DONE */
#define PVA_FW_MSG_R5_START_TIME_LO_IDX 1
#define PVA_FW_MSG_R5_START_TIME_HI_IDX 2
#define PVA_FW_MSG_R5_READY_TIME_LO_IDX 3
#define PVA_FW_MSG_R5_READY_TIME_HI_IDX 4
/* Parameters for message FLUSH PRINT */
struct pva_fw_print_buffer_header {
#define PVA_FW_PRINT_BUFFER_OVERFLOWED (1 << 0)
#define PVA_FW_PRINT_FAILURE (1 << 1)
uint32_t flags;
uint32_t tail;
/* Followed by print content */
};
/* Parameters for message resource unregister */
/* Table ID is stored in msg[0], bit: 0 - 7 */
#define PVA_FW_MSG_RESOURCE_TABLE_ID_MSB 7
#define PVA_FW_MSG_RESOURCE_TABLE_ID_LSB 0
/* Followed by up to 5 resource IDs. The actual number of resource ID is
* indicated by the message length. */
/** @brief Circular buffer based data channel to share data between R5 and CCPLEX */
struct pva_data_channel {
uint32_t size;
#define PVA_DATA_CHANNEL_OVERFLOW (1U << 0U)
uint32_t flags;
uint32_t head;
/**
	 * Offset location in the circular buffer at which VPU printf data will be written by FW
*/
uint32_t tail;
/* Immediately followed by circular buffer data */
};
/* PVA FW Event profiling definitions */
// Event identifiers
#define PVA_FW_EVENT_DO_CMD PVA_BIT8(1)
#define PVA_FW_EVENT_SCAN_QUEUES PVA_BIT8(2)
#define PVA_FW_EVENT_SCAN_SLOTS PVA_BIT8(3)
#define PVA_FW_EVENT_RUN_VPU PVA_BIT8(4)
// Event message format
struct pva_fw_event_message {
uint32_t event : 5;
uint32_t type : 3;
uint32_t arg1 : 8;
uint32_t arg2 : 8;
uint32_t arg3 : 8;
};
// Each event is one of the following types. This should fit within 3 bits
enum pva_fw_events_type {
EVENT_TRY = 0U,
EVENT_START,
EVENT_YIELD,
EVENT_DONE,
EVENT_ERROR,
EVENT_TYPE_MAX = 7U
};
static inline const char *event_type_to_string(enum pva_fw_events_type status)
{
switch (status) {
case EVENT_TRY:
return "TRY";
case EVENT_START:
return "START";
case EVENT_YIELD:
return "YIELD";
case EVENT_DONE:
return "DONE";
case EVENT_ERROR:
return "ERROR";
default:
return "";
}
}
enum pva_fw_timestamp_t {
TIMESTAMP_TYPE_TSE = 0,
TIMESTAMP_TYPE_CYCLE_COUNT = 1
};
struct pva_fw_profiling_buffer_header {
#define PVA_FW_PROFILING_BUFFER_OVERFLOWED (1 << 0)
#define PVA_FW_PROFILING_FAILURE (1 << 1)
uint32_t flags;
uint32_t tail;
/* Followed by print content */
};
/* End of PVA FW Event profiling definitions */
struct pva_kmd_fw_tegrastats {
uint64_t window_start_time;
uint64_t window_end_time;
uint64_t total_utilization[PVA_NUM_PVE];
};
#endif // PVA_FW_H

View File

@@ -0,0 +1,178 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Boot Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_FW_ADDRESS_MAP_H
#define PVA_FW_ADDRESS_MAP_H
/**
* @brief Starting R5 address where FW code and data is placed.
* This address is expected to be programmed in PVA_CFG_AR1PRIV_START by KMD.
* This address is also expected to be used as offset where PVA_CFG_R5PRIV_LSEGREG1
* and PVA_CFG_R5PRIV_USEGREG1 registers would point.
*/
#define FW_CODE_DATA_START_ADDR 0x60000000
/**
* @brief R5 address where FW code and data is expected to end.
* This address is expected to be programmed in PVA_CFG_AR1PRIV_END by KMD.
*/
#if PVA_DEV_MAIN_COMPATIBLE == 1
#define FW_CODE_DATA_END_ADDR 0x60220000
#else
#define FW_CODE_DATA_END_ADDR 0x62000000
#endif
/**
* @defgroup PVA_EXCEPTION_VECTORS
*
* @brief Following macros define R5 addresses that are expected to be
* programmed by KMD in EVP registers as is.
* @{
*/
/**
* @brief R5 address of reset exception vector
*/
#define EVP_RESET_VECTOR 0x60040C00
/**
* @brief R5 address of undefined instruction exception vector
*/
#define EVP_UNDEFINED_INSTRUCTION_VECTOR (EVP_RESET_VECTOR + 0x400 * 1)
/**
* @brief R5 address of svc exception vector
*/
#define EVP_SVC_VECTOR (EVP_RESET_VECTOR + 0x400 * 2)
/**
* @brief R5 address of prefetch abort exception vector
*/
#define EVP_PREFETCH_ABORT_VECTOR (EVP_RESET_VECTOR + 0x400 * 3)
/**
* @brief R5 address of data abort exception vector
*/
#define EVP_DATA_ABORT_VECTOR (EVP_RESET_VECTOR + 0x400 * 4)
/**
* @brief R5 address of reserved exception vector.
* It points to a dummy handler.
*/
#define EVP_RESERVED_VECTOR (EVP_RESET_VECTOR + 0x400 * 5)
/**
* @brief R5 address of IRQ exception vector
*/
#define EVP_IRQ_VECTOR (EVP_RESET_VECTOR + 0x400 * 6)
/**
* @brief R5 address of FIQ exception vector
*/
#define EVP_FIQ_VECTOR (EVP_RESET_VECTOR + 0x400 * 7)
/** @} */
/**
* @defgroup PVA_DEBUG_BUFFERS
*
* @brief These buffers are arranged in the following order:
* TRACE_BUFFER followed by CODE_COVERAGE_BUFFER followed by DEBUG_LOG_BUFFER.
* @{
*/
/**
* @brief Maximum size of trace buffer in bytes.
*/
#define FW_TRACE_BUFFER_SIZE 0x40000
/**
* @brief Maximum size of code coverage buffer in bytes.
*/
#define FW_CODE_COVERAGE_BUFFER_SIZE 0x80000
/**
* @brief Maximum size of debug log buffer in bytes.
*/
#if PVA_DEV_MAIN_COMPATIBLE == 1
#define FW_DEBUG_LOG_BUFFER_SIZE 0x40000
#else
#define FW_DEBUG_LOG_BUFFER_SIZE 0x400000
#endif
/** @} */
/**
* @brief Total size of buffers used for FW debug in bytes.
 * TBD: Update this value based on the build configuration once KMD changes are merged.
*/
#define FW_DEBUG_DATA_TOTAL_SIZE \
(FW_TRACE_BUFFER_SIZE + FW_DEBUG_LOG_BUFFER_SIZE + \
FW_CODE_COVERAGE_BUFFER_SIZE)
/**
* @brief Starting R5 address where FW debug related data is placed.
* This address is expected to be programmed in PVA_CFG_AR2PRIV_START by KMD.
* This address is also expected to be used as offset where PVA_CFG_R5PRIV_LSEGREG2
* and PVA_CFG_R5PRIV_USEGREG2 registers would point.
*/
#define FW_DEBUG_DATA_START_ADDR (0x70000000) //1879048192 0x70000000
/**
* @brief R5 address where FW debug related data is expected to end.
* This address is expected to be programmed in PVA_CFG_AR2PRIV_END by KMD.
*/
#define FW_DEBUG_DATA_END_ADDR \
(FW_DEBUG_DATA_START_ADDR + FW_DEBUG_DATA_TOTAL_SIZE)
/**
* @brief Starting R5 address where FW expects shared buffers between KMD and FW to be placed.
* This is to be used as offset when programming PVA_CFG_R5USER_LSEGREG and PVA_CFG_R5USER_USEGREG.
*/
#define FW_SHARED_MEMORY_START (0x80000000U) //2147483648 0x80000000
/**
* @defgroup PVA_HYP_SCR_VALUES
*
* @brief Following macros specify SCR firewall values that are expected to be
* programmed by Hypervisor.
* @{
*/
/**
* @brief EVP SCR firewall to enable only CCPLEX read/write access.
*/
#define PVA_EVP_SCR_VAL 0x19000202
/**
* @brief PRIV SCR firewall to enable only CCPLEX and R5 read/write access.
*/
#define PVA_PRIV_SCR_VAL 0x1F008282
/**
* @brief CCQ SCR firewall to enable only CCPLEX write access and R5 read access.
*/
#define PVA_CCQ_SCR_VAL 0x19000280
/**
* @brief Status Ctl SCR firewall to enable only CCPLEX read access and R5 read/write access.
*/
#define PVA_STATUS_CTL_SCR_VAL 0x1f008082
/** @} */
/**
* @defgroup PVA_KMD_SCR_VALUES
*
* @brief Following macros specify SCR firewall values that are expected to be
* programmed by KMD.
* @{
*/
/**
* @brief SECEXT_INTR SCR firewall to enable only CCPLEX and R5 read/write access.
*/
#define PVA_SEC_SCR_SECEXT_INTR_EVENT_VAL 0x39008282U
/**
* @brief PROC SCR firewall to enable only CCPLEX read/write access and R5 read only access.
*/
#define PVA_PROC_SCR_PROC_VAL 0x39000282U
/** @} */
#endif

View File

@@ -0,0 +1,120 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_FW_HYP_H
#define PVA_FW_HYP_H
/**
* @defgroup PVA_BOOT_TIME_MBOX
*
* @brief This group defines the mailboxes used by KMD to pass start iovas required for
* user segment and priv2 segment configuration during boot.
* @{
*/
/**
* @brief Used to pass bits 31-0 of start iova of user segment.
*/
#define PVA_MBOXID_USERSEG_L (1U)
/**
* @brief Used to pass bits 39-32 of start iova of user segment.
*/
#define PVA_MBOXID_USERSEG_H (2U)
/**
* @brief Used to pass bits 31-0 of start iova of priv2 segment.
*/
#define PVA_MBOXID_PRIV2SEG_L (3U)
/**
* @brief Used to pass bits 39-32 of start iova of priv2 segment.
*/
#define PVA_MBOXID_PRIV2SEG_H (4U)
/** @} */
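/*
 * Illustrative sketch: splitting a 40-bit user-segment start IOVA across the
 * two mailboxes above. 'userseg_iova' and the mailbox write helper are
 * hypothetical.
 *
 *   write_mailbox(PVA_MBOXID_USERSEG_L, (uint32_t)(userseg_iova & 0xFFFFFFFFULL));
 *   write_mailbox(PVA_MBOXID_USERSEG_H, (uint32_t)((userseg_iova >> 32U) & 0xFFULL));
 */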
/**
* @defgroup PVA_SHARED_SEMAPHORE_STATUS_GROUP
*
* @brief The status bits for the shared semaphore which are mentioned in
* the group are used to communicate various information between KMD and
* PVA R5 FW. The highest 16 bits are used to send information from KMD to
* R5 FW and the lower 16 bits are used to send information from R5 FW to KMD by
* writing to the @ref PVA_BOOT_SEMA semaphore
*
* The bit-mapping of the semaphore is described below. The table below shows the mapping which
* is sent by KMD to FW.
*
* | Bit Position | Bit Field Name | Description |
* |:------------:|:---------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------:|
* | 31 | BOOT INT | To indicate that KMD is expecting an interrupt from R5 once boot is complete |
* | 30 | Reserved | Reserved for future use |
* | 27-25 | Reserved | Reserved for future use |
* | 23-21 | Reserved | Reserved for future use |
* | 20 | CG DISABLE | To indicate the PVA R5 FW should disable the clock gating feature |
* | 19 | VMEM RD WAR DISABLE | To disable the VMEM Read fail workaround feature |
* | 18-16 | Reserved | Reserved for future use |
*
* The table below shows the mapping which is sent by FW to KMD
*
* | Bit Position | Bit Field Name | Description |
* |:------------:|:---------------------:|:-----------------------------------------------------------------------------------------------------------:|
* | 15-11 | Reserved | Reserved for future use |
* | 07-03 | Reserved | Reserved for future use |
* | 02 | HALTED | To indicate to KMD that the PVA R5 FW has halted execution |
* | 01 | BOOT DONE | To indicate to KMD that the PVA R5 FW booting is complete |
*
* @{
*/
//! @endcond
/**
* @brief This field is used to indicate that the R5 FW should
* disable the clock gating feature
*/
#define PVA_BOOT_SEMA_CG_DISABLE PVA_BIT(20U)
//! @cond DISABLE_DOCUMENTATION
/** Tell firmware that block linear surfaces are in XBAR_RAW format instead of
* TEGRA_RAW format */
#define PVA_BOOT_SEMA_USE_XBAR_RAW PVA_BIT(17U)
#define PVA_BOOT_SEMA 0U
/**
* @brief This macro has the value to be set by KMD in the shared semaphores
* @ref PVA_PREFENCE_SYNCPT_REGION_IOVA_SEM or @ref PVA_POSTFENCE_SYNCPT_REGION_IOVA_SEM
* if the syncpoint reserved region must not be configured as uncached
* in R5 MPU.
*/
#define PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET (0xFFFFFFFFU)
/** @} */
/* Runtime mailbox messages between firmware and hypervisor */
/* When the hypervisor sends messages to R5 through mailboxes, we use mailboxes 0 - 1
* msg[0] = mailbox 1 -> generate interrupt to R5
* msg[1] = mailbox 0
*/
#define PVA_FW_MBOX_TO_R5_BASE 0
#define PVA_FW_MBOX_TO_R5_LAST 1
/* When R5 sends messages to the hypervisor through mailboxes, we use mailboxes 2 - 7
* msg[0] = mailbox 7 -> generate interrupt to hypervisor
* msg[1] = mailbox 2
* msg[2] = mailbox 3
* msg[3] = mailbox 4
* msg[4] = mailbox 5
* msg[5] = mailbox 6
*/
#define PVA_FW_MBOX_TO_HYP_BASE 2
#define PVA_FW_MBOX_TO_HYP_LAST 7
#define PVA_FW_MBOX_FULL_BIT PVA_BIT(31)
#endif // PVA_FW_HYP_H

View File

@@ -0,0 +1,340 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_RESOURCE_H
#define PVA_RESOURCE_H
#include "pva_api.h"
#include "pva_api_dma.h"
#include "pva_bit.h"
#include "pva_constants.h"
#include "pva_utils.h"
#include "pva_math_utils.h"
/* The sizes of these structs must be explicitly padded to align to 4 bytes */
struct pva_fw_dma_descriptor {
uint8_t transfer_control0;
uint8_t link_did;
uint8_t src_adr1;
uint8_t dst_adr1;
uint32_t src_adr0;
uint32_t dst_adr0;
uint16_t tx;
uint16_t ty;
uint16_t slp_adv;
uint16_t dlp_adv;
uint32_t srcpt1_cntl;
uint32_t dstpt1_cntl;
uint32_t srcpt2_cntl;
uint32_t dstpt2_cntl;
uint32_t srcpt3_cntl;
uint32_t dstpt3_cntl;
uint16_t sb_start;
uint16_t db_start;
uint16_t sb_size;
uint16_t db_size;
uint16_t trig_ch_events;
uint16_t hw_sw_trig_events;
uint8_t px;
uint8_t py;
uint8_t transfer_control1;
uint8_t transfer_control2;
uint8_t cb_ext;
uint8_t rsvd;
uint16_t frda;
};
/** Each slot is mapped to <reloc_count> pva_fw_dma_reloc entries. When a
 * bind_dram/vmem_slot command is executed, the slot_id will be an index into
 * the slot array. The slot contains the starting index and count of reloc structs.
 * All descriptor fields identified by the reloc structs will be patched; a
 * simplified sketch of this patching step follows the pva_fw_dma_reloc definition below.
*/
struct pva_fw_dma_slot {
/** This slot can be bound to a DRAM buffer */
#define PVA_FW_DMA_SLOT_FLAG_DRAM (1u << 0u)
/** This slot can be bound to a L2SRAM buffer */
#define PVA_FW_DMA_SLOT_FLAG_L2SRAM (1u << 1u)
/** This slot can be bound to a VMEM DATA buffer */
#define PVA_FW_DMA_SLOT_FLAG_VMEM_DATA (1u << 2u)
/** This slot can be bound to a VMEM VPU config table buffer */
#define PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE (1u << 3u)
/** This slot has the circular buffer enabled. A slot with this flag cannot be bound
 * to a block-linear surface. */
#define PVA_FW_DMA_SLOT_FLAG_CB (1u << 4u)
#define PVA_FW_DMA_SLOT_FLAG_BOUND (1u << 5u)
uint8_t flags;
uint8_t pad;
/** Bitmask of channels that use this slot */
uint16_t ch_use_mask;
/** The number of descriptor fields that share this slot. Each field
* will have a pva_fw_dma_reloc struct
*/
uint16_t reloc_count;
/** Starting index in the pva_fw_dma_reloc array */
uint16_t reloc_start_idx;
int64_t start_addr;
int64_t end_addr;
};
static inline uint32_t get_slot_size(struct pva_fw_dma_slot const *slot)
{
uint32_t size = UINT32_MAX;
int64_t tmp_size = 0;
if (slot->end_addr < slot->start_addr) {
return size;
}
tmp_size = slot->end_addr - slot->start_addr;
if (tmp_size > (int64_t)UINT32_MAX) {
return size;
}
size = (uint32_t)tmp_size;
return size;
}
/**
* A relocate struct identifies an address field (src, dst or dst2) in
* the descriptor. The identified address field contains an offset instead of
* absolute address. The base address will be added to the offset during
* binding.
*
* This struct only has 2 bytes, so an array of this struct must have an even
* number of elements to satisfy alignment requirement.
*/
struct pva_fw_dma_reloc {
uint8_t desc_index;
/** This relocation is for source field */
#define PVA_FW_DMA_RELOC_FIELD_SRC 1u
/** This relocation is for destination field */
#define PVA_FW_DMA_RELOC_FIELD_DST 2u
/** This relocation is for destination 2 field */
#define PVA_FW_DMA_RELOC_FIELD_DST2 3u
uint8_t field;
};
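/*
 * Illustrative sketch of how a slot binding may be applied, based on the
 * comments above: each reloc entry names one address field of one descriptor;
 * the field initially holds an offset and binding adds the buffer base
 * address to it. This is a simplified illustration, not the firmware code
 * (only the low 32 address bits are shown).
 *
 *   for (uint16_t i = 0U; i < slot->reloc_count; i++) {
 *       struct pva_fw_dma_reloc const *r = &relocs[slot->reloc_start_idx + i];
 *       struct pva_fw_dma_descriptor *d = &descs[r->desc_index];
 *       if (r->field == PVA_FW_DMA_RELOC_FIELD_SRC)
 *           d->src_adr0 += (uint32_t)base_addr;
 *       else if (r->field == PVA_FW_DMA_RELOC_FIELD_DST)
 *           d->dst_adr0 += (uint32_t)base_addr;
 *       // PVA_FW_DMA_RELOC_FIELD_DST2 would be handled similarly.
 *   }
 */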
struct pva_fw_dma_channel {
uint32_t cntl0;
uint32_t cntl1;
uint32_t boundary_pad;
uint32_t hwseqcntl;
uint32_t hwseqfscntl;
};
struct pva_fw_data_section_info {
	uint32_t data_buf_off; /**< Offset into the data section's data byte array */
uint32_t vmem_addr;
uint32_t size;
};
struct pva_dma_resource_map {
// TODO: These macros should be derived using the maximum limits across platforms
// Today, they are being hardcoded. Make it automatic
#define PVA_DMA_NUM_CHANNEL_PARTITIONS \
((PVA_MAX_NUM_DMA_CHANNELS) / (PVA_DMA_CHANNEL_ALIGNMENT))
#define PVA_DMA_NUM_DESCRIPTOR_PARTITIONS \
((PVA_MAX_NUM_DMA_DESC) / (PVA_DMA_DESCRIPTOR_ALIGNMENT))
#define PVA_DMA_NUM_ADB_PARTITIONS \
((PVA_MAX_NUM_ADB_BUFFS) / (PVA_DMA_ADB_ALIGNMENT))
#define PVA_DMA_NUM_HWSEQ_WORD_PARTITIONS \
((PVA_MAX_NUM_HWSEQ_WORDS) / (PVA_DMA_HWSEQ_WORD_ALIGNMENT))
uint64_t channels : PVA_DMA_NUM_CHANNEL_PARTITIONS;
uint64_t descriptors : PVA_DMA_NUM_DESCRIPTOR_PARTITIONS;
uint64_t adbs : PVA_DMA_NUM_ADB_PARTITIONS;
uint64_t hwseq_words : PVA_DMA_NUM_HWSEQ_WORD_PARTITIONS;
uint64_t triggers : 1;
};
static inline void
pva_dma_resource_map_reset(struct pva_dma_resource_map *resource_map)
{
resource_map->channels = 0u;
resource_map->descriptors = 0u;
resource_map->adbs = 0u;
resource_map->hwseq_words = 0u;
resource_map->triggers = 0u;
}
// Note: the following pva_dma_resource_map_* APIs assume an alignment requirement
// on the 'start' index. We do not enforce it here though. If this requirement
// is not met, the FW may falsely predict resource conflicts between commands.
// However, this will not impact functionality or correctness.
static inline void
pva_dma_resource_map_add_channels(struct pva_dma_resource_map *map,
uint16_t start, uint16_t count)
{
map->channels |= pva_mask64(start, count, PVA_DMA_CHANNEL_ALIGNMENT);
}
static inline void
pva_dma_resource_map_add_descriptors(struct pva_dma_resource_map *map,
uint16_t start, uint16_t count)
{
map->descriptors |=
pva_mask64(start, count, PVA_DMA_DESCRIPTOR_ALIGNMENT);
}
static inline void
pva_dma_resource_map_add_adbs(struct pva_dma_resource_map *map, uint16_t start,
uint16_t count)
{
map->adbs |= pva_mask64(start, count, PVA_DMA_ADB_ALIGNMENT);
}
static inline void
pva_dma_resource_map_add_hwseq_words(struct pva_dma_resource_map *map,
uint16_t start, uint16_t count)
{
map->hwseq_words |=
pva_mask64(start, count, PVA_DMA_HWSEQ_WORD_ALIGNMENT);
}
static inline void
pva_dma_resource_map_add_triggers(struct pva_dma_resource_map *map)
{
// If an application is running on VPU, it has access to all the triggers
// Only FW and DMA-only workloads can initiate transfers in parallel to
// a running VPU application, but they do not require triggers.
map->triggers |= 1;
}
static inline void
pva_dma_resource_map_copy_channels(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->channels |= src_map->channels;
}
static inline void
pva_dma_resource_map_copy_descriptors(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->descriptors |= src_map->descriptors;
}
static inline void
pva_dma_resource_map_copy_adbs(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->adbs |= src_map->adbs;
}
static inline void
pva_dma_resource_map_copy_triggers(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->triggers |= src_map->triggers;
}
static inline void
pva_dma_resource_map_copy_hwseq_words(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->hwseq_words |= src_map->hwseq_words;
}
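/*
 * Illustrative sketch: building a resource map for a command that uses
 * channels 0-1 and descriptors 0-7, then testing it against another map
 * ('other', hypothetical) for conflicts with a plain bitwise AND.
 *
 *   struct pva_dma_resource_map map;
 *   pva_dma_resource_map_reset(&map);
 *   pva_dma_resource_map_add_channels(&map, 0U, 2U);
 *   pva_dma_resource_map_add_descriptors(&map, 0U, 8U);
 *   bool conflict = ((map.channels & other.channels) != 0U) ||
 *                   ((map.descriptors & other.descriptors) != 0U);
 */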
struct pva_dma_config_resource {
uint8_t base_descriptor;
uint8_t base_channel;
uint8_t num_descriptors;
uint8_t num_channels;
uint16_t num_dynamic_slots;
/** Must be an even number to satisfy padding requirement. */
uint16_t num_relocs;
	/** Bitmask of channels whose block height is fixed. Once the corresponding bit is set,
	 * the block height of that channel should not be changed. */
uint16_t ch_block_height_fixed_mask;
uint16_t base_hwseq_word;
uint16_t num_hwseq_words;
uint16_t pad;
uint32_t vpu_exec_resource_id;
uint32_t common_config;
uint32_t output_enable[PVA_NUM_DMA_TRIGGERS];
struct pva_dma_resource_map dma_resource_map;
/* Followed by <num_dynamic_slots> of pva_fw_dma_slot */
/* Followed by <num_reloc_infos> of pva_fw_dma_reloc */
/* Followed by an array of pva_fw_dma_channel */
/* Followed by an array of pva_fw_dma_descriptor */
/* =====================================================================
* The following fields do not need to be fetched into TCM. The DMA config
* resource size (as noted in the resource table) does not include these
* fields */
/* Followed by an array of hwseq words */
};
struct pva_fw_vmem_buffer {
#define PVA_FW_SYM_TYPE_MSB 31
#define PVA_FW_SYM_TYPE_LSB 29
#define PVA_FW_VMEM_ADDR_MSB 28
#define PVA_FW_VMEM_ADDR_LSB 0
uint32_t addr;
uint32_t size;
};
struct pva_exec_bin_resource {
uint8_t code_addr_hi;
uint8_t data_section_addr_hi;
uint8_t num_data_sections;
uint8_t pad;
uint32_t code_addr_lo;
uint32_t data_section_addr_lo;
uint32_t code_size;
uint32_t num_vmem_buffers;
/* Followed by <num_data_sections> number of pva_fw_data_section_info */
/* Followed by <num_vmem_buffers> number of pva_fw_vmem_buffer */
};
static inline struct pva_fw_dma_slot *
pva_dma_config_get_slots(struct pva_dma_config_resource *dma_config)
{
return (struct pva_fw_dma_slot
*)((uint8_t *)dma_config +
sizeof(struct pva_dma_config_resource));
}
static inline struct pva_fw_dma_reloc *
pva_dma_config_get_relocs(struct pva_dma_config_resource *dma_config)
{
return (struct pva_fw_dma_reloc
*)((uint8_t *)pva_dma_config_get_slots(dma_config) +
sizeof(struct pva_fw_dma_slot) *
dma_config->num_dynamic_slots);
}
static inline struct pva_fw_dma_channel *
pva_dma_config_get_channels(struct pva_dma_config_resource *dma_config)
{
return (struct pva_fw_dma_channel *)((uint8_t *)
pva_dma_config_get_relocs(
dma_config) +
sizeof(struct pva_fw_dma_reloc) *
dma_config->num_relocs);
}
static inline struct pva_fw_dma_descriptor *
pva_dma_config_get_descriptors(struct pva_dma_config_resource *dma_config)
{
return (struct pva_fw_dma_descriptor
*)((uint8_t *)pva_dma_config_get_channels(dma_config) +
sizeof(struct pva_fw_dma_channel) *
dma_config->num_channels);
}
#endif // PVA_RESOURCE_H

View File

@@ -0,0 +1,349 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_H
#define PVA_API_H
#ifdef __cplusplus
extern "C" {
#endif
#include "pva_api_types.h"
#include "pva_api_dma.h"
#include "pva_api_vpu.h"
#include "pva_api_cmdbuf.h"
/* Core APIs */
/**
* @brief Create a PVA context.
*
* @param[in] pva_index Select which PVA instance to use if there are multiple PVAs
* in the SOC.
* @param[in] max_resource_count Maximum number of resources this context can have.
* @param[out] ctx Pointer to the created context.
*/
enum pva_error pva_context_create(uint32_t pva_index,
uint32_t max_resource_count,
struct pva_context **ctx);
/**
* @brief Destroy a PVA context.
*
* A context can only be destroyed after all queues are destroyed.
*
* @param[in] ctx Pointer to the context to destroy.
*/
void pva_context_destroy(struct pva_context *ctx);
/**
* @brief Create a PVA queue.
*
* @param[in] ctx Pointer to the context.
* @param[in] max_submission_count Max number of submissions that can be queued.
* @param[out] queue Pointer to the created queue.
*/
enum pva_error pva_queue_create(struct pva_context *ctx,
uint32_t max_submission_count,
struct pva_queue **queue);
/**
* @brief Destroy a PVA queue.
*
* @param[in] queue Pointer to the queue to destroy.
*/
void pva_queue_destroy(struct pva_queue *queue);
/**
 * @brief Allocate DRAM memory that can be mapped into PVA's device space
*
* @param[in] size Size of the memory to allocate.
* @param[out] out_mem Pointer to the allocated memory.
*/
enum pva_error pva_memory_alloc(uint64_t size, struct pva_memory **out_mem);
/**
* @brief Map the memory to CPU's virtual space.
*
* @param[in] mem Pointer to the memory to map.
* @param[in] access_mode Access mode for the memory. PVA_ACCESS_RD or
* PVA_ACCESS_RW.
* @param[out] out_va Pointer to the virtual address of the mapped memory.
*/
enum pva_error pva_memory_cpu_map(struct pva_memory *mem, uint32_t access_mode,
void **out_va);
/**
* @brief Unmap the memory from CPU's virtual space.
*
* @param[in] mem Pointer to the memory to unmap.
* @param[in] va Previously mapped virtual address.
*/
enum pva_error pva_memory_cpu_unmap(struct pva_memory *mem, void *va);
/**
* @brief Free the memory.
*
* Freeing a registered memory is okay since KMD holds a reference to the memory.
*
* @param mem Pointer to the memory to free.
*/
void pva_memory_free(struct pva_memory *mem);
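/*
 * Illustrative sketch: a typical CPU-side lifecycle of a PVA memory object
 * (error handling elided). Registration via the ops API below is still
 * required before command buffers can reference the memory.
 *
 *   struct pva_memory *mem = NULL;
 *   void *va = NULL;
 *   pva_memory_alloc(4096U, &mem);
 *   pva_memory_cpu_map(mem, PVA_ACCESS_RW, &va);
 *   // ... fill 'va' with input data ...
 *   pva_memory_cpu_unmap(mem, va);
 *   pva_memory_free(mem);   // safe even after registration; KMD keeps a reference
 */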
/**
* @brief Wait for a syncpoint to reach a value.
*
* @param[in] ctx Pointer to the context.
 * @param[in] syncpoint_id Syncpoint ID to wait on.
* @param[in] value Value to wait for.
* @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite.
*/
enum pva_error pva_syncpoint_wait(struct pva_context *ctx,
				  uint32_t syncpoint_id, uint32_t value,
uint64_t timeout_us);
/**
* @brief Submit a batch of command buffers.
*
* @param[in] queue Pointer to the queue.
* @param[in] submit_infos Array of submit info structures.
* @param[in] count Number of submit info structures.
* @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite.
*
* @note Concurrent submission to the same queue needs to be serialized by the
* caller.
*/
enum pva_error
pva_cmdbuf_batch_submit(struct pva_queue *queue,
struct pva_cmdbuf_submit_info *submit_infos,
uint32_t count, uint64_t timeout_us);
/**
* @brief Get the symbol table for a registered executable.
*
* @param[in] ctx Pointer to the context.
* @param[in] exe_resource_id Resource ID of the executable.
* @param[out] out_info Pointer to the symbol info array.
* @param[in] max_num_symbols Maximum number of symbols to return.
*/
enum pva_error pva_executable_get_symbols(struct pva_context *ctx,
uint32_t exe_resource_id,
struct pva_symbol_info *out_info,
uint32_t max_num_symbols);
/**
* @brief Submit a list of asynchronous registration operations to KMD.
*
* The operations can be:
* - Memory registration
* - Executable registration
* - DMA config registration
*
* The response buffer will contain the resource IDs of the registered
* resources. Any command buffers that use these resources should wait on the
* returned post fence.
*
* @param[in] ctx Pointer to the context.
* @param[in] fence Pointer to the post fence to wait on. If NULL, it means the
* caller is not interested in waiting. This usually only applies to unregister
* operations.
 * @param[in] input_buffer Input buffer containing the list of operations.
 * @param[out] output_buffer Output buffer to store the response.
*
* @note Input and output buffer may be the same buffer.
*/
enum pva_error pva_ops_submit_async(struct pva_context *ctx,
struct pva_fence *fence,
struct pva_ops_buffer const *input_buffer,
struct pva_ops_buffer *output_buffer);
/**
* @brief Perform a list of registration operations synchronously.
*
* The operations can be:
* - Memory registration
* - Executable registration
* - DMA config registration
*
* The response buffer will contain the resource IDs of the registered
* resources.
*
* @param[in] ctx Pointer to the context.
 * @param[in] input_buffer Input buffer containing the list of operations.
 * @param[out] output_buffer Output buffer to store the response.
*
* @note Input and output buffer may be the same buffer.
*
*/
enum pva_error pva_ops_submit(struct pva_context *ctx,
struct pva_ops_buffer const *input_buffer,
struct pva_ops_buffer *output_buffer);
/** Size of the ops buffer header. When user allocates memory for ops buffer,
* this size needs to be added. */
#define PVA_OPS_BUFFER_HEADER_SIZE 64
/**
* @brief Initialize pva_ops_buffer to keep track of the state of
* operations buffer during preparation.
*
* @param[out] buf_handle Pointer to the pva_ops_buffer object to initialize.
* @param[in] buf Pointer to the buffer that will store the operations.
* @param[in] size Size of the buffer.
*/
enum pva_error pva_ops_buffer_init(struct pva_ops_buffer *buf_handle, void *buf,
uint32_t size);
#define PVA_OPS_MEMORY_REG_SIZE 64
/**
* @brief Append a memory registration operation to the operations buffer.
*
* @param[in] ctx Pointer to the context.
* @param[in] mem Pointer to the memory to register.
* @param[in] segment Memory segment to register.
* @param[in] access_flags Access flags for the memory.
* @param[out] op_buf Pointer to the operations buffer.
*/
enum pva_error pva_ops_append_memory_register(struct pva_context *ctx,
struct pva_memory *mem,
enum pva_memory_segment segment,
uint32_t access_flags,
struct pva_ops_buffer *op_buf);
#define PVA_OPS_EXEC_REG_HEADER_SIZE 16
/**
* @brief Append an executable registration operation to the operations.
*
* @param[in] ctx Pointer to the context.
* @param[in] executable Pointer to the executable binary content.
* @param[in] executable_size Size of the executable.
* @param[out] op_buf Pointer to the operations buffer.
*/
enum pva_error pva_ops_append_executable_register(
struct pva_context *ctx, void const *executable,
uint32_t executable_size, struct pva_ops_buffer *op_buf);
#define PVA_OPS_DMA_CONFIG_REG_SIZE (24 * 1024)
/**
* @brief Append a DMA config registration operation to the operations.
* @param[in] ctx Pointer to the context.
* @param[in] dma_config Pointer to the DMA config.
* @param[out] op_buf Pointer to the operations buffer.
*/
enum pva_error
pva_ops_append_dma_config_register(struct pva_context *ctx,
struct pva_dma_config const *dma_config,
struct pva_ops_buffer *op_buf);
#define PVA_OPS_UNREG_SIZE 16
enum pva_error pva_ops_append_unregister(struct pva_context *ctx,
uint32_t resource_id,
struct pva_ops_buffer *op_buf);
/**
* @brief Parse the response buffer to get the resource ID of the registered
* memory or DMA configuration.
*
* @param[in] resp_buf Pointer to the response buffer.
* @param[out] resource_id output resource ID.
*/
enum pva_error pva_ops_parse_register_resp(struct pva_ops_buffer *resp_buf,
uint32_t *resource_id);
/**
* @brief Parse the response buffer to get the resource ID of the registered
* executable.
*
 * @param[in] op_buf Pointer to the response buffer.
* @param[out] num_symbols Number of symbols in the executable.
* @param[out] resource_id output resource ID.
*/
enum pva_error pva_ops_parse_exec_register_resp(struct pva_ops_buffer *op_buf,
uint32_t *num_symbols,
uint32_t *resource_id);
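/*
 * Illustrative sketch: registering one memory object synchronously with the
 * ops helpers above (error handling elided). 'ctx', 'mem', 'segment' (a
 * pva_memory_segment value) and 'access_flags' are assumed to exist already.
 *
 *   uint8_t storage[PVA_OPS_BUFFER_HEADER_SIZE + PVA_OPS_MEMORY_REG_SIZE];
 *   struct pva_ops_buffer ops;
 *   uint32_t resource_id = 0U;
 *
 *   pva_ops_buffer_init(&ops, storage, sizeof(storage));
 *   pva_ops_append_memory_register(ctx, mem, segment, access_flags, &ops);
 *   pva_ops_submit(ctx, &ops, &ops);          // input and output may be the same
 *   pva_ops_parse_register_resp(&ops, &resource_id);
 */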
#define PVA_DATA_CHANNEL_HEADER_SIZE 32
/**
* @brief Initialize VPU print buffer
*
* @param[in] data Pointer to VPU print buffer.
 * @param[in] size Size of VPU print buffer.
 * @param[out] data_channel Pointer to the initialized data channel handle.
*/
struct pva_data_channel;
enum pva_error pva_init_data_channel(void *data, uint32_t size,
struct pva_data_channel **data_channel);
/**
* @brief Read VPU print buffer
*
 * @param[in] data_channel Pointer to the VPU print data channel.
* @param[out] read_buffer Pointer to output buffer in which data will be read.
* @param[in] bufferSize Size of output buffer.
* @param[out] read_size Size of actual data read in output buffer.
*/
enum pva_error pva_read_data_channel(struct pva_data_channel *data_channel,
uint8_t *read_buffer, uint32_t bufferSize,
uint32_t *read_size);
/**
* @brief Duplicate PVA memory object.
*
* This function duplicates a PVA memory object. The new object will have shared
* ownership of the memory.
*
* @param[in] src Pointer to the source memory object.
* @param[in] access_mode Access mode for the new memory object. It should be
 * no less restrictive than the source memory's. Passing 0 will use the same access
* mode as the source memory.
* @param[out] dst Resulting duplicated memory object.
*/
enum pva_error pva_memory_duplicate(struct pva_memory *src,
uint32_t access_mode,
struct pva_memory **dst);
/**
* @brief Get memory attributes.
*
* @param[in] mem Pointer to the memory.
* @param[out] out_attrs Pointer to the memory attributes.
*/
void pva_memory_get_attrs(struct pva_memory const *mem,
struct pva_memory_attrs *out_attrs);
/** \brief Specifies the PVA system software major version. */
#define PVA_SYSSW_MAJOR_VERSION (2U)
/** \brief Specifies the PVA system software minor version. */
#define PVA_SYSSW_MINOR_VERSION (7U)
/**
* @brief Get PVA system software version.
*
* PVA system software version is defined as the latest version of cuPVA which is fully supported
* by this version of the PVA system software.
*
* @param[out] version Version of the currently running system SW, computed as
* (PVA_SYSSW_MAJOR_VERSION * 1000) + PVA_SYSSW_MINOR_VERSION.
* @return PVA_SUCCESS on success, else error code indicating the failure.
*/
enum pva_error pva_get_version(uint32_t *version);
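/**
* Minimal sketch of decoding the combined version value documented above; the
* printf-based reporting is illustrative only.
*
* @code
* #include <stdio.h>
*
* static void report_syssw_version(void)
* {
*     uint32_t version = 0U;
*
*     if (pva_get_version(&version) == PVA_SUCCESS) {
*         // Inverse of (PVA_SYSSW_MAJOR_VERSION * 1000) + PVA_SYSSW_MINOR_VERSION.
*         printf("PVA system SW %u.%u\n", (unsigned)(version / 1000U),
*                (unsigned)(version % 1000U));
*     }
* }
* @endcode
*/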
/**
* @brief Get the hardware characteristics of the PVA.
*
* @param[out] pva_hw_char Pointer to the hardware characteristics.
*/
enum pva_error
pva_get_hw_characteristics(struct pva_characteristics *pva_hw_char);
#ifdef __cplusplus
}
#endif
#endif // PVA_API_H

View File

@@ -0,0 +1,627 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_CMDBUF_H
#define PVA_API_CMDBUF_H
#include "pva_api_types.h"
// Maximum number of slots for maintaining timestamps
#define PVA_MAX_QUERY_SLOTS_COUNT 32U
/** The common header for all commands.
*/
struct pva_cmd_header {
#define PVA_CMD_PRIV_OPCODE_FLAG (1U << 7U)
/** Opcode for the command. MSB of opcode indicates whether this command is
* privileged or not */
uint8_t opcode;
/** Command specific flags */
uint8_t flags;
/**
* For pva_cmd_barrier: barrier_group specifies which group this barrier
* waits for.
* For pva_cmd_retire_barrier_group: barrier_group specifies which id will
* be retired. Retired ids can be re-used by future commands and will refer
* to a new logical group.
* For all other commands: barrier_group specifies which barrier group this
* command belongs to. Other commands are able to defer execution until all
* commands in the barrier group have completed, or stall the cmd buffer
* until such a time. Note that asynchronous commands may complete in an
* order different from the order in which they appear in the command
* buffer.
*/
uint8_t barrier_group;
/** Length in 4-bytes, including this header. */
uint8_t len;
};
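/**
* Illustrative helper (an assumption, not part of the command set): fill a
* command header with its length expressed in 4-byte units, as the len field
* above requires. The caller is assumed to pass the full command size in
* bytes, already padded to a 4-byte boundary.
*
* @code
* static inline void example_fill_header(struct pva_cmd_header *hdr,
*                                        uint8_t opcode, uint8_t flags,
*                                        uint8_t barrier_group,
*                                        uint32_t total_size_bytes)
* {
*     hdr->opcode = opcode;
*     hdr->flags = flags;
*     hdr->barrier_group = barrier_group;
*     hdr->len = (uint8_t)(total_size_bytes / 4U);  // length in 4-byte units
* }
* @endcode
*/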
struct pva_user_dma_allowance {
#define PVA_USER_DMA_ALLOWANCE_ADB_STEP_SIZE 8
uint32_t channel_idx : 4;
uint32_t desc_start_idx : 7;
uint32_t desc_count : 7;
uint32_t adb_start_idx : 6;
uint32_t adb_count : 6;
};
/* Basic Commands */
/** Does nothing. It can be used as a placeholder in the command buffer. */
struct pva_cmd_noop {
#define PVA_CMD_OPCODE_NOOP 0U
struct pva_cmd_header header;
};
/** Link next chunk. This command can be placed anywhere in the command buffer.
* Firmware will start fetching the next chunk when this command is executed. */
struct pva_cmd_link_chunk {
#define PVA_CMD_OPCODE_LINK_CHUNK 1U
struct pva_cmd_header header;
uint8_t next_chunk_offset_hi;
uint8_t pad;
uint16_t next_chunk_size; /**< Size of next chunk in bytes */
uint32_t next_chunk_resource_id;
uint32_t next_chunk_offset_lo;
struct pva_user_dma_allowance user_dma;
};
/** Barrier command. The user can assign a barrier group to any asynchronous
* command. The barrier command blocks FW execution until the specified group of
* asynchronous commands have completed. Up to 8 barrier groups are supported.
*
* @note A barrier command is not typically required since FW stalls
* automatically in the event of hardware conflicts or when issuing a command is
* deemed unsafe according to the state machines. However, if a stall is needed
* for other reasons, the barrier command can be utilized.
*/
struct pva_cmd_barrier {
#define PVA_CMD_OPCODE_BARRIER 2U
struct pva_cmd_header header;
};
/** Acquire one or more PVE systems, each of which includes a VPS, DMA and PPE.
* It blocks until the specified number of engines has been acquired.
* By default, the lowest engine ID acquired is set as the current engine.
* Acquired engines will be automatically released when this command buffer finishes.
* They can also be released using the release_engine command.
*/
struct pva_cmd_acquire_engine {
#define PVA_CMD_OPCODE_ACQUIRE_ENGINE 3U
struct pva_cmd_header header;
uint8_t engine_count;
uint8_t pad[3];
};
/** Release all acquired PVE systems. It is legal to release an engine while it
* is still running. The released engine won't be available to be acquired until
* it finishes and becomes idle again. */
struct pva_cmd_release_engine {
#define PVA_CMD_OPCODE_RELEASE_ENGINE 4U
struct pva_cmd_header header;
};
/** Set a PVE engine as current. Following commands will modify this engine. The
* zero-based engine index must be less than the acquired engine number. */
struct pva_cmd_set_current_engine {
#define PVA_CMD_OPCODE_SET_CURRENT_ENGINE 5U
struct pva_cmd_header header;
uint8_t engine_index;
uint8_t pad[3];
};
/** This command specifies the executable to use for the following VPU launches.
* It doesn't do anything other than set the context for the following
* commands.
*
* Note: This command cannot be initiated if any of the DMA sets (that access
* VMEM) are in a running state, in order to prevent mismatches between DMA sets
* and VPU executables. The command buffer will stall until these DMA sets are
* finished. */
struct pva_cmd_set_vpu_executable {
#define PVA_CMD_OPCODE_SET_VPU_EXECUTABLE 6U
struct pva_cmd_header header;
uint32_t vpu_exec_resource_id;
};
/** This command clears the entire VMEM. User may choose to skip VMEM clear if
* there are no bss sections in the VPU executable. Since VMEM can be accessed
* by both VPU and PPE, this command drives both the VPU state machine and the
* PPE state machine. As a result, it can only be started if both VPU state
* machine and PPE state machine are in valid states (Idle or Binded). */
struct pva_cmd_clear_vmem {
#define PVA_CMD_OPCODE_CLEAR_VMEM 7U
struct pva_cmd_header header;
};
/** This command configures VPU hardware. Specifically, it configures code
* segment register and copies data sections. */
struct pva_cmd_init_vpu_executable {
#define PVA_CMD_OPCODE_INIT_VPU_EXECUTABLE 8U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
};
/** Start VPU instruction prefetch from specified entry point. Currently, the
* entry point index must be 0. More entry points will be supported in the
* future. Note that this command merely triggers the prefetch but does not wait
* for the prefetch to complete. Therefore, this command is synchronous. */
struct pva_cmd_prefetch_vpu_code {
#define PVA_CMD_OPCODE_PREFETCH_VPU_CODE 9U
struct pva_cmd_header header;
uint32_t entry_point_index;
};
/** Run the VPU program from the specified entry point until it finishes. The
* lifetime of this command covers the entire VPU program execution. Since this
* command is asynchronous, it doesn't block the following commands from
* executing. */
struct pva_cmd_run_vpu {
#define PVA_CMD_OPCODE_RUN_VPU 10U
struct pva_cmd_header header;
uint32_t entry_point_index;
};
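/**
* Illustrative sketch (an assumption drawn from the command descriptions
* above, not a normative example) of a typical VPU launch sequence: acquire an
* engine, select and initialize an executable, then run entry point 0. The
* structures are assumed to be zero-initialized by the caller; serialization
* into a command buffer chunk and the submit path are not shown.
*
* @code
* static void fill_vpu_launch_cmds(uint32_t vpu_exec_resource_id,
*                                  struct pva_cmd_acquire_engine *acq,
*                                  struct pva_cmd_set_vpu_executable *set_exec,
*                                  struct pva_cmd_clear_vmem *clear,
*                                  struct pva_cmd_init_vpu_executable *init,
*                                  struct pva_cmd_run_vpu *run)
* {
*     acq->header.opcode = PVA_CMD_OPCODE_ACQUIRE_ENGINE;
*     acq->header.len = (uint8_t)(sizeof(*acq) / 4U);
*     acq->engine_count = 1U;
*
*     set_exec->header.opcode = PVA_CMD_OPCODE_SET_VPU_EXECUTABLE;
*     set_exec->header.len = (uint8_t)(sizeof(*set_exec) / 4U);
*     set_exec->vpu_exec_resource_id = vpu_exec_resource_id;
*
*     clear->header.opcode = PVA_CMD_OPCODE_CLEAR_VMEM;
*     clear->header.len = (uint8_t)(sizeof(*clear) / 4U);
*
*     init->header.opcode = PVA_CMD_OPCODE_INIT_VPU_EXECUTABLE;
*     init->header.len = (uint8_t)(sizeof(*init) / 4U);
*
*     run->header.opcode = PVA_CMD_OPCODE_RUN_VPU;
*     run->header.len = (uint8_t)(sizeof(*run) / 4U);
*     run->entry_point_index = 0U;  // only entry point 0 is supported today
* }
* @endcode
*/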
/** Copy data from opaque payload to a VPU variable. Firmware may choose to copy
* with R5 or DMA. If using DMA, channel 0 will be used. */
struct pva_cmd_set_vpu_parameter {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER 11U
struct pva_cmd_header header;
uint16_t data_size;
uint16_t pad;
uint32_t symbol_id;
uint32_t vmem_offset;
/* Followed by <data_size> number of bytes, padded to 4 bytes */
};
/** Copy data from a DRAM buffer to a VPU variable. DMA will be used to perform
* the copy. The user can optionally provide a user channel, a descriptor and
* ADBs to speed up the copy. */
struct pva_cmd_set_vpu_parameter_with_buffer {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_WITH_BUFFER 12U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
uint8_t src_dram_offset_hi;
uint8_t pad[3];
uint32_t data_size;
uint32_t dst_symbol_id;
uint32_t dst_vmem_offset;
uint32_t src_dram_resource_id;
uint32_t src_dram_offset_lo;
};
/** For set_vpu_parameter_with_address command, set this flag in header.flags to
* indicate that the target symbol is the legacy pointer symbol type:
* pva_fw_vpu_legacy_ptr_symbol, which only supports a 32-bit offset and a
* 32-bit size. */
#define PVA_CMD_FLAGS_USE_LEGACY_POINTER 0x1
/** Copy the address of a DRAM buffer to a VPU variable. The variable must be
* laid out exactly according to pva_fw_vpu_ptr_symbol
*/
struct pva_cmd_set_vpu_parameter_with_address {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_WITH_ADDRESS 13U
struct pva_cmd_header header;
uint8_t dram_offset_hi;
uint8_t pad[3];
uint32_t symbol_id;
uint32_t dram_resource_id;
uint32_t dram_offset_lo;
};
#define PVA_MAX_DMA_SETS_PER_DMA_ENGINE 4
#define PVA_DMA_CONFIG_FETCH_BUFFER_PER_DMA_ENGINE 1
/** This command first acquires the TCM scratch and then fetches DMA configuration
* into the scratch. The command does not modify DMA
* hardware, allowing FW to continue using user channels for data transfer after
* its execution. This command only uses channel 0 to fetch the DMA
* configuration. However, user can still help speed up the process by
* providing additional ADBs. This command will block if theres no TCM scratch
* available. If theres no pending commands AND theres no TCM scratch, then it
* means we encountered a dead lock, the command buffer will be aborted. */
struct pva_cmd_fetch_dma_configuration {
#define PVA_CMD_OPCODE_FETCH_DMA_CONFIGURATION 14U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t pad[3];
uint32_t resource_id;
struct pva_user_dma_allowance user_dma;
};
/** Setup DMA hardware registers using previously fetched DMA configuration. FW
* uses channel 0 to copy DMA descriptors into descriptor RAM. The user can
* provide additional ADBs to speed up the process. The command will block until
* the needed channels, descriptors and hwseq words are acquired. The command must
* also validate that all source and destination fields of each DMA descriptor
* being programmed are bound to a resource.
*/
struct pva_cmd_setup_dma {
#define PVA_CMD_OPCODE_SETUP_DMA 15U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
uint8_t dma_set_id;
uint8_t pad[3];
};
/** Run DMA channels according to the current DMA configuration until they are
* finished. The lifetime of this command covers the entire DMA transfer. The
* command shall block until the needed VDBs/ADBs and triggers (GPIOs) are
* acquired.
* @note This command checks that the DMA set to be started is indeed paired
* with the currently bound VPU executable. If not, this constitutes a
* programming error, and the command buffer will be aborted. */
struct pva_cmd_run_dma {
#define PVA_CMD_OPCODE_RUN_DMA 16U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t pad[3];
};
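/**
* Illustrative sketch (an assumption based on the descriptions above): the
* usual fetch -> setup -> run ordering for one DMA set. The structures are
* assumed to be zero-initialized by the caller, and the user_dma allowance
* fields are left at zero (no user-provided channels/descriptors/ADBs).
*
* @code
* static void fill_dma_set_cmds(uint32_t dma_config_resource_id,
*                               uint8_t dma_set_id,
*                               struct pva_cmd_fetch_dma_configuration *fetch,
*                               struct pva_cmd_setup_dma *setup,
*                               struct pva_cmd_run_dma *run)
* {
*     fetch->header.opcode = PVA_CMD_OPCODE_FETCH_DMA_CONFIGURATION;
*     fetch->header.len = (uint8_t)(sizeof(*fetch) / 4U);
*     fetch->dma_set_id = dma_set_id;
*     fetch->resource_id = dma_config_resource_id;
*
*     setup->header.opcode = PVA_CMD_OPCODE_SETUP_DMA;
*     setup->header.len = (uint8_t)(sizeof(*setup) / 4U);
*     setup->dma_set_id = dma_set_id;
*
*     run->header.opcode = PVA_CMD_OPCODE_RUN_DMA;
*     run->header.len = (uint8_t)(sizeof(*run) / 4U);
*     run->dma_set_id = dma_set_id;
* }
* @endcode
*/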
/** This command specifies the executable to use for the following PPE launches.
* It doesn't do anything other than set the context for the following
* commands. */
struct pva_cmd_set_ppe_executable {
#define PVA_CMD_OPCODE_SET_PPE_EXECUTABLE 17U
struct pva_cmd_header header;
uint32_t ppe_exec_resource_id;
};
/** Start PPE instruction prefetch from specified entry point. Currently, the
* entry point index must be 0. Note that this command merely triggers the
* prefetch but does not wait for the prefetch to complete. Therefore, this
* command is synchronous. */
struct pva_cmd_prefetch_ppe_code {
#define PVA_CMD_OPCODE_PREFETCH_PPE_CODE 18U
struct pva_cmd_header header;
uint32_t entry_point_index;
};
/** Setup PPE code segment and data sections. */
struct pva_cmd_init_ppe_executable {
#define PVA_CMD_OPCODE_INIT_PPE_EXECUTABLE 19U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
};
/** Run the PPE program until it finishes. The lifetime of this command covers
* the entire PPE program execution. */
struct pva_cmd_run_ppe {
#define PVA_CMD_OPCODE_RUN_PPE 20U
struct pva_cmd_header header;
uint32_t entry_point_index;
};
#define PVA_BARRIER_GROUP_0 0U
#define PVA_BARRIER_GROUP_1 1U
#define PVA_BARRIER_GROUP_2 2U
#define PVA_BARRIER_GROUP_3 3U
#define PVA_BARRIER_GROUP_4 4U
#define PVA_BARRIER_GROUP_5 5U
#define PVA_BARRIER_GROUP_6 6U
#define PVA_BARRIER_GROUP_7 7U
#define PVA_MAX_BARRIER_GROUPS 8U
#define PVA_BARRIER_GROUP_INVALID 0xFFU
/**
* @brief Captures a timestamp to DRAM
*
* This command allows you to capture a timestamp using one of four modes:
*
* - **IMMEDIATE_MODE**: Captures the timestamp immediately.
* - **VPU START MODE**: Enqueue a timestamp to be captured the next time the
* current VPU starts. Up to 8 VPU start timestamps may be active at a time
* for a given engine.
* - **VPU DONE MODE**: Enqueue a timestamp to be captured the next time the
* current VPU enters done state. Up to 8 VPU done timestamps may be active at
* a time for a given engine.
* - **DEFER MODE**: Defers the timestamp capture by specifying a barrier group.
* The timestamp will be captured once the commands in the specified barrier
* group have completed. Each barrier group allows one timestamp to be active
* at a time.
*
* The timestamp will be available in DRAM after waiting on any postfence.
*
* @note This command is asynchronous, ensuring it does not block the next command.
*/
struct pva_cmd_capture_timestamp {
#define PVA_CMD_OPCODE_CAPTURE_TIMESTAMP 21U
struct pva_cmd_header header;
uint8_t offset_hi;
uint8_t defer_barrier_group;
#define PVA_CMD_CAPTURE_MODE_IMMEDIATE 0U
#define PVA_CMD_CAPTURE_MODE_VPU_START 1U
#define PVA_CMD_CAPTURE_MODE_VPU_DONE 2U
#define PVA_CMD_CAPTURE_MODE_DEFER 3U
uint8_t capture_mode;
uint8_t pad;
uint32_t resource_id;
uint32_t offset_lo;
};
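/**
* Illustrative sketch (an assumption, not from the original documentation) of
* an immediate-mode timestamp capture into the start of a DRAM resource. The
* structure is assumed to be zero-initialized; a zero offset is used so the
* offset_hi/offset_lo split for large offsets does not come into play.
*
* @code
* static void fill_immediate_timestamp(struct pva_cmd_capture_timestamp *cmd,
*                                      uint32_t dram_resource_id)
* {
*     cmd->header.opcode = PVA_CMD_OPCODE_CAPTURE_TIMESTAMP;
*     cmd->header.len = (uint8_t)(sizeof(*cmd) / 4U);
*     cmd->capture_mode = PVA_CMD_CAPTURE_MODE_IMMEDIATE;
*     cmd->defer_barrier_group = PVA_BARRIER_GROUP_INVALID;  // unused in this mode
*     cmd->resource_id = dram_resource_id;
*     cmd->offset_lo = 0U;
*     cmd->offset_hi = 0U;
* }
* @endcode
*/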
/** Set the address of the status buffer. FW will output detailed command buffer
* status in case of command buffer abort. */
struct pva_cmd_request_status {
#define PVA_CMD_OPCODE_CAPTURE_STATUS 22U
struct pva_cmd_header header;
uint8_t offset_hi;
uint8_t pad[3];
uint32_t resource_id;
uint32_t offset_lo;
};
/** Blocks until l2ram is available. To prevent deadlock with other command
* buffers, l2ram must be acquired prior to acquiring any engine. It will be
* automatically freed when this command buffer finishes. If persistence is
* required, it must be saved to DRAM. One command buffer may only hold one
* L2SRAM allocation at a time. */
struct pva_cmd_bind_l2sram {
#define PVA_CMD_OPCODE_BIND_L2SRAM 23U
struct pva_cmd_header header;
uint8_t dram_offset_hi;
#define FILL_ON_MISS (1U << 0U)
#define FLUSH_ON_EVICTION (1U << 1U)
uint8_t access_policy;
uint8_t pad[2];
uint32_t dram_resource_id;
uint32_t dram_offset_lo;
uint32_t l2sram_size;
struct pva_user_dma_allowance user_dma;
};
/** Free previously allocated l2ram. This command is asynchronous because it
* needs to wait for all commands that are started before it to complete. */
struct pva_cmd_release_l2sram {
#define PVA_CMD_OPCODE_RELEASE_L2SRAM 24U
struct pva_cmd_header header;
};
/**
* This command writes data to a DRAM region. The DRAM region is described
* by the resource ID, offset and size fields. The data to be written is placed
* right after the command struct. For this command to execute successfully,
* the following conditions must be met:
* 1. 'resource_id' must point to a valid resource in DRAM.
* 2. The offset and size fields must add up to a value less than or equal to
* the size of the DRAM resource.
*/
struct pva_cmd_write_dram {
#define PVA_CMD_OPCODE_WRITE_DRAM 25U
struct pva_cmd_header header;
uint8_t offset_hi;
uint8_t pad;
uint16_t write_size;
uint32_t resource_id;
uint32_t offset_lo;
/* Followed by write_size bytes, padded to 4 bytes boundary */
};
/** Set this bit in header.flags to a @ref pva_surface_format value to indicate
* whether the surface is block linear or pitch linear.
*
* For block linear surfaces, the starting address for a descriptor is:
* IOVA_OF(resource_id) + surface_base_offset + PL2BL(slot_offset + desc_offset).
*
* For pitch linear surfaces, the starting address for a descriptor is:
* IOVA_OF(resource_id) + surface_base_offset + slot_offset + desc_offset
*/
#define PVA_CMD_FLAGS_SURFACE_FORMAT_MSB 0U
#define PVA_CMD_FLAGS_SURFACE_FORMAT_LSB 0U
/** MSB of log2 block height in flags field of the command header */
#define PVA_CMD_FLAGS_LOG2_BLOCK_HEIGHT_MSB 3U
/** LSB of log2 block height in flags field of the command header */
#define PVA_CMD_FLAGS_LOG2_BLOCK_HEIGHT_LSB 1U
/** Bind a DRAM surface to a slot. The surface can be block linear or pitch
* linear. */
struct pva_cmd_bind_dram_slot {
#define PVA_CMD_OPCODE_BIND_DRAM_SLOT 26U
/** flags field will contain block linear flag and block height */
struct pva_cmd_header header;
uint8_t dma_set_id; /**< ID of the DMA set */
uint8_t slot_offset_hi;
uint8_t surface_base_offset_hi;
uint8_t slot_id; /**< ID of slot to bind */
uint32_t resource_id; /**< Resource ID of the DRAM allocation for the surface */
uint32_t slot_offset_lo; /**< Per-slot offset in pitch linear domain, from slot base to surface base */
uint32_t surface_base_offset_lo; /**< Surface base offset in bytes, from surface base to allocation base */
};
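/**
* Illustrative sketch (an assumption derived from the bit positions defined
* above): packing the surface format and log2 block height into header.flags
* for a block linear surface bound at offset 0. Only the *_MSB/*_LSB defines
* and the structure come from this header.
*
* @code
* static void fill_bind_bl_slot(struct pva_cmd_bind_dram_slot *cmd,
*                               uint8_t dma_set_id, uint8_t slot_id,
*                               uint32_t resource_id, uint8_t log2_block_height)
* {
*     uint8_t flags = 0U;
*
*     flags |= (uint8_t)(PVA_SURF_FMT_BLOCK_LINEAR
*                        << PVA_CMD_FLAGS_SURFACE_FORMAT_LSB);
*     flags |= (uint8_t)(log2_block_height
*                        << PVA_CMD_FLAGS_LOG2_BLOCK_HEIGHT_LSB);
*
*     cmd->header.opcode = PVA_CMD_OPCODE_BIND_DRAM_SLOT;
*     cmd->header.len = (uint8_t)(sizeof(*cmd) / 4U);
*     cmd->header.flags = flags;
*     cmd->dma_set_id = dma_set_id;
*     cmd->slot_id = slot_id;
*     cmd->resource_id = resource_id;
*     cmd->slot_offset_lo = 0U;
*     cmd->slot_offset_hi = 0U;
*     cmd->surface_base_offset_lo = 0U;
*     cmd->surface_base_offset_hi = 0U;
* }
* @endcode
*/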
struct pva_cmd_bind_vmem_slot {
#define PVA_CMD_OPCODE_BIND_VMEM_SLOT 27U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t slot_id;
uint8_t pad[2];
uint32_t symbol_id;
uint32_t offset;
};
/** @brief Unregisters a resource.
*
* This command immediately removes the specified resource from the resource
* table upon execution. However, FW does not immediately notify KMD to
* deallocate the resource as it may still be in use by other concurrently
* running command buffers in the same context.
*
* The FW takes note of the currently running command buffers and notifies the
* KMD to deallocate the resource once these command buffers have completed
* their execution.
*
* @note If a command buffer in the same context either hangs or executes for an
* extended period, no resources can be effectively freed, potentially leading
* to resource exhaustion.
*/
struct pva_cmd_unregister_resource {
#define PVA_CMD_OPCODE_UNREGISTER_RESOURCE 28U
struct pva_cmd_header header;
uint32_t resource_id;
};
/** Write instance parameter to a VMEM symbol. */
struct pva_cmd_set_vpu_instance_parameter {
#define PVA_CMD_OPCODE_SET_VPU_INSTANCE_PARAMETER 29U
struct pva_cmd_header header;
uint32_t symbol_id;
};
struct pva_cmd_run_unit_tests {
#define PVA_CMD_OPCODE_RUN_UNIT_TESTS 30U
struct pva_cmd_header header;
#define PVA_FW_UTESTS_MAX_ARGC 16U
uint8_t argc;
uint8_t pad[3];
uint32_t in_resource_id;
uint32_t in_offset;
uint32_t in_size;
uint32_t out_resource_id;
uint32_t out_offset;
uint32_t out_size;
};
struct pva_cmd_set_vpu_print_cb {
#define PVA_CMD_OPCODE_SET_VPU_PRINT_CB 31U
struct pva_cmd_header header;
uint32_t cb_resource_id;
uint32_t cb_offset;
};
struct pva_cmd_invalidate_l2sram {
#define PVA_CMD_OPCODE_INVALIDATE_L2SRAM 32U
struct pva_cmd_header header;
uint8_t dram_offset_hi;
uint8_t pad[3];
uint32_t dram_resource_id;
uint32_t dram_offset_lo;
uint32_t l2sram_size;
};
struct pva_cmd_flush_l2sram {
#define PVA_CMD_OPCODE_FLUSH_L2SRAM 33U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
};
struct pva_cmd_err_inject {
#define PVA_CMD_OPCODE_ERR_INJECT 34U
struct pva_cmd_header header;
enum pva_error_inject_codes err_inject_code;
};
struct pva_cmd_patch_l2sram_offset {
#define PVA_CMD_OPCODE_PATCH_L2SRAM_OFFSET 35U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t slot_id;
uint8_t pad[2];
uint32_t offset;
};
/** After retiring a barrier group, all future commands which refer to that barrier group id will be
* mapped to a new logical barrier group. This allows re-using barrier ids within a command buffer.
*/
struct pva_cmd_retire_barrier_group {
#define PVA_CMD_OPCODE_RETIRE_BARRIER_GROUP 36U
struct pva_cmd_header header;
};
#define PVA_CMD_OPCODE_COUNT 37U
struct pva_cmd_init_resource_table {
#define PVA_CMD_OPCODE_INIT_RESOURCE_TABLE (0U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
/** Resource table ID, from 0 to 7. ID 0 is the device's resource table;
* IDs 1-7 belong to users. */
uint8_t resource_table_id;
uint8_t resource_table_addr_hi;
uint8_t pad[2];
uint32_t resource_table_addr_lo;
uint32_t max_n_entries;
};
struct pva_cmd_deinit_resource_table {
#define PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE (1U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t resource_table_id;
uint8_t pad[3];
};
struct pva_cmd_update_resource_table {
#define PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE (2U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t resource_table_id;
uint8_t pad[3];
uint32_t resource_id;
struct pva_resource_entry entry;
};
struct pva_cmd_init_queue {
#define PVA_CMD_OPCODE_INIT_QUEUE (3U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t ccq_id;
uint8_t queue_id;
uint8_t queue_addr_hi;
uint8_t pad;
uint32_t queue_addr_lo;
uint32_t max_n_submits;
};
struct pva_cmd_deinit_queue {
#define PVA_CMD_OPCODE_DEINIT_QUEUE (4U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t ccq_id;
uint8_t queue_id;
uint8_t pad[2];
};
struct pva_cmd_enable_fw_profiling {
#define PVA_CMD_OPCODE_ENABLE_FW_PROFILING (5U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t buffer_offset_hi;
uint8_t timestamp_type;
uint8_t pad[2];
uint32_t buffer_resource_id;
uint32_t buffer_size;
uint32_t buffer_offset_lo;
uint32_t filter;
};
struct pva_cmd_disable_fw_profiling {
#define PVA_CMD_OPCODE_DISABLE_FW_PROFILING (6U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_get_tegra_stats {
#define PVA_CMD_OPCODE_GET_TEGRA_STATS (7U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t buffer_offset_hi;
bool enabled;
uint8_t pad[2];
uint32_t buffer_resource_id;
uint32_t buffer_size;
uint32_t buffer_offset_lo;
};
struct pva_cmd_suspend_fw {
#define PVA_CMD_OPCODE_SUSPEND_FW (8U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_resume_fw {
#define PVA_CMD_OPCODE_RESUME_FW (9U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
#define PVA_CMD_PRIV_OPCODE_COUNT 10U
#define PVA_MAX_CMDBUF_CHUNK_LEN 1024
#define PVA_MAX_CMDBUF_CHUNK_SIZE (sizeof(uint32_t) * PVA_MAX_CMDBUF_CHUNK_LEN)
#endif // PVA_API_CMDBUF_H

View File

@@ -0,0 +1,222 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#ifndef PVA_API_CUDA_H
#define PVA_API_CUDA_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cuda.h"
#include "pva_api_types.h"
/**
* @brief Structure for cuExtend queue data needed for command submission.
*/
struct pva_cuextend_queue_data {
/*! Holds a pointer to pva queue object */
struct pva_queue *queue;
/*! Holds engine affinity for command submission */
uint32_t affinity;
};
/**
* @brief Function type for cuExtend register memory callback
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] mem The pointer to a \ref pva_memory object. This register memory callback shall transfer
* ownership of the memory to the client, and it is the client's responsibility to release the memory.
* @param[in] cuda_ptr CUDA device pointer.
* @param[in] cached_flags The cached flags for the memory.
* @return \ref pva_error The completion status of register memory operation.
*/
typedef enum pva_error (*pva_cuextend_memory_register)(void *callback_args,
struct pva_memory *mem,
void *cuda_ptr,
uint32_t cached_flags);
/**
* @brief Function type for cuExtend unregister memory callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] cuda_ptr CUDA device pointer.
* @return \ref pva_error The completion status of unregister memory operation.
*/
typedef enum pva_error (*pva_cuextend_memory_unregister)(void *callback_args,
void *cuda_ptr);
/**
* @brief Function type for cuExtend register stream callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[out] stream_payload Client data associated with a CUDA stream.
* @param[in] flags Reserved for future use. Must be set to 0.
* @return \ref pva_error The completion status of register stream operation.
*/
typedef enum pva_error (*pva_cuextend_stream_register)(void *callback_args,
void **stream_payload,
uint64_t flags);
/**
* @brief Function type for cuExtend unregister stream callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register.
* @param[in] flags Reserved for future use. Must be set to 0.
* @return \ref pva_error The completion status of unregister stream operation.
*/
typedef enum pva_error (*pva_cuextend_stream_unregister)(void *callback_args,
void *stream_payload,
uint64_t flags);
/**
* @brief Function type for cuExtend acquire queue callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register.
* @param[out] queue_data Output pointer to a pva_cuextend_queue_data object.
* @return \ref pva_error The completion status of acquire queue operation.
*/
typedef enum pva_error (*pva_cuextend_queue_acquire)(
void *callback_args, void *stream_payload,
struct pva_cuextend_queue_data **queue_data);
/**
* @brief Function type for cuExtend release queue callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register.
* @param[in] queue_data Pointer to the pva_cuextend_queue_data object being released.
* @return \ref pva_error The completion status of the release queue operation.
*/
typedef enum pva_error (*pva_cuextend_queue_release)(void *callback_args,
void *stream_payload,
void *queue_data);
/**
* @brief Function type for retrieving error code from cuExtend.
*
* @param[in] teardown_ctx Pointer to the cuExtend teardown context.
*/
typedef enum pva_error (*pva_cuextend_get_error)(void *teardown_ctx);
/**
* @brief Function type for cuExtend teardown callback.
*
* The client is expected to take the following action in this callback:
* block waiting for all pending tasks on all queues, periodically checking for a CUDA error
* by calling \ref pva_cuextend_get_error and exiting the wait loop if an error occurs.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] teardown_ctx Pointer to a teardown context passed by cuExtend teardown callback.
* @param[in] get_error Function pointer for retrieving the CUDA error.
* @return \ref pva_error The completion status of the teardown operation.
*/
typedef enum pva_error (*pva_cuextend_teardown)(
void *callback_args, void *teardown_ctx,
pva_cuextend_get_error get_error);
/**
* @brief Structure for cuExtend callbacks provided by the caller during cuExtend initialization.
*/
struct pva_cuextend_callbacks {
/*! Holds the register memory callback */
pva_cuextend_memory_register mem_reg;
/*! Holds the unregister memory callback */
pva_cuextend_memory_unregister mem_unreg;
/*! Holds the register stream callback */
pva_cuextend_stream_register stream_reg;
/*! Holds the unregister stream callback */
pva_cuextend_stream_unregister stream_unreg;
/*! Holds the acquire queue callback */
pva_cuextend_queue_acquire queue_acquire;
/*! Holds the release queue callback */
pva_cuextend_queue_release queue_release;
/*! Holds the teardown callback */
pva_cuextend_teardown teardown;
/*! Pointer to the callback arguments provided by client during cuExtend initialization */
void *args;
};
/**
* @brief Initialize cuExtend context.
*
* This function must be called before any other cuExtend functions. It does the following:
*
* 1. Loads the cuExtend library and retrieves function pointers to the library's exported functions.
* 2. Adds PVA to the CUDA unified context model.
* 3. Initializes the opaque cuExtend impl pointer.
*
* @param[in] ctx Pointer to a PVA context object.
* @param[in] callbacks Pointer to CUDA interop callbacks.
* @return \ref pva_error The completion status of the initialization operation.
*/
enum pva_error pva_cuextend_init(struct pva_context *ctx,
struct pva_cuextend_callbacks *callbacks);
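/**
* Illustrative sketch (an assumption, not from the original documentation) of
* wiring client callbacks into cuExtend initialization. The my_* callbacks and
* struct my_client_state are hypothetical client code; only the callbacks
* structure and pva_cuextend_init come from this header.
*
* @code
* static enum pva_error init_cuda_interop(struct pva_context *ctx,
*                                         struct my_client_state *state)
* {
*     struct pva_cuextend_callbacks cbs = { 0 };
*
*     cbs.mem_reg = my_mem_register;          // hypothetical client callbacks,
*     cbs.mem_unreg = my_mem_unregister;      // matching the typedefs above
*     cbs.stream_reg = my_stream_register;
*     cbs.stream_unreg = my_stream_unregister;
*     cbs.queue_acquire = my_queue_acquire;
*     cbs.queue_release = my_queue_release;
*     cbs.teardown = my_teardown;
*     cbs.args = state;
*
*     return pva_cuextend_init(ctx, &cbs);
* }
* @endcode
*/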
/**
* @brief De-initialize cuExtend context.
*
* This function must be called from the client's context destructor. It does the following:
*
* 1. Clears the opaque cuExtend impl pointer in the pva context object.
* 2. Removes PVA from the cuExtend context.
* 3. Unloads the cuExtend library and clears all the function pointers.
*
* @param[in] ctx Pointer to a PVA context object.
* @return \ref pva_error The completion status of the de-initialization operation.
*/
enum pva_error pva_cuextend_deinit(struct pva_context *ctx);
/**
* @brief Import a memory region from a CUDA context into a PVA context.
*
* @param[in] ctx Pointer to a PVA context structure.
* @param[in] cuda_ptr Pointer to CUDA memory provided by client.
* @param[in] size Size of the memory region.
* @param[in] access_mode Access flag provided by the client.
* @param[out] out_mem Pointer to the imported memory object.
* @param[out] cached_flags Output cached flags for the memory.
* @return \ref pva_error The completion status of the import operation.
*/
enum pva_error pva_cuextend_memory_import(struct pva_context *ctx,
void *cuda_ptr, uint64_t size,
uint32_t access_mode,
struct pva_memory **out_mem,
uint32_t *cached_flags);
/**
* @brief Submit a batch of command buffers via a CUDA stream.
*
* @param[in] queue Pointer to the queue. If queue is not NULL, this API will try to submit the client tasks to this queue directly.
* Otherwise, it will call queue_acquire callback to query a pva_queue object from stream payload, and then submit
* the tasks to the queried queue.
* @param[in] stream A CUDA stream.
* @param[in] submit_infos Array of submit info structures.
* @param[in] count Number of submit info structures.
* @param[in] timeout_ms Timeout in milliseconds. PVA_TIMEOUT_INF for infinite.
* @return \ref pva_error The completion status of the submit operation.
*
* @note Concurrent submission to the same queue needs to be serialized by the
* caller.
*/
enum pva_error
pva_cuextend_cmdbuf_batch_submit(struct pva_queue *queue, CUstream stream,
struct pva_cmdbuf_submit_info *submit_infos,
uint32_t count, uint64_t timeout_ms);
#ifdef __cplusplus
}
#endif
#endif // PVA_API_CUDA_H

View File

@@ -0,0 +1,343 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_DMA_H
#define PVA_API_DMA_H
#include "pva_api_types.h"
/** Bit indices for VPU GPIO triggers */
enum pva_gpio_bit {
GPIO_VPU_CFG_BIT = 4U,
GPIO_READ0_BIT = 16U,
GPIO_READ1_BIT = 17U,
GPIO_READ2_BIT = 18U,
GPIO_READ3_BIT = 19U,
GPIO_READ4_BIT = 20U,
GPIO_READ5_BIT = 21U,
GPIO_READ6_BIT = 22U,
GPIO_WRITE0_BIT = 23U,
GPIO_WRITE1_BIT = 24U,
GPIO_WRITE2_BIT = 25U,
GPIO_WRITE3_BIT = 26U,
GPIO_WRITE4_BIT = 27U,
GPIO_WRITE5_BIT = 28U,
GPIO_WRITE6_BIT = 29U
};
enum pva_dma_descriptor_id {
PVA_DMA_DESC_NONE = 0,
PVA_DMA_DESC0 = 1,
PVA_DMA_DESC1 = 2,
PVA_DMA_DESC2 = 3,
PVA_DMA_DESC3 = 4,
PVA_DMA_DESC4 = 5,
PVA_DMA_DESC5 = 6,
PVA_DMA_DESC6 = 7,
PVA_DMA_DESC7 = 8,
PVA_DMA_DESC8 = 9,
PVA_DMA_DESC9 = 10,
PVA_DMA_DESC10 = 11,
PVA_DMA_DESC11 = 12,
PVA_DMA_DESC12 = 13,
PVA_DMA_DESC13 = 14,
PVA_DMA_DESC14 = 15,
PVA_DMA_DESC15 = 16,
PVA_DMA_DESC16 = 17,
PVA_DMA_DESC17 = 18,
PVA_DMA_DESC18 = 19,
PVA_DMA_DESC19 = 20,
PVA_DMA_DESC20 = 21,
PVA_DMA_DESC21 = 22,
PVA_DMA_DESC22 = 23,
PVA_DMA_DESC23 = 24,
PVA_DMA_DESC24 = 25,
PVA_DMA_DESC25 = 26,
PVA_DMA_DESC26 = 27,
PVA_DMA_DESC27 = 28,
PVA_DMA_DESC28 = 29,
PVA_DMA_DESC29 = 30,
PVA_DMA_DESC30 = 31,
PVA_DMA_DESC31 = 32,
PVA_DMA_DESC32 = 33,
PVA_DMA_DESC33 = 34,
PVA_DMA_DESC34 = 35,
PVA_DMA_DESC35 = 36,
PVA_DMA_DESC36 = 37,
PVA_DMA_DESC37 = 38,
PVA_DMA_DESC38 = 39,
PVA_DMA_DESC39 = 40,
PVA_DMA_DESC40 = 41,
PVA_DMA_DESC41 = 42,
PVA_DMA_DESC42 = 43,
PVA_DMA_DESC43 = 44,
PVA_DMA_DESC44 = 45,
PVA_DMA_DESC45 = 46,
PVA_DMA_DESC46 = 47,
PVA_DMA_DESC47 = 48,
PVA_DMA_DESC48 = 49,
PVA_DMA_DESC49 = 50,
PVA_DMA_DESC50 = 51,
PVA_DMA_DESC51 = 52,
PVA_DMA_DESC52 = 53,
PVA_DMA_DESC53 = 54,
PVA_DMA_DESC54 = 55,
PVA_DMA_DESC55 = 56,
PVA_DMA_DESC56 = 57,
PVA_DMA_DESC57 = 58,
PVA_DMA_DESC58 = 59,
PVA_DMA_DESC59 = 60,
PVA_DMA_DESC60 = 61,
PVA_DMA_DESC61 = 62,
PVA_DMA_DESC62 = 63,
PVA_DMA_DESC63 = 64
};
/**
* The values of the enum members conform to the definitions of DMA descriptors'
* trig_vpu_events field. Therefore, they can be assigned to trig_vpu_events
* directly.
*/
enum pva_dma_trigger {
PVA_DMA_NO_TRIG = 0,
PVA_DMA_TRIG_READ0,
PVA_DMA_TRIG_WRITE0,
PVA_DMA_TRIG_VPU_CFG,
PVA_DMA_TRIG_READ1,
PVA_DMA_TRIG_WRITE1,
PVA_DMA_TRIG_READ2,
PVA_DMA_TRIG_WRITE2,
PVA_DMA_TRIG_READ3,
PVA_DMA_TRIG_WRITE3,
PVA_DMA_TRIG_READ4,
PVA_DMA_TRIG_WRITE4,
PVA_DMA_TRIG_READ5,
PVA_DMA_TRIG_WRITE5,
PVA_DMA_TRIG_READ6,
PVA_DMA_TRIG_WRITE6,
PVA_DMA_TRIG_HWSEQ_RD,
PVA_DMA_TRIG_HWSEQ_WR,
};
enum pva_dma_trigger_mode {
PVA_DMA_TRIG_MODE_DIS = 0,
PVA_DMA_TRIG_MODE_4TH_DIM,
PVA_DMA_TRIG_MODE_3RD_DIM,
PVA_DMA_TRIG_MODE_TILE
};
enum pva_dma_transfer_mode {
PVA_DMA_TRANS_MODE_INVALID = 0,
PVA_DMA_TRANS_MODE_DRAM = 1,
PVA_DMA_TRANS_MODE_VMEM = 2,
PVA_DMA_TRANS_MODE_L2SRAM = 3,
PVA_DMA_TRANS_MODE_TCM = 4,
/** MMIO is valid as dst in VPU config mode only */
PVA_DMA_TRANS_MODE_MMIO = 5,
PVA_DMA_TRANS_MODE_RSVD = 5,
/** VPU config mode, valid for src only */
PVA_DMA_TRANS_MODE_VPUCFG = 7
};
struct pva_dma_transfer_attr {
uint8_t rpt1;
uint8_t rpt2;
uint8_t rpt3;
uint8_t cb_enable;
uint8_t transfer_mode;
/** When dynamic slot flag is set, it means the memory location will be
* relocated by commands.
*/
#define PVA_DMA_DYNAMIC_SLOT (1 << 15)
#define PVA_DMA_STATIC_SLOT (1 << 14)
#define PVA_DMA_SLOT_INVALID 0
#define PVA_DMA_SLOT_ID_MASK 0xFF
#define PVA_DMA_MAX_NUM_SLOTS 256
uint16_t slot;
/** Line pitch in pixels */
uint16_t line_pitch;
uint32_t cb_start;
uint32_t cb_size;
int32_t adv1;
int32_t adv2;
int32_t adv3;
uint64_t offset;
};
struct pva_dma_descriptor {
/**
* Linked descriptor ID
*
* - 0: No linked descriptor
* - N (> 0): Linking to descriptor N - 1 in the descriptor array
*/
uint8_t link_desc_id;
uint8_t px;
uint8_t py;
/** enum pva_dma_trigger_mode */
uint8_t trig_event_mode;
/** Trigger from enum pva_dma_trigger */
uint8_t trig_vpu_events;
uint8_t desc_reload_enable;
/**
* Log2(number bytes per pixel).
*
* - 0: 1 byte per pixel
* - 1: 2 bytes per pixel
* - 2: 4 bytes per pixel
* - others: invalid
*/
uint8_t log2_pixel_size;
uint8_t px_direction;
uint8_t py_direction;
uint8_t boundary_pixel_extension;
/** TCM transfer size */
uint8_t tts;
/**
* - 0: transfer true completion disabled
* - 1: transfer true completion enabled
*/
uint8_t trans_true_completion;
uint8_t prefetch_enable;
uint16_t tx;
uint16_t ty;
uint16_t dst2_slot;
uint32_t dst2_offset;
struct pva_dma_transfer_attr src;
struct pva_dma_transfer_attr dst;
};
struct pva_dma_channel {
/**
* Starting descriptor index in the descriptor array
*
* Valid range is [0, max_num_descriptors - 1]. This is different from
* link_desc_id field, where 0 means no linked descriptor.
*/
uint8_t desc_index;
uint8_t vdb_count;
uint8_t vdb_offset;
uint8_t req_per_grant;
uint8_t prefetch_enable;
uint8_t ch_rep_factor;
uint8_t hwseq_enable;
uint8_t hwseq_traversal_order;
uint8_t hwseq_tx_select;
uint8_t hwseq_trigger_done;
uint8_t hwseq_frame_count;
uint8_t hwseq_con_frame_seq;
uint16_t hwseq_start;
uint16_t hwseq_end;
uint16_t adb_count;
uint16_t adb_offset;
/*!
* Holds the trigger signal this channel will react to.
*
* IAS:
* DMA_COMMON_DMA_OUTPUT_ENABLEn (4 Bytes)
*
* Mapping:
* chanId corresponding to this structure is allocated by KMD.
* DMA_COMMON_DMA_OUTPUT_ENABLE0.bit[chanId] = outputEnableMask.bit[0];
* DMA_COMMON_DMA_OUTPUT_ENABLE0.bit[16 + chanId] = outputEnableMask.bit[1];
* DMA_COMMON_DMA_OUTPUT_ENABLE1.bit[chanId] = outputEnableMask.bit[2];
* DMA_COMMON_DMA_OUTPUT_ENABLE1.bit[16 + chanId] = outputEnableMask.bit[3];
* DMA_COMMON_DMA_OUTPUT_ENABLE2.bit[chanId] = outputEnableMask.bit[4];
* DMA_COMMON_DMA_OUTPUT_ENABLE2.bit[16 + chanId] = outputEnableMask.bit[5];
* DMA_COMMON_DMA_OUTPUT_ENABLE3.bit[chanId] = outputEnableMask.bit[6];
* DMA_COMMON_DMA_OUTPUT_ENABLE3.bit[16 + chanId] = outputEnableMask.bit[7];
* DMA_COMMON_DMA_OUTPUT_ENABLE4.bit[chanId] = outputEnableMask.bit[8];
* DMA_COMMON_DMA_OUTPUT_ENABLE4.bit[16 + chanId] = outputEnableMask.bit[9];
* DMA_COMMON_DMA_OUTPUT_ENABLE5.bit[chanId] = outputEnableMask.bit[10];
* DMA_COMMON_DMA_OUTPUT_ENABLE5.bit[16 + chanId] = outputEnableMask.bit[11];
* DMA_COMMON_DMA_OUTPUT_ENABLE6.bit[chanId] = outputEnableMask.bit[12];
* DMA_COMMON_DMA_OUTPUT_ENABLE6.bit[16 + chanId] = outputEnableMask.bit[13];
* DMA_COMMON_DMA_OUTPUT_ENABLE7.bit[chanId] = outputEnableMask.bit[14];
* DMA_COMMON_DMA_OUTPUT_ENABLE8.bit[chanId] = outputEnableMask.bit[15];
* DMA_COMMON_DMA_OUTPUT_ENABLE8.bit[16 + chanId] = outputEnableMask.bit[16];
*/
uint32_t output_enable_mask;
uint32_t pad_value;
};
struct pva_dma_config_header {
/* To make the allocation and tracking of DMA resources efficient, DMA resources
* are allocated in groups. For example, descriptors may be allocated in groups of 4, which
* means that every allocation of descriptors will start at an alignment of 4. The following
* macros control the alignment/grouping requirement of DMA resources.
*/
// TODO: Add compile-time asserts to ensure the following alignment requirements don't result
// in fractional resource partitions?
#define PVA_DMA_CHANNEL_ALIGNMENT 1
#define PVA_DMA_DESCRIPTOR_ALIGNMENT 4
#define PVA_DMA_ADB_ALIGNMENT 16
#define PVA_DMA_HWSEQ_WORD_ALIGNMENT 128
uint8_t base_channel;
uint8_t base_descriptor;
uint8_t num_channels;
uint8_t num_descriptors;
uint16_t num_static_slots;
uint16_t num_dynamic_slots;
uint16_t base_hwseq_word;
uint16_t num_hwseq_words;
uint32_t vpu_exec_resource_id;
/* For serialized version of pva_dma_config, the following fields follow
* immediately after this header. The starting addresses of these fields
* must be aligned to 8 bytes */
/* An array of hwseq words */
/* An array of pva_dma_channel */
/* An array of pva_dma_descriptor */
/* An array of pva_dma_slot_buffer */
};
enum pva_dma_static_binding_type {
PVA_DMA_STATIC_BINDING_INVALID = 0,
PVA_DMA_STATIC_BINDING_DRAM,
PVA_DMA_STATIC_BINDING_VMEM,
};
/** Max block height is 32 GOB */
#define PVA_DMA_MAX_LOG2_BLOCK_HEIGHT 5
struct pva_dma_dram_binding {
/** enum pva_surface_format */
uint8_t surface_format;
uint8_t log2_block_height;
uint32_t resource_id;
uint64_t surface_base_offset;
uint64_t slot_offset;
};
struct pva_dma_vmem_binding {
struct pva_vmem_addr addr;
};
struct pva_dma_static_binding {
/** enum pva_dma_static_binding_type */
uint8_t type;
union {
struct pva_dma_dram_binding dram;
struct pva_dma_vmem_binding vmem;
};
};
struct pva_dma_config {
struct pva_dma_config_header header;
uint32_t *hwseq_words;
struct pva_dma_channel *channels;
struct pva_dma_descriptor *descriptors;
struct pva_dma_static_binding *static_bindings;
};
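/**
* Illustrative sketch (an assumption, not from the original documentation): a
* minimal DMA configuration with one channel driving one descriptor and no
* hardware sequencer words. Tile sizes are placeholders and memset is assumed
* to be available through the existing includes; only the structure layouts
* come from this header.
*
* @code
* static void build_minimal_dma_config(struct pva_dma_config *cfg,
*                                      struct pva_dma_channel *chan,
*                                      struct pva_dma_descriptor *desc,
*                                      uint32_t vpu_exec_resource_id)
* {
*     memset(cfg, 0, sizeof(*cfg));
*     memset(chan, 0, sizeof(*chan));
*     memset(desc, 0, sizeof(*desc));
*
*     cfg->header.num_channels = 1U;
*     cfg->header.num_descriptors = 1U;
*     cfg->header.vpu_exec_resource_id = vpu_exec_resource_id;
*
*     chan->desc_index = 0U;       // start at descriptor 0 of the array below
*     desc->link_desc_id = 0U;     // no linked descriptor
*     desc->log2_pixel_size = 0U;  // 1 byte per pixel
*     desc->tx = 64U;              // placeholder tile width in pixels
*     desc->ty = 16U;              // placeholder tile height
*
*     cfg->hwseq_words = NULL;
*     cfg->channels = chan;
*     cfg->descriptors = desc;
*     cfg->static_bindings = NULL;
* }
* @endcode
*/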
#endif // PVA_API_DMA_H

View File

@@ -0,0 +1,202 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_NVSCI_H
#define PVA_API_NVSCI_H
#ifdef __cplusplus
extern "C" {
#endif
#include "pva_api_types.h"
#include "nvscibuf.h"
#include "nvscisync.h"
/**
* @brief Fill NvSciBuf attributes required by PVA.
*
* @param[out] scibuf_attr The NvSciBuf attribute list to be filled with PVA-specific attributes.
*/
enum pva_error pva_nvsci_buf_fill_attrs(NvSciBufAttrList scibuf_attr);
/**
* @brief Fill NvSciSync attributes required by PVA.
*
* @param[in] access_mode Access mode for the sync object, determining how PVA
* will interact with the sync object (read, write, etc.)
* @param[out] attr_list The NvSciSync attribute list to be populated with attributes.
*/
enum pva_error pva_nvsci_sync_fill_attrs(uint32_t access_mode,
NvSciSyncAttrList attr_list);
/**
* @brief Holds the metadata for a NvSci plane.
*/
struct pva_plane_attrs {
uint32_t line_pitch;
uint32_t width_in_bytes;
uint32_t height;
uint64_t offset;
};
#define PVA_SURFACE_ATTRS_MAX_NUM_PLANES 6U
/**
* @brief Holds the metadata for a NvSci surface.
*/
struct pva_surface_attrs {
bool is_surface;
enum pva_surface_format format;
uint32_t n_planes;
uint64_t size;
struct pva_plane_attrs planes[PVA_SURFACE_ATTRS_MAX_NUM_PLANES];
uint8_t log2_gobs_per_block_y[PVA_SURFACE_ATTRS_MAX_NUM_PLANES];
};
/**
* @brief Import an NvSciBuf object into PVA.
*
* This function imports an NvSciBuf buffer object into PVA for further
* operations. It creates a PVA memory object representing the buffer and
* retrieves surface information about the buffer.
*
* The caller is responsible for freeing the PVA memory object.
*
* @param[in] obj The NvSciBuf object to be imported.
* @param[in] access_mode Access mode for the buffer, determining the PVA's permissions for interaction.
* @param[out] out_obj A pointer to the PVA memory object representing the imported buffer.
* @param[out] out_surf_info Surface metadata of the buffer
*/
enum pva_error pva_nvsci_buf_import(NvSciBufObj obj, uint32_t access_mode,
struct pva_memory **out_obj,
struct pva_surface_attrs *out_surf_info);
/**
* @brief An opaque object representing an imported NvSciSync object.
*/
struct pva_nvsci_syncobj;
/**
* @brief Describes the attributes of an imported NvSciSync object.
*
* This structure contains details about the memory buffers associated with the
* imported NvSciSync object.
*/
struct pva_nvsci_syncobj_attrs {
struct pva_memory *
semaphore_buf; /**< Pointer to the semaphore memory buffer; NULL if syncpoints are used. */
struct pva_memory *
timestamp_buf; /**< Pointer to the timestamp memory buffer; NULL if unused. */
struct pva_memory
*status_buf; /**< Pointer to the status memory buffer. */
};
/**
* @brief Import an NvSciSync object into the PVA.
*
* This function imports an NvSciSync object into PVA, enabling it to be used
* for synchronization of operations.
*
* @param[in] ctx The PVA context in which the sync object is to be used.
* @param[in] nvsci_obj The NvSciSync object to be imported.
* @param[in] access_mode The access mode for the sync object, indicating how PVA will use it.
* @param[out] out_obj A pointer to the resulting PVA sync object handle.
*/
enum pva_error pva_nvsci_syncobj_import(struct pva_context *ctx,
NvSciSyncObj nvsci_obj,
uint32_t access_mode,
struct pva_nvsci_syncobj **out_obj);
/**
* @brief Retrieve the attributes of an imported NvSciSync object.
*
* This function fills in the provided attribute structure with details from
* the imported NvSciSync object, including information relevant for semaphores,
* timestamps, and status.
*
* @param[in] syncobj The NvSciSync object whose attributes are to be retrieved.
* @param[out] out_attrs The structure to be filled with the sync object's attributes.
*/
void pva_nvsci_syncobj_get_attrs(struct pva_nvsci_syncobj const *syncobj,
struct pva_nvsci_syncobj_attrs *out_attrs);
/**
* @brief Free an imported NvSciSync object.
*
* This function releases the resources associated with a PVA NvSciSync object,
* including PVA memory objects for semaphores, timestamps and statuses.
*
* @param[in] syncobj The PVA sync object to be freed.
*/
void pva_nvsci_syncobj_free(struct pva_nvsci_syncobj *syncobj);
/**
* @brief Get the next status slot for a new fence.
*
* @param[in] syncobj The imported NvSciSyncObj
* @param[out] out_status_slot The status slot index for the next fence.
*/
enum pva_error pva_nvsci_syncobj_next_status(struct pva_nvsci_syncobj *syncobj,
uint32_t *out_status_slot);
/**
* @brief Get the next timestamp slot for a new fence.
*
* @param[in] syncobj The imported NvSciSyncObj
* @param[out] out_timestamp_slot The timestamp slot index for the next fence.
*/
enum pva_error
pva_nvsci_syncobj_next_timestamp(struct pva_nvsci_syncobj *syncobj,
uint32_t *out_timestamp_slot);
/**
* @brief Fence data for import and export.
*/
struct pva_nvsci_fence_info {
uint32_t index; /**< The index of the fence. */
uint32_t value; /**< The value of the fence. */
uint32_t status_slot; /**< The slot index for the status. */
uint32_t timestamp_slot; /**< The slot index for the timestamp. */
};
/**
* @brief Import a NvSciSync fence into a PVA fence.
*
* @param[in] nvsci_fence The NvSciSync fence to be imported.
* @param[in] pva_syncobj The previously imported NvSciSyncObj that's associated with the fence.
* @param[out] out_fence_info The information about the NvSci fence. It can be used to fill a pva_fence.
*
* @note This function only fills the index and value field of the pva_fence.
* The user needs to set the semaphore resource ID if the sync object is a
* semaphore.
*
*/
enum pva_error
pva_nvsci_fence_import(NvSciSyncFence const *nvsci_fence,
struct pva_nvsci_syncobj const *pva_syncobj,
struct pva_nvsci_fence_info *out_fence_info);
/**
* @brief Export a PVA fence into an NvSciSync fence.
*
* @param[in] fence_info The information about the fence to be exported.
* @param[in] syncobj The previously imported NvSciSyncObj that's associated with the fence.
* @param[out] out_nvsci_fence The resulting NvSciSync fence object.
*/
enum pva_error
pva_nvsci_fence_export(struct pva_nvsci_fence_info const *fence_info,
struct pva_nvsci_syncobj const *syncobj,
NvSciSyncFence *out_nvsci_fence);
#ifdef __cplusplus
}
#endif
#endif // PVA_API_NVSCI_H

View File

@@ -0,0 +1,396 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_TYPES_H
#define PVA_API_TYPES_H
#if !defined(__KERNEL__)
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#define container_of(ptr, type, member) \
(type *)((char *)(ptr) - (char *)&((type *)0)->member)
#else
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#define UINT64_MAX U64_MAX
#define UINT32_MAX U32_MAX
#endif
#ifndef NULL
#define NULL ((void *)0)
#endif
#define FOREACH_ERR(ACT) \
ACT(PVA_SUCCESS) \
ACT(PVA_UNKNOWN_ERROR) \
ACT(PVA_BAD_PARAMETER_ERROR) \
ACT(PVA_NOT_IMPL) \
ACT(PVA_NOENT) \
ACT(PVA_NOMEM) \
ACT(PVA_INVAL) \
ACT(PVA_TIMEDOUT) \
ACT(PVA_INTERNAL) \
ACT(PVA_CMDBUF_NOT_FOUND) \
ACT(PVA_CMDBUF_INVALID) \
ACT(PVA_CMDBUF_TOO_LARGE) \
ACT(PVA_RES_OUT_OF_RANGE) \
ACT(PVA_AGAIN) \
ACT(PVA_NO_RESOURCE_ID) \
ACT(PVA_INVALID_RESOURCE) \
ACT(PVA_INVALID_RESOURCE_SIZE) \
ACT(PVA_INVALID_RESOURCE_ALIGNMENT) \
ACT(PVA_QUEUE_FULL) \
ACT(PVA_INVALID_IOVA) \
ACT(PVA_NO_PERM) \
ACT(PVA_INVALID_CMD_OPCODE) \
ACT(PVA_BUF_OUT_OF_RANGE) \
ACT(PVA_CMDBUF_NO_BEGIN) \
ACT(PVA_NO_CCQ) \
ACT(PVA_INPUT_STATUS_ERROR) \
ACT(PVA_ENOSPC) \
ACT(PVA_EACCES) \
ACT(PVA_ERANGE) \
ACT(PVA_BAD_SURFACE_BASE_ALIGNMENT) \
ACT(PVA_BAD_DESC_ADDR_ALIGNMENT) \
ACT(PVA_INVALID_DMA_CONFIG) \
ACT(PVA_INVALID_SYMBOL) \
ACT(PVA_INVALID_BINDING) \
ACT(PVA_EINTR) \
ACT(PVA_FILL_NVSCIBUF_ATTRS_FAILED) \
ACT(PVA_NVSCIBUF_SET_ATTR_FAILED) \
ACT(PVA_IMPORT_FROM_NVSCIBUF_FAILED) \
ACT(PVA_NVSCISYNC_SET_ATTR_FAILED) \
ACT(PVA_RETRIEVE_DATA_FROM_NVSCISYNC_FAILED) \
ACT(PVA_UPDATE_DATA_TO_NVSCISYNC_FAILED) \
ACT(PVA_UNSUPPORTED_NVSCISYNC_TIMESTAMP_FORMAT) \
ACT(PVA_INVALID_NVSCISYNC_FENCE) \
ACT(PVA_ERR_CMD_NOT_SUPPORTED) \
ACT(PVA_CUDA_INITIALIZED) \
ACT(PVA_CUDA_LOAD_LIBRARY_FAILED) \
ACT(PVA_CUDA_ADD_CLIENT_FAILED) \
ACT(PVA_CUDA_REMOVE_CLIENT_FAILED) \
ACT(PVA_CUDA_INIT_FAILED) \
ACT(PVA_CUDA_SUBMIT_FAILED) \
ACT(PVA_CUDA_GET_RM_HANDLE_FAILED) \
ACT(PVA_CUDA_INTERNAL_ERROR) \
ACT(PVA_ERR_CMD_INVALID_VPU_STATE) \
ACT(PVA_ERR_CMD_VMEM_BUF_OUT_OF_RANGE) \
ACT(PVA_ERR_CMD_L2SRAM_BUF_OUT_OF_RANGE) \
ACT(PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE) \
ACT(PVA_ERR_CMD_INVALID_BLOCK_HEIGHT) \
ACT(PVA_ERR_CMD_PAYLOAD_TOO_SMALL) \
ACT(PVA_ERR_CMD_ENGINE_NOT_ACQUIRED) \
ACT(PVA_ERR_CMD_INVALID_SYMBOL_TYPE) \
ACT(PVA_ERR_CMD_INVALID_ENGINE) \
ACT(PVA_ERR_CMD_INVALID_DMA_SET_ID) \
ACT(PVA_ERR_CMD_INVALID_DMA_SLOT_ID) \
ACT(PVA_ERR_CMD_INVALID_DMA_SLOT_TYPE) \
ACT(PVA_ERR_CMD_INVALID_USER_ALLOWANCE) \
ACT(PVA_ERR_CMD_INCOMPATIBLE_RESOURCE) \
ACT(PVA_ERR_CMD_INSUFFICIENT_PRIVILEGE) \
ACT(PVA_ERR_CMD_INVALID_BARRIER_ID) \
ACT(PVA_ERR_CMD_CAPTURE_SLOTS_EXCEEDED) \
ACT(PVA_ERR_CMD_INVALID_CAPTURE_MODE) \
ACT(PVA_ERR_CMD_INVALID_L2SRAM_POLICY) \
ACT(PVA_ERR_FW_DMA0_IRQ_ENABLE_FAILED) \
ACT(PVA_ERR_FW_DMA1_IRQ_ENABLE_FAILED) \
ACT(PVA_ERR_FW_BAD_DMA_STATE) \
ACT(PVA_ERR_FW_RESOURCE_IN_USE) \
ACT(PVA_ERR_FW_VPU_ERROR_STATE) \
ACT(PVA_ERR_FW_VPU_RETCODE_NONZERO) \
ACT(PVA_ERR_FW_INVALID_CMD_OPCODE) \
ACT(PVA_ERR_FW_INVALID_VPU_CMD_SEQ) \
ACT(PVA_ERR_FW_INVALID_DMA_CMD_SEQ) \
ACT(PVA_ERR_FW_INVALID_L2SRAM_CMD_SEQ) \
ACT(PVA_ERR_FW_ENGINE_NOT_RELEASED) \
ACT(PVA_ERR_FW_UTEST) \
ACT(PVA_ERR_VPU_ERROR_STATE) \
ACT(PVA_ERR_VPU_RETCODE_NONZERO) \
ACT(PVA_ERR_VPU_ILLEGAL_INSTR) \
ACT(PVA_ERR_VPU_DIVIDE_BY_0) \
ACT(PVA_ERR_VPU_FP_NAN) \
ACT(PVA_ERR_VPU_IN_DEBUG) \
ACT(PVA_ERR_VPU_DLUT_CFG) \
ACT(PVA_ERR_VPU_DLUT_MISS) \
ACT(PVA_ERR_VPU_CP_ACCESS) \
ACT(PVA_ERR_PPE_ILLEGAL_INSTR) \
ACT(PVA_ERR_MATH_OP) \
ACT(PVA_ERR_HWSEQ_INVALID) \
ACT(PVA_ERR_CODE_COUNT)
enum pva_error {
#define ADD_COMMA(name) name,
FOREACH_ERR(ADD_COMMA)
#undef ADD_COMMA
};
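/**
* Illustrative sketch: the FOREACH_ERR X-macro above can also expand into a
* name lookup, which is convenient for logging. This helper is an example,
* not part of the API.
*
* @code
* static inline const char *pva_error_name(enum pva_error err)
* {
*     switch (err) {
* #define ADD_CASE(name) case name: return #name;
*     FOREACH_ERR(ADD_CASE)
* #undef ADD_CASE
*     default:
*         return "UNKNOWN_PVA_ERROR";
*     }
* }
* @endcode
*/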
enum pva_chip_id {
PVA_CHIP_T19X,
PVA_CHIP_T23X,
PVA_CHIP_T26X,
PVA_CHIP_OTHERS
};
enum pva_hw_gen {
PVA_HW_GEN1,
PVA_HW_GEN2,
PVA_HW_GEN3,
};
/* Opaque API data types */
struct pva_context;
struct pva_queue;
struct pva_memory;
struct pva_memory_attrs {
uint32_t access_mode;
uint64_t offset;
uint64_t size;
};
/**
* @brief A memory address accessible by PVA.
*/
struct pva_dram_addr {
uint32_t resource_id;
uint64_t offset;
};
struct pva_vmem_addr {
uint32_t symbol_id;
uint32_t offset;
};
/**
* @brief Represents a synchronization fence, which can be associated with
* either a memory semaphore or a syncpoint for signaling or waiting operations.
*
* The UMD handles semaphores and syncpoints differently when used as
* postfences:
* - Semaphores: UMD does not track future values.
* - Syncpoints: UMD tracks future values.
*
* To use a semaphore for either prefences or postfences:
* - Set `semaphore_resource_id` to the resource ID of the memory backing the semaphore.
* - Set `index` to the byte offset divided by the semaphore size (`sizeof(uint32_t)`).
* - Set `value` to the semaphore's signaling or waiting value.
*
* To use syncpoint for prefences:
* - Set `semaphore_resource_id` to `PVA_RESOURCE_ID_INVALID`.
* - Set `index` to the syncpoint ID to wait for.
* - Set `value` to the waiting value.
*
* To use syncpoint for postfences:
* - Set `semaphore_resource_id` to `PVA_RESOURCE_ID_INVALID`.
* - Do not set `index` or `value`.
* - After submission, UMD will assign `index` to the queue syncpoint ID and `value` to the expected future value.
*/
struct pva_fence {
/** Resource ID of the memory semaphore. If resource ID is
* PVA_RESOURCE_ID_INVALID, then the sync object primitive is assumed to
* be syncpoint. */
uint32_t semaphore_resouce_id;
/** Represents either the semaphore index or the syncpoint ID, depending
* on the sync object primitive type.
*/
uint32_t index;
/** Represents the semaphore or syncpoint value used for signaling or
* waiting. */
uint32_t value;
};
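/**
* Illustrative sketch of filling a semaphore-backed fence following the rules
* above; the byte_offset parameter is a placeholder chosen by the caller.
*
* @code
* static void fill_semaphore_fence(struct pva_fence *fence,
*                                  uint32_t semaphore_resource_id,
*                                  uint64_t byte_offset, uint32_t value)
* {
*     fence->semaphore_resouce_id = semaphore_resource_id;
*     fence->index = (uint32_t)(byte_offset / sizeof(uint32_t));
*     fence->value = value;
* }
* @endcode
*/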
struct pva_fw_vpu_ptr_symbol {
uint64_t base;
uint64_t offset;
uint64_t size;
};
struct pva_fw_vpu_legacy_ptr_symbol {
uint64_t base;
uint32_t offset;
uint32_t size;
};
enum pva_surface_format {
PVA_SURF_FMT_PITCH_LINEAR = 0,
PVA_SURF_FMT_BLOCK_LINEAR
};
enum pva_memory_segment {
/** Memory segment directly reachable by R5. Command buffer chunk
* memories need to be allocated from this segment */
PVA_MEMORY_SEGMENT_R5 = 1,
/** Memory segment reachable only by DMA. User buffers should be
* allocated from this segment */
PVA_MEMORY_SEGMENT_DMA = 2,
};
enum pva_symbol_type {
/*! Specifies an invalid symbol type */
PVA_SYM_TYPE_INVALID = 0,
/*! Specifies a data symbol */
PVA_SYM_TYPE_DATA,
/*! Specifies a VPU config table symbol */
PVA_SYM_TYPE_VPUC_TABLE,
/*! Specifies a Pointer symbol */
PVA_SYM_TYPE_POINTER,
/*! Specifies a System symbol */
PVA_SYM_TYPE_SYSTEM,
/*! Specifies an extended Pointer symbol */
PVA_SYM_TYPE_POINTER_EX,
PVA_SYM_TYPE_MAX,
};
/**
* \brief Holds PVA Sync Client Type.
* Currently NvSciSync supports NvSciSyncFences with syncpoint primitive type only.
*/
enum pva_sync_client_type {
/*! For a given SyncObj PVA acts as a signaler. This type corresponds to
* postfences from PVA. */
PVA_SYNC_CLIENT_TYPE_SIGNALER,
/*! For a given SyncObj PVA acts as a waiter. This type corresponds to
* prefences to PVA. */
PVA_SYNC_CLIENT_TYPE_WAITER,
/*! For a given SyncObj PVA acts as both signaler and waiter. */
PVA_SYNC_CLIENT_TYPE_SIGNALER_WAITER,
/*! Specifies the non inclusive upper bound of valid values. */
PVA_SYNC_CLIENT_TYPE_MAX,
/*! Reserved bound of valid values. */
PVA_SYNC_CLIENT_TYPE_RESERVED = 0x7FFFFFFF,
};
#define PVA_SYMBOL_ID_INVALID 0U
#define PVA_SYMBOL_ID_BASE 1U
#define PVA_MAX_SYMBOL_NAME_LEN 64U
struct pva_symbol_info {
char name[PVA_MAX_SYMBOL_NAME_LEN + 1U];
enum pva_symbol_type symbol_type;
uint32_t size;
uint32_t vmem_addr;
/** Symbol ID local to this executable */
uint32_t symbol_id; /**< Starting from PVA_SYMBOL_ID_BASE */
};
#define PVA_RESOURCE_ID_INVALID 0U
#define PVA_RESOURCE_ID_BASE 1U
struct pva_resource_entry {
#define PVA_RESOURCE_TYPE_INVALID 0U
#define PVA_RESOURCE_TYPE_DRAM 1U
#define PVA_RESOURCE_TYPE_EXEC_BIN 2U
#define PVA_RESOURCE_TYPE_DMA_CONFIG 3U
uint8_t type;
uint8_t smmu_context_id;
uint8_t addr_hi;
uint8_t size_hi;
uint32_t addr_lo;
uint32_t size_lo;
};
/** \brief Maximum number of queues per context */
#define PVA_MAX_QUEUES_PER_CONTEXT (8)
/** \brief Specifies the memory is GPU CACHED. */
#define PVA_GPU_CACHED_MEMORY (1u << 1u)
#define PVA_ACCESS_RO (1U << 0) /**< Read only access */
#define PVA_ACCESS_WO (1U << 1) /**< Write only access */
#define PVA_ACCESS_RW \
(PVA_ACCESS_RO | PVA_ACCESS_WO) /**< Read and write access */
#define PVA_TIMEOUT_INF UINT64_MAX /**< Infinite timeout */
#define PVA_MAX_NUM_INPUT_STATUS 2 /**< Maximum number of input statuses */
#define PVA_MAX_NUM_OUTPUT_STATUS 2 /**< Maximum number of output statuses */
#define PVA_MAX_NUM_PREFENCES 2 /**< Maximum number of pre-fences */
#define PVA_MAX_NUM_POSTFENCES 2 /**< Maximum number of post-fences */
/** Maximum number of timestamps */
#define PVA_MAX_NUM_TIMESTAMPS PVA_MAX_NUM_POSTFENCES
struct pva_cmdbuf_submit_info {
uint8_t num_prefences;
uint8_t num_postfences;
uint8_t num_input_status;
uint8_t num_output_status;
uint8_t num_timestamps;
#define PVA_ENGINE_AFFINITY_NONE 0
#define PVA_ENGINE_AFFINITY_ENGINE0 (1 << 0)
#define PVA_ENGINE_AFFINITY_ENGINE1 (1 << 1)
#define PVA_ENGINE_AFFINITY_ANY \
(PVA_ENGINE_AFFINITY_ENGINE0 | PVA_ENGINE_AFFINITY_ENGINE1)
uint8_t engine_affinity;
/** Size of the first chunk */
uint16_t first_chunk_size;
/** Resource ID of the first chunk */
uint32_t first_chunk_resource_id;
/** Offset of the first chunk within the resource */
uint64_t first_chunk_offset;
#define PVA_EXEC_TIMEOUT_REUSE 0xFFFFFFFFU
#define PVA_EXEC_TIMEOUT_INF 0U
/** Execution Timeout */
uint32_t execution_timeout_ms;
struct pva_fence prefences[PVA_MAX_NUM_PREFENCES];
struct pva_fence postfences[PVA_MAX_NUM_POSTFENCES];
struct pva_dram_addr input_statuses[PVA_MAX_NUM_INPUT_STATUS];
struct pva_dram_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS];
struct pva_dram_addr timestamps[PVA_MAX_NUM_TIMESTAMPS];
};
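/**
* Illustrative sketch (an assumption, not from the original documentation) of
* a minimal submit descriptor: one command buffer chunk, no fences or status
* buffers, any engine, infinite execution timeout. memset is assumed to be
* available through the existing includes.
*
* @code
* static void fill_minimal_submit(struct pva_cmdbuf_submit_info *info,
*                                 uint32_t chunk_resource_id,
*                                 uint16_t chunk_size_bytes)
* {
*     memset(info, 0, sizeof(*info));
*     info->engine_affinity = PVA_ENGINE_AFFINITY_ANY;
*     info->first_chunk_resource_id = chunk_resource_id;
*     info->first_chunk_offset = 0U;
*     info->first_chunk_size = chunk_size_bytes;
*     info->execution_timeout_ms = PVA_EXEC_TIMEOUT_INF;
* }
* @endcode
*/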
struct pva_ops_buffer {
void *base; /**< Buffer holding a list of async operations */
uint32_t offset; /**< First unused byte in the buffer */
uint32_t size; /**< Size of the buffer */
};
struct pva_cmdbuf_status {
/** Timestamp reflecting when the status was updated, in nanoseconds */
uint64_t timestamp;
/** Additional status information for the engine state */
uint32_t info32;
/** Additional status information for the engine state */
uint16_t info16;
/** Error code. Type: enum pva_error */
uint16_t status;
};
/** \brief Holds the PVA capabilities. */
struct pva_characteristics {
/*! Holds the number of PVA engines. */
uint32_t pva_engine_count;
/*! Holds the number of VPUs per PVA engine. */
uint32_t pva_pve_count;
/*! Holds the PVA generation information */
enum pva_hw_gen hw_version;
uint16_t max_desc_count;
uint16_t max_ch_count;
uint16_t max_adb_count;
uint16_t max_hwseq_word_count;
uint16_t max_vmem_region_count;
uint16_t reserved_desc_start;
uint16_t reserved_desc_count;
uint16_t reserved_adb_start;
uint16_t reserved_adb_count;
};
enum pva_error_inject_codes {
PVA_ERR_INJECT_WDT_HW_ERR, // Watchdog hardware error
PVA_ERR_INJECT_WDT_TIMEOUT, // Watchdog timeout error
};
/*
* !!!! DO NOT MODIFY !!!!!!
* These values are defined as per DriveOS guidelines
*/
#define PVA_INPUT_STATUS_SUCCESS (0)
#define PVA_INPUT_STATUS_INVALID (0xFFFF)
#endif // PVA_API_TYPES_H

View File

@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_VPU_H
#define PVA_API_VPU_H
#include "pva_api_types.h"
/**
 * @brief VPU instance data passed to the VPU kernel.
*/
struct pva_vpu_instance_data {
/** @brief ID of the VPU assigned to the task */
uint16_t engine_id;
/** @brief Indicates whether a PPE task was launched */
uint16_t ppe_task_launched;
/** @brief Base of the VMEM memory */
uint32_t vmem_base;
/** @brief Base of the DMA descriptor SRAM memory */
uint32_t dma_descriptor_base;
/** @brief Base of the L2SRAM allocated for the executed task */
uint32_t l2ram_base;
/** @brief Size of the L2SRAM allocated for the executed task */
uint32_t l2ram_size;
};
#endif // PVA_API_VPU_H

View File

@@ -0,0 +1,125 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_utils.h"
#include "pva_api.h"
#define INVALID_ID 0xFFFFFFFF
enum pva_error
pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator,
void *block_mem, uint32_t base_id,
uint32_t block_size, uint32_t max_num_blocks)
{
enum pva_error err = PVA_SUCCESS;
allocator->free_slot_head = INVALID_ID;
allocator->next_free_slot = 0;
allocator->max_num_blocks = max_num_blocks;
allocator->block_size = block_size;
allocator->base_id = base_id;
allocator->blocks = block_mem;
allocator->slot_in_use = pva_kmd_zalloc(
sizeof(*allocator->slot_in_use) * max_num_blocks);
if (!allocator->slot_in_use) {
err = PVA_NOMEM;
goto err_out;
}
return PVA_SUCCESS;
err_out:
return err;
}
void pva_kmd_block_allocator_deinit(struct pva_kmd_block_allocator *allocator)
{
pva_kmd_free(allocator->slot_in_use);
}
static inline void *get_block(struct pva_kmd_block_allocator *allocator,
uint32_t slot)
{
uintptr_t base = (uintptr_t)allocator->blocks;
uintptr_t addr = base + (slot * allocator->block_size);
return (void *)addr;
}
static inline uint32_t next_slot(struct pva_kmd_block_allocator *allocator,
uint32_t slot)
{
uint32_t *next = (uint32_t *)get_block(allocator, slot);
return *next;
}
void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id)
{
void *block = NULL;
uint32_t slot = INVALID_ID;
if (allocator->free_slot_head != INVALID_ID) {
slot = allocator->free_slot_head;
allocator->free_slot_head =
next_slot(allocator, allocator->free_slot_head);
} else {
if (allocator->next_free_slot < allocator->max_num_blocks) {
slot = allocator->next_free_slot;
allocator->next_free_slot++;
} else {
goto err_out;
}
}
allocator->slot_in_use[slot] = true;
*out_id = slot + allocator->base_id;
block = get_block(allocator, slot);
return block;
err_out:
return NULL;
}
static bool is_slot_valid(struct pva_kmd_block_allocator *allocator,
uint32_t slot)
{
if (slot >= allocator->max_num_blocks) {
return false;
}
return allocator->slot_in_use[slot];
}
void *pva_kmd_get_block(struct pva_kmd_block_allocator *allocator, uint32_t id)
{
uint32_t slot = id - allocator->base_id;
if (!is_slot_valid(allocator, slot)) {
return NULL;
}
return get_block(allocator, slot);
}
enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator,
uint32_t id)
{
uint32_t slot = id - allocator->base_id;
uint32_t *next;
if (!is_slot_valid(allocator, slot)) {
return PVA_INVAL;
}
allocator->slot_in_use[slot] = false;
next = (uint32_t *)get_block(allocator, slot);
*next = allocator->free_slot_head;
allocator->free_slot_head = slot;
return PVA_SUCCESS;
}
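/*
 * A minimal usage sketch of the allocator above, assuming pva_kmd_zalloc()
 * provides the backing storage; the object type, count and base ID are
 * illustrative examples. Each block must be large enough to hold the
 * free-list link (a uint32_t) while it is free.
 */
static enum pva_error example_block_allocator_usage(void)
{
        struct pva_kmd_block_allocator alloc;
        struct example_obj {
                uint32_t payload;
        } *obj;
        void *mem;
        uint32_t id;
        enum pva_error err;

        mem = pva_kmd_zalloc(sizeof(*obj) * 16U);
        if (mem == NULL) {
                return PVA_NOMEM;
        }
        err = pva_kmd_block_allocator_init(&alloc, mem, 1U /* base_id */,
                                           sizeof(*obj), 16U);
        if (err != PVA_SUCCESS) {
                goto free_mem;
        }
        obj = pva_kmd_zalloc_block(&alloc, &id); /* zero-initialized block */
        if (obj != NULL) {
                obj->payload = 42U;
                obj = pva_kmd_get_block(&alloc, id); /* look it up by ID */
                (void)pva_kmd_free_block(&alloc, id);
        }
        pva_kmd_block_allocator_deinit(&alloc);
free_mem:
        pva_kmd_free(mem);
        return err;
}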

View File

@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_BLOCK_ALLOCATOR_H
#define PVA_KMD_BLOCK_ALLOCATOR_H
#include "pva_api.h"
struct pva_kmd_block_allocator {
uint32_t free_slot_head;
uint32_t base_id;
uint32_t max_num_blocks;
uint32_t next_free_slot;
uint32_t block_size;
void *blocks;
bool *slot_in_use;
};
enum pva_error
pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator,
void *chunk_mem, uint32_t base_id,
uint32_t chunk_size, uint32_t max_num_chunks);
void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id);
static inline void *
pva_kmd_zalloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id)
{
void *ptr = pva_kmd_alloc_block(allocator, out_id);
if (ptr != NULL) {
memset(ptr, 0, allocator->block_size);
}
return ptr;
}
void *pva_kmd_get_block(struct pva_kmd_block_allocator *allocator, uint32_t id);
enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator,
uint32_t id);
void pva_kmd_block_allocator_deinit(struct pva_kmd_block_allocator *allocator);
#endif // PVA_KMD_BLOCK_ALLOCATOR_H

View File

@@ -0,0 +1,280 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_cmdbuf.h"
#include "pva_api_cmdbuf.h"
#include "pva_kmd_utils.h"
#include "pva_math_utils.h"
#define CHUNK_STATE_INVALID 0
#define CHUNK_STATE_FENCE_TRIGGERED 1
static uint32_t *
get_chunk_states(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool)
{
return (uint32_t *)pva_offset_pointer(
cmdbuf_chunk_pool->mem_base_va,
cmdbuf_chunk_pool->chunk_states_offset);
}
static void *get_chunk(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t chunk_id)
{
return pva_offset_pointer(cmdbuf_chunk_pool->mem_base_va,
cmdbuf_chunk_pool->chunk_size * chunk_id);
}
static uint32_t get_chunk_id_from_res_offset(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint64_t offset)
{
ASSERT(offset >= cmdbuf_chunk_pool->mem_offset);
offset -= cmdbuf_chunk_pool->mem_offset;
return offset / cmdbuf_chunk_pool->chunk_size;
}
enum pva_error pva_kmd_cmdbuf_chunk_pool_init(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t mem_resource_id, uint64_t mem_offset, uint32_t mem_size,
uint16_t chunk_size, uint32_t num_chunks, void *mem_base_va)
{
uint32_t *chunk_states;
uint32_t i;
enum pva_error err;
ASSERT(mem_size >= pva_kmd_cmdbuf_pool_get_required_mem_size(
chunk_size, num_chunks));
cmdbuf_chunk_pool->mem_resource_id = mem_resource_id;
cmdbuf_chunk_pool->mem_offset = mem_offset;
cmdbuf_chunk_pool->mem_size = mem_size;
cmdbuf_chunk_pool->chunk_size = chunk_size;
cmdbuf_chunk_pool->num_chunks = num_chunks;
cmdbuf_chunk_pool->mem_base_va = mem_base_va;
cmdbuf_chunk_pool->chunk_states_offset = chunk_size * num_chunks;
chunk_states = get_chunk_states(cmdbuf_chunk_pool);
for (i = 0; i < num_chunks; i++) {
chunk_states[i] = CHUNK_STATE_INVALID;
}
err = pva_kmd_block_allocator_init(&cmdbuf_chunk_pool->block_allocator,
mem_base_va, 0, chunk_size,
num_chunks);
return err;
}
void pva_kmd_cmdbuf_chunk_pool_deinit(struct pva_kmd_cmdbuf_chunk_pool *pool)
{
pva_kmd_block_allocator_deinit(&pool->block_allocator);
}
void pva_kmd_free_linked_cmdbuf_chunks(struct pva_kmd_cmdbuf_chunk_pool *pool,
uint32_t chunk_id)
{
struct pva_cmd_link_chunk *begin;
uint32_t *chunk_states;
uint64_t offset;
uint32_t resource_id;
chunk_states = get_chunk_states(pool);
while (true) {
begin = get_chunk(pool, chunk_id);
chunk_states[chunk_id] = CHUNK_STATE_INVALID;
offset = assemble_addr(begin->next_chunk_offset_hi,
begin->next_chunk_offset_lo);
resource_id = begin->next_chunk_resource_id;
pva_kmd_free_block(&pool->block_allocator, chunk_id);
if (resource_id == PVA_RESOURCE_ID_INVALID) {
break;
}
ASSERT(resource_id == pool->mem_resource_id);
/* Free next chunk */
chunk_id = get_chunk_id_from_res_offset(pool, offset);
}
}
static bool recycle_chunks(struct pva_kmd_cmdbuf_chunk_pool *pool)
{
uint32_t *chunk_states;
uint32_t i;
bool freed = false;
chunk_states = get_chunk_states(pool);
for (i = 0; i < pool->num_chunks; i++) {
if (chunk_states[i] == CHUNK_STATE_FENCE_TRIGGERED) {
pva_kmd_free_linked_cmdbuf_chunks(pool, i);
freed = true;
break;
}
}
return freed;
}
enum pva_error
pva_kmd_alloc_cmdbuf_chunk(struct pva_kmd_cmdbuf_chunk_pool *pool,
uint32_t *out_chunk_id)
{
enum pva_error err = PVA_SUCCESS;
void *chunk;
chunk = pva_kmd_alloc_block(&pool->block_allocator, out_chunk_id);
if (chunk == NULL) {
if (recycle_chunks(pool)) {
chunk = pva_kmd_alloc_block(&pool->block_allocator,
out_chunk_id);
ASSERT(chunk != NULL);
} else {
err = PVA_NOMEM;
}
}
return err;
}
void pva_kmd_get_free_notifier_fence(struct pva_kmd_cmdbuf_chunk_pool *pool,
uint32_t chunk_id,
struct pva_fw_postfence *fence)
{
uint64_t offset_sum =
safe_addu64(pool->mem_offset, pool->chunk_states_offset);
uint64_t chunk_size =
(uint64_t)safe_mulu32((uint32_t)sizeof(uint32_t), chunk_id);
uint64_t state_offset = safe_addu64(offset_sum, chunk_size);
memset(fence, 0, sizeof(*fence));
fence->resource_id = pool->mem_resource_id;
fence->offset_lo = iova_lo(state_offset);
fence->offset_hi = iova_hi(state_offset);
fence->value = CHUNK_STATE_FENCE_TRIGGERED;
fence->ts_resource_id = PVA_RESOURCE_ID_INVALID;
}
static void *current_cmd(struct pva_kmd_cmdbuf_builder *builder)
{
return pva_offset_pointer(
pva_kmd_get_cmdbuf_chunk_va(builder->pool,
builder->current_chunk_id),
builder->current_chunk_offset);
}
static void begin_chunk(struct pva_kmd_cmdbuf_builder *builder)
{
struct pva_cmd_link_chunk *cmd = pva_kmd_get_cmdbuf_chunk_va(
builder->pool, builder->current_chunk_id);
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_LINK_CHUNK;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->next_chunk_resource_id = PVA_RESOURCE_ID_INVALID;
builder->current_chunk_offset = sizeof(*cmd);
}
static void end_chunk(struct pva_kmd_cmdbuf_builder *builder)
{
/* Size of this chunk is now known. Update the header of the previous chunk. */
*builder->chunk_size_ptr = builder->current_chunk_offset;
}
static void link_chunk(struct pva_kmd_cmdbuf_builder *builder,
uint32_t new_chunk_id)
{
struct pva_cmd_link_chunk *old_link;
uint64_t new_chunk_offset;
old_link = (struct pva_cmd_link_chunk *)pva_kmd_get_cmdbuf_chunk_va(
builder->pool, builder->current_chunk_id);
new_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset(builder->pool,
new_chunk_id);
old_link->next_chunk_resource_id = builder->pool->mem_resource_id;
old_link->next_chunk_offset_lo = iova_lo(new_chunk_offset);
old_link->next_chunk_offset_hi = iova_hi(new_chunk_offset);
/* The new chunk size is still unknown. We record the pointer here. */
builder->chunk_size_ptr = &old_link->next_chunk_size;
}
void *pva_kmd_reserve_cmd_space(struct pva_kmd_cmdbuf_builder *builder,
uint16_t size)
{
uint16_t max_size;
enum pva_error err;
void *cmd_start;
max_size = safe_subu16(builder->pool->chunk_size,
(uint16_t)sizeof(struct pva_cmd_link_chunk));
ASSERT(size <= max_size);
if ((builder->current_chunk_offset + size) >
builder->pool->chunk_size) {
/* Not enough space in the current chunk. Allocate a new one. */
uint32_t new_chunk_id;
err = pva_kmd_alloc_cmdbuf_chunk(builder->pool, &new_chunk_id);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("No more chunk in the pool");
goto err_out;
}
end_chunk(builder);
link_chunk(builder, new_chunk_id);
builder->current_chunk_id = new_chunk_id;
builder->current_chunk_offset = 0;
begin_chunk(builder);
}
cmd_start = current_cmd(builder);
(void)memset(cmd_start, 0, size);
builder->current_chunk_offset += size;
return cmd_start;
err_out:
return NULL;
}
enum pva_error
pva_kmd_cmdbuf_builder_init(struct pva_kmd_cmdbuf_builder *builder,
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool)
{
enum pva_error err = PVA_SUCCESS;
uint32_t const min_chunk_size = sizeof(struct pva_cmd_link_chunk);
ASSERT(chunk_pool->chunk_size >= min_chunk_size);
builder->pool = chunk_pool;
err = pva_kmd_alloc_cmdbuf_chunk(chunk_pool,
&builder->current_chunk_id);
if (err != PVA_SUCCESS) {
goto err_out;
}
builder->current_chunk_offset = 0;
builder->first_chunk_size = 0;
builder->first_chunk_id = builder->current_chunk_id;
builder->chunk_size_ptr = &builder->first_chunk_size;
begin_chunk(builder);
return PVA_SUCCESS;
err_out:
return err;
}
void pva_kmd_cmdbuf_builder_finalize(struct pva_kmd_cmdbuf_builder *builder,
uint32_t *out_first_chunk_id,
uint16_t *out_first_chunk_size)
{
end_chunk(builder);
*out_first_chunk_id = builder->first_chunk_id;
*out_first_chunk_size = builder->first_chunk_size;
}
void pva_kmd_cmdbuf_builder_cancel(struct pva_kmd_cmdbuf_builder *builder)
{
pva_kmd_free_linked_cmdbuf_chunks(builder->pool,
builder->first_chunk_id);
}

View File

@@ -0,0 +1,265 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_CMDBUF_H
#define PVA_KMD_CMDBUF_H
#include "pva_fw.h"
#include "pva_resource.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_mutex.h"
#include "pva_api_cmdbuf.h"
#include "pva_utils.h"
#include "pva_math_utils.h"
struct pva_kmd_queue;
/**
* A fixed-size pool of command buffer chunks.
*
 * Chunks are allocated from this pool. When submitting a chain of chunks,
 * request a free-notifier post fence from the pool for the first chunk; once
 * that fence is triggered, the whole chain is considered free by the pool.
*/
struct pva_kmd_cmdbuf_chunk_pool {
uint16_t chunk_size;
uint32_t num_chunks;
uint32_t mem_resource_id;
uint64_t mem_size;
uint64_t mem_offset; /**< Starting offset in the resource that can be
* used by this pool */
uint64_t chunk_states_offset;
void *mem_base_va;
struct pva_kmd_block_allocator block_allocator;
};
static inline uint64_t
pva_kmd_cmdbuf_pool_get_required_mem_size(uint16_t chunk_size,
uint32_t num_chunks)
{
/* Add storage required for free notifier fences */
return (chunk_size + sizeof(uint32_t)) * num_chunks;
}
/**
* Initialize the chunk pool.
*
 * @param[out] cmdbuf_chunk_pool Pointer to the pool.
*
* @param[in] mem_resource_id Resource ID of the memory to be used for the pool.
*
* @param[in] mem_offset Offset of the memory to be used for the pool.
* @param[in] mem_size Size of the memory to be used for the pool.
*
* @param[in] chunk_size Size of each chunk in the pool.
*
* @param[in] num_chunks Number of chunks in the pool.
*
* @param[in] mem_base_va Virtual address of the memory to be used for the pool.
* The virtual address is the base address of the resource.
*/
enum pva_error pva_kmd_cmdbuf_chunk_pool_init(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t mem_resource_id, uint64_t mem_offset, uint32_t mem_size,
uint16_t chunk_size, uint32_t num_chunks, void *mem_base_va);
void pva_kmd_cmdbuf_chunk_pool_deinit(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool);
/**
* Allocate a chunk from the pool.
*
 * If the chunk is submitted, it is freed automatically when the
 * free-notifier fence is triggered.
*/
enum pva_error
pva_kmd_alloc_cmdbuf_chunk(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t *out_chunk_id);
/**
* Free a linked list of chunks.
*
* We only need to call this function if we decide not to submit the chunks,
 * usually in an error path.
*/
void pva_kmd_free_linked_cmdbuf_chunks(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id);
/**
* Get the free-notifier fence.
*
 * @param[in] chunk_id The first chunk of the command buffer to be submitted.
 *
 * @param[out] fence The free-notifier fence that should be submitted with the command buffer.
*/
void pva_kmd_get_free_notifier_fence(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id,
struct pva_fw_postfence *fence);
static inline void *
pva_kmd_get_cmdbuf_chunk_va(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t chunk_id)
{
return (void *)((uintptr_t)cmdbuf_chunk_pool->mem_base_va +
chunk_id * cmdbuf_chunk_pool->chunk_size);
}
static inline uint64_t pva_kmd_get_cmdbuf_chunk_res_offset(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id)
{
uint64_t chunk_size = (uint64_t)safe_mulu32(
chunk_id, (uint32_t)cmdbuf_chunk_pool->chunk_size);
return safe_addu64(cmdbuf_chunk_pool->mem_offset, chunk_size);
}
/**
* Utility for building a command buffer with multiple chunks.
*
* The builder will automatically allocate chunks from the pool when the current
* chunk is full.
*/
struct pva_kmd_cmdbuf_builder {
uint16_t first_chunk_size;
uint16_t current_chunk_offset;
uint32_t first_chunk_id;
uint32_t current_chunk_id;
struct pva_kmd_cmdbuf_chunk_pool *pool;
uint16_t *chunk_size_ptr; /**< Pointer to the chunk size field of the previous link_chunk command */
};
enum pva_error
pva_kmd_cmdbuf_builder_init(struct pva_kmd_cmdbuf_builder *builder,
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool);
void *pva_kmd_reserve_cmd_space(struct pva_kmd_cmdbuf_builder *builder,
uint16_t size);
void pva_kmd_cmdbuf_builder_finalize(struct pva_kmd_cmdbuf_builder *builder,
uint32_t *out_first_chunk_id,
uint16_t *out_first_chunk_size);
void pva_kmd_cmdbuf_builder_cancel(struct pva_kmd_cmdbuf_builder *builder);
static inline void pva_kmd_set_cmd_init_resource_table(
struct pva_cmd_init_resource_table *cmd, uint8_t resource_table_id,
uint64_t iova_addr, uint32_t max_num_entries)
{
cmd->header.opcode = PVA_CMD_OPCODE_INIT_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
cmd->resource_table_addr_lo = iova_lo(iova_addr);
cmd->resource_table_addr_hi = iova_hi(iova_addr);
cmd->max_n_entries = max_num_entries;
}
static inline void
pva_kmd_set_cmd_deinit_resource_table(struct pva_cmd_deinit_resource_table *cmd,
uint8_t resource_table_id)
{
cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
}
static inline void pva_kmd_set_cmd_init_queue(struct pva_cmd_init_queue *cmd,
uint8_t ccq_id, uint8_t queue_id,
uint64_t iova_addr,
uint32_t max_num_submit)
{
cmd->header.opcode = PVA_CMD_OPCODE_INIT_QUEUE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->ccq_id = ccq_id;
cmd->queue_id = queue_id;
cmd->queue_addr_lo = iova_lo(iova_addr);
cmd->queue_addr_hi = iova_hi(iova_addr);
cmd->max_n_submits = max_num_submit;
}
static inline void
pva_kmd_set_cmd_deinit_queue(struct pva_cmd_deinit_queue *cmd, uint8_t ccq_id,
uint8_t queue_id)
{
cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_QUEUE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->ccq_id = ccq_id;
cmd->queue_id = queue_id;
}
static inline void pva_kmd_set_cmd_update_resource_table(
struct pva_cmd_update_resource_table *cmd, uint32_t resource_table_id,
uint32_t resource_id, struct pva_resource_entry const *entry)
{
cmd->header.opcode = PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
cmd->resource_id = resource_id;
cmd->entry = *entry;
}
static inline void
pva_kmd_set_cmd_unregister_resource(struct pva_cmd_unregister_resource *cmd,
uint32_t resource_id)
{
cmd->header.opcode = PVA_CMD_OPCODE_UNREGISTER_RESOURCE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_id = resource_id;
}
static inline void
pva_kmd_set_cmd_enable_fw_profiling(struct pva_cmd_enable_fw_profiling *cmd,
uint32_t buffer_resource_id,
uint32_t buffer_size, uint64_t offset,
uint32_t filter, uint8_t timestamp_type)
{
cmd->header.opcode = PVA_CMD_OPCODE_ENABLE_FW_PROFILING;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->buffer_resource_id = buffer_resource_id;
cmd->buffer_offset_hi = iova_hi(offset);
cmd->buffer_offset_lo = iova_lo(offset);
cmd->buffer_size = buffer_size;
cmd->filter = filter;
cmd->timestamp_type = timestamp_type;
}
static inline void
pva_kmd_set_cmd_disable_fw_profiling(struct pva_cmd_disable_fw_profiling *cmd)
{
cmd->header.opcode = PVA_CMD_OPCODE_DISABLE_FW_PROFILING;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
}
static inline void pva_kmd_set_cmd_get_tegra_stats(
struct pva_cmd_get_tegra_stats *cmd, uint32_t buffer_resource_id,
uint32_t buffer_size, uint64_t offset, bool enabled)
{
cmd->header.opcode = PVA_CMD_OPCODE_GET_TEGRA_STATS;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->buffer_resource_id = buffer_resource_id;
cmd->buffer_offset_hi = iova_hi(offset);
cmd->buffer_offset_lo = iova_lo(offset);
cmd->buffer_size = buffer_size;
cmd->enabled = enabled;
}
static inline void pva_kmd_set_cmd_suspend_fw(struct pva_cmd_suspend_fw *cmd)
{
uint64_t len = (sizeof(*cmd) / sizeof(uint32_t));
cmd->header.opcode = PVA_CMD_OPCODE_SUSPEND_FW;
ASSERT(len <= 255u);
cmd->header.len = (uint8_t)(len);
}
static inline void pva_kmd_set_cmd_resume_fw(struct pva_cmd_resume_fw *cmd)
{
uint64_t len = (sizeof(*cmd) / sizeof(uint32_t));
cmd->header.opcode = PVA_CMD_OPCODE_RESUME_FW;
ASSERT(len <= 255u);
cmd->header.len = (uint8_t)(len);
}
#endif // PVA_KMD_CMDBUF_H
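/*
 * A minimal sketch of building a one-command buffer with the builder API
 * above, assuming an initialized chunk pool; the deinit-queue command and the
 * function name are illustrative examples. In the driver proper, the
 * submitter paths wrap these steps together with the actual submission.
 */
static inline enum pva_error
example_build_cmdbuf(struct pva_kmd_cmdbuf_chunk_pool *pool,
                     uint32_t *out_chunk_id, uint16_t *out_chunk_size,
                     struct pva_fw_postfence *free_fence)
{
        struct pva_kmd_cmdbuf_builder builder;
        struct pva_cmd_deinit_queue *cmd;
        enum pva_error err;

        err = pva_kmd_cmdbuf_builder_init(&builder, pool);
        if (err != PVA_SUCCESS) {
                return err;
        }
        cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
        if (cmd == NULL) {
                pva_kmd_cmdbuf_builder_cancel(&builder);
                return PVA_NOMEM;
        }
        pva_kmd_set_cmd_deinit_queue(cmd, 0U /* ccq_id */, 0U /* queue_id */);
        pva_kmd_cmdbuf_builder_finalize(&builder, out_chunk_id,
                                        out_chunk_size);
        /* The free-notifier fence accompanies the submission so the pool can
         * reclaim the chunk chain once the FW has consumed it. */
        pva_kmd_get_free_notifier_fence(pool, *out_chunk_id, free_fence);
        return PVA_SUCCESS;
}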

View File

@@ -0,0 +1,62 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_CONSTANTS_H
#define PVA_KMD_CONSTANTS_H
#include "pva_constants.h"
/* Limits related to KMD's own submission*/
#define PVA_KMD_MAX_NUM_KMD_RESOURCES 32
#define PVA_KMD_MAX_NUM_KMD_DMA_CONFIGS 1
#define PVA_KMD_MAX_NUM_KMD_CHUNKS 32
#define PVA_KMD_MAX_NUM_KMD_SUBMITS 32
/* Limits related to User's privileged submission */
#define PVA_KMD_MAX_NUM_PRIV_CHUNKS 256
#define PVA_KMD_MAX_NUM_PRIV_SUBMITS 256
#define PVA_KMD_USER_CONTEXT_ID_BASE 1u
#define PVA_KMD_PVA0_T23x_REG_BASE 0x16000000
#define PVA_KMD_PVA0_T23x_REG_SIZE 0x800000
#define PVA_KMD_TIMEOUT_INF UINT64_MAX
// clang-format off
#if PVA_BUILD_MODE == PVA_BUILD_MODE_SIM
#define PVA_KMD_TIMEOUT_FACTOR 100
#else
#define PVA_KMD_TIMEOUT_FACTOR 1
#endif
// clang-format on
#define PVA_KMD_TIMEOUT(val) ((val) * PVA_KMD_TIMEOUT_FACTOR)
#define PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS PVA_KMD_TIMEOUT(100) /**< 100 ms */
#define PVA_KMD_WAIT_FW_TIMEOUT_US PVA_KMD_TIMEOUT(1000000) /**< 1 second */
#define PVA_KMD_WAIT_FW_POLL_INTERVAL_US PVA_KMD_TIMEOUT(100) /**< 100 us */
#define PVA_KMD_FW_BOOT_TIMEOUT_MS PVA_KMD_TIMEOUT(1000) /**< 1 second */
#define PVA_NUM_RW_SYNCPTS 56
// clang-format off
#if PVA_DEV_MAIN_COMPATIBLE == 1
#define PVA_KMD_LOAD_FROM_GSC_DEFAULT true
#if PVA_SAFETY == 1
#define PVA_KMD_APP_AUTH_DEFAULT true
#else
#define PVA_KMD_APP_AUTH_DEFAULT false
#endif
#else
#define PVA_KMD_LOAD_FROM_GSC_DEFAULT false
#define PVA_KMD_APP_AUTH_DEFAULT false
#endif
// clang-format on
#endif // PVA_KMD_CONSTANTS_H

View File

@@ -0,0 +1,363 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_utils.h"
#include "pva_constants.h"
#include "pva_api_cmdbuf.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_context.h"
#include "pva_kmd_constants.h"
struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva)
{
uint32_t alloc_id;
enum pva_error err;
struct pva_kmd_context *ctx;
ctx = pva_kmd_zalloc_block(&pva->context_allocator, &alloc_id);
if (ctx == NULL) {
goto err_out;
}
ctx->ccq_id = alloc_id;
ctx->resource_table_id = ctx->ccq_id;
ctx->smmu_ctx_id = ctx->ccq_id;
ctx->pva = pva;
ctx->max_n_queues = PVA_MAX_NUM_QUEUES_PER_CONTEXT;
ctx->ccq0_lock_ptr = &pva->ccq0_lock;
pva_kmd_mutex_init(&ctx->ccq_lock);
pva_kmd_mutex_init(&ctx->resource_table_lock);
ctx->queue_allocator_mem = pva_kmd_zalloc(sizeof(struct pva_kmd_queue) *
ctx->max_n_queues);
if (ctx->queue_allocator_mem == NULL) {
goto free_ctx;
}
err = pva_kmd_block_allocator_init(&ctx->queue_allocator,
ctx->queue_allocator_mem, 0,
sizeof(struct pva_kmd_queue),
ctx->max_n_queues);
if (err != PVA_SUCCESS) {
goto free_queue_mem;
}
return ctx;
free_queue_mem:
pva_kmd_free(ctx->queue_allocator_mem);
free_ctx:
pva_kmd_free(ctx);
err_out:
return NULL;
}
static enum pva_error notify_fw_context_init(struct pva_kmd_context *ctx)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter;
struct pva_cmd_init_resource_table *res_cmd;
struct pva_cmd_init_queue *queue_cmd;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
uint32_t fence_val;
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
res_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*res_cmd));
ASSERT(res_cmd != NULL);
pva_kmd_set_cmd_init_resource_table(
res_cmd, ctx->resource_table_id,
ctx->ctx_resource_table.table_mem->iova,
ctx->ctx_resource_table.n_entries);
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
ASSERT(queue_cmd != NULL);
pva_kmd_set_cmd_init_queue(
queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/
ctx->ctx_queue.queue_memory->iova,
ctx->ctx_queue.max_num_submit);
update_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(update_cmd,
0, /* KMD's resource table ID */
ctx->submit_memory_resource_id,
&entry);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
// Error is either QUEUE_FULL or TIMEDOUT
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
goto err_out;
}
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
return err;
}
static enum pva_error notify_fw_context_deinit(struct pva_kmd_context *ctx)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter;
struct pva_cmd_deinit_resource_table *deinit_table_cmd;
struct pva_cmd_deinit_queue *deinit_queue_cmd;
uint32_t fence_val;
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
deinit_queue_cmd =
pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_queue_cmd));
ASSERT(deinit_queue_cmd != NULL);
pva_kmd_set_cmd_deinit_queue(
deinit_queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id /* For privileged queues, queue ID == user CCQ ID*/
);
deinit_table_cmd =
pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_table_cmd));
ASSERT(deinit_table_cmd != NULL);
pva_kmd_set_cmd_deinit_resource_table(deinit_table_cmd,
ctx->resource_table_id);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when deinitializing context");
goto err_out;
}
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
return err;
}
enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
uint32_t res_table_capacity)
{
enum pva_error err;
uint32_t queue_mem_size;
uint64_t chunk_mem_size;
struct pva_fw_postfence post_fence = { 0 };
struct pva_syncpt_rw_info *syncpts;
uint64_t size;
/* Power on PVA if not already */
err = pva_kmd_device_busy(ctx->pva);
if (err != PVA_SUCCESS) {
goto err_out;
}
/* Allocate RW syncpoints for this context */
syncpts = (struct pva_syncpt_rw_info *)pva_kmd_alloc_block(
&ctx->pva->syncpt_allocator, &ctx->syncpt_block_index);
ASSERT(syncpts != NULL);
/* Init resource table for this context */
err = pva_kmd_resource_table_init(&ctx->ctx_resource_table, ctx->pva,
ctx->smmu_ctx_id, res_table_capacity,
res_table_capacity);
if (err != PVA_SUCCESS) {
goto drop_device;
}
/* Init privileged queue for this context */
queue_mem_size = pva_get_submission_queue_memory_size(
PVA_KMD_MAX_NUM_PRIV_SUBMITS);
ctx->ctx_queue_mem =
pva_kmd_device_memory_alloc_map(queue_mem_size, ctx->pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
if (ctx->ctx_queue_mem == NULL) {
err = PVA_NOMEM;
goto deinit_table;
}
pva_kmd_queue_init(
&ctx->ctx_queue, ctx->pva, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* Context's PRIV queue ID is identical to CCQ ID */
&ctx->pva->ccq0_lock, ctx->ctx_queue_mem,
PVA_KMD_MAX_NUM_PRIV_SUBMITS);
/* Allocate memory for submission */
chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size(
PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_PRIV_CHUNKS);
/* Allocate one post fence at the end. This memory will be added to
* KMD's own resource table. We don't need to explicitly free it. It
* will be freed after we drop the resource. */
size = safe_addu64(chunk_mem_size, (uint64_t)sizeof(uint32_t));
ctx->submit_memory = pva_kmd_device_memory_alloc_map(
size, ctx->pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
if (ctx->submit_memory == NULL) {
err = PVA_NOMEM;
goto queue_deinit;
}
/* Add submit memory to resource table */
pva_kmd_mutex_lock(&ctx->pva->resource_table_lock);
err = pva_kmd_add_dram_buffer_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory,
&ctx->submit_memory_resource_id);
pva_kmd_mutex_unlock(&ctx->pva->resource_table_lock);
if (err != PVA_SUCCESS) {
goto free_submit_memory;
}
/* Init chunk pool */
err = pva_kmd_cmdbuf_chunk_pool_init(
&ctx->chunk_pool, ctx->submit_memory_resource_id,
0 /* offset */, chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE,
PVA_KMD_MAX_NUM_PRIV_CHUNKS, ctx->submit_memory->va);
if (err != PVA_SUCCESS) {
goto free_dram_buffer_resource;
}
/* Init fence */
ctx->fence_offset = chunk_mem_size;
/* Init submitter */
pva_kmd_mutex_init(&ctx->submit_lock);
pva_kmd_mutex_init(&ctx->chunk_pool_lock);
post_fence.resource_id = ctx->submit_memory_resource_id;
post_fence.offset_lo = iova_lo(ctx->fence_offset);
post_fence.offset_hi = iova_hi(ctx->fence_offset);
post_fence.ts_resource_id = PVA_RESOURCE_ID_INVALID;
pva_kmd_submitter_init(
&ctx->submitter, &ctx->ctx_queue, &ctx->submit_lock,
&ctx->chunk_pool, &ctx->chunk_pool_lock,
pva_offset_pointer(ctx->submit_memory->va, ctx->fence_offset),
&post_fence);
/* Use KMD's queue to inform FW */
err = notify_fw_context_init(ctx);
if (err != PVA_SUCCESS) {
goto deinit_submitter;
}
ctx->inited = true;
return PVA_SUCCESS;
deinit_submitter:
pva_kmd_mutex_deinit(&ctx->chunk_pool_lock);
pva_kmd_mutex_deinit(&ctx->submit_lock);
pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool);
free_dram_buffer_resource:
pva_kmd_drop_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id);
free_submit_memory:
pva_kmd_device_memory_free(ctx->submit_memory);
queue_deinit:
pva_kmd_queue_deinit(&ctx->ctx_queue);
pva_kmd_device_memory_free(ctx->ctx_queue_mem);
deinit_table:
pva_kmd_resource_table_deinit(&ctx->ctx_resource_table);
drop_device:
pva_kmd_device_idle(ctx->pva);
err_out:
return err;
}
void pva_kmd_context_deinit(struct pva_kmd_context *ctx)
{
enum pva_error err;
if (ctx->inited) {
err = notify_fw_context_deinit(ctx);
ASSERT(err == PVA_SUCCESS);
pva_kmd_verify_all_resources_free(&ctx->ctx_resource_table);
pva_kmd_device_idle(ctx->pva);
pva_kmd_mutex_deinit(&ctx->submit_lock);
pva_kmd_mutex_deinit(&ctx->chunk_pool_lock);
pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool);
pva_kmd_mutex_lock(&ctx->pva->resource_table_lock);
pva_kmd_drop_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id);
pva_kmd_mutex_unlock(&ctx->pva->resource_table_lock);
pva_kmd_queue_deinit(&ctx->ctx_queue);
pva_kmd_device_memory_free(ctx->ctx_queue_mem);
pva_kmd_resource_table_deinit(&ctx->ctx_resource_table);
pva_kmd_free_block(&ctx->pva->syncpt_allocator,
ctx->syncpt_block_index);
ctx->inited = false;
}
}
static void pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx)
{
enum pva_error err;
struct pva_kmd_queue_destroy_in_args args;
for (uint32_t queue_id = 0u; queue_id < ctx->max_n_queues; queue_id++) {
struct pva_kmd_queue *queue =
pva_kmd_get_block(&ctx->queue_allocator, queue_id);
if (queue != NULL) {
args.queue_id = queue_id;
err = pva_kmd_queue_destroy(ctx, &args);
ASSERT(err == PVA_SUCCESS);
}
}
}
void pva_kmd_context_destroy(struct pva_kmd_context *ctx)
{
enum pva_error err;
pva_kmd_destroy_all_queues(ctx);
pva_kmd_context_deinit(ctx);
pva_kmd_block_allocator_deinit(&ctx->queue_allocator);
pva_kmd_free(ctx->queue_allocator_mem);
pva_kmd_mutex_deinit(&ctx->ccq_lock);
pva_kmd_mutex_deinit(&ctx->resource_table_lock);
err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id);
ASSERT(err == PVA_SUCCESS);
}
struct pva_kmd_context *pva_kmd_get_context(struct pva_kmd_device *pva,
uint8_t alloc_id)
{
return pva_kmd_get_block(&pva->context_allocator, alloc_id);
}

View File

@@ -0,0 +1,104 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_CONTEXT_H
#define PVA_KMD_CONTEXT_H
#include "pva_api.h"
#include "pva_constants.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_submitter.h"
struct pva_kmd_device;
/**
* @brief This struct manages a user context in KMD.
*
* One KMD user context is uniquely mapped to a UMD user context. Each context
* is assigned a unique CCQ block and, on QNX and Linux, a unique file
* descriptor.
*/
struct pva_kmd_context {
struct pva_kmd_device *pva;
uint8_t resource_table_id;
uint8_t ccq_id;
uint8_t smmu_ctx_id;
bool inited;
pva_kmd_mutex_t resource_table_lock;
struct pva_kmd_resource_table ctx_resource_table;
struct pva_kmd_submitter submitter;
/** The lock protects the submission to the queue, including
* incrementing the post fence */
pva_kmd_mutex_t submit_lock;
/** Privileged queue owned by this context. It uses the privileged
* resource table (ID 0). */
struct pva_kmd_device_memory *ctx_queue_mem;
/** Privileged queue owned by the context */
struct pva_kmd_queue ctx_queue;
/** Pointer to the ccq0 lock owned by device*/
pva_kmd_mutex_t *ccq0_lock_ptr;
/** memory needed for submission: including command buffer chunks and fences */
struct pva_kmd_device_memory *submit_memory;
/** Resource ID of the submission memory, registered with the privileged resource table (ID 0) */
uint32_t submit_memory_resource_id;
uint64_t fence_offset; /**< fence offset within submit_memory*/
pva_kmd_mutex_t chunk_pool_lock;
struct pva_kmd_cmdbuf_chunk_pool chunk_pool;
uint32_t max_n_queues;
void *queue_allocator_mem;
struct pva_kmd_block_allocator queue_allocator;
/** This lock protects the context's own CCQ access. It is currently unused
 * because KMD does not perform user-queue submissions.
*/
pva_kmd_mutex_t ccq_lock;
void *plat_data;
uint64_t ccq_shm_handle;
/** Index of block of syncpoints allocated for this context */
uint32_t syncpt_block_index;
uint32_t syncpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT];
};
/**
* @brief Allocate a KMD context.
*/
struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva);
/**
* @brief Destroy a KMD context.
*/
void pva_kmd_context_destroy(struct pva_kmd_context *client);
/**
* @brief Initialize a KMD context.
*
 * Sets up the context's resource table (sized by res_table_capacity), its
 * privileged queue and submission state, and notifies the FW.
*/
enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
uint32_t res_table_capacity);
void pva_kmd_context_deinit(struct pva_kmd_context *ctx);
struct pva_kmd_context *pva_kmd_get_context(struct pva_kmd_device *pva,
uint8_t alloc_id);
#endif // PVA_KMD_CONTEXT_H
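/*
 * A minimal sketch of the context lifecycle exposed above, assuming a valid
 * device pointer; the resource table capacity is an arbitrary example and
 * error handling is abbreviated.
 */
static inline enum pva_error
example_context_lifecycle(struct pva_kmd_device *pva)
{
        struct pva_kmd_context *ctx;
        enum pva_error err;

        ctx = pva_kmd_context_create(pva);
        if (ctx == NULL) {
                return PVA_NOMEM;
        }
        err = pva_kmd_context_init(ctx, 64U /* res_table_capacity */);
        if (err != PVA_SUCCESS) {
                pva_kmd_context_destroy(ctx);
                return err;
        }
        /* ...queue creation and submissions happen here... */
        pva_kmd_context_destroy(ctx); /* tears down queues and frees the context */
        return PVA_SUCCESS;
}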

View File

@@ -0,0 +1,142 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_device.h"
#include "pva_kmd_debugfs.h"
#include "pva_kmd_fw_profiler.h"
#include "pva_kmd_silicon_utils.h"
#include "pva_kmd_vpu_ocd.h"
#include "pva_kmd_tegra_stats.h"
#include "pva_kmd_vpu_app_auth.h"
void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva)
{
static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0_v3",
"ocd_vpu1_v3" };
pva_kmd_debugfs_create_bool(pva, "stats_enable",
&pva->debugfs_context.stats_enable);
pva_kmd_debugfs_create_bool(pva, "vpu_debug",
&pva->debugfs_context.vpu_debug);
pva_kmd_debugfs_create_u32(pva, "profile_level",
&pva->debugfs_context.profile_level);
pva->debugfs_context.vpu_fops.read = &update_vpu_stats;
pva->debugfs_context.vpu_fops.pdev = pva;
pva_kmd_debugfs_create_file(pva, "vpu_stats",
&pva->debugfs_context.vpu_fops);
for (uint32_t i = 0; i < NUM_VPU_BLOCKS; i++) {
pva->debugfs_context.vpu_ocd_fops[i].open =
&pva_kmd_vpu_ocd_open;
pva->debugfs_context.vpu_ocd_fops[i].release =
&pva_kmd_vpu_ocd_release;
pva->debugfs_context.vpu_ocd_fops[i].read =
&pva_kmd_vpu_ocd_read;
pva->debugfs_context.vpu_ocd_fops[i].write =
&pva_kmd_vpu_ocd_write;
pva->debugfs_context.vpu_ocd_fops[i].pdev = pva;
pva->debugfs_context.vpu_ocd_fops[i].file_data =
(void *)&pva->regspec.vpu_dbg_instr_reg_offset[i];
pva_kmd_debugfs_create_file(
pva, vpu_ocd_names[i],
&pva->debugfs_context.vpu_ocd_fops[i]);
}
pva->debugfs_context.allowlist_fops.write = &update_vpu_allowlist;
pva->debugfs_context.allowlist_fops.pdev = pva;
pva_kmd_debugfs_create_file(pva, "vpu_app_authentication",
&pva->debugfs_context.allowlist_fops);
pva_kmd_device_init_profiler(pva);
pva_kmd_device_init_tegra_stats(pva);
}
void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *pva)
{
pva_kmd_device_deinit_tegra_stats(pva);
pva_kmd_device_deinit_profiler(pva);
pva_kmd_debugfs_remove_nodes(pva);
}
static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats,
uint8_t *out_buffer, uint64_t len)
{
char kernel_buffer[256];
int64_t formatted_len;
formatted_len = snprintf(
kernel_buffer, sizeof(kernel_buffer),
"%llu\n%llu\n%llu\n%llu\n",
(long long unsigned int)(kmd_tegra_stats->window_start_time),
(long long unsigned int)(kmd_tegra_stats->window_end_time),
(long long unsigned int)
kmd_tegra_stats->average_vpu_utilization[0],
(long long unsigned int)
kmd_tegra_stats->average_vpu_utilization[1]);
if (formatted_len <= 0) {
return 0;
}
formatted_len++; // account for the terminating null character
if (len < (uint64_t)formatted_len) {
return 0;
}
// Copy the formatted string from kernel buffer to user buffer
if (pva_kmd_copy_data_to_user(out_buffer, kernel_buffer,
formatted_len)) {
pva_kmd_log_err("failed to copy read buffer to user");
return 0;
}
return formatted_len;
}
int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset, uint64_t size)
{
uint64_t size_read = 0U;
struct pva_kmd_tegrastats kmd_tegra_stats;
kmd_tegra_stats.window_start_time = 0;
kmd_tegra_stats.window_end_time = 0;
kmd_tegra_stats.average_vpu_utilization[0] = 0;
kmd_tegra_stats.average_vpu_utilization[1] = 0;
pva_kmd_log_err("Reading VPU stats");
pva_kmd_notify_fw_get_tegra_stats(dev, &kmd_tegra_stats);
size_read = print_vpu_stats(&kmd_tegra_stats, out_buffer, size);
return size_read;
}
int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size)
{
char strbuf[3] = { 0 }; // '0' or '1', optional newline, and a terminating null
uint32_t pva_auth_enable;
unsigned long retval;
uint64_t copy_len = (size < (sizeof(strbuf) - 1U)) ? size : (sizeof(strbuf) - 1U);
retval = pva_kmd_copy_data_from_user(strbuf, in_buffer, copy_len);
if (retval != 0u) {
pva_kmd_log_err("Failed to copy write buffer from user");
return -1;
}
pva_auth_enable = pva_kmd_strtol(strbuf, 16);
pva->pva_auth->pva_auth_enable = (pva_auth_enable == 1) ? true : false;
if (pva->pva_auth->pva_auth_enable)
pva->pva_auth->pva_auth_allow_list_parsed = false;
return 2;
}

View File

@@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_DEBUGFS_H
#define PVA_KMD_DEBUGFS_H
#include "pva_kmd.h"
#include "pva_kmd_shim_debugfs.h"
#include "pva_kmd_fw_profiler.h"
#define NUM_VPU_BLOCKS 2U
/**
 * Maximum length of a debugfs file operation buffer, in bytes
*/
#define MAX_FILE_LEN 256U
struct pva_kmd_file_ops {
int (*open)(struct pva_kmd_device *dev);
int (*release)(struct pva_kmd_device *dev);
int64_t (*read)(struct pva_kmd_device *dev, void *file_data,
uint8_t *data, uint64_t offset, uint64_t size);
int64_t (*write)(struct pva_kmd_device *dev, void *file_data,
const uint8_t *data, uint64_t offset, uint64_t size);
void *pdev;
void *file_data;
};
struct pva_kmd_debugfs_context {
bool stats_enable;
bool vpu_debug;
bool vpu_print_enable;
char *allowlist_path;
uint32_t profile_level;
struct pva_kmd_file_ops vpu_fops;
struct pva_kmd_file_ops allowlist_fops;
struct pva_kmd_file_ops hwpm_fops;
void *data_hwpm;
struct pva_kmd_file_ops vpu_ocd_fops[NUM_VPU_BLOCKS];
struct pva_kmd_fw_profiling_config g_fw_profiling_config;
};
void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev);
void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *dev);
int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset, uint64_t size);
int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size);
#endif //PVA_KMD_DEBUGFS_H
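/*
 * A minimal sketch of wiring a custom debugfs node through pva_kmd_file_ops,
 * assuming a device-lifetime fops instance and that the shim declaration of
 * pva_kmd_debugfs_create_file() is visible; the node name and handler are
 * illustrative examples.
 */
static int64_t example_read(struct pva_kmd_device *dev, void *file_data,
                            uint8_t *out_buffer, uint64_t offset,
                            uint64_t size)
{
        /* Fill out_buffer (copied to the user) and return the byte count. */
        return 0;
}

static inline void example_register_node(struct pva_kmd_device *pva,
                                         struct pva_kmd_file_ops *fops)
{
        fops->read = &example_read;
        fops->pdev = pva;
        pva_kmd_debugfs_create_file(pva, "example_node", fops);
}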

View File

@@ -0,0 +1,338 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api_types.h"
#include "pva_kmd_fw_debug.h"
#include "pva_kmd_utils.h"
#include "pva_api_cmdbuf.h"
#include "pva_api.h"
#include "pva_kmd_constants.h"
#include "pva_fw.h"
#include "pva_bit.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device.h"
#include "pva_kmd_context.h"
#include "pva_kmd_t23x.h"
#include "pva_kmd_t26x.h"
#include "pva_kmd_regs.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_fw_profiler.h"
#include "pva_kmd_vpu_app_auth.h"
#include "pva_utils.h"
#include "pva_kmd_debugfs.h"
#include "pva_kmd_tegra_stats.h"
#include "pva_kmd_shim_silicon.h"
/**
* @brief Send address and size of the resource table to FW through CCQ.
*
* Initialization through CCQ is only intended for KMD's own resource table (the
* first resource table created).
*/
void pva_kmd_send_resource_table_info_by_ccq(
struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table)
{
enum pva_error err;
uint64_t addr = res_table->table_mem->iova;
uint32_t n_entries = res_table->n_entries;
uint64_t ccq_entry =
PVA_INSERT64(PVA_FW_CCQ_OP_SET_RESOURCE_TABLE,
PVA_FW_CCQ_OPCODE_MSB, PVA_FW_CCQ_OPCODE_LSB) |
PVA_INSERT64(addr, PVA_FW_CCQ_RESOURCE_TABLE_ADDR_MSB,
PVA_FW_CCQ_RESOURCE_TABLE_ADDR_LSB) |
PVA_INSERT64(n_entries, PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_MSB,
PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_LSB);
pva_kmd_mutex_lock(&pva->ccq0_lock);
err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
ASSERT(err == PVA_SUCCESS);
pva_kmd_mutex_unlock(&pva->ccq0_lock);
}
/**
* @brief Send address and size of the queue to FW through CCQ.
*
* Initialization through CCQ is only intended for KMD's own queue (the first
* queue created).
*/
void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
struct pva_kmd_queue *queue)
{
enum pva_error err;
uint64_t addr = queue->queue_memory->iova;
uint32_t max_submit = queue->max_num_submit;
uint64_t ccq_entry =
PVA_INSERT64(PVA_FW_CCQ_OP_SET_SUBMISSION_QUEUE,
PVA_FW_CCQ_OPCODE_MSB, PVA_FW_CCQ_OPCODE_LSB) |
PVA_INSERT64(addr, PVA_FW_CCQ_QUEUE_ADDR_MSB,
PVA_FW_CCQ_QUEUE_ADDR_LSB) |
PVA_INSERT64(max_submit, PVA_FW_CCQ_QUEUE_N_ENTRIES_MSB,
PVA_FW_CCQ_QUEUE_N_ENTRIES_LSB);
pva_kmd_mutex_lock(&pva->ccq0_lock);
err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
ASSERT(err == PVA_SUCCESS);
pva_kmd_mutex_unlock(&pva->ccq0_lock);
}
/**
 * Initialize submission-related data structures for this device.
*
* - Create a resource table.
* - Add DRAM resources to the resource table. These are used for command buffer
* chunks and post fences.
* - Create a queue.
*/
static void pva_kmd_device_init_submission(struct pva_kmd_device *pva)
{
uint32_t queue_mem_size;
uint64_t chunk_mem_size;
uint64_t size;
enum pva_error err;
struct pva_fw_postfence post_fence = { 0 };
/* Init KMD's queue */
queue_mem_size = pva_get_submission_queue_memory_size(
PVA_KMD_MAX_NUM_KMD_SUBMITS);
pva->queue_memory = pva_kmd_device_memory_alloc_map(
queue_mem_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(pva->queue_memory != NULL);
pva_kmd_queue_init(&pva->dev_queue, pva, PVA_PRIV_CCQ_ID,
0 /* KMD's queue ID is 0 */, &pva->ccq0_lock,
pva->queue_memory, PVA_KMD_MAX_NUM_KMD_SUBMITS);
/* Init KMD's resource table */
err = pva_kmd_resource_table_init(&pva->dev_resource_table, pva,
PVA_R5_SMMU_CONTEXT_ID,
PVA_KMD_MAX_NUM_KMD_RESOURCES,
PVA_KMD_MAX_NUM_KMD_DMA_CONFIGS);
ASSERT(err == PVA_SUCCESS);
/* Allocate memory for submission*/
chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size(
PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_KMD_CHUNKS);
size = safe_addu64(chunk_mem_size, (uint64_t)sizeof(uint32_t));
/* Allocate one post fence at the end. We don't need to free this memory
* explicitly as it will be freed after we drop the resource. */
pva->submit_memory = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(pva->submit_memory != NULL);
/* Add submit memory to resource table */
err = pva_kmd_add_dram_buffer_resource(&pva->dev_resource_table,
pva->submit_memory,
&pva->submit_memory_resource_id);
ASSERT(err == PVA_SUCCESS);
pva_kmd_update_fw_resource_table(&pva->dev_resource_table);
/* Init chunk pool */
pva_kmd_cmdbuf_chunk_pool_init(
&pva->chunk_pool, pva->submit_memory_resource_id, 0,
chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE,
PVA_KMD_MAX_NUM_KMD_CHUNKS, pva->submit_memory->va);
/* Init fence */
pva->fence_offset = chunk_mem_size;
/* Init submitter */
pva_kmd_mutex_init(&pva->submit_lock);
pva_kmd_mutex_init(&pva->chunk_pool_lock);
post_fence.resource_id = pva->submit_memory_resource_id;
post_fence.offset_lo = iova_lo(pva->fence_offset);
post_fence.offset_hi = iova_hi(pva->fence_offset);
post_fence.ts_resource_id = PVA_RESOURCE_ID_INVALID;
pva_kmd_submitter_init(
&pva->submitter, &pva->dev_queue, &pva->submit_lock,
&pva->chunk_pool, &pva->chunk_pool_lock,
pva_offset_pointer(pva->submit_memory->va, pva->fence_offset),
&post_fence);
}
static void pva_kmd_device_deinit_submission(struct pva_kmd_device *pva)
{
pva_kmd_mutex_deinit(&pva->chunk_pool_lock);
pva_kmd_mutex_deinit(&pva->submit_lock);
pva_kmd_cmdbuf_chunk_pool_deinit(&pva->chunk_pool);
/* Submit memory will be freed after dropping the resource */
pva_kmd_drop_resource(&pva->dev_resource_table,
pva->submit_memory_resource_id);
pva_kmd_resource_table_deinit(&pva->dev_resource_table);
pva_kmd_queue_deinit(&pva->dev_queue);
pva_kmd_device_memory_free(pva->queue_memory);
}
struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
uint32_t device_index,
bool app_authenticate)
{
struct pva_kmd_device *pva;
enum pva_error err;
uint32_t chunk_size;
uint32_t size;
pva = pva_kmd_zalloc_nofail(sizeof(*pva));
pva->device_index = device_index;
pva->load_from_gsc = false;
pva->is_hv_mode = true;
pva->max_n_contexts = PVA_MAX_NUM_USER_CONTEXTS;
pva_kmd_mutex_init(&pva->powercycle_lock);
pva_kmd_mutex_init(&pva->ccq0_lock);
pva_kmd_mutex_init(&pva->resource_table_lock);
pva_kmd_sema_init(&pva->fw_boot_sema, 0);
size = safe_mulu32((uint32_t)sizeof(struct pva_kmd_context),
pva->max_n_contexts);
pva->context_mem = pva_kmd_zalloc(size);
ASSERT(pva->context_mem != NULL);
err = pva_kmd_block_allocator_init(&pva->context_allocator,
pva->context_mem,
PVA_KMD_USER_CONTEXT_ID_BASE,
sizeof(struct pva_kmd_context),
pva->max_n_contexts);
ASSERT(err == PVA_SUCCESS);
if (chip_id == PVA_CHIP_T23X) {
pva_kmd_device_init_t23x(pva);
} else if (chip_id == PVA_CHIP_T26X) {
pva_kmd_device_init_t26x(pva);
} else {
FAULT("SOC not supported");
}
pva_kmd_device_plat_init(pva);
chunk_size = safe_mulu32((uint32_t)sizeof(struct pva_syncpt_rw_info),
(uint32_t)PVA_NUM_RW_SYNCPTS_PER_CONTEXT);
err = pva_kmd_block_allocator_init(&pva->syncpt_allocator,
pva->syncpt_rw, 0, chunk_size,
PVA_MAX_NUM_USER_CONTEXTS);
ASSERT(err == PVA_SUCCESS);
pva_kmd_device_init_submission(pva);
err = pva_kmd_init_vpu_app_auth(pva, app_authenticate);
ASSERT(err == PVA_SUCCESS);
pva->is_suspended = false;
return pva;
}
static void pva_kmd_wait_for_active_contexts(struct pva_kmd_device *pva)
{
uint8_t allocated = 0;
/* Make sure no context is active by allocating all contexts here. */
while (allocated < pva->max_n_contexts) {
uint32_t unused_id;
struct pva_kmd_context *ctx;
ctx = pva_kmd_alloc_block(&pva->context_allocator, &unused_id);
if (ctx != NULL) {
allocated = safe_addu32(allocated, 1U);
} else {
pva_kmd_sleep_us(1000);
}
}
}
void pva_kmd_device_destroy(struct pva_kmd_device *pva)
{
pva_kmd_wait_for_active_contexts(pva);
pva_kmd_device_deinit_submission(pva);
pva_kmd_device_plat_deinit(pva);
pva_kmd_block_allocator_deinit(&pva->syncpt_allocator);
pva_kmd_block_allocator_deinit(&pva->context_allocator);
pva_kmd_free(pva->context_mem);
pva_kmd_mutex_deinit(&pva->ccq0_lock);
pva_kmd_mutex_deinit(&pva->resource_table_lock);
pva_kmd_mutex_deinit(&pva->powercycle_lock);
pva_kmd_free(pva->pva_auth);
pva_kmd_free(pva);
}
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0) {
pva_kmd_allocate_syncpts(pva);
err = pva_kmd_power_on(pva);
if (err != PVA_SUCCESS) {
goto unlock;
}
err = pva_kmd_init_fw(pva);
if (err != PVA_SUCCESS) {
goto unlock;
}
/* Reset KMD queue */
pva->dev_queue.queue_header->cb_head = 0;
pva->dev_queue.queue_header->cb_tail = 0;
pva_kmd_send_resource_table_info_by_ccq(
pva, &pva->dev_resource_table);
pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
pva_kmd_notify_fw_enable_profiling(pva);
}
pva->refcount = safe_addu32(pva->refcount, 1U);
unlock:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
}
void pva_kmd_device_idle(struct pva_kmd_device *pva)
{
pva_kmd_mutex_lock(&pva->powercycle_lock);
ASSERT(pva->refcount > 0);
pva->refcount--;
if (pva->refcount == 0) {
/* Disable FW profiling */
/* TODO: once debugfs is up, move these calls */
// pva_kmd_notify_fw_disable_profiling(pva);
// pva_kmd_drain_fw_profiling_buffer(pva,
// &pva->fw_profiling_buffer);
pva_kmd_deinit_fw(pva);
pva_kmd_power_off(pva);
}
pva_kmd_mutex_unlock(&pva->powercycle_lock);
}
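/*
 * A minimal sketch of the intended busy/idle pairing, assuming the caller
 * needs the FW up for the duration of an operation; every successful
 * pva_kmd_device_busy() must be balanced by one pva_kmd_device_idle().
 */
static enum pva_error example_with_device_powered(struct pva_kmd_device *pva)
{
        enum pva_error err;

        err = pva_kmd_device_busy(pva); /* powers on and boots FW on 0 -> 1 */
        if (err != PVA_SUCCESS) {
                return err;
        }
        /* ...interact with the FW here... */
        pva_kmd_device_idle(pva); /* deinits FW and powers off on 1 -> 0 */
        return PVA_SUCCESS;
}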
enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva,
uint8_t ccq_id, uint64_t ccq_entry,
uint64_t sleep_interval_us,
uint64_t timeout_us)
{
/* spin until we have space or timeout reached */
while (pva_kmd_get_ccq_space(pva, ccq_id) == 0) {
if (timeout_us == 0) {
pva_kmd_log_err(
"pva_kmd_ccq_push_with_timeout Timed out");
return PVA_TIMEDOUT;
}
pva_kmd_sleep_us(sleep_interval_us);
timeout_us = sat_sub64(timeout_us, sleep_interval_us);
}
/* TODO: memory write barrier is needed here */
pva_kmd_ccq_push(pva, ccq_id, ccq_entry);
return PVA_SUCCESS;
}

View File

@@ -0,0 +1,158 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_DEVICE_H
#define PVA_KMD_DEVICE_H
#include "pva_constants.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_submitter.h"
#include "pva_kmd_regs.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_fw_debug.h"
#include "pva_kmd_shim_init.h"
#include "pva_kmd_shim_ccq.h"
#include "pva_kmd_fw_profiler.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_debugfs.h"
struct pva_syncpt_rw_info {
/** Don't change the field order: syncpt_id and syncpt_iova are prefilled during
 * KMD boot, and the first field is overwritten by the block allocator every time
 * the block is freed */
uint32_t syncpt_value;
uint32_t syncpt_id;
uint64_t syncpt_iova;
};
/** A struct to maintain start and end address of vmem region */
struct vmem_region {
	/** Start address of vmem region */
	uint32_t start;
	/** End address of vmem region */
	uint32_t end;
};
struct pva_kmd_hw_constants {
enum pva_hw_gen hw_gen;
uint8_t n_vmem_regions;
uint32_t n_dma_descriptors;
uint32_t n_user_dma_channels;
uint32_t n_hwseq_words;
uint32_t n_dynamic_adb_buffs;
uint32_t n_smmu_contexts;
};
/**
* @brief This struct manages a single PVA cluster.
*
 * Fields in this struct should be common across all platforms.
 * Platform-specific data is stored in the plat_data field.
*/
struct pva_kmd_device {
uint32_t device_index;
uint32_t r5_image_smmu_context_id;
uint32_t stream_ids[PVA_MAX_NUM_SMMU_CONTEXTS];
struct pva_kmd_hw_constants hw_consts;
uint64_t reg_phy_base[PVA_KMD_APERTURE_COUNT];
uint64_t reg_size[PVA_KMD_APERTURE_COUNT];
struct pva_kmd_regspec regspec;
uint8_t max_n_contexts;
void *context_mem;
struct pva_kmd_block_allocator context_allocator;
pva_kmd_mutex_t resource_table_lock;
struct pva_kmd_resource_table dev_resource_table;
struct pva_kmd_submitter submitter;
/** The lock protects the submission to the queue, including
* incrementing the post fence */
pva_kmd_mutex_t submit_lock;
struct pva_kmd_device_memory *queue_memory;
struct pva_kmd_queue dev_queue;
pva_kmd_mutex_t ccq0_lock;
/** memory needed for submission: including command buffer chunks and fences */
struct pva_kmd_device_memory *submit_memory;
uint32_t submit_memory_resource_id;
	uint64_t fence_offset; /**< fence offset within submit_memory */
pva_kmd_mutex_t chunk_pool_lock;
struct pva_kmd_cmdbuf_chunk_pool chunk_pool;
pva_kmd_mutex_t powercycle_lock;
uint32_t refcount;
	/** ISR posts this semaphore when FW completes boot */
pva_kmd_sema_t fw_boot_sema;
struct pva_kmd_device_memory *fw_debug_mem;
struct pva_kmd_device_memory *fw_bin_mem;
struct pva_kmd_device_memory *fw_profiling_buffer_memory;
uint32_t fw_profiling_buffer_resource_id;
struct pva_kmd_fw_profiling_buffer fw_profiling_buffer;
struct pva_kmd_fw_print_buffer fw_print_buffer;
struct pva_kmd_device_memory *tegra_stats_memory;
uint32_t tegra_stats_resource_id;
uint32_t tegra_stats_buf_size;
bool load_from_gsc;
bool is_hv_mode;
struct pva_kmd_debugfs_context debugfs_context;
/** Sector packing format for block linear surfaces */
uint8_t bl_sector_pack_format;
/** Offset between 2 syncpoints */
uint32_t syncpt_offset;
uint64_t syncpt_ro_iova;
uint64_t syncpt_rw_iova;
uint32_t num_syncpts;
struct pva_syncpt_rw_info syncpt_rw[PVA_NUM_RW_SYNCPTS];
struct pva_kmd_block_allocator syncpt_allocator;
struct vmem_region *vmem_regions_tab;
bool support_hwseq_frame_linking;
void *plat_data;
void *fw_handle;
struct pva_vpu_auth *pva_auth;
bool is_suspended;
};
struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
uint32_t device_index,
bool app_authenticate);
void pva_kmd_device_destroy(struct pva_kmd_device *pva);
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva);
void pva_kmd_device_idle(struct pva_kmd_device *pva);
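/**
 * Power-management usage sketch (illustrative only, not part of this change;
 * chip_id and the work submission are placeholders, error handling omitted):
 *
 * @code
 *   struct pva_kmd_device *pva = pva_kmd_device_create(chip_id, 0U, true);
 *
 *   if (pva_kmd_device_busy(pva) == PVA_SUCCESS) {
 *           // cluster is powered and FW is booted; submit work here
 *           pva_kmd_device_idle(pva);
 *   }
 *   pva_kmd_device_destroy(pva);
 * @endcode
 */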
enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva,
uint8_t ccq_id, uint64_t ccq_entry,
uint64_t sleep_interval_us,
uint64_t timeout_us);
void pva_kmd_send_resource_table_info_by_ccq(
struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table);
void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
struct pva_kmd_queue *queue);
#endif // PVA_KMD_DEVICE_H

View File

@@ -0,0 +1,148 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_dma_cfg.h"
#include "pva_utils.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device.h"
#define PVA_KMD_INVALID_CH_IDX 0xFF
void pva_kmd_unload_dma_config(struct pva_kmd_dma_resource_aux *dma_aux)
{
uint32_t i;
for (i = 0; i < dma_aux->dram_res_count; i++) {
pva_kmd_drop_resource(dma_aux->res_table,
dma_aux->static_dram_res_ids[i]);
}
if (dma_aux->vpu_bin_res_id != PVA_RESOURCE_ID_INVALID) {
pva_kmd_drop_resource(dma_aux->res_table,
dma_aux->vpu_bin_res_id);
}
}
static void trace_dma_channels(struct pva_dma_config const *dma_config,
uint8_t *desc_to_ch)
{
uint32_t ch_index;
struct pva_dma_config_header const *cfg_hdr = &dma_config->header;
struct pva_dma_channel *channel;
uint32_t num_descs = dma_config->header.num_descriptors;
for (ch_index = 0; ch_index < cfg_hdr->num_channels; ch_index++) {
uint8_t desc_index;
channel = &dma_config->channels[ch_index];
desc_index = channel->desc_index;
for (uint32_t i = 0; i < PVA_MAX_NUM_DMA_DESC; i++) {
desc_index = array_index_nospec(desc_index, num_descs);
if (desc_to_ch[desc_index] != PVA_KMD_INVALID_CH_IDX) {
//Already traced this descriptor
break;
}
desc_to_ch[desc_index] = ch_index;
desc_index = sat_sub8(
dma_config->descriptors[desc_index].link_desc_id,
1);
}
}
}
enum pva_error
pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table,
void *dma_config_payload, uint32_t dma_config_size,
struct pva_kmd_dma_resource_aux *dma_aux,
void *fw_dma_cfg, uint32_t *out_fw_fetch_size)
{
enum pva_error err = PVA_SUCCESS;
uint32_t fw_fetch_size;
struct pva_dma_config dma_config;
struct pva_fw_dma_slot *dyn_slots;
struct pva_fw_dma_reloc *dyn_relocs;
struct pva_fw_dma_slot *static_slots = dma_aux->static_slots;
struct pva_fw_dma_reloc *static_relocs = dma_aux->static_relocs;
struct pva_kmd_dma_access *access_sizes = dma_aux->access_sizes;
// Mapping descriptor index to channel index
uint8_t desc_to_ch[PVA_MAX_NUM_DMA_DESC];
for (uint32_t i = 0; i < PVA_MAX_NUM_DMA_DESC; i++) {
desc_to_ch[i] = PVA_KMD_INVALID_CH_IDX;
}
//set access_sizes to 0 by default
(void)memset(
access_sizes, 0,
(PVA_MAX_NUM_DMA_DESC * sizeof(struct pva_kmd_dma_access)));
err = pva_kmd_parse_dma_config(dma_config_payload, dma_config_size,
&dma_config,
&resource_table->pva->hw_consts);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_validate_dma_config(&dma_config,
&resource_table->pva->hw_consts,
access_sizes,
dma_aux->hw_dma_descs_mask);
if (err != PVA_SUCCESS) {
goto err_out;
}
trace_dma_channels(&dma_config, desc_to_ch);
err = pva_kmd_compute_dma_access(&dma_config, access_sizes,
dma_aux->hw_dma_descs_mask);
if (err != PVA_SUCCESS) {
goto err_out;
}
dyn_slots = pva_offset_pointer(fw_dma_cfg,
sizeof(struct pva_dma_config_resource));
dyn_relocs = pva_offset_pointer(dyn_slots,
dma_config.header.num_dynamic_slots *
sizeof(*dyn_slots));
pva_kmd_collect_relocs(&dma_config, access_sizes, static_slots,
dma_config.header.num_static_slots,
static_relocs, dyn_slots,
dma_config.header.num_dynamic_slots, dyn_relocs,
desc_to_ch);
pva_kmd_write_fw_dma_config(
&dma_config, fw_dma_cfg, &fw_fetch_size,
resource_table->pva->support_hwseq_frame_linking);
dma_aux->res_table = resource_table;
err = pva_kmd_dma_use_resources(&dma_config, dma_aux);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_bind_static_buffers(fw_dma_cfg, dma_aux, static_slots,
dma_config.header.num_static_slots,
static_relocs,
dma_config.static_bindings,
dma_config.header.num_static_slots);
if (err != PVA_SUCCESS) {
goto drop_res;
}
*out_fw_fetch_size = fw_fetch_size;
return PVA_SUCCESS;
drop_res:
pva_kmd_unload_dma_config(dma_aux);
err_out:
return err;
}

View File

@@ -0,0 +1,139 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_DMA_CFG_H
#define PVA_KMD_DMA_CFG_H
#include "pva_kmd.h"
#include "pva_resource.h"
/* Mask to extract the GOB offset from the Surface address */
#define PVA_DMA_BL_GOB_OFFSET_MASK 0x3E00U
/* Right shift value for moving GOB offset value extracted from surface address to LSB */
#define PVA_DMA_BL_GOB_OFFSET_MASK_RSH 6U
struct pva_kmd_dma_access_entry {
int64_t start_addr;
int64_t end_addr;
};
struct pva_kmd_dma_access {
struct pva_kmd_dma_access_entry src;
struct pva_kmd_dma_access_entry dst;
struct pva_kmd_dma_access_entry dst2;
};
struct pva_kmd_resource_table;
struct pva_kmd_hw_constants;
/** Auxiliary information needed for managing DMA resources:
*
* - Hold references to DRAM buffers and VPU bin used by the DMA configuration.
* - Scratch buffers needed during DMA configuration loading.
*/
struct pva_kmd_dma_resource_aux {
struct pva_kmd_resource_table *res_table;
uint32_t vpu_bin_res_id;
uint32_t dram_res_count;
/** DRAM buffers statically referenced by the DMA configuration */
uint32_t static_dram_res_ids[PVA_KMD_MAX_NUM_DMA_DRAM_SLOTS];
	/* Below are work buffers needed during DMA configuration loading. They
	 * don't fit on the stack. */
struct pva_fw_dma_slot static_slots[PVA_KMD_MAX_NUM_DMA_SLOTS];
struct pva_fw_dma_reloc static_relocs[PVA_KMD_MAX_NUM_DMA_SLOTS];
struct pva_kmd_dma_access access_sizes[PVA_MAX_NUM_DMA_DESC];
uint64_t hw_dma_descs_mask[((PVA_MAX_NUM_DMA_DESC / 64ULL) + 1ULL)];
};
enum pva_error
pva_kmd_parse_dma_config(void *dma_config, uint32_t dma_config_size,
struct pva_dma_config *out_cfg,
struct pva_kmd_hw_constants const *hw_consts);
enum pva_error
pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_resource_aux *dma_aux);
enum pva_error
pva_kmd_validate_dma_config(struct pva_dma_config const *dma_cfg,
struct pva_kmd_hw_constants const *hw_consts,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask);
enum pva_error
pva_kmd_compute_dma_access(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask);
void pva_kmd_collect_relocs(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access const *access_sizes,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_reloc *out_static_relocs,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots,
struct pva_fw_dma_reloc *out_dyn_relocs,
uint8_t const *desc_to_ch);
/**
* @brief Bind static buffers to the DMA configuration.
*
* When binding static buffers, we edit pva_dma_config in-place and replace the
* offset field with the final addresses of static buffers.
*
* We also validate that the DMA configuration does not access those static
* buffers out of range.
*/
enum pva_error pva_kmd_bind_static_buffers(
struct pva_dma_config_resource *fw_dma_cfg,
struct pva_kmd_dma_resource_aux *dma_aux,
struct pva_fw_dma_slot const *static_slots, uint16_t num_static_slots,
struct pva_fw_dma_reloc const *static_relocs,
struct pva_dma_static_binding const *static_bindings,
uint32_t num_static_bindings);
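/**
 * Worked example (illustrative numbers, not from this change): a pitch-linear
 * DRAM binding whose buffer iova is 0x80000000, with surface_base_offset
 * 0x100, slot_offset 0x40 and a descriptor-level offset of 0x10, rewrites the
 * descriptor address to 0x80000000 + 0x100 + 0x40 + 0x10 = 0x80000150. The
 * slot's recorded access range, shifted by slot_offset + surface_base_offset,
 * must stay within the size of the bound buffer.
 */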
/**
* @brief Convert user DMA configuration to firmware format.
*/
void pva_kmd_write_fw_dma_config(struct pva_dma_config const *dma_cfg,
void *fw_dma_config,
uint32_t *out_fw_fetch_size,
bool support_hwseq_frame_linking);
/**
* @brief Load DMA configuration into firmware format.
*
 * This function does the following:
*
* - Validate the DMA configuration.
* - Bind static resources (buffers) and embed their addresses directly in the
* firmware DMA configuration.
* - Hold references to DRAM buffers and VPU bin used by the DMA configuration.
* - Convert the DMA configuration into firmware format.
*
* @param resource_table the resource table for the context.
* @param dma_config DMA configuration from user space.
* @param dma_config_size Size of the dma_config buffer.
* @param dma_aux Auxiliary information needed for loading the DMA
* configuration.
* @param fw_dma_cfg Output buffer for the firmware DMA configuration.
* @param out_fw_fetch_size Size of the firmware DMA configuration that needs to
* be fetched into TCM.
*/
enum pva_error
pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table,
void *dma_config, uint32_t dma_config_size,
struct pva_kmd_dma_resource_aux *dma_aux,
void *fw_dma_cfg, uint32_t *out_fw_fetch_size);
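/**
 * Usage sketch (a minimal illustration, not part of this change; ctx_res_table,
 * user_cfg, user_cfg_size, aux and fw_cfg_buf are assumed to be provided by
 * the caller, and error handling is abbreviated):
 *
 * @code
 *   uint32_t fetch_size;
 *   enum pva_error err;
 *
 *   err = pva_kmd_load_dma_config(ctx_res_table, user_cfg, user_cfg_size,
 *                                 aux, fw_cfg_buf, &fetch_size);
 *   if (err == PVA_SUCCESS) {
 *           // fw_cfg_buf now holds the firmware-format DMA configuration;
 *           // the first fetch_size bytes are what the FW fetches into TCM.
 *           // When the configuration is torn down:
 *           pva_kmd_unload_dma_config(aux);
 *   }
 * @endcode
 */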
void pva_kmd_unload_dma_config(struct pva_kmd_dma_resource_aux *dma_aux);
#endif // PVA_KMD_DMA_CFG_H

View File

@@ -0,0 +1,369 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device_memory.h"
#include "pva_api.h"
#include "pva_kmd_dma_cfg.h"
#include "pva_api_dma.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_device.h"
static uint32_t get_slot_line_pitch(struct pva_fw_dma_descriptor *descs,
struct pva_fw_dma_reloc const *relocs,
struct pva_fw_dma_slot const *slot)
{
struct pva_fw_dma_reloc const *reloc = &relocs[slot->reloc_start_idx];
uint32_t first_desc_index = reloc->desc_index;
struct pva_fw_dma_descriptor *first_desc = &descs[first_desc_index];
uint8_t log2_bpp =
PVA_EXTRACT(first_desc->transfer_control1, 1, 0, uint8_t);
if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) {
return first_desc->slp_adv << log2_bpp;
} else {
return first_desc->dlp_adv << log2_bpp;
}
}
static enum pva_error
set_channel_block_height(struct pva_dma_config_resource *dma_config,
uint16_t ch_mask, uint8_t log2_block_height)
{
struct pva_fw_dma_channel *channels =
pva_dma_config_get_channels(dma_config);
// max block height is 32 GOB
if (log2_block_height > PVA_DMA_MAX_LOG2_BLOCK_HEIGHT) {
pva_kmd_log_err("Invalid block height");
return PVA_ERR_CMD_INVALID_BLOCK_HEIGHT;
}
while (ch_mask > 0) {
uint8_t ch_index = __builtin_ctz(ch_mask);
if (dma_config->ch_block_height_fixed_mask & (1 << ch_index)) {
/* If this bit is already set, it means block height cannot be changed. */
uint8_t set_bh = PVA_EXTRACT(channels[ch_index].cntl0,
27, 25, uint8_t);
if (set_bh != log2_block_height) {
pva_kmd_log_err("Conflicting block height");
return PVA_INVAL;
}
} else {
channels[ch_index].cntl0 &= ~PVA_MASK(27, 25);
channels[ch_index].cntl0 |=
PVA_INSERT(log2_block_height, 27, 25);
dma_config->ch_block_height_fixed_mask |=
(1 << ch_index);
}
ch_mask &= ~(1 << ch_index);
}
return PVA_SUCCESS;
}
static enum pva_error
bind_static_dram_slot(struct pva_dma_config_resource *dma_config,
struct pva_kmd_dma_resource_aux *dma_aux,
struct pva_fw_dma_slot const *slot,
struct pva_fw_dma_reloc const *static_relocs,
struct pva_dma_dram_binding const *dram_bd)
{
struct pva_fw_dma_descriptor *descs =
pva_dma_config_get_descriptors(dma_config);
enum pva_error err = PVA_SUCCESS;
struct pva_fw_dma_reloc const *relocs;
bool is_block_linear =
(dram_bd->surface_format == PVA_SURF_FMT_BLOCK_LINEAR);
uint32_t line_pitch = get_slot_line_pitch(descs, static_relocs, slot);
uint8_t log2_block_height = dram_bd->log2_block_height;
struct pva_kmd_dram_resource *dram_res =
&pva_kmd_peek_resource(dma_aux->res_table, dram_bd->resource_id)
->dram;
uint64_t slot_offset_pl = dram_bd->slot_offset;
uint64_t surface_base_addr =
sat_add64(dram_bd->surface_base_offset, dram_res->mem->iova);
/* When binding a buffer, we add the binding->surface_base_offset to the
* buffer base address. Therefore, the effective buffer size is
* reduced by the offset. */
uint64_t max_surface_size =
sat_sub64(dram_res->mem->size, dram_bd->surface_base_offset);
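	/* For example (illustrative numbers): a 0x1000-byte buffer bound with
	 * surface_base_offset 0x100 leaves max_surface_size = 0xF00 for the
	 * slot, before the offset is added back for the range check below. */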
uint64_t sector_pack_format = 0;
int64_t slot_access_start_addr = 0LL;
int64_t slot_access_end_addr = 0LL;
uint64_t slot_surface_combined_offset = 0ULL;
pva_math_error math_error = MATH_OP_SUCCESS;
if ((slot->flags & PVA_FW_DMA_SLOT_FLAG_DRAM) == 0) {
pva_kmd_log_err("Binding DRAM buffer to incompatible slot");
err = PVA_INVALID_BINDING;
goto out;
}
if (is_block_linear) {
if (slot->flags & PVA_FW_DMA_SLOT_FLAG_CB) {
pva_kmd_log_err(
"Block linear surface is not compatible with circular buffer");
err = PVA_INVALID_BINDING;
goto out;
}
max_surface_size =
pva_max_bl_surface_size(max_surface_size,
log2_block_height, line_pitch,
&math_error);
if (math_error != MATH_OP_SUCCESS) {
pva_kmd_log_err(
"bind_static_dram_slot pva_max_bl_surface_size triggered a math error");
err = PVA_ERR_MATH_OP;
goto out;
}
if (!pva_is_512B_aligned(surface_base_addr)) {
pva_kmd_log_err(
"BL surface base address is not 512B aligned");
err = PVA_BAD_SURFACE_BASE_ALIGNMENT;
goto out;
}
err = set_channel_block_height(dma_config, slot->ch_use_mask,
dram_bd->log2_block_height);
if (err != PVA_SUCCESS) {
goto out;
}
sector_pack_format =
dma_aux->res_table->pva->bl_sector_pack_format;
}
slot_surface_combined_offset = addu64(
slot_offset_pl, dram_bd->surface_base_offset, &math_error);
if (slot_surface_combined_offset >= (uint64_t)MAX_INT64) {
pva_kmd_log_err("Slot surface offset too large");
return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE;
}
slot_access_start_addr =
adds64(slot->start_addr, (int64_t)slot_surface_combined_offset,
&math_error);
slot_access_end_addr =
adds64(slot->end_addr, (int64_t)slot_surface_combined_offset,
&math_error);
max_surface_size = addu64(max_surface_size,
dram_bd->surface_base_offset, &math_error);
if (max_surface_size >= (uint64_t)MAX_INT64) {
pva_kmd_log_err("DRAM buffer too large for slot binding");
return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE;
}
if (math_error != MATH_OP_SUCCESS) {
pva_kmd_log_err("Math error during slot binding");
return PVA_ERR_MATH_OP;
}
if (slot_access_start_addr < 0LL) {
pva_kmd_log_err(
"DRAM buffer offset underflows for slot binding");
return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE;
}
if (slot_access_end_addr > (int64_t)max_surface_size) {
pva_kmd_log_err("DRAM buffer too small for slot binding");
return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE;
}
relocs = &static_relocs[slot->reloc_start_idx];
for (uint32_t i = 0; i < slot->reloc_count; i++) {
struct pva_fw_dma_reloc const *reloc = &relocs[i];
struct pva_fw_dma_descriptor *desc = &descs[reloc->desc_index];
uint8_t *addr_hi_ptr;
uint32_t *addr_lo_ptr;
uint32_t format_field_shift = 0;
uint64_t addr;
uint64_t desc_offset_pl;
uint64_t offset;
if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) {
addr_hi_ptr = &desc->src_adr1;
addr_lo_ptr = &desc->src_adr0;
format_field_shift = 3; //SRC_TF in TRANSFER_CONTROL0
} else if (reloc->field == PVA_FW_DMA_RELOC_FIELD_DST) {
addr_hi_ptr = &desc->dst_adr1;
addr_lo_ptr = &desc->dst_adr0;
format_field_shift = 7; //DST_TF in TRANSFER_CONTROL0
} else { /* PVA_FW_DMA_RELOC_FIELD_DST2 */
pva_kmd_log_err("Binding DRAM buffer to DST2 slot");
err = PVA_INVAL;
goto out;
}
desc_offset_pl = assemble_addr(*addr_hi_ptr, *addr_lo_ptr);
offset = sat_add64(slot_offset_pl, desc_offset_pl);
desc->transfer_control0 &= ~(1 << format_field_shift);
if (is_block_linear) {
			/* We need to insert bits surface_base_addr[13:9] into
			 * transfer_control2[7:3] as specified by the DMA IAS. This
			 * helps the HW identify the starting GOB index inside a block. */
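			/* For example (illustrative value): surface_base_addr = 0x2600
			 * has bits [13:9] = 0x13, so transfer_control2[7:3] is set
			 * to 0x13. */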
desc->transfer_control2 &= ~PVA_MASK(7, 3);
desc->transfer_control2 |=
PVA_INSERT8(PVA_EXTRACT64(surface_base_addr, 13,
9, uint8_t),
7, 3);
desc->transfer_control0 |= 1 << format_field_shift;
offset = pva_pl_to_bl_offset(offset, line_pitch,
log2_block_height,
&math_error);
if (math_error != MATH_OP_SUCCESS) {
pva_kmd_log_err(
"pva_fw_do_cmd_bind_dram_slot pva_pl_to_bl_offset triggered a math error");
err = PVA_ERR_MATH_OP;
goto out;
}
if (!pva_is_64B_aligned(offset)) {
pva_kmd_log_err(
"Descriptor starting address is not aligned to 64 bytes");
err = PVA_BAD_DESC_ADDR_ALIGNMENT;
goto out;
}
}
addr = sat_add64(surface_base_addr, offset);
addr |= (sector_pack_format << PVA_BL_SECTOR_PACK_BIT_SHIFT);
*addr_hi_ptr = iova_hi(addr);
*addr_lo_ptr = iova_lo(addr);
}
out:
return err;
}
static enum pva_error
bind_static_vmem_slot(struct pva_dma_config_resource *dma_config,
struct pva_kmd_dma_resource_aux *dma_aux,
struct pva_fw_dma_slot const *slot,
struct pva_fw_dma_reloc const *static_relocs,
struct pva_dma_vmem_binding const *vmem_bd)
{
enum pva_error err = PVA_SUCCESS;
struct pva_fw_dma_descriptor *descs =
pva_dma_config_get_descriptors(dma_config);
struct pva_kmd_vpu_bin_resource *vpu_bin;
struct pva_symbol_info *sym;
uint32_t buffer_size, buffer_addr;
struct pva_fw_dma_reloc const *relocs;
enum pva_symbol_type needed_sym_type;
if (slot->flags & PVA_FW_DMA_SLOT_FLAG_VMEM_DATA) {
needed_sym_type = PVA_SYM_TYPE_DATA;
} else if (slot->flags & PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE) {
needed_sym_type = PVA_SYM_TYPE_VPUC_TABLE;
} else {
pva_kmd_log_err("Unexpected VMEM slot flags");
err = PVA_INTERNAL;
goto out;
}
#if defined(WAR_PVAAS16267)
needed_sym_type = PVA_SYM_TYPE_DATA;
#endif
vpu_bin = &pva_kmd_peek_resource(dma_aux->res_table,
dma_aux->vpu_bin_res_id)
->vpu_bin;
sym = pva_kmd_get_symbol_with_type(&vpu_bin->symbol_table,
vmem_bd->addr.symbol_id,
needed_sym_type);
if (sym == NULL) {
err = PVA_INVALID_SYMBOL;
goto out;
}
buffer_size = sat_sub32(sym->size, vmem_bd->addr.offset);
buffer_addr = sat_add32(sym->vmem_addr, vmem_bd->addr.offset);
if (buffer_size < get_slot_size(slot)) {
pva_kmd_log_err("VMEM buffer too small for slot binding");
err = PVA_RES_OUT_OF_RANGE;
goto out;
}
relocs = &static_relocs[slot->reloc_start_idx];
for (uint32_t i = 0; i < slot->reloc_count; i++) {
struct pva_fw_dma_reloc const *reloc = &relocs[i];
struct pva_fw_dma_descriptor *desc = &descs[reloc->desc_index];
if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) {
desc->src_adr0 = sat_add32(buffer_addr, desc->src_adr0);
} else if (reloc->field == PVA_FW_DMA_RELOC_FIELD_DST) {
desc->dst_adr0 = sat_add32(buffer_addr, desc->dst_adr0);
} else {
if (!pva_is_64B_aligned(buffer_addr)) {
pva_kmd_log_err(
"VMEM replication address not aligned to 64 bytes");
err = PVA_INVAL;
goto out;
}
desc->frda =
((uint16_t)(buffer_addr >> 6U) + desc->frda) &
0x3FFF;
}
}
out:
return err;
}
enum pva_error pva_kmd_bind_static_buffers(
struct pva_dma_config_resource *fw_dma_cfg_hdr,
struct pva_kmd_dma_resource_aux *dma_aux,
struct pva_fw_dma_slot const *static_slots, uint16_t num_static_slots,
struct pva_fw_dma_reloc const *static_relocs,
struct pva_dma_static_binding const *static_bindings,
uint32_t num_static_bindings)
{
uint32_t slot_id;
enum pva_error err = PVA_SUCCESS;
if (num_static_bindings != num_static_slots) {
pva_kmd_log_err("Invalid number of static bindings");
err = PVA_INVAL;
goto out;
}
// Reset BL status for each channel
fw_dma_cfg_hdr->ch_block_height_fixed_mask = 0U;
for (slot_id = 0U; slot_id < num_static_slots; slot_id++) {
struct pva_fw_dma_slot const *st_slot = &static_slots[slot_id];
struct pva_dma_static_binding const *binding =
&static_bindings[slot_id];
if (binding->type == PVA_DMA_STATIC_BINDING_DRAM) {
err = bind_static_dram_slot(fw_dma_cfg_hdr, dma_aux,
st_slot, static_relocs,
&binding->dram);
} else { // PVA_FW_DMA_SLOT_FLAG_VMEM
err = bind_static_vmem_slot(fw_dma_cfg_hdr, dma_aux,
st_slot, static_relocs,
&binding->vmem);
}
if (err != PVA_SUCCESS) {
goto out;
}
}
out:
return err;
}

View File

@@ -0,0 +1,821 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_hwseq_validate.h"
#include "pva_api.h"
#include "pva_kmd_dma_cfg.h"
#include "pva_api_dma.h"
#include "pva_kmd_device.h"
#include "pva_math_utils.h"
struct pva_fw_dma_reloc_slot_info {
struct pva_fw_dma_slot *slots;
struct pva_fw_dma_reloc *relocs;
uint16_t num_slots;
uint8_t *reloc_off;
};
struct pva_fw_dma_reloc_slots {
struct pva_fw_dma_reloc_slot_info dyn_slot;
struct pva_fw_dma_reloc_slot_info static_slot;
};
static enum pva_error
validate_channel_mapping(struct pva_dma_config const *out_cfg,
struct pva_kmd_hw_constants const *hw_consts)
{
struct pva_dma_channel *channel;
struct pva_dma_config_header const *cfg_hdr = &out_cfg->header;
pva_math_error math_err = MATH_OP_SUCCESS;
for (uint8_t i = 0U; i < cfg_hdr->num_channels; i++) {
channel = &out_cfg->channels[i];
if ((channel->desc_index >= out_cfg->header.num_descriptors) ||
(pva_is_reserved_desc(channel->desc_index))) {
pva_kmd_log_err(
"ERR: Invalid Channel Descriptor Index");
return PVA_INVAL;
}
if (addu8(channel->vdb_count, channel->vdb_offset, &math_err) >
PVA_NUM_DYNAMIC_VDB_BUFFS) {
pva_kmd_log_err("ERR: Invalid Channel control data");
return PVA_INVAL;
}
if (addu16(channel->adb_count, channel->adb_offset, &math_err) >
hw_consts->n_dynamic_adb_buffs) {
pva_kmd_log_err("ERR: Invalid ADB Buff Size or Offset");
return PVA_INVAL;
}
}
if (math_err != MATH_OP_SUCCESS) {
pva_kmd_log_err("validate_channel_mapping math error");
return PVA_ERR_MATH_OP;
}
return PVA_SUCCESS;
}
static enum pva_error validate_padding(struct pva_dma_descriptor *desc)
{
if ((desc->px != 0U) && (desc->px >= desc->tx)) {
return PVA_INVAL;
}
if ((desc->py != 0U) && (desc->py >= desc->ty)) {
return PVA_INVAL;
}
return PVA_SUCCESS;
}
static bool is_valid_vpu_trigger_mode(struct pva_dma_descriptor *desc)
{
bool valid = true;
if (desc->trig_event_mode != 0U) {
switch (desc->trig_vpu_events) {
case PVA_DMA_NO_TRIG:
//HW Sequencer check
break;
case PVA_DMA_TRIG_VPU_CFG:
if (desc->src.transfer_mode !=
PVA_DMA_TRANS_MODE_VPUCFG) {
valid = false;
}
break;
case PVA_DMA_TRIG_READ0:
case PVA_DMA_TRIG_READ1:
case PVA_DMA_TRIG_READ2:
case PVA_DMA_TRIG_READ3:
case PVA_DMA_TRIG_READ4:
case PVA_DMA_TRIG_READ5:
case PVA_DMA_TRIG_READ6:
if ((desc->src.transfer_mode !=
(uint8_t)PVA_DMA_TRANS_MODE_VPUCFG) &&
(desc->dst.transfer_mode !=
(uint8_t)PVA_DMA_TRANS_MODE_VMEM)) {
valid = false;
}
break;
case PVA_DMA_TRIG_WRITE0:
case PVA_DMA_TRIG_WRITE1:
case PVA_DMA_TRIG_WRITE2:
case PVA_DMA_TRIG_WRITE3:
case PVA_DMA_TRIG_WRITE4:
case PVA_DMA_TRIG_WRITE5:
case PVA_DMA_TRIG_WRITE6:
if ((desc->src.transfer_mode !=
(uint8_t)PVA_DMA_TRANS_MODE_VPUCFG) &&
(desc->src.transfer_mode !=
(uint8_t)PVA_DMA_TRANS_MODE_VMEM)) {
valid = false;
}
break;
default:
valid = false;
break;
}
}
return valid;
}
static bool validate_src_dst_adv_val(struct pva_dma_descriptor *desc,
bool relax_dim3_check)
{
uint8_t is_any_rpt_zero = 0U;
is_any_rpt_zero = desc->src.rpt1 & desc->src.rpt2 & desc->dst.rpt1 &
desc->dst.rpt2;
if ((desc->trig_event_mode == (uint8_t)PVA_DMA_TRIG_MODE_4TH_DIM) &&
(is_any_rpt_zero == 0U)) {
return false;
}
if (desc->trig_event_mode == ((uint8_t)PVA_DMA_TRIG_MODE_3RD_DIM)) {
if (false == relax_dim3_check) {
if (((desc->src.rpt1 == 0U) &&
(desc->dst.rpt1 == 0U))) {
return false;
}
} else {
if (((desc->dst.rpt1 == 0U) ||
(desc->src.rpt1 > desc->dst.rpt1))) {
return false;
}
}
}
return true;
}
static enum pva_error
validate_dma_desc_trans_cntl2(struct pva_dma_descriptor *desc)
{
if ((desc->prefetch_enable != 0U) &&
((desc->tx == 0U) || (desc->ty == 0U) ||
(desc->src.transfer_mode != (uint32_t)PVA_DMA_TRANS_MODE_DRAM) ||
(desc->dst.transfer_mode != (uint32_t)PVA_DMA_TRANS_MODE_VMEM))) {
return PVA_INVAL;
}
return PVA_SUCCESS;
}
static enum pva_error
validate_descriptor(struct pva_dma_descriptor *desc,
struct pva_dma_config_header const *cfg_hdr)
{
enum pva_error err = PVA_SUCCESS;
err = validate_padding(desc);
if ((desc->dst.transfer_mode == PVA_DMA_TRANS_MODE_VMEM) &&
(err != PVA_SUCCESS)) {
return err;
}
if (!(is_valid_vpu_trigger_mode(desc))) {
pva_kmd_log_err("Bad trigger");
return PVA_INVAL;
}
	/* Check src/dst ADV values with respect to ECET bits */
if (false == validate_src_dst_adv_val(desc, false)) {
pva_kmd_log_err(
"Invalid src/dst ADV values with respect to ECET");
return PVA_INVAL;
}
/* DMA_DESC_TRANS CNTL2 */
if (PVA_SUCCESS != validate_dma_desc_trans_cntl2(desc)) {
pva_kmd_log_err("Bad trans cntl 2");
return PVA_INVAL;
}
/* DMA_DESC_LDID */
if ((desc->link_desc_id > cfg_hdr->num_descriptors) ||
((desc->link_desc_id != 0) &&
pva_is_reserved_desc(desc->link_desc_id - PVA_DMA_DESC0))) {
pva_kmd_log_err("ERR: Invalid linker Desc ID");
return PVA_INVAL;
}
return PVA_SUCCESS;
}
static bool
is_dma_config_header_valid(struct pva_dma_config_header const *cfg_hdr,
struct pva_kmd_hw_constants const *hw_consts)
{
if (((cfg_hdr->base_descriptor + cfg_hdr->num_descriptors) >
hw_consts->n_dma_descriptors) ||
((cfg_hdr->base_channel + cfg_hdr->num_channels) >
(hw_consts->n_user_dma_channels + 1U)) ||
((cfg_hdr->base_hwseq_word + cfg_hdr->num_hwseq_words) >
hw_consts->n_hwseq_words) ||
(cfg_hdr->num_static_slots > PVA_KMD_MAX_NUM_DMA_SLOTS) ||
(cfg_hdr->num_dynamic_slots > PVA_KMD_MAX_NUM_DMA_RELOCS) ||
(cfg_hdr->base_channel == 0U)) {
return false;
}
return true;
}
enum pva_error
pva_kmd_parse_dma_config(void *dma_config, uint32_t dma_config_size,
struct pva_dma_config *out_cfg,
struct pva_kmd_hw_constants const *hw_consts)
{
struct pva_dma_config_header const *cfg_hdr = dma_config;
uintptr_t offset = 0;
if (dma_config_size < sizeof(*cfg_hdr)) {
pva_kmd_log_err("DMA configuration too small");
return PVA_INVAL;
}
out_cfg->header = *cfg_hdr;
if (!(is_dma_config_header_valid(cfg_hdr, hw_consts))) {
pva_kmd_log_err("Invalid PVA DMA Configuration Header");
return PVA_INVAL;
}
offset += PVA_ALIGN8(sizeof(*cfg_hdr));
out_cfg->hwseq_words = pva_offset_pointer(dma_config, offset);
offset += PVA_ALIGN8(cfg_hdr->num_hwseq_words *
sizeof(*out_cfg->hwseq_words));
out_cfg->channels = pva_offset_pointer(dma_config, offset);
offset +=
PVA_ALIGN8(cfg_hdr->num_channels * sizeof(*out_cfg->channels));
out_cfg->descriptors = pva_offset_pointer(dma_config, offset);
offset += PVA_ALIGN8(cfg_hdr->num_descriptors *
sizeof(*out_cfg->descriptors));
out_cfg->static_bindings = pva_offset_pointer(dma_config, offset);
offset += PVA_ALIGN8(cfg_hdr->num_static_slots *
sizeof(*out_cfg->static_bindings));
if (offset > dma_config_size) {
pva_kmd_log_err("DMA configuration is smaller than expected");
return PVA_INVAL;
}
return PVA_SUCCESS;
}
static enum pva_error
validate_descriptors(struct pva_dma_config const *dma_config)
{
uint32_t i = 0U;
enum pva_error err = PVA_SUCCESS;
struct pva_dma_config_header const *cfg_hdr = &dma_config->header;
struct pva_dma_descriptor *desc;
for (i = 0; i < cfg_hdr->num_descriptors; i++) {
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
desc = &dma_config->descriptors[i];
err = validate_descriptor(desc, cfg_hdr);
if (err != PVA_SUCCESS) {
return err;
}
}
return err;
}
enum pva_error
pva_kmd_validate_dma_config(struct pva_dma_config const *dma_config,
struct pva_kmd_hw_constants const *hw_consts,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask)
{
enum pva_error err = PVA_SUCCESS;
err = validate_channel_mapping(dma_config, hw_consts);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Bad Channels");
return err;
}
err = validate_descriptors(dma_config);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Bad Descriptors");
return err;
}
if (dma_config->header.num_hwseq_words != 0U) {
err = validate_hwseq(dma_config, hw_consts, access_sizes,
hw_dma_descs_mask);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Bad HW Sequencer Blob");
return err;
}
}
return err;
}
enum pva_error
pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_resource_aux *dma_aux)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_vpu_bin_resource *vpu_bin = NULL;
uint32_t i;
/* Increment reference count for VPU bin */
if (dma_cfg->header.vpu_exec_resource_id != PVA_RESOURCE_ID_INVALID) {
struct pva_kmd_resource_record *vpu_bin_rec;
vpu_bin_rec = pva_kmd_use_resource(
dma_aux->res_table,
dma_cfg->header.vpu_exec_resource_id);
if (vpu_bin_rec == NULL) {
pva_kmd_log_err(
"VPU exec resource id used by DMA config does not exist");
err = PVA_INVAL;
goto err_out;
}
if (vpu_bin_rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) {
pva_kmd_log_err(
"Invalid VPU exec resource id used by DMA config");
err = PVA_INVAL;
goto drop_vpu_bin;
}
vpu_bin = &vpu_bin_rec->vpu_bin;
}
dma_aux->vpu_bin_res_id = dma_cfg->header.vpu_exec_resource_id;
dma_aux->dram_res_count = 0;
/* Increment reference count for all static DRAM buffers; For static
* VMEM buffers, check that symbol ID is valid. */
for (i = 0; i < dma_cfg->header.num_static_slots; i++) {
struct pva_dma_static_binding const *slot_buf =
&dma_cfg->static_bindings[i];
if (slot_buf->type == PVA_DMA_STATIC_BINDING_DRAM) {
struct pva_kmd_resource_record *rec;
rec = pva_kmd_use_resource(dma_aux->res_table,
slot_buf->dram.resource_id);
if (rec == NULL) {
pva_kmd_log_err(
"DRAM buffers used by DMA config do not exist");
err = PVA_INVAL;
goto drop_dram;
}
dma_aux->static_dram_res_ids[dma_aux->dram_res_count] =
slot_buf->dram.resource_id;
dma_aux->dram_res_count += 1;
if (rec->type != PVA_RESOURCE_TYPE_DRAM) {
				pva_kmd_log_err(
					"Invalid DRAM resource id used by DMA config");
err = PVA_INVAL;
goto drop_dram;
}
} else if (slot_buf->type == PVA_DMA_STATIC_BINDING_VMEM) {
if (vpu_bin == NULL) {
pva_kmd_log_err(
"VPU bin resource not found for static VMEM buffer");
err = PVA_INVAL;
goto drop_dram;
}
if (pva_kmd_get_symbol(&vpu_bin->symbol_table,
slot_buf->vmem.addr.symbol_id) ==
NULL) {
pva_kmd_log_err("Invalid VMEM symbol ID");
err = PVA_INVAL;
goto drop_dram;
}
} else {
pva_kmd_log_err("Invalid slot buffer type");
err = PVA_INVAL;
goto drop_dram;
}
}
return PVA_SUCCESS;
drop_dram:
for (i = 0; i < dma_aux->dram_res_count; i++) {
pva_kmd_drop_resource(dma_aux->res_table,
dma_aux->static_dram_res_ids[i]);
}
drop_vpu_bin:
if (dma_aux->vpu_bin_res_id != PVA_RESOURCE_ID_INVALID) {
pva_kmd_drop_resource(dma_aux->res_table,
dma_aux->vpu_bin_res_id);
}
err_out:
return err;
}
static uint16_t get_slot_id(uint16_t slot)
{
return slot & PVA_DMA_SLOT_ID_MASK;
}
static uint8_t get_slot_flag(uint8_t transfer_mode, bool cb_enable)
{
uint8_t flags = 0;
if (transfer_mode == PVA_DMA_TRANS_MODE_VMEM) {
flags |= PVA_FW_DMA_SLOT_FLAG_VMEM_DATA;
} else if (transfer_mode == PVA_DMA_TRANS_MODE_L2SRAM) {
flags |= PVA_FW_DMA_SLOT_FLAG_L2SRAM;
} else if (transfer_mode == PVA_DMA_TRANS_MODE_DRAM) {
flags |= PVA_FW_DMA_SLOT_FLAG_DRAM;
} else if (transfer_mode == PVA_DMA_TRANS_MODE_VPUCFG) {
flags |= PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE;
}
if (cb_enable) {
flags |= PVA_FW_DMA_SLOT_FLAG_CB;
}
return flags;
}
static void update_reloc_count(uint16_t slot, uint8_t transfer_mode,
bool cb_enable,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots)
{
uint8_t slot_id = get_slot_id(slot);
if (slot & PVA_DMA_DYNAMIC_SLOT) {
out_dyn_slots[slot_id].reloc_count =
safe_addu16(out_dyn_slots[slot_id].reloc_count, 1U);
out_dyn_slots[slot_id].flags |=
get_slot_flag(transfer_mode, cb_enable);
} else if (slot & PVA_DMA_STATIC_SLOT) {
		out_static_slots[slot_id].reloc_count =
			safe_addu16(out_static_slots[slot_id].reloc_count, 1U);
out_static_slots[slot_id].flags |=
get_slot_flag(transfer_mode, cb_enable);
}
}
static void count_relocs(struct pva_dma_config const *dma_cfg,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots)
{
uint8_t i;
struct pva_dma_descriptor *desc;
for (i = 0U; i < dma_cfg->header.num_descriptors; i++) {
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
desc = &dma_cfg->descriptors[i];
update_reloc_count(desc->src.slot, desc->src.transfer_mode,
desc->src.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
update_reloc_count(desc->dst.slot, desc->dst.transfer_mode,
desc->dst.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
update_reloc_count(desc->dst2_slot, desc->dst.transfer_mode,
desc->dst.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
}
}
static void write_one_reloc(uint8_t ch_index, uint32_t desc_index,
uint16_t slot, uint8_t transfer_mode,
uint8_t reloc_field,
struct pva_fw_dma_reloc_slot_info *info,
struct pva_kmd_dma_access_entry const *access_entry)
{
uint16_t slot_id = get_slot_id(slot);
uint16_t reloc_id = safe_addu16(info->slots[slot_id].reloc_start_idx,
info->reloc_off[slot_id]);
int64_t old_start_addr = info->slots[slot_id].start_addr;
int64_t old_end_addr = info->slots[slot_id].end_addr;
info->slots[slot_id].start_addr =
mins64(access_entry->start_addr, old_start_addr);
info->slots[slot_id].end_addr =
maxs64(access_entry->end_addr, old_end_addr);
info->slots[slot_id].ch_use_mask |= (1U << (ch_index & 0x1FU));
info->relocs[reloc_id].desc_index = desc_index;
info->relocs[reloc_id].field = reloc_field;
info->reloc_off[slot_id] = safe_addu8(info->reloc_off[slot_id], 1U);
}
static void handle_reloc(uint16_t slot, uint8_t transfer_mode,
struct pva_kmd_dma_access_entry const *access_entry,
struct pva_fw_dma_reloc_slots *rel_info,
uint8_t reloc_field, uint8_t ch_index,
uint8_t desc_index)
{
if (slot & PVA_DMA_DYNAMIC_SLOT) {
write_one_reloc(ch_index, desc_index, slot, transfer_mode,
reloc_field, &rel_info->dyn_slot, access_entry);
} else if (slot & PVA_DMA_STATIC_SLOT) {
write_one_reloc(ch_index, desc_index, slot, transfer_mode,
reloc_field, &rel_info->static_slot,
access_entry);
}
}
static void write_relocs(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access const *access_sizes,
struct pva_fw_dma_reloc_slots *rel_info,
uint8_t const *desc_to_ch)
{
uint32_t i;
uint16_t start_idx = 0U;
struct pva_dma_descriptor *desc = NULL;
uint8_t ch_index = 0U;
for (i = 0U; i < rel_info->dyn_slot.num_slots; i++) {
rel_info->dyn_slot.slots[i].reloc_start_idx = start_idx;
start_idx = safe_addu16(
start_idx, rel_info->dyn_slot.slots[i].reloc_count);
}
for (i = 0U; i < rel_info->static_slot.num_slots; i++) {
rel_info->static_slot.slots[i].reloc_start_idx = start_idx;
start_idx = safe_addu16(
start_idx, rel_info->static_slot.slots[i].reloc_count);
}
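	/* For example (illustrative counts): with two dynamic slots holding 2
	 * and 3 relocs and one static slot holding 1, reloc_start_idx becomes
	 * 0 and 2 for the dynamic slots and 5 for the static slot. */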
for (i = 0U; i < dma_cfg->header.num_descriptors; i++) {
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
desc = &dma_cfg->descriptors[i];
ch_index = desc_to_ch[i];
handle_reloc(desc->src.slot, desc->src.transfer_mode,
&access_sizes[i].src, rel_info,
PVA_FW_DMA_RELOC_FIELD_SRC, ch_index, i);
handle_reloc(desc->dst.slot, desc->dst.transfer_mode,
&access_sizes[i].dst, rel_info,
PVA_FW_DMA_RELOC_FIELD_DST, ch_index, i);
handle_reloc(desc->dst2_slot, desc->dst.transfer_mode,
&access_sizes[i].dst2, rel_info,
PVA_FW_DMA_RELOC_FIELD_DST2, ch_index, i);
}
}
static enum pva_error
validate_descriptor_tile_and_padding(struct pva_dma_descriptor *desc,
bool is_dst)
{
enum pva_error err = PVA_SUCCESS;
if (desc->ty == 0U) {
err = PVA_INVALID_DMA_CONFIG;
return err;
}
if (!is_dst) {
if ((desc->tx <= desc->px) || (desc->ty <= desc->py)) {
// invalid tile size/padding config
err = PVA_INVALID_DMA_CONFIG;
return err;
}
}
return PVA_SUCCESS;
}
static enum pva_error get_access_size(struct pva_dma_descriptor *desc,
struct pva_kmd_dma_access_entry *entry,
bool is_dst,
struct pva_kmd_dma_access_entry *dst2)
{
struct pva_dma_transfer_attr *attr = NULL;
uint32_t tx = 0U;
uint32_t ty = 0U;
uint64_t tile_size = 0U;
int64_t start = 0;
int64_t end = 0;
int32_t dim_offset = 0;
uint32_t dim_offset_U = 0U;
uint32_t num_bytes = 0U;
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
// early out for empty tiles
if (desc->tx == 0U) {
return err;
}
err = validate_descriptor_tile_and_padding(desc, is_dst);
if (err != PVA_SUCCESS) {
return err;
}
if (is_dst) {
attr = &desc->dst;
tx = desc->tx;
ty = desc->ty;
} else {
attr = &desc->src;
tx = subu32((uint32_t)desc->tx, (uint32_t)desc->px, &math_err);
ty = subu32((uint32_t)desc->ty, (uint32_t)desc->py, &math_err);
}
if (attr->offset > (uint64_t)(MAX_INT64)) {
err = PVA_INVALID_DMA_CONFIG;
pva_kmd_log_err("Offset is too large");
goto err_out;
}
dim_offset_U = mulu32((uint32_t)(attr->line_pitch),
subu32(ty, 1U, &math_err), &math_err);
if (attr->cb_enable != 0U) {
tile_size = addu32(dim_offset_U, tx, &math_err);
tile_size = tile_size
<< (desc->log2_pixel_size & MAX_BYTES_PER_PIXEL);
if (tile_size > attr->cb_size) {
pva_kmd_log_err(
"Tile size is bigger than circular buffer size");
err = PVA_INVALID_DMA_CONFIG;
}
start = 0LL;
end = (int64_t)attr->cb_size;
goto end;
}
end += adds64((int64_t)dim_offset_U, (int64_t)tx, &math_err);
	// 3rd dim
dim_offset = muls32((attr->adv1), (int32_t)(attr->rpt1), &math_err);
start += mins32(dim_offset, 0);
end += maxs32(dim_offset, 0);
// 4th dim
dim_offset = muls32((attr->adv2), (int32_t)(attr->rpt2), &math_err);
start += mins32(dim_offset, 0);
end += maxs32(dim_offset, 0);
// 5th dim
dim_offset = muls32((attr->adv3), (int32_t)(attr->rpt3), &math_err);
start += mins32(dim_offset, 0);
end += maxs32(dim_offset, 0);
// convert to byte range
num_bytes =
((uint32_t)1U << (desc->log2_pixel_size & MAX_BYTES_PER_PIXEL));
start *= (int64_t)num_bytes;
end *= (int64_t)num_bytes;
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("get_access_size math error");
goto err_out;
}
end:
entry->start_addr =
adds64(mins64(start, end), convert_to_signed_s64(attr->offset),
&math_err);
entry->end_addr =
adds64(maxs64(start, end), convert_to_signed_s64(attr->offset),
&math_err);
if (is_dst) {
dst2->start_addr =
adds64(mins64(start, end), (int64_t)desc->dst2_offset,
&math_err);
dst2->end_addr = adds64(maxs64(start, end),
(int64_t)desc->dst2_offset, &math_err);
}
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("get_access_size math error");
}
err_out:
return err;
}
enum pva_error
pva_kmd_compute_dma_access(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask)
{
uint32_t i;
struct pva_dma_descriptor *desc = NULL;
enum pva_error err = PVA_SUCCESS;
bool skip_swseq_size_compute = false;
for (i = 0; i < dma_cfg->header.num_descriptors; i++) {
		/*
		 * Check whether this DMA descriptor is already driven by the HW
		 * Sequencer. If it is, skip_swseq_size_compute is true and its
		 * access_sizes entry stays zeroed; otherwise go ahead with the
		 * access_sizes calculation.
		 */
		skip_swseq_size_compute = ((hw_dma_descs_mask[i / 64ULL] &
					    (1ULL << (i & 0x3FU))) != 0ULL);
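		/* For example, descriptor index 70 tests hw_dma_descs_mask[1]
		 * bit 6 (70 & 0x3F). */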
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
if (skip_swseq_size_compute == true) {
continue;
}
desc = &dma_cfg->descriptors[i];
//Calculate src_size
err = get_access_size(desc, &access_sizes[i].src, false,
&access_sizes[i].dst2);
if (err != PVA_SUCCESS) {
goto out;
}
//Calculate dst_size
err = get_access_size(desc, &access_sizes[i].dst, true,
&access_sizes[i].dst2);
if (err != PVA_SUCCESS) {
goto out;
}
}
out:
return err;
}
void pva_kmd_collect_relocs(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access const *access_sizes,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_reloc *out_static_relocs,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots,
struct pva_fw_dma_reloc *out_dyn_relocs,
uint8_t const *desc_to_ch)
{
struct pva_fw_dma_reloc_slots rel_info = { 0 };
uint8_t static_reloc_off[PVA_MAX_NUM_DMA_DESC * 3];
uint8_t dyn_reloc_off[PVA_MAX_NUM_DMA_DESC * 3];
memset(out_static_slots, 0,
num_static_slots * sizeof(*out_static_slots));
memset(out_dyn_slots, 0, num_dyn_slots * sizeof(*out_dyn_slots));
	/* First pass: count the number of relocations for each slot */
count_relocs(dma_cfg, out_static_slots, num_static_slots, out_dyn_slots,
num_dyn_slots);
memset(static_reloc_off, 0U, sizeof(static_reloc_off));
memset(dyn_reloc_off, 0U, sizeof(dyn_reloc_off));
rel_info.dyn_slot.slots = out_dyn_slots;
rel_info.dyn_slot.relocs = out_dyn_relocs;
rel_info.dyn_slot.num_slots = num_dyn_slots;
rel_info.dyn_slot.reloc_off = dyn_reloc_off;
rel_info.static_slot.slots = out_static_slots;
rel_info.static_slot.relocs = out_static_relocs;
rel_info.static_slot.num_slots = num_static_slots;
rel_info.static_slot.reloc_off = static_reloc_off;
/* Second pass: write reloc info */
write_relocs(dma_cfg, access_sizes, &rel_info, desc_to_ch);
}

View File

@@ -0,0 +1,294 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device_memory.h"
#include "pva_api.h"
#include "pva_api_types.h"
#include "pva_kmd_dma_cfg.h"
#include "pva_resource.h"
#include "pva_kmd_hwseq_validate.h"
static void write_dma_channel(struct pva_dma_channel const *ch,
uint8_t base_desc_index,
struct pva_fw_dma_channel *fw_ch,
struct pva_dma_resource_map *dma_resource_map,
bool support_hwseq_frame_linking)
{
/* DMA_CHANNEL_CNTL0_CHSDID: DMA_CHANNEL_CNTL0[0] = descIndex + 1;*/
fw_ch->cntl0 =
(((ch->desc_index + base_desc_index + 1U) & 0xFFU) << 0U);
/* DMA_CHANNEL_CNTL0_CHVMEMOREQ */
fw_ch->cntl0 |= ((ch->vdb_count & 0xFFU) << 8U);
/* DMA_CHANNEL_CNTL0_CHBH */
fw_ch->cntl0 |= ((ch->adb_count & 0x1FFU) << 16U);
/* DMA_CHANNEL_CNTL0_CHPREF */
fw_ch->cntl0 |= ((ch->prefetch_enable & 1U) << 30U);
/* DMA_CHANNEL_CNTL1_CHPWT */
fw_ch->cntl1 = ((ch->req_per_grant & 0x7U) << 2U);
/* DMA_CHANNEL_CNTL1_CHVDBSTART */
fw_ch->cntl1 |= ((ch->vdb_offset & 0x7FU) << 16U);
/* DMA_CHANNEL_CNTL1_CHADBSTART */
fw_ch->cntl1 |= ((ch->adb_offset & 0x1FFU) << 23U);
fw_ch->boundary_pad = ch->pad_value;
fw_ch->cntl1 |= ((ch->ch_rep_factor & 0x7U) << 8U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQSTART */
fw_ch->hwseqcntl = ((ch->hwseq_start & 0x1FFU) << 0U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEND */
fw_ch->hwseqcntl |= ((ch->hwseq_end & 0x1FFU) << 12U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTD */
fw_ch->hwseqcntl |= ((ch->hwseq_trigger_done & 0x3U) << 24U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTS */
fw_ch->hwseqcntl |= ((ch->hwseq_tx_select & 0x1U) << 27U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTO */
fw_ch->hwseqcntl |= ((ch->hwseq_traversal_order & 0x1U) << 30U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEN */
fw_ch->hwseqcntl |= ((ch->hwseq_enable & 0x1U) << 31U);
/* DMA_CHANNEL_HWSEQFSCNTL_CHHWSEQFCNT*/
fw_ch->hwseqfscntl |=
(((uint32_t)ch->hwseq_con_frame_seq & 0x1U) << 0U);
/* DMA_CHANNEL_HWSEQFSCNTL_CHHWSEQCFS*/
fw_ch->hwseqfscntl |=
(((uint32_t)ch->hwseq_frame_count & 0x3FU) << 16U);
pva_dma_resource_map_add_adbs(dma_resource_map, ch->adb_offset,
ch->adb_count);
}
static uint32_t assemble_rpt_cntl(uint8_t rpt, uint32_t adv)
{
return PVA_INSERT(rpt, 31, 24) | PVA_INSERT(adv, 23, 0);
}
static void write_dma_descriptor(struct pva_dma_descriptor const *desc,
struct pva_fw_dma_descriptor *fw_desc)
{
fw_desc->src_adr0 = iova_lo(desc->src.offset);
fw_desc->src_adr1 = iova_hi(desc->src.offset);
fw_desc->dst_adr0 = iova_lo(desc->dst.offset);
fw_desc->dst_adr1 = iova_hi(desc->dst.offset);
/* DMA_DESC_TRANS CNTL0 */
fw_desc->transfer_control0 = PVA_INSERT(desc->src.transfer_mode, 2, 0) |
PVA_INSERT(desc->dst.transfer_mode, 6, 4);
/* DMA_DESC_TRANS CNTL1 */
fw_desc->transfer_control1 =
PVA_INSERT(desc->log2_pixel_size, 1, 0) |
PVA_INSERT(desc->px_direction, 2, 2) |
PVA_INSERT(desc->py_direction, 3, 3) |
PVA_INSERT(desc->boundary_pixel_extension, 4, 4) |
PVA_INSERT(desc->tts, 5, 5) |
PVA_INSERT(desc->trans_true_completion, 7, 7);
/* DMA_DESC_TRANS CNTL2 */
fw_desc->transfer_control2 = PVA_INSERT(desc->prefetch_enable, 0, 0) |
PVA_INSERT(desc->dst.cb_enable, 1, 1) |
PVA_INSERT(desc->src.cb_enable, 2, 2);
fw_desc->link_did = desc->link_desc_id;
/* DMA_DESC_TX */
fw_desc->tx = desc->tx;
/* DMA_DESC_TY */
fw_desc->ty = desc->ty;
/* DMA_DESC_DLP_ADV */
fw_desc->dlp_adv = desc->dst.line_pitch;
/* DMA_DESC_SLP_ADV */
fw_desc->slp_adv = desc->src.line_pitch;
/* DMA_DESC_DB_START */
fw_desc->db_start = desc->dst.cb_start;
/* DMA_DESC_DB_SIZE */
fw_desc->db_size = desc->dst.cb_size;
/* DMA_DESC_SB_START */
fw_desc->sb_start = desc->src.cb_start;
/* DMA_DESC_SB_SIZE */
fw_desc->sb_size = desc->src.cb_size;
/* DMA_DESC_TRIG_CH */
/* Channel events are not supported */
fw_desc->trig_ch_events = 0U;
/* DMA_DESC_HW_SW_TRIG */
fw_desc->hw_sw_trig_events =
PVA_INSERT(desc->trig_event_mode, 1, 0) |
PVA_INSERT(desc->trig_vpu_events, 5, 2) |
PVA_INSERT(desc->desc_reload_enable, 12, 12);
/* DMA_DESC_PX */
fw_desc->px = desc->px;
/* DMA_DESC_PY */
fw_desc->py = desc->py;
/* DMA_DESC_FRDA */
fw_desc->frda = ((desc->dst2_offset >> 6U) & 0x3FFF);
/* DMA_DESC_NDTM_CNTL0 */
fw_desc->cb_ext = (((desc->src.cb_start >> 16) & 0x1) << 0) |
(((desc->dst.cb_start >> 16) & 0x1) << 2) |
(((desc->src.cb_size >> 16) & 0x1) << 4) |
(((desc->dst.cb_size >> 16) & 0x1) << 6);
/* DMA_DESC_NS1_ADV & DMA_DESC_ST1_ADV */
fw_desc->srcpt1_cntl =
assemble_rpt_cntl(desc->src.rpt1, desc->src.adv1);
fw_desc->srcpt2_cntl =
assemble_rpt_cntl(desc->src.rpt2, desc->src.adv2);
fw_desc->srcpt3_cntl =
assemble_rpt_cntl(desc->src.rpt3, desc->src.adv3);
fw_desc->dstpt1_cntl =
assemble_rpt_cntl(desc->dst.rpt1, desc->dst.adv1);
fw_desc->dstpt2_cntl =
assemble_rpt_cntl(desc->dst.rpt2, desc->dst.adv2);
fw_desc->dstpt3_cntl =
assemble_rpt_cntl(desc->dst.rpt3, desc->dst.adv3);
}
static void write_triggers(struct pva_dma_config const *dma_cfg,
struct pva_dma_config_resource *fw_cfg,
struct pva_dma_resource_map *dma_resource_map)
{
uint32_t i, j;
bool trigger_required = false;
memset(fw_cfg->output_enable, 0, sizeof(fw_cfg->output_enable));
for (i = 0; i < dma_cfg->header.num_channels; i++) {
struct pva_dma_channel const *ch = &dma_cfg->channels[i];
uint8_t ch_num = i + dma_cfg->header.base_channel;
uint32_t mask;
mask = ch->output_enable_mask;
/* READ/STORE triggers */
for (j = 0; j < 7; j++) {
fw_cfg->output_enable[j] |=
(((mask >> 2 * j) & 1U) << ch_num);
fw_cfg->output_enable[j] |=
(((mask >> (2 * j + 1)) & 1U)
<< (ch_num + 16U));
}
/* VPU config trigger */
fw_cfg->output_enable[7] |= (((mask >> 14) & 1U) << ch_num);
		/* HWSEQ trigger */
fw_cfg->output_enable[8] |= (((mask >> 15) & 1U) << ch_num);
fw_cfg->output_enable[8] |=
(((mask >> 16) & 1U) << (ch_num + 16U));
if (mask != 0) {
trigger_required = true;
}
}
if (trigger_required) {
pva_dma_resource_map_add_triggers(dma_resource_map);
}
}
void pva_kmd_write_fw_dma_config(struct pva_dma_config const *dma_cfg,
void *fw_dma_config,
uint32_t *out_fw_fetch_size,
bool support_hwseq_frame_linking)
{
struct pva_dma_config_resource *hdr;
struct pva_fw_dma_channel *fw_channels;
struct pva_fw_dma_descriptor *fw_descs;
struct pva_fw_dma_slot *fw_slots, *last_slot;
struct pva_dma_resource_map *dma_resource_map;
uint32_t *hwseq_words;
uintptr_t offset;
uint32_t i;
hdr = fw_dma_config;
hdr->base_channel = dma_cfg->header.base_channel;
hdr->base_descriptor = dma_cfg->header.base_descriptor;
hdr->base_hwseq_word = dma_cfg->header.base_hwseq_word;
hdr->num_channels = dma_cfg->header.num_channels;
hdr->num_descriptors = dma_cfg->header.num_descriptors;
hdr->num_hwseq_words = dma_cfg->header.num_hwseq_words;
hdr->vpu_exec_resource_id = dma_cfg->header.vpu_exec_resource_id;
hdr->num_dynamic_slots = dma_cfg->header.num_dynamic_slots;
dma_resource_map = &hdr->dma_resource_map;
pva_dma_resource_map_reset(dma_resource_map);
pva_dma_resource_map_add_channels(dma_resource_map,
dma_cfg->header.base_channel,
dma_cfg->header.num_channels);
pva_dma_resource_map_add_descriptors(dma_resource_map,
dma_cfg->header.base_descriptor,
dma_cfg->header.num_descriptors);
pva_dma_resource_map_add_hwseq_words(dma_resource_map,
dma_cfg->header.base_hwseq_word,
dma_cfg->header.num_hwseq_words);
offset = sizeof(*hdr);
fw_slots = pva_offset_pointer(fw_dma_config, offset);
if (hdr->num_dynamic_slots > 0) {
last_slot = &fw_slots[hdr->num_dynamic_slots - 1];
hdr->num_relocs = safe_addu16(last_slot->reloc_start_idx,
last_slot->reloc_count);
		/* Round up the number of relocs to satisfy the alignment requirement */
hdr->num_relocs = safe_pow2_roundup_u16(hdr->num_relocs, 2U);
offset += sizeof(struct pva_fw_dma_slot) *
hdr->num_dynamic_slots +
sizeof(struct pva_fw_dma_reloc) * hdr->num_relocs;
} else {
hdr->num_relocs = 0;
}
fw_channels = pva_offset_pointer(fw_dma_config, offset);
offset += sizeof(*fw_channels) * hdr->num_channels;
fw_descs = pva_offset_pointer(fw_dma_config, offset);
offset += sizeof(*fw_descs) * hdr->num_descriptors;
/* Do not include fields beyond descriptors as they are not fetched to
* TCM */
*out_fw_fetch_size = offset;
for (i = 0; i < hdr->num_channels; i++) {
write_dma_channel(&dma_cfg->channels[i],
dma_cfg->header.base_descriptor,
&fw_channels[i], dma_resource_map,
support_hwseq_frame_linking);
}
for (i = 0; i < dma_cfg->header.num_descriptors; i++) {
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
write_dma_descriptor(&dma_cfg->descriptors[i], &fw_descs[i]);
}
write_triggers(dma_cfg, fw_dma_config, dma_resource_map);
hwseq_words = pva_offset_pointer(fw_dma_config, offset);
memcpy(hwseq_words, dma_cfg->hwseq_words,
sizeof(*hwseq_words) * hdr->num_hwseq_words);
/*TODO: write hdr->common_config for hwseq and MISR*/
}

View File

@@ -0,0 +1,74 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_EXECUTABLE_H
#define PVA_KMD_EXECUTABLE_H
#include "pva_kmd.h"
#include "pva_resource.h"
#include "pva_kmd_utils.h"
struct pva_kmd_device;
struct pva_kmd_device_memory;
struct pva_kmd_exec_symbol_table {
uint32_t n_symbols;
struct pva_symbol_info *symbols;
};
static inline struct pva_symbol_info *
pva_kmd_get_symbol(struct pva_kmd_exec_symbol_table *symbol_table,
uint32_t symbol_id)
{
struct pva_symbol_info *symbol = NULL;
uint32_t idx = symbol_id - PVA_SYMBOL_ID_BASE;
if (idx >= symbol_table->n_symbols) {
pva_kmd_log_err("Symbol ID out of range\n");
return NULL;
}
symbol = &symbol_table->symbols[idx];
return symbol;
}
static inline struct pva_symbol_info *
pva_kmd_get_symbol_with_type(struct pva_kmd_exec_symbol_table *symbol_table,
uint32_t symbol_id,
enum pva_symbol_type symbol_type)
{
struct pva_symbol_info *symbol = NULL;
symbol = pva_kmd_get_symbol(symbol_table, symbol_id);
if (!symbol) {
return NULL;
}
#if !defined(PVA_SKIP_SYMBOL_TYPE_CHECK)
if (symbol->symbol_type != symbol_type) {
pva_kmd_log_err("Unexpected symbol type\n");
return NULL;
}
#endif
return symbol;
}
enum pva_error
pva_kmd_load_executable(void *executable_data, uint32_t executable_size,
struct pva_kmd_device *pva, uint8_t dma_smmu_id,
struct pva_kmd_exec_symbol_table *out_symbol_table,
struct pva_kmd_device_memory **out_metainfo,
struct pva_kmd_device_memory **out_sections);
void pva_kmd_unload_executable(struct pva_kmd_exec_symbol_table *symbol_table,
struct pva_kmd_device_memory *metainfo,
struct pva_kmd_device_memory *sections);
#endif // PVA_KMD_EXECUTABLE_H

View File

@@ -0,0 +1,52 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_fw_debug.h"
#include "pva_kmd_utils.h"
#include "pva_api.h"
void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer)
{
uint32_t tail = print_buffer->buffer_info->tail;
if (tail > print_buffer->size) {
pva_kmd_log_err(
"Firmware print tail is out of bounds! Refusing to print\n");
pva_dbg_printf("Tail %u vs size %u\n", tail,
print_buffer->size);
return;
}
while (print_buffer->head < tail) {
uint32_t max_len = tail - print_buffer->head;
const char *str = print_buffer->content + print_buffer->head;
uint32_t print_size;
		/* It must be null terminated */
		if (print_buffer->content[tail - 1] != '\0') {
			pva_kmd_log_err(
				"Firmware print is not null terminated! Refusing to print");
			break;
		}
print_size = strnlen(str, max_len);
pva_kmd_print_str(str);
/* +1 for null terminator */
print_buffer->head += print_size + 1;
}
if (print_buffer->buffer_info->flags & PVA_FW_PRINT_BUFFER_OVERFLOWED) {
pva_kmd_log_err("Firmware print buffer overflowed!");
}
if (print_buffer->buffer_info->flags & PVA_FW_PRINT_FAILURE) {
pva_kmd_log_err("Firmware print failed!");
}
}

View File

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_FW_DEBUG_H
#define PVA_KMD_FW_DEBUG_H
#include "pva_api.h"
#include "pva_fw.h"
struct pva_kmd_fw_print_buffer {
struct pva_fw_print_buffer_header *buffer_info;
char const *content;
uint32_t size;
uint32_t head;
};
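/*
 * Summary of the drain protocol as implemented in pva_kmd_drain_fw_print():
 * the firmware appends NUL-terminated strings to 'content' and advances
 * buffer_info->tail; the KMD prints each string from 'head' up to 'tail' and
 * advances 'head' past the string and its terminator.
 */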
void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer);
#endif // PVA_KMD_FW_DEBUG_H

View File

@@ -0,0 +1,338 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api_cmdbuf.h"
#include "pva_api_types.h"
#include "pva_bit.h"
#include "pva_fw.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
#include "pva_utils.h"
#include "pva_kmd_fw_profiler.h"
// TODO: This is here temporarily just for testing. Should be moved to a common header
#define CMD_ID(x) PVA_EXTRACT(x, 6, 0, uint8_t)
#define CMD(name) [CMD_ID(PVA_CMD_OPCODE_##name)] = #name
static const char *cmd_names[PVA_CMD_OPCODE_COUNT] = {
CMD(LINK_CHUNK),
CMD(BARRIER),
CMD(ACQUIRE_ENGINE),
CMD(RELEASE_ENGINE),
CMD(SET_CURRENT_ENGINE),
CMD(CLEAR_VMEM),
CMD(BIND_L2SRAM),
CMD(RELEASE_L2SRAM),
CMD(INVALIDATE_L2SRAM),
CMD(FLUSH_L2SRAM),
CMD(PATCH_L2SRAM_OFFSET),
CMD(SET_VPU_EXECUTABLE),
CMD(INIT_VPU_EXECUTABLE),
CMD(PREFETCH_VPU_CODE),
CMD(SET_VPU_PARAMETER),
CMD(SET_VPU_PARAMETER_WITH_ADDRESS),
CMD(SET_VPU_INSTANCE_PARAMETER),
CMD(SET_VPU_PARAMETER_WITH_BUFFER),
CMD(RUN_VPU),
CMD(SET_PPE_EXECUTABLE),
CMD(INIT_PPE_EXECUTABLE),
CMD(PREFETCH_PPE_CODE),
CMD(RUN_PPE),
CMD(FETCH_DMA_CONFIGURATION),
CMD(SETUP_DMA),
CMD(RUN_DMA),
CMD(BIND_DRAM_SLOT),
CMD(BIND_VMEM_SLOT),
CMD(UNREGISTER_RESOURCE),
CMD(WRITE_DRAM),
CMD(CAPTURE_TIMESTAMP),
CMD(RUN_UNIT_TESTS)
};
static const char *priv_cmd_names[PVA_CMD_PRIV_OPCODE_COUNT] = {
CMD(INIT_RESOURCE_TABLE),
CMD(DEINIT_RESOURCE_TABLE),
CMD(UPDATE_RESOURCE_TABLE),
CMD(INIT_QUEUE),
CMD(DEINIT_QUEUE),
CMD(ENABLE_FW_PROFILING),
CMD(DISABLE_FW_PROFILING),
CMD(SUSPEND_FW),
CMD(RESUME_FW)
};
static inline const char *pva_fw_get_cmd_name(uint32_t opcode)
{
uint32_t cmd_id;
const char *name;
cmd_id = CMD_ID(opcode);
if (opcode & PVA_CMD_PRIV_OPCODE_FLAG) {
if (cmd_id >= PVA_CMD_PRIV_OPCODE_COUNT) {
return "INVALID";
}
name = priv_cmd_names[cmd_id];
} else {
if (cmd_id >= PVA_CMD_OPCODE_COUNT) {
return "INVALID";
}
name = cmd_names[cmd_id];
}
if (name == NULL) {
return "UNKNOWN";
} else {
return name;
}
}
void pva_kmd_device_init_profiler(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
const uint32_t profiling_buffer_size = PVA_KMD_FW_PROFILING_BUFFER_SIZE;
struct pva_kmd_fw_profiling_buffer *fw_profiling_buffer =
&pva->fw_profiling_buffer;
// Event message should be 32-bit to keep logging latency low
ASSERT(sizeof(struct pva_fw_event_message) == sizeof(uint32_t));
pva->fw_profiling_buffer_memory =
pva_kmd_device_memory_alloc_map(profiling_buffer_size, pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
ASSERT(pva->fw_profiling_buffer_memory != NULL);
/* Add profiling memory to resource table */
err = pva_kmd_add_dram_buffer_resource(
&pva->dev_resource_table, pva->fw_profiling_buffer_memory,
&pva->fw_profiling_buffer_resource_id);
ASSERT(err == PVA_SUCCESS);
pva_kmd_update_fw_resource_table(&pva->dev_resource_table);
fw_profiling_buffer->buffer_info =
(struct pva_fw_profiling_buffer_header *)
pva->fw_profiling_buffer_memory->va;
fw_profiling_buffer->content =
pva_offset_pointer(pva->fw_profiling_buffer_memory->va,
sizeof(*fw_profiling_buffer->buffer_info));
fw_profiling_buffer->size = pva->fw_profiling_buffer_memory->size;
fw_profiling_buffer->head = 0U;
fw_profiling_buffer->buffer_info->flags = 0U;
fw_profiling_buffer->buffer_info->tail = 0U;
pva->debugfs_context.g_fw_profiling_config.enabled = false;
pva->debugfs_context.g_fw_profiling_config.filter = 0x0;
}
void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva)
{
pva_kmd_drop_resource(&pva->dev_resource_table,
pva->fw_profiling_buffer_resource_id);
pva->debugfs_context.g_fw_profiling_config.enabled = false;
}
enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_enable_fw_profiling *cmd;
uint64_t buffer_offset = 0U;
uint32_t filter = 0U;
uint8_t timestamp_type = TIMESTAMP_TYPE_CYCLE_COUNT;
uint32_t fence_val;
enum pva_error err;
// filter |= PVA_FW_EVENT_DO_CMD;
filter |= PVA_FW_EVENT_RUN_VPU;
if (pva->debugfs_context.g_fw_profiling_config.enabled) {
return PVA_SUCCESS;
}
pva->fw_profiling_buffer.head = 0U;
pva->fw_profiling_buffer.buffer_info->flags = 0U;
pva->fw_profiling_buffer.buffer_info->tail = 0U;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_enable_fw_profiling(
cmd, pva->fw_profiling_buffer_resource_id,
pva->fw_profiling_buffer.size, buffer_offset, filter,
timestamp_type);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
goto err_out;
}
pva->debugfs_context.g_fw_profiling_config.enabled = true;
pva->debugfs_context.g_fw_profiling_config.filter = filter;
pva->debugfs_context.g_fw_profiling_config.timestamp_type =
timestamp_type;
pva->debugfs_context.g_fw_profiling_config.timestamp_size =
(pva->debugfs_context.g_fw_profiling_config.timestamp_type ==
TIMESTAMP_TYPE_TSE) ?
8 :
4;
return PVA_SUCCESS;
err_out:
return err;
}
enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_disable_fw_profiling *cmd;
uint32_t fence_val;
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_disable_fw_profiling(cmd);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
goto err_out;
}
pva->debugfs_context.g_fw_profiling_config.enabled = false;
pva->debugfs_context.g_fw_profiling_config.filter = 0x0;
return PVA_SUCCESS;
err_out:
return err;
}
static void decode_and_print_event(unsigned long walltime,
unsigned long relative_time,
struct pva_fw_event_message message,
char *msg_string)
{
/* message.event holds the event index; PVA_BIT() maps it to its filter bit */
switch (PVA_BIT(message.event)) {
case PVA_FW_EVENT_DO_CMD: {
sprintf(msg_string,
"pva_fw@%lu: [%8lu] event=%-12s type=%-7s slot=%u idx=%-5u opcode=%s",
walltime, relative_time, "DO_CMD",
event_type_to_string(message.type), message.arg2,
message.arg3, pva_fw_get_cmd_name(message.arg1));
} break;
case PVA_FW_EVENT_SCAN_QUEUES: {
sprintf(msg_string,
"pva_fw@%lu: [%8lu] event=%-12s type=%-7s found=%u ccq_id=%-5u queue_id=%u",
walltime, relative_time, "SCAN_QUEUES",
event_type_to_string(message.type), message.arg1,
message.arg2, message.arg3);
} break;
case PVA_FW_EVENT_SCAN_SLOTS: {
sprintf(msg_string,
"pva_fw@%lu: [%8lu] event=%-12s type=%-7s state=%u slot=%u",
walltime, relative_time, "SCAN_SLOTS",
event_type_to_string(message.type), message.arg1,
message.arg2);
} break;
case PVA_FW_EVENT_RUN_VPU: {
sprintf(msg_string,
"pva_fw@%lu: [%8lu] event=%-12s type=%-7s slot=%u idx=%-5u opcode=%s",
walltime, relative_time, "RUN_VPU",
event_type_to_string(message.type), message.arg2,
message.arg3, pva_fw_get_cmd_name(message.arg1));
} break;
default:
pva_dbg_printf("Unknown event type\n");
break;
}
}
void pva_kmd_drain_fw_profiling_buffer(
struct pva_kmd_device *pva,
struct pva_kmd_fw_profiling_buffer *profiling_buffer)
{
char msg_string[200] = { '\0' };
struct pva_fw_event_message message;
uint64_t prev_walltime = 0U;
uint64_t timestamp = 0U;
uint64_t relative_time = 0U;
uint32_t buffer_space;
// TODO: R5 frequency is hard-coded for now. Get this at runtime.
static const uint32_t r5_freq = 716800000U;
static const unsigned long r5_cycle_duration = 1000000000000 / r5_freq; /* picoseconds per R5 cycle (~1395 ps at 716.8 MHz) */
unsigned long walltime = 0U; // in nanoseconds
uint64_t walltime_diff;
/* Each record is a 32-bit event message followed by a 4- or 8-byte timestamp */
const uint32_t message_size =
sizeof(message) +
pva->debugfs_context.g_fw_profiling_config.timestamp_size;
uint32_t *profiling_buffer_head = &profiling_buffer->head;
uint32_t profiling_buffer_tail = profiling_buffer->buffer_info->tail;
while (*profiling_buffer_head < profiling_buffer_tail) {
buffer_space = safe_addu32(*profiling_buffer_head,
safe_subu32(message_size, 1U));
ASSERT(buffer_space <= profiling_buffer_tail);
memcpy(&message,
&profiling_buffer->content[*profiling_buffer_head],
sizeof(message));
memcpy(&timestamp,
&profiling_buffer->content[*profiling_buffer_head +
sizeof(message)],
pva->debugfs_context.g_fw_profiling_config
.timestamp_size);
if (pva->debugfs_context.g_fw_profiling_config.timestamp_type ==
TIMESTAMP_TYPE_TSE) {
walltime = (timestamp << 5);
} else if (pva->debugfs_context.g_fw_profiling_config
.timestamp_type ==
TIMESTAMP_TYPE_CYCLE_COUNT) {
timestamp = PVA_LOW32(timestamp);
walltime = (r5_cycle_duration * timestamp) / 1000U;
}
walltime_diff = safe_subu64((uint64_t)walltime, prev_walltime);
relative_time = (prev_walltime == 0U) ? 0U : walltime_diff;
decode_and_print_event(walltime, relative_time, message,
&msg_string[0]);
pva_kmd_print_str(msg_string);
*profiling_buffer_head = *profiling_buffer_head + message_size;
prev_walltime = walltime;
}
return;
}

View File

@@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_FW_PROFILER_H
#define PVA_KMD_FW_PROFILER_H
#include "pva_kmd_device.h"
struct pva_kmd_fw_profiling_buffer {
#define PVA_KMD_FW_PROFILING_BUFFER_SIZE (512 * 1024)
struct pva_fw_profiling_buffer_header *buffer_info;
char const *content;
uint32_t size;
uint32_t head;
};
struct pva_kmd_fw_profiling_config {
uint32_t filter;
enum pva_fw_timestamp_t timestamp_type;
uint8_t timestamp_size;
uint8_t enabled;
};
void pva_kmd_device_init_profiler(struct pva_kmd_device *pva);
void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva);
void pva_kmd_drain_fw_profiling_buffer(
struct pva_kmd_device *pva,
struct pva_kmd_fw_profiling_buffer *profiling_buffer);
enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva);
enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva);
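/*
 * Illustrative call order only (a minimal sketch, not a prescribed API flow;
 * in the driver these calls are driven from device init and the debugfs
 * profiling controls):
 *
 *	pva_kmd_device_init_profiler(pva);
 *	pva_kmd_notify_fw_enable_profiling(pva);
 *	// ... submit work to the PVA ...
 *	pva_kmd_drain_fw_profiling_buffer(pva, &pva->fw_profiling_buffer);
 *	pva_kmd_notify_fw_disable_profiling(pva);
 *	pva_kmd_device_deinit_profiler(pva);
 */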
#endif

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,336 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_HWSEQ_VALIDATE_H
#define PVA_KMD_HWSEQ_VALIDATE_H
#include "pva_api_dma.h"
#include "pva_kmd_device.h"
#define PVA_HWSEQ_RRA_MAX_NOCR 31U
#define PVA_HWSEQ_RRA_MAX_FRAME_COUNT 63U
/**
* List of valid Addressing Modes in HW Sequencer Header
*/
enum pva_dma_hwseq_fid {
PVA_DMA_HWSEQ_RRA_MODE = 0xC0DA, /*!< RRA addressing */
PVA_DMA_HWSEQ_FRAME_MODE = 0xC0DE, /*!< frame addressing */
PVA_DMA_HWSEQ_DESC_MODE = 0xDEAD /*!< descriptor addressing */
};
/**
* Combine three headers common in HW Sequencer
*
* ----------------------------------------------------------------------------
* | | byte 3 | byte 2 | byte 1 | byte 0 |
* |--------|---------------|--------------|-----------------|----------------|
* | Head 1 | NOCR | FR | FID1 | FID0 |
* | Head 2 | FO in LP 15:8 | FO in LP 7:0 | TO in P/LP 15:8 | TO in P/LP 7:0 |
* | Head 3 | padB | padL | padT | padR |
* ----------------------------------------------------------------------------
**/
struct pva_dma_hwseq_hdr {
//hdr_1
uint16_t fid; /*!< addressing type: frame or descriptor */
uint8_t fr; /*!< frame repetition factor */
uint8_t nocr; /*!< number of descriptor column/row */
//hdr_2
int16_t to; /*!< tile offset in pixel/Line Pitch */
int16_t fo; /*!< frame offset in Line Pitch */
//hdr_3
uint8_t padr; /*!< pad right */
uint8_t padt; /*!< pad top */
uint8_t padl; /*!< pad left */
uint8_t padb; /*!< pad bottom */
};
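/*
 * Illustrative only: a minimal sketch of how the three header words (hdr_1,
 * hdr_2, hdr_3 in the table above) could be unpacked into this struct,
 * assuming the blob is read as little-endian 32-bit words. The helper name is
 * hypothetical and is not referenced by the driver.
 */
static inline void
pva_dma_hwseq_hdr_unpack_example(uint32_t hdr1, uint32_t hdr2, uint32_t hdr3,
struct pva_dma_hwseq_hdr *out)
{
out->fid = (uint16_t)(hdr1 & 0xFFFFU); /* bytes 0..1: FID0, FID1 */
out->fr = (uint8_t)((hdr1 >> 16) & 0xFFU); /* byte 2: frame repetition */
out->nocr = (uint8_t)((hdr1 >> 24) & 0xFFU); /* byte 3: number of col/row */
out->to = (int16_t)(hdr2 & 0xFFFFU); /* tile offset */
out->fo = (int16_t)((hdr2 >> 16) & 0xFFFFU); /* frame offset */
out->padr = (uint8_t)(hdr3 & 0xFFU);
out->padt = (uint8_t)((hdr3 >> 8) & 0xFFU);
out->padl = (uint8_t)((hdr3 >> 16) & 0xFFU);
out->padb = (uint8_t)((hdr3 >> 24) & 0xFFU);
}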
/**
* A struct which represents Column/Row Header in HW Sequencer
*/
struct pva_dma_hwseq_colrow_hdr {
uint8_t dec; /*!< descriptor entry count */
uint8_t crr; /*!< col/row repetition factor */
int16_t cro; /*!< col/row offset in pixel/line pitch */
};
/**
* A struct which represents a DMA Descriptor Header in HW Sequencer
*/
struct pva_dma_hwseq_desc_entry {
uint8_t did; /*!< desc id */
uint8_t dr; /*!< desc repetition */
};
/**
* A struct which represents a Column/Row Header Entry in HW Sequencer
*/
struct pva_dma_hwseq_colrow_entry_hdr {
struct pva_dma_hwseq_colrow_hdr hdr; /*!< Col/Row Header */
};
/**
* A struct representing Grid Information
*/
struct pva_hwseq_grid_info {
/**
* tile co-ordinates
* In Raster Mode:
* - tile_x[0] = Tile width of the first tile in HW Seq DMA Transfer
* - tile_x[1] = Tile width of the last tile in HW Seq DMA Transfer
* In Vertical Mining Mode:
* - tile_x[0] = Tile height of the first tile in HW Seq DMA Transfer
* - tile_x[1] = Tile height of the last tile in HW Seq DMA Transfer
*/
int32_t tile_x[2];
/**
* tile co-ordinates
* In Raster Mode:
* - tile_y[0] = Tile height of the first tile in HW Seq DMA Transfer
* - tile_y[1] = Tile height of the last tile in HW Seq DMA Transfer
* In Vertical Mining Mode:
* - tile_y[0] = Tile width of the first tile in HW Seq DMA Transfer
* - tile_y[1] = Tile width of the last tile in HW Seq DMA Transfer
*/
int32_t tile_y[2];
/**
* tile co-ordinates
* In Tensor Data Flow Mode:
*/
int32_t tile_z;
/**
* Padding values
* In Raster Mode:
* - pad_x[0] = Left Padding
* - pad_x[1] = Right Padding
* In Vertical Mining Mode:
* - pad_x[0] = Top Padding
* - pad_x[1] = Bottom Padding
*/
int32_t pad_x[2];
/**
* Padding values
* In Raster Mode:
* - pad_y[0] = Top Padding
* - pad_y[1] = Bottom Padding
* In Vertical Mining Mode:
* - pad_y[0] = Left Padding
* - pad_y[1] = Right Padding
*/
int32_t pad_y[2];
/**
* Tiles per packet. Grid size in X dimension
*/
uint32_t grid_size_x;
/**
* Repeat Count
*/
uint32_t grid_size_y;
/**
* Grid Size in Z dimension for Tensor Data Flow
*/
uint32_t grid_size_z;
/**
* Tile Offset as specified in the HW Sequencer Header
*/
int32_t grid_step_x;
/**
* Col/Row Offset as specified in the HW Sequencer Col/Row Header
*/
int32_t grid_step_y;
/**
* Repetition factor for Head Descriptor in HW Sequencer Blob
*/
uint32_t head_tile_count;
/**
* Boolean value to indicate if HW Sequencer has split padding
*/
bool is_split_padding;
};
/**
* A struct representing a valid Frame Information
*/
struct pva_hwseq_frame_info {
/**
* X co-ordinate of start of Frame
*/
int64_t start_x;
/**
* Y co-ordinate of start of Frame
*/
int64_t start_y;
/**
* Z co-ordinate of start of Frame
*/
int64_t start_z;
/**
* X co-ordinate of end of Frame
*/
int64_t end_x;
/**
* Y co-ordinate of end of Frame
*/
int64_t end_y;
/**
* Z co-ordinate of end of Frame
*/
int64_t end_z;
};
/**
* Struct which holds the HW Sequencer Buffer as received from User Space
*/
struct pva_hwseq_buffer {
/**
* Pointer to HW Sequencer Blob in Buffer
*/
const uint8_t *data;
/**
* Number of bytes left to be read from the data buffer
*/
uint32_t bytes_left;
};
/**
* @struct hw_seq_blob_entry
* @brief Structure to hold information about a hardware sequence blob entry.
*
* This structure stores the details of a DMA channel and the range of hardware
* sequencer entries associated with it, along with the number of frames involved.
*/
struct hw_seq_blob_entry {
/**
* Pointer to a const \ref pva_dma_channel which holds the current DMA Channel Information
* in which current HW Sequencer Blob is present
*/
struct pva_dma_channel const *ch;
/**
* The starting index of the hardware sequencer.
*/
uint16_t hwseq_start;
/**
* The ending index of the hardware sequencer.
*/
uint16_t hwseq_end;
/**
* The number of frames associated with the hardware sequencer.
*/
uint32_t num_frames;
};
/**
* TODO: Separate out pva_hwseq_priv to be more modular
*
* Items in pva_hwseq_main
* - dma_config
* - hw_gen
* - blob
* - num_hwseq_words
* Items per segment of main i.e. pva_hwseq_segment
* - hwseq_start, hwseq_end
* - channel id
* - hwseq_header,
* - desc_count
* - num_frames
* - head_desc, tail_desc
* - is_split_padding
* - is_raster_scan
*/
/**
* A struct holding private data to HW Sequencer Blob being parsed
*/
struct pva_hwseq_priv {
/**
* Number of descriptors in the HW Sequencer Blob
*/
uint32_t desc_count;
/**
* Number of tiles in the packet
* This is the sum total of descriptor repetition factors
* present in the HW Sequencer Blob
*/
uint32_t tiles_per_packet;
int32_t max_tx;
int32_t max_ty;
/**
* Struct that holds the entry info of HW Sequencer Blob
*/
struct hw_seq_blob_entry entry;
/**
* Struct that holds HW Sequencer Blob to be read
*/
struct pva_hwseq_buffer blob;
/**
* Boolean to indicate if split padding is present in the HW Sequencer Blob
*/
bool is_split_padding;
/**
* Bool to indicate if HW Sequencer uses raster scan or Vertical mining
* TRUE: Raster Scan
* FALSE: Vertical Mining
*/
bool is_raster_scan;
/**
* @brief Indicates the generation of PVA HW.
* Allowed values: 0 (GEN 1), 1 (GEN 2), 2 (GEN 3)
*/
enum pva_hw_gen hw_gen;
/**
* @brief Pointer to the DMA configuration header.
*/
const struct pva_dma_config *dma_config;
/**
* Pointer to \ref pva_dma_hwseq_hdr which holds the HW Sequencer Header
*/
const struct pva_dma_hwseq_hdr *hdr;
/**
* Pointer to \ref pva_dma_hwseq_colrow_hdr which holds the Header of the
* Col/Row inside HW Sequencer
*/
const struct pva_dma_hwseq_colrow_hdr *colrow;
/**
* Pointer to the Head Descriptor of type \ref pva_dma_descriptor in the HW Sequencer
*/
struct pva_dma_descriptor *head_desc;
/**
* Pointer to the Tail Descriptor of type \ref pva_dma_descriptor in the HW Sequencer
*/
struct pva_dma_descriptor *tail_desc;
/**
* DMA Descriptor information obtained from HW Sequencer Blob of type
* \ref pva_dma_hwseq_desc_entry
*/
struct pva_dma_hwseq_desc_entry dma_descs[2];
/**
* Access Sizes are calculated and stored here from HW Sequencer Blob
*/
struct pva_kmd_dma_access *access_sizes;
};
struct pva_hwseq_per_frame_info {
uint32_t seq_tile_count;
uint32_t vmem_tiles_per_frame;
};
enum pva_error validate_hwseq(struct pva_dma_config const *dma_config,
struct pva_kmd_hw_constants const *hw_consts,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask);
#endif

View File

@@ -0,0 +1,98 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_msg.h"
#include "pva_fw.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_fw_debug.h"
#include "pva_kmd_device.h"
#include "pva_kmd_context.h"
static uint8_t get_msg_type(uint32_t hdr)
{
return PVA_EXTRACT(hdr, PVA_FW_MSG_TYPE_MSB, PVA_FW_MSG_TYPE_LSB,
uint32_t);
}
void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len)
{
struct pva_kmd_device *pva = pva_dev;
uint8_t type = get_msg_type(data[0]);
uint8_t updated_len = safe_subu8(len, 1U);
uint8_t size = safe_mulu8((uint8_t)sizeof(uint32_t), updated_len);
switch (type) {
case PVA_FW_MSG_TYPE_BOOT_DONE: {
uint64_t r5_start_time =
pack64(data[PVA_FW_MSG_R5_START_TIME_HI_IDX],
data[PVA_FW_MSG_R5_START_TIME_LO_IDX]);
uint64_t r5_ready_time =
pack64(data[PVA_FW_MSG_R5_READY_TIME_HI_IDX],
data[PVA_FW_MSG_R5_READY_TIME_LO_IDX]);
pva_kmd_log_err("Firmware boot completes");
pva_kmd_log_err_u64("R5 start time (us)",
tsc_to_us(r5_start_time));
pva_kmd_log_err_u64("R5 ready time (us)",
tsc_to_us(r5_ready_time));
pva_kmd_sema_post(&pva->fw_boot_sema);
} break;
case PVA_FW_MSG_TYPE_ABORT: {
char abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN + 1];
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
pva_kmd_log_err("Firmware aborted! The abort message is: ");
abort_msg[0] = PVA_EXTRACT(data[0], 7, 0, uint32_t);
abort_msg[1] = PVA_EXTRACT(data[0], 15, 8, uint32_t);
memcpy(abort_msg + 2, &data[1], size);
abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN] = '\0';
pva_kmd_log_err(abort_msg);
} break;
case PVA_FW_MSG_TYPE_FLUSH_PRINT:
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
break;
default:
FAULT("Unknown message type from firmware");
}
}
void pva_kmd_handle_msg(void *pva_dev, uint32_t const *data, uint8_t len)
{
struct pva_kmd_device *pva = pva_dev;
uint8_t type = get_msg_type(data[0]);
switch (type) {
case PVA_FW_MSG_TYPE_RESOURCE_UNREGISTER: {
uint8_t table_id =
PVA_EXTRACT(data[0], PVA_FW_MSG_RESOURCE_TABLE_ID_MSB,
PVA_FW_MSG_RESOURCE_TABLE_ID_LSB, uint8_t);
/* Resource table ID equals context id */
struct pva_kmd_context *ctx =
pva_kmd_get_context(pva, table_id);
uint32_t i;
pva_kmd_mutex_lock(&ctx->resource_table_lock);
for (i = 1; i < len; i++) {
pva_kmd_drop_resource(&ctx->ctx_resource_table,
data[i]);
}
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
break;
}
default:
FAULT("Unexpected CCQ msg type from FW");
break;
}
}

View File

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api.h"
/**
* @brief Handle messages from FW to hypervisor.
*
* This is a provision for future hypervisor support. For now, it handles all
* messages delivered through the mailboxes.
*/
void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len);
/**
* @brief Handle messages from FW to KMD.
*
* These messages come from the CCQ0 status registers.
*/
void pva_kmd_handle_msg(void *pva_dev, uint32_t const *data, uint8_t len);

View File

@@ -0,0 +1,35 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_MUTEX_H
#define PVA_KMD_MUTEX_H
#include "pva_api.h"
#if defined(__KERNEL__) /* For Linux */
#include <linux/mutex.h>
typedef struct mutex pva_kmd_mutex_t;
#else /* For user space code, including QNX KMD */
#include <pthread.h>
/* Mutex */
typedef pthread_mutex_t pva_kmd_mutex_t;
#endif
enum pva_error pva_kmd_mutex_init(pva_kmd_mutex_t *m);
void pva_kmd_mutex_lock(pva_kmd_mutex_t *m);
void pva_kmd_mutex_unlock(pva_kmd_mutex_t *m);
void pva_kmd_mutex_deinit(pva_kmd_mutex_t *m);
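/*
 * Typical usage (illustrative sketch only; the PVA_NOMEM mapping below is a
 * hypothetical choice, callers propagate whatever error policy they need):
 *
 *	pva_kmd_mutex_t lock;
 *
 *	if (pva_kmd_mutex_init(&lock) != PVA_SUCCESS)
 *		return PVA_NOMEM;
 *	pva_kmd_mutex_lock(&lock);
 *	// ... touch the state guarded by 'lock' ...
 *	pva_kmd_mutex_unlock(&lock);
 *	pva_kmd_mutex_deinit(&lock);
 */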
#endif // PVA_KMD_MUTEX_H

View File

@@ -0,0 +1,814 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_op_handler.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_device.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_constants.h"
#include "pva_fw.h"
#include "pva_kmd_vpu_app_auth.h"
#include "pva_math_utils.h"
struct pva_kmd_buffer {
void const *base;
uint32_t offset;
uint32_t size;
};
/* Offset is always kept a multiple of 8 bytes (e.g. 20 + 13 = 33 rounds up to 40) */
static void incr_offset(struct pva_kmd_buffer *buf, uint32_t incr)
{
buf->offset = safe_addu32(buf->offset, incr);
buf->offset =
safe_pow2_roundup_u32(buf->offset, (uint32_t)sizeof(uint64_t));
}
static bool access_ok(struct pva_kmd_buffer const *buf, uint32_t size)
{
return safe_addu32(buf->offset, size) <= buf->size;
}
static void *read_data(struct pva_kmd_buffer *buf, uint32_t size)
{
void *data = (void *)((uint8_t *)buf->base + buf->offset);
incr_offset(buf, size);
return data;
}
static void write_data(struct pva_kmd_buffer *buf, void const *data,
uint32_t size)
{
memcpy((uint8_t *)buf->base + buf->offset, data, size);
incr_offset(buf, size);
}
static enum pva_error
pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_memory_register_in_args *args;
struct pva_kmd_register_out_args out_args = { 0 };
struct pva_kmd_device_memory *dev_mem;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
uint8_t smmu_ctx_id;
uint32_t resource_id = 0;
if (!access_ok(out_buffer, sizeof(struct pva_kmd_register_out_args))) {
return PVA_INVAL;
}
if (!access_ok(in_buffer,
sizeof(struct pva_kmd_memory_register_in_args))) {
err = PVA_INVAL;
goto err_out;
}
args = read_data(in_buffer,
sizeof(struct pva_kmd_memory_register_in_args));
dev_mem = pva_kmd_device_memory_acquire(args->memory_handle,
args->offset, args->size, ctx);
if (dev_mem == NULL) {
err = PVA_NOMEM;
goto err_out;
}
if (args->segment == PVA_MEMORY_SEGMENT_R5) {
smmu_ctx_id = PVA_R5_SMMU_CONTEXT_ID;
} else {
smmu_ctx_id = ctx->smmu_ctx_id;
}
err = pva_kmd_device_memory_iova_map(dev_mem, ctx->pva,
args->access_flags, smmu_ctx_id);
if (err != PVA_SUCCESS) {
goto release;
}
if ((smmu_ctx_id == PVA_R5_SMMU_CONTEXT_ID) &&
(dev_mem->iova < FW_SHARED_MEMORY_START)) {
pva_kmd_log_err(
"Not able to map memory in the R5 shared region");
err = PVA_NOMEM;
goto unmap;
}
pva_kmd_mutex_lock(&ctx->resource_table_lock);
err = pva_kmd_add_dram_buffer_resource(&ctx->ctx_resource_table,
dev_mem, &resource_id);
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto unmap;
}
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
if (update_cmd == NULL) {
pva_kmd_log_err("Unable to reserve command buffer space");
err = PVA_NOMEM;
goto free_dram_buffer_resource;
}
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
if (err != PVA_SUCCESS) {
goto free_cmdbuf;
}
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;
write_data(out_buffer, &out_args, sizeof(out_args));
return err;
free_cmdbuf:
pva_kmd_cmdbuf_builder_cancel(cmdbuf_builder);
free_dram_buffer_resource:
pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id);
unmap:
pva_kmd_device_memory_iova_unmap(dev_mem);
release:
pva_kmd_device_memory_free(dev_mem);
err_out:
out_args.error = err;
write_data(out_buffer, &out_args, sizeof(out_args));
return err;
}
static enum pva_error pva_kmd_op_executable_register_async(
struct pva_kmd_context *ctx, struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_executable_register_in_args *args;
struct pva_kmd_exec_register_out_args out_args = { 0 };
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
struct pva_kmd_resource_record *rec;
uint32_t num_symbols = 0;
void *exec_data;
uint32_t resource_id = 0;
if (!access_ok(out_buffer,
sizeof(struct pva_kmd_exec_register_out_args))) {
return PVA_INVAL;
}
if (!access_ok(in_buffer,
sizeof(struct pva_kmd_executable_register_in_args))) {
err = PVA_INVAL;
goto err_out;
}
args = read_data(in_buffer,
sizeof(struct pva_kmd_executable_register_in_args));
if (!access_ok(in_buffer, args->size)) {
err = PVA_INVAL;
goto err_out;
}
exec_data = read_data(in_buffer, args->size);
err = pva_kmd_verify_exectuable_hash(ctx->pva, (uint8_t *)exec_data,
args->size);
if (err != PVA_SUCCESS) {
goto err_out;
}
pva_kmd_mutex_lock(&ctx->resource_table_lock);
err = pva_kmd_add_vpu_bin_resource(&ctx->ctx_resource_table, exec_data,
args->size, &resource_id);
if (err == PVA_SUCCESS) {
rec = pva_kmd_use_resource(&ctx->ctx_resource_table,
resource_id);
ASSERT(rec != NULL);
num_symbols = rec->vpu_bin.symbol_table.n_symbols;
pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id);
}
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
if (update_cmd == NULL) {
pva_kmd_log_err("Unable to reserve memory in command buffer");
err = PVA_NOMEM;
goto drop_resource;
}
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;
out_args.num_symbols = num_symbols;
write_data(out_buffer, &out_args, sizeof(out_args));
return err;
drop_resource:
pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id);
err_out:
out_args.error = err;
write_data(out_buffer, &out_args, sizeof(out_args));
return err;
}
static enum pva_error
pva_kmd_op_dma_register_async(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_dma_config_register_in_args *args;
struct pva_kmd_register_out_args out_args = { 0 };
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
void *dma_cfg_data;
uint32_t dma_cfg_payload_size;
uint32_t resource_id = 0;
uint32_t dma_config_size = 0;
if (!access_ok(out_buffer, sizeof(struct pva_kmd_register_out_args))) {
return PVA_INVAL;
}
if (!access_ok(in_buffer,
sizeof(struct pva_kmd_dma_config_register_in_args))) {
return PVA_INVAL;
}
args = read_data(in_buffer,
sizeof(struct pva_kmd_dma_config_register_in_args));
dma_cfg_data = &args->dma_config_header;
dma_cfg_payload_size = in_buffer->size - in_buffer->offset;
// Advance past the DMA config payload; it is handed to pva_kmd_add_dma_config_resource below
read_data(in_buffer, dma_cfg_payload_size);
pva_kmd_mutex_lock(&ctx->resource_table_lock);
dma_config_size =
safe_addu32(dma_cfg_payload_size,
(uint32_t)sizeof(args->dma_config_header));
err = pva_kmd_add_dma_config_resource(&ctx->ctx_resource_table,
dma_cfg_data, dma_config_size,
&resource_id);
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
if (update_cmd == NULL) {
err = PVA_NOMEM;
goto drop_dma_config;
}
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;
write_data(out_buffer, &out_args, sizeof(out_args));
return PVA_SUCCESS;
drop_dma_config:
pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id);
err_out:
out_args.error = err;
write_data(out_buffer, &out_args, sizeof(out_args));
/* Error is reported in the output buffer. So we return success here. */
return PVA_SUCCESS;
}
static enum pva_error
pva_kmd_op_unregister_async(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_unregister_in_args *args;
struct pva_cmd_unregister_resource *unreg_cmd;
if (!access_ok(in_buffer, sizeof(struct pva_kmd_unregister_in_args))) {
err = PVA_INVAL;
goto err_out;
}
args = read_data(in_buffer, sizeof(struct pva_kmd_unregister_in_args));
unreg_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*unreg_cmd));
if (unreg_cmd == NULL) {
pva_kmd_log_err(
"Unable to reserve memory for unregister command");
err = PVA_NOMEM;
goto err_out;
}
pva_kmd_set_cmd_unregister_resource(unreg_cmd, args->resource_id);
return PVA_SUCCESS;
err_out:
return err;
}
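/*
 * Layout of the async ops input buffer as consumed below (summary derived
 * from the handlers above): after the leading struct pva_kmd_operations, the
 * buffer carries a sequence of records, each a struct pva_kmd_op_header
 * followed by that op's input arguments (plus a variable-size payload for
 * executable and DMA-config registration). Every read advances the offset and
 * rounds it up to the next 8-byte boundary via incr_offset().
 */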
static enum pva_error pva_kmd_async_ops_handler(
struct pva_kmd_context *ctx, struct pva_fw_postfence *post_fence,
struct pva_kmd_buffer *in_arg, struct pva_kmd_buffer *out_arg)
{
struct pva_kmd_cmdbuf_builder cmdbuf_builder;
enum pva_error err = PVA_SUCCESS;
uint32_t wait_time = 0;
// First check if we have space in the queue
while (pva_kmd_queue_space(&ctx->ctx_queue) == 0) {
pva_kmd_sleep_us(PVA_KMD_WAIT_FW_POLL_INTERVAL_US);
wait_time += PVA_KMD_WAIT_FW_POLL_INTERVAL_US;
if (wait_time > PVA_KMD_WAIT_FW_TIMEOUT_US) {
err = PVA_TIMEDOUT;
goto out;
}
}
err = pva_kmd_submitter_prepare(&ctx->submitter, &cmdbuf_builder);
if (err != PVA_SUCCESS) {
goto out;
}
while (access_ok(in_arg, sizeof(struct pva_kmd_op_header))) {
struct pva_kmd_op_header *header =
read_data(in_arg, sizeof(struct pva_kmd_op_header));
if (header->op_type >= PVA_KMD_OP_MAX) {
err = PVA_INVAL;
goto out;
}
switch (header->op_type) {
case PVA_KMD_OP_MEMORY_REGISTER:
err = pva_kmd_op_memory_register_async(
ctx, in_arg, out_arg, &cmdbuf_builder);
break;
case PVA_KMD_OP_EXECUTABLE_REGISTER:
err = pva_kmd_op_executable_register_async(
ctx, in_arg, out_arg, &cmdbuf_builder);
break;
case PVA_KMD_OP_DMA_CONFIG_REGISTER:
err = pva_kmd_op_dma_register_async(
ctx, in_arg, out_arg, &cmdbuf_builder);
break;
case PVA_KMD_OP_UNREGISTER:
err = pva_kmd_op_unregister_async(ctx, in_arg, out_arg,
&cmdbuf_builder);
break;
default:
err = PVA_INVAL;
break;
}
if (err != PVA_SUCCESS) {
break;
}
}
/* This fence comes from user, so set the flag to inform FW */
post_fence->flags |= PVA_FW_POSTFENCE_FLAGS_USER_FENCE;
err = pva_kmd_submitter_submit_with_fence(&ctx->submitter,
&cmdbuf_builder, post_fence);
ASSERT(err == PVA_SUCCESS);
out:
return err;
}
static enum pva_error pva_kmd_op_context_init(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer)
{
struct pva_kmd_context_init_in_args *ctx_init_args;
struct pva_kmd_context_init_out_args ctx_init_out = { 0 };
enum pva_error err;
if (!access_ok(in_buffer,
sizeof(struct pva_kmd_context_init_in_args))) {
return PVA_INVAL;
}
if (!access_ok(out_buffer,
sizeof(struct pva_kmd_context_init_out_args))) {
return PVA_INVAL;
}
ctx_init_args = read_data(in_buffer,
sizeof(struct pva_kmd_context_init_in_args));
err = pva_kmd_context_init(ctx, ctx_init_args->resource_table_capacity);
ctx_init_out.error = err;
ctx_init_out.ccq_shm_hdl = (uint64_t)ctx->ccq_shm_handle;
write_data(out_buffer, &ctx_init_out, sizeof(ctx_init_out));
return err;
}
static enum pva_error
pva_kmd_op_syncpt_register_async(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err;
struct pva_syncpt_rw_info *syncpts;
struct pva_kmd_device_memory dev_mem;
uint32_t resource_id = 0;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
struct pva_kmd_syncpt_register_out_args syncpt_register_out = { 0 };
/* Register RO syncpts */
dev_mem.iova = ctx->pva->syncpt_ro_iova;
dev_mem.va = 0;
dev_mem.size = ctx->pva->syncpt_offset * ctx->pva->num_syncpts;
dev_mem.pva = ctx->pva;
dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID;
pva_kmd_mutex_lock(&ctx->resource_table_lock);
err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem,
&resource_id);
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
syncpt_register_out.syncpt_ro_res_id = resource_id;
syncpt_register_out.num_ro_syncpoints = ctx->pva->num_syncpts;
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
/* Register RW syncpts */
syncpts = (struct pva_syncpt_rw_info *)pva_kmd_get_block(
&ctx->pva->syncpt_allocator, ctx->syncpt_block_index);
ASSERT(syncpts != NULL);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS_PER_CONTEXT; i++) {
ctx->syncpt_ids[i] = syncpts[i].syncpt_id;
syncpt_register_out.synpt_ids[i] = syncpts[i].syncpt_id;
}
dev_mem.iova = syncpts[0].syncpt_iova;
dev_mem.va = 0;
dev_mem.size = ctx->pva->syncpt_offset * PVA_NUM_RW_SYNCPTS_PER_CONTEXT;
dev_mem.pva = ctx->pva;
dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID;
pva_kmd_mutex_lock(&ctx->resource_table_lock);
err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem,
&resource_id);
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
syncpt_register_out.syncpt_rw_res_id = resource_id;
syncpt_register_out.synpt_size = ctx->pva->syncpt_offset;
ctx->ctx_resource_table.syncpt_allocator = &ctx->pva->syncpt_allocator;
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
err_out:
syncpt_register_out.error = err;
write_data(out_buffer, &syncpt_register_out,
sizeof(syncpt_register_out));
return err;
}
static enum pva_error pva_kmd_op_queue_create(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_arg,
struct pva_kmd_buffer *out_arg)
{
struct pva_kmd_queue_create_in_args *queue_create_args;
struct pva_kmd_queue_create_out_args queue_out_args = { 0 };
uint32_t queue_id = PVA_INVALID_QUEUE_ID;
enum pva_error err = PVA_SUCCESS;
if (!access_ok(in_arg, sizeof(struct pva_kmd_queue_create_in_args))) {
return PVA_INVAL;
}
if (!access_ok(out_arg, sizeof(struct pva_kmd_queue_create_out_args))) {
return PVA_INVAL;
}
queue_create_args =
read_data(in_arg, sizeof(struct pva_kmd_queue_create_in_args));
queue_out_args.error =
pva_kmd_queue_create(ctx, queue_create_args, &queue_id);
if (queue_out_args.error == PVA_SUCCESS) {
queue_out_args.queue_id = queue_id;
}
if (queue_id >= PVA_MAX_NUM_QUEUES_PER_CONTEXT) {
pva_kmd_log_err("pva_kmd_op_queue_create invalid queue id");
err = PVA_INVAL;
goto err_out;
}
pva_kmd_read_syncpt_val(ctx->pva, ctx->syncpt_ids[queue_id],
&queue_out_args.syncpt_fence_counter);
write_data(out_arg, &queue_out_args,
sizeof(struct pva_kmd_queue_create_out_args));
err_out:
return err;
}
static enum pva_error pva_kmd_op_queue_destroy(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_arg,
struct pva_kmd_buffer *out_arg)
{
struct pva_kmd_queue_destroy_in_args *queue_destroy_args;
struct pva_kmd_queue_destroy_out_args queue_out_args = { 0 };
if (!access_ok(in_arg, sizeof(struct pva_kmd_queue_destroy_in_args))) {
return PVA_INVAL;
}
if (!access_ok(out_arg,
sizeof(struct pva_kmd_queue_destroy_out_args))) {
return PVA_INVAL;
}
queue_destroy_args =
read_data(in_arg, sizeof(struct pva_kmd_queue_destroy_in_args));
queue_out_args.error = pva_kmd_queue_destroy(ctx, queue_destroy_args);
write_data(out_arg, &queue_out_args,
sizeof(struct pva_kmd_queue_destroy_out_args));
return PVA_SUCCESS;
}
static enum pva_error
pva_kmd_op_executable_get_symbols(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_arg,
struct pva_kmd_buffer *out_arg)
{
struct pva_kmd_executable_get_symbols_in_args *sym_in_args;
struct pva_kmd_executable_get_symbols_out_args sym_out_args = { 0 };
struct pva_kmd_resource_record *rec;
enum pva_error err = PVA_SUCCESS;
uint32_t table_size = 0;
uint32_t size = 0;
if (!access_ok(in_arg,
sizeof(struct pva_kmd_executable_get_symbols_in_args))) {
return PVA_INVAL;
}
if (!access_ok(out_arg,
sizeof(struct pva_kmd_executable_get_symbols_out_args))) {
return PVA_INVAL;
}
sym_in_args = read_data(
in_arg, sizeof(struct pva_kmd_executable_get_symbols_in_args));
rec = pva_kmd_use_resource(&ctx->ctx_resource_table,
sym_in_args->exec_resource_id);
if (rec == NULL) {
err = PVA_INVAL;
pva_kmd_log_err("pva_kmd_use_resource failed");
goto err_out;
}
if (rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) {
err = PVA_INVAL;
pva_kmd_log_err("Not an executable resource");
goto err_drop;
}
table_size = safe_mulu32(rec->vpu_bin.symbol_table.n_symbols,
sizeof(struct pva_symbol_info));
size = safe_addu32(
table_size,
sizeof(struct pva_kmd_executable_get_symbols_out_args));
if (!access_ok(out_arg, size)) {
err = PVA_INVAL;
goto err_drop;
}
sym_out_args.error = err;
sym_out_args.num_symbols = rec->vpu_bin.symbol_table.n_symbols;
write_data(out_arg, &sym_out_args, sizeof(sym_out_args));
write_data(out_arg, rec->vpu_bin.symbol_table.symbols, table_size);
pva_kmd_drop_resource(&ctx->ctx_resource_table,
sym_in_args->exec_resource_id);
return PVA_SUCCESS;
err_drop:
pva_kmd_drop_resource(&ctx->ctx_resource_table,
sym_in_args->exec_resource_id);
err_out:
sym_out_args.error = err;
write_data(out_arg, &sym_out_args, sizeof(sym_out_args));
return err;
}
typedef enum pva_error (*pva_kmd_async_op_func_t)(
struct pva_kmd_context *ctx, struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder);
static enum pva_error
pva_kmd_op_synced_submit(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
pva_kmd_async_op_func_t async_op_func)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_cmdbuf_builder cmdbuf_builder;
uint32_t fence_val;
err = pva_kmd_submitter_prepare(&ctx->submitter, &cmdbuf_builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = async_op_func(ctx, in_buffer, out_buffer, &cmdbuf_builder);
if (err != PVA_SUCCESS) {
goto cancel_submit;
}
err = pva_kmd_submitter_submit(&ctx->submitter, &cmdbuf_builder,
&fence_val);
/* TODO: handle this error */
ASSERT(err == PVA_SUCCESS);
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
/* TODO: handle this error when FW reboot is supported */
ASSERT(err == PVA_SUCCESS);
return PVA_SUCCESS;
cancel_submit:
pva_kmd_cmdbuf_builder_cancel(&cmdbuf_builder);
err_out:
return err;
}
static enum pva_error pva_kmd_sync_ops_handler(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_arg,
struct pva_kmd_buffer *out_arg)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_op_header *header;
if (!access_ok(in_arg, sizeof(struct pva_kmd_op_header))) {
err = PVA_INVAL;
goto out;
}
header = read_data(in_arg, sizeof(struct pva_kmd_op_header));
switch (header->op_type) {
case PVA_KMD_OP_CONTEXT_INIT:
err = pva_kmd_op_context_init(ctx, in_arg, out_arg);
break;
case PVA_KMD_OP_QUEUE_CREATE:
err = pva_kmd_op_queue_create(ctx, in_arg, out_arg);
break;
case PVA_KMD_OP_QUEUE_DESTROY:
err = pva_kmd_op_queue_destroy(ctx, in_arg, out_arg);
break;
case PVA_KMD_OP_EXECUTABLE_GET_SYMBOLS:
err = pva_kmd_op_executable_get_symbols(ctx, in_arg, out_arg);
break;
case PVA_KMD_OP_MEMORY_REGISTER:
err = pva_kmd_op_synced_submit(
ctx, in_arg, out_arg, pva_kmd_op_memory_register_async);
break;
case PVA_KMD_OP_SYNPT_REGISTER:
err = pva_kmd_op_synced_submit(
ctx, in_arg, out_arg, pva_kmd_op_syncpt_register_async);
break;
case PVA_KMD_OP_EXECUTABLE_REGISTER:
err = pva_kmd_op_synced_submit(
ctx, in_arg, out_arg,
pva_kmd_op_executable_register_async);
break;
case PVA_KMD_OP_DMA_CONFIG_REGISTER:
err = pva_kmd_op_synced_submit(ctx, in_arg, out_arg,
pva_kmd_op_dma_register_async);
break;
case PVA_KMD_OP_UNREGISTER:
err = pva_kmd_op_synced_submit(ctx, in_arg, out_arg,
pva_kmd_op_unregister_async);
break;
default:
err = PVA_INVAL;
break;
}
out:
return err;
}
enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx,
void const *ops_buffer, uint32_t ops_size,
void *response,
uint32_t response_buffer_size,
uint32_t *out_response_size)
{
struct pva_kmd_operations *ops;
struct pva_kmd_buffer in_buffer = { 0 }, out_buffer = { 0 };
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_response_header *resp_hdr;
in_buffer.base = ops_buffer;
in_buffer.size = ops_size;
out_buffer.base = response;
out_buffer.size = response_buffer_size;
if (!access_ok(&in_buffer, sizeof(struct pva_kmd_operations))) {
err = PVA_INVAL;
goto out;
}
if (!access_ok(&out_buffer, sizeof(struct pva_kmd_response_header))) {
err = PVA_INVAL;
goto out;
}
resp_hdr =
read_data(&out_buffer, sizeof(struct pva_kmd_response_header));
ops = read_data(&in_buffer, sizeof(struct pva_kmd_operations));
if (ops->mode == PVA_KMD_OPS_MODE_SYNC) {
/* Process one sync operation */
err = pva_kmd_sync_ops_handler(ctx, &in_buffer, &out_buffer);
} else {
/* Process async operations:
* - memory register
* - executable register
* - DMA configuration registration
* - unregister
*/
err = pva_kmd_async_ops_handler(ctx, &ops->postfence,
&in_buffer, &out_buffer);
}
//Update the size of the responses in the response header.
// This size also includes the header size.
resp_hdr->rep_size = out_buffer.offset;
out:
*out_response_size = out_buffer.offset;
return err;
}

View File

@@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_OP_HANDLER_H
#define PVA_KMD_OP_HANDLER_H
#include "pva_kmd_context.h"
#include "pva_fw.h"
#include "pva_kmd.h"
/** @brief Handler for PVA KMD operations.
*
* This function implements the only runtime interface with UMD. Shim layers
* receive the input data from UMD and call this function to execute the
* operations. Then, shim layers send the response back to UMD.
*
* @param ctx The KMD context.
* @param ops Pointer to the input buffer containing the operations to be
* executed. The common layer assumes that this buffer is private to
* KMD and will dereference it directly without making a copy.
* Specifically on Linux, this parameter should point to a private
* kernel space buffer instead of the user space buffer.
* @param ops_size Size of the input buffer.
* @param response Pointer to the buffer where the response will be written.
* @param response_buffer_size Size of the response buffer.
* @param out_response_size Pointer to a variable where the actual size of the
* response will be written.
*
* @return pva_error indicating the success or failure of the operation.
*/
enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx, void const *ops,
uint32_t ops_size, void *response,
uint32_t response_buffer_size,
uint32_t *out_response_size);
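/*
 * Example (illustrative sketch only, not part of the driver): a Linux shim
 * might bounce the user buffers through private kernel copies before calling
 * pva_kmd_ops_handler(). The names uops, uresp, ops_size and resp_size are
 * hypothetical.
 *
 *	void *ops = kzalloc(ops_size, GFP_KERNEL);
 *	void *resp = kzalloc(resp_size, GFP_KERNEL);
 *	uint32_t out_size = 0;
 *	enum pva_error err = PVA_INVAL;
 *
 *	if (ops != NULL && resp != NULL &&
 *	    copy_from_user(ops, uops, ops_size) == 0) {
 *		err = pva_kmd_ops_handler(ctx, ops, ops_size, resp,
 *					  resp_size, &out_size);
 *		if (err == PVA_SUCCESS &&
 *		    copy_to_user(uresp, resp, out_size) != 0)
 *			err = PVA_INVAL;
 *	}
 *	kfree(resp);
 *	kfree(ops);
 */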
#endif // PVA_KMD_OP_HANDLER_H

View File

@@ -0,0 +1,210 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_utils.h"
#include "pva_fw.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_device.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_context.h"
#include "pva_kmd_block_allocator.h"
#include "pva_utils.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_pm.h"
enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
enum pva_error err = PVA_SUCCESS;
struct pva_cmd_suspend_fw *fw_suspend;
uint32_t fence_val;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0u) {
pva_dbg_printf("PVA: Nothing to prepare for suspend");
err = PVA_SUCCESS;
goto err_out;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Prepare submitter for FW suspend command failed\n");
goto err_out;
}
//Build args
fw_suspend = pva_kmd_reserve_cmd_space(&builder, sizeof(*fw_suspend));
if (fw_suspend == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for FW suspend command failed\n");
err = PVA_NOMEM;
goto cancel_submit;
}
pva_kmd_set_cmd_suspend_fw(fw_suspend);
//Submit
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Submission for FW suspend command failed\n");
goto cancel_submit;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Waiting for FW timed out when preparing for suspend state\n");
goto err_out;
}
cancel_submit:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
}
enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_init_resource_table *res_cmd;
struct pva_cmd_init_queue *queue_cmd;
struct pva_cmd_resume_fw *fw_resume;
enum pva_error err;
uint32_t fence_val;
struct pva_kmd_queue *queue;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0u) {
pva_dbg_printf(
"PVA : Nothing to check for completion in resume");
err = PVA_SUCCESS;
goto err_out;
}
pva_kmd_send_resource_table_info_by_ccq(pva, &pva->dev_resource_table);
pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Prepare submitter for FW resume command failed\n");
goto err_out;
}
fw_resume = pva_kmd_reserve_cmd_space(&builder, sizeof(*fw_resume));
if (fw_resume == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_kmd_set_cmd_resume_fw(fw_resume);
for (uint8_t i = 0; i < pva->max_n_contexts; i++) {
struct pva_kmd_context *ctx = pva_kmd_get_context(
pva, sat_add8(i, PVA_KMD_USER_CONTEXT_ID_BASE));
if (ctx != NULL) {
/**Initialize resource table */
res_cmd = pva_kmd_reserve_cmd_space(&builder,
sizeof(*res_cmd));
if (res_cmd == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for context registration in FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_dbg_printf(
"PVA: Resume init resource table for context %d\n",
ctx->ccq_id);
pva_kmd_set_cmd_init_resource_table(
res_cmd, ctx->resource_table_id,
ctx->ctx_resource_table.table_mem->iova,
ctx->ctx_resource_table.n_entries);
queue_cmd = pva_kmd_reserve_cmd_space(
&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for queue registration in FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_dbg_printf(
"PVA: Resume priv queue for context %d\n",
ctx->ccq_id);
pva_kmd_set_cmd_init_queue(
queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/
ctx->ctx_queue.queue_memory->iova,
ctx->ctx_queue.max_num_submit);
/**Initialize resource table */
for (uint32_t j = 0; j < ctx->max_n_queues; j++) {
queue = pva_kmd_get_block(&ctx->queue_allocator,
j);
if (queue != NULL) {
pva_dbg_printf(
"PVA: Resume queue for context %d, queue %d\n",
queue->ccq_id, queue->queue_id);
queue_cmd = pva_kmd_reserve_cmd_space(
&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for queue registration in FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_kmd_set_cmd_init_queue(
queue_cmd, queue->ccq_id,
queue->queue_id,
queue->queue_memory->iova,
queue->max_num_submit);
}
}
}
}
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
// Error is either QUEUE_FULL or TIMEDOUT
pva_kmd_log_err(
"PVA: Submission for FW resume command failed\n");
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when resuming from suspend state");
goto err_out;
}
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
}

View File

@@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_PM_H
#define PVA_KMD_PM_H
struct pva_kmd_device;
enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva);
enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva);
#endif

View File

@@ -0,0 +1,252 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_utils.h"
#include "pva_fw.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_context.h"
#include "pva_kmd_block_allocator.h"
#include "pva_utils.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva,
uint8_t ccq_id, uint8_t queue_id,
pva_kmd_mutex_t *ccq_lock,
struct pva_kmd_device_memory *queue_memory,
uint32_t max_num_submit)
{
queue->pva = pva;
queue->queue_memory = queue_memory;
queue->ccq_id = ccq_id;
queue->queue_id = queue_id;
queue->max_num_submit = max_num_submit;
queue->queue_header = queue_memory->va;
queue->ccq_lock = ccq_lock;
}
uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue)
{
uint32_t head = queue->queue_header->cb_head;
uint32_t tail = queue->queue_header->cb_tail;
uint32_t size = queue->max_num_submit;
return pva_fw_queue_space(head, tail, size);
}
enum pva_error
pva_kmd_queue_submit(struct pva_kmd_queue *queue,
struct pva_fw_cmdbuf_submit_info const *submit_info)
{
uint32_t head = queue->queue_header->cb_head;
uint32_t tail = queue->queue_header->cb_tail;
uint32_t size = queue->max_num_submit;
uint64_t ccq_entry;
enum pva_error err;
struct pva_fw_cmdbuf_submit_info *items = pva_offset_pointer(
queue->queue_header, sizeof(*queue->queue_header));
if (pva_fw_queue_space(head, tail, size) == 0) {
return PVA_QUEUE_FULL;
}
items[tail] = *submit_info;
/* Update tail */
tail = wrap_add(tail, 1, size);
ccq_entry =
PVA_INSERT64(PVA_FW_CCQ_OP_UPDATE_TAIL, PVA_FW_CCQ_OPCODE_MSB,
PVA_FW_CCQ_OPCODE_LSB) |
PVA_INSERT64(queue->queue_id, PVA_FW_CCQ_QUEUE_ID_MSB,
PVA_FW_CCQ_QUEUE_ID_LSB) |
PVA_INSERT64(tail, PVA_FW_CCQ_TAIL_MSB, PVA_FW_CCQ_TAIL_LSB);
pva_kmd_mutex_lock(queue->ccq_lock);
/* TODO: memory write barrier is needed here */
err = pva_kmd_ccq_push_with_timeout(queue->pva, queue->ccq_id,
ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err == PVA_SUCCESS) {
queue->queue_header->cb_tail = tail;
}
pva_kmd_mutex_unlock(queue->ccq_lock);
return err;
}
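/*
 * Illustrative sketch, not part of the patch: how a caller might pair
 * pva_kmd_queue_space() with pva_kmd_queue_submit() and back off while the
 * circular buffer is full. The helper name example_try_submit() and the
 * retry count are hypothetical.
 */
static enum pva_error
example_try_submit(struct pva_kmd_queue *queue,
                   struct pva_fw_cmdbuf_submit_info const *info)
{
        enum pva_error err = PVA_QUEUE_FULL;
        uint32_t attempt;

        for (attempt = 0U; attempt < 3U; attempt++) {
                if (pva_kmd_queue_space(queue) == 0U) {
                        /* FW has not consumed any entry yet; retry */
                        continue;
                }
                err = pva_kmd_queue_submit(queue, info);
                if (err != PVA_QUEUE_FULL) {
                        break;
                }
        }
        return err;
}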
void pva_kmd_queue_deinit(struct pva_kmd_queue *queue)
{
queue->queue_memory = NULL;
queue->ccq_id = PVA_INVALID_QUEUE_ID;
queue->max_num_submit = 0;
}
static enum pva_error notify_fw_queue_deinit(struct pva_kmd_context *ctx,
struct pva_kmd_queue *queue)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_cmdbuf_builder builder;
struct pva_cmd_deinit_queue *queue_cmd;
uint32_t fence_val;
err = pva_kmd_submitter_prepare(&ctx->submitter, &builder);
if (err != PVA_SUCCESS) {
goto end;
}
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
err = PVA_NOMEM;
goto cancel_submitter;
}
pva_kmd_set_cmd_deinit_queue(queue_cmd, queue->ccq_id, queue->queue_id);
err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
ASSERT(err == PVA_SUCCESS);
return PVA_SUCCESS;
cancel_submitter:
pva_kmd_cmdbuf_builder_cancel(&builder);
end:
return err;
}
enum pva_error
pva_kmd_queue_create(struct pva_kmd_context *ctx,
struct pva_kmd_queue_create_in_args *in_args,
uint32_t *queue_id)
{
struct pva_kmd_device_memory *submission_mem_kmd = NULL;
struct pva_kmd_queue *queue = NULL;
struct pva_kmd_cmdbuf_builder builder;
struct pva_cmd_init_queue *queue_cmd;
uint32_t fence_val;
enum pva_error err, tmperr;
queue = pva_kmd_zalloc_block(&ctx->queue_allocator, queue_id);
if (queue == NULL) {
err = PVA_NOMEM;
goto err_out;
}
/* Get handle from mapped memory */
submission_mem_kmd = pva_kmd_device_memory_acquire(
in_args->queue_memory_handle, in_args->queue_memory_offset,
pva_get_submission_queue_memory_size(
in_args->max_submission_count),
ctx);
if (submission_mem_kmd == NULL) {
err = PVA_INVAL;
goto err_free_queue;
}
pva_kmd_queue_init(queue, ctx->pva, ctx->ccq_id, *queue_id,
&ctx->ccq_lock, submission_mem_kmd,
in_args->max_submission_count);
/* Get device mapped IOVA to share with FW */
err = pva_kmd_device_memory_iova_map(submission_mem_kmd, ctx->pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
if (err != PVA_SUCCESS) {
goto err_free_kmd_memory;
}
if (submission_mem_kmd->iova < FW_SHARED_MEMORY_START) {
pva_kmd_log_err(
"Not able to map memory in the R5 shared region");
err = PVA_NOMEM;
goto unmap_iova;
}
err = pva_kmd_submitter_prepare(&ctx->submitter, &builder);
if (err != PVA_SUCCESS) {
goto unmap_iova;
}
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
err = PVA_NOMEM;
goto cancel_submitter;
}
pva_kmd_set_cmd_init_queue(queue_cmd, queue->ccq_id, queue->queue_id,
queue->queue_memory->iova,
queue->max_num_submit);
err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
return PVA_SUCCESS;
cancel_submitter:
pva_kmd_cmdbuf_builder_cancel(&builder);
unmap_iova:
pva_kmd_device_memory_iova_unmap(submission_mem_kmd);
err_free_kmd_memory:
pva_kmd_device_memory_free(queue->queue_memory);
pva_kmd_queue_deinit(queue);
err_free_queue:
tmperr = pva_kmd_free_block(&ctx->queue_allocator, *queue_id);
ASSERT(tmperr == PVA_SUCCESS);
*queue_id = PVA_INVALID_QUEUE_ID;
err_out:
return err;
}
enum pva_error
pva_kmd_queue_destroy(struct pva_kmd_context *ctx,
struct pva_kmd_queue_destroy_in_args *in_args)
{
struct pva_kmd_queue *queue;
enum pva_error err = PVA_SUCCESS;
/*
* TODO :
* Send command to FW to stop queue usage. Wait for ack.
* This call needs to be added after syncpoint and ccq functions are ready.
*/
queue = pva_kmd_get_block(&ctx->queue_allocator, in_args->queue_id);
if (queue == NULL) {
return PVA_INVAL;
}
err = notify_fw_queue_deinit(ctx, queue);
if (err != PVA_SUCCESS) {
return err;
}
pva_kmd_device_memory_iova_unmap(queue->queue_memory);
pva_kmd_device_memory_free(queue->queue_memory);
pva_kmd_queue_deinit(queue);
err = pva_kmd_free_block(&ctx->queue_allocator, in_args->queue_id);
ASSERT(err == PVA_SUCCESS);
return PVA_SUCCESS;
}
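/*
 * Illustrative sketch, not part of the patch: the expected create/destroy
 * pairing from a context's point of view. example_queue_roundtrip() is a
 * hypothetical helper and the handle/offset/count values below are
 * placeholders only.
 */
static enum pva_error example_queue_roundtrip(struct pva_kmd_context *ctx)
{
        struct pva_kmd_queue_create_in_args create_args = { 0 };
        struct pva_kmd_queue_destroy_in_args destroy_args = { 0 };
        uint32_t queue_id;
        enum pva_error err;

        create_args.queue_memory_handle = 0;    /* hypothetical handle */
        create_args.queue_memory_offset = 0;
        create_args.max_submission_count = 16U; /* hypothetical depth */
        err = pva_kmd_queue_create(ctx, &create_args, &queue_id);
        if (err != PVA_SUCCESS) {
                return err;
        }
        destroy_args.queue_id = queue_id;
        return pva_kmd_queue_destroy(ctx, &destroy_args);
}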

View File

@@ -0,0 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_QUEUE_H
#define PVA_KMD_QUEUE_H
#include "pva_fw.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_mutex.h"
struct pva_kmd_queue {
struct pva_kmd_device *pva;
struct pva_kmd_device_memory *queue_memory;
struct pva_fw_submit_queue_header *queue_header;
pva_kmd_mutex_t *ccq_lock;
uint8_t ccq_id;
uint8_t queue_id;
uint32_t max_num_submit;
};
void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva,
uint8_t ccq_id, uint8_t queue_id,
pva_kmd_mutex_t *ccq_lock,
struct pva_kmd_device_memory *queue_memory,
uint32_t max_num_submit);
enum pva_error
pva_kmd_queue_create(struct pva_kmd_context *ctx,
struct pva_kmd_queue_create_in_args *in_args,
uint32_t *queue_id);
enum pva_error
pva_kmd_queue_destroy(struct pva_kmd_context *ctx,
struct pva_kmd_queue_destroy_in_args *in_args);
enum pva_error
pva_kmd_queue_submit(struct pva_kmd_queue *queue,
struct pva_fw_cmdbuf_submit_info const *submit_info);
uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue);
void pva_kmd_queue_deinit(struct pva_kmd_queue *queue);
#endif // PVA_KMD_QUEUE_H

View File

@@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_REGS_H
#define PVA_KMD_REGS_H
#include "pva_api.h"
#include "pva_constants.h"
/* Exception vectors */
#define PVA_REG_EVP_RESET_ADDR 0x20
#define PVA_REG_EVP_UNDEF_ADDR 0x24
#define PVA_REG_EVP_SWI_ADDR 0x28
#define PVA_REG_EVP_PREFETCH_ABORT_ADDR 0x2c
#define PVA_REG_EVP_DATA_ABORT_ADDR 0x30
#define PVA_REG_EVP_RSVD_ADDR 0x34
#define PVA_REG_EVP_IRQ_ADDR 0x38
#define PVA_REG_EVP_FIQ_ADDR 0x3c
/* R5 */
#define PVA_REG_PROC_CPUHALT_ADDR 0x30000
/* SCRs */
#define PVA_SEC_SCR_SECEXT_INTR_EVENT 0x28804
#define PVA_PROC_SCR_PROC 0x30800
#define PVA_REG_EVP_SCR_ADDR 0x40 //PVA_EVP_SCR_EVP_0
#define PVA_CFG_SCR_STATUS_CNTL 0x258000 //PVA_CFG_SCR_STATUS_CNTL_0
#define PVA_CFG_SCR_PRIV 0x258008 //PVA_CFG_SCR_PRIV_0
#define PVA_CFG_SCR_CCQ_CNTL 0x258010 //PVA_CFG_SCR_CCQ_CNTL_0
/* HSP */
#define PVA_REG_HSP_COMMON_ADDR 0x160000
#define PVA_REG_HSP_INT_IE0_ADDR 0x160100
#define PVA_REG_HSP_INT_IE1_ADDR 0x160104
#define PVA_REG_HSP_INT_IE2_ADDR 0x160108
#define PVA_REG_HSP_INT_IE3_ADDR 0x16010c
#define PVA_REG_HSP_INT_IE4_ADDR 0x160110
#define PVA_REG_HSP_INT_EXTERNAL_ADDR 0x160300
#define PVA_REG_HSP_INT_INTERNAL_ADDR 0x160304
#define PVA_REG_HSP_SM0_ADDR 0x170000
#define PVA_REG_HSP_SM1_ADDR 0x178000
#define PVA_REG_HSP_SM2_ADDR 0x180000
#define PVA_REG_HSP_SM3_ADDR 0x188000
#define PVA_REG_HSP_SM4_ADDR 0x190000
#define PVA_REG_HSP_SM5_ADDR 0x198000
#define PVA_REG_HSP_SM6_ADDR 0x1a0000
#define PVA_REG_HSP_SM7_ADDR 0x1a8000
#define PVA_REG_HSP_SS0_STATE_ADDR 0x1b0000
#define PVA_REG_HSP_SS0_SET_ADDR 0x1b0004
#define PVA_REG_HSP_SS0_CLR_ADDR 0x1b0008
#define PVA_REG_HSP_SS1_STATE_ADDR 0x1c0000
#define PVA_REG_HSP_SS1_SET_ADDR 0x1c0004
#define PVA_REG_HSP_SS1_CLR_ADDR 0x1c0008
#define PVA_REG_HSP_SS2_STATE_ADDR 0x1d0000
#define PVA_REG_HSP_SS2_SET_ADDR 0x1d0004
#define PVA_REG_HSP_SS2_CLR_ADDR 0x1d0008
#define PVA_REG_HSP_SS3_STATE_ADDR 0x1e0000
#define PVA_REG_HSP_SS3_SET_ADDR 0x1e0004
#define PVA_REG_HSP_SS3_CLR_ADDR 0x1e0008
/* SEC */
#define PVA_REG_SEC_ERRSLICE0_MISSIONERR_ENABLE_ADDR 0x20030
#define PVA_REG_SEC_ERRSLICE1_MISSIONERR_ENABLE_ADDR 0x20060
#define PVA_REG_SEC_ERRSLICE2_MISSIONERR_ENABLE_ADDR 0x20090
#define PVA_REG_SEC_ERRSLICE3_MISSIONERR_ENABLE_ADDR 0x200c0
#define PVA_REG_SEC_ERRSLICE0_LATENTERR_ENABLE_ADDR 0x20040
#define PVA_REG_SEC_ERRSLICE1_LATENTERR_ENABLE_ADDR 0x20070
#define PVA_REG_SEC_ERRSLICE2_LATENTERR_ENABLE_ADDR 0x200a0
#define PVA_REG_SEC_ERRSLICE3_LATENTERR_ENABLE_ADDR 0x200d0
/* SEC_LIC_INTR_STATUS */
#define PVA_REG_SEC_LIC_INTR_H1X_MSB 7
#define PVA_REG_SEC_LIC_INTR_H1X_LSB 5
#define PVA_REG_SEC_LIC_INTR_HSP_MSB 4
#define PVA_REG_SEC_LIC_INTR_HSP_LSB 1
#define PVA_REG_SEC_LIC_INTR_WDT_MSB 0
#define PVA_REG_SEC_LIC_INTR_WDT_LSB 0
/* CCQ status 2 */
#define PVA_REG_CCQ_STATUS2_INTR_OVERFLOW_BIT PVA_BIT(28)
#define PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT PVA_BIT(24)
#define PVA_REG_CCQ_STATUS2_INTR_STATUS7_BIT PVA_BIT(20)
#define PVA_REG_CCQ_STATUS2_INTR_ALL_BITS \
(PVA_REG_CCQ_STATUS2_INTR_OVERFLOW_BIT | \
PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT | \
PVA_REG_CCQ_STATUS2_INTR_STATUS7_BIT)
#define PVA_REG_CCQ_STATUS2_NUM_ENTRIES_MSB 4
#define PVA_REG_CCQ_STATUS2_NUM_ENTRIES_LSB 0
struct pva_kmd_ccq_regspec {
uint32_t status_count;
uint32_t status[PVA_CFG_CCQ_STATUS_COUNT];
uint32_t fifo;
};
struct pva_kmd_regspec {
uint32_t sec_lic_intr_enable;
uint32_t sec_lic_intr_status;
uint32_t cfg_r5user_lsegreg;
uint32_t cfg_r5user_usegreg;
uint32_t cfg_priv_ar1_lsegreg;
uint32_t cfg_priv_ar1_usegreg;
uint32_t cfg_priv_ar2_lsegreg;
uint32_t cfg_priv_ar2_usegreg;
uint32_t cfg_priv_ar1_start;
uint32_t cfg_priv_ar1_end;
uint32_t cfg_priv_ar2_start;
uint32_t cfg_priv_ar2_end;
uint32_t cfg_user_sid_base;
uint32_t cfg_priv_sid;
uint32_t cfg_vps_sid;
uint32_t cfg_perf_mon;
uint32_t cfg_scr_priv_0;
uint32_t ccq_count;
uint32_t vpu_dbg_instr_reg_offset[PVA_NUM_ENGINES];
struct pva_kmd_ccq_regspec ccq_regs[PVA_MAX_NUM_CCQ];
};
enum pva_kmd_reg_aperture {
PVA_KMD_APERTURE_PVA_CLUSTER = 0,
PVA_KMD_APERTURE_VPU_DEBUG,
PVA_KMD_APERTURE_COUNT,
};
#endif // PVA_KMD_REGS_H

View File

@@ -0,0 +1,477 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
static uint32_t get_max_dma_config_size(struct pva_kmd_device *pva)
{
uint32_t max_num_dyn_slots = PVA_DMA_MAX_NUM_SLOTS;
uint32_t max_num_reloc_infos =
safe_pow2_roundup_u32(max_num_dyn_slots, 2U);
uint32_t max_dma_cfg_size =
(uint32_t)sizeof(struct pva_dma_config_resource);
max_dma_cfg_size = safe_addu32(
max_dma_cfg_size,
safe_mulu32(max_num_dyn_slots,
(uint32_t)sizeof(struct pva_fw_dma_slot)));
max_dma_cfg_size = safe_addu32(
max_dma_cfg_size,
safe_mulu32(max_num_reloc_infos,
(uint32_t)sizeof(struct pva_fw_dma_reloc)));
max_dma_cfg_size = safe_addu32(
max_dma_cfg_size,
safe_mulu32(pva->hw_consts.n_user_dma_channels,
(uint32_t)sizeof(struct pva_dma_channel)));
max_dma_cfg_size = safe_addu32(
max_dma_cfg_size,
safe_mulu32(pva->hw_consts.n_dma_descriptors,
(uint32_t)sizeof(struct pva_dma_descriptor)));
max_dma_cfg_size = safe_addu32(max_dma_cfg_size,
safe_mulu32(pva->hw_consts.n_hwseq_words,
(uint32_t)sizeof(uint32_t)));
//Must be aligned to 8 to form array
return safe_pow2_roundup_u32(max_dma_cfg_size,
(uint32_t)sizeof(uint64_t));
}
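/*
 * Summary of the computation above (no new behaviour; assumes
 * safe_pow2_roundup_u32(x, a) aligns x up to a multiple of a):
 *
 *   block_size = align8(sizeof(struct pva_dma_config_resource)
 *                       + MAX_SLOTS           * sizeof(struct pva_fw_dma_slot)
 *                       + align2(MAX_SLOTS)   * sizeof(struct pva_fw_dma_reloc)
 *                       + n_user_dma_channels * sizeof(struct pva_dma_channel)
 *                       + n_dma_descriptors   * sizeof(struct pva_dma_descriptor)
 *                       + n_hwseq_words       * sizeof(uint32_t))
 *
 * where MAX_SLOTS = PVA_DMA_MAX_NUM_SLOTS and the final 8-byte alignment lets
 * the per-config blocks be packed back-to-back into an array.
 */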
enum pva_error
pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table,
struct pva_kmd_device *pva,
uint8_t user_smmu_ctx_id, uint32_t n_entries,
uint32_t max_num_dma_configs)
{
uint32_t max_dma_config_size = get_max_dma_config_size(pva);
enum pva_error err;
uint64_t size;
res_table->pva = pva;
res_table->n_entries = n_entries;
res_table->user_smmu_ctx_id = user_smmu_ctx_id;
size = (uint64_t)safe_mulu32(
n_entries, (uint32_t)sizeof(struct pva_resource_entry));
res_table->table_mem = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(res_table->table_mem != NULL);
pva_kmd_sema_init(&res_table->resource_semaphore, n_entries);
size = (uint64_t)safe_mulu32(sizeof(struct pva_kmd_resource_record),
n_entries);
res_table->records_mem = pva_kmd_zalloc(size);
ASSERT(res_table->records_mem != NULL);
err = pva_kmd_block_allocator_init(
&res_table->resource_record_allocator, res_table->records_mem,
PVA_RESOURCE_ID_BASE, sizeof(struct pva_kmd_resource_record),
n_entries);
ASSERT(err == PVA_SUCCESS);
size = (uint64_t)safe_mulu32(max_num_dma_configs, max_dma_config_size);
res_table->dma_config_mem = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(res_table->dma_config_mem != NULL);
err = pva_kmd_block_allocator_init(&res_table->dma_config_allocator,
res_table->dma_config_mem->va, 0,
max_dma_config_size,
max_num_dma_configs);
ASSERT(err == PVA_SUCCESS);
res_table->dma_aux = pva_kmd_zalloc(
safe_mulu32((uint32_t)sizeof(struct pva_kmd_dma_resource_aux),
max_num_dma_configs));
ASSERT(res_table->dma_aux != NULL);
return PVA_SUCCESS;
}
void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table)
{
pva_kmd_free(res_table->dma_aux);
pva_kmd_block_allocator_deinit(&res_table->dma_config_allocator);
pva_kmd_device_memory_free(res_table->dma_config_mem);
pva_kmd_block_allocator_deinit(&res_table->resource_record_allocator);
pva_kmd_free(res_table->records_mem);
pva_kmd_sema_deinit(&res_table->resource_semaphore);
pva_kmd_device_memory_free(res_table->table_mem);
}
static struct pva_kmd_resource_record *
pva_kmd_alloc_resource(struct pva_kmd_resource_table *resource_table,
uint32_t *out_resource_id)
{
enum pva_error err;
struct pva_kmd_resource_record *rec = NULL;
err = pva_kmd_sema_wait_timeout(&resource_table->resource_semaphore,
PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS);
if (err == PVA_TIMEDOUT) {
pva_kmd_log_err("pva_kmd_alloc_resource Timed out");
}
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to wait for resource IDs");
goto out;
}
rec = (struct pva_kmd_resource_record *)pva_kmd_alloc_block(
&resource_table->resource_record_allocator, out_resource_id);
ASSERT(rec != NULL);
out:
return rec;
}
static void pva_kmd_free_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id)
{
enum pva_error err;
err = pva_kmd_free_block(&resource_table->resource_record_allocator,
resource_id);
ASSERT(err == PVA_SUCCESS);
pva_kmd_sema_post(&resource_table->resource_semaphore);
}
enum pva_error
pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
uint32_t *out_resource_id)
{
struct pva_kmd_resource_record *rec =
pva_kmd_alloc_resource(resource_table, out_resource_id);
if (rec == NULL) {
pva_kmd_log_err("No more resource id");
return PVA_NO_RESOURCE_ID;
}
if (*out_resource_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = *out_resource_id;
}
rec->type = PVA_RESOURCE_TYPE_DRAM;
rec->dram.mem = dev_mem;
rec->dram.syncpt = true;
rec->ref_count = 1;
return PVA_SUCCESS;
}
enum pva_error
pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
uint32_t *out_resource_id)
{
struct pva_kmd_resource_record *rec =
pva_kmd_alloc_resource(resource_table, out_resource_id);
if (rec == NULL) {
pva_kmd_log_err("No more resource id");
return PVA_NO_RESOURCE_ID;
}
if (*out_resource_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = *out_resource_id;
}
rec->type = PVA_RESOURCE_TYPE_DRAM;
rec->dram.mem = dev_mem;
rec->dram.syncpt = false;
rec->ref_count = 1;
return PVA_SUCCESS;
}
static struct pva_resource_entry *
get_fw_resource(struct pva_kmd_resource_table *res_table, uint32_t resource_id)
{
struct pva_resource_entry *entries = res_table->table_mem->va;
uint32_t index;
ASSERT(resource_id >= PVA_RESOURCE_ID_BASE);
index = safe_subu32(resource_id, PVA_RESOURCE_ID_BASE);
return &entries[index];
}
void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table)
{
uint32_t id;
for (id = PVA_RESOURCE_ID_BASE; id <= res_table->curr_max_resource_id;
id++) {
struct pva_resource_entry *entry =
get_fw_resource(res_table, id);
struct pva_kmd_resource_record *rec = pva_kmd_get_block(
&res_table->resource_record_allocator, id);
if (rec == NULL) {
continue;
}
entry->type = rec->type;
switch (rec->type) {
case PVA_RESOURCE_TYPE_DRAM:
entry->addr_lo = iova_lo(rec->dram.mem->iova);
entry->addr_hi = iova_hi(rec->dram.mem->iova);
entry->size_lo = iova_lo(rec->dram.mem->size);
entry->size_hi = iova_hi(rec->dram.mem->size);
entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx;
break;
case PVA_RESOURCE_TYPE_INVALID:
break;
default:
pva_kmd_log_err("Unsupported resource type");
pva_kmd_fault();
}
}
}
struct pva_kmd_resource_record *
pva_kmd_use_resource(struct pva_kmd_resource_table *res_table,
uint32_t resource_id)
{
struct pva_kmd_resource_record *rec = pva_kmd_get_block(
&res_table->resource_record_allocator, resource_id);
if (rec == NULL) {
return NULL;
}
rec->ref_count = safe_addu32(rec->ref_count, 1U);
return rec;
}
struct pva_kmd_resource_record *
pva_kmd_peek_resource(struct pva_kmd_resource_table *res_table,
uint32_t resource_id)
{
struct pva_kmd_resource_record *rec = pva_kmd_get_block(
&res_table->resource_record_allocator, resource_id);
return rec;
}
void pva_kmd_drop_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id)
{
struct pva_kmd_resource_record *rec;
rec = pva_kmd_get_block(&resource_table->resource_record_allocator,
resource_id);
ASSERT(rec != NULL);
rec->ref_count = safe_subu32(rec->ref_count, 1U);
if (rec->ref_count == 0) {
pva_dbg_printf("Dropping resource %u of type %u\n", resource_id,
rec->type);
switch (rec->type) {
case PVA_RESOURCE_TYPE_DRAM:
if (rec->dram.syncpt != true) {
pva_kmd_device_memory_free(rec->dram.mem);
}
break;
case PVA_RESOURCE_TYPE_EXEC_BIN:
pva_kmd_unload_executable(&rec->vpu_bin.symbol_table,
rec->vpu_bin.metainfo_mem,
rec->vpu_bin.sections_mem);
break;
case PVA_RESOURCE_TYPE_DMA_CONFIG: {
struct pva_kmd_dma_resource_aux *dma_aux;
dma_aux =
&resource_table
->dma_aux[rec->dma_config.block_index];
pva_kmd_unload_dma_config(dma_aux);
pva_kmd_free_block(
&resource_table->dma_config_allocator,
rec->dma_config.block_index);
break;
}
default:
pva_kmd_log_err("Unsupported resource type");
pva_kmd_fault();
}
pva_kmd_free_resource(resource_table, resource_id);
}
}
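/*
 * Illustrative sketch, not part of the patch: the intended reference-counting
 * pattern around a registered resource. example_touch_resource() is a
 * hypothetical caller: it pins the record with pva_kmd_use_resource() while
 * it inspects the backing memory and releases it with pva_kmd_drop_resource(),
 * which frees the record once the count drops to zero.
 */
static enum pva_error
example_touch_resource(struct pva_kmd_resource_table *table,
                       uint32_t resource_id)
{
        struct pva_kmd_resource_record *rec;

        rec = pva_kmd_use_resource(table, resource_id);
        if (rec == NULL) {
                return PVA_NO_RESOURCE_ID;
        }
        /* ... rec (e.g. rec->dram.mem) is safe to dereference here ... */
        pva_kmd_drop_resource(table, resource_id);
        return PVA_SUCCESS;
}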
enum pva_error
pva_kmd_add_vpu_bin_resource(struct pva_kmd_resource_table *resource_table,
void *executable, uint32_t executable_size,
uint32_t *out_resource_id)
{
uint32_t res_id;
struct pva_kmd_resource_record *rec =
pva_kmd_alloc_resource(resource_table, &res_id);
enum pva_error err;
struct pva_kmd_vpu_bin_resource *vpu_bin;
if (rec == NULL) {
err = PVA_NO_RESOURCE_ID;
goto err_out;
}
vpu_bin = &rec->vpu_bin;
err = pva_kmd_load_executable(
executable, executable_size, resource_table->pva,
resource_table->user_smmu_ctx_id, &vpu_bin->symbol_table,
&vpu_bin->metainfo_mem, &vpu_bin->sections_mem);
if (err != PVA_SUCCESS) {
goto free_block;
}
if (res_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = res_id;
}
rec->type = PVA_RESOURCE_TYPE_EXEC_BIN;
rec->ref_count = 1;
*out_resource_id = res_id;
return PVA_SUCCESS;
free_block:
pva_kmd_free_resource(resource_table, res_id);
err_out:
return err;
}
enum pva_error
pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id,
struct pva_resource_entry *entry)
{
struct pva_kmd_resource_record *rec =
pva_kmd_use_resource(resource_table, resource_id);
if (rec == NULL) {
return PVA_NO_RESOURCE_ID;
}
switch (rec->type) {
case PVA_RESOURCE_TYPE_DRAM:
entry->type = rec->type;
entry->addr_lo = iova_lo(rec->dram.mem->iova);
entry->addr_hi = iova_hi(rec->dram.mem->iova);
entry->size_lo = iova_lo(rec->dram.mem->size);
entry->size_hi = iova_hi(rec->dram.mem->size);
entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx;
break;
case PVA_RESOURCE_TYPE_EXEC_BIN:
entry->type = rec->type;
entry->addr_lo = iova_lo(rec->vpu_bin.metainfo_mem->iova);
entry->addr_hi = iova_hi(rec->vpu_bin.metainfo_mem->iova);
entry->size_lo = iova_lo(rec->vpu_bin.metainfo_mem->size);
entry->size_hi = iova_hi(rec->vpu_bin.metainfo_mem->size);
entry->smmu_context_id =
rec->vpu_bin.metainfo_mem->smmu_ctx_idx;
break;
case PVA_RESOURCE_TYPE_DMA_CONFIG:
entry->type = rec->type;
entry->addr_lo = iova_lo(rec->dma_config.iova_addr);
entry->addr_hi = iova_hi(rec->dma_config.iova_addr);
entry->size_lo = iova_lo(rec->dma_config.size);
entry->size_hi = iova_hi(rec->dma_config.size);
entry->smmu_context_id = PVA_R5_SMMU_CONTEXT_ID;
break;
default:
pva_kmd_log_err("Unsupported resource type");
pva_kmd_fault();
}
pva_kmd_drop_resource(resource_table, resource_id);
return PVA_SUCCESS;
}
enum pva_error pva_kmd_add_dma_config_resource(
struct pva_kmd_resource_table *resource_table, void *dma_config_payload,
uint32_t dma_config_size, uint32_t *out_resource_id)
{
enum pva_error err = PVA_SUCCESS;
uint32_t block_idx, fw_fetch_size;
void *fw_dma_cfg;
struct pva_kmd_dma_resource_aux *dma_aux;
struct pva_kmd_resource_record *rec;
uint32_t res_id;
fw_dma_cfg = pva_kmd_zalloc_block(&resource_table->dma_config_allocator,
&block_idx);
if (fw_dma_cfg == NULL) {
err = PVA_NOMEM;
goto err_out;
}
// Must satisfy alignment requirement for converting to struct
// pva_dma_config_resource*
ASSERT(((uintptr_t)fw_dma_cfg) % sizeof(uint64_t) == 0);
dma_aux = &resource_table->dma_aux[block_idx];
err = pva_kmd_load_dma_config(resource_table, dma_config_payload,
dma_config_size, dma_aux, fw_dma_cfg,
&fw_fetch_size);
if (err != PVA_SUCCESS) {
goto free_block;
}
rec = pva_kmd_alloc_resource(resource_table, &res_id);
if (rec == NULL) {
err = PVA_NO_RESOURCE_ID;
goto unload_dma;
}
if (res_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = res_id;
}
rec->type = PVA_RESOURCE_TYPE_DMA_CONFIG;
rec->ref_count = 1;
rec->dma_config.block_index = block_idx;
rec->dma_config.iova_addr = safe_addu64(
resource_table->dma_config_mem->iova,
(uint64_t)safe_mulu32(
block_idx,
resource_table->dma_config_allocator.block_size));
rec->dma_config.size = fw_fetch_size;
*out_resource_id = res_id;
return PVA_SUCCESS;
unload_dma:
pva_kmd_unload_dma_config(dma_aux);
free_block:
pva_kmd_free_block(&resource_table->dma_config_allocator, block_idx);
err_out:
return err;
}
void pva_kmd_verify_all_resources_free(
struct pva_kmd_resource_table *resource_table)
{
enum pva_error err;
for (uint32_t i = 0; i < resource_table->n_entries; i++) {
err = pva_kmd_sema_wait_timeout(
&resource_table->resource_semaphore,
PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS);
ASSERT(err == PVA_SUCCESS);
}
}

View File

@@ -0,0 +1,153 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_RESOURCE_TABLE_H
#define PVA_KMD_RESOURCE_TABLE_H
#include "pva_fw.h"
#include "pva_bit.h"
#include "pva_resource.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_executable.h"
#include "pva_constants.h"
#include "pva_kmd_dma_cfg.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_thread_sema.h"
struct pva_kmd_device;
struct pva_kmd_dram_resource {
struct pva_kmd_device_memory *mem;
bool syncpt;
};
struct pva_kmd_vpu_bin_resource {
struct pva_kmd_device_memory *metainfo_mem;
struct pva_kmd_device_memory *sections_mem;
struct pva_kmd_exec_symbol_table symbol_table;
};
struct pva_kmd_dma_config_resource {
uint32_t block_index;
uint64_t size;
uint64_t iova_addr;
};
struct pva_kmd_resource_record {
/**
* Possible types:
* PVA_RESOURCE_TYPE_DRAM
* PVA_RESOURCE_TYPE_EXEC_BIN
* PVA_RESOURCE_TYPE_DMA_CONFIG
*/
uint8_t type;
uint32_t ref_count;
union {
struct pva_kmd_dram_resource dram;
struct pva_kmd_vpu_bin_resource vpu_bin;
struct pva_kmd_dma_config_resource dma_config;
};
};
/**
* Bookkeeping for resources registered with the PVA firmware: the FW-visible
* entry table plus the KMD-side records, DMA configuration blocks, and the
* allocators that back them.
*/
struct pva_kmd_resource_table {
/** @brief User smmu context ID.
*
* - DRAM memory, VPU data/text sections will be mapped to this space.
* - VPU metadata, DMA configurations will always be mapped to R5 SMMU
* context. */
uint8_t user_smmu_ctx_id;
uint32_t n_entries;
/** Maximum resource ID we have seen so far */
uint32_t curr_max_resource_id;
/** Semaphore to keep track of resources in use*/
pva_kmd_sema_t resource_semaphore;
/** Memory for resource table entries, in R5 segment */
struct pva_kmd_device_memory *table_mem;
/** Memory for fw dma configs, in DMA segment */
struct pva_kmd_device_memory *dma_config_mem;
struct pva_kmd_block_allocator dma_config_allocator;
/** Memory for tracking resources used by DMA configuration. Single
* allocation shared by all DMA configs */
struct pva_kmd_dma_resource_aux *dma_aux;
/** Pointer to syncpt_allocator in pva_kmd_device created during kmd boot */
struct pva_kmd_block_allocator *syncpt_allocator;
/** Memory for resource records */
void *records_mem;
struct pva_kmd_block_allocator resource_record_allocator;
struct pva_kmd_device *pva;
};
enum pva_error
pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table,
struct pva_kmd_device *pva,
uint8_t user_smmu_ctx_id, uint32_t n_entries,
uint32_t max_num_dma_configs);
void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table);
/** KMD only writes to FW resource table during init time. Once the address of
* the resource table is sent to FW, all updates should be done through commands.
*/
void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table);
enum pva_error
pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
uint32_t *out_resource_id);
enum pva_error
pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *memory,
uint32_t *out_resource_id);
enum pva_error
pva_kmd_add_vpu_bin_resource(struct pva_kmd_resource_table *resource_table,
void *executable, uint32_t executable_size,
uint32_t *out_resource_id);
enum pva_error
pva_kmd_add_dma_config_resource(struct pva_kmd_resource_table *resource_table,
void *dma_config, uint32_t dma_config_size,
uint32_t *out_resource_id);
/**
* Increment reference count of the resources
*
* TODO: make use and drop thread safe.
* */
struct pva_kmd_resource_record *
pva_kmd_use_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id);
struct pva_kmd_resource_record *
pva_kmd_peek_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id);
void pva_kmd_drop_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id);
enum pva_error
pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id,
struct pva_resource_entry *entry);
void pva_kmd_verify_all_resources_free(
struct pva_kmd_resource_table *resource_table);
#endif // PVA_KMD_RESOURCE_TABLE_H

View File

@@ -0,0 +1,185 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved.
*/
#include "pva_kmd_sha256.h"
#define ROTLEFT(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
#define ROTRIGHT(a, b) (((a) >> (b)) | ((a) << (32 - (b))))
#define CH(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
#define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define SHA_EP0(x) (ROTRIGHT(x, 2) ^ ROTRIGHT(x, 13) ^ ROTRIGHT(x, 22))
#define SHA_EP1(x) (ROTRIGHT(x, 6) ^ ROTRIGHT(x, 11) ^ ROTRIGHT(x, 25))
#define SIG0(x) (ROTRIGHT(x, 7) ^ ROTRIGHT(x, 18) ^ ((x) >> 3))
#define SIG1(x) (ROTRIGHT(x, 17) ^ ROTRIGHT(x, 19) ^ ((x) >> 10))
#define SWAP32(x) __builtin_bswap32(x)
#define SWAP64(x) __builtin_bswap64(x)
/**
* This variable is used internally by \ref sha256_transform()
*/
static const uint32_t k[64] = {
U32(0x428a2f98U), U32(0x71374491U), U32(0xb5c0fbcfU), U32(0xe9b5dba5U),
U32(0x3956c25bU), U32(0x59f111f1U), U32(0x923f82a4U), U32(0xab1c5ed5U),
U32(0xd807aa98U), U32(0x12835b01U), U32(0x243185beU), U32(0x550c7dc3U),
U32(0x72be5d74U), U32(0x80deb1feU), U32(0x9bdc06a7U), U32(0xc19bf174U),
U32(0xe49b69c1U), U32(0xefbe4786U), U32(0x0fc19dc6U), U32(0x240ca1ccU),
U32(0x2de92c6fU), U32(0x4a7484aaU), U32(0x5cb0a9dcU), U32(0x76f988daU),
U32(0x983e5152U), U32(0xa831c66dU), U32(0xb00327c8U), U32(0xbf597fc7U),
U32(0xc6e00bf3U), U32(0xd5a79147U), U32(0x06ca6351U), U32(0x14292967U),
U32(0x27b70a85U), U32(0x2e1b2138U), U32(0x4d2c6dfcU), U32(0x53380d13U),
U32(0x650a7354U), U32(0x766a0abbU), U32(0x81c2c92eU), U32(0x92722c85U),
U32(0xa2bfe8a1U), U32(0xa81a664bU), U32(0xc24b8b70U), U32(0xc76c51a3U),
U32(0xd192e819U), U32(0xd6990624U), U32(0xf40e3585U), U32(0x106aa070U),
U32(0x19a4c116U), U32(0x1e376c08U), U32(0x2748774cU), U32(0x34b0bcb5U),
U32(0x391c0cb3U), U32(0x4ed8aa4aU), U32(0x5b9cca4fU), U32(0x682e6ff3U),
U32(0x748f82eeU), U32(0x78a5636fU), U32(0x84c87814U), U32(0x8cc70208U),
U32(0x90befffaU), U32(0xa4506cebU), U32(0xbef9a3f7U), U32(0xc67178f2U)
};
/**
* \brief
* This helper is used by \ref sha256_update to hash 512-bit blocks and forms
* the core of the algorithm.
* Use \ref sha256_init(), \ref sha256_update(), and
* \ref sha256_finalize() instead of calling sha256_transform() directly.
* \param[in] ctx pointer of struct sha256_ctx context.
* \param[in] data_in pointer to the data block to be hashed.
* \return Void
*/
static void sha256_transform(struct sha256_ctx *ctx, const void *data_in)
{
uint32_t a, b, c, d, e, f, g, h, t1, t2, m[64];
const uint32_t *const data = data_in;
size_t i;
for (i = 0; i < U32(16); i++) {
m[i] = SWAP32(data[i]);
}
for (i = 0; i < U32(64) - U32(16); ++i) {
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
m[i + U32(16)] = SIG1(m[U32(14) + i]) + m[U32(9) + i] +
SIG0(m[U32(1) + i]) + m[i];
}
a = ctx->state[0];
b = ctx->state[1];
c = ctx->state[2];
d = ctx->state[3];
e = ctx->state[4];
f = ctx->state[5];
g = ctx->state[6];
h = ctx->state[7];
for (i = 0; i < U32(64); ++i) {
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
t1 = h + SHA_EP1(e) + CH(e, f, g) + k[i] + m[i];
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
t2 = SHA_EP0(a) + MAJ(a, b, c);
h = g;
g = f;
f = e;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
e = d + t1;
d = c;
c = b;
b = a;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
a = t1 + t2;
}
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[0] += a;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[1] += b;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[2] += c;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[3] += d;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[4] += e;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[5] += f;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[6] += g;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[7] += h;
}
void sha256_init(struct sha256_ctx *ctx)
{
ctx->bitlen = 0;
ctx->state[0] = U32(0x6a09e667);
ctx->state[1] = U32(0xbb67ae85);
ctx->state[2] = U32(0x3c6ef372);
ctx->state[3] = U32(0xa54ff53a);
ctx->state[4] = U32(0x510e527f);
ctx->state[5] = U32(0x9b05688c);
ctx->state[6] = U32(0x1f83d9ab);
ctx->state[7] = U32(0x5be0cd19);
}
void sha256_update(struct sha256_ctx *ctx, const void *data, size_t len)
{
uint32_t i;
for (i = 0; i < len; i += U32(64)) {
ctx->bitlen &= U32(0xffffffff);
sha256_transform(ctx, ((const uint8_t *)data) + i);
ctx->bitlen += U32(512);
}
}
void sha256_copy(const struct sha256_ctx *ctx_in, struct sha256_ctx *ctx_out)
{
*ctx_out = *ctx_in;
}
void sha256_finalize(struct sha256_ctx *ctx, const void *input,
size_t input_size, uint32_t out[8])
{
uint8_t data[64];
void *p = data;
uint32_t t;
input_size &= U32(0xffffffff);
ctx->bitlen &= U32(0xffffffff);
/* input_size >= 64 is illegal per the API contract. */
/* The masks above exist only to satisfy Coverity INT30-C. */
ctx->bitlen += input_size * U32(8);
(void)memcpy(p, input, input_size);
data[input_size] = 0x80;
if (input_size < U32(56)) { /* can we fit an 8-byte counter? */
/* Pad whatever data is left in the buffer. */
(void)memset(data + input_size + U32(1), 0,
U32(56) - input_size - U32(1));
} else { /* Go into another block. We are here only for message hashing */
if (input_size + U32(1) < U32(64)) {
(void)memset(data + input_size + U32(1), 0,
U32(64) - input_size - U32(1));
}
sha256_transform(ctx, data);
(void)memset(data, 0, 56);
}
t = ctx->bitlen_low;
*(uint32_t *)(void *)(data + 56) = 0;
*(uint32_t *)(void *)(data + 60) = SWAP32(t);
sha256_transform(ctx, data);
out[0] = SWAP32(ctx->state[0]);
out[1] = SWAP32(ctx->state[1]);
out[2] = SWAP32(ctx->state[2]);
out[3] = SWAP32(ctx->state[3]);
out[4] = SWAP32(ctx->state[4]);
out[5] = SWAP32(ctx->state[5]);
out[6] = SWAP32(ctx->state[6]);
out[7] = SWAP32(ctx->state[7]);
}
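/*
 * Illustrative sketch, not part of the patch: hashing an arbitrary buffer
 * with this API. Full 64-byte blocks go through sha256_update(); the tail
 * (fewer than 64 bytes) goes to sha256_finalize(). example_sha256() is a
 * hypothetical helper.
 */
static void example_sha256(const uint8_t *buf, size_t len, uint32_t digest[8])
{
        struct sha256_ctx ctx;
        size_t full = len - (len % 64U);

        sha256_init(&ctx);
        sha256_update(&ctx, buf, full);              /* whole 64-byte blocks */
        sha256_finalize(&ctx, buf + full, len % 64U, digest);
}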

View File

@@ -0,0 +1,76 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved.
*/
#ifndef PVA_KMD_SHA256_H
#define PVA_KMD_SHA256_H
#include "pva_api_types.h"
#define U32(x) ((uint32_t)(x))
struct sha256_ctx {
/*
* On bitlen:
*
* While we don't exceed 2^32 bit (2^29 byte) length for the input buffer,
* size_t is more efficient at least on RISC-V. This particular
* structure is needed to make Coverity happy.
*/
union {
size_t bitlen;
uint32_t bitlen_low;
};
uint32_t state[8];
};
/**
* Initializes struct sha256_ctx
*
* \param[in] ctx pointer of struct sha256_ctx context
*
* \return void
*/
void sha256_init(struct sha256_ctx *ctx);
/**
* \brief
* Hash full 64-byte blocks. May be called repeatedly with successive chunks
* of the message to be hashed (len bytes at data).
*
* \param[in] ctx pointer of struct sha256_ctx context
* \param[in] data pointer to the data block to be hashed
* \param[in] len length in bytes of the data to be hashed; must be a
* multiple of 64.
*
* \return void
*/
void sha256_update(struct sha256_ctx *ctx, const void *data, size_t len);
/**
* \brief
* Finalize the hash and store the calculated digest in out.
* Requires input_size < 64; pass any remaining full 64-byte blocks to
* sha256_update() first.
*
* \param[in] ctx pointer of struct sha256_ctx context
* \param[in] input pointer to the data block
* (left over from \ref sha256_update) to be hashed
* \param[in] input_size size of the data block to be hashed
* (left over from \ref sha256_update)
* \param[out] out receives the calculated SHA-256 digest.
*
* \return void
*/
void sha256_finalize(struct sha256_ctx *ctx, const void *input,
size_t input_size, uint32_t out[8]);
/**
* \brief
* copy state information to ctx_out from ctx_in
* \param[in] ctx_in input struct sha256_ctx
* \param[out] ctx_out output struct sha256_ctx
* \return void
*/
void sha256_copy(const struct sha256_ctx *ctx_in, struct sha256_ctx *ctx_out);
#endif /* PVA_KMD_SHA256_H */

View File

@@ -0,0 +1,317 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_device.h"
#include "pva_fw_address_map.h"
#include "pva_fw_hyp.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_silicon_isr.h"
#include "pva_kmd_silicon_boot.h"
#include "pva_kmd_shim_silicon.h"
static inline void pva_kmd_set_sema(struct pva_kmd_device *pva,
uint32_t sema_idx, uint32_t val)
{
uint32_t gap = PVA_REG_HSP_SS1_SET_ADDR - PVA_REG_HSP_SS0_SET_ADDR;
gap = safe_mulu32(gap, sema_idx);
pva_kmd_write(pva, safe_addu32(PVA_REG_HSP_SS0_SET_ADDR, gap), val);
}
static void init_fw_print_buffer(struct pva_kmd_fw_print_buffer *print_buffer,
void *debug_buffer_va)
{
print_buffer->buffer_info = pva_offset_pointer(
debug_buffer_va,
FW_TRACE_BUFFER_SIZE + FW_CODE_COVERAGE_BUFFER_SIZE);
print_buffer->size =
FW_DEBUG_LOG_BUFFER_SIZE - sizeof(*print_buffer->buffer_info);
print_buffer->head = 0;
print_buffer->content = pva_offset_pointer(
print_buffer->buffer_info, sizeof(*print_buffer->buffer_info));
}
static void disable_sec_mission_error_reporting(struct pva_kmd_device *pva)
{
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE0_MISSIONERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE1_MISSIONERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE2_MISSIONERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE3_MISSIONERR_ENABLE_ADDR, 0U);
}
static void disable_sec_latent_error_reporting(struct pva_kmd_device *pva)
{
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE0_LATENTERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE1_LATENTERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE2_LATENTERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE3_LATENTERR_ENABLE_ADDR, 0U);
}
void pva_kmd_config_evp_seg_regs(struct pva_kmd_device *pva)
{
uint64_t seg_reg_value;
/* EVP */
pva_kmd_write(pva, PVA_REG_EVP_RESET_ADDR, EVP_RESET_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_UNDEF_ADDR,
EVP_UNDEFINED_INSTRUCTION_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_SWI_ADDR, EVP_SVC_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_PREFETCH_ABORT_ADDR,
EVP_PREFETCH_ABORT_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_DATA_ABORT_ADDR, EVP_DATA_ABORT_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_RSVD_ADDR, EVP_RESERVED_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_IRQ_ADDR, EVP_IRQ_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_FIQ_ADDR, EVP_FIQ_VECTOR);
/* R5 regions are defined as:
* - PRIV1 region for firmware code and data.
* - PRIV2 region for debug printf data.
* - Remaining region for resource table, queues, etc.
*/
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_start,
FW_CODE_DATA_START_ADDR);
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_end,
FW_CODE_DATA_END_ADDR);
pva_kmd_write(pva, pva->regspec.cfg_priv_ar2_start,
FW_DEBUG_DATA_START_ADDR);
pva_kmd_write(pva, pva->regspec.cfg_priv_ar2_end,
FW_DEBUG_DATA_END_ADDR);
/* Firmware expects R5 virtual address FW_CODE_DATA_START_ADDR to be
* mapped to the beginning of firmware binary. Therefore, we adjust
* segment registers accordingly
*
* */
if (pva->load_from_gsc) {
if (pva->is_hv_mode) {
/* Loading from GSC with HV (i.e., the AV+L or AV+Q case).
 * This write will be trapped by the HV.
*/
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg,
0xFFFFFFFFU);
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg,
0xFFFFFFFFU);
} else {
/* Loading from GSC without HV, i.e., the L4T case.
 * TODO: Program the segment registers using the GSC carveout
 * fetched from the DT. Until then, ASSERT here.
*/
ASSERT(false);
}
} else {
/* Loading from file.
* In HV case, traps should be bypassed in HV
*/
seg_reg_value =
pva->fw_bin_mem->iova -
FW_CODE_DATA_START_ADDR; /* underflow is totally OK */
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg,
iova_lo(seg_reg_value));
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg,
iova_hi(seg_reg_value));
}
}
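/*
 * Worked example for the PRIV1 segment programming above (the numbers are
 * hypothetical, not taken from the patch): if the firmware image sits at
 * IOVA 0x9000_0000 and FW_CODE_DATA_START_ADDR is 0x4000_0000, the segment
 * value written is 0x9000_0000 - 0x4000_0000 = 0x5000_0000. Assuming the
 * hardware adds the segment value to the R5 address, an R5 access to
 * FW_CODE_DATA_START_ADDR + x then resolves to 0x9000_0000 + x, i.e. byte x
 * of the firmware binary, which is the mapping described in the comment
 * inside pva_kmd_config_evp_seg_regs(). If the IOVA were below
 * FW_CODE_DATA_START_ADDR the subtraction would wrap, which the same
 * addition makes harmless ("underflow is totally OK").
 */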
void pva_kmd_config_scr_regs(struct pva_kmd_device *pva)
{
pva_kmd_write(pva, PVA_REG_EVP_SCR_ADDR, PVA_EVP_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_STATUS_CNTL, PVA_STATUS_CTL_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_PRIV, PVA_PRIV_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_CCQ_CNTL, PVA_CCQ_SCR_VAL);
}
void pva_kmd_config_sid(struct pva_kmd_device *pva)
{
uint32_t addr;
uint32_t i;
uint32_t offset;
uint8_t priv1_sid;
uint8_t priv_sid;
priv_sid = pva->stream_ids[PVA_R5_SMMU_CONTEXT_ID] & 0xFF;
priv1_sid = pva->stream_ids[pva->r5_image_smmu_context_id] & 0xFF;
/* Priv SIDs */
if (pva->load_from_gsc) {
pva_kmd_write(pva, pva->regspec.cfg_priv_sid,
PVA_INSERT(priv_sid, 7, 0) |
PVA_INSERT(priv1_sid, 15, 8) |
PVA_INSERT(priv_sid, 23, 16));
} else {
pva_kmd_write(pva, pva->regspec.cfg_priv_sid,
PVA_INSERT(priv_sid, 7, 0) |
PVA_INSERT(priv_sid, 15, 8) |
PVA_INSERT(priv_sid, 23, 16));
}
/* VPS SIDs */
if ((pva->hw_consts.hw_gen == PVA_HW_GEN3) && pva->load_from_gsc) {
pva_kmd_write(pva, pva->regspec.cfg_vps_sid,
PVA_INSERT(priv1_sid, 7, 0) |
PVA_INSERT(priv1_sid, 15, 8));
} else {
pva_kmd_write(pva, pva->regspec.cfg_vps_sid,
PVA_INSERT(priv_sid, 7, 0) |
PVA_INSERT(priv_sid, 15, 8));
}
/* User SIDs */
offset = 0;
for (i = 1; i < pva->hw_consts.n_smmu_contexts - 1; i++) {
addr = safe_addu32(pva->regspec.cfg_user_sid_base, offset);
pva_kmd_write(pva, addr, pva->stream_ids[i]);
offset = safe_addu32(offset, 4U);
}
}
enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva)
{
uint64_t seg_reg_value;
uint32_t debug_data_size;
uint32_t boot_sema = 0;
enum pva_error err = PVA_SUCCESS;
/* Load firmware */
if (!pva->load_from_gsc) {
err = pva_kmd_read_fw_bin(pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to read firmware from filesystem");
goto out;
}
}
debug_data_size = (uint32_t)safe_pow2_roundup_u32(
FW_DEBUG_DATA_TOTAL_SIZE, SIZE_4KB);
pva->fw_debug_mem = pva_kmd_device_memory_alloc_map(
debug_data_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
if (pva->fw_debug_mem == NULL) {
err = PVA_NOMEM;
goto free_fw_mem;
}
init_fw_print_buffer(&pva->fw_print_buffer, pva->fw_debug_mem->va);
/* Program SCRs */
pva_kmd_write(pva, PVA_SEC_SCR_SECEXT_INTR_EVENT,
PVA_SEC_SCR_SECEXT_INTR_EVENT_VAL);
pva_kmd_write(pva, PVA_PROC_SCR_PROC, PVA_PROC_SCR_PROC_VAL);
pva_kmd_config_evp_seg_scr_regs(pva);
/* Write IOVA address of debug buffer to mailbox and FW will program
* PRIV2 segment register properly such that the debug buffer is located
* at R5 virtual address FW_DEBUG_DATA_START_ADDR */
seg_reg_value = pva->fw_debug_mem->iova;
/* When GSC is enabled, KMD cannot write directly to segment registers,
* therefore we write to mailbox registers and FW will program by
* itself.
* pva_kmd_writel(pva, pva->regspec.cfg_priv_ar2_lsegreg,
* iova_lo(seg_reg_value));
* pva_kmd_writel(pva, pva->regspec.cfg_priv_ar2_usegreg,
* iova_hi(seg_reg_value));
*/
pva_kmd_write_mailbox(pva, PVA_MBOXID_PRIV2SEG_L,
iova_lo(seg_reg_value));
pva_kmd_write_mailbox(pva, PVA_MBOXID_PRIV2SEG_H,
iova_hi(seg_reg_value));
/* Write shared memory allocation start address to mailbox and FW will
* program user segment register accordingly so that virtual address
* PVA_SHARED_MEMORY_START will point to the allocation start address.
*
* We deliberately also choose PVA_SHARED_MEMORY_START as the allocation
* start address so that the net result is that user segment register
* will be programmed to 0.
*/
seg_reg_value = FW_SHARED_MEMORY_START;
pva_kmd_write_mailbox(pva, PVA_MBOXID_USERSEG_L,
iova_lo(seg_reg_value));
pva_kmd_write_mailbox(pva, PVA_MBOXID_USERSEG_H,
iova_hi(seg_reg_value));
/* Boot parameters */
if (pva->bl_sector_pack_format == PVA_BL_XBAR_RAW) {
boot_sema = PVA_BOOT_SEMA_USE_XBAR_RAW;
}
pva_kmd_set_sema(pva, PVA_BOOT_SEMA, boot_sema);
pva_kmd_write(pva, PVA_REG_HSP_SS2_SET_ADDR,
pva_kmd_get_syncpt_ro_offset(pva));
pva_kmd_write(pva, PVA_REG_HSP_SS3_SET_ADDR,
pva_kmd_get_syncpt_rw_offset(pva));
pva_kmd_config_sid_regs(pva);
/* Enable LIC INTR line for HSP1 and WDT */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_enable,
PVA_BIT(0) /*Watchdog*/
| PVA_INSERT(0x1, 4, 1) /* HSP1 */
| PVA_INSERT(0x7, 7, 5) /* All H1X errors */);
/* Bind interrupts */
err = pva_kmd_bind_intr_handler(pva, PVA_KMD_INTR_LINE_SEC_LIC,
pva_kmd_hyp_isr, pva);
if (err != PVA_SUCCESS) {
goto free_fw_debug_mem;
}
err = pva_kmd_bind_intr_handler(pva, PVA_KMD_INTR_LINE_CCQ0,
pva_kmd_isr, pva);
if (err != PVA_SUCCESS) {
goto free_sec_lic;
}
/* Take R5 out of reset */
pva_kmd_write(pva, PVA_REG_PROC_CPUHALT_ADDR, 0x1);
/* Wait until fw boots */
err = pva_kmd_sema_wait_timeout(&pva->fw_boot_sema,
PVA_KMD_FW_BOOT_TIMEOUT_MS);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Waiting for FW boot timed out.");
goto free_ccq0;
}
return err;
free_ccq0:
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_CCQ0);
free_sec_lic:
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC);
free_fw_debug_mem:
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
pva_kmd_device_memory_free(pva->fw_debug_mem);
free_fw_mem:
if (!pva->load_from_gsc) {
pva_kmd_device_memory_free(pva->fw_bin_mem);
}
out:
return err;
}
void pva_kmd_deinit_fw(struct pva_kmd_device *pva)
{
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_CCQ0);
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC);
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
/*
* Before powering off PVA, disable SEC error reporting.
 * While powering off, PVA might generate (unexplained) error interrupts.
 * These cause HSM to read some PVA SEC registers. However, since PVA might
 * already be powergated by this time, access to PVA SEC registers from HSM
 * fails. This was discussed in Bug 3785498.
 *
 * Note: we do not explicitly enable these errors during power on since
 * 'enable' is their reset value.
*/
disable_sec_mission_error_reporting(pva);
disable_sec_latent_error_reporting(pva);
pva_kmd_device_memory_free(pva->fw_debug_mem);
if (!pva->load_from_gsc) {
pva_kmd_device_memory_free(pva->fw_bin_mem);
}
}

View File

@@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_BOOT_H
#define PVA_KMD_SILICON_BOOT_H
#include "pva_kmd_device.h"
/**
* @brief Configure EVP and Segment config registers
*
* This function configures the EVP and Segment config registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_evp_seg_regs(struct pva_kmd_device *pva);
/**
* @brief Configure SCR registers.
*
* This function configures the SCR registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_scr_regs(struct pva_kmd_device *pva);
/**
* @brief Configure SID registers.
*
* This function configures the SID registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_sid(struct pva_kmd_device *pva);
#endif /* PVA_KMD_SILICON_BOOT_H */

View File

@@ -0,0 +1,414 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_silicon_elf_parser.h"
#include "pva_kmd_utils.h"
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef UINT8_MAX
#define UINT8_MAX 0xFF
#endif
// CERT complains about casts from const uint8_t*, so do intermediate cast to void*
static inline const void *uint_8_to_void(const uint8_t *const p)
{
return (const void *)p;
}
bool elf_header_check(const elf_ct e)
{
const elfFileHeader *efh = (const elfFileHeader *)e;
if ((ELFCLASS32 == efh->oclass) &&
(ELFMAGIC_LSB == *(const elfWord *)e)) {
return true;
}
return false;
}
/**
* @brief Return pointer to ELF file header
*
* Cast the elf image data to \ref elfFileHeader*
*
* @param [in] e pointer to elf image data
* @return Pointer to the ELF file header at the start of the image. The
* caller must ensure \a e is non-NULL and carries a valid ELF magic
* (see \ref elf_header_check); this function performs no validation itself.
*
*/
static const elfFileHeader *elf_file_header(const elf_ct e)
{
return (const elfFileHeader *)e;
}
/**
* @brief Get start address of the section table.
*
* @param[in] e pointer to elf image
* @return const elfSectionHeader*
* - Valid address of the section header table.
* - NULL if the section header offset lies outside the ELF image.
*/
static inline const elfSectionHeader *elf_section_table(const elf_parser_ctx e)
{
const elfFileHeader *efh = elf_file_header(e.elf_file);
const char *p = (const char *)e.elf_file;
if (efh->shoff > e.size) {
pva_kmd_log_err("Invalid Section header Offset");
return NULL;
}
p = &p[efh->shoff];
// A proper ELF should always have aligned offsets,
// but stay defensive in case it does not.
return (const elfSectionHeader *)(const void *)(p);
}
/**
* @brief Get the size of ELF section
*
* @param esh pointer to ELF section header
* @return elfWord
* - size of the corresponding section header.
* - 0, if \a esh is NULL.
*
*/
static elfWord elf_section_size(const elfSectionHeader *esh)
{
if (NULL == esh) {
return UZERO;
}
return (elfWord)esh->size;
}
elfWord elf_shnum(const elf_parser_ctx e)
{
const elfFileHeader *efh = elf_file_header(e.elf_file);
if (NULL == efh) {
return UZERO;
}
if (UZERO == efh->shnum) {
/* get value from size of first (empty) section */
/* to avoid recursion, don't call elf_section_header(0) */
const elfSectionHeader *esh = elf_section_table(e);
// if esh is somehow NULL, section_size will return UZERO
elfWord size = elf_section_size(esh);
if (size > e.size) { // make sure we don't lose precision
return UZERO;
} else {
return size;
}
} else {
return (elfWord)efh->shnum;
}
}
const elfSectionHeader *elf_section_header(const elf_parser_ctx e,
unsigned int index)
{
const elfSectionHeader *esh = elf_section_table(e);
if (NULL == esh) {
return NULL;
}
if (index >= elf_shnum(e)) {
return NULL;
}
esh = &esh[index];
return esh;
}
static inline elfOff get_table_end(elfWord num, elfHalf entsize, elfOff off)
{
elfOff end;
elfWord tablesize = 0;
/**
 * tablesize is guaranteed to stay below UINT32_MAX and the multiply cannot
 * overflow: when num comes from efh->shnum it is at most UINT16_MAX, and when
 * it comes from the first section header's size it is bounded by the ELF file
 * size, which is limited to 2 MB.
*/
tablesize = safe_mulu32(num, (uint32_t)entsize);
end = off + tablesize;
if (end < off) {
return UZERO; //Wrap around error
}
return end;
}
bool elf_has_valid_sections(const elf_parser_ctx e)
{
elfOff max_size = UZERO;
uint32_t i;
elfOff ph_end, sh_end;
const elfFileHeader *efh = elf_file_header(e.elf_file);
if (efh == NULL) {
return false;
}
ph_end = get_table_end(efh->phnum, efh->phentsize, efh->phoff);
sh_end = get_table_end(elf_shnum(e), efh->shentsize, efh->shoff);
max_size = max(ph_end, sh_end);
if ((max_size == UZERO) || (max_size > e.size)) {
return false;
}
for (i = UZERO; i < elf_shnum(e); ++i) {
elfOff esh_end;
const elfSectionHeader *esh = elf_section_header(e, i);
/* We have already validated that the whole section header array lies within the file */
ASSERT(esh != NULL);
esh_end = esh->offset + esh->size;
if (esh_end < esh->offset) {
return false; //WRAP around error;
}
if ((esh->type != SHT_NOBITS) && (esh_end > e.size)) {
return false;
}
}
return true;
}
/**
* @brief Get the index of the section header string table.
*
* Reads efh->shstrndx from the file header; if it equals SHN_XINDEX, the real
* index is taken from the link field of the first (empty) section header.
*
* @param[in] e elf context
*
* @return index of the section header string table, or 0 on failure
*/
static elfWord elf_shstrndx(const elf_parser_ctx e)
{
const elfFileHeader *efh = elf_file_header(e.elf_file);
if (NULL == efh) {
return UZERO;
}
if (efh->shstrndx == SHN_XINDEX) {
/* get value from link field of first (empty) section */
/* to avoid recursion, don't call elf_section_header(0) */
const elfSectionHeader *esh = elf_section_table(e);
if (NULL == esh) {
return UZERO;
}
return esh->link;
}
return efh->shstrndx;
}
/**
* @brief Get a string from a string table section.
*
* Verifies that \a eshstr is an SHT_STRTAB section, that \a offset lies
* within it, and that the resulting file offset does not wrap before
* returning a pointer into the table.
*
* @param[in] e elf context
* @param[in] eshstr pointer to the string table section header
* @param[in] offset byte offset into the string table
* Valid range: 0 to eshstr->size - 1
*
* @return pointer to the string in section \a eshstr at \a offset, or NULL on
* error
*/
static const char *elf_string_at_offset(const elf_parser_ctx e,
const elfSectionHeader *eshstr,
unsigned int offset)
{
const char *strtab;
elfOff stroffset;
if (SHT_STRTAB != eshstr->type) {
return NULL;
}
if (offset >= eshstr->size) {
return NULL;
}
strtab = (const char *)e.elf_file;
stroffset = eshstr->offset + offset;
if (stroffset < eshstr->offset) { // check that stroffset doesn't wrap
return NULL;
}
strtab = &strtab[stroffset];
return strtab;
}
const char *elf_section_name(const elf_parser_ctx e,
const elfSectionHeader *esh)
{
const char *name;
const elfSectionHeader *eshstr;
elfWord shstrndx;
/* get section header string table */
shstrndx = elf_shstrndx(e);
if (shstrndx == UZERO) {
return NULL;
}
eshstr = elf_section_header(e, shstrndx);
if ((NULL == esh) || (NULL == eshstr)) {
return NULL;
}
name = elf_string_at_offset(e, eshstr, esh->name);
return name;
}
const elfSectionHeader *elf_named_section_header(const elf_parser_ctx e,
const char *name)
{
const elfSectionHeader *esh;
unsigned int i;
if (NULL == name) {
return NULL;
}
esh = elf_section_table(e);
if (NULL == esh) {
return NULL;
}
/* iterate through sections till find matching name */
for (i = UZERO; i < elf_shnum(e); ++i) {
const char *secname = elf_section_name(e, esh);
if (NULL != secname) {
size_t seclen = strlen(secname);
// Use strncmp() to avoid problems if the input is not NUL-terminated,
// but then check for a false partial match.
if ((ZERO == strncmp(secname, name, seclen)) &&
(UZERO == (uint8_t)name[seclen])) {
return esh;
}
}
++esh;
}
return NULL;
}
/**
* @brief Get the first section header of a given type.
*
* Walks the section table and returns the first section whose type matches.
*
* @param[in] e elf context
* @param[in] type section type to search for (e.g. SHT_SYMTAB)
*
* @return pointer to the first matching section header, or NULL if none is
* found
*/
static const elfSectionHeader *elf_typed_section_header(const elf_parser_ctx e,
elfWord type)
{
unsigned int i;
const elfSectionHeader *esh = elf_section_table(e);
if (NULL == esh) {
return NULL;
}
/* iterate through sections till find matching type */
for (i = UZERO; i < elf_shnum(e); ++i) {
if (esh->type == type) {
return esh;
}
++esh;
}
return NULL;
}
const elfByte *elf_section_contents(const elf_parser_ctx e,
const elfSectionHeader *esh)
{
const elfByte *p;
if ((NULL == e.elf_file) || (NULL == esh)) {
return NULL;
}
p = (const elfByte *)e.elf_file;
if ((esh->offset > e.size) ||
((uint64_t)((uint64_t)esh->offset + (uint64_t)esh->size) >
e.size)) {
return NULL;
}
return &p[esh->offset];
}
const elfSymbol *elf_symbol(const elf_parser_ctx e, unsigned int index)
{
const elfSectionHeader *esh;
const elfSymbol *esymtab;
const uint8_t *p = e.elf_file;
uint8_t align = 0;
/* get symbol table */
esh = elf_typed_section_header(e, SHT_SYMTAB);
if ((NULL == esh) || (UZERO == esh->entsize)) {
return NULL;
}
if (index >= (esh->size / esh->entsize)) {
return NULL;
}
if (esh->addralign <= (uint8_t)UINT8_MAX) {
align = (uint8_t)esh->addralign;
} else {
return NULL;
}
if ((uint64_t)((uint64_t)esh->size + (uint64_t)esh->offset) > e.size) {
return NULL;
}
p = &p[esh->offset];
esymtab = (const elfSymbol *)uint_8_to_void(p);
if ((align != 0U) && ((((uintptr_t)(esymtab) % align) != UZERO))) {
return NULL;
}
return &esymtab[index];
}
const char *elf_symbol_name(const elf_parser_ctx e, const elfSectionHeader *esh,
unsigned int index)
{
const elfSectionHeader *eshstr;
const elfSymbol *esymtab;
const elfSymbol *esym;
const char *name;
const char *p;
uint8_t align = 0;
if ((NULL == esh) || (UZERO == esh->entsize)) {
return NULL;
}
if (SHT_SYMTAB != esh->type) {
return NULL;
}
if (index >= (esh->size / esh->entsize)) {
return NULL;
}
/* get string table */
eshstr = elf_section_header(e, esh->link);
if (NULL == eshstr) {
return NULL;
}
p = (const char *)e.elf_file;
if (esh->addralign <= (uint8_t)UINT8_MAX) {
align = (uint8_t)esh->addralign;
} else {
return NULL;
}
if (esh->offset > e.size) {
return NULL;
}
p = &p[esh->offset];
esymtab = (const elfSymbol *)(const void *)(p);
if ((align != 0U) && ((((uintptr_t)(esymtab) % align) != UZERO))) {
return NULL;
}
esym = &esymtab[index];
name = elf_string_at_offset(e, eshstr, esym->name);
return name;
}

View File

@@ -0,0 +1,363 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_ELF_PARSER_H
#define PVA_KMD_SILICON_ELF_PARSER_H
#include "pva_api.h"
#define ZERO 0
#define UZERO 0U
#define ULLZERO 0ULL
/*
* Define mapping from VPU data, rodata and program sections into
* corresponding segment types.
*/
typedef const void *elf_ct; /* points to const image of elf file */
/**
* Struct containing the ELF Buffer and size of the buffer.
*/
typedef struct {
/** Pointer to buffer containing ELF File */
elf_ct elf_file;
/** Size of the buffer containing ELF File */
uint64_t size;
} elf_parser_ctx;
/*--------------------------------- Types ----------------------------------*/
/** unsigned 8-bit data type */
typedef uint8_t elfByte;
/** unsigned 16-bit data type */
typedef uint16_t elfHalf;
/** unsigned 32-bit data type */
typedef uint32_t elfWord;
/** unsigned 32-bit data type */
typedef uint32_t elfAddr;
/** unsigned 32-bit data type */
typedef uint32_t elfOff;
/**
* @brief ELF File Header
*
*/
typedef struct {
/** ELF magic number : 0x7f,0x45,0x4c,0x46 */
elfWord magic;
/** Object file class */
elfByte oclass;
/** Data encoding */
elfByte data;
/** Object format version */
elfByte formatVersion;
/** OS application binary interface */
elfByte abi;
/** Version of abi */
elfByte abiVersion;
/** Elf ident padding */
elfByte padd[7];
/** Object file type */
elfHalf type;
/** Architecture */
elfHalf machine;
/** Object file version */
elfWord version;
/** Entry point virtual address */
elfAddr entry;
/** Program header table file offset */
elfOff phoff;
/** Section header table file offset */
elfOff shoff;
/** Processor-specific flags */
elfWord flags;
/** ELF header size in bytes */
elfHalf ehsize;
/** Program header table entry size */
elfHalf phentsize;
/** Program header table entry count */
elfHalf phnum;
/** Section header table entry size */
elfHalf shentsize;
/** Section header table entry count */
elfHalf shnum;
/** Section header string table index */
elfHalf shstrndx;
} elfFileHeader;
/** ELF magic number in big endian */
#define ELFMAGIC 0x7f454c46U
#define ELFMAGIC_LSB 0x464c457fU // ELF magic number in little endian
#define ELFCLASS32 1U // 32 bit object file
#define EV_NONE 0 // Invalid version
#define EV_CURRENT 1 // Current version
/**
* @brief ELF Section Header
*
*/
typedef struct {
/** Section name, string table index */
elfWord name;
/** Type of section */
elfWord type;
/** Miscellaneous section attributes */
elfWord flags;
/** Section virtual addr at execution */
elfAddr addr;
/** Section file offset */
elfOff offset;
/** Size of section in bytes */
elfWord size;
/** Index of another section */
elfWord link;
/** Additional section information */
elfWord info;
/** Section alignment */
elfWord addralign;
/** Entry size if section holds table */
elfWord entsize;
} elfSectionHeader;
/*
* Section Header Type
*/
#define SHT_NULL 0x00U /// NULL section (entry unused)
#define SHT_PROGBITS 0x01U /// Loadable program data
#define SHT_SYMTAB 0x02U /// Symbol table
#define SHT_STRTAB 0x03U /// String table
#define SHT_RELA 0x04U /// Relocation table with addends
#define SHT_HASH 0x05U /// Hash table
#define SHT_DYNAMIC 0x06U /// Information for dynamic linking
#define SHT_NOTE 0x07U /// Information that marks file
#define SHT_NOBITS 0x08U /// Section does not have data in file
#define SHT_REL 0x09U /// Relocation table without addends
#define SHT_SHLIB 0x0aU /// Reserved
#define SHT_DYNSYM 0x0bU /// Dynamic linker symbol table
#define SHT_INIT_ARRAY 0x0eU /// Array of pointers to init funcs
#define SHT_FINI_ARRAY 0x0fU /// Array of pointers to termination funcs
#define SHT_PREINIT_ARRAY 0x10U /// Array of pointers to pre-init functions
#define SHT_GROUP 0x11U /// Section group
#define SHT_SYMTAB_SHNDX 0x12U /// Table of 32bit symtab shndx
#define SHT_LOOS 0x60000000U /// Start OS-specific.
#define SHT_HIOS 0x6fffffffU /// End OS-specific type
#define SHT_LOPROC 0x70000000U /// Start of processor-specific
#define SHT_HIPROC 0x7fffffffU /// End of processor-specific
#define SHT_LOUSER 0x80000000U /// Start of application-specific
#define SHT_HIUSER 0x8fffffffU /// End of application-specific
/*
* Special section index
*/
#define SHN_UNDEF 0U // Undefined section
#define SHN_LORESERVE 0xff00U // lower bound of reserved indexes
#define SHN_ABS 0xfff1U // Associated symbol is absolute
#define SHN_COMMON 0xfff2U // Associated symbol is common
#define SHN_XINDEX 0xffffU // Index is in symtab_shndx
/*
* Special section names
*/
#define SHNAME_SHSTRTAB ".shstrtab" /// section string table
#define SHNAME_STRTAB ".strtab" /// string table
#define SHNAME_SYMTAB ".symtab" /// symbol table
#define SHNAME_SYMTAB_SHNDX ".symtab_shndx" /// symbol table shndx array
#define SHNAME_TEXT ".text." /// suffix with entry name
/**
* @brief Symbol's information
*
*/
typedef struct {
/** Symbol name, index in string tbl */
elfWord name;
/** Value of the symbol */
elfAddr value;
/** Associated symbol size */
elfWord size;
/** Type and binding attributes */
elfByte info;
/** Extra flags */
elfByte other;
/** Associated section index */
elfHalf shndx;
} elfSymbol;
/** Get the \a binding info of the symbol */
#define ELF_ST_BIND(s) ((elfWord)((s)->info) >> 4)
/** Get the \a type info of the symbol */
#define ELF_ST_TYPE(s) ((elfWord)((s)->info) & 0xfU)
/*
* ELF symbol type
*/
#define STT_NOTYPE 0U // No type known
#define STT_OBJECT 1U // Data symbol
#define STT_FUNC 2U // Code symbol
#define STT_SECTION 3U // Section
#define STT_FILE 4U // File
#define STT_COMMON 5U // Common symbol
#define STT_LOOS 10U // Start of OS-specific
/*
* ELF symbol scope (binding)
*/
#define STB_LOCAL 0U /// Symbol not visible outside object
#define STB_GLOBAL 1U /// Symbol visible outside object
#define STB_WEAK 2U /// Weak symbol
/*
* The following routines that return file/program/section headers
* all return NULL when not found.
*/
/*
* Typical elf readers create a table of information that is passed
* to the different routines. For simplicity, we're going to just
* keep the image of the whole file and pass that around. Later, if we see
* a need to speed this up, we could consider changing elf_parser_ctx to be something
* more complicated.
*/
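/*
 * Minimal usage sketch (illustrative only; "image" and "image_size" stand
 * for a caller-provided buffer holding the whole ELF file):
 *
 *   elf_parser_ctx ctx = { .elf_file = image, .size = image_size };
 *
 *   if (elf_header_check(ctx.elf_file) && elf_has_valid_sections(ctx)) {
 *           const elfSectionHeader *symtab =
 *                   elf_named_section_header(ctx, SHNAME_SYMTAB);
 *           // ... query sections, symbols and their contents ...
 *   }
 */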
/**
* @brief Checks if the file stored in \a e is a 32-bit elf file
* and if the first 4 bytes contain elf magic ID.
*
* @param[in] e elf context containing complete ELF in a const buffer
*
* @return
* - TRUE if valid 32-bit elf file and correct elf magic ID present
* in first 4 bytes of elf file
 * - FALSE if either of the above conditions is not met
*/
bool elf_header_check(const elf_ct e);
/**
 * @brief Provide the number of entries in the section header table.
 * Gets the elf file header and checks that it is not NULL; the count is
 * read from the size field of the first (empty) section header, and zero is
 * returned if that section header is NULL.
*
* @param[in] e elf context containing complete ELF in a const buffer
*
 * @return number of section headers
*/
elfWord elf_shnum(const elf_parser_ctx e);
/**
* @brief This function checks all sections in the elf to be valid
*
* The function validates all sections as follows:
* - Valid section offset i.e. within file bounds.
* - Valid section size i.e. non-zero section size
* and offset + section size is within file bounds
*
 * @param[in] e elf context containing complete ELF in a const buffer
*
* @return
* - TRUE if all sections are valid
* - FALSE if any invalid section found
*/
bool elf_has_valid_sections(const elf_parser_ctx e);
/**
* @brief This function traverses the elf and
* returns a valid \ref elfSectionHeader if present
* at the index provided
*
* @param[in] e elf context containing complete ELF in a const buffer
* @param[in] index The index of the elfSectionHeader that is requested
* Valid range : 0 to elf_shnum(e)
*
* @return
* - valid elfSectionHeader from elf if index is valid and if sectionHeader is present
* - NULL if invalid or out of bounds index
*/
const elfSectionHeader *elf_section_header(const elf_parser_ctx e,
unsigned int index);
/**
* @brief This function obtains the name of the \ref elfSectionHeader
* by going to the index specified by elfSectionHeader->name in the string table
* of the elf
*
* @param[in] e elf context
*
* @param[in] esh Valid \ref elfSectionHeader whose name is requested
*
* @return
* - Non NULL character array containing name of the elfSectionHeader
* if found in elf String Table
* - NULL if invalid elfSectionHeader or invalid index in elfSectionHeader->name
* going out of bounds of string table of elf
*/
const char *elf_section_name(const elf_parser_ctx e,
const elfSectionHeader *esh);
/**
 * @brief Provide the elf section header with the given "name".
 * Checks that the elf context is not NULL, gets the section table, and
 * iterates through the sections until a matching name is found.
*
* @param[in] e elf context
* @param[in] name name of section
*
* @return
* - elf section header with given "name"
* - NULL if @a name is NULL or invalid elfSectionHeader is found
*/
const elfSectionHeader *elf_named_section_header(const elf_parser_ctx e,
const char *name);
/**
 * @brief Provide the contents of a section.
 * Checks that the elf context and section header are not NULL and that the
 * section lies within the file bounds, then returns a pointer to the section
 * data inside the elf image.
 * @param[in] e elf context
 * @param[in] esh section header
 *
 * @return byte pointer to the section contents (NULL if e or esh is NULL or
 *         the section is out of bounds)
*/
const elfByte *elf_section_contents(const elf_parser_ctx e,
const elfSectionHeader *esh);
/**
 * @brief Get an ELF symbol by index.
 * Finds the SHT_SYMTAB section header and checks that the header and its
 * entsize are valid, that the index is within the table, and that the
 * symbol table is correctly aligned before indexing into it.
*
* @param[in] e elf context
* @param[in] index unsigned index
* Valid range: 0 to number of entries in SHT_SYMTAB of e
*
* @return elf symbol at given index (NULL if not found).
*/
const elfSymbol *elf_symbol(const elf_parser_ctx e, unsigned int index);
/**
 * @brief Get the name of a symbol from a symbol table section.
 * Checks that the section header and its entsize are valid, that the index
 * is within the table, fetches the linked string table section header, and
 * verifies that the symbol table is correctly aligned.
*
* @param[in] e elf context
* @param[in] esh pointer to structure elfSectionHeader
* @param[in] index unsigned index
* Valid range: 0 to number of entries in SHT_SYMTAB of e
*
* @return name of symbol from symtab section "esh" at "index".
*/
const char *elf_symbol_name(const elf_parser_ctx e, const elfSectionHeader *esh,
unsigned int index);
#endif // PVA_KMD_SILICON_ELF_PARSER_H

View File

@@ -0,0 +1,920 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_executable.h"
#include "pva_kmd_silicon_elf_parser.h"
#include "pva_kmd_utils.h"
#include "pva_resource.h"
#include "pva_kmd_device.h"
#include "pva_api_types.h"
#include "pva_kmd_t23x.h"
#include "pva_kmd_t26x.h"
#include "pva_math_utils.h"
/**
* enum to identify different segments of VPU ELF
*/
enum pva_elf_seg_type {
/** Code segment in VPU ELF */
PVA_SEG_VPU_CODE = 0U,
/** DATA segment in VPU ELF */
PVA_SEG_VPU_DATA,
/** DATA segment in VPU ELF containing symbol information*/
PVA_SEG_VPU_IN_PARAMS,
/** Not a valid segment in VPU ELF */
PVA_SEG_VPU_MAX_TYPE
};
/** Maximum number of characters in symbol name */
#define ELF_MAXIMUM_SYMBOL_LENGTH 64U
/** Maximum number of characters in section name */
#define ELF_MAXIMUM_SECTION_NAME 64
/** Section name of EXPORTS section */
#define ELF_EXPORTS_SECTION "EXPORTS"
/** Section name of EXPORTS section name length */
#define ELF_EXPORTS_SECTION_NAME_LENGTH 7
/** Alignment needed for Data section of ELFs */
#define DATA_SECTION_ALIGNMENT 32U
/** Alignment needed for Text section of ELFs */
#define TEXT_SECTION_ALIGNMENT 128U
/** VPU icache size: 16KB */
#define VPU_ICACHE_SIZE (16U * 1024U)
/** This value indicates that the current symbol can be ignored in the VPU ELF */
#define SYM_IGNORE 1
#define SIZE_EXPORTS_TABLE_ENTRY (3U * sizeof(uint32_t))
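/*
 * Reverse the byte order of a 32-bit word. Text-section words are
 * byte-swapped when they are copied out of the ELF (see copy_text_section).
 */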
static uint32_t change_byte_order(uint32_t word)
{
uint32_t out_word = 0U;
out_word = PVA_INSERT(PVA_EXTRACT(word, 31, 24, uint32_t), 7, 0);
out_word |= PVA_INSERT(PVA_EXTRACT(word, 23, 16, uint32_t), 15, 8);
out_word |= PVA_INSERT(PVA_EXTRACT(word, 15, 8, uint32_t), 23, 16);
out_word |= PVA_INSERT(PVA_EXTRACT(word, 7, 0, uint32_t), 31, 24);
return out_word;
}
/*
* Define mapping from VPU data, rodata and program sections into
* corresponding segment types.
*/
static const struct pack_rule {
const char *elf_sec_name;
int32_t pva_type;
} pack_rules[] = { {
.elf_sec_name = ".data",
.pva_type = (int32_t)PVA_SEG_VPU_DATA,
},
{
.elf_sec_name = ".rodata",
.pva_type = (int32_t)PVA_SEG_VPU_DATA,
},
{
.elf_sec_name = ".text",
.pva_type = (int32_t)PVA_SEG_VPU_CODE,
} };
/**
* \brief Compares the \a section_name with all
* vpu elf section names until it finds a match and
 * then returns the corresponding segment type.
 * If the segment type is \ref PVA_SEG_VPU_DATA, it further
 * checks whether it is PVA_SEG_VPU_IN_PARAMS.
* \param[in] section_name Name of the section to be searched for, in VPU ELF
* \return returns corresponding value from enum pva_elf_seg_type.
*/
static int32_t find_pva_ucode_segment_type(const char *section_name)
{
uint32_t i;
int32_t ret = (int32_t)PVA_SEG_VPU_MAX_TYPE;
for (i = 0; i < PVA_ARRAY_SIZE(pack_rules); i += 1U) {
/* Ignore the suffix of the section name */
if (strncmp(section_name, pack_rules[i].elf_sec_name,
strlen(pack_rules[i].elf_sec_name)) == 0) {
ret = pack_rules[i].pva_type;
break;
}
}
if (ret == (int32_t)PVA_SEG_VPU_DATA) {
uint64_t section_name_len =
strnlen(section_name, ELF_MAXIMUM_SECTION_NAME);
uint64_t exports_section_name_len =
ELF_EXPORTS_SECTION_NAME_LENGTH;
// Check for an EXPORTS section within the DATA segment; only EXPORTS sections carry symbol info.
if ((section_name_len >= exports_section_name_len) &&
(strncmp((section_name +
(section_name_len - exports_section_name_len)),
ELF_EXPORTS_SECTION,
(size_t)exports_section_name_len)) == 0) {
ret = (int32_t)PVA_SEG_VPU_IN_PARAMS;
}
}
return ret;
}
static enum pva_error validate_elf(const elf_parser_ctx elf)
{
enum pva_error err = PVA_SUCCESS;
if (!elf_header_check(elf.elf_file)) {
pva_kmd_log_err("Invalid 32 bit VPU ELF");
err = PVA_INVAL;
goto done;
}
if (!elf_has_valid_sections(elf)) {
pva_kmd_log_err("ELF has invalid sections");
err = PVA_INVAL;
}
done:
return err;
}
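/*
 * A symbol is kept only if it is a global, non-function symbol with a
 * non-zero size that lives in a PVA_SEG_VPU_IN_PARAMS (EXPORTS) section;
 * everything else is reported as SYM_IGNORE.
 */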
static int32_t validate_symbol(elf_parser_ctx elf, uint32_t symbol_entry_id,
const elfSymbol **sym)
{
const elfSectionHeader *sym_scn;
const char *section_name = NULL;
int32_t section_type = (int32_t)PVA_SEG_VPU_MAX_TYPE;
int32_t err = 0;
*sym = elf_symbol(elf, symbol_entry_id);
if ((*sym == NULL) || ((*sym)->size == 0U) ||
(ELF_ST_BIND(*sym) != STB_GLOBAL) ||
(ELF_ST_TYPE(*sym) == STT_FUNC)) {
err = SYM_IGNORE;
goto end;
}
sym_scn = elf_section_header(elf, (*sym)->shndx);
section_name = elf_section_name(elf, sym_scn);
if (section_name == NULL) {
err = SYM_IGNORE;
goto end;
}
section_type = find_pva_ucode_segment_type(section_name);
if (section_type != (int32_t)PVA_SEG_VPU_IN_PARAMS) {
err = SYM_IGNORE;
goto end;
}
err = 0;
end:
if (err != 0) {
*sym = NULL;
}
return err;
}
static enum pva_error count_symbols(const elf_parser_ctx elf,
uint32_t *out_num_symbols)
{
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
const elfSectionHeader *section_header;
uint32_t i, ent_count;
const elfSymbol *sym;
int32_t ret;
uint32_t num_symbols = 0;
section_header = elf_named_section_header(elf, ".symtab");
if (section_header == NULL) {
err = PVA_INVAL;
pva_kmd_log_err("No symbol table found");
goto done;
}
ent_count = section_header->size / section_header->entsize;
for (i = 0; i < ent_count; i++) {
ret = validate_symbol(elf, i, &sym);
if (ret < 0) {
err = PVA_INVAL;
pva_kmd_log_err("Validation of symbol failed");
goto done;
}
if (ret == SYM_IGNORE) {
continue;
}
num_symbols = addu32(num_symbols, 1U, &math_err);
}
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("count_symbols math error");
goto done;
}
*out_num_symbols = num_symbols;
done:
return err;
}
/**
* @brief updates symbol information (type, addr and size) from
* VPU ELF PVA_SEG_VPU_IN_PARAMS segment.
*
 * Each entry in the EXPORTS section of the ELF is laid out as follows:
 * struct {
 *     uint32_t type;        // from VMEM_TYPE enums
 *     uint32_t addr_offset; // offset from VMEM base
 *     uint32_t size;        // size of VMEM region in bytes
 * };
* @param[in] elf pointer to const image of elf file.
* @param[in] section_header pointer to VPU ELF PVA_SEG_VPU_IN_PARAMS section header
* @param[in, out] symbol_info pointer to ELF image symbol which needs to be updated.
*/
static enum pva_error
update_exports_symbol(elf_parser_ctx elf,
const elfSectionHeader *section_header,
struct pva_symbol_info *symbol_info)
{
const elfByte *data;
uint32_t symOffset = 0U;
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
if ((section_header == NULL) ||
(symbol_info->vmem_addr < section_header->addr) ||
(addu32(symbol_info->vmem_addr, (uint32_t)SIZE_EXPORTS_TABLE_ENTRY,
&math_err) >
addu32(section_header->addr, section_header->size, &math_err))) {
err = PVA_INVAL;
goto done;
} else {
symOffset = subu32(symbol_info->vmem_addr, section_header->addr,
&math_err);
}
data = elf_section_contents(elf, section_header);
if (data == NULL) {
pva_kmd_log_err("Export section in ELF is NULL");
err = PVA_INVAL;
goto done;
}
symbol_info->symbol_type = *(uint8_t *)((uintptr_t)&data[symOffset]);
if ((symbol_info->symbol_type == (uint8_t)PVA_SYM_TYPE_INVALID) ||
(symbol_info->symbol_type >= (uint8_t)PVA_SYM_TYPE_MAX)) {
pva_kmd_log_err("Invalid symbol type found");
err = PVA_INVAL;
goto done;
}
symbol_info->vmem_addr =
*(uint32_t *)((uintptr_t)&data[symOffset + sizeof(uint32_t)]);
symbol_info->size = *(uint32_t *)((
uintptr_t)&data[symOffset + (2UL * sizeof(uint32_t))]);
if (math_err != MATH_OP_SUCCESS) {
pva_kmd_log_err("update_exports_symbol math error");
err = PVA_ERR_MATH_OP;
goto done;
}
done:
return err;
}
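/*
 * Check that [vmem_offset, vmem_offset + size) lies entirely within one VMEM
 * region. The lookup relies on vmem_regions_tab being sorted by ascending
 * start address, as the T23x/T26x tables in this driver are.
 */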
static bool validate_vmem_offset(const uint32_t vmem_offset,
const uint32_t size,
const uint8_t vmem_region_count,
const struct vmem_region *vmem_regions_tab)
{
bool valid = false;
uint32_t i = 0U;
uint32_t prev_idx;
pva_math_error math_err = MATH_OP_SUCCESS;
for (i = vmem_region_count; i > 0U; i--) {
prev_idx = subu32(i, 1U, &math_err);
if (vmem_offset >= vmem_regions_tab[prev_idx].start) {
break;
}
}
if ((i > 0U) && (addu32(vmem_offset, size, &math_err) <=
vmem_regions_tab[prev_idx].end)) {
valid = true;
}
return (math_err != MATH_OP_SUCCESS) ? false : valid;
}
static enum pva_error copy_symbol(elf_parser_ctx elf, const elfSymbol *sym,
const char *symname,
struct pva_symbol_info *symbol_info,
const uint8_t vmem_region_count,
const struct vmem_region *vmem_regions_tab)
{
const elfSectionHeader *sym_scn;
enum pva_error err = PVA_SUCCESS;
size_t symname_len = strnlen(symname, PVA_MAX_SYMBOL_NAME_LEN);
if (symname_len > 0U) {
(void)memcpy(symbol_info->name, symname, symname_len);
}
symbol_info->name[PVA_MAX_SYMBOL_NAME_LEN] = '\0';
symbol_info->size = sym->size;
symbol_info->vmem_addr = sym->value;
sym_scn = elf_section_header(elf, sym->shndx);
err = update_exports_symbol(elf, sym_scn, symbol_info);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Updating symbol from EXPORTS table failed");
goto out;
}
if (!validate_vmem_offset(symbol_info->vmem_addr, symbol_info->size,
vmem_region_count, vmem_regions_tab)) {
pva_kmd_log_err("Invalid symbol vmem offset in ELF");
err = PVA_INVAL;
goto out;
}
out:
return err;
}
static enum pva_error
fill_symbol_table(const elf_parser_ctx elf,
struct pva_kmd_exec_symbol_table *sym_table,
const uint8_t vmem_region_count,
const struct vmem_region *vmem_regions_tab)
{
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
const elfSectionHeader *section_header;
uint32_t i, ent_count;
const elfSymbol *sym;
const char *symname;
int32_t ret;
uint32_t export_sym_idx = 0;
section_header = elf_named_section_header(elf, ".symtab");
if (section_header == NULL) {
err = PVA_INVAL;
pva_kmd_log_err("No symbol table found");
goto done;
}
ent_count = section_header->size / section_header->entsize;
for (i = 0; i < ent_count; i++) {
struct pva_symbol_info *symbol_info;
ret = validate_symbol(elf, i, &sym);
if (ret < 0) {
err = PVA_INVAL;
pva_kmd_log_err("Validation of symbol failed");
goto done;
}
if (ret == SYM_IGNORE) {
continue;
}
symbol_info = &sym_table->symbols[export_sym_idx];
ASSERT(symbol_info != NULL);
symname = elf_symbol_name(elf, section_header, i);
if (symname == NULL) {
err = PVA_INVAL;
pva_kmd_log_err("elf_symbol_name failed");
goto done;
}
err = copy_symbol(elf, sym, symname, symbol_info,
vmem_region_count, vmem_regions_tab);
if (err != PVA_SUCCESS) {
goto done;
}
symbol_info->symbol_id =
addu32(export_sym_idx, PVA_SYMBOL_ID_BASE, &math_err);
export_sym_idx = addu32(export_sym_idx, 1U, &math_err);
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("fill_symbol_table math error");
goto done;
}
}
done:
return err;
}
/**
 * To simplify the caller's life: the input ptr should always be considered
 * freed after this call. The returned pointer should always be treated as a
 * new allocation and must be freed by the caller if it is not NULL.
*/
static void *pva_realloc(void *ptr, uint32_t old_size, uint32_t new_size)
{
void *new_buffer;
if (ptr == NULL) {
return pva_kmd_zalloc(new_size);
}
if (new_size <= old_size) {
return ptr;
}
new_buffer = pva_kmd_zalloc(new_size);
if (new_buffer == NULL) {
goto out;
}
memcpy(new_buffer, ptr, old_size);
out:
pva_kmd_free(ptr);
return new_buffer;
}
static void *copy_text_section(const elf_parser_ctx elf,
const elfSectionHeader *section_header,
void *out_buffer, uint32_t *buffer_size)
{
const elfByte *elf_data;
uint32_t const *word;
uint32_t *dst_word;
uint32_t wi;
/* The load address in section header is in words (uint32_t) */
uint32_t load_addr_bytes =
safe_mulu32(section_header->addr, (uint32_t)sizeof(uint32_t));
uint32_t needed_size =
safe_addu32(load_addr_bytes, section_header->size);
// Align required text section size
needed_size =
safe_pow2_roundup_u32(needed_size, TEXT_SECTION_ALIGNMENT);
if (needed_size > *buffer_size) {
out_buffer = pva_realloc(out_buffer, *buffer_size, needed_size);
*buffer_size = needed_size;
}
if (out_buffer == NULL) {
return NULL;
}
elf_data = elf_section_contents(elf, section_header);
if (elf_data == NULL) {
pva_kmd_log_err("copy_text_section elf_data error");
return NULL;
}
word = (uint32_t const *)elf_data;
dst_word = (uint32_t *)((uintptr_t)out_buffer + load_addr_bytes);
for (wi = 0; wi < (section_header->size / sizeof(uint32_t)); wi++) {
dst_word[wi] = change_byte_order(word[wi]);
}
return out_buffer;
}
/**
* @brief Aggregate all text sections into a single, dynamically
* allocated buffer.
*
 * The placement of the text sections needs to take the load addresses into
 * account.
 *
 * The endianness of each text-section word needs to be swapped.
*
* Caller is responsible for freeing the returned buffer.
*/
static void *aggregate_text_sections(const elf_parser_ctx elf,
uint32_t *out_size)
{
const elfSectionHeader *section_header;
uint32_t index = 0;
const char *section_name;
const elfWord sectionCount = elf_shnum(elf);
void *sections_content = NULL;
uint32_t sections_size = 0;
for (index = 0; index < sectionCount; index++) {
int32_t segment_type;
section_header = elf_section_header(elf, index);
if (section_header == NULL) {
pva_kmd_log_err(
"aggregate_text_sections elf_section_header error");
goto out;
}
section_name = elf_section_name(elf, section_header);
if (section_name == NULL) {
pva_kmd_log_err(
"aggregate_text_sections elf_section_name error");
goto out;
}
segment_type = find_pva_ucode_segment_type(section_name);
if ((section_header->type == SHT_PROGBITS) &&
(segment_type == (int32_t)PVA_SEG_VPU_CODE)) {
sections_content =
copy_text_section(elf, section_header,
sections_content,
&sections_size);
if (sections_content == NULL) {
pva_kmd_log_err(
"aggregate_text_sections copy_text_section error");
goto out;
}
}
}
out:
*out_size = sections_size;
return sections_content;
}
static void copy_data_section(const elf_parser_ctx elf,
const elfSectionHeader *section_header,
void *out_buffer, uint32_t *buffer_offset,
uint32_t buffer_size)
{
const elfByte *elf_data;
void *dst;
uint32_t aligned_size = safe_pow2_roundup_u32(section_header->size,
DATA_SECTION_ALIGNMENT);
uint32_t size = safe_addu32(*buffer_offset, aligned_size);
ASSERT(size <= buffer_size);
dst = pva_offset_pointer(out_buffer, *buffer_offset);
elf_data = elf_section_contents(elf, section_header);
ASSERT(elf_data != NULL);
memcpy(dst, elf_data, section_header->size);
*buffer_offset = safe_addu32(*buffer_offset, aligned_size);
}
static enum pva_error count_data_sections(const elf_parser_ctx elf,
uint32_t *out_n_data_sections,
uint32_t *out_total_size)
{
const elfSectionHeader *section_header;
uint32_t index = 0;
const char *section_name;
const elfWord sectionCount = elf_shnum(elf);
uint32_t n_data_sections = 0;
uint32_t total_size = 0;
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
for (index = 0; index < sectionCount; index++) {
int32_t segment_type;
section_header = elf_section_header(elf, index);
if (section_header == NULL) {
err = PVA_INVAL;
goto out;
}
section_name = elf_section_name(elf, section_header);
if (section_name == NULL) {
err = PVA_INVAL;
goto out;
}
segment_type = find_pva_ucode_segment_type(section_name);
if ((section_header->type == SHT_PROGBITS) &&
(segment_type == (int32_t)PVA_SEG_VPU_DATA)) {
n_data_sections =
addu32(n_data_sections, 1U, &math_err);
total_size += safe_pow2_roundup_u32(
section_header->size, DATA_SECTION_ALIGNMENT);
}
}
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("count_data_sections math error");
goto out;
}
*out_n_data_sections = n_data_sections;
*out_total_size = total_size;
out:
return err;
}
/**
* @brief Aggregate all data sections into a single, dynamically
* allocated buffer.
*
 * The offset of each data section must be aligned to DATA_SECTION_ALIGNMENT.
*
* The caller must free the returned data buffer and out_section_infos.
*
*/
static void *
aggregate_data_sections(const elf_parser_ctx elf, uint32_t n_data_sections,
uint32_t total_sections_size,
struct pva_fw_data_section_info **out_section_infos)
{
const elfSectionHeader *section_header;
uint32_t index = 0;
const char *section_name;
const elfWord sectionCount = elf_shnum(elf);
void *sections_content = NULL;
struct pva_fw_data_section_info *section_infos;
uint32_t buffer_offset = 0;
uint32_t sec_idx = 0;
sections_content = pva_kmd_zalloc(total_sections_size);
if (sections_content == NULL) {
goto err_out;
}
section_infos =
pva_kmd_zalloc(sizeof(*section_infos) * n_data_sections);
if (section_infos == NULL) {
goto free_content;
}
for (index = 0; index < sectionCount; index++) {
int32_t segment_type;
section_header = elf_section_header(elf, index);
/* Already checked when counting data sections */
ASSERT(section_header != NULL);
section_name = elf_section_name(elf, section_header);
ASSERT(section_name != NULL);
segment_type = find_pva_ucode_segment_type(section_name);
if ((section_header->type == SHT_PROGBITS) &&
(segment_type == (int32_t)PVA_SEG_VPU_DATA)) {
section_infos[sec_idx].data_buf_off = buffer_offset;
section_infos[sec_idx].vmem_addr = section_header->addr;
section_infos[sec_idx].size = section_header->size;
sec_idx = safe_addu32(sec_idx, 1U);
copy_data_section(elf, section_header, sections_content,
&buffer_offset, total_sections_size);
}
}
*out_section_infos = section_infos;
return sections_content;
free_content:
pva_kmd_free(sections_content);
err_out:
return NULL;
}
/**
 * @brief Place the text and data sections in a single contiguous buffer that
 * is mapped into the PVA IOVA space (user SID), laid out as
 * [ text | VPU_ICACHE_SIZE guard | data sections ].
 *
 * The text size is padded by an entire VPU icache size to avoid SMMU faults
 * from instruction prefetch past the end of the text.
*/
static struct pva_kmd_device_memory *
load_sections(struct pva_kmd_device *pva, uint8_t smmu_id,
const void *text_section_buf, uint32_t text_size,
const void *data_section_buf, uint32_t data_size,
uint32_t *out_data_begin_offset)
{
uint32_t size = safe_addu32(text_size, (uint32_t)VPU_ICACHE_SIZE);
uint32_t alloc_size = safe_addu32(size, data_size);
uint32_t data_begin = safe_addu32(text_size, (uint32_t)VPU_ICACHE_SIZE);
struct pva_kmd_device_memory *dev_mem;
ASSERT(TEXT_SECTION_ALIGNMENT >= DATA_SECTION_ALIGNMENT);
/* This is guaranteed to be true as TEXT_SECTION_ALIGNMENT is more strict */
ASSERT(data_begin % DATA_SECTION_ALIGNMENT == 0);
/* Map it as read-only. TODO: when VPU debugger is supported, we may
* need to map text as READ_WRITE conditionally. */
dev_mem = pva_kmd_device_memory_alloc_map(alloc_size, pva,
PVA_ACCESS_RO, smmu_id);
if (dev_mem == NULL) {
goto out;
}
memcpy(dev_mem->va, text_section_buf, text_size);
memcpy(pva_offset_pointer(dev_mem->va, data_begin), data_section_buf,
data_size);
*out_data_begin_offset = data_begin;
out:
return dev_mem;
}
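/*
 * Metainfo buffer layout: a struct pva_exec_bin_resource header, followed by
 * n_data_sections pva_fw_data_section_info entries, followed by n_symbols
 * pva_fw_vmem_buffer entries describing the exported VMEM buffers.
 */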
static struct pva_kmd_device_memory *
load_metainfo(struct pva_kmd_device *pva, uint64_t section_iova,
uint32_t text_size, uint32_t data_begin_off, uint32_t data_size,
struct pva_fw_data_section_info const *section_infos,
uint32_t n_data_sections, struct pva_symbol_info *symbol_table,
uint32_t n_symbols)
{
struct pva_kmd_device_memory *dev_mem;
struct pva_exec_bin_resource *metainfo;
struct pva_fw_vmem_buffer *vmem_buffers_mem;
struct pva_fw_data_section_info *data_sections_mem;
uint32_t i;
uint32_t alloc_size = (uint32_t)sizeof(struct pva_exec_bin_resource);
pva_math_error math_err = MATH_OP_SUCCESS;
alloc_size =
addu32(alloc_size,
mulu32(n_data_sections,
(uint32_t)sizeof(struct pva_fw_data_section_info),
&math_err),
&math_err);
alloc_size = addu32(alloc_size,
mulu32(n_symbols,
(uint32_t)sizeof(struct pva_fw_vmem_buffer),
&math_err),
&math_err);
dev_mem = pva_kmd_device_memory_alloc_map(
alloc_size, pva, PVA_ACCESS_RO, PVA_R5_SMMU_CONTEXT_ID);
if (dev_mem == NULL) {
goto out;
}
metainfo = dev_mem->va;
metainfo->code_addr_hi = iova_hi(section_iova);
metainfo->code_addr_lo = iova_lo(section_iova);
metainfo->code_size = text_size;
metainfo->data_section_addr_hi =
iova_hi(addu64(section_iova, data_begin_off, &math_err));
metainfo->data_section_addr_lo =
iova_lo(addu64(section_iova, data_begin_off, &math_err));
metainfo->num_data_sections = n_data_sections;
metainfo->num_vmem_buffers = n_symbols;
data_sections_mem = pva_offset_pointer(metainfo, sizeof(*metainfo));
memcpy(data_sections_mem, section_infos,
mulu32(n_data_sections, (uint32_t)sizeof(*section_infos),
&math_err));
vmem_buffers_mem = pva_offset_pointer(
data_sections_mem,
mulu32(n_data_sections, (uint32_t)sizeof(*section_infos),
&math_err));
if (math_err != MATH_OP_SUCCESS) {
dev_mem = NULL;
goto out;
}
for (i = 0; i < n_symbols; i++) {
vmem_buffers_mem[i].addr =
PVA_INSERT(symbol_table[i].vmem_addr,
PVA_FW_VMEM_ADDR_MSB, PVA_FW_VMEM_ADDR_LSB) |
PVA_INSERT((uint32_t)symbol_table[i].symbol_type,
PVA_FW_SYM_TYPE_MSB, PVA_FW_SYM_TYPE_LSB);
vmem_buffers_mem[i].size = symbol_table[i].size;
}
out:
return dev_mem;
}
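/*
 * Load a VPU executable: validate the ELF, build the exported-symbol table
 * from the EXPORTS data, aggregate the text and data sections, map them into
 * the PVA IOVA space and emit the metainfo block consumed by the firmware.
 */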
enum pva_error
pva_kmd_load_executable(void *executable_data, uint32_t executable_size,
struct pva_kmd_device *pva, uint8_t dma_smmu_id,
struct pva_kmd_exec_symbol_table *out_symbol_table,
struct pva_kmd_device_memory **out_metainfo,
struct pva_kmd_device_memory **out_sections)
{
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
elf_parser_ctx elf = { 0 };
uint32_t num_symbols = 0;
uint32_t n_data_sections;
uint32_t total_data_section_size = 0;
struct pva_fw_data_section_info *section_infos = NULL;
void *data_section_buf = NULL;
void *text_section_buf = NULL;
uint32_t total_text_section_size = 0;
struct pva_kmd_device_memory *metainfo_mem = NULL;
struct pva_kmd_device_memory *sections_mem = NULL;
uint32_t data_begin_off;
elf.elf_file = executable_data;
elf.size = executable_size;
err = validate_elf(elf);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = count_symbols(elf, &num_symbols);
if (err != PVA_SUCCESS) {
goto err_out;
}
out_symbol_table->n_symbols = num_symbols;
if (num_symbols > 0) {
out_symbol_table->symbols = pva_kmd_zalloc(
mulu32((uint32_t)sizeof(struct pva_symbol_info),
num_symbols, &math_err));
if (out_symbol_table->symbols == NULL) {
err = PVA_NOMEM;
goto err_out;
}
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("pva_kmd_load_executable math error");
goto err_out;
}
}
err = fill_symbol_table(elf, out_symbol_table,
pva->hw_consts.n_vmem_regions,
pva->vmem_regions_tab);
if (err != PVA_SUCCESS) {
goto free_syms;
}
text_section_buf =
aggregate_text_sections(elf, &total_text_section_size);
/* Must have text sections */
if (text_section_buf == NULL) {
pva_kmd_log_err(
"pva_kmd_load_executable aggregate_text_sections error");
goto free_syms;
}
err = count_data_sections(elf, &n_data_sections,
&total_data_section_size);
if (err != PVA_SUCCESS) {
goto free_text_buf;
}
/* It's OK to not have data sections */
if (total_data_section_size != 0) {
data_section_buf =
aggregate_data_sections(elf, n_data_sections,
total_data_section_size,
&section_infos);
ASSERT(data_section_buf != NULL);
}
sections_mem = load_sections(pva, dma_smmu_id, text_section_buf,
total_text_section_size, data_section_buf,
total_data_section_size, &data_begin_off);
if (sections_mem == NULL) {
goto free_data_buf;
}
metainfo_mem =
load_metainfo(pva, sections_mem->iova, total_text_section_size,
data_begin_off, total_data_section_size,
section_infos, n_data_sections,
out_symbol_table->symbols, num_symbols);
if (metainfo_mem == NULL) {
goto free_sec_mem;
}
/* Success. Now clean up temporary allocations */
if (data_section_buf != NULL) {
pva_kmd_free(data_section_buf);
}
if (section_infos != NULL) {
pva_kmd_free(section_infos);
}
pva_kmd_free(text_section_buf);
*out_metainfo = metainfo_mem;
*out_sections = sections_mem;
return PVA_SUCCESS;
free_sec_mem:
pva_kmd_device_memory_free(sections_mem);
free_data_buf:
if (data_section_buf != NULL) {
pva_kmd_free(data_section_buf);
}
if (section_infos != NULL) {
pva_kmd_free(section_infos);
}
free_text_buf:
pva_kmd_free(text_section_buf);
free_syms:
pva_kmd_free(out_symbol_table->symbols);
err_out:
return err;
}
void pva_kmd_unload_executable(struct pva_kmd_exec_symbol_table *symbol_table,
struct pva_kmd_device_memory *metainfo,
struct pva_kmd_device_memory *sections)
{
pva_kmd_device_memory_free(metainfo);
pva_kmd_device_memory_free(sections);
if (symbol_table->symbols != NULL) {
pva_kmd_free(symbol_table->symbols);
symbol_table->symbols = NULL;
}
}

View File

@@ -0,0 +1,63 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_device.h"
#include "pva_kmd_silicon_hwpm.h"
#include "pva_kmd_silicon_utils.h"
#ifndef TEGRA_SOC_HWPM_IP_REG_OP_READ
#define TEGRA_SOC_HWPM_IP_REG_OP_READ 0x1
#endif
#ifndef TEGRA_SOC_HWPM_IP_REG_OP_WRITE
#define TEGRA_SOC_HWPM_IP_REG_OP_WRITE 0x2
#endif
int pva_kmd_hwpm_ip_reg_op(void *ip_dev, uint32_t reg_op,
uint32_t inst_element_index, uint64_t reg_offset,
uint32_t *reg_data)
{
struct pva_kmd_device *pva = ip_dev;
if (reg_offset > UINT32_MAX)
return PVA_INVAL;
switch (reg_op) {
case TEGRA_SOC_HWPM_IP_REG_OP_READ:
*reg_data =
pva_kmd_read(pva, safe_addu32(pva->regspec.cfg_perf_mon,
reg_offset));
break;
case TEGRA_SOC_HWPM_IP_REG_OP_WRITE:
pva_kmd_write(
pva, safe_addu32(pva->regspec.cfg_perf_mon, reg_offset),
*reg_data);
break;
default:
pva_kmd_log_err("Invalid HWPM operation");
return PVA_INVAL;
}
return PVA_SUCCESS;
}
int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable)
{
struct pva_kmd_device *dev = ip_dev;
enum pva_error err = PVA_SUCCESS;
if (disable) {
err = pva_kmd_device_busy(dev);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to busy");
}
} else {
pva_kmd_device_idle(dev);
}
return err;
}

View File

@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_HWPM_H
#define PVA_KMD_SILICON_HWPM_H
#include "pva_kmd.h"
#include "pva_kmd_shim_debugfs.h"
/**
 * @brief pva_kmd_hwpm_ip_pm
 *
 * This function is called from the Tegra HWPM driver to
 * power the PVA device on or off.
 *
 * @param ip_dev Pointer to PVA device
 * @param disable when true, power management is disabled and the PVA is
 *                kept powered on; when false, the busy reference is
 *                released and the PVA may power off.
 * @return 0 on Success or negative error code
*
*/
int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable);
/**
 * @brief pva_kmd_hwpm_ip_reg_op
 *
 * This function is called from the Tegra HWPM driver to
 * access PVA HWPM registers.
*
* @param ip_dev Pointer to PVA device
* @param reg_op access operation and can be one of
* TEGRA_SOC_HWPM_IP_REG_OP_READ
* TEGRA_SOC_HWPM_IP_REG_OP_WRITE
* @param inst_element_index element index within PVA instance
 * @param reg_offset offset of the register relative to the PVA HWPM base
* @param reg_data pointer to where data is to be placed or read.
* @return 0 on Success or negative error code
*
*/
int pva_kmd_hwpm_ip_reg_op(void *ip_dev, uint32_t reg_op,
uint32_t inst_element_index, uint64_t reg_offset,
uint32_t *reg_data);
#endif //PVA_KMD_SILICON_HWPM_H

View File

@@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_silicon_isr.h"
#include "pva_kmd_device.h"
#include "pva_fw_hyp.h"
#include "pva_kmd_msg.h"
struct pva_fw_msg {
uint8_t len;
uint32_t data[PVA_FW_MSG_MAX_LEN];
};
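/*
 * Read one firmware message from the hypervisor mailboxes: the word in
 * PVA_FW_MBOX_TO_HYP_LAST carries the header (including the length field);
 * the remaining words are read from PVA_FW_MBOX_TO_HYP_BASE onwards.
 */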
static void read_hyp_msg(struct pva_kmd_device *pva, struct pva_fw_msg *msg)
{
uint32_t i;
msg->data[0] = pva_kmd_read_mailbox(pva, PVA_FW_MBOX_TO_HYP_LAST);
msg->len = PVA_EXTRACT(msg->data[0], PVA_FW_MSG_LEN_MSB,
PVA_FW_MSG_LEN_LSB, uint8_t);
ASSERT(msg->len <= PVA_ARRAY_SIZE(msg->data));
for (i = 1; i < msg->len; i++) {
msg->data[i] = pva_kmd_read_mailbox(
pva, PVA_FW_MBOX_TO_HYP_BASE + i - 1);
}
}
void pva_kmd_hyp_isr(void *data)
{
struct pva_kmd_device *pva = data;
uint32_t intr_status;
uint32_t wdt_val, hsp_val, h1x_val;
intr_status = pva_kmd_read(pva, pva->regspec.sec_lic_intr_status);
wdt_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_WDT_MSB,
PVA_REG_SEC_LIC_INTR_WDT_LSB, uint32_t);
hsp_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_HSP_MSB,
PVA_REG_SEC_LIC_INTR_HSP_LSB, uint32_t);
h1x_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_H1X_MSB,
PVA_REG_SEC_LIC_INTR_H1X_LSB, uint32_t);
if (wdt_val != 0) {
/* Clear interrupt status */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status,
intr_status &
PVA_MASK(PVA_REG_SEC_LIC_INTR_WDT_MSB,
PVA_REG_SEC_LIC_INTR_WDT_LSB));
/* TODO: reboot firmware when we can */
FAULT("PVA watchdog timeout!");
}
if (h1x_val != 0) {
pva_kmd_log_err_u64("Host1x errors", h1x_val);
/* Clear interrupt status */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status,
intr_status &
PVA_MASK(PVA_REG_SEC_LIC_INTR_H1X_MSB,
PVA_REG_SEC_LIC_INTR_H1X_LSB));
}
if (hsp_val != 0) {
struct pva_fw_msg msg = { 0 };
read_hyp_msg(pva, &msg);
pva_kmd_handle_hyp_msg(pva, &msg.data[0], msg.len);
msg.data[0] &= ~PVA_FW_MBOX_FULL_BIT;
/* Clear interrupt bit in mailbox */
pva_kmd_write_mailbox(pva, PVA_FW_MBOX_TO_HYP_LAST,
msg.data[0]);
}
}
static uint32_t read_ccq0_status(struct pva_kmd_device *pva, uint8_t status_id)
{
return pva_kmd_read(pva, pva->regspec.ccq_regs[0].status[status_id]);
}
static void write_ccq0_status(struct pva_kmd_device *pva, uint8_t status_id,
uint32_t value)
{
pva_kmd_write(pva, pva->regspec.ccq_regs[0].status[status_id], value);
}
static void read_ccq_msg(struct pva_kmd_device *pva, struct pva_fw_msg *msg)
{
uint32_t i;
msg->data[0] = read_ccq0_status(pva, PVA_FW_MSG_STATUS_LAST);
msg->len = PVA_EXTRACT(msg->data[0], PVA_FW_MSG_LEN_MSB,
PVA_FW_MSG_LEN_LSB, uint8_t);
ASSERT(msg->len <= PVA_ARRAY_SIZE(msg->data));
for (i = 1; i < msg->len; i++) {
msg->data[i] =
read_ccq0_status(pva, PVA_FW_MSG_STATUS_BASE + i - 1);
}
}
/* Handle interrupt from CCQ0 */
void pva_kmd_isr(void *data)
{
struct pva_kmd_device *pva = data;
uint32_t intr_status;
intr_status =
read_ccq0_status(pva, 2) & PVA_REG_CCQ_STATUS2_INTR_ALL_BITS;
pva_dbg_printf("CCQ0_INTR_STATUS 0x%x\n", intr_status);
/* Clear interrupt status. This must be done prior to acking CCQ messages;
 * otherwise we risk losing CCQ messages.
*/
write_ccq0_status(pva, 2, intr_status);
if (intr_status & PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT) {
struct pva_fw_msg msg;
read_ccq_msg(pva, &msg);
pva_kmd_handle_msg(pva, &msg.data[0], msg.len);
/* Ack through status1 write. */
write_ccq0_status(pva, 1, 0 /* Value doesn't matter for now */);
}
/* We don't care about Status7 or CCQ overflow interrupt */
}

View File

@@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_ISR_H
#define PVA_KMD_SILICON_ISR_H
#include "pva_kmd_silicon_utils.h"
#include "pva_kmd_device.h"
void pva_kmd_hyp_isr(void *data);
void pva_kmd_isr(void *data);
#endif // PVA_KMD_SILICON_ISR_H

View File

@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_silicon_utils.h"
#include "pva_kmd_device.h"
#include "pva_math_utils.h"
void pva_kmd_ccq_push(struct pva_kmd_device *pva, uint8_t ccq_id,
uint64_t ccq_entry)
{
pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo,
PVA_EXTRACT64(ccq_entry, 31, 0, uint32_t));
pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo,
PVA_EXTRACT64(ccq_entry, 63, 32, uint32_t));
}
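/* Each 64-bit CCQ entry occupies two 32-bit FIFO words (see
 * pva_kmd_ccq_push), so the free word count is halved to report whole
 * entries.
 */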
uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id)
{
uint32_t status2 =
pva_kmd_read(pva, pva->regspec.ccq_regs[ccq_id].status[2]);
uint32_t len =
PVA_EXTRACT(status2, PVA_REG_CCQ_STATUS2_NUM_ENTRIES_MSB,
PVA_REG_CCQ_STATUS2_NUM_ENTRIES_LSB, uint32_t);
return safe_subu32((uint32_t)PVA_CCQ_DEPTH, len) / 2U;
}

View File

@@ -0,0 +1,52 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_UTILS_H
#define PVA_KMD_SILICON_UTILS_H
#include "pva_utils.h"
#include "pva_kmd_regs.h"
#include "pva_kmd_shim_silicon.h"
#include "pva_math_utils.h"
static inline void pva_kmd_write(struct pva_kmd_device *pva, uint32_t addr,
uint32_t val)
{
pva_dbg_printf("pva_kmd_write: addr=0x%x, val=0x%x\n", addr, val);
pva_kmd_aperture_write(pva, PVA_KMD_APERTURE_PVA_CLUSTER, addr, val);
}
static inline uint32_t pva_kmd_read(struct pva_kmd_device *pva, uint32_t addr)
{
uint32_t val;
val = pva_kmd_aperture_read(pva, PVA_KMD_APERTURE_PVA_CLUSTER, addr);
return val;
}
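/* Shared mailboxes are laid out at a fixed stride: the address of mailbox N
 * is PVA_REG_HSP_SM0_ADDR + N * (PVA_REG_HSP_SM1_ADDR - PVA_REG_HSP_SM0_ADDR).
 */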
static inline void pva_kmd_write_mailbox(struct pva_kmd_device *pva,
uint32_t mailbox_idx, uint32_t val)
{
uint32_t gap = PVA_REG_HSP_SM1_ADDR - PVA_REG_HSP_SM0_ADDR;
uint32_t offset = safe_mulu32(gap, mailbox_idx);
uint32_t addr = safe_addu32(PVA_REG_HSP_SM0_ADDR, offset);
pva_kmd_write(pva, addr, val);
}
static inline uint32_t pva_kmd_read_mailbox(struct pva_kmd_device *pva,
uint32_t mailbox_idx)
{
uint32_t gap = PVA_REG_HSP_SM1_ADDR - PVA_REG_HSP_SM0_ADDR;
uint32_t offset = safe_mulu32(gap, mailbox_idx);
uint32_t addr = safe_addu32(PVA_REG_HSP_SM0_ADDR, offset);
return pva_kmd_read(pva, addr);
}
#endif // PVA_KMD_SILICON_UTILS_H

View File

@@ -0,0 +1,156 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_submitter.h"
#include "pva_kmd_utils.h"
void pva_kmd_submitter_init(struct pva_kmd_submitter *submitter,
struct pva_kmd_queue *queue,
pva_kmd_mutex_t *submit_lock,
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool,
pva_kmd_mutex_t *chunk_pool_lock,
uint32_t *post_fence_va,
struct pva_fw_postfence const *post_fence)
{
submitter->queue = queue;
submitter->submit_lock = submit_lock;
submitter->post_fence_va = post_fence_va;
submitter->post_fence = *post_fence;
submitter->fence_future_value = 0;
submitter->chunk_pool = chunk_pool;
submitter->chunk_pool_lock = chunk_pool_lock;
*submitter->post_fence_va = submitter->fence_future_value;
}
enum pva_error pva_kmd_submitter_prepare(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder)
{
enum pva_error err;
err = pva_kmd_cmdbuf_builder_init(builder, submitter->chunk_pool);
if (err != PVA_SUCCESS) {
goto err_out;
}
return PVA_SUCCESS;
err_out:
return err;
}
enum pva_error
pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder,
struct pva_fw_postfence *fence)
{
enum pva_error err;
uint32_t first_chunk_id;
uint16_t first_chunk_size;
uint64_t first_chunk_offset;
struct pva_fw_cmdbuf_submit_info submit_info = { 0 };
struct pva_fw_postfence free_notifier_fence;
pva_kmd_cmdbuf_builder_finalize(builder, &first_chunk_id,
&first_chunk_size);
pva_kmd_get_free_notifier_fence(submitter->chunk_pool, first_chunk_id,
&free_notifier_fence);
first_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset(
submitter->chunk_pool, first_chunk_id);
submit_info.postfences[0] = free_notifier_fence;
submit_info.num_postfence = 1;
if (fence->resource_id != PVA_RESOURCE_ID_INVALID) {
submit_info.postfences[1] = *fence;
submit_info.num_postfence = 2;
}
submit_info.first_chunk_resource_id =
submitter->chunk_pool->mem_resource_id;
submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset);
submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset);
submit_info.first_chunk_size = first_chunk_size;
pva_kmd_mutex_lock(submitter->submit_lock);
err = pva_kmd_queue_submit(submitter->queue, &submit_info);
if (err != PVA_SUCCESS) {
pva_kmd_cmdbuf_builder_cancel(builder);
}
pva_kmd_mutex_unlock(submitter->submit_lock);
return err;
}
enum pva_error pva_kmd_submitter_submit(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder,
uint32_t *out_fence_val)
{
enum pva_error err;
uint32_t first_chunk_id;
uint16_t first_chunk_size;
uint64_t first_chunk_offset;
struct pva_fw_cmdbuf_submit_info submit_info = { 0 };
struct pva_fw_postfence free_notifier_fence;
pva_kmd_cmdbuf_builder_finalize(builder, &first_chunk_id,
&first_chunk_size);
pva_kmd_get_free_notifier_fence(submitter->chunk_pool, first_chunk_id,
&free_notifier_fence);
first_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset(
submitter->chunk_pool, first_chunk_id);
submit_info.num_postfence = 2;
submit_info.postfences[0] = submitter->post_fence;
submit_info.postfences[1] = free_notifier_fence;
submit_info.first_chunk_resource_id =
submitter->chunk_pool->mem_resource_id;
submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset);
submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset);
submit_info.first_chunk_size = first_chunk_size;
/* TODO: remove these flags once FW can execute command buffers with no engines. */
submit_info.flags =
PVA_INSERT8(0x3, PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_MSB,
PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_LSB);
pva_kmd_mutex_lock(submitter->submit_lock);
submitter->fence_future_value += 1U;
submit_info.postfences[0].value = submitter->fence_future_value;
err = pva_kmd_queue_submit(submitter->queue, &submit_info);
if (err == PVA_SUCCESS) {
*out_fence_val = submitter->fence_future_value;
} else {
submitter->fence_future_value -= 1U;
pva_kmd_cmdbuf_builder_cancel(builder);
}
pva_kmd_mutex_unlock(submitter->submit_lock);
return err;
}
enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter,
uint32_t fence_val,
uint32_t poll_interval_us,
uint32_t timeout_us)
{
uint32_t volatile *fence_addr = submitter->post_fence_va;
uint32_t time_spent = 0;
while (*fence_addr < fence_val) {
pva_kmd_sleep_us(poll_interval_us);
time_spent = safe_addu32(time_spent, poll_interval_us);
if (time_spent >= timeout_us) {
pva_kmd_log_err("pva_kmd_submitter_wait Timed out");
return PVA_TIMEDOUT;
}
}
return PVA_SUCCESS;
}

View File

@@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SUBMITTER_H
#define PVA_KMD_SUBMITTER_H
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_queue.h"
/** A thread-safe submitter utility */
struct pva_kmd_submitter {
/** The lock protects the submission to the queue, including
* incrementing the post fence */
pva_kmd_mutex_t *submit_lock;
struct pva_kmd_queue *queue;
uint32_t *post_fence_va;
struct pva_fw_postfence post_fence;
uint32_t fence_future_value;
/** This lock protects the use of the chunk_pool*/
pva_kmd_mutex_t *chunk_pool_lock;
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool;
};
void pva_kmd_submitter_init(struct pva_kmd_submitter *submitter,
struct pva_kmd_queue *queue,
pva_kmd_mutex_t *submit_lock,
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool,
pva_kmd_mutex_t *chunk_pool_lock,
uint32_t *post_fence_va,
struct pva_fw_postfence const *post_fence);
enum pva_error
pva_kmd_submitter_prepare(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder);
enum pva_error pva_kmd_submitter_submit(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder,
uint32_t *out_fence_val);
enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter,
uint32_t fence_val,
uint32_t poll_interval_us,
uint32_t timeout_us);
enum pva_error
pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder,
struct pva_fw_postfence *fence);
/*
 * Typical usage:
 *
 * Internal-fence flow:
 *   prepare submission -> add cmd(s) -> submit (returns a fence value)
 *   -> wait for fence.
 *
 * Caller-provided fence flow:
 *   prepare submission -> add cmd(s) -> submit with fence (provide a fence).
 */
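/*
 * Illustrative sketch of the internal-fence flow (error handling omitted;
 * "sub" is assumed to be a submitter already initialized with
 * pva_kmd_submitter_init, and the command-append step is elided):
 *
 *   struct pva_kmd_cmdbuf_builder builder;
 *   uint32_t fence_val;
 *
 *   if (pva_kmd_submitter_prepare(sub, &builder) == PVA_SUCCESS) {
 *           // ... append commands to the builder ...
 *           if (pva_kmd_submitter_submit(sub, &builder, &fence_val) ==
 *               PVA_SUCCESS)
 *                   (void)pva_kmd_submitter_wait(sub, fence_val, 100U,
 *                                                1000000U);
 *   }
 */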
#endif // PVA_KMD_SUBMITTER_H

View File

@@ -0,0 +1,88 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_t23x.h"
#include "pva_kmd_constants.h"
struct vmem_region vmem_regions_tab_t23x[PVA_VMEM_REGION_COUNT_T23X] = {
{ .start = T23x_VMEM0_START, .end = T23x_VMEM0_END },
{ .start = T23x_VMEM1_START, .end = T23x_VMEM1_END },
{ .start = T23x_VMEM2_START, .end = T23x_VMEM2_END },
};
void pva_kmd_device_init_t23x(struct pva_kmd_device *pva)
{
uint32_t ccq;
uint32_t st_idx;
pva->hw_consts.hw_gen = PVA_HW_GEN2;
pva->hw_consts.n_smmu_contexts = PVA_NUM_SMMU_CONTEXTS_T23X;
pva->r5_image_smmu_context_id = PVA_NUM_SMMU_CONTEXTS_T23X - 1;
pva->hw_consts.n_dma_descriptors = PVA_NUM_DMA_DESC_T23X;
pva->hw_consts.n_user_dma_channels = PVA_DMA_NUM_CHANNELS_T23X - 1U;
pva->hw_consts.n_hwseq_words = PVA_NUM_HWSEQ_WORDS_T23X;
pva->hw_consts.n_dynamic_adb_buffs = PVA_NUM_DYNAMIC_ADB_BUFFS_T23X;
pva->hw_consts.n_vmem_regions = PVA_VMEM_REGION_COUNT_T23X;
pva->support_hwseq_frame_linking = false;
pva->vmem_regions_tab = vmem_regions_tab_t23x;
pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T23x_REG_BASE;
pva->reg_size[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T23x_REG_SIZE;
pva->reg_phy_base[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_BASE;
pva->reg_size[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_SIZE;
pva->regspec.sec_lic_intr_enable = 0x28064;
pva->regspec.sec_lic_intr_status = 0x2806C;
pva->regspec.cfg_user_sid_base = 0x240000;
pva->regspec.cfg_priv_sid = 0x240020;
pva->regspec.cfg_vps_sid = 0x240024;
pva->regspec.cfg_r5user_lsegreg = 0x250008;
pva->regspec.cfg_r5user_usegreg = 0x25001c;
pva->regspec.cfg_priv_ar1_lsegreg = 0x25000c;
pva->regspec.cfg_priv_ar1_usegreg = 0x250020;
pva->regspec.cfg_priv_ar2_lsegreg = 0x250010;
pva->regspec.cfg_priv_ar2_usegreg = 0x250024;
pva->regspec.cfg_priv_ar1_start = 0x250028;
pva->regspec.cfg_priv_ar1_end = 0x25002c;
pva->regspec.cfg_priv_ar2_start = 0x250030;
pva->regspec.cfg_priv_ar2_end = 0x250034;
pva->regspec.cfg_scr_priv_0 = 0x18004;
pva->regspec.cfg_perf_mon = 0x200000;
pva->regspec.ccq_count = 8U;
/* For VPU 0*/
pva->regspec.vpu_dbg_instr_reg_offset[0] = 0x50000U;
/* For VPU 1*/
pva->regspec.vpu_dbg_instr_reg_offset[1] = 0x70000U;
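/* CCQ register blocks start at 0x260000 and are PVA_CFG_CCQ_BLOCK_SIZE bytes
 * apart; within a block the FIFO register sits at the base and the status
 * registers follow at a 4-byte stride.
 */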
for (ccq = 0; ccq < pva->regspec.ccq_count; ccq++) {
uint32_t n_st = PVA_CFG_CCQ_STATUS_COUNT;
uint32_t ccq_base = safe_addu32(
(uint32_t)0x260000,
safe_mulu32((uint32_t)PVA_CFG_CCQ_BLOCK_SIZE, ccq));
pva->regspec.ccq_regs[ccq].status_count = n_st;
pva->regspec.ccq_regs[ccq].fifo = ccq_base;
for (st_idx = 0; st_idx < n_st; st_idx++) {
pva->regspec.ccq_regs[ccq].status[st_idx] = safe_addu32(
ccq_base,
safe_addu32((uint32_t)0x4U,
safe_mulu32((uint32_t)0x4U,
st_idx)));
}
}
#if PVA_SUPPORT_XBAR_RAW == 1
pva->bl_sector_pack_format = PVA_BL_XBAR_RAW;
#else
pva->bl_sector_pack_format = PVA_BL_TEGRA_RAW;
#endif
}

View File

@@ -0,0 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_T23X_H
#define PVA_KMD_T23X_H
#include "pva_kmd_device.h"
/** Number of VMEM regions */
#define PVA_VMEM_REGION_COUNT_T23X 3U
/** Start Address of VMEM0 Bank in T23X */
#define T23x_VMEM0_START 0x40U
/** End Address of VMEM0 Bank in T23X */
#define T23x_VMEM0_END 0x20000U
/** Start Address of VMEM1 Bank in T23X */
#define T23x_VMEM1_START 0x40000U
/** End Address of VMEM1 Bank in T23X */
#define T23x_VMEM1_END 0x60000U
/** Start Address of VMEM2 Bank in T23X */
#define T23x_VMEM2_START 0x80000U
/** End Address of VMEM2 Bank in T23X */
#define T23x_VMEM2_END 0xA0000U
/** @brief Base address for PVA0 VPU Debug Register space (CSITE_PVA0VPU) */
#define TEGRA_PVA0_VPU_DBG_BASE 0x24740000U
/** @brief Size (in bytes) of the PVA0 VPU Debug Register space (CSITE_PVA0VPU) */
#define TEGRA_PVA0_VPU_DBG_SIZE 0x40000U
void pva_kmd_device_init_t23x(struct pva_kmd_device *pva);
#endif // PVA_KMD_T23X_H

View File

@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_t26x.h"
#include "pva_kmd_constants.h"
struct vmem_region vmem_regions_tab_t26x[PVA_VMEM_REGION_COUNT_T26X] = {
{ .start = T26x_VMEM0_START, .end = T26x_VMEM0_END },
{ .start = T26x_VMEM1_START, .end = T26x_VMEM1_END },
{ .start = T26x_VMEM2_START, .end = T26x_VMEM2_END },
{ .start = T26x_VMEM3_START, .end = T26x_VMEM3_END },
};
void pva_kmd_device_init_t26x(struct pva_kmd_device *pva)
{
uint32_t ccq;
uint32_t st_idx;
pva->hw_consts.hw_gen = PVA_HW_GEN3;
pva->hw_consts.n_smmu_contexts = PVA_NUM_SMMU_CONTEXTS_T26X;
pva->r5_image_smmu_context_id = PVA_NUM_SMMU_CONTEXTS_T26X - 1;
pva->hw_consts.n_dma_descriptors = PVA_NUM_DMA_DESC_T26X;
pva->hw_consts.n_user_dma_channels = PVA_DMA_NUM_CHANNELS_T26X - 1U;
pva->hw_consts.n_hwseq_words = PVA_NUM_HWSEQ_WORDS_T26X;
pva->hw_consts.n_dynamic_adb_buffs = PVA_NUM_DYNAMIC_ADB_BUFFS_T26X;
pva->hw_consts.n_vmem_regions = PVA_VMEM_REGION_COUNT_T26X;
pva->vmem_regions_tab = vmem_regions_tab_t26x;
pva->support_hwseq_frame_linking = true;
pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T26x_REG_BASE;
pva->reg_size[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T26x_REG_SIZE;
pva->reg_phy_base[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_BASE;
pva->reg_size[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_SIZE;
pva->regspec.sec_lic_intr_enable = 0x28064;
pva->regspec.sec_lic_intr_status = 0x2806C;
pva->regspec.cfg_user_sid_base = 0x240000;
pva->regspec.cfg_priv_sid = 0x240020;
pva->regspec.cfg_vps_sid = 0x240024;
pva->regspec.cfg_r5user_lsegreg = 0x250008;
pva->regspec.cfg_r5user_usegreg = 0x25001c;
pva->regspec.cfg_priv_ar1_lsegreg = 0x25000c;
pva->regspec.cfg_priv_ar1_usegreg = 0x250020;
pva->regspec.cfg_priv_ar2_lsegreg = 0x250010;
pva->regspec.cfg_priv_ar2_usegreg = 0x250024;
pva->regspec.cfg_priv_ar1_start = 0x250028;
pva->regspec.cfg_priv_ar1_end = 0x25002c;
pva->regspec.cfg_priv_ar2_start = 0x250030;
pva->regspec.cfg_priv_ar2_end = 0x250034;
pva->regspec.cfg_scr_priv_0 = 0x18004;
pva->regspec.cfg_perf_mon = 0x200000;
pva->regspec.ccq_count = 8U;
/* For VPU 0 */
pva->regspec.vpu_dbg_instr_reg_offset[0] = 0x50000U;
/* For VPU 1 */
pva->regspec.vpu_dbg_instr_reg_offset[1] = 0x70000U;
for (ccq = 0; ccq < pva->regspec.ccq_count; ccq++) {
uint32_t n_st = PVA_CFG_CCQ_STATUS_COUNT;
uint32_t ccq_base = safe_addu32(
(uint32_t)0x260000,
safe_mulu32((uint32_t)PVA_CFG_CCQ_BLOCK_SIZE, ccq));
pva->regspec.ccq_regs[ccq].status_count = n_st;
pva->regspec.ccq_regs[ccq].fifo = ccq_base;
for (st_idx = 0; st_idx < n_st; st_idx++) {
pva->regspec.ccq_regs[ccq].status[st_idx] = safe_addu32(
ccq_base,
safe_addu32((uint32_t)0x4U,
safe_mulu32((uint32_t)0x4U,
st_idx)));
}
}
pva->bl_sector_pack_format = PVA_BL_TEGRA_RAW;
}

View File

@@ -0,0 +1,46 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_T26X_H
#define PVA_KMD_T26X_H
#include "pva_kmd_device.h"
#define PVA_KMD_PVA0_T26x_REG_BASE 0x818c000000
#define PVA_KMD_PVA0_T26x_REG_SIZE 0x900000
/** Number of VMEM regions in T26X */
#define PVA_VMEM_REGION_COUNT_T26X 4U
/** Start Address of VMEM0 Bank in T26X */
#define T26x_VMEM0_START 0x40U
/** End Address of VMEM0 Bank in T26X */
#define T26x_VMEM0_END 0x20000U
/** Start Address of VMEM1 Bank in T26X */
#define T26x_VMEM1_START 0x40000U
/** End Address of VMEM1 Bank in T26X */
#define T26x_VMEM1_END 0x60000U
/** Start Address of VMEM2 Bank in T26X */
#define T26x_VMEM2_START 0x80000U
/** End Address of VMEM2 Bank in T26X */
#define T26x_VMEM2_END 0xA0000U
/** Start Address of VMEM3 Bank in T26X */
#define T26x_VMEM3_START 0xC0000U
/** End Address of VMEM3 Bank in T26X */
#define T26x_VMEM3_END 0xE0000U
/** @brief Base address for PVA0 VPU Debug Register space (CSITE_PVA0VPU) */
#define TEGRA_PVA0_VPU_DBG_BASE 0x24740000U
/** @brief Size (in bytes) of the PVA0 VPU Debug Register space (CSITE_PVA0VPU) */
#define TEGRA_PVA0_VPU_DBG_SIZE 0x40000U
void pva_kmd_device_init_t26x(struct pva_kmd_device *pva);
#endif // PVA_KMD_T26X_H

View File

@@ -0,0 +1,141 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api_cmdbuf.h"
#include "pva_api_types.h"
#include "pva_bit.h"
#include "pva_fw.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
#include "pva_utils.h"
#include "pva_kmd_tegra_stats.h"
void pva_kmd_device_init_tegra_stats(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
pva->tegra_stats_buf_size = sizeof(struct pva_kmd_fw_tegrastats);
pva->tegra_stats_memory =
pva_kmd_device_memory_alloc_map(pva->tegra_stats_buf_size, pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
ASSERT(pva->tegra_stats_memory != NULL);
err = pva_kmd_add_dram_buffer_resource(&pva->dev_resource_table,
pva->tegra_stats_memory,
&pva->tegra_stats_resource_id);
ASSERT(err == PVA_SUCCESS);
pva_kmd_update_fw_resource_table(&pva->dev_resource_table);
}
void pva_kmd_device_deinit_tegra_stats(struct pva_kmd_device *pva)
{
pva_kmd_drop_resource(&pva->dev_resource_table,
pva->tegra_stats_resource_id);
}
enum pva_error
pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva,
struct pva_kmd_tegrastats *kmd_tegra_stats)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_get_tegra_stats *cmd;
uint64_t buffer_offset = 0U;
uint32_t fence_val;
enum pva_error err;
struct pva_kmd_fw_tegrastats *fw_tegra_stats;
bool stats_enabled = pva->debugfs_context.stats_enable;
uint64_t duration = 0U;
/* Power on PVA if not already */
err = pva_kmd_device_busy(pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"pva_kmd_device_busy failed when submitting tegra stats cmd");
return err;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_get_tegra_stats(cmd, pva->tegra_stats_resource_id,
pva->tegra_stats_buf_size,
buffer_offset, stats_enabled);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("tegra stats cmd submission failed");
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when getting tegra stats");
goto err_out;
}
if (stats_enabled == false)
goto err_out;
fw_tegra_stats =
(struct pva_kmd_fw_tegrastats *)(pva->tegra_stats_memory->va);
duration = safe_subu64(fw_tegra_stats->window_end_time,
fw_tegra_stats->window_start_time);
if (duration == 0) {
pva_kmd_print_str("VPU Stats: Duration is zero");
goto err_out;
}
pva_kmd_print_str("VPU Stats");
pva_kmd_print_str_u64("Window Start Time",
fw_tegra_stats->window_start_time);
pva_kmd_print_str_u64("Window End Time",
fw_tegra_stats->window_end_time);
pva_kmd_print_str_u64("Total utilization VPU 0",
fw_tegra_stats->total_utilization[0]);
pva_kmd_print_str_u64("Total utilization VPU 1",
fw_tegra_stats->total_utilization[1]);
pva_kmd_print_str_u64(
"VPU 0 percent utilization",
safe_mulu64(100ULL, fw_tegra_stats->total_utilization[0]) /
duration);
pva_kmd_print_str_u64(
"VPU 1 percent utilization",
safe_mulu64(100ULL, fw_tegra_stats->total_utilization[1]) /
duration);
kmd_tegra_stats->average_vpu_utilization[0] =
safe_mulu64(100ULL, fw_tegra_stats->total_utilization[0]) /
duration;
kmd_tegra_stats->average_vpu_utilization[1] =
safe_mulu64(100ULL, fw_tegra_stats->total_utilization[1]) /
duration;
kmd_tegra_stats->window_start_time = fw_tegra_stats->window_start_time;
kmd_tegra_stats->window_end_time = fw_tegra_stats->window_end_time;
err = PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
pva_kmd_device_idle(pva);
return err;
}

View File

@@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_TEGRA_STATS_H
#define PVA_KMD_TEGRA_STATS_H
#include "pva_kmd_device.h"
/**
* @brief Structure which holds vpu stats information
*/
struct pva_kmd_tegrastats {
/** Holds vpu utilization as a percentage for each VPU in the PVA */
uint64_t average_vpu_utilization[PVA_NUM_PVE];
/** Start and end timestamps of the stats sampling window */
uint64_t window_start_time;
uint64_t window_end_time;
};
void pva_kmd_device_init_tegra_stats(struct pva_kmd_device *pva);
void pva_kmd_device_deinit_tegra_stats(struct pva_kmd_device *pva);
enum pva_error
pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva,
struct pva_kmd_tegrastats *kmd_tegra_stats);
#endif
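
A minimal sketch of a caller for the interface above, assuming PVA_NUM_PVE is at least 2 (the implementation prints VPU 0 and VPU 1); the function name and the choice of logging helpers are illustrative, not part of this change.

/* Illustrative only: query FW VPU utilization and log the percentages. */
static void example_log_vpu_stats(struct pva_kmd_device *pva)
{
	struct pva_kmd_tegrastats stats;

	if (pva_kmd_notify_fw_get_tegra_stats(pva, &stats) != PVA_SUCCESS) {
		pva_kmd_log_err("failed to fetch tegra stats");
		return;
	}

	/* average_vpu_utilization[] is already a percentage of the sampling window. */
	pva_kmd_print_str_u64("VPU 0 percent utilization",
			      stats.average_vpu_utilization[0]);
	pva_kmd_print_str_u64("VPU 1 percent utilization",
			      stats.average_vpu_utilization[1]);
}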

View File

@@ -0,0 +1,148 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_mutex.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_device_memory.h"
#include <pthread.h>
#include <time.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
enum pva_error pva_kmd_mutex_init(pva_kmd_mutex_t *m)
{
int ret = pthread_mutex_init(m, NULL);
ASSERT(ret == 0);
return PVA_SUCCESS;
}
void pva_kmd_mutex_lock(pva_kmd_mutex_t *m)
{
int ret = pthread_mutex_lock(m);
ASSERT(ret == 0);
}
void pva_kmd_mutex_unlock(pva_kmd_mutex_t *m)
{
int ret = pthread_mutex_unlock(m);
ASSERT(ret == 0);
}
void pva_kmd_mutex_deinit(pva_kmd_mutex_t *m)
{
int ret = pthread_mutex_destroy(m);
ASSERT(ret == 0);
}
void *pva_kmd_zalloc(uint64_t size)
{
return calloc(1, size);
}
void pva_kmd_free(void *ptr)
{
free(ptr);
}
void pva_kmd_fault(void)
{
abort();
}
void pva_kmd_sema_init(pva_kmd_sema_t *sem, uint32_t val)
{
int ret;
ret = sem_init(sem, 0 /* Only sharing in threads */, val);
ASSERT(ret == 0);
}
enum pva_error pva_kmd_sema_wait_timeout(pva_kmd_sema_t *sem,
uint32_t timeout_ms)
{
struct timespec ts;
int ret;
ret = clock_gettime(CLOCK_REALTIME, &ts);
ASSERT(ret == 0);
/* Add timeout (specified in milliseconds) to the current time */
ts.tv_sec += timeout_ms / 1000;
ts.tv_nsec += (timeout_ms % 1000) * 1000000;
/* Handle case where nanoseconds exceed 1 second */
if (ts.tv_nsec >= 1000000000) {
ts.tv_nsec -= 1000000000;
ts.tv_sec += 1;
}
wait_again:
ret = sem_timedwait(sem, &ts);
if (ret != 0) {
if (errno == ETIMEDOUT) {
pva_kmd_log_err("pva_kmd_sema_wait_timeout Timed out");
return PVA_TIMEDOUT;
} else if (errno == EINTR) {
goto wait_again;
} else {
FAULT("Unexpected sem_timedwait error");
}
}
return PVA_SUCCESS;
}
void pva_kmd_sema_deinit(pva_kmd_sema_t *sem)
{
int ret = sem_destroy(sem);
ASSERT(ret == 0);
}
void pva_kmd_sema_post(pva_kmd_sema_t *sem)
{
int ret = sem_post(sem);
ASSERT(ret == 0);
}
struct pva_kmd_device_memory *
pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva,
uint32_t iova_access_flags,
uint32_t smmu_ctx_idx)
{
struct pva_kmd_device_memory *mem;
enum pva_error err;
mem = pva_kmd_device_memory_alloc(size);
if (mem == NULL) {
goto err_out;
}
err = pva_kmd_device_memory_iova_map(mem, pva, iova_access_flags,
smmu_ctx_idx);
if (err != PVA_SUCCESS) {
goto free_mem;
}
err = pva_kmd_device_memory_cpu_map(mem);
if (err != PVA_SUCCESS) {
goto iova_unmap;
}
return mem;
iova_unmap:
pva_kmd_device_memory_iova_unmap(mem);
free_mem:
pva_kmd_device_memory_free(mem);
err_out:
return NULL;
}

View File

@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_utils.h"
void *pva_kmd_zalloc_nofail(uint64_t size)
{
void *ptr = pva_kmd_zalloc(size);
ASSERT(ptr != NULL);
return ptr;
}
void pva_kmd_log_err(const char *msg)
{
pva_kmd_print_str(msg);
}
void pva_kmd_log_err_u64(const char *msg, uint64_t val)
{
pva_kmd_print_str_u64(msg, val);
}

View File

@@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_UTILS_H
#define PVA_KMD_UTILS_H
#include "pva_kmd.h"
#include "pva_api.h"
#include "pva_kmd_shim_utils.h"
#include "pva_bit.h"
#include "pva_utils.h"
#include "pva_plat_faults.h"
#include "pva_math_utils.h"
#define SIZE_4KB (4 * 1024)
void pva_kmd_log_err(const char *msg);
void pva_kmd_log_err_u64(const char *msg, uint64_t val);
void *pva_kmd_zalloc_nofail(uint64_t size);
#endif // PVA_KMD_UTILS_H

View File

@@ -0,0 +1,368 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api_types.h"
#include "pva_kmd_vpu_app_auth.h"
#include "pva_kmd_device.h"
#include "pva_kmd_sha256.h"
#include "pva_kmd_utils.h"
enum pva_error pva_kmd_init_vpu_app_auth(struct pva_kmd_device *pva, bool ena)
{
enum pva_error err = PVA_SUCCESS;
const char *default_path = pva_kmd_get_default_allowlist();
size_t default_path_len;
struct pva_vpu_auth *pva_auth = pva_kmd_zalloc(sizeof(*pva_auth));
if (pva_auth == NULL) {
pva_kmd_log_err("Unable to allocate memory");
return PVA_NOMEM;
}
pva->pva_auth = pva_auth;
ASSERT(pva_auth != NULL);
pva_auth->vpu_hash_keys = NULL;
pva_auth->pva_auth_allow_list_parsed = false;
/* TODO: This is disabled by default. Authentication will be enabled when
 * either of the following is set:
 * 1. A debugfs node (for non-production builds)
 * 2. A device tree property (for production builds)
 */
pva_auth->pva_auth_enable = ena;
default_path_len = strnlen(default_path, ALLOWLIST_FILE_LEN);
if (default_path_len > 0U) {
(void)memcpy(pva_auth->pva_auth_allowlist_path, default_path,
default_path_len);
}
return err;
}
/**
 * \brief
 * is_key_match calculates the SHA-256 hash of the ELF data and checks whether it matches key.
 * \param[in] dataptr Pointer to the data over which the SHA-256 hash is calculated.
 * \param[in] size Length in bytes of the data over which the SHA-256 hash is calculated.
 * \param[in] key The key against which the calculated hash is compared.
 * \return The completion status of the operation. Possible values are:
 * \ref PVA_SUCCESS Success. The passed-in key matches the calculated hash.
 * \ref PVA_EACCES The passed-in key does not match the calculated hash.
 */
static enum pva_error is_key_match(uint8_t *dataptr, size_t size,
struct shakey key)
{
enum pva_error err = PVA_SUCCESS;
int32_t status = 0;
uint32_t calc_key[8];
size_t off;
struct sha256_ctx ctx1;
struct sha256_ctx ctx2;
sha256_init(&ctx1);
off = (size / 64U) * 64U;
if (off > 0U) {
sha256_update(&ctx1, dataptr, off);
}
/* clone */
sha256_copy(&ctx1, &ctx2);
/* finalize with leftover, if any */
sha256_finalize(&ctx2, dataptr + off, size % 64U, calc_key);
status = memcmp((void *)&(key.sha_key), (void *)calc_key,
NVPVA_SHA256_DIGEST_SIZE);
if (status != 0) {
err = PVA_EACCES;
}
return err;
}
/**
 * \brief
 * Checks every key associated with match_hash against the calculated
 * SHA-256 hash of dataptr until a match is found.
 * \param[in] pallkeys Pointer to the array of SHA keys, \ref shakey
 * \param[in] dataptr Pointer to the ELF data
 * \param[in] size Length (in bytes) of the ELF data
 * \param[in] match_hash Pointer to the matching hash structure, \ref struct vpu_hash_vector.
 * \return Matching status of the calculated hash against the keys associated
 * with match_hash. Possible values:
 * - PVA_SUCCESS if one of the keys associated with match_hash matches the
 *   calculated SHA-256 hash.
 * - PVA_EACCES if no match is found.
 * - PVA_ERANGE if the key index range in match_hash overflows.
 */
static enum pva_error
check_all_keys_for_match(struct shakey *pallkeys, uint8_t *dataptr, size_t size,
const struct vpu_hash_vector *match_hash)
{
enum pva_error err = PVA_SUCCESS;
uint32_t idx;
uint32_t count;
uint32_t end;
struct shakey key;
uint32_t i;
idx = match_hash->index;
count = match_hash->count;
end = idx + count;
if (end < idx) {
err = PVA_ERANGE;
goto fail;
}
for (i = 0; i < count; i++) {
key = pallkeys[idx + i];
err = is_key_match(dataptr, size, key);
if (err == PVA_SUCCESS) {
break;
}
}
fail:
return err;
}
/**
* @brief
* Helper function for \ref binary_search.
* Uses a specific field in @ref pkey to compare with the same field in @ref pbase.
* @param[in] pkey pointer to the object that needs to be compared.
* @param[in] pbase pointer to the starting element of the array.
* @retval
* - -1 when @ref pkey is less than starting element of array pointed to by @ref pbase.
* - 1 when @ref pkey is greater than starting element of array pointed to by @ref pbase.
* - 0 when @ref pkey is equal to starting element of array pointed to by @ref pbase.
*/
static int32_t compare_hash_value(const struct vpu_hash_vector *pkey,
const struct vpu_hash_vector *pbase)
{
int32_t ret;
if (pkey->crc32_hash < pbase->crc32_hash) {
ret = -1;
} else if (pkey->crc32_hash > pbase->crc32_hash) {
ret = 1;
} else {
ret = 0;
}
return ret;
}
/**
* @brief
* calculates crc32.
* @param[in] crc initial crc value. usually 0.
* @param[in] buf pointer to the buffer whose crc32 to be calculated.
* @param[in] len length (in bytes) of data at @ref buf.
* @retval value of calculated crc32.
*/
static uint32_t pva_crc32(uint32_t crc, uint8_t *buf, size_t len)
{
int32_t k;
size_t count;
count = len;
crc = ~crc;
while (count != 0U) {
crc ^= *buf++;
for (k = 0; k < 8; k++) {
crc = ((crc & 1U) == 1U) ? (crc >> 1U) ^ 0xedb88320U :
crc >> 1U;
}
count--;
}
return ~crc;
}
static const struct vpu_hash_vector *
binary_search(const struct vpu_hash_vector *key,
const struct vpu_hash_vector *base, size_t num_elems,
int32_t (*compare)(const struct vpu_hash_vector *pkey,
const struct vpu_hash_vector *pbase))
{
size_t low = 0U;
size_t high;
if (num_elems == 0U) {
return NULL;
}
high = num_elems - 1U;
for (;;) {
const struct vpu_hash_vector *mid_elem;
int32_t r;
size_t mid = low + ((high - low) / 2U);
mid_elem = &(base[mid]);
r = compare(key, mid_elem);
if (r < 0) {
if (mid == 0U) {
return NULL;
}
high = mid - 1U;
} else if (r > 0) {
low = mid + 1U;
if (low < mid || low > high) {
return NULL;
}
} else {
return mid_elem;
}
}
}
static enum pva_error
pva_kmd_vpu_check_sha256_key(struct vpu_hash_key_pair *vpu_hash_keys,
uint8_t *dataptr, size_t size)
{
enum pva_error err = PVA_SUCCESS;
struct vpu_hash_vector cal_Hash;
const struct vpu_hash_vector *match_Hash;
cal_Hash.crc32_hash = pva_crc32(0L, dataptr, size);
match_Hash = (const struct vpu_hash_vector *)binary_search(
&cal_Hash, vpu_hash_keys->pvpu_hash_vector,
vpu_hash_keys->num_hashes, compare_hash_value);
if (match_Hash == NULL) {
pva_kmd_log_err("No Hash Match Found");
err = PVA_EACCES;
goto fail;
}
err = check_all_keys_for_match(vpu_hash_keys->psha_key, dataptr, size,
match_Hash);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Match key not found");
}
fail:
return err;
}
enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva,
uint8_t *dataptr, size_t size)
{
enum pva_error err = PVA_SUCCESS;
struct pva_vpu_auth *pva_auth;
ASSERT(pva != NULL);
ASSERT(dataptr != NULL);
pva_auth = pva->pva_auth;
ASSERT(pva_auth != NULL);
pva_kmd_mutex_lock(&pva_auth->allow_list_lock);
if (pva_auth->pva_auth_enable) {
pva_dbg_printf("App authentication enabled");
if (pva_auth->pva_auth_allow_list_parsed == false) {
err = pva_kmd_allowlist_parse(pva);
if (err == PVA_SUCCESS) {
pva_dbg_printf(
"App authentication allowlist parsing successfull");
} else {
pva_dbg_printf(
"App authentication allowlist parsing failed");
}
}
if (err == PVA_SUCCESS) {
err = pva_kmd_vpu_check_sha256_key(
pva_auth->vpu_hash_keys, (uint8_t *)dataptr,
size);
if (err == PVA_SUCCESS) {
pva_dbg_printf(
"App authentication successfull");
} else {
pva_dbg_printf("App authentication failed : %d",
err);
}
}
} else {
pva_dbg_printf("App authentication disabled");
}
pva_kmd_mutex_unlock(&pva_auth->allow_list_lock);
return err;
}
static void pva_kmd_allowlist_destroy(struct pva_vpu_auth *pva_auth)
{
if (pva_auth->vpu_hash_keys != NULL) {
pva_kmd_free(pva_auth->vpu_hash_keys->ptr_file_data);
pva_kmd_free(pva_auth->vpu_hash_keys);
pva_auth->vpu_hash_keys = NULL;
}
}
enum pva_error pva_kmd_allowlist_parse(struct pva_kmd_device *pva)
{
struct pva_vpu_auth *pva_auth = pva->pva_auth;
enum pva_error err = PVA_SUCCESS;
uint8_t *data = NULL;
uint64_t size = 0;
struct vpu_hash_key_pair *vhashk;
size_t vkey_size = 0;
size_t vhash_size = 0;
ASSERT(pva_auth != NULL);
/* Destroy previously parsed allowlist data */
pva_kmd_allowlist_destroy(pva_auth);
err = pva_kmd_auth_allowlist_load(
pva, pva_auth->pva_auth_allowlist_path, &data, &size);
if (err != PVA_SUCCESS) {
if (data != NULL) {
pva_kmd_free(data);
}
goto fail;
}
vhashk = (struct vpu_hash_key_pair *)pva_kmd_zalloc(
sizeof(struct vpu_hash_key_pair));
if (vhashk == NULL) {
pva_kmd_log_err("Unable to allocate memory");
pva_kmd_free(data);
err = PVA_NOMEM;
goto fail;
}
vhashk->ptr_file_data = data;
vhashk->num_keys = ((uint32_t *)(uintptr_t)data)[0];
vhashk->psha_key =
(struct shakey *)(uintptr_t)(data + sizeof(uint32_t));
vkey_size = sizeof(struct shakey) * (vhashk->num_keys);
vhashk->num_hashes = ((uint32_t *)(uintptr_t)((char *)vhashk->psha_key +
vkey_size))[0];
vhashk->pvpu_hash_vector =
(struct vpu_hash_vector
*)(uintptr_t)((char *)(vhashk->psha_key) + vkey_size +
sizeof(uint32_t));
vhash_size = sizeof(struct vpu_hash_vector) * (vhashk->num_hashes);
if ((sizeof(uint32_t) + sizeof(uint32_t) + vkey_size + vhash_size) !=
size) {
pva_kmd_free(data);
pva_kmd_free(vhashk);
err = PVA_EACCES;
goto fail;
}
pva_auth->pva_auth_allow_list_parsed = true;
pva_auth->vpu_hash_keys = vhashk;
fail:
return err;
}
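
For reference, the allowlist blob layout implied by the pointer arithmetic in pva_kmd_allowlist_parse() above can be summarized as follows; this is derived from the parser only and is not an authoritative file-format specification.

/*
 * Allowlist blob layout implied by pva_kmd_allowlist_parse():
 *
 *   offset 0                 : uint32_t num_keys
 *   offset 4                 : struct shakey keys[num_keys]        (32 bytes each)
 *   offset 4 + 32 * num_keys : uint32_t num_hashes
 *   offset 8 + 32 * num_keys : struct vpu_hash_vector hashes[num_hashes]
 *
 * The total blob size must equal
 *   2 * sizeof(uint32_t) + num_keys * sizeof(struct shakey)
 *                        + num_hashes * sizeof(struct vpu_hash_vector),
 * otherwise parsing fails with PVA_EACCES.
 */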

View File

@@ -0,0 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef PVA_KMD_VPU_APP_AUTH_H
#define PVA_KMD_VPU_APP_AUTH_H
#include "pva_kmd_shim_vpu_app_auth.h"
#include "pva_kmd_mutex.h"
/**
* Maximum length of allowlist file path
*/
#define ALLOWLIST_FILE_LEN 128U
/**
* Size of sha256 keys in bytes.
*/
#define NVPVA_SHA256_DIGEST_SIZE 32U
struct pva_kmd_device;
/**
 * An entry in the array of VPU hashes: a CRC32 hash plus the index range of
 * SHA keys associated with it.
 */
struct vpu_hash_vector {
/*! Number of Keys for this crc32_hash */
uint32_t count;
/*! Starting Index into Keys Array */
uint32_t index;
/*! CRC32 hash value */
uint32_t crc32_hash;
};
/**
* Stores sha256 key
*/
struct shakey {
/** 256-bit (32 Bytes) SHA Key */
uint8_t sha_key[NVPVA_SHA256_DIGEST_SIZE];
};
/**
* Stores Hash Vector and Keys vector
*/
struct vpu_hash_key_pair {
/*! Total number of Keys in binary file */
uint32_t num_keys;
/*! pointer to SHA keys Array. */
struct shakey *psha_key;
/*! Total number of Hashes in binary file */
uint32_t num_hashes;
/*! Pointer to array of hashes */
struct vpu_hash_vector *pvpu_hash_vector;
/*! pointer to data loaded from file (QNX Specific)*/
uint8_t *ptr_file_data;
};
/**
* Stores all the information related to pva vpu elf authentication.
*/
struct pva_vpu_auth {
/** Stores crc32-sha256 of ELFs */
struct vpu_hash_key_pair *vpu_hash_keys;
pva_kmd_mutex_t allow_list_lock;
/** Flag to check if authentication is enabled */
bool pva_auth_enable;
/** Flag to track if the allow list is already parsed */
bool pva_auth_allow_list_parsed;
/** Stores the path to allowlist binary file. */
char pva_auth_allowlist_path[ALLOWLIST_FILE_LEN + 1U];
};
enum pva_error pva_kmd_init_vpu_app_auth(struct pva_kmd_device *pva, bool ena);
enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva,
uint8_t *dataptr, size_t size);
enum pva_error pva_kmd_allowlist_parse(struct pva_kmd_device *pva);
#endif

View File

@@ -0,0 +1,128 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_device.h"
#include "pva_math_utils.h"
#include "pva_kmd_vpu_ocd.h"
#include "pva_kmd_silicon_utils.h"
#define PVA_DEBUG_APERTURE_INDEX 1U
int pva_kmd_vpu_ocd_open(struct pva_kmd_device *dev)
{
int retval = 0;
enum pva_error err;
err = pva_kmd_device_busy(dev);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"pva_kmd_vpu_ocd_open pva_kmd_device_busy failed");
retval = -1;
goto out;
}
out:
return retval;
}
int pva_kmd_vpu_ocd_release(struct pva_kmd_device *dev)
{
pva_kmd_device_idle(dev);
return 0;
}
int64_t pva_kmd_vpu_ocd_write(struct pva_kmd_device *dev, void *file_data,
const uint8_t *data, uint64_t offset,
uint64_t size)
{
struct pva_vpu_ocd_write_param write_param;
uint32_t i;
unsigned long retval;
uint32_t reg_offset;
uint32_t const *vpu_ocd_offset = (uint32_t *)file_data;
retval = pva_kmd_copy_data_from_user(&write_param, data,
sizeof(write_param));
if (retval != 0u) {
pva_kmd_log_err("Failed to copy write buffer from user");
return -1;
}
if (write_param.n_write > VPU_OCD_MAX_NUM_DATA_ACCESS) {
pva_kmd_log_err_u64("pva: too many vpu dbg reg write",
write_param.n_write);
return -1;
}
/* Write instruction first */
pva_kmd_aperture_write(dev, PVA_DEBUG_APERTURE_INDEX, *vpu_ocd_offset,
write_param.instr);
/*
* Write data
* if there's 1 word, write to addr 0x4,
* if there's 2 words, write to addr 2 * 0x4,
* ...
*/
reg_offset = safe_addu32((uint32_t)*vpu_ocd_offset,
safe_mulu32(write_param.n_write,
(uint32_t)sizeof(uint32_t)));
for (i = 0u; i < write_param.n_write; i++) {
pva_kmd_aperture_write(dev, PVA_DEBUG_APERTURE_INDEX,
reg_offset, write_param.data[i]);
}
return 0;
}
int64_t pva_kmd_vpu_ocd_read(struct pva_kmd_device *dev, void *file_data,
uint8_t *data, uint64_t offset, uint64_t size)
{
struct pva_vpu_ocd_read_param read_param;
unsigned long retval;
uint32_t i;
uint32_t reg_offset;
uint32_t const *vpu_ocd_offset = (uint32_t *)file_data;
retval = pva_kmd_copy_data_from_user(&read_param, data,
sizeof(read_param));
if (retval != 0u) {
pva_kmd_log_err("failed to copy read buffer from user");
return -1;
}
if (read_param.n_read > VPU_OCD_MAX_NUM_DATA_ACCESS) {
pva_kmd_log_err_u64("pva: too many vpu dbg reg read",
read_param.n_read);
return -1;
}
/*
* Read data
* if there's 1 word, read from addr 0x4,
* if there's 2 words, read from addr 2 * 0x4,
* ...
*/
reg_offset = safe_addu32((uint32_t)*vpu_ocd_offset,
safe_mulu32(read_param.n_read,
(uint32_t)sizeof(uint32_t)));
for (i = 0; i < read_param.n_read; i++) {
read_param.data[i] = pva_kmd_aperture_read(
dev, PVA_DEBUG_APERTURE_INDEX, reg_offset);
}
retval = pva_kmd_copy_data_to_user(data, &read_param,
sizeof(read_param));
if (retval != 0u) {
pva_kmd_log_err("failed to copy read buffer to user");
return -1;
}
return 0;
}

View File

@@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_VPU_OCD_H
#define PVA_KMD_VPU_OCD_H
#define VPU_OCD_MAX_NUM_DATA_ACCESS 7U
struct pva_vpu_ocd_write_param {
uint32_t instr;
uint32_t n_write;
uint32_t data[VPU_OCD_MAX_NUM_DATA_ACCESS];
};
struct pva_vpu_ocd_read_param {
uint32_t n_read;
uint32_t data[VPU_OCD_MAX_NUM_DATA_ACCESS];
};
int64_t pva_kmd_vpu_ocd_read(struct pva_kmd_device *dev, void *file_data,
uint8_t *data, uint64_t offset, uint64_t size);
int64_t pva_kmd_vpu_ocd_write(struct pva_kmd_device *dev, void *file_data,
const uint8_t *data, uint64_t offset,
uint64_t size);
int pva_kmd_vpu_ocd_open(struct pva_kmd_device *dev);
int pva_kmd_vpu_ocd_release(struct pva_kmd_device *dev);
#endif

View File

@@ -0,0 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_PLAT_FAULTS_H
#define PVA_PLAT_FAULTS_H
#include "pva_kmd_shim_utils.h"
#define ASSERT(x) \
if (!(x)) { \
pva_kmd_print_str_u64("PVA KMD ASSERT at " __FILE__, \
__LINE__); \
pva_kmd_fault(); \
}
#define FAULT(msg) \
do { \
pva_kmd_print_str_u64("PVA KMD FAULT at " __FILE__, __LINE__); \
pva_kmd_print_str(msg); \
pva_kmd_fault(); \
} while (0)
#define ASSERT_WITH_LOC(x, err_file, err_line) \
if (!(x)) { \
pva_kmd_print_str_u64("Error at line", err_line); \
pva_kmd_print_str(err_file); \
pva_kmd_print_str("PVA KMD ASSERT"); \
pva_kmd_fault(); \
}
#endif

View File

@@ -0,0 +1,112 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_DEVICE_MEMORY_H
#define PVA_KMD_DEVICE_MEMORY_H
#include "pva_kmd.h"
#include "pva_api.h"
struct pva_kmd_context;
/**
* @brief KMD device memory structure.
*
* This structure is essentially a base object. More information is needed to
* manage memory allocations but the required information is platform dependent.
* Therefore, each platform will have a derived implementation and this
* structure is just part of it.
*/
struct pva_kmd_device_memory {
uint64_t iova; /**< IOVA address if mapped. Otherwise 0 */
void *va; /**< CPU address if mapped. Otherwise 0. */
uint64_t size; /**< Size of the mapping. */
struct pva_kmd_device *pva; /**< The PVA this memory is mapped to. */
uint32_t smmu_ctx_idx; /**< The SMMU context this memory is mapped to. */
};
/**
* This API is not available in Linux and should not be used by the common code.
*/
struct pva_kmd_device_memory *pva_kmd_device_memory_alloc(uint64_t size);
/**
* Allocate memory and map to both IOVA space and CPU space.
*
* @note We cannot just allocate without mapping or just mapping to one
* space. This restriction comes from the Linux dma_alloc_coherent API, which
* allocates and maps at the same time.
*
* @note iova_access_flag is only supported by QNX implementation.
*
* @param size Size of the allocation
* @param pva The PVA device to map to
* @param iova_access_flags Access flags for IOVA space. PVA_ACCESS_RO or
* PVA_ACCESS_RW. For CPU space, it's always
* read and write.
* @param smmu_ctx_idx The SMMU context to map to
*/
struct pva_kmd_device_memory *
pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva,
uint32_t iova_access_flags,
uint32_t smmu_ctx_idx);
/** @brief Acquire memory shared from UMD.
*
* This function takes a shared ownership of the memory allocation so that KMD
* can keep the allocation alive even after UMD closed the memory handle.
*
* @param memory_handle Memory handle passed from user space. On Linux, this is
* a file descriptor associated with dma_buf object. On
* QNX, this is NvRM import ID.
* @param offset Offset into the allocation. This affects the mapped address.
* @param size Size of the mapping, which can be smaller than the size of the
* allocation.
* @param ctx The user from whom we are importing the memory.
*/
struct pva_kmd_device_memory *
pva_kmd_device_memory_acquire(uint64_t memory_handle, uint64_t offset,
uint64_t size, struct pva_kmd_context *ctx);
/**
* @brief Release the memory.
*
* This function frees memory allocated from acquire or alloc_map. If there are
* active CPU mapping or IOVA mapping, this function will unmap them.
*
* @param memory Pointer to the memory to release.
*/
void pva_kmd_device_memory_free(struct pva_kmd_device_memory *memory);
/**
* @brief Map the memory to CPU space.
*/
enum pva_error
pva_kmd_device_memory_cpu_map(struct pva_kmd_device_memory *memory);
/**
* @brief Unmap the memory from CPU space.
*
* Unmapping memory that is not mapped will trigger an abort.
*/
void pva_kmd_device_memory_cpu_unmap(struct pva_kmd_device_memory *memory);
/**
* @brief Map the memory to IOVA space.
*/
enum pva_error
pva_kmd_device_memory_iova_map(struct pva_kmd_device_memory *memory,
struct pva_kmd_device *pva,
uint32_t access_flags, uint32_t smmu_ctx_idx);
/**
* @brief Unmap the memory from IOVA space.
*
* Unmapping memory that is not mapped will trigger an abort.
*/
void pva_kmd_device_memory_iova_unmap(struct pva_kmd_device_memory *memory);
#endif // PVA_KMD_DEVICE_MEMORY_H
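
A minimal sketch of the allocate/use/free pairing described above, modeled on the call in pva_kmd_device_init_tegra_stats(); the 4 KiB size is arbitrary and the use of PVA_R5_SMMU_CONTEXT_ID simply mirrors that call site.

/* Illustrative only: allocate a DMA-visible scratch buffer, use it, release it. */
static enum pva_error example_scratch_buffer(struct pva_kmd_device *pva)
{
	struct pva_kmd_device_memory *mem;

	mem = pva_kmd_device_memory_alloc_map(4096U, pva, PVA_ACCESS_RW,
					      PVA_R5_SMMU_CONTEXT_ID);
	if (mem == NULL) {
		return PVA_NOMEM;
	}

	/* ... fill mem->va on the CPU, hand mem->iova to firmware ... */

	/* Freeing also unmaps any live CPU and IOVA mappings. */
	pva_kmd_device_memory_free(mem);
	return PVA_SUCCESS;
}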

View File

@@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_CCQ_H
#define PVA_KMD_SHIM_CCQ_H
#include "pva_api.h"
struct pva_kmd_device;
/**
* @brief Push a 64 bit entry to CCQ FIFO.
*
* Push low 32 bits first and then high 32 bits.
*
* @note The caller is responsible for checking if CCQ has enough spaces.
*
*/
void pva_kmd_ccq_push(struct pva_kmd_device *pva, uint8_t ccq_id,
uint64_t ccq_entry);
/**
* @brief Get the number of available spaces in the CCQ.
*
* One CCQ entry is 64 bits. One CCQ can hold up to 4 entries. Therefore, this
* function returns values from 0 to 4.
*/
uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id);
#endif // PVA_KMD_SHIM_CCQ_H
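
Because the push API does not check for space (see the note above), callers are expected to poll pva_kmd_get_ccq_space() first. A minimal sketch under that assumption; a real caller would bound the wait rather than spin forever.

/* Illustrative only: push one 64-bit entry to CCQ 0 once the FIFO has room. */
static void example_ccq_push(struct pva_kmd_device *pva, uint64_t entry)
{
	while (pva_kmd_get_ccq_space(pva, 0U) == 0U) {
		/* busy-wait; production code would add a timeout or sleep */
	}
	pva_kmd_ccq_push(pva, 0U, entry);
}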

View File

@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_DEBUGFS_H
#define PVA_KMD_SHIM_DEBUGFS_H
#include "pva_api.h"
#include "pva_kmd_tegra_stats.h"
void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name,
bool *val);
void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name,
uint32_t *val);
void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name,
struct pva_kmd_file_ops *fops);
void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva);
unsigned long pva_kmd_copy_data_from_user(void *dst, const void *src,
uint64_t size);
unsigned long pva_kmd_copy_data_to_user(void *to, const void *from,
unsigned long size);
unsigned long pva_kmd_strtol(const char *str, int base);
#endif //PVA_KMD_SHIM_DEBUGFS_H

View File

@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_INIT_H
#define PVA_KMD_SHIM_INIT_H
#include "pva_api.h"
struct pva_kmd_device;
struct pva_kmd_file_ops;
/* TODO: remove plat_init APIs. We should just pass in plat_data directly to
* pva_kmd_device_create. */
void pva_kmd_device_plat_init(struct pva_kmd_device *pva);
void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva);
void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint32_t *syncpt_value);
void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint64_t *syncpt_iova);
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva);
/**
* @brief Power on PVA cluster.
*/
enum pva_error pva_kmd_power_on(struct pva_kmd_device *pva);
/**
* @brief Power off PVA cluster.
*/
void pva_kmd_power_off(struct pva_kmd_device *pva);
/**
* @brief Initialize firmware.
*
* This function initializes firmware. On silicon, this includes
* - power on R5,
* - load firmware,
* - bind interrupts,
* - and wait for firmware boot to complete.
*
* @param pva pointer to the PVA device to initialize
*/
enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva);
/**
* @brief De-init firmware.
*
* This function de-initializes firmware. On silicon, this includes
* - free interrupts,
* - power off R5,
* - and free firmware memories.
*
* @param pva pointer to the PVA device to de-initialize
*/
void pva_kmd_deinit_fw(struct pva_kmd_device *pva);
#endif // PVA_KMD_SHIM_INIT_H

View File

@@ -0,0 +1,142 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_SILICON_H
#define PVA_KMD_SHIM_SILICON_H
#include "pva_api.h"
#include "pva_kmd_regs.h"
struct pva_kmd_device;
/**
* @file This file defines silicon APIs.
*
* Silicon APIs are only implemented by platforms that closely resemble the
* silicon PVA, a.k.a Linux, QNX and SIM platforms. Silicon APIs are used to
* implement message APIs and some init APIs.
*
* On native platform, message APIs are implemented differently. Therefore,
* native platform does not need to implement silicon APIs.
*/
/**
* @brief Write to a register in a MMIO region.
*
* @param pva pointer to the PVA cluster.
* @param aperture the MMIO region.
* @param addr the register offset in the MMIO region.
* @param val value to write.
*/
void pva_kmd_aperture_write(struct pva_kmd_device *pva,
enum pva_kmd_reg_aperture aperture, uint32_t addr,
uint32_t val);
/**
* @brief Read from a register in a MMIO region.
*
* @param pva pointer to the PVA cluster.
* @param aperture the MMIO region.
* @param addr the register offset in the MMIO region.
*
* @return the value of the register.
*/
uint32_t pva_kmd_aperture_read(struct pva_kmd_device *pva,
enum pva_kmd_reg_aperture aperture,
uint32_t addr);
/**
* @brief PVA's interrupt lines.
*/
enum pva_kmd_intr_line {
/** Interrupt line from SEC block. We receive mailbox interrupts from
* this line. */
PVA_KMD_INTR_LINE_SEC_LIC = 0,
PVA_KMD_INTR_LINE_CCQ0,
PVA_KMD_INTR_LINE_CCQ1,
PVA_KMD_INTR_LINE_CCQ2,
PVA_KMD_INTR_LINE_CCQ3,
PVA_KMD_INTR_LINE_CCQ4,
PVA_KMD_INTR_LINE_CCQ5,
PVA_KMD_INTR_LINE_CCQ6,
PVA_KMD_INTR_LINE_CCQ7,
PVA_KMD_INTR_LINE_COUNT,
};
/**
* @brief Interrupt handler function prototype.
*/
typedef void (*pva_kmd_intr_handler_t)(void *data);
/**
* @brief Bind an interrupt handler to an interrupt line.
*
* Interrupt will be enabled after binding.
*/
enum pva_error pva_kmd_bind_intr_handler(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line,
pva_kmd_intr_handler_t handler,
void *data);
/**
* @brief Enable an interrupt line.
*/
void pva_kmd_enable_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line);
/**
* @brief Disable an interrupt line.
*/
void pva_kmd_disable_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line);
/**
* @brief Free an interrupt line.
*
* This will disable the interrupt line and unbind the handler.
*/
void pva_kmd_free_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line);
/**
* @brief Read firmware binary from file system.
*
* Firmware binary is loaded into pva->fw_bin_mem, which is directly accessible
* by R5.
*
* KMD will free pva->fw_bin_mem during firmware deinit.
*/
enum pva_error pva_kmd_read_fw_bin(struct pva_kmd_device *pva);
/**
* @brief Get base address of read only syncpoints.
*/
uint32_t pva_kmd_get_syncpt_ro_offset(struct pva_kmd_device *pva);
/**
* @brief Get base address of read write syncpoints.
*/
uint32_t pva_kmd_get_syncpt_rw_offset(struct pva_kmd_device *pva);
/**
* @brief Configure EVP, Segment config registers and SCR registers.
*
* This function configures the EVP, Segment config registers and SCR registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva);
/**
* @brief Configure SID registers.
*
* This function configures the SID registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_sid_regs(struct pva_kmd_device *pva);
#endif // PVA_KMD_SHIM_SILICON_H
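
A minimal sketch of the interrupt API above; the handler body is a placeholder and the choice of the SEC/LIC line is only an example.

/* Illustrative only: bind and later free the SEC/LIC interrupt line. */
static void example_sec_intr_handler(void *data)
{
	struct pva_kmd_device *pva = data;

	/* ... read/clear mailbox status via pva_kmd_aperture_read()/write() ... */
	(void)pva;
}

static enum pva_error example_bind_sec_intr(struct pva_kmd_device *pva)
{
	/* Binding also enables the interrupt line. */
	return pva_kmd_bind_intr_handler(pva, PVA_KMD_INTR_LINE_SEC_LIC,
					 example_sec_intr_handler, pva);
}

/* On teardown: pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC); */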

View File

@@ -0,0 +1,72 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_UTILS_H
#define PVA_KMD_SHIM_UTILS_H
#include "pva_api.h"
/**
* @brief Allocate memory for KMD's private use.
*
* Memory will be zero initialized.
*/
void *pva_kmd_zalloc(uint64_t size);
/**
* @brief Free memory allocated by pva_kmd_zalloc.
*/
void pva_kmd_free(void *ptr);
/**
* @brief Print a string.
*
* This function is used for logging errors, enabled even in safety environment.
* For debug print, use pva_dbg_printf.
*
* @param str The string to print.
*/
void pva_kmd_print_str(const char *str);
/**
* @brief Print a string followed by a 64-bit unsigned number.
*
* This function is used for logging errors, enabled even in safety environment.
* For debug print, use pva_dbg_printf.
*
* @param str The string to print.
* @param n The number to print.
*/
void pva_kmd_print_str_u64(const char *str, uint64_t n);
/**
* @brief Fault KMD.
*
* Abort KMD due to critical unrecoverable error.
*/
void pva_kmd_fault(void) __attribute__((noreturn));
/**
* @brief Sleep for some microseconds.
*
* @param us The number of microseconds to sleep.
*/
void pva_kmd_sleep_us(uint64_t us);
#if defined(__KERNEL__)
#include <linux/nospec.h>
#else
static inline uint32_t array_index_nospec(uint32_t index, uint32_t size)
{
return index < size ? index : 0;
}
#endif
#endif // PVA_KMD_SHIM_UTILS_H

View File

@@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved.
*/
#ifndef PVA_KMD_SHIM_VPU_APP_AUTH_H
#define PVA_KMD_SHIM_VPU_APP_AUTH_H
#include "pva_api_types.h"
struct pva_kmd_device;
const char *pva_kmd_get_default_allowlist(void);
enum pva_error pva_kmd_auth_allowlist_load(struct pva_kmd_device *pva,
const char *file_name,
uint8_t **hash_keys_data,
uint64_t *psize);
#endif

View File

@@ -0,0 +1,69 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_THREAD_SEMA_H
#define PVA_KMD_THREAD_SEMA_H
#include "pva_api.h"
#if defined(__KERNEL__) /* For Linux */
#include <linux/semaphore.h>
typedef struct semaphore pva_kmd_sema_t;
#else /* For user space code, including QNX KMD */
#include <semaphore.h>
/* Mutex */
typedef sem_t pva_kmd_sema_t;
#endif
/**
* @brief Initialize a semaphore.
*
* @param sem Pointer to the semaphore.
* @param val Initial value of the semaphore.
*/
void pva_kmd_sema_init(pva_kmd_sema_t *sem, uint32_t val);
/**
* @brief Wait on a semaphore.
*
* Decrement the semaphore count. If the count is zero, the caller will block
* until the semaphore is posted or the timeout expires.
*
* @param sem Pointer to the semaphore.
* @param timeout_ms Timeout in milliseconds.
*
* @retval PVA_SUCCESS if the semaphore was successfully acquired.
* @retval PVA_TIMEDOUT if the semaphore was not acquired within the timeout.
*/
enum pva_error pva_kmd_sema_wait_timeout(pva_kmd_sema_t *sem,
uint32_t timeout_ms);
/**
* @brief Signal a semaphore.
*
* Increment the semaphore count.
*
* @param sem Pointer to the semaphore.
*/
void pva_kmd_sema_post(pva_kmd_sema_t *sem);
/**
* @brief Deinitialize a semaphore.
*
* @param sem Pointer to the semaphore.
*/
void pva_kmd_sema_deinit(pva_kmd_sema_t *sem);
#endif // PVA_KMD_THREAD_SEMA_H
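
A minimal sketch of the init/post/wait/deinit lifecycle declared above, for a single completion signal between two threads; the 100 ms timeout is arbitrary.

/* Illustrative only: consumer waits up to 100 ms for a producer to post. */
static enum pva_error example_wait_for_completion(pva_kmd_sema_t *done)
{
	return pva_kmd_sema_wait_timeout(done, 100U);
}

/*
 * Producer side:
 *   pva_kmd_sema_init(&done, 0U);  - once, count starts at 0
 *   pva_kmd_sema_post(&done);      - wakes one waiter
 *   pva_kmd_sema_deinit(&done);    - once no waiters remain
 */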

View File

@@ -0,0 +1,183 @@
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_H
#define PVA_KMD_H
#include "pva_api.h"
#include "pva_fw.h"
#include "pva_constants.h"
#include "pva_math_utils.h"
/* KMD API: context init */
struct pva_kmd_context_init_in_args {
uint32_t resource_table_capacity;
};
struct pva_kmd_context_init_out_args {
enum pva_error error;
uint64_t ccq_shm_hdl;
};
struct pva_kmd_syncpt_register_out_args {
enum pva_error error;
uint32_t syncpt_ro_res_id;
uint32_t syncpt_rw_res_id;
uint32_t synpt_size;
uint32_t synpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT];
uint32_t num_ro_syncpoints;
};
/**
* Calculates the total memory size required for a PVA submission queue.
* This includes the size of the queue header and the combined size of all command buffer submission info structures.
*
* @param x The number of command buffer submission info structures.
* @return The total memory size in bytes.
*/
static inline uint32_t pva_get_submission_queue_memory_size(uint32_t x)
{
uint32_t submit_info_size =
(uint32_t)sizeof(struct pva_fw_cmdbuf_submit_info);
uint32_t num_submit_infos = safe_mulu32(x, submit_info_size);
uint32_t header_size =
(uint32_t)sizeof(struct pva_fw_submit_queue_header);
return safe_addu32(header_size, num_submit_infos);
}
/* KMD API: queue create */
struct pva_kmd_queue_create_in_args {
uint32_t max_submission_count;
uint64_t queue_memory_handle;
uint64_t queue_memory_offset;
};
struct pva_kmd_queue_create_out_args {
enum pva_error error;
uint32_t queue_id;
uint32_t syncpt_fence_counter;
};
/* KMD API: queue destroy */
struct pva_kmd_queue_destroy_in_args {
uint32_t queue_id;
};
struct pva_kmd_queue_destroy_out_args {
enum pva_error error;
};
struct pva_kmd_memory_register_in_args {
enum pva_memory_segment segment;
uint32_t access_flags;
uint64_t memory_handle;
uint64_t offset;
uint64_t size;
};
/* KMD API: executable */
struct pva_kmd_executable_register_in_args {
uint32_t size;
};
struct pva_kmd_executable_get_symbols_in_args {
uint32_t exec_resource_id;
};
struct pva_kmd_executable_get_symbols_out_args {
enum pva_error error;
uint32_t num_symbols;
/* Followed by <num_symbols> of struct pva_symbol_info */
};
/* KMD API: DMA config */
struct pva_kmd_dma_config_register_in_args {
struct pva_dma_config_header dma_config_header;
/* Followed by hwseq words, channels, descriptors, etc. */
};
struct pva_kmd_register_out_args {
enum pva_error error;
uint32_t resource_id;
};
struct pva_kmd_exec_register_out_args {
enum pva_error error;
uint32_t resource_id;
uint32_t num_symbols;
};
struct pva_kmd_unregister_in_args {
uint32_t resource_id;
};
enum pva_kmd_op_type {
PVA_KMD_OP_CONTEXT_INIT,
PVA_KMD_OP_QUEUE_CREATE,
PVA_KMD_OP_QUEUE_DESTROY,
PVA_KMD_OP_EXECUTABLE_GET_SYMBOLS,
PVA_KMD_OP_MEMORY_REGISTER,
PVA_KMD_OP_SYNPT_REGISTER,
PVA_KMD_OP_EXECUTABLE_REGISTER,
PVA_KMD_OP_DMA_CONFIG_REGISTER,
PVA_KMD_OP_UNREGISTER,
PVA_KMD_OP_MAX,
};
/**
* The header of a KMD operation
*/
struct pva_kmd_op_header {
enum pva_kmd_op_type op_type; /**< Type of the KMD operation */
};
/**
* The header of a KMD response
*/
struct pva_kmd_response_header {
uint32_t rep_size; /**< Size of the response, including the header */
};
enum pva_kmd_ops_mode {
/**
* Only one operation is allowed. The
* operation will be done synchronously.
* KMD will wait for the fence if
* necessary. */
PVA_KMD_OPS_MODE_SYNC,
/**
* A list of registration operations are allowed. These operations will
* trigger a post fence. KMD will not wait for the fence.
*/
PVA_KMD_OPS_MODE_ASYNC,
};
/**
* A buffer contains a list of KMD operations and a post fence.
*
* In general, the list of KMD operations contain jobs that need to be done by
* the KMD and FW. KMD will first perform its part and then submit a privileged
* command buffer to FW. FW will trigger the provided post fence when done.
*
* NOTE: Starting address of every struct/array in the buffer must be aligned to
* 8 bytes.
*/
struct pva_kmd_operations {
enum pva_kmd_ops_mode mode;
struct pva_fw_postfence postfence;
/** Followed by a list of KMD operation(s) */
};
/* Max op buffer size is 8 MB */
#define PVA_KMD_MAX_OP_BUFFER_SIZE (8 * 1024 * 1024)
/* Max response size is 8 KB */
#define PVA_KMD_MAX_RESP_BUFFER_SIZE (8 * 1024)
#endif // PVA_KMD_H
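
A minimal sketch of how the start of an asynchronous operations buffer could be populated; how individual operation records and their arguments are packed after the header is not spelled out in this header, so that part is left as a comment.

/* Illustrative only: initialize the head of an async ops buffer. */
static void example_init_ops_buffer(struct pva_kmd_operations *ops,
				    const struct pva_fw_postfence *fence)
{
	ops->mode = PVA_KMD_OPS_MODE_ASYNC;
	ops->postfence = *fence;
	/*
	 * Operation records (struct pva_kmd_op_header followed by the per-op
	 * arguments) are appended after this struct; every struct/array start
	 * must be 8-byte aligned, and the whole buffer must stay within
	 * PVA_KMD_MAX_OP_BUFFER_SIZE.
	 */
}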

View File

@@ -0,0 +1,19 @@
################################### tell Emacs this is a -*- makefile-gmake -*-
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
###############################################################################
obj-m := pva_kmd_linux.o
pva_kmd_linux-objs += ${PVA_KMD_LINUX_SRC}
ccflags-y += ${PVA_KMD_LINUX_INC}
ccflags-y += ${PVA_KMD_LINUX_DEF}
ccflags-y += ${PVA_KMD_LINUX_CFLAGS}
ccflags-y += -std=gnu11

View File

@@ -0,0 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_LINUX_H
#define PVA_KMD_LINUX_H
#include "pva_kmd.h"
#define PVA_LINUX_DEV_PATH_PREFIX "/dev/nvhost-ctrl-pva"
#define NVPVA_IOCTL_MAGIC 'Q'
#define PVA_KMD_IOCTL_GENERIC \
_IOWR(NVPVA_IOCTL_MAGIC, 1, struct pva_kmd_linux_ioctl_header)
#define NVPVA_IOCTL_MAX_SIZE 256 /* Temporary value; may be updated later */
struct nvpva_ioctl_part {
void *addr;
uint64_t size;
};
/**
* The header of request to KMD
*/
struct pva_kmd_linux_ioctl_header {
struct nvpva_ioctl_part request;
struct nvpva_ioctl_part response;
};
#endif // PVA_KMD_LINUX_H
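
A minimal user-space sketch of issuing the generic ioctl defined above; the request/response payloads, the device node suffix, and the error handling are assumptions, only the structures and PVA_KMD_IOCTL_GENERIC come from this header.

/* Illustrative only (user space): wrap a request/response pair for the generic ioctl. */
#include <sys/ioctl.h>

static int example_issue_generic_ioctl(int fd, void *req, uint64_t req_size,
				       void *resp, uint64_t resp_size)
{
	struct pva_kmd_linux_ioctl_header hdr = {
		.request = { .addr = req, .size = req_size },
		.response = { .addr = resp, .size = resp_size },
	};

	/* fd is assumed to come from opening PVA_LINUX_DEV_PATH_PREFIX "0". */
	return ioctl(fd, PVA_KMD_IOCTL_GENERIC, &hdr);
}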

View File

@@ -0,0 +1,145 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/nvhost.h>
#include <uapi/linux/tegra-soc-hwpm-uapi.h>
#include "pva_kmd_linux.h"
#include "pva_kmd_linux_device.h"
#include "pva_kmd_debugfs.h"
static int pva_handle_fops(struct seq_file *s, void *data)
{
return 0;
}
static int debugfs_node_open(struct inode *inode, struct file *file)
{
int retval;
struct pva_kmd_file_ops *fops = file_inode(file)->i_private;
retval = single_open(file, pva_handle_fops, inode->i_private);
if (retval != 0) {
pva_kmd_log_err("debugfs_node_open single_open failed");
goto out;
}
if (fops->open != NULL) {
retval = fops->open(fops->pdev);
}
out:
return retval;
}
static int debugfs_node_release(struct inode *inode, struct file *file)
{
int retval;
struct pva_kmd_file_ops *fops = file_inode(file)->i_private;
if (fops->release != NULL) {
retval = fops->release(fops->pdev);
if (retval != 0) {
pva_kmd_log_err("debugfs_node_release release failed");
goto out;
}
}
retval = single_release(inode, file);
out:
return retval;
}
static long int debugfs_node_read(struct file *file, char *data,
long unsigned int size, long long int *offset)
{
int64_t retval;
struct pva_kmd_file_ops *fops = file_inode(file)->i_private;
retval = fops->read(fops->pdev, fops->file_data, data, *offset, size);
return retval;
}
static long int debugfs_node_write(struct file *file, const char *data,
long unsigned int size,
long long int *offset)
{
long int retval;
struct pva_kmd_file_ops *fops = file_inode(file)->i_private;
retval = fops->write(fops->pdev, fops->file_data, data, *offset, size);
return retval;
}
static const struct file_operations pva_linux_debugfs_fops = {
.open = debugfs_node_open,
.read = debugfs_node_read,
.write = debugfs_node_write,
.release = debugfs_node_release,
};
void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name,
bool *pdata)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
debugfs_create_bool(name, 0644, de, pdata);
}
void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name,
uint32_t *pdata)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
debugfs_create_u32(name, 0644, de, pdata);
}
void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name,
struct pva_kmd_file_ops *pvafops)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
struct file_operations *fops =
(struct file_operations *)&pva_linux_debugfs_fops;
struct dentry *file;
file = debugfs_create_file(name, 0644, de, pvafops, fops);
ASSERT(file != NULL);
}
void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
debugfs_lookup_and_remove("stats_enable", de);
debugfs_lookup_and_remove("vpu_debug", de);
debugfs_lookup_and_remove("profile_level", de);
debugfs_lookup_and_remove("vpu_stats", de);
}
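
To show how the create helpers above are typically consumed, here is a short sketch that registers two of the nodes torn down in pva_kmd_debugfs_remove_nodes() above. The backing variables and the registration function are hypothetical; the helper signatures and node names come from this file, and the sketch assumes the includes at the top of it.

/* Hypothetical backing storage for two of the nodes removed above. */
static bool pva_example_stats_enable;
static uint32_t pva_example_profile_level;

/* Register simple debugfs knobs under the nvhost device's debugfs directory. */
static void pva_example_create_debugfs_nodes(struct pva_kmd_device *pva)
{
	pva_kmd_debugfs_create_bool(pva, "stats_enable",
				    &pva_example_stats_enable);
	pva_kmd_debugfs_create_u32(pva, "profile_level",
				   &pva_example_profile_level);
}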

View File

@@ -0,0 +1,390 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include <linux/of.h>
#include <linux/clk.h>
#include <linux/reset.h>
#include <linux/pm_runtime.h>
#include <linux/debugfs.h>
#include <linux/firmware.h>
#include <linux/version.h>
#include <linux/nvhost.h>
#include <linux/nvhost_t194.h>
#include <linux/iommu.h>
#include <linux/dma-mapping.h>
#include <soc/tegra/virt/syscalls.h>
#include <asm/io.h>
#include "pva_kmd_device.h"
#include "pva_kmd_linux_device.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_silicon_utils.h"
#include "pva_kmd_silicon_boot.h"
struct nvhost_device_data *
pva_kmd_linux_device_get_properties(struct platform_device *pdev)
{
struct nvhost_device_data *props = platform_get_drvdata(pdev);
return props;
}
struct pva_kmd_linux_device_data *
pva_kmd_linux_device_get_data(struct pva_kmd_device *device)
{
return (struct pva_kmd_linux_device_data *)device->plat_data;
}
void pva_kmd_linux_device_set_data(struct pva_kmd_device *device,
struct pva_kmd_linux_device_data *data)
{
device->plat_data = (void *)data;
}
void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint32_t *syncpt_value)
{
int err = 0;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
err = nvhost_syncpt_read_ext_check(props->pdev, syncpt_id,
syncpt_value);
if (err < 0) {
FAULT("Failed to read syncpoint value\n");
}
}
void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint64_t *syncpt_iova)
{
uint32_t offset = 0;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct platform_device *host_pdev =
to_platform_device(props->pdev->dev.parent);
offset = nvhost_syncpt_unit_interface_get_byte_offset_ext(host_pdev,
syncpt_id);
*syncpt_iova = safe_addu64(pva->syncpt_ro_iova, (uint64_t)offset);
}
void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
{
phys_addr_t base;
size_t size;
int err = 0;
uint32_t syncpt_page_size;
uint32_t syncpt_offset[PVA_NUM_RW_SYNCPTS];
dma_addr_t sp_start;
struct platform_device *host_pdev;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
nvhost_syncpt_unit_interface_init(props->pdev);
host_pdev = to_platform_device(props->pdev->dev.parent);
err = nvhost_syncpt_unit_interface_get_aperture(host_pdev, &base,
&size);
if (err < 0) {
FAULT("Failed to get syncpt aperture\n");
}
/** Get page size of a syncpoint */
syncpt_page_size =
nvhost_syncpt_unit_interface_get_byte_offset_ext(host_pdev, 1);
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
sp_start = dma_map_resource(dev, base, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, sp_start)) {
FAULT("Failed to pin RO syncpoints\n");
}
} else {
FAULT("Failed to pin RO syncpoints\n");
}
pva->syncpt_ro_iova = sp_start;
pva->syncpt_offset = syncpt_page_size;
pva->num_syncpts = (size / syncpt_page_size);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
pva->syncpt_rw[i].syncpt_id = nvhost_get_syncpt_client_managed(
props->pdev, "pva_syncpt");
if (pva->syncpt_rw[i].syncpt_id == 0) {
FAULT("Failed to get syncpt\n");
}
syncpt_offset[i] =
nvhost_syncpt_unit_interface_get_byte_offset_ext(
host_pdev, pva->syncpt_rw[i].syncpt_id);
err = nvhost_syncpt_read_ext_check(
props->pdev, pva->syncpt_rw[i].syncpt_id,
&pva->syncpt_rw[i].syncpt_value);
if (err < 0) {
FAULT("Failed to read syncpoint value\n");
}
}
pva->syncpt_rw_iova =
dma_map_resource(dev,
safe_addu64(base, (uint64_t)syncpt_offset[0]),
safe_mulu64((uint64_t)pva->syncpt_offset,
(uint64_t)PVA_NUM_RW_SYNCPTS),
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, pva->syncpt_rw_iova)) {
FAULT("Failed to pin RW syncpoints\n");
}
pva->syncpt_rw[0].syncpt_iova = pva->syncpt_rw_iova;
for (uint32_t i = 1; i < PVA_NUM_RW_SYNCPTS; i++) {
if (safe_addu32(syncpt_offset[i - 1], pva->syncpt_offset) !=
syncpt_offset[i]) {
FAULT("RW syncpts are not contiguous\n");
}
pva->syncpt_rw[i].syncpt_iova = safe_addu64(
pva->syncpt_rw_iova,
safe_mulu64((uint64_t)pva->syncpt_offset, (uint64_t)i));
}
}
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva)
{
}
void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva)
{
int err = 0;
phys_addr_t base;
size_t size;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct platform_device *host_pdev =
to_platform_device(props->pdev->dev.parent);
err = nvhost_syncpt_unit_interface_get_aperture(host_pdev, &base,
&size);
if (err < 0) {
FAULT("Failed to get syncpt aperture\n");
}
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
dma_unmap_resource(dev, pva->syncpt_ro_iova, size,
DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
dma_unmap_resource(dev, pva->syncpt_rw_iova,
safe_mulu64((uint64_t)pva->syncpt_offset,
(uint64_t)PVA_NUM_RW_SYNCPTS),
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
} else {
FAULT("Failed to unmap syncpts\n");
}
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
nvhost_syncpt_put_ref_ext(props->pdev,
pva->syncpt_rw[i].syncpt_id);
pva->syncpt_rw[i].syncpt_id = 0;
pva->syncpt_rw[i].syncpt_iova = 0;
pva->syncpt_rw[i].syncpt_value = 0;
}
pva->syncpt_ro_iova = 0;
pva->syncpt_rw_iova = 0;
pva->syncpt_offset = 0;
nvhost_syncpt_unit_interface_deinit(props->pdev);
}
void pva_kmd_device_plat_init(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *plat_data =
pva_kmd_zalloc_nofail(sizeof(struct pva_kmd_linux_device_data));
pva_kmd_linux_device_set_data(pva, plat_data);
/* Get SMMU context devices that were probed earlier and their SIDs */
pva_kmd_linux_device_smmu_contexts_init(pva);
}
void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva)
{
pva_kmd_linux_host1x_deinit(pva);
pva_kmd_free(pva_kmd_linux_device_get_data(pva));
}
enum pva_error pva_kmd_power_on(struct pva_kmd_device *pva)
{
int err = 0;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
err = pm_runtime_get_sync(&props->pdev->dev);
if (err < 0) {
pm_runtime_put_noidle(&props->pdev->dev);
goto out;
}
/* Power management operation is asynchronous. PVA may not be power
* cycled between power_off -> power_on call. Therefore, we need to
* reset it here to make sure it is in a clean state. */
reset_control_acquire(props->reset_control);
reset_control_reset(props->reset_control);
reset_control_release(props->reset_control);
out:
return kernel_err2pva_err(err);
}
void pva_kmd_power_off(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
pm_runtime_mark_last_busy(&props->pdev->dev);
pm_runtime_put(&props->pdev->dev);
/* Power management operation is asynchronous. We don't control when PVA
* will really be powered down. However, we need to free memories after
* this call. Therefore, we assert the reset line to stop PVA from any
* further activity. */
reset_control_acquire(props->reset_control);
reset_control_assert(props->reset_control);
reset_control_release(props->reset_control);
}
uint32_t pva_kmd_get_syncpt_ro_offset(struct pva_kmd_device *pva)
{
return safe_subu64(pva->syncpt_ro_iova, FW_SHARED_MEMORY_START);
}
uint32_t pva_kmd_get_syncpt_rw_offset(struct pva_kmd_device *pva)
{
return safe_subu64(pva->syncpt_rw_iova, FW_SHARED_MEMORY_START);
}
enum pva_error pva_kmd_read_fw_bin(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *device_props =
device_data->pva_device_properties;
struct pva_kmd_device_memory *fw_bin_mem;
const struct firmware *fw_ucode;
int kerr = request_firmware(&fw_ucode, device_props->firmware_name,
&device_props->pdev->dev);
if (kerr < 0) {
err = kernel_err2pva_err(kerr);
goto out;
}
fw_bin_mem = pva_kmd_device_memory_alloc_map(
safe_pow2_roundup_u64(fw_ucode->size, SIZE_4KB), pva,
PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
if (fw_bin_mem == NULL) {
err = PVA_NOMEM;
goto release;
}
memcpy(fw_bin_mem->va, fw_ucode->data, fw_ucode->size);
pva->fw_bin_mem = fw_bin_mem;
release:
release_firmware(fw_ucode);
out:
return err;
}
void pva_kmd_aperture_write(struct pva_kmd_device *pva,
enum pva_kmd_reg_aperture aperture, uint32_t reg,
uint32_t val)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *device_props =
device_data->pva_device_properties;
void __iomem *addr = device_props->aperture[aperture] + reg;
writel(val, addr);
}
uint32_t pva_kmd_aperture_read(struct pva_kmd_device *pva,
enum pva_kmd_reg_aperture aperture, uint32_t reg)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *device_props =
device_data->pva_device_properties;
void __iomem *addr = device_props->aperture[aperture] + reg;
return readl(addr);
}
enum pva_error kernel_err2pva_err(int err)
{
if (err >= 0) {
return PVA_SUCCESS;
}
switch (err) {
case -EINVAL:
return PVA_INVAL;
case -EINTR:
return PVA_EINTR;
default:
return PVA_UNKNOWN_ERROR;
}
}
unsigned long pva_kmd_copy_data_from_user(void *dst, const void *src,
uint64_t size)
{
return copy_from_user(dst, src, size);
}
unsigned long pva_kmd_copy_data_to_user(void __user *to, const void *from,
unsigned long size)
{
return copy_to_user(to, from, size);
}
unsigned long pva_kmd_strtol(const char *str, int base)
{
unsigned long val;
int ret;
ret = kstrtoul(str, base, &val);
if (ret < 0)
return 0;
return val;
}
/* TODO: Enable HVC call once HVC fix is available on dev-main */
//static void pva_kmd_config_regs(void)
//{
//bool hv_err = true;
//hv_err = hyp_pva_config_regs();
//ASSERT(hv_err == true);
//ASSERT(false);
//}
void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva)
{
pva_kmd_config_evp_seg_regs(pva);
pva_kmd_config_scr_regs(pva);
}
void pva_kmd_config_sid_regs(struct pva_kmd_device *pva)
{
pva_kmd_config_sid(pva);
}

Some files were not shown because too many files have changed in this diff.