pva: mirror from gitlab cv/pva-sys-sw

Gitlab commit b284345610ceb ("pva: fix static defects...")

No changes since last deployment.

Change-Id: I56211cf375961730f67e246f4103217b33893286
Signed-off-by: nanwa <nanwa@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3428161
Reviewed-by: Rohit Goel <rogoel@nvidia.com>
Tested-by: Rohit Goel <rogoel@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
nanwa
2025-08-12 04:20:01 +00:00
committed by mobile promotions
parent 8657f0db30
commit c1ba8c8eae
71 changed files with 1289 additions and 2293 deletions

View File

@@ -63,15 +63,15 @@ pva_objs += \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_vpu_app_auth.o \
pva_inc_flags += \
-I$(PVA_SYS_ABSDIR)/src/fw/baremetal/include \
-I$(PVA_SYS_ABSDIR)/src/fw/include \
-I$(PVA_SYS_ABSDIR)/src/include \
-I$(PVA_SYS_ABSDIR)/src/kmd/include \
-I$(PVA_SYS_ABSDIR)/src/kmd/common \
-I$(PVA_SYS_ABSDIR)/src/kmd/common/shim \
-I$(PVA_SYS_ABSDIR)/src/kmd/include \
-I$(PVA_SYS_ABSDIR)/src/kmd/linux/include \
-I$(PVA_SYS_ABSDIR)/src/libs/pva/include \
-I$(PVA_SYS_ABSDIR)/src/include \
-I$(PVA_SYS_ABSDIR)/src/private_api \
-I$(PVA_SYS_ABSDIR)/src/fw/include \
-I$(PVA_SYS_ABSDIR)/src/fw/baremetal/include \
pva_def_flags += \
-DPVA_BUILD_MODE=PVA_BUILD_MODE_L4T \
@@ -82,6 +82,7 @@ pva_def_flags += \
-DPVA_BUILD_MODE_SIM=4 \
-DPVA_DEV_MAIN_COMPATIBLE=1 \
-DPVA_ENABLE_CUDA=1 \
-DPVA_ENABLE_R5_OCD=0 \
-DPVA_IS_DEBUG=0 \
-DPVA_SAFETY=0 \
-DPVA_SKIP_SYMBOL_TYPE_CHECK \

View File

@@ -1,182 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_BIT_H
#define PVA_BIT_H
/*
* Bit manipulation macros
*/
/**
* @brief Number of bits per byte.
*/
#define PVA_BITS_PER_BYTE (8UL)
/**
* @defgroup PVA_BIT8_HELPER
*
* @brief Bit manipulation macros for values of type uint8_t.
* Parameters that convey a bit position must be in the
* range of 0 to 7 inclusive.
* MSB and LSB parameters must both be in the range of
* 0 to 7 inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a variable of type uint8_t.
*/
#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu))
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Macro used to generate a bit-mask from MSB to LSB in a uint8_t variable.
* This macro sets all the bits from MSB to LSB.
*/
#define PVA_MASK8(_msb_, _lsb_) \
((uint8_t)((((PVA_BIT8(_msb_) - 1U) | PVA_BIT8(_msb_)) & \
~(PVA_BIT8(_lsb_) - 1U)) & \
0xffu))
//! @endcond
/** @} */
/**
* @defgroup PVA_BIT16_HELPER
*
* @brief Bit manipulation macros for values of type uint16_t.
* Parameters that convey a bit position must be in the
* range of 0 to 15 inclusive.
* MSB and LSB parameters must both be in the range of
* 0 to 15 inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 16 bit number.
*/
#define PVA_BIT16(_b_) ((uint16_t)(((uint16_t)1U << (_b_)) & 0xffffu))
/**
* @brief Macro to mask a range (MSB to LSB) of bit positions in a 16 bit number.
* This will set all the bit positions in the specified range.
*/
#define PVA_MASK16(_msb_, _lsb_) \
((uint16_t)((((PVA_BIT16(_msb_) - 1U) | PVA_BIT16(_msb_)) & \
~(PVA_BIT16(_lsb_) - 1U)) & \
0xffffu))
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Macro to extract bits from a 16 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT16(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK16((_msb_), (_lsb_))) >> (_lsb_)))
//! @endcond
/**
* @brief Macro to insert a range of bits into a 16 bit number.
* The bits are derived from the number passed as argument.
*/
#define PVA_INSERT16(_x_, _msb_, _lsb_) \
((((uint16_t)(_x_)) << (_lsb_)) & PVA_MASK16((_msb_), (_lsb_)))
/** @} */
/**
* @defgroup PVA_BIT32_HELPER
*
* @brief Bit manipulation macros for values of type uint32_t.
* Parameters that convey a bit position must be in the
* range of 0 to 31 inclusive.
* MSB and LSB parameters must both be in the range of
* 0 to 31 inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 32 bit number.
*/
#define PVA_BIT(_b_) ((uint32_t)(((uint32_t)1U << (_b_)) & 0xffffffffu))
/**
* @brief Macro to mask a range (MSB to LSB) of bit positions in a 32 bit number.
* This will set all the bit positions in the specified range.
*/
#define PVA_MASK(_msb_, _lsb_) \
(((PVA_BIT(_msb_) - 1U) | PVA_BIT(_msb_)) & ~(PVA_BIT(_lsb_) - 1U))
/**
* @brief Macro to extract bits from a 32 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK((_msb_), (_lsb_))) >> (_lsb_)))
/**
* @brief Macro to insert a range of bits into a 32 bit number.
* The bits are derived from the number passed as argument.
*/
#define PVA_INSERT(_x_, _msb_, _lsb_) \
((((uint32_t)(_x_)) << (_lsb_)) & PVA_MASK((_msb_), (_lsb_)))
/** @} */
/**
* @defgroup PVA_BIT64_HELPER
*
* @brief Bit manipulation macros for values of type uint64_t.
* Parameters that convey a bit position must be in the
* range of 0 to 63 inclusive.
* MSB and LSB parameters must both be in the range of
* 0 to 63 inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 64 bit number.
*/
#define PVA_BIT64(_b_) \
((uint64_t)(((uint64_t)1UL << (_b_)) & 0xffffffffffffffffu))
/**
* @brief Macro used to generate a bit-mask from (MSB to LSB) in a uint64_t variable.
* This macro sets all the bits from MSB to LSB.
*/
#define PVA_MASK64(_msb_, _lsb_) \
(((PVA_BIT64(_msb_) - (uint64_t)1U) | PVA_BIT64(_msb_)) & \
~(PVA_BIT64(_lsb_) - (uint64_t)1U))
/**
* @brief Macro to extract bits from a 64 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT64(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK64((_msb_), (_lsb_))) >> (_lsb_)))
/**
* @brief Macro to insert a range of bits into a 64 bit number.
* The bits are derived from the number passed as argument.
*/
#define PVA_INSERT64(_x_, _msb_, _lsb_) \
((((uint64_t)(_x_)) << (_lsb_)) & PVA_MASK64((_msb_), (_lsb_)))
/**
* @brief Macro to pack two 32 bit values into a 64 bit number.
* The upper 32 bits of the result are taken from the second
* argument and the lower 32 bits are taken from the first argument.
*/
#define PVA_PACK64(_l_, _h_) \
(PVA_INSERT64((_h_), 63U, 32U) | PVA_INSERT64((_l_), 31U, 0U))
/**
* @brief Macro to extract the higher 32 bits from a 64 bit number.
*/
#define PVA_HI32(_x_) ((uint32_t)(((_x_) >> 32U) & 0xFFFFFFFFU))
/**
* @brief Macro to extract the lower 32 bits from a 64 bit number.
*/
#define PVA_LOW32(_x_) ((uint32_t)((_x_)&0xFFFFFFFFU))
/** @} */
#endif
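A minimal usage sketch of the helpers above (illustrative values only; the function name is hypothetical and it assumes this header is available as pva-bit.h):

#include <stdint.h>
#include <pva-bit.h>

static void pva_bit_example(void)
{
	/* Place the value 0x5 into bits 7..4: PVA_MASK(7, 4) is 0xF0, so reg == 0x50. */
	uint32_t reg = PVA_INSERT(0x5U, 7U, 4U);
	/* Read the same field back: (0x50 & 0xF0) >> 4 == 0x5. */
	uint32_t field = PVA_EXTRACT(reg, 7U, 4U, uint32_t);
	/* Pack low word 0xAABBCCDD and high word 0x11223344 into 0x11223344AABBCCDD. */
	uint64_t packed = PVA_PACK64(0xAABBCCDDU, 0x11223344U);

	(void)field;
	(void)PVA_HI32(packed);  /* 0x11223344 */
	(void)PVA_LOW32(packed); /* 0xAABBCCDD */
}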

View File

@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_CHECKPOINT_H
#define PVA_CHECKPOINT_H
@@ -109,125 +110,77 @@
*/
#define PVA_ABORT_FALLTHRU (0x02U)
/**
* @brief Minor code for abort in case of un-supported SID read.
*/
#define PVA_ABORT_UNSUPPORTED (0x03U)
/**
* @brief Minor code for abort in case of fatal IRQ.
*/
#define PVA_ABORT_IRQ (0x05U)
#define PVA_ABORT_IRQ (0x04U)
/**
* @brief Minor code for abort in case of MPU failure.
*/
#define PVA_ABORT_MPU (0x06U)
#define PVA_ABORT_MPU (0x05U)
/**
* @brief Minor code for abort in case of ARM exception.
*/
#define PVA_ABORT_EXCEPTION (0x07U)
/**
* @brief Minor code for abort in case of un-supported SID read.
*/
#define PVA_ABORT_UNSUPPORTED (0x09U)
/**
* @brief Minor code for abort in case of DMA failures.
*/
#define PVA_ABORT_DMA_TASK (0x0cU)
#define PVA_ABORT_EXCEPTION (0x06U)
/**
* @brief Minor code for abort in case of WDT failures.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_WATCHDOG (0x0eU)
//! @endcond
#define PVA_ABORT_WATCHDOG (0x07U)
/**
* @brief Minor code for abort in case of VPU init failures.
*/
#define PVA_ABORT_VPU (0x0fU)
#define PVA_ABORT_VPU (0x08U)
/**
* @brief Minor code for abort in case of DMA MISR setup failures.
*/
#define PVA_ABORT_DMA (0x10U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code for abort in case of Mbox errors.
* Note: This is used only in T19x
*/
#define PVA_ABORT_MBOX_WAR (0x12U)
//! @endcond
/**
* @brief Minor code for abort in case of AISR errors.
*/
#define PVA_ABORT_AISR_QUEUE (0x14U)
/**
* @brief Minor code for abort in case of bad task.
*/
#define PVA_ABORT_BAD_TASK (0x15U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code for abort in case of PPE init failures.
* Note: This is only used in T26x
*/
#define PVA_ABORT_PPE (0x16U)
//! @endcond
#define PVA_ABORT_DMA (0x09U)
/**
* @brief Minor code for abort in case of RAMIC failures.
*/
#define PVA_ABORT_RAMIC (0x20U)
#define PVA_ABORT_RAMIC (0x10U)
/**
* @brief Minor code for abort in case of firewall decode error.
*/
#define PVA_ABORT_L2SRAM_FWDEC (0x11U)
/**
* @brief Minor code for abort in case of FSP abort.
*/
#define PVA_ABORT_FSP (0x12U)
/**
* @brief Minor code for abort in case of kernel panic.
*/
#define PVA_ABORT_KERNEL_PANIC (0x13U)
/**
* @brief Minor code for abort in case of boot failure.
*/
#define PVA_ABORT_BOOT (0x14U)
/**
* @brief Minor Code for SEC for safety errors.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_SEC_SERR (0x21U)
#define PVA_ABORT_SEC_SERR (0x15U)
/**
* @brief Minor Code for SEC for functional errors.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_SEC_FERR (0x22U)
/**
* @brief Minor code for abort in case of firewall decode error.
*/
#define PVA_ABORT_L2SRAM_FWDEC (0x23U)
/**
* @brief Minor code for abort in case of kernel panic.
*/
#define PVA_ABORT_KERNEL_PANIC (0x30U)
/**
* @brief Minor code for abort in case of Batch Timeout.
*/
#define PVA_ABORT_BATCH_TIMEOUT (0x40U)
/**
* @brief Minor code for abort in case of DMA transfer timeout
* (while in the launch phase for the VPU).
*/
#define PVA_ABORT_DMA_SETUP_TIMEOUT (0x41U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code used when NOC BIST is run.
* Note: This is only used in T19x
*/
#define PVA_ABORT_NOC_BIST (0xfcU)
//! @endcond
/**
* @brief Minor code for abort in case of FSP abort.
*/
#define PVA_ABORT_FSP 0x42U
#define PVA_ABORT_SEC_FERR (0x16U)
/** @} */
@@ -336,4 +289,46 @@
*/
#define PVA_ABORT_FSP_UNKNOWN (0xE005U)
/** @} */
/**
* @brief Minor Code for Unhandled SVC
*/
#define PVA_ABORT_SVC_UNHANDLED (0xE006U)
/**
* @defgroup PVA_ABORT_ARGUMENTS_BOOT Argument to pva_abort() for BOOT operations
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor code for boot abort due to invalid code IOVA
*/
#define PVA_ABORT_BOOT_BAD_CODE_IOVA (0xE001U)
/**
* @brief Minor code for boot abort due to invalid addresses
*/
#define PVA_ABORT_BOOT_BAD_ADDRS (0xE002U)
/**
* @brief Minor code for boot abort due to invalid descriptor start
*/
#define PVA_ABORT_BOOT_BAD_DESC_START (0xE003U)
/**
* @brief Minor code for boot abort due to invalid descriptor end
*/
#define PVA_ABORT_BOOT_BAD_DESC_END (0xE004U)
/**
* @brief Minor code for boot abort due to invalid descriptor ID
*/
#define PVA_ABORT_BOOT_BAD_DESC_ID (0xE005U)
/**
* @brief Minor code for boot abort due to invalid platform
*/
#define PVA_ABORT_BOOT_INVALID_PLATFORM (0xE006U)
/** @} */
#endif

View File

@@ -1,217 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_CONFIG_H
#define PVA_CONFIG_H
#include <pva-types.h>
#include "pva_fw_constants.h"
/**
* @defgroup PVA_CONFIG_PARAMS
*
* @brief PVA Configuration parameters.
* @{
*/
/**
* @brief Queue id for queue0.
*/
#define PVA_FW_QUEUE_0 (0U)
/**
* @brief Total number of queues that are present
* for communication between KMD and FW.
*/
#define PVA_NUM_QUEUES (8U)
/**
* @brief Maximum queue id value in PVA System.
*/
#define PVA_MAX_QUEUE_ID (PVA_NUM_QUEUES - 1U)
/**
* @brief Maximum number of tasks that is supported by a queue.
*/
#define MAX_QUEUE_DEPTH (256U)
/**
* @brief Number of Hardware Semaphore registers in PVA System.
*/
#define PVA_NUM_SEMA_REGS (4U)
/**
* @brief Number of Hardware Mailbox registers in PVA System.
*/
#define PVA_NUM_MBOX_REGS (8U)
/**
* @brief Maximum number of Pre-Actions for a task.
*/
#define PVA_MAX_PREACTIONS (26U)
/**
* @brief Maximum number of Post-Actions for a task.
*/
#define PVA_MAX_POSTACTIONS (28U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Maximum number of DMA channels for T26x.
*/
#define PVA_NUM_DMA_CHANNELS_T26X (8U)
/**
* @brief Total number of AXI data buffers for T26x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T26X (304U)
/**
* @brief Number of reserved AXI data buffers for T26x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T26X (16U)
/**
* @brief Number of dynamic AXI data buffers for T26x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T26X \
(PVA_NUM_DMA_ADB_BUFFS_T26X - PVA_NUM_RESERVED_ADB_BUFFERS_T26X)
/**
* @brief Maximum number of DMA channels for T23x.
*/
#define PVA_NUM_DMA_CHANNELS_T23X (16U)
//! @endcond
/**
* @brief Number of DMA descriptors for T19x.
*/
#define PVA_NUM_DMA_DESCS_T19X (64U)
/**
* @brief Number of DMA descriptors for T23x.
*/
#define PVA_NUM_DMA_DESCS_T23X (64U)
/**
* @brief Number of DMA descriptors for T26x.
*/
#define PVA_NUM_DMA_DESCS_T26X (96U)
/**
* @brief Number of reserved DMA channels. These channels
* are reserved per DMA for R5 transfers. These channels
* will be used by R5 to transfer data which it needs.
*/
#define PVA_NUM_RESERVED_CHANNELS (1U)
/**
* @brief Number of dynamic DMA descriptors for T19x. These descriptors can be
* used by the VPU application to transfer data. These exclude
* the reserved descriptors from total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T19X \
(PVA_NUM_DMA_DESCS_T19X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* @brief Number of dynamic DMA descriptors for T23x. These descriptors can be
* used by the VPU application to transfer data. These exclude
* the reserved descriptors from total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T23X \
(PVA_NUM_DMA_DESCS_T23X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* @brief Number of dynamic DMA descriptors for T26x. These descriptors can be
* used by the VPU application to transfer data. These exclude
* the reserved descriptors from total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T26X \
(PVA_NUM_DMA_DESCS_T26X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* Note: T26x will be brought up first on Linux, and then on QNX. To support this,
* the following macro is needed so that the QNX driver can build without requiring
* any changes.
*/
#define PVA_NUM_DYNAMIC_DESCS (PVA_NUM_DYNAMIC_DESCS_T23X)
/**
* @brief Number of reserved AXI data buffers for T23x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T23X (16U)
/**
* @brief Number of reserved VMEM data buffers.
*/
#define PVA_NUM_RESERVED_VDB_BUFFERS (0U)
/**
* @brief Total number of VMEM data buffers.
*/
#define PVA_NUM_DMA_VDB_BUFFS (128U)
/**
* @brief Total number of AXI data buffers for T23x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T23X (272U)
/**
* @brief Number of dynamic AXI data buffers for T23x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T23X \
(PVA_NUM_DMA_ADB_BUFFS_T23X - PVA_NUM_RESERVED_ADB_BUFFERS_T23X)
/**
* @brief Number of dynamic VMEM data buffers for T23x.
* These exclude the reserved VMEM data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_VDB_BUFFS \
(PVA_NUM_DMA_VDB_BUFFS - PVA_NUM_RESERVED_VDB_BUFFERS)
/**
* @brief The first Reserved DMA descriptor. This is used as a
* starting point to iterate over reserved DMA descriptors.
*/
#define PVA_RESERVED_DESC_START (60U)
/**
* @brief The first Reserved AXI data buffers. This is used as a
* starting point to iterate over reserved AXI data buffers.
*/
#define PVA_RESERVED_ADB_BUFF_START PVA_NUM_DYNAMIC_ADB_BUFFS
/**
* @brief This macro has the value to be set by KMD in the shared semaphores
* @ref PVA_PREFENCE_SYNCPT_REGION_IOVA_SEM or @ref PVA_POSTFENCE_SYNCPT_REGION_IOVA_SEM
* if the syncpoint reserved region must not be configured as uncached
* in R5 MPU.
*/
#define PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET (0xFFFFFFFFU)
/** @} */
/**
* @defgroup PVA_CONFIG_PARAMS_T19X
*
* @brief PVA Configuration parameters exclusively for T19X.
* @{
*/
/**
* @brief Number of DMA channels for T19x or Xavier.
*/
#define PVA_NUM_DMA_CHANNELS_T19X (14U)
/**
* @brief Number of reserved AXI data buffers for T19x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T19X (8U)
/**
* @brief Total number of AXI data buffers for T19x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T19X (256U)
/**
* @brief Number of dynamic AXI data buffers for T19x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T19X \
(PVA_NUM_DMA_ADB_BUFFS_T19X - PVA_NUM_RESERVED_ADB_BUFFERS_T19X)
/** @} */
#endif
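A quick sanity check of the derived counts above, as a sketch. It assumes PVA_NUM_RESERVED_DESCRIPTORS equals 4 (reserved descriptors 60-63, consistent with PVA_RESERVED_DESC_START being 60) and that this header is available as pva-config.h; both are assumptions, not taken from the diff:

#include "pva-config.h"

/* Assumed: PVA_NUM_RESERVED_DESCRIPTORS == 4 (descriptors 60..63). */
_Static_assert(PVA_NUM_DYNAMIC_DESCS_T23X == 60U, "64 total - 4 reserved descriptors");
_Static_assert(PVA_NUM_DYNAMIC_DESCS_T26X == 92U, "96 total - 4 reserved descriptors");
_Static_assert(PVA_NUM_DYNAMIC_ADB_BUFFS_T23X == 256U, "272 total - 16 reserved AXI data buffers");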

View File

@@ -1,371 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_ERRORS_H
#define PVA_ERRORS_H
#include <stdint.h>
#include <pva-packed.h>
/**
* @brief PVA Error codes
*/
typedef uint16_t pva_errors_t;
/**
* @defgroup PVA_ERRORS
*
* @brief General and interface errors of PVA.
* @{
*/
/**
* @brief In case of no Error.
*/
#define PVA_ERR_NO_ERROR (0x0U)
/**
* @brief Error in case of an illegal command.
* PVA FW executes only commands that are found
* in the command lookup table. If a command
* is not among the supported commands, this
* error will be returned. Valid commands are
* listed in @ref pva_cmd_lookup_t.
*
*/
#define PVA_ERR_BAD_CMD (0x1U)
/**
* @brief Error in case of a bad queue id, i.e.,
* the queue id that was requested is not available.
*/
#define PVA_ERR_BAD_QUEUE_ID (0x3U)
/**
* @brief Error in case of invalid pve-id. This
* error is generated if PVE id is greater
* than @ref PVA_NUM_PVE.
*/
#define PVA_ERR_BAD_PVE_ID (0x4U)
/**
* @brief Error in case the number of pre-actions
* is more than what can be accommodated.
*/
#define PVA_ERR_BUFF_TOO_SMALL (0x5U)
/**
* @brief Error in case when requested feature can not be satisfied.
* This error arises in scenarios where certain actions are
* not supported during execution of pre-actions or post-actions.
* For instance, @ref TASK_ACT_WRITE_STATUS is not supported in
* executing pre-actions of task.
*/
#define PVA_ERR_FEATURE_NOT_SUPPORTED (0x6U)
/**
* @brief Error in case when the address generated or translated does not
* meet the constraints like alignment or non-null.
*/
#define PVA_ERR_BAD_ADDRESS (0x9U)
/**
* @brief Error in case a timestamp is requested on an unsupported action.
*/
#define PVA_ERR_BAD_TIME_VALUE (0xdU)
#if PVA_SAFETY == 0
/**
* @brief Error in case when the register provided to update
* the status is invalid.
*/
#define PVA_ERR_BAD_STATUS_REG (0x10U)
#endif
//! @endcond
/**
* @brief Error in case of a bad task.
* This error is returned when a task does not meet the
* necessary criteria, such as being non-zero and 64 byte aligned.
*/
#define PVA_ERR_BAD_TASK (0x15U)
/**
* @brief Error in case of invalid task action list. Invalid
* action list arises in scenarios like number of
* pre and post actions not being zero but actual
* pre or post action to be performed being NULL.
*/
#define PVA_ERR_BAD_TASK_ACTION_LIST (0x16U)
/**
* @brief Error when the internal state of a task is not as expected.
* A task transitions through various states while
* executing. When a state is not coherent with the
* action being performed, this error is returned.
* For example, a task cannot be in a running state
* while tear-down is being performed.
*/
#define PVA_ERR_BAD_TASK_STATE (0x17U)
/**
* @brief Error when there is a mis-match in input status and the actual status.
* This error occurs when there is a mis-match in status from @ref pva_gen_task_status_t
* and actual status that is populated by FW during task execution.
*/
#define PVA_ERR_TASK_INPUT_STATUS_MISMATCH (0x18U)
/**
* @brief Error in case of invalid parameters. These errors occur when
* parameters passed are invalid and is applicable for task parameters
* and DMA parameters.
*/
#define PVA_ERR_BAD_PARAMETERS (0x1aU)
/**
* @brief Error in case a timeout occurred for a batch of tasks.
*/
#define PVA_ERR_PVE_TIMEOUT (0x23U)
/**
* @brief Error when VPU has halted or turned off.
*/
#define PVA_ERR_VPU_ERROR_HALT (0x25U)
/**
* @brief Error reported after FW sends an abort signal to KMD. KMD will write this status
* into the status buffers of pending tasks.
*/
#define PVA_ERR_VPU_BAD_STATE (0x28U)
/**
* @brief Error in case of exiting VPU.
*/
#define PVA_ERR_VPU_EXIT_ERROR (0x2aU)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Error in case of exiting PPE.
*/
#define PVA_ERR_PPE_EXIT_ERROR (0x2bU)
//! @endcond
/**
* @brief Error when a task running on PVE caused abort on PVE.
*/
#define PVA_ERR_PVE_ABORT (0x2dU)
/**
* @brief Error in case of Floating point NAN.
*/
//! @cond DISABLE_DOCUMENTATION
#define PVA_ERR_PPE_ILLEGAL_INSTR_ALIGN (0x37U)
/**
* @brief Error in case of Bad cached DRAM segment.
*/
#define PVA_ERR_BAD_CACHED_DRAM_SEG (0x3aU)
/**
* @brief Error in case of Bad DRAM IOVA.
*/
#define PVA_ERR_BAD_DRAM_IOVA (0x3cU)
//! @endcond
/**
* @brief Error in case of Register mis-match.
*/
#define PVA_ERR_REG_MISMATCH (0x3dU)
/**
* @brief Error in case of AISR queue empty.
*/
#define PVA_ERR_AISR_INPUT_QUEUE_EMPTY (0x3fU)
/**
* @brief Error in case of AISR queue full.
*/
#define PVA_ERR_AISR_OUTPUT_QUEUE_FULL (0x40U)
#if (PVA_HAS_L2SRAM == 1)
/**
* @brief Error in case of L2SRAM allocation failed due to invalid parameters.
*/
#define PVA_ERR_BAD_L2SRAM_PARAMS (0x41U)
#endif
/**
* @brief Error in case of bad or invalid task parameters.
*/
#define PVA_ERR_BAD_TASK_PARAMS (0x42U)
/**
* @brief Error in case of invalid VPU system call.
*/
#define PVA_ERR_VPU_SYS_ERROR (0x43U)
/**
* @brief Error in case of HW Watchdog timer timeout
*/
#define PVA_ERR_WDT_TIMEOUT_ERROR (0x44U)
/**
* @brief Error in case Golden register check value mismatch.
*/
#define PVA_ERR_GR_REG_MISMATCH (0x45U)
/**
* @brief Error in case Critical register check value mismatch.
*/
#define PVA_ERR_CRIT_REG_MISMATCH (0x46U)
/** @} */
/**
* @defgroup PVA_DMA_ERRORS
*
* @brief DMA ERROR codes used across PVA.
* @{
*/
/**
* @brief Error when DMA transfer mode in DMA descriptor is invalid.
*/
#define PVA_ERR_DMA_TRANSFER_TYPE_INVALID (0x204U)
/**
* @brief Error when DMA transfer was not successful.
*/
#define PVA_ERR_DMA_CHANNEL_TRANSFER (0x207U)
/**
* @brief Error in case of BAD DMA descriptor.
*/
#define PVA_ERR_BAD_DMA_DESC_ID (0x208U)
/**
* @brief Error in case of BAD DMA channel ID.
*/
#define PVA_ERR_BAD_DMA_CHANNEL_ID (0x209U)
/**
* @brief Error in case of DMA timeout.
*/
#define PVA_ERR_DMA_TIMEOUT (0x20bU)
/**
* @brief Error when the channel a program is trying to use is already active.
*/
#define PVA_ERR_DMA_INVALID_CONFIG (0x220U)
/**
* @brief Error in case DMA transfer was not successful.
*/
#define PVA_ERR_DMA_ERROR (0x221U)
/**
* @brief Error when number of bytes of HW Seq data copy is
* not a multiple of 4.
*/
#define PVA_ERR_DMA_HWSEQ_BAD_PROGRAM (0x216U)
/**
* @brief Error when number of bytes of HW Seq data copy is
* more than HW Seq RAM size.
*/
#define PVA_ERR_DMA_HWSEQ_PROGRAM_TOO_LONG (0x217U)
/**
* @defgroup PVA_VPU_ISR_ERRORS
*
* @brief VPU ISR error codes used across PVA.
* @{
*/
/**
* @defgroup PVA_FAST_RESET_ERRORS
*
* @brief Fast reset error codes used across PVA.
* @{
*/
/**
* @brief Error when VPU is not in idle state for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_VPU (0x401U)
/**
* @brief Error if VPU I-Cache is busy before checking DMA engine for idle state.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE1 (0x402U)
/**
* @brief Error if DMA channel is busy for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_CH0 (0x403U)
/**
* @brief Error if VPU I-Cache is busy after checking DMA engine for idle state.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE2 (0x419U)
#if (PVA_CHIP_ID == CHIP_ID_T26X)
/**
* @brief Error when PPE is not in idle state for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_PPE (0x420U)
#endif
/** @} */
/**
* @defgroup PVA_L2SRAM_ERRORS
*
* @brief L2SRAM memory error codes used across PVA.
* @{
*/
/**
* @brief Error if l2sram memory allocation failed because of insufficient l2sram memory or
* if 2 chunks of memory are already allocated.
*/
#define PVA_ERR_ALLOC_FAILED (0x812U)
/**
* @brief Error if the l2sram address given for clearing/freeing is not a valid L2SRAM address.
*/
#define PVA_ERR_FREE_FAILED (0x813U)
/** @} */
/**
* @defgroup PVA_INFO_ERRORS
*
* @brief Informational error codes.
* @{
*/
/**
* @brief Error when there is no task.
*/
#define PVA_ERR_NO_TASK (0x997U)
/**
* @brief Error when CCQ IRQ line enable on VIC fails
*/
#define PVA_ERR_CCQ_IRQ_ENABLE_FAILED (0x998U)
/**
* @brief Error when Mailbox IRQ line enable on VIC fails
*/
#define PVA_ERR_MBOX_IRQ_ENABLE_FAILED (0x999U)
/**
* @brief Error when L2SRAM IRQ line enable on VIC fails
*/
#define PVA_ERR_L2SRAM_IRQ_ENABLE_FAILED (0x99AU)
/**
* @brief Error when DMA0 IRQ line enable on VIC fails
*/
#define PVA_ERR_DMA0_IRQ_ENABLE_FAILED (0x99BU)
/**
* @brief Error when DMA1 IRQ line enable on VIC fails
*/
#define PVA_ERR_DMA1_IRQ_ENABLE_FAILED (0x99CU)
/**
* @brief Error when VPU IRQ line enable on VIC fails
*/
#define PVA_ERR_VPU_IRQ_ENABLE_FAILED (0x99DU)
/**
* @brief Error when SEC IRQ line enable on VIC fails
*/
#define PVA_ERR_SEC_IRQ_ENABLE_FAILED (0x99EU)
/**
* @brief Error when RAMIC IRQ line enable on VIC fails
*/
#define PVA_ERR_RAMIC_IRQ_ENABLE_FAILED (0x99FU)
/**
* @brief Error in case to try again.
* @note This error is internal to FW only.
*/
#define PVA_ERR_TRY_AGAIN (0x9A0U)
/** @} */
/* Never used */
#define PVA_ERR_MAX_ERR (0xFFFFU)
#endif /* PVA_ERRORS_H */

View File

@@ -1,175 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_FW_VERSION_H
#define PVA_FW_VERSION_H
#include <pva-bit.h>
/*
* Note: Below are doxygen comments with the @def command.
* This allows the comment to be physically distant from the define
* being documented, and allows for a single general comment that applies
* regardless of the value assigned to the macro.
*/
/**
* @defgroup PVA_VERSION_TYPE_FLAGS VERSION_TYPE Bit Flags
*
* @brief The bit flags that indicate the qualities of the Built Firmware.
* e.g: Debug, Safety, Test Features, etc.
*
* @see VERSION_TYPE
* @{
*/
/**
* @def VERSION_CODE_DEBUG
* @brief Set or Clear the 'debug' bit for the FW version type value. For a safety
* build the value of this define will be zero.
*
* @details This bit is set if the macro @ref PVA_DEBUG is defined.
* @see PVA_DEBUG
*/
#if PVA_DEBUG == 1
#define VERSION_CODE_DEBUG PVA_BIT(0)
#else
#define VERSION_CODE_DEBUG (0U)
#endif
/**
* @def VERSION_CODE_SAFETY
* @brief Set or Clear the 'safety' bit for the FW version type value. For a safety
* build the value of this define will be non-zero.
*
* @details This bit is set if the macro @ref PVA_SAFETY is defined.
* Building for Safety disables certain functions that are used for debug, testing,
* or would otherwise pose a risk to system conforming to safety protocols such as ISO-26262 or
* ASPICE.
*
* @see PVA_SAFETY
*/
#if PVA_SAFETY == 1
#define VERSION_CODE_SAFETY PVA_BIT(1)
#else
#define VERSION_CODE_SAFETY (0U)
#endif
/**
* @def VERSION_CODE_PVA_TEST_SUPPORT
* @brief Set or Clear the 'test support' bit for the FW version type value.
*
* @details This bit is set if the macro @ref TEST_TASK is defined.
* This bit is expected to be unset during a safety build.
*
* Building with test support enabled may add additional commands that
* can be processed by the FW to aid in testing of the system code. Often code of this
* nature can change the processing, memory, or timing characteristics of the system,
* and should only be enabled when explicitly needed.
*
*
* @see TEST_TASK
*/
#if TEST_TASK == 1
#define VERSION_CODE_PVA_TEST_SUPPORT PVA_BIT(2)
#else
#define VERSION_CODE_PVA_TEST_SUPPORT (0U)
#endif
/**
* @def VERSION_CODE_STANDALONE_TESTS
* @brief Set or Clear the 'standalone tests' bit for the FW version type value.
*
* @details This bit is set if the macro @ref TEST_TASK is defined.
* This bit is expected to be unset during a safety build.
*
* @see TEST_TASK
*
*/
#if TEST_TASK == 1
#define VERSION_CODE_STANDALONE_TESTS PVA_BIT(3)
#else
#define VERSION_CODE_STANDALONE_TESTS (0U)
#endif
/** @} */
/**
* @defgroup PVA_VERSION_MACROS PVA version macros used to calculate the PVA
* FW binary version.
* @{
*/
/**
* @brief An 8-bit bit field that describes which conditionally compiled facets of the Firmware
* have been enabled.
*
* @details The value of this macro is used when constructing a 32-bit Firmware Version identifier.
*
@verbatim
| Bit | Structure Field Name | Condition for Enabling |
|:-----:|:----------------------:|:------------------------:|
| 0 | VERSION_CODE_DEBUG | This bit is set when the Firmware is built with @ref PVA_DEBUG defined as equalling 1. |
| 1 | VERSION_CODE_SAFETY | This bit is set when the Firmware is built with @ref PVA_SAFETY defined equalling 1. |
| 2 | VERSION_CODE_PVA_TEST_SUPPORT | This bit is set when the Firmware is built with @ref TEST_TASK defined as equalling 1. |
| 3 | VERSION_CODE_STANDALONE_TESTS | This bit is set when the Firmware is built with @ref TEST_TASK defined equalling 1. |
| 4-7 | Reserved | The remaining bits of the bitfield are undefined. |
@endverbatim
* @see PVA_VERSION_TYPE_FLAGS
*/
#define VERSION_TYPE \
(uint32_t) VERSION_CODE_DEBUG | (uint32_t)VERSION_CODE_SAFETY | \
(uint32_t)VERSION_CODE_PVA_TEST_SUPPORT | \
(uint32_t)VERSION_CODE_STANDALONE_TESTS
/** @} */
/**
* @defgroup PVA_VERSION_VALUES PVA Major, Minor, and Subminor Version Values
*
* @brief The values listed below are applied to the corresponding fields when
* the PVA_VERSION macro is used.
*
* @see PVA_VERSION, PVA_MAKE_VERSION
* @{
*/
/**
* @brief The Major version of the Firmware
*/
#define PVA_VERSION_MAJOR 0x08
/**
* @brief The Minor version of the Firmware
*/
#define PVA_VERSION_MINOR 0x02
/**
* @brief The sub-minor version of the Firmware.
*/
#define PVA_VERSION_SUBMINOR 0x03
/** @} */
/**
* @def PVA_VERSION_GCID_REVISION
* @brief The GCID Revision of the Firmware.
*
* @details If this version is not otherwise defined during build time, this fallback value is used.
*/
#ifndef PVA_VERSION_GCID_REVISION
/**
* @brief GCID revision of PVA FW binary.
*/
#define PVA_VERSION_GCID_REVISION 0x00000000
#endif
/**
* @def PVA_VERSION_BUILT_ON
* @brief The date and time the version of software was built, expressed as the number
* of seconds since the Epoch (00:00:00 UTC, January 1, 1970).
*
* @details If this version is not otherwise defined during build time, this fallback value is used.
*/
#ifndef PVA_VERSION_BUILT_ON
#define PVA_VERSION_BUILT_ON 0x00000000
#endif
/** @} */
#endif

View File

@@ -1,16 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_PACKED_H
#define PVA_PACKED_H
/**
* @brief Packed attribute that prevents the compiler from adding any padding.
* The compiler implicitly adds padding between structure members
* to keep them aligned; the packed attribute avoids this.
* Packed is for shared structures between KMD and FW.
* If packed is not used, then we depend on what padding the compiler adds.
* Since KMD and FW are compiled by two different compilers, we need to
* ensure that the offsets of each member of the structure are the same in
* both KMD and FW. To ensure this we pack the structure.
*/
#define PVA_PACKED __attribute__((packed))
#endif // PVA_PACKED_H
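A minimal sketch of what PVA_PACKED changes, using a hypothetical struct that is not from the driver: without the attribute most ABIs pad the struct below to 8 bytes, with it the layout is byte-exact and therefore identical on the KMD and FW sides:

#include <stdint.h>
#include <pva-packed.h>

struct example_plain {
	uint8_t a;
	uint32_t b;	/* compiler typically inserts 3 padding bytes before this member */
};

typedef struct PVA_PACKED {
	uint8_t a;
	uint32_t b;	/* no padding: offsetof(b) == 1, sizeof(example_packed_t) == 5 */
} example_packed_t;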

View File

@@ -1,466 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_SYS_DMA_H
#define PVA_SYS_DMA_H
#include <stdint.h>
#include <pva-bit.h>
#include <pva-packed.h>
#include "pva_fw_dma_hw_interface.h"
/**
* @brief The version number of the current DMA info structure.
* This is used for detecting the DMA info updates for future
* HW releases.
*/
#define PVA_DMA_INFO_VERSION_ID (1U)
/**
* @brief Number of DMA done masks in DMA info structure,
* corresponding to the number of DMA_COMMON_DMA_OUTPUT_ENABLEx
* registers in the HW.
*/
#define PVA_SYS_DMA_NUM_TRIGGERS (9U)
/* NOTE: This must be kept as 15 for the build to be
* successful, because in pva_fw_test we configure
* 15 channels, but internally we check that the
* number of channels requested is less than the
* maximum number of available channels. */
/**
* @brief Maximum Number of DMA channel configurations
* in DMA info structure.
*/
#define PVA_SYS_DMA_NUM_CHANNELS (15U)
/**
* @brief Maximum number of DMA descriptors allowed
* for use for VPU for T23x
*/
#define PVA_SYS_DMA_MAX_DESCRIPTORS_T23X (60U)
/**
* @brief Maximum number of DMA descriptors allowed
* for use for VPU for T26x
*/
#define PVA_SYS_DMA_MAX_DESCRIPTORS_T26X (92U)
/**
* @brief DMA registers for VPU0 and VPU1 which are primarily
* used by DMA config and R5 initialization.
*
* For more information refer to section 3.4 in PVA Cluster IAS
* document (Document 11 in Supporting Documentation and References)
*/
/**
* @brief DMA channel base register for VPU0.
*/
#define PVA_DMA0_REG_CH_0 PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_REG_CH_0_BASE)
/**
* @brief DMA common base register for VPU0.
*/
#define PVA_DMA0_COMMON PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_COMMON_BASE)
/**
* @brief DMA DESCRAM base register for VPU0.
*/
#define PVA_DMA0_DESCRAM PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_DESCRAM_BASE)
/**
* @brief DMA channel base register for VPU1.
*/
#define PVA_DMA1_REG_CH_0 PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_REG_CH_0_BASE)
/**
* @brief DMA common base register for VPU1.
*/
#define PVA_DMA1_COMMON PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_COMMON_BASE)
/**
* @brief DMA DESCRAM base register for VPU1.
*/
#define PVA_DMA1_DESCRAM PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_DESCRAM_BASE)
/** @} */
/**
*
* @brief DMA channel configuration for a user task.
*
* The DMA channel structure contains the set-up of a
* PVA DMA channel used for the VPU app.
*
* This VPU app should configure the channel information
* in this format
*
* @note : For more information on channel configuration, refer section 4.1.2 and 6.4 in
* the DMA IAS document (Document 6 in Supporting Documentation and References)
*/
typedef struct PVA_PACKED {
/**
* @brief HW DMA channel number from 1 to @ref PVA_NUM_DMA_CHANNELS.
*/
uint8_t ch_number;
/**
* @brief Padding bytes of 3 added to align the next
* field of 4 bytes
*/
uint8_t pad_dma_channel1[3];
/**
* @brief The value to be written to DMA channel
* control 0 register
*/
uint32_t cntl0;
/**
* @brief The value to be written to DMA channel
* control 1 register
*/
uint32_t cntl1;
/**
* @brief The value to be written to DMA channel
* boundary pad register
*/
uint32_t boundary_pad;
/**
* @brief This value to be written to DMA HW sequence
* control register.
*/
uint32_t hwseqcntl;
/**
* @brief This field is unused in t19x and T23x.
* It contains the value to be written to DMA
* channel HWSEQFSCNTL register.
*/
uint32_t hwseqfscntl;
/**
* @brief Output enable mask
*/
uint32_t outputEnableMask;
/**
* @brief Padding 8 bytes to align the whole structure
* to 32 byte boundary
*/
uint32_t pad_dma_channel0[1];
} pva_dma_ch_config_t;
/**
*
* @brief DMA info for an application. The app may be a VPU app which
* runs an algorithm on VPU or a DMA app which just has DMA configuration
* to move certain data. In both cases the application should
* configure the DMA information in this structure format
*
*/
typedef struct PVA_PACKED {
/**
* @brief The size of the dma_info structure.
* Should be populated with value sizeof(pva_dma_info_t)
* This is used to validate that the DRAM location populated
* by KMD is valid
*/
uint16_t dma_info_size;
/**
* @brief This field is used to populate the DMA info version.
* If a new DMA info structure version is created,
* this lets the FW distinguish between the DMA
* info structures. Currently it should be populated with the value
* @ref PVA_DMA_INFO_VERSION_ID
*/
uint16_t dma_info_version;
/**
* @brief The number of used channels. This field can
* be populated with values from 0 to
* @ref PVA_NUM_DMA_CHANNELS both inclusive.
*/
uint8_t num_channels;
/**
* @brief Number of used descriptors.
*
* Note: In generations of PVA where the reserved descriptor range lies
* in the middle of the entire descriptor range, when the range of
* descriptors requested by the user crosses over the reserved descriptor
* range, 'num_descriptors' will include the number of the reserved
* descriptors as well.
* E.g., if reserved descriptors are at indices 60-63 and user application
* needs 70 descriptors, 'num_descriptor' will equal 74. However,
* if user application needs 30 descriptors, 'num_descriptors' will be 30.
*
* On T19x and T23x, the field can be populated
* with values from 0 inclusive to less than
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS
*
* On T26x, the field can be populated with values from 0 inclusive to
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS + @ref PVA_NUM_RESERVED_DESCRIPTORS
*/
uint8_t num_descriptors;
/**
* @brief The number of bytes used in HW sequencer
*/
uint16_t num_hwseq;
/**
* @brief The First HW descriptor ID used.
*
* On T19x and T23x, the field can be populated
* with values from 0 inclusive to less than
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS
*
* On T26x, the field can be populated with values from 0 inclusive to
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS + @ref PVA_NUM_RESERVED_DESCRIPTORS
*/
uint8_t descriptor_id;
/**
* @brief Padding for alignment of next element
*/
uint8_t pva_dma_info_pad_0[3];
/**
* @brief DMA done triggers used by the VPU app.
* Correspond to COMMON_DMA_OUTPUT_ENABLE registers.
*/
uint32_t dma_triggers[PVA_SYS_DMA_NUM_TRIGGERS];
/**
* @brief DMA channel config used by the VPU app.
* One app can have up to @ref PVA_NUM_DMA_CHANNELS
* DMA channel configurations. The size of the array
* is @ref PVA_SYS_DMA_NUM_CHANNELS to allow for additional
* configurations required by future products.
*/
pva_dma_ch_config_t dma_channels[PVA_SYS_DMA_NUM_CHANNELS];
/**
* @brief Value to be set in DMA common configuration register.
*/
uint32_t dma_common_config;
/**
* @brief IOVA to an array of @ref pva_dtd_t, aligned at 64 bytes
* which holds the DMA descriptors used by the VPU app
*/
pva_iova dma_descriptor_base;
/**
* @brief HW sequencer configuration base address.
*/
pva_iova dma_hwseq_base;
/**
* @brief IOVA to a structure of @ref pva_dma_misr_config_t,
* location where DMA MISR configuration information is stored.
*/
pva_iova dma_misr_base;
} pva_dma_info_t;
/**
* @brief DMA descriptor.
*
* PVA DMA Descriptor in packed HW format.
* The individual fields can be found from
* the DMA IAS document (Document 6 in Supporting Documentation and References)
* section 4.1.3.2
*/
typedef struct PVA_PACKED {
/** @brief TRANSFER_CONTROL0 byte has DSTM in lower 2 bits, SRC_TF in 3rd bit,
* DDTM in 4th to 6th bit,DST_TF in 7th bit */
uint8_t transfer_control0;
/** @brief Next descriptor ID to be executed*/
uint8_t link_did;
/** @brief Highest 8 bits of the 40 bit source address*/
uint8_t src_adr1;
/** @brief Highest 8 bits of the 40 bit destination address*/
uint8_t dst_adr1;
/** @brief Lower 32 bits of the 40 bit source address*/
uint32_t src_adr0;
/** @brief Lower 32 bits of the 40 bit destination address*/
uint32_t dst_adr0;
/** @brief Length of tile line*/
uint16_t tx;
/** @brief Number of tile lines*/
uint16_t ty;
/** @brief Source Line pitch to advance to every line of 2D tile.*/
uint16_t slp_adv;
/** @brief Destination Line Pitch to advance to every line of 2D tile.*/
uint16_t dlp_adv;
/** @brief SRC PT1 CNTL has st1_adv in low 24 bits and ns1_adv in high 8 bits. */
uint32_t srcpt1_cntl;
/** @brief DST PT1 CNTL has dt1_adv in low 24 bits and nd1_adv in high 8 bits. */
uint32_t dstpt1_cntl;
/** @brief SRC PT2 CNTL has st2_adv in low 24 bits and ns2_adv in high 8 bits. */
uint32_t srcpt2_cntl;
/** @brief DST PT2 CNTL has dt2_adv in low 24 bits and nd2_adv in high 8 bits. */
uint32_t dstpt2_cntl;
/** @brief SRC PT3 CNTL has st3_adv in low 24 bits and ns3_adv in high 8 bits. */
uint32_t srcpt3_cntl;
/** @brief DST PT3 CNTL has dt3_adv in low 24 bits and nd3_adv in high 8 bits. */
uint32_t dstpt3_cntl;
/** @brief Source circular buffer Start address offset */
uint16_t sb_start;
/** @brief Destination circular buffer Start address offset*/
uint16_t db_start;
/** @brief Source buffer size in bytes for circular buffer mode from Source address.*/
uint16_t sb_size;
/** @brief Destination buffer size in bytes for circular buffer mode from destination address.*/
uint16_t db_size;
/** @brief currently reserved*/
uint16_t trig_ch_events;
/** @brief SW or HW events used for triggering the channel*/
uint16_t hw_sw_trig_events;
/** @brief Tile x coordinates, for boundary padding in pixels*/
uint8_t px;
/** @brief Tile y coordinates, for boundary padding in pixels*/
uint8_t py;
/** @brief Transfer control byte has lower 2 bits as BPP data, bit 2 with PXDIR, bit 3 as PYDIR,
* bit 4 as BPE, bit 5 as TTS, bit 6 RSVD, Bit 7 ITC.
*/
uint8_t transfer_control1;
/** @brief Transfer control 2 has bit 0 as PREFEN, bit 1 as DCBM, bit 2 as SCBM, Bit 3 to 3 as SBADR.*/
uint8_t transfer_control2;
/** @brief Circular buffer upper bits for start address and size*/
uint8_t cb_ext;
/** @brief Reserved*/
uint8_t rsvd;
/** @brief Full replicated destination base address in VMEM aligned to 64 byte atom*/
uint16_t frda;
} pva_dtd_t;
/**
*
* @brief DMA MISR configuration information. This information is used by R5
* to program MISR registers if a task requests MISR computation on its
* output DMA channels.
*
*/
typedef struct PVA_PACKED {
/** @brief Reference value for CRC computed on write addresses, i.e., MISR 1 */
uint32_t ref_addr;
/** @brief Seed value for address CRC*/
uint32_t seed_crc0;
/** @brief Reference value for CRC computed on first 256-bits of AXI write data */
uint32_t ref_data_1;
/** @brief Seed value for write data CRC*/
uint32_t seed_crc1;
/** @brief Reference value for CRC computed on second 256-bits of AXI write data */
uint32_t ref_data_2;
/**
* @brief MISR timeout value configured in DMA common register
* @ref PVA_DMA_COMMON_MISR_ENABLE. Timeout is calculated as
* number of AXI clock cycles.
*/
uint32_t misr_timeout;
} pva_dma_misr_config_t;
/**
* @defgroup PVA_DMA_TC0_BITS
*
* @brief PVA Transfer Control 0 Bitfields
*
* @{
*/
/**
* @brief The shift value for extracting DSTM field
*/
#define PVA_DMA_TC0_DSTM_SHIFT (0U)
/**
* @brief The mask to be used to extract DSTM field
*/
#define PVA_DMA_TC0_DSTM_MASK (7U)
/**
* @brief The shift value for extracting DDTM field
*/
#define PVA_DMA_TC0_DDTM_SHIFT (4U)
/**
* @brief The mask to be used to extract DDTM field
*/
#define PVA_DMA_TC0_DDTM_MASK (7U)
/** @} */
/**
* @defgroup PVA_DMA_TM
*
* @brief DMA Transfer Modes. These can be used for both
* Source (DSTM) and Destination (DDTM) transfer modes
*
* @note : For more information on transfer modes, refer section 4.1.3.1 in
* the DMA IAS document (Document 6 in Supporting Documentation and References)
*
* @{
*/
/**
* @brief To indicate invalid transfer mode
*/
#define PVA_DMA_TM_INVALID (0U)
/**
* @brief To indicate MC transfer mode
*/
#define PVA_DMA_TM_MC (1U)
/**
* @brief To indicate VMEM transfer mode
*/
#define PVA_DMA_TM_VMEM (2U)
#if ENABLE_UNUSED == 1U
#define PVA_DMA_TM_CVNAS (3U)
#endif
/**
* @brief To indicate L2SRAM transfer mode
*/
#define PVA_DMA_TM_L2RAM (3U)
/**
* @brief To indicate TCM transfer mode
*/
#define PVA_DMA_TM_TCM (4U)
/**
* @brief To indicate MMIO transfer mode
*/
#define PVA_DMA_TM_MMIO (5U)
/**
* @brief To indicate Reserved transfer mode
*/
#define PVA_DMA_TM_RSVD (6U)
/**
* @brief To indicate VPU configuration transfer mode.
* This is only available in Source transfer mode or
* (DSTM). In Destination transfer mode, this value is
* reserved.
*/
#define PVA_DMA_TM_VPU (7U)
/** @} */
#if (ENABLE_UNUSED == 1U)
/**
* @brief The macro defines the number of
* bits to shift right to get the PXDIR field
* in Transfer Control 1 register in DMA
* Descriptor
*/
#define PVA_DMA_TC1_PXDIR_SHIFT (2U)
/**
* @brief The macro defines the number of
* bits to shift right to get the PYDIR field
* in Transfer Control 1 register in DMA
* Descriptor
*/
#define PVA_DMA_TC1_PYDIR_SHIFT (3U)
#endif
/**
* @defgroup PVA_DMA_BPP
*
* @brief PVA DMA Bits per Pixel
*
* @{
*/
/**
* @brief To indicate that the size of pixel data
* is 1 byte
*/
#define PVA_DMA_BPP_INT8 (0U)
#if ENABLE_UNUSED == 1U
#define PVA_DMA_BPP_INT16 (1U)
#endif
/** @} */
/**
* @brief PVA DMA Pad X direction set to right
*/
#define PVA_DMA_PXDIR_RIGHT (1U)
/**
* @brief PVA DMA Pad Y direction set to bottom
*/
#define PVA_DMA_PYDIR_BOT (1U)
#endif /* PVA_SYS_DMA_H */
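A small sketch of decoding a descriptor's transfer modes with the TC0 helpers above; the function name is hypothetical and it assumes this header is available as pva-sys-dma.h:

#include <stdint.h>
#include "pva-sys-dma.h"

/* Extract the source (DSTM) and destination (DDTM) transfer modes,
 * e.g. PVA_DMA_TM_MC or PVA_DMA_TM_VMEM, from transfer_control0. */
static void pva_decode_tc0(const pva_dtd_t *desc, uint8_t *src_mode, uint8_t *dst_mode)
{
	*src_mode = (uint8_t)((desc->transfer_control0 >> PVA_DMA_TC0_DSTM_SHIFT) &
			      PVA_DMA_TC0_DSTM_MASK);
	*dst_mode = (uint8_t)((desc->transfer_control0 >> PVA_DMA_TC0_DDTM_SHIFT) &
			      PVA_DMA_TC0_DDTM_MASK);
}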

View File

@@ -1,131 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_SYS_PARAMS_H
#define PVA_SYS_PARAMS_H
#include <stdint.h>
#include <pva-packed.h>
#include <pva-types.h>
/** @brief VPU app parameters provided by kernel-user which is to be copied to
* VMEM during runtime
*
* The VPU App parameters contains kernel-user-provided data to be
* copied into the VMEM before executing the VPU app. The parameter
* headers are stored in the IOVA address stored in the param_base
* member of this structure.
*
* The FW can also initialize complex datatypes, which are marked by
* special param_base outside the normal IOVA space. See the structure
* pva_vpu_instance_data_t for an example.
*/
typedef struct PVA_PACKED {
/** @brief IOVA address of the parameter data */
pva_iova param_base;
/** @brief VMEM offset where parameter data is to be copied */
uint32_t addr;
/** @brief Size of the parameter data in bytes */
uint32_t size;
} pva_vpu_parameter_list_t;
/**
* @brief The structure holds information of various
* VMEM parameters that is submitted in the task.
*/
typedef struct PVA_PACKED {
/**
* @brief The IOVA address of the parameter data.
* This should point to an array of type @ref pva_vpu_parameter_list_t .
* If no parameters are present this should be set to 0
*/
pva_iova parameter_data_iova;
/**
* @brief The starting IOVA address of the parameter data whose size
* is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This data is copied
* from DRAM to TCM using DMA, and then memcopied to VMEM.
* If no small parameters are present this should be set to 0.
*/
pva_iova small_vpu_param_data_iova;
/**
* @brief The number of bytes of small VPU parameter data, i.e the
* data whose size is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . If no small
* parameters are present, this should be set to 0
*/
uint32_t small_vpu_parameter_data_size;
/**
* @brief The index of the array of type @ref pva_vpu_parameter_list_t from which
* the VPU large parameters are present, i.e the vpu parameters whose size is greater
* than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This value will always point to the index
* immediately after the small parameters. If no large parameter is present, then
* this field value will be same as the value of
* @ref pva_vpu_parameter_info_t.vpu_instance_parameter_list_start_index field
*/
uint32_t large_vpu_parameter_list_start_index;
/**
* @brief The index of the array of type @ref pva_vpu_parameter_list_t from which
* the VPU instance parameters are present. This value will always point to the index
* immediately after the large parameters if large parameters are present, else it
* will be the same value as @ref pva_vpu_parameter_info_t.large_vpu_parameter_list_start_index
* field.
*/
uint32_t vpu_instance_parameter_list_start_index;
} pva_vpu_parameter_info_t;
/** @brief Special marker for IOVA address of parameter data of a task to differentiate
* if the parameter data specified in task should be used or if FW should create a supported
* parameter data instance. If the IOVA address of parameter data is less than this
* special marker, then use the parameter data specified in the task, else FW
* creates the parameter data.
*/
#define PVA_COMPLEX_IOVA (0xDA7AULL << 48ULL)
/** @brief Macro used to create new parameter base markers
* from the special marker address @ref PVA_COMPLEX_IOVA
*/
#define PVA_COMPLEX_IOVA_V(v) (PVA_COMPLEX_IOVA | ((uint64_t)(v) << 32ULL))
/** @brief Special Marker for @ref pva_vpu_instance_data_t */
#define PVA_SYS_INSTANCE_DATA_V1_IOVA (PVA_COMPLEX_IOVA_V(1) | 0x00000001ULL)
/**
* @brief The minimum size of the VPU parameter for it to be considered
* as a large parameter
*/
#define PVA_DMA_VMEM_COPY_THRESHOLD (uint32_t)(256U)
/**
* @brief The maximum combined size of all VMEM parameters
* that will be supported by PVA
*/
#define VMEM_PARAMETER_BUFFER_MAX_SIZE (uint32_t)(8192U)
/**
* @brief The maximum number of symbols that will be supported
* for one task
*/
#define TASK_VMEM_PARAMETER_MAX_SYMBOLS (uint32_t)(128U)
/**
* @brief Information of the VPU instance data passed to VPU kernel.
*/
typedef struct PVA_PACKED {
/** @brief ID of the VPU assigned to the task */
uint16_t pve_id;
/** @brief Variable to indicate that ppe task was launched or not */
uint16_t ppe_task_launched;
/** @brief Base of the VMEM memory */
uint32_t vmem_base;
/** @brief Base of the DMA descriptor SRAM memory */
uint32_t dma_descriptor_base;
/** @brief Base of L2SRAM allocated for the task executed */
uint32_t l2ram_base;
/** @brief Size of L2SRAM allocated for the task executed */
uint32_t l2ram_size;
} pva_vpu_instance_data_t;
#endif /* PVA_SYS_PARAMS_H */
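A small sketch of how the complex-IOVA marker above is meant to be tested; the helper name is hypothetical and it assumes this header is available as pva-sys-params.h:

#include <stdbool.h>
#include "pva-sys-params.h"

/* Per the comment on PVA_COMPLEX_IOVA: addresses below the marker are normal
 * parameter data IOVAs, while addresses at or above it (such as
 * PVA_SYS_INSTANCE_DATA_V1_IOVA) ask FW to create the parameter data itself. */
static bool pva_param_is_complex(pva_iova param_base)
{
	return param_base >= PVA_COMPLEX_IOVA;
}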

View File

@@ -1,30 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_TYPES_H
#define PVA_TYPES_H
#include <stdint.h>
/**
* @brief Used to represent address (IOVA) in PVA system.
*/
typedef uint64_t pva_iova;
/**
* @brief Used to store Queue IDs, that represent the
* actual hardware queue id between FW and KMD.
*/
typedef uint8_t pva_queue_id_t;
/**
* @brief Used to store the PVE ID, which represents which
* PVE is being referred to.
*/
typedef uint8_t pva_pve_id_t;
/**
* @brief Used to store the Status interface ID, which is used
* to know through which interface the status needs to be written.
*/
typedef uint8_t pva_status_interface_id_t;
#endif

View File

@@ -1,55 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_VERSION_H
#define PVA_VERSION_H
#include <stdint.h>
#include <pva-bit.h>
#include <pva-fw-version.h>
/**
* @brief Calculate a 32-bit build version with @ref PVA_VERSION_SUBMINOR,
* @ref PVA_VERSION_MINOR, @ref PVA_VERSION_MAJOR and @ref VERSION_TYPE macros.
*
* @param [in] \_type\_ an 8-bit bitfield containing flags indicating which compilation
* features were enabled when the firmware was compiled.
*
* @param [in] \_major\_ an unsigned, 8-bit value containing the major version of the
* compiled firmware.
*
* @param [in] \_minor\_ an unsigned, 8-bit value containing the minor version of the
* compiled firmware.
*
* @param [in] \_subminor\_ an unsigned, 8-bit value containing the sub-minor version
* of the compiled firmware.
@verbatim
| ------------- | ---------------------|
| Bit Ranges | Function |
| ------------- | ---------------------|
| 7-0 | subminor version |
| 15-8 | minor version |
| 23-16 | major version |
| 31-24 | version type |
----------------------------------------
@endverbatim
*/
#define PVA_MAKE_VERSION(_type_, _major_, _minor_, _subminor_) \
(PVA_INSERT(_type_, 31, 24) | PVA_INSERT(_major_, 23, 16) | \
PVA_INSERT(_minor_, 15, 8) | PVA_INSERT(_subminor_, 7, 0))
/**
* @brief Calculate PVA R5 FW binary version by calling @ref PVA_MAKE_VERSION macro.
*
* @param [in] \_type\_ an 8-bit bitfield containing flags indicating which compilation
* features were enabled when the firmware was compiled.
*
* @see VERSION_TYPE For details on how to construct the @p \_type\_ field.
*
* @see PVA_VERSION_MAJOR, PVA_VERSION_MINOR, PVA_VERSION_SUBMINOR for details
* on the values used at the time this documentation was produced.
*/
#define PVA_VERSION(_type_) \
PVA_MAKE_VERSION(_type_, PVA_VERSION_MAJOR, PVA_VERSION_MINOR, \
PVA_VERSION_SUBMINOR)
#endif
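A worked example of the packing above, as a sketch: for a safety build whose type byte is 0x02 (only VERSION_CODE_SAFETY set) and the 8.2.3 values from pva-fw-version.h, the macro yields 0x02080203:

#include <stdint.h>
#include <pva-version.h>

/* 0x02 << 24 | 0x08 << 16 | 0x02 << 8 | 0x03 == 0x02080203 */
static const uint32_t example_fw_version = PVA_VERSION(0x02U);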

View File

@@ -1,9 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_VPU_SYSCALL_INTERFACE_H
#define PVA_VPU_SYSCALL_INTERFACE_H
#include <stdint.h>
#endif /*PVA_VPU_SYSCALL_INTERFACE_H*/

View File

@@ -29,6 +29,12 @@ struct pva_resource_entry {
uint32_t size_lo;
};
struct pva_resource_aux_info {
// Serial ID of NvRM memory resources
uint32_t serial_id_hi;
uint32_t serial_id_lo;
};
struct pva_cmd_init_resource_table {
#define PVA_CMD_OPCODE_INIT_RESOURCE_TABLE (0U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
@@ -55,6 +61,7 @@ struct pva_cmd_update_resource_table {
uint8_t pad[3];
uint32_t resource_id;
struct pva_resource_entry entry;
struct pva_resource_aux_info aux_info;
};
struct pva_cmd_init_queue {
@@ -129,10 +136,10 @@ struct pva_cmd_deinit_shared_dram_buffer {
uint8_t interface;
uint8_t pad[3];
};
struct pva_cmd_set_debug_log_level {
#define PVA_CMD_OPCODE_SET_DEBUG_LOG_LEVEL (12U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_set_trace_level {
#define PVA_CMD_OPCODE_SET_TRACE_LEVEL (12U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint32_t log_level;
uint32_t trace_level;
};
struct pva_cmd_set_profiling_level {
@@ -141,7 +148,15 @@ struct pva_cmd_set_profiling_level {
uint32_t level;
};
#define PVA_CMD_PRIV_OPCODE_COUNT 14U
struct pva_cmd_get_version {
#define PVA_CMD_OPCODE_GET_VERSION (14U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t buffer_iova_hi;
uint8_t pad[3];
uint32_t buffer_iova_lo;
};
#define PVA_CMD_PRIV_OPCODE_COUNT 15U
struct pva_fw_prefence {
uint8_t offset_hi;
@@ -221,9 +236,9 @@ static inline uint32_t pva_fw_queue_count(uint32_t head, uint32_t tail,
uint32_t size)
{
if (tail >= head) {
return safe_subu32(tail, head);
return tail - head;
} else {
return safe_addu32(safe_subu32(size, head), tail);
return sat_sub32(size, head - tail);
}
}
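Worked example of the count above: with size = 8, head = 6 and tail = 2 the ring has wrapped, so pva_fw_queue_count(6, 2, 8) returns sat_sub32(8, 6 - 2) = 4; in the non-wrapping case pva_fw_queue_count(2, 6, 8) simply returns 6 - 2 = 4.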
@@ -237,22 +252,11 @@ static inline uint32_t pva_fw_queue_space(uint32_t head, uint32_t tail,
/* CCQ commands: KMD -> R5, through CCQ FIFO */
/*
* Most CCQ commands are meant to be used at init time.
* During runtime, only use PVA_FW_CCQ_OP_UPDATE_TAIL
* CCQ commands are meant to be used at init time.
*/
#define PVA_FW_CCQ_OPCODE_MSB 63
#define PVA_FW_CCQ_OPCODE_LSB 60
/*
* tail value bit field: 31 - 0
* queue id bit field: 40 - 32
*/
#define PVA_FW_CCQ_OP_UPDATE_TAIL 0
#define PVA_FW_CCQ_TAIL_MSB 31
#define PVA_FW_CCQ_TAIL_LSB 0
#define PVA_FW_CCQ_QUEUE_ID_MSB 40
#define PVA_FW_CCQ_QUEUE_ID_LSB 32
/*
* resource table IOVA addr bit field: 39 - 0
* resource table number of entries bit field: 59 - 40
@@ -435,6 +439,7 @@ struct pva_kmd_fw_buffer_msg_header {
#define PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE 1
#define PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE 2
#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 3
#define PVA_KMD_FW_BUF_MSG_TYPE_FW_TRACEPOINT 4
uint32_t type : 8;
// Size of payload in bytes. Includes the size of the header.
uint32_t size : 24;
@@ -475,8 +480,7 @@ struct pva_kmd_fw_msg_fence_trace {
uint64_t fence_id;
// 'offset' is the offset into the semaphore memory where the value is stored
// This is only valid for semaphore fences
// Note: Trace APIs in KMD only support 32-bit offset
uint32_t offset;
uint64_t offset;
uint32_t value;
uint8_t ccq_id;
uint8_t queue_id;
@@ -506,4 +510,101 @@ struct pva_kmd_fw_tegrastats {
#define PVA_TEST_MODE_MAX_CMDBUF_CHUNK_SIZE \
(sizeof(uint32_t) * PVA_TEST_MODE_MAX_CMDBUF_CHUNK_LEN)
#define PVA_FW_TP_LVL_NONE 0U
#define PVA_FW_TP_LVL_CMD_BUF PVA_BIT8(0)
#define PVA_FW_TP_LVL_VPU PVA_BIT8(1)
#define PVA_FW_TP_LVL_DMA PVA_BIT8(2)
#define PVA_FW_TP_LVL_L2SRAM PVA_BIT8(3)
#define PVA_FW_TP_LVL_PPE PVA_BIT8(4)
#define PVA_FW_TP_LVL_ALL \
(PVA_FW_TP_LVL_CMD_BUF | PVA_FW_TP_LVL_VPU | PVA_FW_TP_LVL_DMA | \
PVA_FW_TP_LVL_PPE | PVA_FW_TP_LVL_L2SRAM)
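A minimal usage sketch from the KMD side (illustrative only; the helper is introduced later in this change, and the trace level is treated as a bitmask of the levels above):
static inline enum pva_error
example_enable_vpu_dma_tracepoints(struct pva_kmd_device *pva)
{
	/* Request only VPU and DMA tracepoints from firmware */
	return pva_kmd_notify_fw_set_trace_level(
		pva, PVA_FW_TP_LVL_VPU | PVA_FW_TP_LVL_DMA);
}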
/* Tracepoint Flags for PVA */
/** @brief Macro to define flag field for a normal checkpoint*/
#define PVA_FW_TP_FLAG_NONE (0U)
/** @brief Macro to define a checkpoint's flag field to indicate start of an operation */
#define PVA_FW_TP_FLAG_START (1U)
/** @brief Macro to define a checkpoint's flag field to indicate end of an operation */
#define PVA_FW_TP_FLAG_END (2U)
/** @brief Macro to define a checkpoint's flag field to indicate error */
#define PVA_FW_TP_FLAG_ERROR (3U)
struct pva_fw_tracepoint {
uint32_t type : 3;
uint32_t flags : 2;
uint32_t slot_id : 2;
uint32_t ccq_id : 3;
uint32_t queue_id : 3;
uint32_t engine_id : 1;
uint32_t arg1 : 2;
uint32_t arg2 : 16;
};
static inline const char *pva_fw_tracepoint_type_to_string(uint32_t type)
{
switch (type) {
case PVA_FW_TP_LVL_NONE:
return "NONE";
case PVA_FW_TP_LVL_CMD_BUF:
return "CMD_BUF";
case PVA_FW_TP_LVL_VPU:
return "VPU";
case PVA_FW_TP_LVL_DMA:
return "DMA";
case PVA_FW_TP_LVL_L2SRAM:
return "L2SRAM";
case PVA_FW_TP_LVL_PPE:
return "PPE";
default:
return "UNKNOWN";
}
}
static inline const char *pva_fw_tracepoint_flags_to_string(uint32_t flags)
{
switch (flags) {
case PVA_FW_TP_FLAG_NONE:
return "NONE";
case PVA_FW_TP_FLAG_START:
return "START";
case PVA_FW_TP_FLAG_END:
return "END";
case PVA_FW_TP_FLAG_ERROR:
return "ERROR";
default:
return "UNKNOWN";
}
}
static inline const char *pva_fw_tracepoint_slot_id_to_string(uint32_t slot_id)
{
switch (slot_id) {
case 0:
return "PRIV_SLOT";
case 1:
return "USER_SLOT_1";
case 2:
return "USER_SLOT_2";
case 3:
return "USER_PRIV_SLOT";
default:
return "UNKNOWN";
}
}
#define PVA_R5_OCD_TYPE_MMIO_READ 1
#define PVA_R5_OCD_TYPE_MMIO_WRITE 2
#define PVA_R5_OCD_TYPE_REG_READ 3
#define PVA_R5_OCD_TYPE_REG_WRITE 4
#define PVA_R5_OCD_MAX_DATA_SIZE FW_TRACE_BUFFER_SIZE
struct pva_r5_ocd_request {
uint32_t type;
uint32_t addr;
uint32_t size;
//followed by data if any
};
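For illustration, a sketch of how a write request could be framed in a staging buffer, assuming (per the comment above) that any payload immediately follows the fixed header; the helper name is made up for this example:
static inline uint32_t example_build_ocd_mmio_write(uint8_t *buf, uint32_t addr,
						    const void *payload,
						    uint32_t payload_size)
{
	struct pva_r5_ocd_request req = {
		.type = PVA_R5_OCD_TYPE_MMIO_WRITE,
		.addr = addr,
		.size = payload_size,
	};
	/* Fixed header first, then the write payload */
	memcpy(buf, &req, sizeof(req));
	memcpy(buf + sizeof(req), payload, payload_size);
	return (uint32_t)(sizeof(req) + payload_size);
}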
#endif // PVA_FW_H

View File

@@ -123,6 +123,9 @@
* programmed by Hypervisor.
* @{
*/
#define PVA_SCR_LOCK PVA_BIT(29)
/**
* @brief EVP SCR firewall to enable only CCPLEX read/write access.
*/
@@ -142,6 +145,7 @@
* @brief Status Ctl SCR firewall to enable only CCPLEX read access and R5 read/write access.
*/
#define PVA_STATUS_CTL_SCR_VAL 0x1f008082
#define PVA_STATUS_CTL_SCR_VAL_SIM 0x1f008282
/** @} */
/**

View File

@@ -88,6 +88,7 @@
#define PVA_RO_SYNC_BASE_SEMA 1U
#define PVA_RW_SYNC_BASE_SEMA 2U
#define PVA_RW_SYNC_SIZE_SEMA 3U
#define PVA_SEMA_MAX 4U
/**
* @brief This macro has the value to be set by KMD in the shared semaphores

View File

@@ -217,7 +217,7 @@ enum pva_error pva_memory_import_id_destroy(uint64_t import_id);
#define PVA_SYSSW_MAJOR_VERSION (2U)
/** \brief Specifies the PVA system software minor version. */
#define PVA_SYSSW_MINOR_VERSION (7U)
#define PVA_SYSSW_MINOR_VERSION (8U)
#ifdef __cplusplus
}

View File

@@ -53,6 +53,7 @@ struct pva_dma_misr {
struct pva_user_dma_allowance {
#define PVA_USER_DMA_ALLOWANCE_ADB_STEP_SIZE 8
/*desc start index and descriptor count should be multiple of 4*/
uint32_t channel_idx : 4;
uint32_t desc_start_idx : 7;
uint32_t desc_count : 7;
@@ -60,6 +61,40 @@ struct pva_user_dma_allowance {
uint32_t adb_count : 6;
};
/**
* @brief Parameter array structure for pva_cmd_set_vpu_parameter_array command
*
* Memory layout: [pva_vpu_parameter_array][parameter_data][next_array]...
* Parameter data must be 4-byte aligned. Each data_size is rounded up to 4-byte boundary.
*/
struct pva_vpu_parameter_array {
uint16_t data_size;
uint8_t pad[2];
uint32_t symbol_id;
uint32_t vmem_offset;
};
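A small sketch of the per-entry size implied by the layout comment above (illustrative only; the helper name exists just for this example):
static inline uint32_t example_param_array_entry_size(uint16_t data_size)
{
	/* Round the parameter data up to the next 4-byte boundary */
	uint32_t padded = ((uint32_t)data_size + 3U) & ~3U;
	return (uint32_t)sizeof(struct pva_vpu_parameter_array) + padded;
}
The payload following a pva_cmd_set_vpu_parameter_array command would then be the sum of this value over its param_count entries.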
/*struct for set_vpu_array_parameter_with_address*/
struct pva_vpu_parameter_with_address_array {
uint8_t flags; /**< Control flags: 0x1=legacy, 0=modern (default) */
uint8_t dram_offset_hi;
uint8_t pad[2];
uint32_t symbol_id;
uint32_t dram_resource_id;
uint32_t dram_offset_lo;
};
/*struct for set_vpu_array_parameter_with_buffer*/
struct pva_vpu_parameter_with_buffer_array {
uint8_t src_dram_offset_hi;
uint8_t pad[3];
uint32_t data_size;
uint32_t dst_symbol_id;
uint32_t dst_vmem_offset;
uint32_t src_dram_resource_id;
uint32_t src_dram_offset_lo;
};
/* Basic Commands */
/** Does nothing. It can be used as a placeholder in the command buffer. */
@@ -109,7 +144,7 @@ struct pva_cmd_acquire_engine {
};
/** Release all PVE systems acquired. It is legal to release engine when engine
* is still running. The released engine wont be available to be acquired until
* is still running. The released engine won't be available to be acquired until
* it finishes and becomes idle again. */
struct pva_cmd_release_engine {
#define PVA_CMD_OPCODE_RELEASE_ENGINE 4U
@@ -126,7 +161,7 @@ struct pva_cmd_set_current_engine {
};
/** This command specifies the executable to use for the following VPU launches.
* It doesnt do anything other than setting the context for the following
* It doesn't do anything other than setting the context for the following
* commands.
*
* Note: This command cannot be initiated if any of the DMA sets (that access
@@ -169,7 +204,7 @@ struct pva_cmd_prefetch_vpu_code {
/** Run the VPU program from the specified entry point until finish. The
* lifetime of this command covers the entire VPU program execution. Since this
* command is asynchronous, it doesnt block the following commands from
* command is asynchronous, it doesn't block the following commands from
* execution. */
struct pva_cmd_run_vpu {
#define PVA_CMD_OPCODE_RUN_VPU 10U
@@ -225,8 +260,8 @@ struct pva_cmd_set_vpu_parameter_with_address {
* hardware, allowing FW to continue using user channels for data transfer after
* its execution. This command only uses channel 0 to fetch the DMA
* configuration. However, user can still help speed up the process by
* providing additional ADBs. This command will block if theres no TCM scratch
* available. If theres no pending commands AND theres no TCM scratch, then it
* providing additional ADBs. This command will block if there's no TCM scratch
* available. If there's no pending commands AND there's no TCM scratch, then it
* means we encountered a dead lock, the command buffer will be aborted. */
struct pva_cmd_fetch_dma_configuration {
#define PVA_CMD_OPCODE_FETCH_DMA_CONFIGURATION 14U
@@ -268,7 +303,7 @@ struct pva_cmd_run_dma {
};
/** This command specifies the executable to use for the following PPE launches.
* It doesnt do anything other than setting the context for the following
* It doesn't do anything other than setting the context for the following
* commands. */
struct pva_cmd_set_ppe_executable {
#define PVA_CMD_OPCODE_SET_PPE_EXECUTABLE 17U
@@ -515,6 +550,30 @@ struct pva_cmd_setup_misr {
struct pva_dma_misr misr_params;
};
#define PVA_CMD_OPCODE_MAX 36U
struct pva_cmd_set_vpu_parameter_array {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_ARRAY 36U
struct pva_cmd_header header;
uint16_t param_count;
uint16_t pad;
};
struct pva_cmd_set_vpu_parameter_with_address_array {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_WITH_ADDRESS_ARRAY 37U
struct pva_cmd_header header;
uint16_t param_count;
uint16_t pad;
/*Followed by param_count number of struct pva_vpu_parameter_with_address_array*/
};
struct pva_cmd_set_vpu_parameter_with_buffer_array {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_WITH_BUFFER_ARRAY 38U
struct pva_cmd_header header;
uint16_t param_count;
uint16_t pad;
struct pva_user_dma_allowance user_dma;
/*Followed by param_count number of struct pva_vpu_parameter_with_buffer_array*/
};
#define PVA_CMD_OPCODE_MAX 39U
#endif // PVA_API_CMDBUF_H

View File

@@ -131,7 +131,7 @@ struct pva_dma_descriptor {
* - 0: transfer true completion disabled
* - 1: transfer true completion enabled
*/
uint8_t trans_true_completion;
uint8_t trigger_completion;
uint8_t prefetch_enable;
uint16_t tx;

View File

@@ -128,6 +128,13 @@
ACT(PVA_ERR_MISR_NOT_DONE) \
ACT(PVA_ERR_MISR_ADDR_DATA) \
ACT(PVA_ERR_MISR_TIMEOUT) \
ACT(PVA_ERR_DMA_ACTIVE_AFTER_VPU_EXIT) \
ACT(PVA_ERR_CCQ_TIMEOUT) \
ACT(PVA_ERR_WDT_TIMEOUT) \
ACT(PVA_ERR_HOST1X_ERR) \
ACT(PVA_ERR_GOLDEN_REG_MISMATCH) \
ACT(PVA_ERR_CRITICAL_REG_MISMATCH) \
ACT(PVA_ERR_CONFIG_REG_MISMATCH) \
ACT(PVA_ERR_CODE_COUNT)
enum pva_error {

View File

@@ -77,6 +77,11 @@ typedef uint32_t pva_vpu_syscall_id_t;
*/
#define PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE (5U)
/**
* @brief VPU Syscall id for checking DMA active after VPU exit
*/
#define PVA_FW_PE_SYSCALL_ID_ALLOW_DMA_ACTIVE_AFTER_VPU_EXIT (6U)
/**
* @brief PPE Syscall id for ppe printf write.
*/

View File

@@ -6,12 +6,13 @@
#include "pva_kmd_regs.h"
#include "pva_kmd_silicon_utils.h"
void pva_kmd_abort_fw(struct pva_kmd_device *pva)
void pva_kmd_abort_fw(struct pva_kmd_device *pva, uint32_t error_code)
{
// HW watchdog may fire repeatedly if PVA is hung. Therefore, disable all
// interrupts to protect KMD from potential interrupt floods.
pva_kmd_disable_all_interrupts_nosync(pva);
pva_kmd_report_error_fsi(pva, error_code);
// We will handle firmware reboot after all contexts are closed and a new
// one is re-opened again
pva->recovery = true;

View File

@@ -5,6 +5,6 @@
#include "pva_kmd_device.h"
#include "pva_kmd_utils.h"
void pva_kmd_abort_fw(struct pva_kmd_device *pva);
void pva_kmd_abort_fw(struct pva_kmd_device *pva, uint32_t error_code);
#endif //PVA_KMD_ABORT_H

View File

@@ -24,6 +24,8 @@ pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator,
sizeof(*allocator->slot_in_use) * max_num_blocks);
if (!allocator->slot_in_use) {
err = PVA_NOMEM;
pva_kmd_log_err(
"pva_kmd_block_allocator_init slot_in_use NULL");
goto err_out;
}
pva_kmd_mutex_init(&allocator->allocator_lock);

View File

@@ -195,7 +195,8 @@ pva_kmd_set_cmd_deinit_queue(struct pva_cmd_deinit_queue *cmd, uint8_t ccq_id,
static inline void pva_kmd_set_cmd_update_resource_table(
struct pva_cmd_update_resource_table *cmd, uint32_t resource_table_id,
uint32_t resource_id, struct pva_resource_entry const *entry)
uint32_t resource_id, struct pva_resource_entry const *entry,
struct pva_resource_aux_info const *aux_info)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE;
@@ -203,6 +204,9 @@ static inline void pva_kmd_set_cmd_update_resource_table(
cmd->resource_table_id = resource_table_id;
cmd->resource_id = resource_id;
cmd->entry = *entry;
if (aux_info) {
cmd->aux_info = *aux_info;
}
}
static inline void
@@ -249,13 +253,13 @@ static inline void pva_kmd_set_cmd_get_tegra_stats(
}
static inline void
pva_kmd_set_cmd_set_debug_log_level(struct pva_cmd_set_debug_log_level *cmd,
uint32_t log_level)
pva_kmd_set_cmd_set_trace_level(struct pva_cmd_set_trace_level *cmd,
uint32_t trace_level)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_SET_DEBUG_LOG_LEVEL;
cmd->header.opcode = PVA_CMD_OPCODE_SET_TRACE_LEVEL;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->log_level = log_level;
cmd->trace_level = trace_level;
}
static inline void pva_kmd_set_cmd_suspend_fw(struct pva_cmd_suspend_fw *cmd)
@@ -304,6 +308,16 @@ pva_kmd_set_cmd_set_profiling_level(struct pva_cmd_set_profiling_level *cmd,
cmd->level = level;
}
static inline void pva_kmd_set_cmd_get_version(struct pva_cmd_get_version *cmd,
uint64_t buffer_iova)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_GET_VERSION;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->buffer_iova_hi = iova_hi(buffer_iova);
cmd->buffer_iova_lo = iova_lo(buffer_iova);
}
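Illustrative sketch only: iova_hi()/iova_lo() are defined elsewhere, so the split below is an assumption based on the 8-bit buffer_iova_hi and 32-bit buffer_iova_lo fields (a 40-bit IOVA, as in the CCQ resource-table fields):
static inline uint8_t example_iova_hi(uint64_t iova)
{
	return (uint8_t)((iova >> 32) & 0xffU); /* bits 39..32 */
}
static inline uint32_t example_iova_lo(uint64_t iova)
{
	return (uint32_t)(iova & 0xffffffffU); /* bits 31..0 */
}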
#define CMD_LEN(cmd_type) (sizeof(cmd_type) / sizeof(uint32_t))
#endif // PVA_KMD_CMDBUF_H

View File

@@ -39,6 +39,7 @@
#define PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS PVA_KMD_TIMEOUT(400) /*< 400 ms */
#define PVA_KMD_WAIT_FW_TIMEOUT_US PVA_KMD_TIMEOUT(100000) /*< 100 ms */
#define PVA_KMD_WAIT_FW_TIMEOUT_SCALER_SIM 100
#define PVA_KMD_WAIT_FW_POLL_INTERVAL_US PVA_KMD_TIMEOUT(100) /*< 100 us*/
#define PVA_KMD_FW_BOOT_TIMEOUT_MS PVA_KMD_TIMEOUT(1000) /*< 1 second */

View File

@@ -19,6 +19,8 @@ struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva)
ctx = pva_kmd_zalloc_block(&pva->context_allocator, &alloc_id);
if (ctx == NULL) {
pva_kmd_log_err(
"pva_kmd_context_create pva_kmd_context block alloc failed");
err = PVA_NOMEM;
goto err_out;
}
@@ -27,13 +29,13 @@ struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva)
ctx->smmu_ctx_id = ctx->ccq_id;
ctx->pva = pva;
ctx->max_n_queues = PVA_MAX_NUM_QUEUES_PER_CONTEXT;
ctx->ccq0_lock_ptr = &pva->ccq0_lock;
pva_kmd_mutex_init(&ctx->ccq_lock);
pva_kmd_mutex_init(&ctx->ocb_lock);
ctx->queue_allocator_mem = pva_kmd_zalloc(sizeof(struct pva_kmd_queue) *
ctx->max_n_queues);
if (ctx->queue_allocator_mem == NULL) {
err = PVA_NOMEM;
pva_kmd_log_err(
"pva_kmd_context_create queue_allocator_mem NULL");
goto free_ctx;
}
@@ -42,11 +44,14 @@ struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva)
sizeof(struct pva_kmd_queue),
ctx->max_n_queues);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"pva_kmd_context_create block allocator init failed");
goto free_queue_mem;
}
/* Power on PVA if not already */
err = pva_kmd_device_busy(ctx->pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("pva_kmd_context_create device busy failed");
goto deinit_queue_allocator;
}
@@ -57,7 +62,6 @@ deinit_queue_allocator:
free_queue_mem:
pva_kmd_free(ctx->queue_allocator_mem);
free_ctx:
pva_kmd_mutex_deinit(&ctx->ccq_lock);
pva_kmd_mutex_deinit(&ctx->ocb_lock);
pva_kmd_free_block(&pva->context_allocator, alloc_id);
err_out:
@@ -112,7 +116,7 @@ static enum pva_error notify_fw_context_init(struct pva_kmd_context *ctx)
pva_kmd_set_cmd_update_resource_table(update_cmd,
0, /* KMD's resource table ID */
ctx->submit_memory_resource_id,
&entry);
&entry, NULL);
err = pva_kmd_submit_cmd_sync(dev_submitter, cmd_scratch,
sizeof(cmd_scratch),
@@ -193,8 +197,7 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
pva_kmd_queue_init(
&ctx->ctx_queue, ctx->pva, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* Context's PRIV queue ID is identical to CCQ ID */
&ctx->pva->ccq0_lock, ctx->ctx_queue_mem,
PVA_KMD_MAX_NUM_PRIV_SUBMITS);
ctx->ctx_queue_mem, PVA_KMD_MAX_NUM_PRIV_SUBMITS);
/* Allocate memory for submission */
chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size(
@@ -284,36 +287,39 @@ err_out:
return err;
}
void pva_kmd_context_deinit(struct pva_kmd_context *ctx)
void pva_kmd_free_context(struct pva_kmd_context *ctx)
{
enum pva_error err;
enum pva_error err = PVA_SUCCESS;
if (ctx->inited) {
err = notify_fw_context_deinit(ctx);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to notify FW of context deinit");
}
err = pva_kmd_shared_buffer_deinit(ctx->pva, ctx->ccq_id);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to deinit FW buffer");
}
pva_kmd_mutex_deinit(&ctx->submit_lock);
pva_kmd_mutex_deinit(&ctx->chunk_pool_lock);
pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool);
pva_kmd_drop_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id);
pva_kmd_device_memory_free(ctx->ctx_queue_mem);
pva_kmd_resource_table_deinit(&ctx->ctx_resource_table);
ctx->inited = false;
}
pva_kmd_block_allocator_deinit(&ctx->queue_allocator);
pva_kmd_free(ctx->queue_allocator_mem);
pva_kmd_mutex_deinit(&ctx->ocb_lock);
err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id);
ASSERT(err == PVA_SUCCESS);
}
static void pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx)
static void set_sticky_error(enum pva_error *ret, enum pva_error err)
{
enum pva_error err;
if (*ret == PVA_SUCCESS) {
*ret = err;
}
}
static enum pva_error pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx)
{
enum pva_error ret = PVA_SUCCESS;
for (uint32_t queue_id = 0u; queue_id < ctx->max_n_queues; queue_id++) {
struct pva_kmd_queue *queue;
@@ -323,28 +329,45 @@ static void pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx)
queue_id);
pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock);
if (queue != NULL) {
err = pva_kmd_queue_destroy(ctx, queue_id);
if (err != PVA_SUCCESS) {
pva_kmd_log_err_u64(
"Failed to destroy queue %d", queue_id);
set_sticky_error(&ret,
pva_kmd_queue_destroy(ctx, queue_id));
}
}
return ret;
}
static enum pva_error notify_fw_context_destroy(struct pva_kmd_context *ctx)
{
enum pva_error ret = PVA_SUCCESS;
set_sticky_error(&ret, pva_kmd_destroy_all_queues(ctx));
set_sticky_error(&ret, notify_fw_context_deinit(ctx));
set_sticky_error(&ret,
pva_kmd_shared_buffer_deinit(ctx->pva, ctx->ccq_id));
return ret;
}
void pva_kmd_context_destroy(struct pva_kmd_context *ctx)
{
enum pva_error err;
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_device *pva = ctx->pva;
bool deferred_free = false;
pva_kmd_destroy_all_queues(ctx);
pva_kmd_context_deinit(ctx);
pva_kmd_device_idle(ctx->pva);
pva_kmd_block_allocator_deinit(&ctx->queue_allocator);
pva_kmd_free(ctx->queue_allocator_mem);
pva_kmd_mutex_deinit(&ctx->ccq_lock);
pva_kmd_mutex_deinit(&ctx->ocb_lock);
err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id);
ASSERT(err == PVA_SUCCESS);
if (ctx->inited) {
err = notify_fw_context_destroy(ctx);
if (err != PVA_SUCCESS) {
deferred_free = true;
pva_kmd_add_deferred_context_free(pva, ctx->ccq_id);
pva_kmd_log_err(
"Failed to notify FW of context destroy; Deferring resource free until PVA is powered off.");
}
}
if (!deferred_free) {
pva_kmd_free_context(ctx);
}
pva_kmd_device_idle(pva);
}
struct pva_kmd_context *pva_kmd_get_context(struct pva_kmd_device *pva,

View File

@@ -40,8 +40,6 @@ struct pva_kmd_context {
/** Privileged queue owned by the context */
struct pva_kmd_queue ctx_queue;
/** Pointer to the ccq0 lock owned by device*/
pva_kmd_mutex_t *ccq0_lock_ptr;
/** memory needed for submission: including command buffer chunks and fences */
struct pva_kmd_device_memory *submit_memory;
@@ -56,10 +54,6 @@ struct pva_kmd_context {
void *queue_allocator_mem;
struct pva_kmd_block_allocator queue_allocator;
/** This lock protects the context's own CCQ access. We don't really use
* it because we don't do user queue submission in KMD.
*/
pva_kmd_mutex_t ccq_lock;
void *plat_data;
uint64_t ccq_shm_handle;
@@ -73,9 +67,21 @@ struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva);
/**
* @brief Destroy a KMD context.
*
* This function first notifies FW of context destruction. If successful, it
* calls pva_kmd_free_context() to free the context. Otherwise, the
* free is deferred until PVA is powered off.
*/
void pva_kmd_context_destroy(struct pva_kmd_context *client);
/**
* @brief Free a KMD context.
*
* This function frees the context without notifying FW. We need to make sure FW
* will not access any context resources before calling this function.
*/
void pva_kmd_free_context(struct pva_kmd_context *ctx);
/**
* @brief Initialize a KMD context.
*
@@ -85,8 +91,6 @@ void pva_kmd_context_destroy(struct pva_kmd_context *client);
enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
uint32_t res_table_capacity);
void pva_kmd_context_deinit(struct pva_kmd_context *ctx);
struct pva_kmd_context *pva_kmd_get_context(struct pva_kmd_device *pva,
uint8_t alloc_id);

View File

@@ -9,8 +9,9 @@
#include "pva_kmd_tegra_stats.h"
#include "pva_kmd_vpu_app_auth.h"
#include "pva_kmd_shared_buffer.h"
#include "pva_kmd_r5_ocd.h"
static uint64_t read_from_buffer_to_user(void *to, uint64_t count,
uint64_t pva_kmd_read_from_buffer_to_user(void *to, uint64_t count,
uint64_t offset, const void *from,
uint64_t available)
{
@@ -44,8 +45,8 @@ static int64_t profiling_level_read(struct pva_kmd_device *dev, void *file_data,
formatted_len++; // Account for null terminator
return read_from_buffer_to_user(out_buffer, size, offset, kernel_buffer,
formatted_len);
return pva_kmd_read_from_buffer_to_user(out_buffer, size, offset,
kernel_buffer, formatted_len);
}
static int64_t profiling_level_write(struct pva_kmd_device *dev,
@@ -100,12 +101,6 @@ static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats,
char kernel_buffer[256];
int64_t formatted_len;
// We don't support partial reads for vpu stats because we cannot mix two
// reads at different times together.
if (offset != 0) {
return 0;
}
formatted_len = snprintf(
kernel_buffer, sizeof(kernel_buffer),
"%llu\n%llu\n%llu\n%llu\n",
@@ -127,8 +122,8 @@ static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats,
}
// Copy the formatted string from kernel buffer to user buffer
return read_from_buffer_to_user(out_buffer, len, offset, kernel_buffer,
formatted_len);
return pva_kmd_read_from_buffer_to_user(out_buffer, len, offset,
kernel_buffer, formatted_len);
}
static int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data,
@@ -137,6 +132,12 @@ static int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data,
{
struct pva_kmd_tegrastats kmd_tegra_stats;
// We don't support partial reads for vpu stats because we cannot mix two
// reads at different times together.
if (offset != 0) {
return 0;
}
kmd_tegra_stats.window_start_time = 0;
kmd_tegra_stats.window_end_time = 0;
kmd_tegra_stats.average_vpu_utilization[0] = 0;
@@ -159,8 +160,8 @@ static int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva,
pva_kmd_mutex_unlock(&(pva->pva_auth->allow_list_lock));
// Copy the formatted string from kernel buffer to user buffer
return read_from_buffer_to_user(out_buffer, size, offset, out_str,
sizeof(out_str));
return pva_kmd_read_from_buffer_to_user(out_buffer, size, offset,
out_str, sizeof(out_str));
}
static int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
@@ -204,7 +205,7 @@ static int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva,
{
uint64_t len;
pva_kmd_mutex_lock(&(pva->pva_auth->allow_list_lock));
len = read_from_buffer_to_user(
len = pva_kmd_read_from_buffer_to_user(
out_buffer, size, offset,
pva->pva_auth->pva_auth_allowlist_path,
safe_addu64(strlen(pva->pva_auth->pva_auth_allowlist_path),
@@ -248,12 +249,11 @@ static int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva,
return size;
}
static int64_t update_fw_debug_log_level(struct pva_kmd_device *pva,
void *file_data,
const uint8_t *in_buffer,
static int64_t update_fw_trace_level(struct pva_kmd_device *pva,
void *file_data, const uint8_t *in_buffer,
uint64_t offset, uint64_t size)
{
uint32_t log_level;
uint32_t trace_level;
unsigned long retval;
size_t copy_size;
uint32_t base = 10;
@@ -275,9 +275,9 @@ static int64_t update_fw_debug_log_level(struct pva_kmd_device *pva,
return -1;
}
log_level = pva_kmd_strtol(strbuf, base);
trace_level = pva_kmd_strtol(strbuf, base);
pva->fw_debug_log_level = log_level;
pva->fw_trace_level = trace_level;
/* If device is on, busy the device and set the debug log level */
if (pva_kmd_device_maybe_on(pva) == true) {
@@ -289,7 +289,8 @@ static int64_t update_fw_debug_log_level(struct pva_kmd_device *pva,
goto err_end;
}
err = pva_kmd_notify_fw_set_debug_log_level(pva, log_level);
err = pva_kmd_notify_fw_set_trace_level(pva, trace_level);
pva_kmd_device_idle(pva);
if (err != PVA_SUCCESS) {
@@ -301,21 +302,22 @@ err_end:
return copy_size;
}
static int64_t get_fw_debug_log_level(struct pva_kmd_device *dev,
void *file_data, uint8_t *out_buffer,
uint64_t offset, uint64_t size)
static int64_t get_fw_trace_level(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset,
uint64_t size)
{
char print_buffer[64];
int formatted_len;
formatted_len = snprintf(print_buffer, sizeof(print_buffer), "%u\n",
dev->fw_debug_log_level);
dev->fw_trace_level);
if (formatted_len <= 0) {
return -1;
}
return read_from_buffer_to_user(out_buffer, size, offset, print_buffer,
return pva_kmd_read_from_buffer_to_user(out_buffer, size, offset,
print_buffer,
(uint64_t)formatted_len);
}
@@ -370,7 +372,8 @@ static int64_t read_simulate_sc7(struct pva_kmd_device *pva, void *file_data,
char buf;
buf = pva->debugfs_context.entered_sc7 ? '1' : '0';
return read_from_buffer_to_user(out_buffer, size, offset, &buf, 1);
return pva_kmd_read_from_buffer_to_user(out_buffer, size, offset, &buf,
1);
}
enum pva_error pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva)
@@ -457,17 +460,14 @@ enum pva_error pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva)
return err;
}
pva->debugfs_context.fw_debug_log_level_fops.write =
&update_fw_debug_log_level;
pva->debugfs_context.fw_debug_log_level_fops.read =
&get_fw_debug_log_level;
pva->debugfs_context.fw_debug_log_level_fops.pdev = pva;
pva->debugfs_context.fw_trace_level_fops.write = &update_fw_trace_level;
pva->debugfs_context.fw_trace_level_fops.read = &get_fw_trace_level;
pva->debugfs_context.fw_trace_level_fops.pdev = pva;
err = pva_kmd_debugfs_create_file(
pva, "fw_debug_log_level",
&pva->debugfs_context.fw_debug_log_level_fops);
pva, "fw_trace_level",
&pva->debugfs_context.fw_trace_level_fops);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to create fw_debug_log_level debugfs file");
pva_kmd_log_err("Failed to create fw_trace_level debugfs file");
return err;
}
@@ -484,6 +484,20 @@ enum pva_error pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva)
return err;
}
#if PVA_ENABLE_R5_OCD == 1
pva->debugfs_context.r5_ocd_fops.open = &pva_kmd_r5_ocd_open;
pva->debugfs_context.r5_ocd_fops.release = &pva_kmd_r5_ocd_release;
pva->debugfs_context.r5_ocd_fops.read = &pva_kmd_r5_ocd_read;
pva->debugfs_context.r5_ocd_fops.write = &pva_kmd_r5_ocd_write;
pva->debugfs_context.r5_ocd_fops.pdev = pva;
err = pva_kmd_debugfs_create_file(pva, "r5_ocd",
&pva->debugfs_context.r5_ocd_fops);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to create r5_ocd debugfs file");
return err;
}
#endif
return PVA_SUCCESS;
}

View File

@@ -35,11 +35,17 @@ struct pva_kmd_debugfs_context {
void *data_hwpm;
struct pva_kmd_file_ops vpu_ocd_fops[NUM_VPU_BLOCKS];
struct pva_kmd_fw_profiling_config g_fw_profiling_config;
struct pva_kmd_file_ops fw_debug_log_level_fops;
struct pva_kmd_file_ops fw_trace_level_fops;
struct pva_kmd_file_ops simulate_sc7_fops;
struct pva_kmd_file_ops r5_ocd_fops;
void *r5_ocd_stage_buffer;
};
enum pva_error pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev);
void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *dev);
uint64_t pva_kmd_read_from_buffer_to_user(void *to, uint64_t count,
uint64_t offset, const void *from,
uint64_t available);
#endif //PVA_KMD_DEBUGFS_H

View File

@@ -27,17 +27,17 @@
#include "pva_kmd_shared_buffer.h"
#include "pva_kmd_abort.h"
#include "pva_version.h"
/**
* @brief Send address and size of the resource table to FW through CCQ.
*
* Initialization through CCQ is only intended for KMD's own resource table (the
* first resource table created).
*/
static enum pva_error pva_kmd_send_resource_table_info_by_ccq(
static void pva_kmd_send_resource_table_info_by_ccq(
struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table)
{
enum pva_error err;
uint64_t addr = res_table->table_mem->iova;
uint32_t n_entries = res_table->n_entries;
uint64_t ccq_entry =
@@ -48,13 +48,11 @@ static enum pva_error pva_kmd_send_resource_table_info_by_ccq(
PVA_INSERT64(n_entries, PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_MSB,
PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_LSB);
pva_kmd_mutex_lock(&pva->ccq0_lock);
err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
pva_kmd_mutex_unlock(&pva->ccq0_lock);
uint32_t ccq_entry_lo = PVA_EXTRACT64(ccq_entry, 31, 0, uint32_t);
uint32_t ccq_entry_hi = PVA_EXTRACT64(ccq_entry, 63, 32, uint32_t);
return err;
pva_kmd_ccq_push(pva, PVA_PRIV_CCQ_ID, ccq_entry_lo);
pva_kmd_ccq_push(pva, PVA_PRIV_CCQ_ID, ccq_entry_hi);
}
/**
@@ -63,11 +61,9 @@ static enum pva_error pva_kmd_send_resource_table_info_by_ccq(
* Initialization through CCQ is only intended for KMD's own queue (the first
* queue created).
*/
static enum pva_error
pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
static void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
struct pva_kmd_queue *queue)
{
enum pva_error err;
uint64_t addr = queue->queue_memory->iova;
uint32_t max_submit = queue->max_num_submit;
uint64_t ccq_entry =
@@ -77,13 +73,12 @@ pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
PVA_FW_CCQ_QUEUE_ADDR_LSB) |
PVA_INSERT64(max_submit, PVA_FW_CCQ_QUEUE_N_ENTRIES_MSB,
PVA_FW_CCQ_QUEUE_N_ENTRIES_LSB);
pva_kmd_mutex_lock(&pva->ccq0_lock);
err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
pva_kmd_mutex_unlock(&pva->ccq0_lock);
return err;
uint32_t ccq_entry_lo = PVA_EXTRACT64(ccq_entry, 31, 0, uint32_t);
uint32_t ccq_entry_hi = PVA_EXTRACT64(ccq_entry, 63, 32, uint32_t);
pva_kmd_ccq_push(pva, PVA_PRIV_CCQ_ID, ccq_entry_lo);
pva_kmd_ccq_push(pva, PVA_PRIV_CCQ_ID, ccq_entry_hi);
}
/**
@@ -111,8 +106,8 @@ static void pva_kmd_device_init_submission(struct pva_kmd_device *pva)
ASSERT(pva->queue_memory != NULL);
pva_kmd_queue_init(&pva->dev_queue, pva, PVA_PRIV_CCQ_ID,
0 /* KMD's queue ID is 0 */, &pva->ccq0_lock,
pva->queue_memory, PVA_KMD_MAX_NUM_KMD_SUBMITS);
0 /* KMD's queue ID is 0 */, pva->queue_memory,
PVA_KMD_MAX_NUM_KMD_SUBMITS);
/* Init KMD's resource table */
err = pva_kmd_resource_table_init(&pva->dev_resource_table, pva,
@@ -195,7 +190,6 @@ struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
pva->is_hv_mode = true;
pva->max_n_contexts = PVA_MAX_NUM_USER_CONTEXTS;
pva_kmd_mutex_init(&pva->powercycle_lock);
pva_kmd_mutex_init(&pva->ccq0_lock);
pva_kmd_sema_init(&pva->fw_boot_sema, 0);
size = safe_mulu32((uint32_t)sizeof(struct pva_kmd_context),
pva->max_n_contexts);
@@ -224,13 +218,9 @@ struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
err = pva_kmd_init_vpu_app_auth(pva, app_authenticate);
ASSERT(err == PVA_SUCCESS);
pva->is_suspended = false;
pva->fw_inited = false;
#if PVA_IS_DEBUG == 1
pva->fw_debug_log_level = 255U;
#else
pva->fw_debug_log_level = 0U;
#endif
pva->fw_trace_level = PVA_FW_TP_LVL_NONE;
return pva;
}
@@ -256,11 +246,11 @@ static void pva_kmd_wait_for_active_contexts(struct pva_kmd_device *pva)
void pva_kmd_device_destroy(struct pva_kmd_device *pva)
{
pva_kmd_wait_for_active_contexts(pva);
pva_kmd_device_deinit_submission(pva);
pva_kmd_device_plat_deinit(pva);
pva_kmd_block_allocator_deinit(&pva->context_allocator);
pva_kmd_free(pva->context_mem);
pva_kmd_mutex_deinit(&pva->ccq0_lock);
pva_kmd_mutex_deinit(&pva->powercycle_lock);
pva_kmd_deinit_vpu_app_auth(pva);
pva_kmd_free(pva);
@@ -275,9 +265,8 @@ static enum pva_error config_fw_by_cmds(struct pva_kmd_device *pva)
goto err_out;
}
/* Set FW debug log level */
err = pva_kmd_notify_fw_set_debug_log_level(pva,
pva->fw_debug_log_level);
/* Set FW trace level */
err = pva_kmd_notify_fw_set_trace_level(pva, pva->fw_trace_level);
if (err != PVA_SUCCESS) {
goto err_out;
}
@@ -293,7 +282,44 @@ err_out:
return err;
}
enum pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva)
/**
* @brief Print PVA firmware and KMD version information
*
* @param pva PVA device instance
*/
static void pva_kmd_print_version_info(struct pva_kmd_device *pva)
{
enum pva_error err;
char fw_version[PVA_VERSION_BUFFER_SIZE];
char kmd_version[PVA_VERSION_BUFFER_SIZE];
/* Query and print firmware version */
const char *prefix = "PVA FW version: ";
const char *kmd_prefix = "PVA KMD version: ";
uint32_t prefix_len = strlen(prefix);
uint32_t kmd_prefix_len = strlen(kmd_prefix);
/* Store initial string */
(void)memcpy(fw_version, prefix, prefix_len);
/* Get actual firmware version - write directly after the prefix */
err = pva_kmd_query_fw_version(pva, fw_version + prefix_len,
safe_subu32(PVA_VERSION_BUFFER_SIZE,
prefix_len));
if (err == PVA_SUCCESS) {
pva_kmd_log_err(fw_version);
} else {
pva_kmd_log_err("Failed to query firmware version");
}
/* Print PVA KMD version */
(void)memcpy(kmd_version, kmd_prefix, kmd_prefix_len);
(void)memcpy(kmd_version + kmd_prefix_len, PVA_SYSSW_COMMIT_ID,
sizeof(PVA_SYSSW_COMMIT_ID));
kmd_version[kmd_prefix_len + sizeof(PVA_SYSSW_COMMIT_ID)] = '\0';
pva_kmd_log_err(kmd_version);
}
static enum pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
@@ -301,15 +327,8 @@ enum pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva)
pva->dev_queue.queue_header->cb_head = 0;
pva->dev_queue.queue_header->cb_tail = 0;
err = pva_kmd_send_resource_table_info_by_ccq(pva,
&pva->dev_resource_table);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
if (err != PVA_SUCCESS) {
goto err_out;
}
pva_kmd_send_resource_table_info_by_ccq(pva, &pva->dev_resource_table);
pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
err = pva_kmd_shared_buffer_init(pva, PVA_PRIV_CCQ_ID,
PVA_KMD_FW_BUF_ELEMENT_SIZE,
@@ -327,67 +346,63 @@ enum pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva)
goto err_out;
}
pva_kmd_print_version_info(pva);
err_out:
return err;
}
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva)
enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0) {
pva_kmd_allocate_syncpts(pva);
err = pva_kmd_power_on(pva);
err = pva_kmd_load_fw(pva);
if (err != PVA_SUCCESS) {
goto unlock;
}
err = pva_kmd_init_fw(pva);
if (err != PVA_SUCCESS) {
goto poweroff;
goto err_out;
}
err = pva_kmd_config_fw_after_boot(pva);
if (err != PVA_SUCCESS) {
goto deinit_fw;
goto unload_fw;
}
} else {
// Once firmware is aborted, we no longer allow incrementing PVA
// refcount. This makes sure refcount will eventually reach 0 and allow
// device to be powered off.
if (pva->recovery) {
pva_kmd_log_err_u64(
"PVA firmware aborted. "
"Waiting for active PVA uses to finish. Remaining",
pva->refcount);
err = PVA_ERR_FW_ABORTED;
goto unlock;
}
}
pva->refcount = safe_addu32(pva->refcount, 1U);
pva_kmd_mutex_unlock(&pva->powercycle_lock);
pva->fw_inited = true;
return PVA_SUCCESS;
deinit_fw:
pva_kmd_deinit_fw(pva);
poweroff:
pva_kmd_power_off(pva);
unlock:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
unload_fw:
pva_kmd_unload_fw(pva);
err_out:
return err;
}
void pva_kmd_device_idle(struct pva_kmd_device *pva)
void pva_kmd_add_deferred_context_free(struct pva_kmd_device *pva,
uint8_t ccq_id)
{
uint32_t index = (uint32_t)pva_kmd_atomic_fetch_add(
&pva->n_deferred_context_free, 1);
ASSERT(index < PVA_MAX_NUM_USER_CONTEXTS);
pva->deferred_context_free_ids[index] = ccq_id;
}
static void free_deferred_contexts(struct pva_kmd_device *pva)
{
uint32_t n_deferred_context_free =
(uint32_t)pva_kmd_atomic_load(&pva->n_deferred_context_free);
for (uint32_t i = 0; i < n_deferred_context_free; i++) {
uint8_t ccq_id = pva->deferred_context_free_ids[i];
struct pva_kmd_context *ctx = pva_kmd_get_context(pva, ccq_id);
ASSERT(ctx != NULL);
pva_kmd_free_context(ctx);
}
pva_kmd_atomic_store(&pva->n_deferred_context_free, 0);
}
enum pva_error pva_kmd_deinit_fw(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
pva_kmd_mutex_lock(&pva->powercycle_lock);
ASSERT(pva->refcount > 0);
pva->refcount--;
if (pva->refcount == 0) {
err = pva_kmd_notify_fw_disable_profiling(pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
@@ -398,45 +413,57 @@ void pva_kmd_device_idle(struct pva_kmd_device *pva)
pva_kmd_log_err(
"pva_kmd_shared_buffer_deinit failed during device idle");
}
pva_kmd_deinit_fw(pva);
pva_kmd_power_off(pva);
}
pva_kmd_mutex_unlock(&pva->powercycle_lock);
pva_kmd_unload_fw(pva);
free_deferred_contexts(pva);
/* No longer in recovery state */
pva->recovery = false;
pva->fw_inited = false;
return err;
}
enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva,
uint8_t ccq_id, uint64_t ccq_entry,
uint64_t sleep_interval_us,
uint64_t timeout_us)
enum pva_error pva_kmd_query_fw_version(struct pva_kmd_device *pva,
char *version_buffer,
uint32_t buffer_size)
{
/* spin until we have space or timeout reached */
while (pva_kmd_get_ccq_space(pva, ccq_id) == 0) {
if (timeout_us == 0) {
pva_kmd_log_err(
"pva_kmd_ccq_push_with_timeout Timed out");
pva_kmd_abort_fw(pva);
return PVA_TIMEDOUT;
}
if (pva->recovery) {
return PVA_ERR_FW_ABORTED;
}
pva_kmd_sleep_us(sleep_interval_us);
timeout_us = sat_sub64(timeout_us, sleep_interval_us);
}
/* TODO: memory write barrier is needed here */
pva_kmd_ccq_push(pva, ccq_id, ccq_entry);
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_device_memory *device_memory;
struct pva_cmd_get_version get_version_cmd = { 0 };
uint32_t version_buffer_size = PVA_VERSION_BUFFER_SIZE;
return PVA_SUCCESS;
if (version_buffer == NULL || buffer_size <= 1) {
return PVA_INVAL;
}
bool pva_kmd_device_maybe_on(struct pva_kmd_device *pva)
{
bool device_on = false;
/* Allocate device memory for version string */
device_memory = pva_kmd_device_memory_alloc_map(version_buffer_size,
pva, PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
if (device_memory == NULL) {
return PVA_NOMEM;
}
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount > 0) {
device_on = true;
/* Clear the buffer */
memset(device_memory->va, 0, version_buffer_size);
/* Set up the command */
pva_kmd_set_cmd_get_version(&get_version_cmd, device_memory->iova);
/* Submit the command synchronously */
err = pva_kmd_submit_cmd_sync(&pva->submitter, &get_version_cmd,
sizeof(get_version_cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to submit get_version command");
goto free_memory;
}
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return device_on;
(void)memcpy(version_buffer, (char *)device_memory->va,
(buffer_size - 1));
version_buffer[buffer_size - 1] = '\0'; /* Ensure null termination */
free_memory:
pva_kmd_device_memory_free(device_memory);
return err;
}

View File

@@ -79,7 +79,6 @@ struct pva_kmd_device {
pva_kmd_mutex_t submit_lock;
struct pva_kmd_device_memory *queue_memory;
struct pva_kmd_queue dev_queue;
pva_kmd_mutex_t ccq0_lock;
/** memory needed for submission: including command buffer chunks and fences */
struct pva_kmd_device_memory *submit_memory;
@@ -111,7 +110,7 @@ struct pva_kmd_device {
// FW and Hypervisor
struct pva_kmd_shared_buffer kmd_fw_buffers[PVA_MAX_NUM_CCQ];
uint32_t fw_debug_log_level;
uint32_t fw_trace_level;
struct pva_kmd_fw_print_buffer fw_print_buffer;
struct pva_kmd_device_memory *tegra_stats_memory;
@@ -120,6 +119,7 @@ struct pva_kmd_device {
bool load_from_gsc;
bool is_hv_mode;
bool is_silicon;
struct pva_kmd_debugfs_context debugfs_context;
/** Sector packing format for block linear surfaces */
uint8_t bl_sector_pack_format;
@@ -137,15 +137,20 @@ struct pva_kmd_device {
bool support_hwseq_frame_linking;
void *plat_data;
void *fw_handle;
struct pva_vpu_auth *pva_auth;
bool is_suspended;
bool fw_inited;
/** Carveout info for FW */
struct pva_co_info fw_carveout;
bool test_mode;
pva_kmd_atomic_t n_deferred_context_free;
uint32_t deferred_context_free_ids[PVA_MAX_NUM_USER_CONTEXTS];
uint64_t tsc_to_ns_multiplier; /**< TSC to nanoseconds multiplier */
bool r5_ocd_on;
};
struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
@@ -155,18 +160,18 @@ struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
void pva_kmd_device_destroy(struct pva_kmd_device *pva);
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva);
void pva_kmd_device_idle(struct pva_kmd_device *pva);
void pva_kmd_add_deferred_context_free(struct pva_kmd_device *pva,
uint8_t ccq_id);
enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva,
uint8_t ccq_id, uint64_t ccq_entry,
uint64_t sleep_interval_us,
uint64_t timeout_us);
enum pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva);
enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva);
enum pva_error pva_kmd_deinit_fw(struct pva_kmd_device *pva);
bool pva_kmd_device_maybe_on(struct pva_kmd_device *pva);
enum pva_error pva_kmd_query_fw_version(struct pva_kmd_device *pva,
char *version_buffer,
uint32_t buffer_size);
static inline uint32_t pva_kmd_get_device_class_id(struct pva_kmd_device *pva)
{
if (pva->device_index == 0) {
@@ -185,4 +190,17 @@ pva_kmd_get_max_cmdbuf_chunk_size(struct pva_kmd_device *pva)
return PVA_MAX_CMDBUF_CHUNK_SIZE;
}
}
static inline uint64_t pva_kmd_tsc_to_ns(struct pva_kmd_device *pva,
uint64_t tsc)
{
// Convert TSC to nanoseconds using the multiplier
return safe_mulu64(tsc, pva->tsc_to_ns_multiplier);
}
static inline uint64_t pva_kmd_tsc_to_us(struct pva_kmd_device *pva,
uint64_t tsc)
{
// Convert TSC to microseconds using the multiplier
return safe_mulu64(tsc, pva->tsc_to_ns_multiplier) / 1000;
}
#endif // PVA_KMD_DEVICE_H

View File

@@ -384,12 +384,6 @@ pva_kmd_validate_dma_config(struct pva_dma_config const *dma_config,
return err;
}
err = validate_descriptors(dma_config);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Bad Descriptors");
return err;
}
if (dma_config->header.num_hwseq_words != 0U) {
err = validate_hwseq(dma_config, hw_consts, access_sizes,
hw_dma_descs_mask);
@@ -399,6 +393,12 @@ pva_kmd_validate_dma_config(struct pva_dma_config const *dma_config,
}
}
err = validate_descriptors(dma_config);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Bad Descriptors");
return err;
}
return err;
}
@@ -612,7 +612,6 @@ static void write_one_reloc(uint8_t ch_index, uint32_t desc_index,
info->relocs[reloc_id].desc_index = desc_index;
info->relocs[reloc_id].field = reloc_field;
info->reloc_off[slot_id] = safe_addu8(info->reloc_off[slot_id], 1U);
}
@@ -641,7 +640,6 @@ static void write_relocs(const struct pva_dma_config *dma_cfg,
uint16_t start_idx = 0U;
const struct pva_dma_descriptor *desc = NULL;
uint8_t ch_index = 0U;
for (i = 0U; i < rel_info->dyn_slot.num_slots; i++) {
rel_info->dyn_slot.slots[i].reloc_start_idx = start_idx;
start_idx = safe_addu16(

View File

@@ -95,7 +95,7 @@ static void write_dma_descriptor(struct pva_dma_descriptor const *desc,
PVA_INSERT(desc->py_direction, 3, 3) |
PVA_INSERT(desc->boundary_pixel_extension, 4, 4) |
PVA_INSERT(desc->tts, 5, 5) |
PVA_INSERT(desc->trans_true_completion, 7, 7);
PVA_INSERT(desc->trigger_completion, 7, 7);
/* DMA_DESC_TRANS CNTL2 */
fw_desc->transfer_control2 = PVA_INSERT(desc->prefetch_enable, 0, 0) |
PVA_INSERT(desc->dst.cb_enable, 1, 1) |

View File

@@ -13,11 +13,11 @@
#include "pva_kmd_constants.h"
#include "pva_utils.h"
enum pva_error pva_kmd_notify_fw_set_debug_log_level(struct pva_kmd_device *pva,
uint32_t log_level)
enum pva_error pva_kmd_notify_fw_set_trace_level(struct pva_kmd_device *pva,
uint32_t trace_level)
{
struct pva_cmd_set_debug_log_level cmd = { 0 };
pva_kmd_set_cmd_set_debug_log_level(&cmd, log_level);
struct pva_cmd_set_trace_level cmd = { 0 };
pva_kmd_set_cmd_set_trace_level(&cmd, trace_level);
return pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,

View File

@@ -12,8 +12,8 @@ struct pva_kmd_fw_print_buffer {
char const *content;
};
enum pva_error pva_kmd_notify_fw_set_debug_log_level(struct pva_kmd_device *pva,
uint32_t log_level);
enum pva_error pva_kmd_notify_fw_set_trace_level(struct pva_kmd_device *pva,
uint32_t trace_level);
enum pva_error pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva,
uint32_t level);

View File

@@ -253,3 +253,21 @@ enum pva_error pva_kmd_process_fw_event(struct pva_kmd_device *pva,
return PVA_SUCCESS;
}
void pva_kmd_process_fw_tracepoint(struct pva_kmd_device *pva,
struct pva_fw_tracepoint *tp)
{
char msg_string[200] = { '\0' };
snprintf(
msg_string, sizeof(msg_string),
"pva fw tracepoint: type=%s flags=%s slot=%s ccq=%u queue=%u engine=%u arg1=0x%x arg2=0x%x",
pva_fw_tracepoint_type_to_string(PVA_BIT(tp->type)),
pva_fw_tracepoint_flags_to_string(tp->flags),
pva_fw_tracepoint_slot_id_to_string(tp->slot_id),
(uint32_t)tp->ccq_id, (uint32_t)tp->queue_id,
(uint32_t)tp->engine_id, (uint32_t)tp->arg1,
(uint32_t)tp->arg2);
pva_kmd_print_str(msg_string);
}

View File

@@ -5,7 +5,7 @@
#include "pva_kmd_device.h"
#include "pva_kmd_shared_buffer.h"
#define PVA_KMD_FW_PROFILING_BUF_NUM_ELEMENTS (4096)
#define PVA_KMD_FW_PROFILING_BUF_NUM_ELEMENTS (4096 * 100)
struct pva_kmd_fw_profiling_config {
uint32_t filter;
@@ -21,6 +21,9 @@ void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva);
enum pva_error pva_kmd_process_fw_event(struct pva_kmd_device *pva,
uint8_t *data, uint32_t data_size);
void pva_kmd_process_fw_tracepoint(struct pva_kmd_device *pva,
struct pva_fw_tracepoint *tp);
enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva);
enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva);

View File

@@ -33,9 +33,9 @@ void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len)
pva_kmd_log_err("Firmware boot completes");
pva_kmd_log_err_u64("R5 start time (us)",
tsc_to_us(r5_start_time));
pva_kmd_tsc_to_us(pva, r5_start_time));
pva_kmd_log_err_u64("R5 ready time (us)",
tsc_to_us(r5_ready_time));
pva_kmd_tsc_to_us(pva, r5_ready_time));
pva_kmd_sema_post(&pva->fw_boot_sema);
} break;
@@ -50,7 +50,7 @@ void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len)
memcpy(abort_msg + 2, &data[1], size);
abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN] = '\0';
pva_kmd_log_err(abort_msg);
pva_kmd_abort_fw(pva);
pva_kmd_abort_fw(pva, PVA_ERR_FW_ABORTED);
} break;
case PVA_FW_MSG_TYPE_FLUSH_PRINT:
pva_kmd_drain_fw_print(&pva->fw_print_buffer);

View File

@@ -66,6 +66,7 @@ pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx,
struct pva_kmd_device_memory *dev_mem;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
struct pva_resource_aux_info aux_info = { 0 };
uint8_t smmu_ctx_id;
uint32_t resource_id = 0;
@@ -118,8 +119,13 @@ pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx,
goto free_cmdbuf;
}
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
// Prepare aux info for the resource
aux_info.serial_id_hi = PVA_HI32(args->serial_id);
aux_info.serial_id_lo = PVA_LOW32(args->serial_id);
pva_kmd_set_cmd_update_resource_table(update_cmd,
ctx->resource_table_id,
resource_id, &entry, &aux_info);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;
@@ -212,7 +218,7 @@ static enum pva_error pva_kmd_op_executable_register_async(
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
update_cmd, ctx->resource_table_id, resource_id, &entry, NULL);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;
@@ -267,7 +273,7 @@ static enum pva_error pva_kmd_op_dma_register_async(
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
update_cmd, ctx->resource_table_id, resource_id, &entry, NULL);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;

View File

@@ -18,13 +18,6 @@ enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva)
enum pva_error err = PVA_SUCCESS;
struct pva_cmd_suspend_fw cmd = { 0 };
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0u) {
pva_dbg_printf("PVA: Nothing to prepare for suspend");
err = PVA_SUCCESS;
goto err_out;
}
pva_kmd_set_cmd_suspend_fw(&cmd);
err = pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
@@ -36,7 +29,6 @@ enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva)
}
err_out:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
}
@@ -53,19 +45,6 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
struct pva_kmd_queue *queue;
const struct pva_syncpt_rw_info *syncpt_info;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0u) {
pva_dbg_printf(
"PVA : Nothing to check for completion in resume");
err = PVA_SUCCESS;
goto err_out;
}
err = pva_kmd_config_fw_after_boot(pva);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
@@ -201,13 +180,11 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
goto cancel_builder;
}
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
}

View File

@@ -14,7 +14,6 @@
void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva,
uint8_t ccq_id, uint8_t queue_id,
pva_kmd_mutex_t *ccq_lock,
struct pva_kmd_device_memory *queue_memory,
uint32_t max_num_submit)
{
@@ -24,7 +23,6 @@ void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva,
queue->queue_id = queue_id;
queue->max_num_submit = max_num_submit;
queue->queue_header = queue_memory->va;
queue->ccq_lock = ccq_lock;
}
uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue)
@@ -42,8 +40,6 @@ pva_kmd_queue_submit(struct pva_kmd_queue *queue,
uint32_t head = queue->queue_header->cb_head;
uint32_t tail = queue->queue_header->cb_tail;
uint32_t size = queue->max_num_submit;
uint64_t ccq_entry;
enum pva_error err;
struct pva_fw_cmdbuf_submit_info *items = pva_offset_pointer(
queue->queue_header, sizeof(*queue->queue_header));
@@ -55,25 +51,11 @@ pva_kmd_queue_submit(struct pva_kmd_queue *queue,
/* Update tail */
tail = wrap_add(tail, 1, size);
ccq_entry =
PVA_INSERT64(PVA_FW_CCQ_OP_UPDATE_TAIL, PVA_FW_CCQ_OPCODE_MSB,
PVA_FW_CCQ_OPCODE_LSB) |
PVA_INSERT64(queue->queue_id, PVA_FW_CCQ_QUEUE_ID_MSB,
PVA_FW_CCQ_QUEUE_ID_LSB) |
PVA_INSERT64(tail, PVA_FW_CCQ_TAIL_MSB, PVA_FW_CCQ_TAIL_LSB);
pva_kmd_mutex_lock(queue->ccq_lock);
/* TODO: memory write barrier is needed here */
err = pva_kmd_ccq_push_with_timeout(queue->pva, queue->ccq_id,
ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err == PVA_SUCCESS) {
queue->queue_header->cb_tail = tail;
}
pva_kmd_mutex_unlock(queue->ccq_lock);
__sync_synchronize();
pva_kmd_ccq_push(queue->pva, queue->ccq_id, queue->queue_id);
return err;
return PVA_SUCCESS;
}
static enum pva_error notify_fw_queue_deinit(struct pva_kmd_context *ctx,
struct pva_kmd_queue *queue)
@@ -124,8 +106,7 @@ enum pva_error pva_kmd_queue_create(struct pva_kmd_context *ctx,
}
pva_kmd_queue_init(queue, ctx->pva, ctx->ccq_id, *queue_id,
&ctx->ccq_lock, submission_mem_kmd,
in_args->max_submission_count);
submission_mem_kmd, in_args->max_submission_count);
/* Get device mapped IOVA to share with FW */
err = pva_kmd_device_memory_iova_map(submission_mem_kmd, ctx->pva,

View File

@@ -11,7 +11,6 @@ struct pva_kmd_queue {
struct pva_kmd_device *pva;
struct pva_kmd_device_memory *queue_memory;
struct pva_fw_submit_queue_header *queue_header;
pva_kmd_mutex_t *ccq_lock;
uint8_t ccq_id;
uint8_t queue_id;
uint32_t max_num_submit;
@@ -19,9 +18,9 @@ struct pva_kmd_queue {
void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva,
uint8_t ccq_id, uint8_t queue_id,
pva_kmd_mutex_t *ccq_lock,
struct pva_kmd_device_memory *queue_memory,
uint32_t max_num_submit);
enum pva_error pva_kmd_queue_create(struct pva_kmd_context *ctx,
const struct pva_ops_queue_create *in_args,
uint32_t *queue_id);

View File

@@ -0,0 +1,68 @@
// SPDX-License-Identifier: GPL-2.0-only
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "pva_kmd_r5_ocd.h"
#include "pva_api_types.h"
#include "pva_fw_address_map.h"
#include "pva_kmd_debugfs.h"
#include "pva_kmd_shim_debugfs.h"
#include "pva_kmd_shim_utils.h"
#include "pva_kmd_silicon_utils.h"
#include "pva_fw_hyp.h"
int pva_kmd_r5_ocd_open(struct pva_kmd_device *dev)
{
enum pva_error err = pva_kmd_device_busy(dev);
if (err == PVA_SUCCESS) {
dev->r5_ocd_on = true;
}
return 0;
}
int pva_kmd_r5_ocd_release(struct pva_kmd_device *dev)
{
if (dev->r5_ocd_on) {
dev->r5_ocd_on = false;
pva_kmd_device_idle(dev);
}
return 0;
}
int64_t pva_kmd_r5_ocd_write(struct pva_kmd_device *pva, void *file_data,
const uint8_t *data, uint64_t offset,
uint64_t size)
{
struct pva_r5_ocd_request *req = NULL;
if (size > PVA_R5_OCD_MAX_DATA_SIZE) {
pva_kmd_log_err("pva_kmd_r5_ocd_write: size too large");
return -1;
}
pva_kmd_copy_data_from_user(pva->debugfs_context.r5_ocd_stage_buffer,
data, size);
req = (struct pva_r5_ocd_request *)
pva->debugfs_context.r5_ocd_stage_buffer;
if (req->size > PVA_R5_OCD_MAX_DATA_SIZE) {
pva_kmd_log_err("pva_kmd_r5_ocd_write: size too large");
return -1;
}
pva_kmd_write_mailbox(pva, PVA_FW_MBOX_TO_R5_BASE, 0xFFFFFFFF);
return size;
}
int64_t pva_kmd_r5_ocd_read(struct pva_kmd_device *pva, void *file_data,
uint8_t *data, uint64_t offset, uint64_t size)
{
//wait until mailbox is cleared
while (pva_kmd_read_mailbox(pva, PVA_FW_MBOX_TO_R5_BASE) != 0) {
pva_kmd_sleep_us(1);
}
return pva_kmd_read_from_buffer_to_user(
data, size, offset, pva->debugfs_context.r5_ocd_stage_buffer,
PVA_R5_OCD_MAX_DATA_SIZE);
}

View File

@@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_KMD_R5_OCD_H
#define PVA_KMD_R5_OCD_H
#include "pva_kmd_device.h"
#include "pva_kmd.h"
int64_t pva_kmd_r5_ocd_read(struct pva_kmd_device *dev, void *file_data,
uint8_t *data, uint64_t offset, uint64_t size);
int64_t pva_kmd_r5_ocd_write(struct pva_kmd_device *dev, void *file_data,
const uint8_t *data, uint64_t offset,
uint64_t size);
int pva_kmd_r5_ocd_open(struct pva_kmd_device *dev);
int pva_kmd_r5_ocd_release(struct pva_kmd_device *dev);
#endif

View File

@@ -1,11 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_KMD_REGS_H
#define PVA_KMD_REGS_H
#include "pva_api.h"
#include "pva_constants.h"
#define PVA0_SEC_OFFSET 0x20000
#define PVA0_PROC_OFFSET 0x30000
#define PVA0_PM_OFFSET 0x200000
#define PVA0_CFG_SID_OFFSET 0x240000
#define PVA0_CFG_CCQ_OFFSET 0x260000
#define PVA0_HSP_OFFSET 0x160000
#define PVA0_EVP_OFFSET 0x0
#define PVA_KMD_PVA0_SEC_SIZE 0x10000 // 64KB
#define PVA_KMD_PVA0_PROC_SIZE 0x10000 // 64KB
#define PVA_KMD_PVA0_PM_SIZE 0x10000 // 64KB
#define PVA_KMD_PVA0_CFG_SID_SIZE 0x20000 // 128KB
#define PVA_KMD_PVA0_CFG_CCQ_SIZE 0x80000 // 512KB
#define PVA_KMD_PVA0_HSP_SIZE 0x90000 // 576KB
#define PVA_KMD_PVA0_EVP_SIZE 0x10000 // 64KB
/* Exception vectors */
#define PVA_REG_EVP_RESET_ADDR 0x20
#define PVA_REG_EVP_UNDEF_ADDR 0x24
@@ -119,7 +136,17 @@ struct pva_kmd_regspec {
};
enum pva_kmd_reg_aperture {
/** Main PVA_CLUSTER aperture */
PVA_KMD_APERTURE_PVA_CLUSTER = 0,
/** Sub-clusters within PVA_CLUSTER */
PVA_KMD_APERTURE_PVA_CLUSTER_SEC,
PVA_KMD_APERTURE_PVA_CLUSTER_PROC,
PVA_KMD_APERTURE_PVA_CLUSTER_PM,
PVA_KMD_APERTURE_PVA_CLUSTER_HSP,
PVA_KMD_APERTURE_PVA_CLUSTER_EVP,
PVA_KMD_APERTURE_PVA_CLUSTER_CFG_SID,
PVA_KMD_APERTURE_PVA_CLUSTER_CFG_CCQ,
/** Debug aperture */
PVA_KMD_APERTURE_VPU_DEBUG,
PVA_KMD_APERTURE_COUNT,
};
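For reference, a sketch pairing the new sub-cluster apertures with the offset and size macros above (an assumption for illustration; the actual mapping is populated by the platform regspec code):
struct example_aperture_range {
	uint32_t offset;
	uint32_t size;
};
static const struct example_aperture_range example_pva0_apertures[] = {
	[PVA_KMD_APERTURE_PVA_CLUSTER_SEC] = { PVA0_SEC_OFFSET, PVA_KMD_PVA0_SEC_SIZE },
	[PVA_KMD_APERTURE_PVA_CLUSTER_PROC] = { PVA0_PROC_OFFSET, PVA_KMD_PVA0_PROC_SIZE },
	[PVA_KMD_APERTURE_PVA_CLUSTER_PM] = { PVA0_PM_OFFSET, PVA_KMD_PVA0_PM_SIZE },
	[PVA_KMD_APERTURE_PVA_CLUSTER_HSP] = { PVA0_HSP_OFFSET, PVA_KMD_PVA0_HSP_SIZE },
	[PVA_KMD_APERTURE_PVA_CLUSTER_EVP] = { PVA0_EVP_OFFSET, PVA_KMD_PVA0_EVP_SIZE },
	[PVA_KMD_APERTURE_PVA_CLUSTER_CFG_SID] = { PVA0_CFG_SID_OFFSET, PVA_KMD_PVA0_CFG_SID_SIZE },
	[PVA_KMD_APERTURE_PVA_CLUSTER_CFG_CCQ] = { PVA0_CFG_CCQ_OFFSET, PVA_KMD_PVA0_CFG_CCQ_SIZE },
};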

View File

@@ -60,6 +60,8 @@ pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table,
size = (uint64_t)safe_mulu32(
n_entries, (uint32_t)sizeof(struct pva_resource_entry));
size += (uint64_t)safe_mulu32(
n_entries, (uint32_t)sizeof(struct pva_resource_aux_info));
res_table->table_mem = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
if (res_table->table_mem == NULL) {

View File

@@ -130,6 +130,7 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva,
struct pva_kmd_context *ctx = NULL;
void *msg_body;
uint32_t msg_size;
struct pva_fw_tracepoint tracepoint;
ASSERT(msg != NULL);
@@ -155,6 +156,12 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva,
}
break;
}
case PVA_KMD_FW_BUF_MSG_TYPE_FW_TRACEPOINT: {
ASSERT(msg_size == sizeof(struct pva_fw_tracepoint));
memcpy(&tracepoint, msg_body, sizeof(tracepoint));
pva_kmd_process_fw_tracepoint(pva, &tracepoint);
break;
}
case PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE: {
ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_vpu_trace));
memcpy(&vpu_trace, msg_body, sizeof(vpu_trace));
@@ -237,7 +244,7 @@ void pva_kmd_shared_buffer_process(void *pva_dev, uint8_t interface)
// Note that ideally this should never happen as the buffer is expected to be
// the same size as the resource table.
// TODO: abort only the user context, not the device.
pva_kmd_abort_fw(pva);
pva_kmd_abort_fw(pva, PVA_BUF_OUT_OF_RANGE);
}
// Buffer corresponding to CCQ 0 is used for sending messages common to a VM.

View File

@@ -10,6 +10,7 @@
#include "pva_kmd_silicon_isr.h"
#include "pva_kmd_silicon_boot.h"
#include "pva_kmd_shim_silicon.h"
#include "pva_kmd_utils.h"
static inline void pva_kmd_set_sema(struct pva_kmd_device *pva,
uint32_t sema_idx, uint32_t val)
@@ -108,10 +109,21 @@ void pva_kmd_config_evp_seg_regs(struct pva_kmd_device *pva)
void pva_kmd_config_scr_regs(struct pva_kmd_device *pva)
{
pva_kmd_write(pva, PVA_REG_EVP_SCR_ADDR, PVA_EVP_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_STATUS_CNTL, PVA_STATUS_CTL_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_PRIV, PVA_PRIV_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_CCQ_CNTL, PVA_CCQ_SCR_VAL);
uint32_t scr_lock_mask = pva->is_silicon ? 0xFFFFFFFF : (~PVA_SCR_LOCK);
pva_kmd_write(pva, PVA_REG_EVP_SCR_ADDR,
PVA_EVP_SCR_VAL & scr_lock_mask);
if (pva->is_silicon) {
pva_kmd_write(pva, PVA_CFG_SCR_STATUS_CNTL,
PVA_STATUS_CTL_SCR_VAL & scr_lock_mask);
} else {
pva_kmd_write(pva, PVA_CFG_SCR_STATUS_CNTL,
PVA_STATUS_CTL_SCR_VAL_SIM & scr_lock_mask);
}
pva_kmd_write(pva, PVA_CFG_SCR_PRIV, PVA_PRIV_SCR_VAL & scr_lock_mask);
pva_kmd_write(pva, PVA_CFG_SCR_CCQ_CNTL,
PVA_CCQ_SCR_VAL & scr_lock_mask);
}
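/*
 * Worked example of the scr_lock_mask arithmetic above, assuming a hypothetical
 * PVA_SCR_LOCK of (1u << 31); the macro's real value is not visible in this
 * change. On silicon the mask is 0xFFFFFFFF, so the lock bit in each SCR value
 * is preserved; off silicon the mask is ~PVA_SCR_LOCK = 0x7FFFFFFF, so the lock
 * bit is cleared and the SCRs remain writable in simulation.
 *
 *     silicon:    PVA_EVP_SCR_VAL & 0xFFFFFFFF  ->  value unchanged, lock kept
 *     simulation: PVA_EVP_SCR_VAL & 0x7FFFFFFF  ->  bit 31 (lock) cleared
 */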
void pva_kmd_config_sid(struct pva_kmd_device *pva)
@@ -169,12 +181,14 @@ static uint32_t get_syncpt_offset(struct pva_kmd_device *pva,
}
}
enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva)
enum pva_error pva_kmd_load_fw(struct pva_kmd_device *pva)
{
uint64_t seg_reg_value;
uint32_t debug_data_size;
uint32_t boot_sema = 0;
enum pva_error err = PVA_SUCCESS;
uint32_t checkpoint;
uint32_t scr_lock_mask = pva->is_silicon ? 0xFFFFFFFF : (~PVA_SCR_LOCK);
/* Load firmware */
if (!pva->load_from_gsc) {
@@ -192,14 +206,18 @@ enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva)
debug_data_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
if (pva->fw_debug_mem == NULL) {
err = PVA_NOMEM;
pva_kmd_log_err(
"pva_kmd_device_memory_alloc_map failed in pva_kmd_load_fw");
goto free_fw_mem;
}
init_fw_print_buffer(&pva->fw_print_buffer, pva->fw_debug_mem->va);
pva->debugfs_context.r5_ocd_stage_buffer = pva->fw_debug_mem->va;
/* Program SCRs */
pva_kmd_write(pva, PVA_SEC_SCR_SECEXT_INTR_EVENT,
PVA_SEC_SCR_SECEXT_INTR_EVENT_VAL);
pva_kmd_write(pva, PVA_PROC_SCR_PROC, PVA_PROC_SCR_PROC_VAL);
(PVA_SEC_SCR_SECEXT_INTR_EVENT_VAL & scr_lock_mask));
pva_kmd_write(pva, PVA_PROC_SCR_PROC,
(PVA_PROC_SCR_PROC_VAL & scr_lock_mask));
pva_kmd_config_evp_seg_scr_regs(pva);
@@ -271,9 +289,14 @@ enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva)
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Waiting for FW boot timed out.");
/* show checkpoint value here*/
checkpoint = pva_kmd_read(
pva, pva->regspec.ccq_regs[PVA_PRIV_CCQ_ID]
.status[PVA_REG_CCQ_STATUS6_IDX]);
pva_kmd_log_err_hex32("Checkpoint value:", checkpoint);
pva_kmd_report_error_fsi(pva, err);
goto free_sec_lic;
}
pva->recovery = false;
return err;
@@ -309,7 +332,7 @@ void pva_kmd_freeze_fw(struct pva_kmd_device *pva)
pva_kmd_set_reset_line(pva);
}
void pva_kmd_deinit_fw(struct pva_kmd_device *pva)
void pva_kmd_unload_fw(struct pva_kmd_device *pva)
{
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC);
pva_kmd_drain_fw_print(&pva->fw_print_buffer);

View File

@@ -47,14 +47,14 @@ void pva_kmd_hyp_isr(void *data, enum pva_kmd_intr_line intr_line)
/* Clear interrupt status */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, wdt_val);
pva_kmd_log_err("PVA watchdog timeout!");
pva_kmd_abort_fw(pva);
pva_kmd_abort_fw(pva, PVA_ERR_WDT_TIMEOUT);
}
if (h1x_val != 0) {
pva_kmd_log_err_u64("Host1x errors", h1x_val);
/* Clear interrupt status */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, h1x_val);
pva_kmd_abort_fw(pva);
pva_kmd_abort_fw(pva, PVA_ERR_HOST1X_ERR);
}
if (hsp_val != 0) {

View File

@@ -6,12 +6,9 @@
#include "pva_math_utils.h"
void pva_kmd_ccq_push(struct pva_kmd_device *pva, uint8_t ccq_id,
uint64_t ccq_entry)
uint32_t ccq_entry)
{
pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo,
PVA_EXTRACT64(ccq_entry, 31, 0, uint32_t));
pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo,
PVA_EXTRACT64(ccq_entry, 63, 32, uint32_t));
pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo, ccq_entry);
}
uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id)

View File

@@ -142,6 +142,13 @@ enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter,
uint32_t time_spent = 0;
struct pva_kmd_device *pva = submitter->queue->pva;
#if (PVA_BUILD_MODE == PVA_BUILD_MODE_L4T) || \
(PVA_BUILD_MODE == PVA_BUILD_MODE_QNX)
if (!pva->is_silicon) {
timeout_us = safe_mulu32(timeout_us,
PVA_KMD_WAIT_FW_TIMEOUT_SCALER_SIM);
}
#endif
while (*fence_addr < fence_val) {
if (pva->recovery) {
return PVA_ERR_FW_ABORTED;
@@ -150,7 +157,7 @@ enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter,
time_spent = safe_addu32(time_spent, poll_interval_us);
if (time_spent >= timeout_us) {
pva_kmd_log_err("pva_kmd_submitter_wait Timed out");
pva_kmd_abort_fw(submitter->queue->pva);
pva_kmd_abort_fw(submitter->queue->pva, PVA_TIMEDOUT);
return PVA_TIMEDOUT;
}
}

View File

@@ -24,6 +24,7 @@ void pva_kmd_device_init_t23x(struct pva_kmd_device *pva)
pva->hw_consts.n_vmem_regions = PVA_VMEM_REGION_COUNT_T23X;
pva->support_hwseq_frame_linking = false;
pva->vmem_regions_tab = vmem_regions_tab_t23x;
pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T23x_REG_BASE;
pva->reg_size[PVA_KMD_APERTURE_PVA_CLUSTER] =
@@ -77,4 +78,6 @@ void pva_kmd_device_init_t23x(struct pva_kmd_device *pva)
#else
pva->bl_sector_pack_format = PVA_BL_TEGRA_RAW;
#endif
pva->tsc_to_ns_multiplier = PVA_NS_PER_TSC_TICK_T23X;
}

View File

@@ -25,6 +25,7 @@ void pva_kmd_device_init_t26x(struct pva_kmd_device *pva)
pva->hw_consts.n_vmem_regions = PVA_VMEM_REGION_COUNT_T26X;
pva->vmem_regions_tab = vmem_regions_tab_t26x;
pva->support_hwseq_frame_linking = true;
pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T26x_REG_BASE;
pva->reg_size[PVA_KMD_APERTURE_PVA_CLUSTER] =
@@ -73,4 +74,5 @@ void pva_kmd_device_init_t26x(struct pva_kmd_device *pva)
}
}
pva->bl_sector_pack_format = PVA_BL_TEGRA_RAW;
pva->tsc_to_ns_multiplier = PVA_NS_PER_TSC_TICK_T26X;
}

View File

@@ -4,6 +4,8 @@
#include "pva_kmd_utils.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_device.h"
#include "pva_kmd_shim_init.h"
#include <pthread.h>
#include <time.h>
#include <unistd.h>
@@ -139,3 +141,35 @@ free_mem:
err_out:
return NULL;
}
void pva_kmd_atomic_store(pva_kmd_atomic_t *atomic_val, int val)
{
atomic_store(atomic_val, val);
}
int pva_kmd_atomic_fetch_add(pva_kmd_atomic_t *atomic_val, int val)
{
return atomic_fetch_add(atomic_val, val);
}
int pva_kmd_atomic_fetch_sub(pva_kmd_atomic_t *atomic_val, int val)
{
return atomic_fetch_sub(atomic_val, val);
}
int pva_kmd_atomic_load(pva_kmd_atomic_t *atomic_val)
{
return atomic_load(atomic_val);
}
bool pva_kmd_device_maybe_on(struct pva_kmd_device *pva)
{
bool device_on = false;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount > 0) {
device_on = true;
}
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return device_on;
}

View File

@@ -19,3 +19,8 @@ void pva_kmd_log_err_u64(const char *msg, uint64_t val)
{
pva_kmd_print_str_u64(msg, val);
}
void pva_kmd_log_err_hex32(const char *msg, uint32_t val)
{
pva_kmd_print_str_hex32(msg, val);
}

View File

@@ -15,6 +15,7 @@
void pva_kmd_log_err(const char *msg);
void pva_kmd_log_err_u64(const char *msg, uint64_t val);
void pva_kmd_log_err_hex32(const char *msg, uint32_t val);
void *pva_kmd_zalloc_nofail(uint64_t size);
#endif // PVA_KMD_UTILS_H

View File

@@ -14,7 +14,8 @@ struct pva_kmd_device;
*
*/
void pva_kmd_ccq_push(struct pva_kmd_device *pva, uint8_t ccq_id,
uint64_t ccq_entry);
uint32_t ccq_entry);
/**
* @brief Get the number of available spaces in the CCQ.
*

View File

@@ -14,18 +14,6 @@ void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva);
void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint32_t *syncpt_value);
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva);
/**
* @brief Power on PVA cluster.
*/
enum pva_error pva_kmd_power_on(struct pva_kmd_device *pva);
/**
* @brief Power off PVA cluster.
*/
void pva_kmd_power_off(struct pva_kmd_device *pva);
/**
* @brief Reset assert FW so it can be in recovery and
* user submission halted. This is required for host1x
@@ -34,29 +22,56 @@ void pva_kmd_power_off(struct pva_kmd_device *pva);
void pva_kmd_freeze_fw(struct pva_kmd_device *pva);
/**
* @brief Initialize firmware.
* @brief Increase reference count on the PVA device.
*
* This function initializes firmware. On silicon, this includes
* Power on PVA if necessary.
*/
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva);
/**
* @brief Decrease reference count on the PVA device.
*
* Power off PVA if necessary.
*/
void pva_kmd_device_idle(struct pva_kmd_device *pva);
/**
* @brief Check if PVA is already powered on.
*
* This function provides a hint about the current power state of the PVA device.
* A return value of true indicates the device is likely powered on, while false
* indicates it is likely powered off.
*
* @note This is only a hint and the power state may change at any time. The caller
* must still take a reference using pva_kmd_device_busy() before attempting any
* communication with the device.
*/
bool pva_kmd_device_maybe_on(struct pva_kmd_device *pva);
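/*
 * A minimal usage sketch of the reference-counting pattern above, assuming a
 * hypothetical caller: maybe_on() is only a hint, busy() must succeed before
 * any device access, and every successful busy() is paired with an idle().
 *
 *     enum pva_error err;
 *
 *     (void)pva_kmd_device_maybe_on(pva);  // optional hint, e.g. to skip a fast path
 *     err = pva_kmd_device_busy(pva);      // take a reference; powers on if needed
 *     if (err != PVA_SUCCESS)
 *             return err;
 *     // ... communicate with the device here ...
 *     pva_kmd_device_idle(pva);            // drop the reference; may power off
 */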
/**
* @brief Load firmware.
*
* This function loads firmware. On silicon, this includes
* - power on R5,
* - load firmware,
* - bind interrupts,
* - and wait for firmware boot to complete.
*
* @param pva pointer to the PVA device to initialize
* @param pva pointer to the PVA device
*/
enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva);
enum pva_error pva_kmd_load_fw(struct pva_kmd_device *pva);
/**
* @brief De-init firmware.
* @brief Unload firmware.
*
* This function de-initializes firmware. On silicon, this includes
* This function unloads firmware. On silicon, this includes
* - free interrupts,
* - power off R5,
* - and free firmware memories.
*
* @param pva pointer to the PVA device to de-initialize
* @param pva pointer to the PVA device
*/
void pva_kmd_deinit_fw(struct pva_kmd_device *pva);
void pva_kmd_unload_fw(struct pva_kmd_device *pva);
/**
* @brief Disable all interrupts without waiting for running interrupt handlers
@@ -71,4 +86,13 @@ void pva_kmd_deinit_fw(struct pva_kmd_device *pva);
*/
void pva_kmd_disable_all_interrupts_nosync(struct pva_kmd_device *pva);
/**
* @brief Report error to FSI.
*
* This function reports an error to FSI.
*
* @param pva pointer to the PVA device
*/
void pva_kmd_report_error_fsi(struct pva_kmd_device *pva, uint32_t error_code);
#endif // PVA_KMD_SHIM_INIT_H

View File

@@ -38,6 +38,17 @@ void pva_kmd_print_str(const char *str);
*/
void pva_kmd_print_str_u64(const char *str, uint64_t n);
/**
* @brief Print a string followed by a 32-bit unsigned number in hex format.
*
* This function is used for logging errors, enabled even in a safety environment.
* For debug print, use pva_dbg_printf.
*
* @param str The string to print.
* @param n The number to print.
*/
void pva_kmd_print_str_hex32(const char *str, uint32_t n);
/**
* @brief Fault KMD.
*

View File

@@ -11,12 +11,26 @@
#include <linux/semaphore.h>
typedef struct semaphore pva_kmd_sema_t;
#include <linux/atomic.h>
typedef atomic_t pva_kmd_atomic_t;
#else /* For user space code, including QNX KMD */
#include <semaphore.h>
/* Mutex */
typedef sem_t pva_kmd_sema_t;
// clang-format off
#ifdef __cplusplus
#include <atomic>
// The strange format is to make the kernel patch check script happy
typedef std::atomic < int > pva_kmd_atomic_t;
#else
#include <stdatomic.h>
typedef atomic_int pva_kmd_atomic_t;
#endif
// clang-format on
#endif
/**
@@ -58,4 +72,9 @@ void pva_kmd_sema_post(pva_kmd_sema_t *sem);
*/
void pva_kmd_sema_deinit(pva_kmd_sema_t *sem);
void pva_kmd_atomic_store(pva_kmd_atomic_t *atomic_val, int val);
int pva_kmd_atomic_fetch_add(pva_kmd_atomic_t *atomic_val, int val);
int pva_kmd_atomic_fetch_sub(pva_kmd_atomic_t *atomic_val, int val);
int pva_kmd_atomic_load(pva_kmd_atomic_t *atomic_val);
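/*
 * A minimal sketch of how these wrappers can back a shared counter, assuming a
 * hypothetical pending-job count; the wrappers map to atomic_t helpers in the
 * Linux kernel build and to C11/C++11 atomics in user-space builds.
 *
 *     pva_kmd_atomic_t pending_jobs;
 *
 *     pva_kmd_atomic_store(&pending_jobs, 0);            // initialize
 *     (void)pva_kmd_atomic_fetch_add(&pending_jobs, 1);  // submission path
 *     (void)pva_kmd_atomic_fetch_sub(&pending_jobs, 1);  // completion path
 *     if (pva_kmd_atomic_load(&pending_jobs) == 0) {
 *             // nothing outstanding
 *     }
 */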
#endif // PVA_KMD_THREAD_SEMA_H

View File

@@ -94,6 +94,7 @@ struct pva_ops_memory_register {
uint64_t import_id; /**< Import ID of the memory */
uint64_t offset; /**< Offset into the memory */
uint64_t size; /**< Size of memory to register */
uint64_t serial_id; /**< Serial ID of the memory */
};
/**

View File

@@ -98,6 +98,8 @@ static long int debugfs_node_write(struct file *file, const char *data,
}
static const struct file_operations pva_linux_debugfs_fops = {
// Prevent KMD from being unloaded while file is open
.owner = THIS_MODULE,
.open = debugfs_node_open,
.read = debugfs_node_read,
.write = debugfs_node_write,

View File

@@ -22,6 +22,17 @@
#include "pva_kmd_silicon_boot.h"
#include "pva_kmd_linux_device_api.h"
#define HVC_NR_PVA_CONFIG_REGS_CALL 0x8136U
__attribute__((no_sanitize_address)) static inline bool
hyp_pva_config_regs(void)
{
uint64_t args[4] = { 0U, 0U, 0U, 0U };
hyp_call44(HVC_NR_PVA_CONFIG_REGS_CALL, args);
return (args[0] == 0U);
}
struct nvpva_device_data *
pva_kmd_linux_device_get_properties(struct platform_device *pdev)
{
@@ -140,10 +151,6 @@ err_out:
return err;
}
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva)
{
}
void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva)
{
int err = 0;
@@ -203,38 +210,74 @@ void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva)
pva_kmd_free(pva_kmd_linux_device_get_data(pva));
}
enum pva_error pva_kmd_power_on(struct pva_kmd_device *pva)
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva)
{
int err = 0;
enum pva_error pva_err = PVA_SUCCESS;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
pva_kmd_mutex_lock(&pva->powercycle_lock);
// Once firmware is aborted, we no longer allow incrementing PVA
// refcount. This makes sure refcount will eventually reach 0 and allow
// device to be powered off.
if (pva->recovery) {
pva_kmd_log_err(
"PVA firmware aborted. Waiting for active PVA uses to finish");
pva_err = PVA_ERR_FW_ABORTED;
goto unlock;
}
err = pm_runtime_get_sync(&props->pdev->dev);
if (err < 0) {
pm_runtime_put_noidle(&props->pdev->dev);
goto out;
pva_kmd_log_err_u64(
"pva_kmd_device_busy pm_runtime_get_sync failed",
(uint64_t)(-err));
goto convert_err;
}
/* Power management operation is asynchronous. PVA may not be power
* cycled between power_off -> power_on call. Therefore, we need to
* reset it here to make sure it is in a clean state. */
reset_control_acquire(props->reset_control);
reset_control_reset(props->reset_control);
reset_control_release(props->reset_control);
pva->refcount = safe_addu32(pva->refcount, 1);
out:
return kernel_err2pva_err(err);
convert_err:
pva_err = kernel_err2pva_err(err);
unlock:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return pva_err;
}
void pva_kmd_power_off(struct pva_kmd_device *pva)
void pva_kmd_device_idle(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
int err = 0;
pva_kmd_mutex_lock(&pva->powercycle_lock);
pva->refcount = safe_subu32(pva->refcount, 1);
if (pva->refcount == 0 && pva->recovery) {
/*
* At this point, there are no active PVA users (refcount=0).
* Since PVA needs recovery (recovery=true), perform a forced
* power cycle to recover it.
*/
err = pm_runtime_force_suspend(&props->pdev->dev);
if (err == 0) {
err = pm_runtime_force_resume(&props->pdev->dev);
}
if (err < 0) {
pva_kmd_log_err("Failed to recover PVA");
}
}
pm_runtime_mark_last_busy(&props->pdev->dev);
pm_runtime_put(&props->pdev->dev);
pva_kmd_mutex_unlock(&pva->powercycle_lock);
}
void pva_kmd_set_reset_line(struct pva_kmd_device *pva)
@@ -353,22 +396,46 @@ unsigned long pva_kmd_strtol(const char *str, int base)
return val;
}
/* TODO: Enable HVC call once HVC fix is available on dev-main */
//static void pva_kmd_config_regs(void)
//{
//bool hv_err = true;
//hv_err = hyp_pva_config_regs();
//ASSERT(hv_err == true);
//ASSERT(false);
//}
static void pva_kmd_config_regs(void)
{
bool hv_err = true;
hv_err = hyp_pva_config_regs();
ASSERT(hv_err == true);
}
void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva)
{
if (pva->load_from_gsc && pva->is_hv_mode) {
/* HVC Call to program EVP, Segment config registers and SCR registers */
pva_kmd_config_regs();
} else {
pva_kmd_config_evp_seg_regs(pva);
pva_kmd_config_scr_regs(pva);
}
}
void pva_kmd_config_sid_regs(struct pva_kmd_device *pva)
{
if (!(pva->load_from_gsc && pva->is_hv_mode)) {
pva_kmd_config_sid(pva);
}
}
bool pva_kmd_device_maybe_on(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *device_props =
device_data->pva_device_properties;
struct device *dev = &device_props->pdev->dev;
if (pm_runtime_active(dev)) {
return true;
} else {
return false;
}
}
void pva_kmd_report_error_fsi(struct pva_kmd_device *pva, uint32_t error_code)
{
//TODO: Implement FSI error reporting once available for Linux
}

View File

@@ -40,6 +40,7 @@ pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva,
mem_impl = pva_kmd_zalloc(sizeof(struct pva_kmd_device_memory_impl));
if (mem_impl == NULL) {
pva_kmd_log_err("pva_kmd_zalloc failed");
goto err_out;
}

View File

@@ -21,6 +21,7 @@
#include <linux/clkdev.h>
#include <linux/clk-provider.h>
#include <linux/dma-mapping.h>
#include <soc/tegra/fuse-helper.h>
#if KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE
#include <linux/tegra-ivc.h>
@@ -62,11 +63,7 @@ struct nvpva_device_data t23x_pva0_props = {
.version = PVA_CHIP_T23X,
.ctrl_ops = &tegra_pva_ctrl_ops,
.class = NV_PVA0_CLASS_ID,
/* We should not enable autosuspend here as this logic is handled in
* common code. When poweroff is called, common code expects PVA to be
* _really_ powered off. If we enable autosuspend, PVA will stay on for
* a while. */
.autosuspend_delay = 0,
.autosuspend_delay = 500,
.firmware_name = PVA_KMD_LINUX_T23X_FIRMWARE_NAME
};
@@ -74,11 +71,7 @@ struct nvpva_device_data t26x_pva0_props = {
.version = PVA_CHIP_T26X,
.ctrl_ops = &tegra_pva_ctrl_ops,
.class = NV_PVA0_CLASS_ID,
/* We should not enable autosuspend here as this logic is handled in
* common code. When poweroff is called, common code expects PVA to be
* _really_ powered off. If we enable autosuspend, PVA will stay on for
* a while. */
.autosuspend_delay = 0,
.autosuspend_delay = 500,
.firmware_name = PVA_KMD_LINUX_T26X_FIRMWARE_NAME
};
@@ -265,6 +258,9 @@ static bool pva_kmd_in_test_mode(struct device *dev, bool param_test_mode)
{
const char *dt_test_mode = NULL;
if (!tegra_platform_is_silicon())
return true;
if (of_property_read_string(dev->of_node, "nvidia,test_mode_enable",
&dt_test_mode)) {
return param_test_mode;
@@ -362,8 +358,14 @@ static int pva_probe(struct platform_device *pdev)
pva_props->version, 0, app_authenticate, pva_enter_test_mode);
pva_device->is_hv_mode = is_tegra_hypervisor_mode();
pva_device->is_silicon = tegra_platform_is_silicon();
if (!pva_device->is_silicon) {
pva_device->load_from_gsc = false;
} else {
pva_device->load_from_gsc = load_from_gsc;
}
pva_device->stream_ids[pva_device->r5_image_smmu_context_id] =
pva_get_gsc_priv_hwid(pdev);
@@ -488,6 +490,17 @@ static int __exit pva_remove(struct platform_device *pdev)
struct kobj_attribute *attr = NULL;
int i;
/* Make sure PVA is powered off here by disabling auto suspend */
pm_runtime_dont_use_autosuspend(&pdev->dev);
/* At this point, PVA should be suspended in L4T. However, for AV+L,
* PVA will still be powered on since the system took an additional
* reference count. We need to temporarily drop it to suspend and then
* restore the reference count. */
if (pm_runtime_active(&pdev->dev)) {
pm_runtime_put_sync(&pdev->dev);
pm_runtime_get_noresume(&pdev->dev);
}
if (pva_props->clk_cap_attrs) {
for (i = 0; i < pva_props->num_clks; i++) {
attr = &pva_props->clk_cap_attrs[i];
@@ -511,40 +524,59 @@ static int __exit pva_remove(struct platform_device *pdev)
return 0;
}
static int pva_kmd_linux_device_runtime_resume(struct device *dev)
static int runtime_resume(struct device *dev)
{
int err;
struct nvpva_device_data *props = dev_get_drvdata(dev);
struct pva_kmd_device *pva = props->private_data;
enum pva_error pva_err = PVA_SUCCESS;
dev_info(dev, "PVA: Calling runtime resume");
reset_control_acquire(props->reset_control);
dev_info(dev, "Start runtime resume");
err = clk_bulk_prepare_enable(props->num_clks, props->clks);
if (err < 0) {
reset_control_release(props->reset_control);
dev_err(dev, "failed to enabled clocks: %d\n", err);
return err;
dev_err(dev, "Runtime resume failed to enabled clocks: %d\n",
err);
goto err_out;
}
reset_control_acquire(props->reset_control);
reset_control_reset(props->reset_control);
reset_control_release(props->reset_control);
return 0;
pva_err = pva_kmd_init_fw(pva);
if (pva_err != PVA_SUCCESS) {
err = -EIO;
dev_info(dev, "Runtime resume failed to init fw");
goto disable_clocks;
}
static int pva_kmd_linux_device_runtime_suspend(struct device *dev)
dev_info(dev, "Runtime resume succeeded");
return 0;
disable_clocks:
clk_bulk_disable_unprepare(props->num_clks, props->clks);
err_out:
return err;
}
static int runtime_suspend(struct device *dev)
{
struct nvpva_device_data *props = dev_get_drvdata(dev);
struct pva_kmd_device *pva = props->private_data;
enum pva_error pva_err = PVA_SUCCESS;
dev_info(dev, "PVA: Calling runtime suspend");
dev_info(dev, "Start runtime suspend");
reset_control_acquire(props->reset_control);
reset_control_assert(props->reset_control);
pva_err = pva_kmd_deinit_fw(pva);
if (pva_err != PVA_SUCCESS) {
// These errors are expected if PVA has been aborted; it's safe to ignore them.
dev_err(dev, "Failed to deinit firmware");
}
clk_bulk_disable_unprepare(props->num_clks, props->clks);
reset_control_release(props->reset_control);
dev_info(dev, "Runtime suspend complete");
return 0;
}
#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID) /* Linux v6.11 */
@@ -560,113 +592,77 @@ static int __exit pva_remove_wrapper(struct platform_device *pdev)
}
#endif
static int pva_kmd_linux_device_resume(struct device *dev)
static int system_resume(struct device *dev)
{
enum pva_error status = PVA_SUCCESS;
int err = 0;
struct nvpva_device_data *props = dev_get_drvdata(dev);
struct pva_kmd_device *pva_device = props->private_data;
enum pva_error pva_err = PVA_SUCCESS;
if (pva_device->is_suspended == false) {
dev_warn(dev, "PVA is not in suspend state.\n");
goto fail_not_in_suspend;
}
dev_info(dev, "PVA: Calling resume");
dev_info(dev, "System resume");
err = pm_runtime_force_resume(dev);
if (err != 0) {
goto fail_runtime_resume;
dev_err(dev, "Force resume failed");
goto out;
}
if (pva_device->refcount != 0u) {
status = pva_kmd_init_fw(pva_device);
/* Even after force resume, the PVA may still be powered off if the usage count is 0.
* Therefore, we need to skip restoring firmware state in this case.
*/
if (!pm_runtime_active(dev)) {
dev_info(dev, "No active PVA users. Skipping resume.");
goto out;
}
if (status != PVA_SUCCESS) {
err = -EINVAL;
goto fail_init_fw;
pva_err = pva_kmd_complete_resume(pva_device);
if (pva_err != PVA_SUCCESS) {
dev_err(dev, "Complete resume failed");
err = -EIO;
goto out;
}
fail_init_fw:
fail_runtime_resume:
fail_not_in_suspend:
out:
dev_info(dev, "Resume from system suspend completed: %d\n", err);
return err;
}
static int pva_kmd_linux_device_suspend(struct device *dev)
static int system_suspend(struct device *dev)
{
int err = 0;
struct nvpva_device_data *props = dev_get_drvdata(dev);
struct pva_kmd_device *pva_device = props->private_data;
enum pva_error pva_err = PVA_SUCCESS;
if (pva_device->refcount != 0u) {
pva_kmd_deinit_fw(pva_device);
dev_info(dev, "System suspend");
// Synchronize with runtime suspend/resume calls
pm_runtime_barrier(dev);
// Now it's safe to check runtime status
if (!pm_runtime_active(dev)) {
dev_info(
dev,
"PVA is powered off. Nothing to do for system suspend.");
goto out;
}
pva_err = pva_kmd_prepare_suspend(pva_device);
if (pva_err != PVA_SUCCESS) {
dev_err(dev, "Prepare system suspend failed");
err = -EBUSY;
goto out;
}
dev_info(dev, "PVA: Calling suspend");
err = pm_runtime_force_suspend(dev);
if (err != 0) {
dev_err(dev, "(FAIL) PM suspend\n");
goto fail_nvhost_module_suspend;
dev_err(dev, "Force suspend failed");
goto out;
}
pva_device->is_suspended = true;
fail_nvhost_module_suspend:
out:
return err;
}
static int pva_kmd_linux_device_prepare_suspend(struct device *dev)
{
struct nvpva_device_data *props = dev_get_drvdata(dev);
struct pva_kmd_device *pva_device = props->private_data;
enum pva_error status = PVA_SUCCESS;
int err = 0;
dev_info(dev, "PVA: Preparing to suspend");
if (pva_device->is_suspended == true) {
dev_info(dev, "PVA device already suspended");
goto fail_already_in_suspend;
}
status = pva_kmd_prepare_suspend(pva_device);
if (status != PVA_SUCCESS) {
dev_info(dev, "PVA: Suspend FAIL");
err = -EBUSY;
goto fail;
}
fail_already_in_suspend:
fail:
return err;
}
static void pva_kmd_linux_device_complete_resume(struct device *dev)
{
enum pva_error status = PVA_SUCCESS;
struct nvpva_device_data *props = dev_get_drvdata(dev);
struct pva_kmd_device *pva_device = props->private_data;
dev_info(dev, "PVA: Completing resume");
if (pva_device->is_suspended == false) {
dev_info(dev, "PVA device not in suspend state");
goto done;
}
status = pva_kmd_complete_resume(pva_device);
if (status != PVA_SUCCESS) {
dev_err(dev, "PVA: Resume failed");
goto done;
}
dev_info(dev, "PVA: Resume complete");
done:
pva_device->is_suspended = false;
return;
}
enum pva_error pva_kmd_simulate_enter_sc7(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *device_data;
@@ -682,12 +678,7 @@ enum pva_error pva_kmd_simulate_enter_sc7(struct pva_kmd_device *pva)
// we need to emulate this behavior as well.
pm_runtime_get_noresume(dev);
ret = pva_kmd_linux_device_prepare_suspend(dev);
if (ret != 0) {
pva_kmd_log_err("SC7 simulation: prepare suspend failed");
return PVA_INTERNAL;
}
ret = pva_kmd_linux_device_suspend(dev);
ret = system_suspend(dev);
if (ret != 0) {
pva_kmd_log_err("SC7 simulation: suspend failed");
return PVA_INTERNAL;
@@ -705,14 +696,13 @@ enum pva_error pva_kmd_simulate_exit_sc7(struct pva_kmd_device *pva)
struct device *dev = &device_props->pdev->dev;
int ret;
ret = pva_kmd_linux_device_resume(dev);
dev_info(dev, "SC7 simulation: resume");
ret = system_resume(dev);
if (ret != 0) {
pva_kmd_log_err("SC7 simulation: resume failed");
return PVA_INTERNAL;
}
pva_kmd_linux_device_complete_resume(dev);
// The PM core decreases the device usage count after calling complete, so
// we need to emulate this behavior as well.
pm_runtime_put(dev);
@@ -721,12 +711,8 @@ enum pva_error pva_kmd_simulate_exit_sc7(struct pva_kmd_device *pva)
}
static const struct dev_pm_ops pva_kmd_linux_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(pva_kmd_linux_device_suspend,
pva_kmd_linux_device_resume)
SET_RUNTIME_PM_OPS(pva_kmd_linux_device_runtime_suspend,
pva_kmd_linux_device_runtime_resume, NULL)
.prepare = pva_kmd_linux_device_prepare_suspend,
.complete = pva_kmd_linux_device_complete_resume
SET_SYSTEM_SLEEP_PM_OPS(system_suspend, system_resume)
SET_RUNTIME_PM_OPS(runtime_suspend, runtime_resume, NULL)
};
static struct platform_driver pva_platform_driver = {

View File

@@ -75,9 +75,9 @@ void pva_kmd_shim_add_trace_fence(
trace_info->value);
} else if (trace_info->type ==
PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) {
trace_job_prefence_semaphore(job_id,
trace_info->fence_id,
trace_info->offset,
trace_job_prefence_semaphore(
job_id, trace_info->fence_id,
PVA_LOW32(trace_info->offset),
trace_info->value);
}
} else if (trace_info->action ==
@@ -87,9 +87,9 @@ void pva_kmd_shim_add_trace_fence(
trace_info->value);
} else if (trace_info->type ==
PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) {
trace_job_postfence_semaphore(job_id,
trace_info->fence_id,
trace_info->offset,
trace_job_postfence_semaphore(
job_id, trace_info->fence_id,
PVA_LOW32(trace_info->offset),
trace_info->value);
}
}

View File

@@ -26,12 +26,17 @@ void pva_kmd_free(void *ptr)
void pva_kmd_print_str(const char *str)
{
printk(KERN_INFO "%s", str);
printk(KERN_INFO "%s\n", str);
}
void pva_kmd_print_str_u64(const char *str, uint64_t n)
{
printk(KERN_INFO "%s:%llu", str, n);
printk(KERN_INFO "%s:%llu\n", str, n);
}
void pva_kmd_print_str_hex32(const char *str, uint32_t n)
{
printk("%s: 0x%08x\n", str, n);
}
enum pva_error pva_kmd_mutex_init(pva_kmd_mutex_t *m)
@@ -104,3 +109,23 @@ uint64_t pva_kmd_get_time_tsc(void)
#endif
return timestamp;
}
void pva_kmd_atomic_store(pva_kmd_atomic_t *atomic_val, int val)
{
atomic_set(atomic_val, val);
}
int pva_kmd_atomic_fetch_add(pva_kmd_atomic_t *atomic_val, int val)
{
return atomic_fetch_add(val, atomic_val);
}
int pva_kmd_atomic_fetch_sub(pva_kmd_atomic_t *atomic_val, int val)
{
return atomic_fetch_sub(val, atomic_val);
}
int pva_kmd_atomic_load(pva_kmd_atomic_t *atomic_val)
{
return atomic_read(atomic_val);
}

View File

@@ -34,21 +34,30 @@ atomic_t g_num_smmu_ctxs = ATOMIC_INIT(0);
atomic_t g_num_smmu_probing_done = ATOMIC_INIT(0);
bool g_smmu_probing_done = false;
static uint32_t pva_kmd_device_get_sid(struct platform_device *pdev)
static int pva_kmd_device_get_sid(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
uint32_t sid;
ASSERT(fwspec != NULL);
ASSERT(fwspec->num_ids != 0);
if (fwspec == NULL) {
dev_err(&pdev->dev, "SMMU fwspec is NULL");
return -ENOENT;
}
if (fwspec->num_ids == 0) {
dev_err(&pdev->dev, "SMMU fwspec has no IDs");
return -EINVAL;
}
return fwspec->ids[0] & 0xffff;
sid = fwspec->ids[0] & 0xffff;
return (int)sid;
}
static int pva_kmd_linux_device_smmu_context_probe(struct platform_device *pdev)
{
int idx;
int new_idx;
int sid_or_err;
if (!iommu_get_domain_for_dev(&pdev->dev)) {
dev_err(&pdev->dev,
@@ -73,12 +82,17 @@ static int pva_kmd_linux_device_smmu_context_probe(struct platform_device *pdev)
idx = new_idx;
g_smmu_ctxs[idx].pdev = pdev;
g_smmu_ctxs[idx].sid = pva_kmd_device_get_sid(pdev);
sid_or_err = pva_kmd_device_get_sid(pdev);
if (sid_or_err < 0) {
dev_err(&pdev->dev, "Failed to get SID: %d", sid_or_err);
atomic_dec(&g_num_smmu_ctxs);
return sid_or_err;
}
g_smmu_ctxs[idx].sid = (uint32_t)sid_or_err;
atomic_add(1, &g_num_smmu_probing_done);
dev_info(&pdev->dev, "initialized (streamid=%d)",
pva_kmd_device_get_sid(pdev));
dev_info(&pdev->dev, "initialized (streamid=%u)", g_smmu_ctxs[idx].sid);
return 0;
}

View File

@@ -11,6 +11,8 @@
#define PVA_INVALID_CCQ_ID 0xFF
#define PVA_INVALID_ENGINE_ID 0xFFU
/* CCQ status 6 is used to store checkpoint value */
#define PVA_REG_CCQ_STATUS6_IDX 6
#define PVA_CFG_CCQ_STATUS_COUNT 9
#define PVA_CFG_CCQ_BLOCK_SIZE 0x10000
@@ -140,4 +142,9 @@
#define PVA_KMD_TEST_MODE_ENV_VAR "PVA_TEST_MODE"
#define PVA_NS_PER_TSC_TICK_T26X 1U
#define PVA_NS_PER_TSC_TICK_T23X 32U
#define PVA_VERSION_BUFFER_SIZE 256U
#endif // PVA_CONSTANTS_H

View File

@@ -4,7 +4,6 @@
#ifndef PVA_MATH_UTILS_H
#define PVA_MATH_UTILS_H
#include "pva_plat_faults.h"
typedef enum {
MATH_OP_SUCCESS,
MATH_OP_ERROR,
@@ -766,26 +765,6 @@ MAX_DEFINE(a, b, u64, uint64_t)
MAX_DEFINE(a, b, s32, int32_t)
MAX_DEFINE(a, b, s64, int64_t)
static inline uint64_t tsc_to_ns(uint64_t tsc)
{
return safe_mulu64(tsc, 32);
}
static inline uint64_t tsc_to_us(uint64_t tsc)
{
return tsc_to_ns(tsc) / 1000;
}
static inline uint64_t ns_to_tsc(uint64_t ns)
{
return ns / 32;
}
static inline uint64_t us_to_tsc(uint64_t us)
{
return ns_to_tsc(safe_mulu64(us, 1000));
}
/**
* @brief Generates a 64-bit mask based on the specified start position, count, and density.
*
@@ -923,4 +902,37 @@ static inline uint64_t pva_pl_to_bl_offset(uint64_t pl_offset,
return addu64((uint64_t)gobBase, gobOffset, math_error);
}
static inline int syncobj_reached_threshold(uint32_t value, uint32_t threshold)
{
/*
* We're interested in "value >= threshold" but need to take wraparound
* into account. Ideally signed arithmetic of (value - threshold) >= 0
* should do, which can handle max wrap difference of half the uint
* range.
*/
uint32_t a = threshold;
uint32_t b = value;
uint32_t two_pow_31 = 0x80000000u;
uint32_t c = 0u;
uint32_t distance_ab;
uint32_t distance_ac;
if (a < two_pow_31) {
c = a + two_pow_31;
} else {
c = a & 0x7FFFFFFFu;
}
/* If we imagine numbers between 0 and (1<<32)-1 placed along a circle,
* then a-b is exactly the distance from b to a along the circle moving
* clockwise. This test checks that the distance between a and b is
* strictly smaller than the distance between a and c.
*/
/* Underflow of unsigned value, if happens, is intentional. */
distance_ab = b - a;
distance_ac = c - a;
return (distance_ab < distance_ac);
}
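/*
 * Worked examples of the wraparound test above, with assumed values:
 *
 *     value = 0x00000005, threshold = 0xFFFFFFF0
 *         c           = threshold & 0x7FFFFFFF = 0x7FFFFFF0
 *         distance_ab = value - threshold      = 0x00000015
 *         distance_ac = c - threshold          = 0x80000000
 *         0x00000015 < 0x80000000  ->  returns 1 (value wrapped past threshold)
 *
 *     value = 0xFFFFFFF0, threshold = 0x00000005
 *         c           = threshold + 0x80000000 = 0x80000005
 *         distance_ab = value - threshold      = 0xFFFFFFEB
 *         distance_ac = c - threshold          = 0x80000000
 *         0xFFFFFFEB < 0x80000000  ->  returns 0 (value still behind threshold)
 */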
#endif

View File

@@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_VERSION_H
#define PVA_VERSION_H
#ifdef __cplusplus
extern "C" {
#endif
/** @brief Specifies the PVA system software commit ID. */
#define PVA_SYSSW_COMMIT_ID "b284345610cebb0cc1a9cd85a91298d50e4a7d68"
#ifdef __cplusplus
}
#endif
#endif // PVA_VERSION_H