pva: deploy V3 KMD

Jira PVAAS-17785

Change-Id: I8ebc4c49aec209c5f82c6725605b62742402500a
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3299880
Tested-by: Nan Wang <nanwa@nvidia.com>
Reviewed-by: Vishwas M <vishwasm@nvidia.com>
Reviewed-by: Mohnish Jain <mohnishj@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Nan Wang <nanwa@nvidia.com>
Nan Wang
2025-02-10 14:20:57 -08:00
committed by Jon Hunter
parent b5d768302a
commit b63a822a1b
113 changed files with 22508 additions and 0 deletions

View File

@@ -0,0 +1,104 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: GPL-2.0-only
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
ifndef CONFIG_TEGRA_SYSTEM_TYPE_ACK
ifeq ($(CONFIG_TEGRA_OOT_MODULE),m)
ifeq ($(findstring ack_src,$(NV_BUILD_KERNEL_OPTIONS)),)
obj-m := nvhost-pva.o
PVA_SYS_DIR := .
PVA_SYS_ABSDIR := $(srctree.nvidia-oot)/drivers/video/tegra/host/pva
###### Begin generated section ######
pva_objs += \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_block_allocator.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_cmdbuf.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_context.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_debugfs.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_device.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_binding.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_validate.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_write.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_fw_debug.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_fw_profiler.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_hwseq_validate.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_msg.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_op_handler.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_pm.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_queue.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_resource_table.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_sha256.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_boot.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_elf_parser.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_executable.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_hwpm.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_isr.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_misc.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_submitter.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_t23x.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_t26x.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_tegra_stats.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_utils.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_vpu_app_auth.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_vpu_ocd.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_debugfs.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_device.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_device_memory.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_driver.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_ioctl.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_isr.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_misc.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_smmu.o \
$(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_vpu_app_auth.o \
pva_inc_flags += \
-I$(PVA_SYS_ABSDIR)/src/fw/baremetal/include \
-I$(PVA_SYS_ABSDIR)/src/fw/include \
-I$(PVA_SYS_ABSDIR)/src/include \
-I$(PVA_SYS_ABSDIR)/src/kmd/common \
-I$(PVA_SYS_ABSDIR)/src/kmd/common/shim \
-I$(PVA_SYS_ABSDIR)/src/kmd/include \
-I$(PVA_SYS_ABSDIR)/src/kmd/linux/include \
-I$(PVA_SYS_ABSDIR)/src/libs/pva/include \
pva_def_flags += \
-DPVA_BUILD_MODE=PVA_BUILD_MODE_L4T \
-DPVA_BUILD_MODE_BAREMETAL=5 \
-DPVA_BUILD_MODE_L4T=3 \
-DPVA_BUILD_MODE_NATIVE=1 \
-DPVA_BUILD_MODE_QNX=2 \
-DPVA_BUILD_MODE_SIM=4 \
-DPVA_DEV_MAIN_COMPATIBLE=1 \
-DPVA_ENABLE_CUDA=1 \
-DPVA_IS_DEBUG=0 \
-DPVA_SAFETY=0 \
-DPVA_SKIP_SYMBOL_TYPE_CHECK \
-DPVA_SUPPORT_XBAR_RAW=1 \
-Dpva_kmd_linux_dummy_EXPORTS \
###### End generated section ######
nvhost-pva-objs += $(pva_objs)
ccflags-y += $(pva_inc_flags)
ccflags-y += $(pva_def_flags)
ccflags-y += -std=gnu11
endif
endif
endif

View File

@@ -0,0 +1,196 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_BIT_H
#define PVA_BIT_H
/*
* Bit manipulation macros
*/
/**
* @brief Number of bits per byte.
*/
#define PVA_BITS_PER_BYTE (8UL)
/**
* @defgroup PVA_BIT8_HELPER
*
* @brief Bit manipulation macros for numbers of type uint8_t.
* Parameters that convey a bit position must be in the range
* 0 to 7 inclusive.
* MSB and LSB parameters must both be in the range 0 to 7
* inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a variable of type uint8_t.
*/
#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu))
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Macro used to generate a bit-mask from MSB to LSB in a uint8_t variable.
* This macro sets all the bits from MSB to LSB.
*/
#define PVA_MASK8(_msb_, _lsb_) \
((uint8_t)((((PVA_BIT8(_msb_) - 1U) | PVA_BIT8(_msb_)) & \
~(PVA_BIT8(_lsb_) - 1U)) & \
0xffu))
//! @endcond
/** @} */
/**
* @defgroup PVA_BIT16_HELPER
*
* @brief Bit manipulation macros for numbers of type uint16_t.
* Parameters that convey a bit position must be in the range
* 0 to 15 inclusive.
* MSB and LSB parameters must both be in the range 0 to 15
* inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 16 bit number.
*/
#define PVA_BIT16(_b_) ((uint16_t)(((uint16_t)1U << (_b_)) & 0xffffu))
/**
* @brief Macro to mask a range (MSB to LSB) of bit positions in a 16 bit number.
* This will set all the bit positions in the specified range.
*/
#define PVA_MASK16(_msb_, _lsb_) \
((uint16_t)((((PVA_BIT16(_msb_) - 1U) | PVA_BIT16(_msb_)) & \
~(PVA_BIT16(_lsb_) - 1U)) & \
0xffffu))
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Macro to extract bits from a 16 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT16(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK16((_msb_), (_lsb_))) >> (_lsb_)))
//! @endcond
/**
* @brief Macro to insert a value into a range of bits (MSB to LSB) of a
* 16 bit number. Bits outside the specified range are cleared.
*/
#define PVA_INSERT16(_x_, _msb_, _lsb_) \
((((uint16_t)(_x_)) << (_lsb_)) & PVA_MASK16((_msb_), (_lsb_)))
/** @} */
/**
* @defgroup PVA_BIT32_HELPER
*
* @brief Bit manipulation macros for numbers of type uint32_t.
* Parameters that convey a bit position must be in the range
* 0 to 31 inclusive.
* MSB and LSB parameters must both be in the range 0 to 31
* inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 32 bit number.
*/
#define PVA_BIT(_b_) ((uint32_t)(((uint32_t)1U << (_b_)) & 0xffffffffu))
/**
* @brief Macro to mask a range (MSB to LSB) of bit positions in a 32 bit number.
* This will set all the bit positions in the specified range.
*/
#define PVA_MASK(_msb_, _lsb_) \
(((PVA_BIT(_msb_) - 1U) | PVA_BIT(_msb_)) & ~(PVA_BIT(_lsb_) - 1U))
/**
* @brief Macro to extract bits from a 32 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK((_msb_), (_lsb_))) >> (_lsb_)))
/**
* @brief Macro to insert a value into a range of bits (MSB to LSB) of a
* 32 bit number. Bits outside the specified range are cleared.
*/
#define PVA_INSERT(_x_, _msb_, _lsb_) \
((((uint32_t)(_x_)) << (_lsb_)) & PVA_MASK((_msb_), (_lsb_)))
/** @} */
/**
* @defgroup PVA_BIT64_HELPER
*
* @brief Bit manipulation macros for numbers of type uint64_t.
* Parameters that convey a bit position must be in the range
* 0 to 63 inclusive.
* MSB and LSB parameters must both be in the range 0 to 63
* inclusive, with MSB greater than LSB.
* @{
*/
/**
* @brief Macro to set a given bit position in a 64 bit number.
*/
#define PVA_BIT64(_b_) \
((uint64_t)(((uint64_t)1UL << (_b_)) & 0xffffffffffffffffu))
/**
* @brief Macro used to generate a bit-mask from (MSB to LSB) in a uint64_t variable.
* This macro sets all the bits from MSB to LSB.
*/
#define PVA_MASK64(_msb_, _lsb_) \
(((PVA_BIT64(_msb_) - (uint64_t)1U) | PVA_BIT64(_msb_)) & \
~(PVA_BIT64(_lsb_) - (uint64_t)1U))
/**
* @brief Macro to extract bits from a 64 bit number.
* The bits are extracted from the range provided and the extracted
* number is finally type-casted to the type provided as argument.
*/
#define PVA_EXTRACT64(_x_, _msb_, _lsb_, _type_) \
((_type_)(((_x_)&PVA_MASK64((_msb_), (_lsb_))) >> (_lsb_)))
/**
* @brief Macro to insert a range of bits into a 64 bit number.
* The bits are derived from the number passed as argument.
*/
#define PVA_INSERT64(_x_, _msb_, _lsb_) \
((((uint64_t)(_x_)) << (_lsb_)) & PVA_MASK64((_msb_), (_lsb_)))
/**
* @brief Macro to pack two 32 bit values into a 64 bit number.
* The upper 32 bits of the result are taken from the second
* argument (_h_) and the lower 32 bits from the first
* argument (_l_).
*/
#define PVA_PACK64(_l_, _h_) \
(PVA_INSERT64((_h_), 63U, 32U) | PVA_INSERT64((_l_), 31U, 0U))
/**
* @brief Macro to extract the higher 32 bits from a 64 bit number.
*/
#define PVA_HI32(_x_) ((uint32_t)(((_x_) >> 32U) & 0xFFFFFFFFU))
/**
* @brief Macro to extract the lower 32 bits from a 64 bit number.
*/
#define PVA_LOW32(_x_) ((uint32_t)((_x_)&0xFFFFFFFFU))
/** @} */
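/*
 * Illustrative usage sketch (not part of the original interface): composing
 * and decomposing values with the helpers above. The variable names and
 * literal values below are examples only.
 *
 * @code
 * uint32_t lo     = 0x89ABCDEFU;
 * uint32_t hi     = 0x01234567U;
 * uint64_t packed = PVA_PACK64(lo, hi);                          // 0x0123456789ABCDEF
 * uint32_t field  = PVA_EXTRACT(0xDEADBEEFU, 15U, 8U, uint32_t); // 0xBE
 * uint64_t word   = PVA_INSERT64(0x3FULL, 37U, 32U);             // bits [37:32] = 0x3F
 * uint32_t high   = PVA_HI32(packed);                            // 0x01234567
 * uint32_t low    = PVA_LOW32(packed);                           // 0x89ABCDEF
 * @endcode
 */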
#endif

View File

@@ -0,0 +1,316 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_CHECKPOINT_H
#define PVA_CHECKPOINT_H
/**
* @file pva-checkpoint.h
* @brief Defines macros to create a checkpoint
*/
/**
* @defgroup PVA_CHECKPOINT_MACROS Macros to define a checkpoint
*
* @brief Checkpoints are the 32-bit status values that can be written to status
* register during R5's execution. The 32-bit value is divided into four 8-bit values.
* These are:
* - major code: major aspect (usually a unit) of the uCode. Bit Position: [31:24]
* Valid values are defined at @ref PVA_CHECKPOINT_MAJOR_CODES.
* - minor code: minor aspect (usually a function) of the uCode. The interpretation of the
* minor value is determined by the major value. Bit Position: [23:16]
* - flags: flags indicating type of the checkpoint such as error checkpoint,
* performance checkpoint, checkpoint indicating start of an operation,
* checkpoint indicating end of an operation etc. Bit Position: [15:8]
* Valid values are defined at @ref PVA_CHECKPOINT_FLAGS.
* - sequence: disambiguates multiple checkpoints within a minor code or conveys additional
* information. The interpretation of the sequence value is determined by both the
* major and minor values. Bit Position: [7:0]
* Valid values are any values from 0 to UINT8_MAX
* @{
*/
/**
* @defgroup PVA_CHECKPOINT_MAJOR_CODES
* @brief Macros to define the major code field of the checkpoint @ingroup PVA_CHECKPOINT_MACROS
* @{
*/
/*
* Operational major codes
*/
/**
* @brief Major code for PVA during Boot.
*/
#define PVA_CHK_MAIN (0x01U)
//! @endcond
/**
* @brief Major code for error and abort conditions.
*/
#define PVA_CHK_ABORT (0xFFU)
/** @} */
/**
* @defgroup PVA_CHECKPOINT_HW_STATE_MINOR_CODES
* @brief Macros to define the minor code field of the checkpoints with major code PVA_CHK_HW_STATE
* @ingroup PVA_CHECKPOINT_MACROS
*
* @{
*/
/**
* @brief Minor code while doing a MMIO HW state check.
*/
#define PVA_CHK_HW_STATE_MMIO (0x01U)
/**
* @brief Minor code while doing a VIC HW state check.
*/
#define PVA_CHK_HW_STATE_VIC (0x02U)
/**
* @brief Minor code while doing a ARM register HW state check.
*/
#define PVA_CHK_HW_STATE_ARM (0x03U)
/**
* @brief Minor code while doing a MPU HW state check.
*/
#define PVA_CHK_HW_STATE_MPU (0x04U)
/**
* @brief Minor code while doing a DMA HW state check.
*/
#define PVA_CHK_HW_STATE_DMA (0x05U)
/**
* @brief Minor code while doing a golden register HW state check.
*/
#define PVA_CHK_HW_STATE_GOLDEN (0x06U)
/** @} */
/** @} */
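/*
 * Illustrative sketch (not part of the original interface): composing a
 * checkpoint word from its four 8-bit fields with the PVA_INSERT() helper
 * from pva-bit.h, assuming that helper is available in the including unit.
 * The minor code, flags and sequence values below are placeholders.
 *
 * @code
 * uint32_t checkpoint = PVA_INSERT(PVA_CHK_MAIN, 31U, 24U) | // major code
 *                       PVA_INSERT(0x02U, 23U, 16U) |        // minor code (placeholder)
 *                       PVA_INSERT(0x01U, 15U, 8U) |         // flags (placeholder)
 *                       PVA_INSERT(0x00U, 7U, 0U);           // sequence (placeholder)
 * @endcode
 */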
/**
* @defgroup PVA_ABORT_REASONS
*
* @brief Macros to define the abort reasons
* @{
*/
/**
* @brief Minor code for abort due to assert.
*/
#define PVA_ABORT_ASSERT (0x01U)
/**
* @brief Minor code for abort in case pva main call fails.
*/
#define PVA_ABORT_FALLTHRU (0x02U)
/**
* @brief Minor code for abort in case of fatal IRQ.
*/
#define PVA_ABORT_IRQ (0x05U)
/**
* @brief Minor code for abort in case of MPU failure.
*/
#define PVA_ABORT_MPU (0x06U)
/**
* @brief Minor code for abort in case of ARM exception.
*/
#define PVA_ABORT_EXCEPTION (0x07U)
/**
* @brief Minor code for abort in case of un-supported SID read.
*/
#define PVA_ABORT_UNSUPPORTED (0x09U)
/**
* @brief Minor code for abort in case of DMA failures.
*/
#define PVA_ABORT_DMA_TASK (0x0cU)
/**
* @brief Minor code for abort in case of WDT failures.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_WATCHDOG (0x0eU)
//! @endcond
/**
* @brief Minor code for abort in case of VPU init failures.
*/
#define PVA_ABORT_VPU (0x0fU)
/**
* @brief Minor code for abort in case of DMA MISR setup failures.
*/
#define PVA_ABORT_DMA (0x10U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code for abort in case of Mbox errors.
* Note: This is used only in T19x
*/
#define PVA_ABORT_MBOX_WAR (0x12U)
//! @endcond
/**
* @brief Minor code for abort in case of AISR errors.
*/
#define PVA_ABORT_AISR_QUEUE (0x14U)
/**
* @brief Minor code for abort in case of bad task.
*/
#define PVA_ABORT_BAD_TASK (0x15U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code for abort in case of PPE init failures.
* Note: This is only used in T26x
*/
#define PVA_ABORT_PPE (0x16U)
//! @endcond
/**
* @brief Minor code for abort in case of RAMIC failures.
*/
#define PVA_ABORT_RAMIC (0x20U)
/**
* @brief Minor code for SEC safety errors.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_SEC_SERR (0x21U)
/**
* @brief Minor code for SEC functional errors.
* Note: This code is not reported to HSM.
*/
#define PVA_ABORT_SEC_FERR (0x22U)
/**
* @brief Minor code for abort in case of firewall decode error.
*/
#define PVA_ABORT_L2SRAM_FWDEC (0x23U)
/**
* @brief Minor code for abort in case of kernel panic.
*/
#define PVA_ABORT_KERNEL_PANIC (0x30U)
/**
* @brief Minor code for abort in case of Batch Timeout.
*/
#define PVA_ABORT_BATCH_TIMEOUT (0x40U)
/**
* @brief Minor code for abort in case of a DMA transfer timeout
* (while in the launch phase for the VPU).
*/
#define PVA_ABORT_DMA_SETUP_TIMEOUT (0x41U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Minor code used when NOC BIST is run.
* Note: This is only used in T19x
*/
#define PVA_ABORT_NOC_BIST (0xfcU)
//! @endcond
/** @} */
/**
* @defgroup PVA_ABORT_ARGUMENTS Macros to define the argument for pva_abort operation
*
* @brief Argument of pva_abort operation is updated in status register
*
*/
/**
* @defgroup PVA_ABORT_ARGUMENTS_MPU
* @brief Argument to pva_abort() from MPU operations
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor code when there is an error while configuring MPU.
*/
#define PVA_ABORT_MPU_CONFIG (0xE001U)
/**
* @brief Minor code when there is an error while initializing MPU.
*/
#define PVA_ABORT_MPU_INIT (0xE002U)
/** @} */
/**
* @defgroup PVA_ABORT_ARGUMENTS_VPU
* @brief Argument to pva_abort() from VPU operations
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor code when VPU is in debug state.
*/
#define PVA_ABORT_VPU_DEBUG (0xE001U)
/** @} */
/**
* @defgroup PVA_ABORT_ARGUMENTS_PPE
* @brief Argument to pva_abort() from PPE operations
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor code when PPE is in debug state.
*/
#define PVA_ABORT_PPE_DEBUG (0xE002U)
/** @} */
/**
* @brief Minor Code when DMA state is not idle to perform
* DMA MISR setup.
*/
#define PVA_ABORT_DMA_MISR_BUSY (0xE001U)
/**
* @brief Minor Code in DMA when MISR has timed out
*/
#define PVA_ABORT_DMA_MISR_TIMEOUT (0xE002U)
/**
* @defgroup PVA_ABORT_ARGUMENTS_IRQ Argument to pva_abort() from IRQs
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor Code for Command FIFO used by Interrupt Handler.
*/
#define PVA_ABORT_IRQ_CMD_FIFO (0xE001U)
#if (0 == DOXYGEN_DOCUMENTATION)
#define PVA_ABORT_IRQ_TEST_HOST (0xE002U)
#endif
/** @} */
#endif

View File

@@ -0,0 +1,231 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_CONFIG_H
#define PVA_CONFIG_H
#include <pva-types.h>
#include "pva_fw_constants.h"
/**
* @defgroup PVA_CONFIG_PARAMS
*
* @brief PVA Configuration parameters.
* @{
*/
/**
* @brief Queue id for queue0.
*/
#define PVA_FW_QUEUE_0 (0U)
/**
* @brief Total number of queues that are present
* for communication between KMD and FW.
*/
#define PVA_NUM_QUEUES (8U)
/**
* @brief Maximum queue id value in PVA System.
*/
#define PVA_MAX_QUEUE_ID (PVA_NUM_QUEUES - 1U)
/**
* @brief Maximum number of tasks that a queue supports.
*/
#define MAX_QUEUE_DEPTH (256U)
/**
* @brief Number of Hardware Semaphore registers in PVA System.
*/
#define PVA_NUM_SEMA_REGS (4U)
/**
* @brief Number of Hardware Mailbox registers in PVA System.
*/
#define PVA_NUM_MBOX_REGS (8U)
/**
* @brief Maximum number of Pre-Actions for a task.
*/
#define PVA_MAX_PREACTIONS (26U)
/**
* @brief Maximum number of Post-Actions for a task.
*/
#define PVA_MAX_POSTACTIONS (28U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Maximum number of DMA channels for T26x.
*/
#define PVA_NUM_DMA_CHANNELS_T26X (8U)
/**
* @brief Total number of AXI data buffers for T26x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T26X (304U)
/**
* @brief Number of reserved AXI data buffers for T26x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T26X (16U)
/**
* @brief Number of dynamic AXI data buffers for T26x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T26X \
(PVA_NUM_DMA_ADB_BUFFS_T26X - PVA_NUM_RESERVED_ADB_BUFFERS_T26X)
/**
* @brief Maximum number of DMA channels for T23x.
*/
#define PVA_NUM_DMA_CHANNELS_T23X (16U)
//! @endcond
/**
* @brief Number of DMA descriptors for T19x.
*/
#define PVA_NUM_DMA_DESCS_T19X (64U)
/**
* @brief Number of DMA descriptors for T23x.
*/
#define PVA_NUM_DMA_DESCS_T23X (64U)
/**
* @brief Number of DMA descriptors for T26x.
*/
#define PVA_NUM_DMA_DESCS_T26X (96U)
/**
* @brief Number of reserved DMA channels per DMA engine. These channels
* are reserved for R5 transfers and are used by the R5 to move
* the data it needs.
*/
#define PVA_NUM_RESERVED_CHANNELS (1U)
/**
* @brief Number of dynamic DMA descriptors for T19x. These descriptors can be
* used by the VPU application to transfer data. They exclude
* the reserved descriptors from the total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T19X \
(PVA_NUM_DMA_DESCS_T19X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* @brief Number of dynamic DMA descriptors for T23x. These descriptors can be
* used by the VPU application to transfer data. They exclude
* the reserved descriptors from the total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T23X \
(PVA_NUM_DMA_DESCS_T23X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* @brief Number of dynamic DMA descriptors for T26x. These descriptors can be
* used by the VPU application to transfer data. They exclude
* the reserved descriptors from the total available ones.
*/
#define PVA_NUM_DYNAMIC_DESCS_T26X \
(PVA_NUM_DMA_DESCS_T26X - PVA_NUM_RESERVED_DESCRIPTORS)
/**
* Note: T26x will be brought up first on Linux, and then on QNX. To support this,
* the following macro is needed so that the QNX driver can build without requiring
* any changes.
*/
#define PVA_NUM_DYNAMIC_DESCS (PVA_NUM_DYNAMIC_DESCS_T23X)
/**
* @brief Number of reserved AXI data buffers for T23x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T23X (16U)
/**
* @brief Number of reserved VMEM data buffers.
*/
#define PVA_NUM_RESERVED_VDB_BUFFERS (0U)
/**
* @brief Total number of VMEM data buffers.
*/
#define PVA_NUM_DMA_VDB_BUFFS (128U)
/**
* @brief Total number of AXI data buffers for T23x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T23X (272U)
/**
* @brief Number of dynamic AXI data buffers for T23x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T23X \
(PVA_NUM_DMA_ADB_BUFFS_T23X - PVA_NUM_RESERVED_ADB_BUFFERS_T23X)
/**
* @brief Number of dynamic VMEM data buffers for T23x.
* These exclude the reserved VMEM data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_VDB_BUFFS \
(PVA_NUM_DMA_VDB_BUFFS - PVA_NUM_RESERVED_VDB_BUFFERS)
/**
* @brief The first Reserved DMA descriptor. This is used as a
* starting point to iterate over reserved DMA descriptors.
*/
#define PVA_RESERVED_DESC_START (60U)
/**
* @brief The first reserved AXI data buffer. This is used as a
* starting point to iterate over reserved AXI data buffers.
*/
#define PVA_RESERVED_ADB_BUFF_START PVA_NUM_DYNAMIC_ADB_BUFFS
/**
* @brief This macro has the value to be set by KMD in the shared semaphores
* @ref PVA_PREFENCE_SYNCPT_REGION_IOVA_SEM or @ref PVA_POSTFENCE_SYNCPT_REGION_IOVA_SEM
* if the syncpoint reserved region must not be configured as uncached
* in R5 MPU.
*/
#define PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET (0xFFFFFFFFU)
/** @} */
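/*
 * Illustrative sketch (not part of the original interface): the dynamic and
 * reserved AXI data buffer counts partition the total buffer range, so a
 * compile-time sanity check could look like the following. The use of a
 * static assertion here is an assumption for illustration only.
 *
 * @code
 * _Static_assert(PVA_NUM_DYNAMIC_ADB_BUFFS_T23X +
 *                        PVA_NUM_RESERVED_ADB_BUFFERS_T23X ==
 *                PVA_NUM_DMA_ADB_BUFFS_T23X,
 *                "T23x ADB buffer partitioning must cover the full range");
 * @endcode
 */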
/**
* @defgroup PVA_CONFIG_PARAMS_T19X
*
* @brief PVA Configuration parameters exclusively for T19X.
* @{
*/
/**
* @brief Number of DMA channels for T19x or Xavier.
*/
#define PVA_NUM_DMA_CHANNELS_T19X (14U)
/**
* @brief Number of reserved AXI data buffers for T19x.
*/
#define PVA_NUM_RESERVED_ADB_BUFFERS_T19X (8U)
/**
* @brief Total number of AXI data buffers for T19x.
*/
#define PVA_NUM_DMA_ADB_BUFFS_T19X (256U)
/**
* @brief Number of dynamic AXI data buffers for T19x.
* These exclude the reserved AXI data buffers from total available ones.
*/
#define PVA_NUM_DYNAMIC_ADB_BUFFS_T19X \
(PVA_NUM_DMA_ADB_BUFFS_T19X - PVA_NUM_RESERVED_ADB_BUFFERS_T19X)
/** @} */
#endif

View File

@@ -0,0 +1,428 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_ERRORS_H
#define PVA_ERRORS_H
#include <stdint.h>
#include <pva-packed.h>
/**
* @brief PVA Error codes
*/
typedef uint16_t pva_errors_t;
/**
* @defgroup PVA_ERRORS
*
* @brief General and interface errors of PVA.
* @{
*/
/**
* @brief In case of no Error.
*/
#define PVA_ERR_NO_ERROR (0x0U)
/**
* @brief Error in case of an illegal command
* PVA FW executes commands that are found
* in the command look up table. If a command
* is not part of supported commands, this
* error will be returned. Valid commands can be
* referred at @ref pva_cmd_lookup_t.
*
*/
#define PVA_ERR_BAD_CMD (0x1U)
/**
* @brief Error in case of a bad queue id, i.e.,
* the requested queue id is not available.
*/
#define PVA_ERR_BAD_QUEUE_ID (0x3U)
/**
* @brief Error in case of invalid pve-id. This
* error is generated if PVE id is greater
* than @ref PVA_NUM_PVE.
*/
#define PVA_ERR_BAD_PVE_ID (0x4U)
/**
* @brief Error in case when number of pre-actions
* are more than what can be accommodated.
*/
#define PVA_ERR_BUFF_TOO_SMALL (0x5U)
/**
* @brief Error in case when requested feature can not be satisfied.
* This error arises in scenarios where certain actions are
* not supported during execution of pre-actions or post-actions.
* For instance, @ref TASK_ACT_WRITE_STATUS is not supported in
* executing pre-actions of task.
*/
#define PVA_ERR_FEATURE_NOT_SUPPORTED (0x6U)
/**
* @brief Error in case when the address generated or translated does not
* meet the constraints like alignment or non-null.
*/
#define PVA_ERR_BAD_ADDRESS (0x9U)
/**
* @brief Error in case when timestamp is requested on un-supported action.
*/
#define PVA_ERR_BAD_TIME_VALUE (0xdU)
#if PVA_SAFETY == 0
/**
* @brief Error in case when the register provided to update
* the status is invalid.
*/
#define PVA_ERR_BAD_STATUS_REG (0x10U)
#endif
//! @endcond
/**
* @brief Error in case of a bad task.
* This error is returned when the task does not meet
* necessary criteria such as being non-zero or
* 64 byte aligned.
*/
#define PVA_ERR_BAD_TASK (0x15U)
/**
* @brief Error in case of invalid task action list. Invalid
* action list arises in scenarios like number of
* pre and post actions not being zero but actual
* pre or post action to be performed being NULL.
*/
#define PVA_ERR_BAD_TASK_ACTION_LIST (0x16U)
/**
* @brief Error when the internal state of a task is not as expected.
* A task transitions through various states while
* executing. When a state is not coherent with the
* action being performed, this error is returned.
* For example, a task cannot be in a running state
* while tear-down is being performed.
*/
#define PVA_ERR_BAD_TASK_STATE (0x17U)
/**
* @brief Error when the input status does not match the actual status.
* This error occurs when the status from @ref pva_gen_task_status_t does not match
* the actual status populated by FW during task execution.
*/
#define PVA_ERR_TASK_INPUT_STATUS_MISMATCH (0x18U)
/**
* @brief Error in case of invalid parameters. This error occurs when
* the parameters passed are invalid; it applies to both task parameters
* and DMA parameters.
*/
#define PVA_ERR_BAD_PARAMETERS (0x1aU)
/**
* @brief Error in case a timeout occurred for a batch of tasks.
*/
#define PVA_ERR_PVE_TIMEOUT (0x23U)
/**
* @brief Error when VPU has halted or turned off.
*/
#define PVA_ERR_VPU_ERROR_HALT (0x25U)
/**
* @brief Error reported after FW sends an abort signal to KMD; KMD writes this
* status into the status buffers of pending tasks.
*/
#define PVA_ERR_VPU_BAD_STATE (0x28U)
/**
* @brief Error in case of exiting VPU.
*/
#define PVA_ERR_VPU_EXIT_ERROR (0x2aU)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Error in case of exiting PPE.
*/
#define PVA_ERR_PPE_EXIT_ERROR (0x2bU)
//! @endcond
/**
* @brief Error when a task running on PVE caused abort on PVE.
*/
#define PVA_ERR_PVE_ABORT (0x2dU)
/**
* @brief Error in case of Floating point NAN.
*/
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Error in case of a PPE divide by zero.
*/
#define PVA_ERR_PPE_DIVIDE_BY_0 (0x34U)
/**
* @brief Error in case of a PPE illegal debug condition.
*/
#define PVA_ERR_PPE_ILLEGAL_DEBUG (0x36U)
/**
* @brief Error in case of PPE illegal instruction alignment.
*/
#define PVA_ERR_PPE_ILLEGAL_INSTR_ALIGN (0x37U)
/**
* @brief Error in case of Bad cached DRAM segment.
*/
#define PVA_ERR_BAD_CACHED_DRAM_SEG (0x3aU)
/**
* @brief Error in case of Bad DRAM IOVA.
*/
#define PVA_ERR_BAD_DRAM_IOVA (0x3cU)
//! @endcond
/**
* @brief Error in case of Register mis-match.
*/
#define PVA_ERR_REG_MISMATCH (0x3dU)
/**
* @brief Error in case of AISR queue empty.
*/
#define PVA_ERR_AISR_INPUT_QUEUE_EMPTY (0x3fU)
/**
* @brief Error in case of AISR queue full.
*/
#define PVA_ERR_AISR_OUTPUT_QUEUE_FULL (0x40U)
#if (PVA_HAS_L2SRAM == 1)
/**
* @brief Error in case of L2SRAM allocation failed due to invalid parameters.
*/
#define PVA_ERR_BAD_L2SRAM_PARAMS (0x41U)
#endif
/**
* @brief Error in case of bad or invalid task parameters.
*/
#define PVA_ERR_BAD_TASK_PARAMS (0x42U)
/**
* @brief Error in case of invalid VPU system call.
*/
#define PVA_ERR_VPU_SYS_ERROR (0x43U)
/**
* @brief Error in case of HW Watchdog timer timeout
*/
#define PVA_ERR_WDT_TIMEOUT_ERROR (0x44U)
/**
* @brief Error in case Golden register check value mismatch.
*/
#define PVA_ERR_GR_REG_MISMATCH (0x45U)
/**
* @brief Error in case Critical register check value mismatch.
*/
#define PVA_ERR_CRIT_REG_MISMATCH (0x46U)
/** @} */
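/*
 * Illustrative sketch (not part of the original interface): translating a few
 * of the general error codes above into log strings on the KMD side. The
 * helper name is hypothetical and only a subset of codes is shown.
 *
 * @code
 * static const char *pva_err_str(pva_errors_t err)
 * {
 *     switch (err) {
 *     case PVA_ERR_NO_ERROR:     return "no error";
 *     case PVA_ERR_BAD_CMD:      return "illegal command";
 *     case PVA_ERR_BAD_QUEUE_ID: return "bad queue id";
 *     case PVA_ERR_BAD_TASK:     return "bad task";
 *     default:                   return "unrecognized PVA error";
 *     }
 * }
 * @endcode
 */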
/**
* @defgroup PVA_DMA_ERRORS
*
* @brief DMA ERROR codes used across PVA.
* @{
*/
/**
* @brief Error when DMA transfer mode in DMA descriptor is invalid.
*/
#define PVA_ERR_DMA_TRANSFER_TYPE_INVALID (0x204U)
/**
* @brief Error when DMA transfer was not successful.
*/
#define PVA_ERR_DMA_CHANNEL_TRANSFER (0x207U)
/**
* @brief Error in case of BAD DMA descriptor.
*/
#define PVA_ERR_BAD_DMA_DESC_ID (0x208U)
/**
* @brief Error in case of BAD DMA channel ID.
*/
#define PVA_ERR_BAD_DMA_CHANNEL_ID (0x209U)
/**
* @brief Error in case of DMA timeout.
*/
#define PVA_ERR_DMA_TIMEOUT (0x20bU)
/**
* @brief Error when a program tries to use a channel that is already active.
*/
#define PVA_ERR_DMA_INVALID_CONFIG (0x220U)
/**
* @brief Error in case DMA transfer was not successful.
*/
#define PVA_ERR_DMA_ERROR (0x221U)
/**
* @brief Error when number of bytes of HW Seq data copy is
* not a multiple of 4.
*/
#define PVA_ERR_DMA_HWSEQ_BAD_PROGRAM (0x216U)
/**
* @brief Error when number of bytes of HW Seq data copy is
* more than HW Seq RAM size.
*/
#define PVA_ERR_DMA_HWSEQ_PROGRAM_TOO_LONG (0x217U)
/** @} */
/**
* @defgroup PVA_MISR_ERRORS
*
* @brief MISR error codes used across PVA.
* @{
*/
/**
* @brief Error status when DMA MISR test is not run.
*/
#define PVA_ERR_MISR_NOT_RUN (0x280U)
/**
* @brief Error status when DMA MISR test did not complete.
*/
#define PVA_ERR_MISR_NOT_DONE (0x281U)
/**
* @brief Error status when DMA MISR test timed out.
*/
#define PVA_ERR_MISR_TIMEOUT (0x282U)
/**
* @brief Error status in case of DMA MISR test address failure.
*/
#define PVA_ERR_MISR_ADDR (0x283U)
/**
* @brief Error status in case of DMA MISR test data failure.
*/
#define PVA_ERR_MISR_DATA (0x284U)
/**
* @brief Error status in case of DMA MISR test data and address failure.
*/
#define PVA_ERR_MISR_ADDR_DATA (0x285U)
/** @} */
/**
* @defgroup PVA_VPU_ISR_ERRORS
*
* @brief VPU ISR error codes used across PVA.
* @{
*/
/**
* @defgroup PVA_FAST_RESET_ERRORS
*
* @brief Fast reset error codes used across PVA.
* @{
*/
/**
* @brief Error when VPU is not in idle state for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_VPU (0x401U)
/**
* @brief Error if VPU I-Cache is busy before checking DMA engine for idle state.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE1 (0x402U)
/**
* @brief Error if DMA channel is busy for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_CH0 (0x403U)
/**
* @brief Error if VPU I-Cache is busy after checking DMA engine for idle state.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE2 (0x419U)
#if (PVA_CHIP_ID == CHIP_ID_T26X)
/**
* @brief Error when PPE is not in idle state for a reset to be done.
*/
#define PVA_ERR_FAST_RESET_TIMEOUT_PPE (0x420U)
#endif
/** @} */
/**
* @defgroup PVA_L2SRAM_ERRORS
*
* @brief L2SRAM memory error codes used across PVA.
* @{
*/
/**
* @brief Error if l2sram memory allocation failed because of insufficient l2sram memory or
* if 2 chunks of memory are already allocated.
*/
#define PVA_ERR_ALLOC_FAILED (0x812U)
/**
* @brief Error if the l2sram address given for clearing/freeing is not a valid L2SRAM address.
*/
#define PVA_ERR_FREE_FAILED (0x813U)
/** @} */
/**
* @defgroup PVA_INFO_ERRORS
*
* @brief Informational error codes.
* @{
*/
/**
* @brief Error when there is no task.
*/
#define PVA_ERR_NO_TASK (0x997U)
/**
* @brief Error when CCQ IRQ line enable on VIC fails
*/
#define PVA_ERR_CCQ_IRQ_ENABLE_FAILED (0x998U)
/**
* @brief Error when Mailbox IRQ line enable on VIC fails
*/
#define PVA_ERR_MBOX_IRQ_ENABLE_FAILED (0x999U)
/**
* @brief Error when L2SRAM IRQ line enable on VIC fails
*/
#define PVA_ERR_L2SRAM_IRQ_ENABLE_FAILED (0x99AU)
/**
* @brief Error when DMA0 IRQ line enable on VIC fails
*/
#define PVA_ERR_DMA0_IRQ_ENABLE_FAILED (0x99BU)
/**
* @brief Error when DMA1 IRQ line enable on VIC fails
*/
#define PVA_ERR_DMA1_IRQ_ENABLE_FAILED (0x99CU)
/**
* @brief Error when VPU IRQ line enable on VIC fails
*/
#define PVA_ERR_VPU_IRQ_ENABLE_FAILED (0x99DU)
/**
* @brief Error when SEC IRQ line enable on VIC fails
*/
#define PVA_ERR_SEC_IRQ_ENABLE_FAILED (0x99EU)
/**
* @brief Error when RAMIC IRQ line enable on VIC fails
*/
#define PVA_ERR_RAMIC_IRQ_ENABLE_FAILED (0x99FU)
/**
* @brief Error in case to try again.
* @note This error is internal to FW only.
*/
#define PVA_ERR_TRY_AGAIN (0x9A0U)
/** @} */
/* Never used */
#define PVA_ERR_MAX_ERR (0xFFFFU)
#endif /* PVA_ERRORS_H */

View File

@@ -0,0 +1,189 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2022 NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* Unit: Host Interface Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_FW_VERSION_H
#define PVA_FW_VERSION_H
#include <pva-bit.h>
/*
* Note: Below are doxygen comments with the @def command.
* This allows the comment to be physically distant from the define
* being documented, and allows for a single general comment
* regardless of the value being assigned to the macro.
*/
/**
* @defgroup PVA_VERSION_TYPE_FLAGS VERSION_TYPE Bit Flags
*
* @brief The bit flags that indicate the qualities of the Built Firmware.
* e.g: Debug, Safety, Test Features, etc.
*
* @see VERSION_TYPE
* @{
*/
/**
* @def VERSION_CODE_DEBUG
* @brief Set or Clear the 'debug' bit for the FW version type value. For a safety
* build the value of this define will be zero.
*
* @details This bit is set if the macro @ref PVA_DEBUG is defined.
* @see PVA_DEBUG
*/
#if PVA_DEBUG == 1
#define VERSION_CODE_DEBUG PVA_BIT(0)
#else
#define VERSION_CODE_DEBUG (0U)
#endif
/**
* @def VERSION_CODE_SAFETY
* @brief Set or Clear the 'safety' bit for the FW version type value. For a safety
* build the value of this define will be non-zero.
*
* @details This bit is set if the macro @ref PVA_SAFETY is defined.
* Building for Safety disables certain functions that are used for debug, testing,
* or would otherwise pose a risk to system conforming to safety protocols such as ISO-26262 or
* ASPICE.
*
* @see PVA_SAFETY
*/
#if PVA_SAFETY == 1
#define VERSION_CODE_SAFETY PVA_BIT(1)
#else
#define VERSION_CODE_SAFETY (0U)
#endif
/**
* @def VERSION_CODE_PVA_TEST_SUPPORT
* @brief Set or Clear the 'test support' bit for the FW version type value.
*
* @details This bit is set if the macro @ref TEST_TASK is defined.
* This bit is expected to be unset during a safety build.
*
* Building with test support enabled may add additional commands that
* can be processed by the FW to aid in testing of the system code. Often code of this
* nature can change the processing, memory, or timing characteristics of the system,
* and should only be enabled when explicitly needed.
*
*
* @see TEST_TASK
*/
#if TEST_TASK == 1
#define VERSION_CODE_PVA_TEST_SUPPORT PVA_BIT(2)
#else
#define VERSION_CODE_PVA_TEST_SUPPORT (0U)
#endif
/**
* @def VERSION_CODE_STANDALONE_TESTS
* @brief Set or Clear the 'standalone tests' bit for the FW version type value.
*
* @details This bit is set if the macro @ref TEST_TASK is defined.
* This bit is expected to be unset during a safety build.
*
* @see TEST_TASK
*
*/
#if TEST_TASK == 1
#define VERSION_CODE_STANDALONE_TESTS PVA_BIT(3)
#else
#define VERSION_CODE_STANDALONE_TESTS (0U)
#endif
/** @} */
/**
* @defgroup PVA_VERSION_MACROS PVA version macros used to calculate the PVA
* FW binary version.
* @{
*/
/**
* @brief An 8-bit bit field that describes which conditionally compiled facets of the Firmware
* have been enabled.
*
* @details The value of this macro is used when constructing a 32-bit Firmware Version identifier.
*
@verbatim
| Bit | Structure Field Name | Condition for Enabling |
|:-----:|:----------------------:|:------------------------:|
| 0 | VERSION_CODE_DEBUG | This bit is set when the Firmware is built with @ref PVA_DEBUG defined as 1. |
| 1 | VERSION_CODE_SAFETY | This bit is set when the Firmware is built with @ref PVA_SAFETY defined as 1. |
| 2 | VERSION_CODE_PVA_TEST_SUPPORT | This bit is set when the Firmware is built with @ref TEST_TASK defined as 1. |
| 3 | VERSION_CODE_STANDALONE_TESTS | This bit is set when the Firmware is built with @ref TEST_TASK defined as 1. |
| 4-7 | Reserved | The remaining bits of the bitfield are undefined. |
@endverbatim
* @see PVA_VERSION_TYPE_FLAGS
*/
#define VERSION_TYPE \
(uint32_t) VERSION_CODE_DEBUG | (uint32_t)VERSION_CODE_SAFETY | \
(uint32_t)VERSION_CODE_PVA_TEST_SUPPORT | \
(uint32_t)VERSION_CODE_STANDALONE_TESTS
/** @} */
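/*
 * Illustrative sketch (not part of the original interface): testing an
 * individual VERSION_TYPE flag. Assigning VERSION_TYPE to a variable first
 * avoids any operator-precedence surprises from the macro expansion.
 *
 * @code
 * uint32_t version_type = VERSION_TYPE;
 *
 * if ((version_type & (uint32_t)VERSION_CODE_SAFETY) != 0U) {
 *     // Safety build: debug and test-only facets are compiled out.
 * }
 * @endcode
 */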
/**
* @defgroup PVA_VERSION_VALUES PVA Major, Minor, and Subminor Version Values
*
* @brief The values listed below are applied to the corresponding fields when
* the PVA_VERSION macro is used.
*
* @see PVA_VERSION, PVA_MAKE_VERSION
* @{
*/
/**
* @brief The Major version of the Firmware
*/
#define PVA_VERSION_MAJOR 0x08
/**
* @brief The Minor version of the Firmware
*/
#define PVA_VERSION_MINOR 0x02
/**
* @brief The sub-minor version of the Firmware.
*/
#define PVA_VERSION_SUBMINOR 0x03
/** @} */
/**
* @def PVA_VERSION_GCID_REVISION
* @brief The GCID Revision of the Firmware.
*
* @details If this version is not otherwise defined during build time, this fallback value is used.
*/
#ifndef PVA_VERSION_GCID_REVISION
/**
* @brief GCID revision of PVA FW binary.
*/
#define PVA_VERSION_GCID_REVISION 0x00000000
#endif
/**
* @def PVA_VERSION_BUILT_ON
* @brief The date and time the version of software was built, expressed as the number
* of seconds since the Epoch (00:00:00 UTC, January 1, 1970).
*
* @details If this version is not otherwise defined during build time, this fallback value is used.
*/
#ifndef PVA_VERSION_BUILT_ON
#define PVA_VERSION_BUILT_ON 0x00000000
#endif
/** @} */
#endif

View File

@@ -0,0 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_PACKED_H
#define PVA_PACKED_H
/**
* @brief Packed attribute that prevents the compiler from adding any padding.
* The compiler implicitly adds padding between structure members to
* align them; the packed attribute avoids this.
* Packed is used for structures shared between KMD and FW:
* without it, the layout would depend on whatever padding each compiler adds.
* Since KMD and FW are compiled by two different compilers, we need to
* ensure that the offset of each member of the structure is the same in
* both KMD and FW. To ensure this we pack the structure.
*/
#define PVA_PACKED __attribute__((packed))
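/*
 * Illustrative sketch (not part of the original interface): a hypothetical
 * KMD/FW-shared structure declared with PVA_PACKED so that both compilers
 * lay it out identically (uint8_t/uint32_t from <stdint.h> assumed).
 *
 * @code
 * typedef struct PVA_PACKED {
 *     uint8_t  version;  // offset 0 on both KMD and FW builds
 *     uint32_t payload;  // offset 1 on both builds; no implicit padding
 * } example_shared_msg_t;
 * @endcode
 */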
#endif // PVA_PACKED_H

View File

@@ -0,0 +1,486 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2023 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Direct Memory Access Driver Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
/**
* @file pva-sys-dma.h
*
* @brief Types and constants related to PVA DMA setup and DMA
* descriptors.
*/
#ifndef PVA_SYS_DMA_H
#define PVA_SYS_DMA_H
#include <stdint.h>
#include <pva-bit.h>
#include <pva-packed.h>
#include "pva_fw_dma_hw_interface.h"
/**
* @brief The version number of the current DMA info structure.
* This is used for detecting the DMA info updates for future
* HW releases.
*/
#define PVA_DMA_INFO_VERSION_ID (1U)
/**
* @brief Number of DMA done masks in DMA info structure,
* corresponding to the number of DMA_COMMON_DMA_OUTPUT_ENABLEx
* registers in the HW.
*/
#define PVA_SYS_DMA_NUM_TRIGGERS (9U)
/* NOTE: This must be kept at 15 for the build to be
* successful, because pva_fw_test configures 15
* channels, but internally we only check that the
* number of channels requested is less than the
* maximum number of available channels. */
/**
* @brief Maximum Number of DMA channel configurations
* in DMA info structure.
*/
#define PVA_SYS_DMA_NUM_CHANNELS (15U)
/**
* @brief Maximum number of DMA descriptors allowed
* for use for VPU for T23x
*/
#define PVA_SYS_DMA_MAX_DESCRIPTORS_T23X (60U)
/**
* @brief Maximum number of DMA descriptors allowed
* for use for VPU for T26x
*/
#define PVA_SYS_DMA_MAX_DESCRIPTORS_T26X (92U)
/**
* @brief DMA registers for VPU0 and VPU1 which are primarily
* used by DMA config and R5 initialization.
*
* For more information refer to section 3.4 in PVA Cluster IAS
* document (Document 11 in Supporting Documentation and References)
*/
/**
* @brief DMA channel base register for VPU0.
*/
#define PVA_DMA0_REG_CH_0 PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_REG_CH_0_BASE)
/**
* @brief DMA common base register for VPU0.
*/
#define PVA_DMA0_COMMON PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_COMMON_BASE)
/**
* @brief DMA DESCRAM base register for VPU0.
*/
#define PVA_DMA0_DESCRAM PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_DESCRAM_BASE)
/**
* @brief DMA channel base register for VPU1.
*/
#define PVA_DMA1_REG_CH_0 PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_REG_CH_0_BASE)
/**
* @brief DMA common base register for VPU1.
*/
#define PVA_DMA1_COMMON PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_COMMON_BASE)
/**
* @brief DMA DESCRAM base register for VPU1.
*/
#define PVA_DMA1_DESCRAM PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_DESCRAM_BASE)
/** @} */
/**
*
* @brief DMA channel configuration for a user task.
*
* The DMA channel structure contains the set-up of a
* PVA DMA channel used for the VPU app.
*
* This VPU app should configure the channel information
* in this format
*
* @note : For more information on channel configuration, refer section 4.1.2 and 6.4 in
* the DMA IAS document (Document 6 in Supporting Documentation and References)
*/
typedef struct PVA_PACKED {
/**
* @brief HW DMA channel number from 1 to @ref PVA_NUM_DMA_CHANNELS.
*/
uint8_t ch_number;
/**
* @brief Three padding bytes added to align the next
* 4-byte field.
*/
uint8_t pad_dma_channel1[3];
/**
* @brief The value to be written to DMA channel
* control 0 register
*/
uint32_t cntl0;
/**
* @brief The value to be written to DMA channel
* control 1 register
*/
uint32_t cntl1;
/**
* @brief The value to be written to DMA channel
* boundary pad register
*/
uint32_t boundary_pad;
/**
* @brief This value to be written to DMA HW sequence
* control register.
*/
uint32_t hwseqcntl;
/**
* @brief This field is unused in t19x and T23x.
* It contains the value to be written to DMA
* channel HWSEQFSCNTL register.
*/
uint32_t hwseqfscntl;
/**
* @brief Output enable mask
*/
uint32_t outputEnableMask;
/**
* @brief Padding to align the whole structure
* to a 32 byte boundary.
*/
uint32_t pad_dma_channel0[1];
} pva_dma_ch_config_t;
/**
*
* @brief DMA info for an application. The app may be a VPU app which
* runs an algorithm on the VPU, or a DMA app which just has a DMA
* configuration to move certain data. In both cases the application
* should configure the DMA information in this structure format.
*
*/
typedef struct PVA_PACKED {
/**
* @brief The size of the dma_info structure.
* Should be populated with value sizeof(pva_dma_info_t)
* This is used to validate that the DRAM location populated
* by KMD is valid
*/
uint16_t dma_info_size;
/**
* @brief This field is used to populate the DMA Info version
* In case we need to create a new
* DMA version structure then the FW can distinguish the DMA
* info structure. Currently it should be populated with value
* @ref PVA_DMA_INFO_VERSION_ID
*/
uint16_t dma_info_version;
/**
* @brief The number of used channels. This field can
* be populated with values from 0 to
* @ref PVA_NUM_DMA_CHANNELS both inclusive.
*/
uint8_t num_channels;
/**
* @brief Number of used descriptors.
*
* Note: In generations of PVA where the reserved descriptor range lies
* in the middle of the entire descriptor range, when the range of
* descriptors requested by the user crosses over the reserved descriptor
* range, 'num_descriptors' will include the number of the reserved
* descriptors as well.
* E.g., if reserved descriptors are at indices 60-63 and the user application
* needs 70 descriptors, 'num_descriptors' will equal 74. However,
* if the user application needs 30 descriptors, 'num_descriptors' will be 30.
*
* On T19x and T23x, the field can be populated
* with values from 0 inclusive to less than
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS
*
* On T26x, the field can be populated with values from 0 inclusive to
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS + @ref PVA_NUM_RESERVED_DESCRIPTORS
*/
uint8_t num_descriptors;
/**
* @brief The number of bytes used in HW sequencer
*/
uint16_t num_hwseq;
/**
* @brief The First HW descriptor ID used.
*
* On T19x and T23x, the field can be populated
* with values from 0 inclusive to less than
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS
*
* On T26x, the field can be populated with values from 0 inclusive to
* @ref PVA_SYS_DMA_MAX_DESCRIPTORS + @ref PVA_NUM_RESERVED_DESCRIPTORS
*/
uint8_t descriptor_id;
/**
* @brief Padding for alignment of next element
*/
uint8_t pva_dma_info_pad_0[3];
/**
* @brief DMA done triggers used by the VPU app.
* Correspond to COMMON_DMA_OUTPUT_ENABLE registers.
*/
uint32_t dma_triggers[PVA_SYS_DMA_NUM_TRIGGERS];
/**
* @brief DMA channel config used by the VPU app.
* One app can have up to @ref PVA_NUM_DMA_CHANNELS
* DMA channel configurations. The size of the array
* is @ref PVA_SYS_DMA_NUM_CHANNELS to allow for additional
* configuration required by future products.
*/
pva_dma_ch_config_t dma_channels[PVA_SYS_DMA_NUM_CHANNELS];
/**
* @brief Value to be set in DMA common configuration register.
*/
uint32_t dma_common_config;
/**
* @brief IOVA to an array of @ref pva_dtd_t, aligned at 64 bytes
* which holds the DMA descriptors used by the VPU app
*/
pva_iova dma_descriptor_base;
/**
* @brief HW sequencer configuration base address.
*/
pva_iova dma_hwseq_base;
/**
* @brief IOVA to a structure of @ref pva_dma_misr_config_t,
* location where DMA MISR configuration information is stored.
*/
pva_iova dma_misr_base;
} pva_dma_info_t;
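/*
 * Illustrative sketch (not part of the original interface): minimal population
 * of a pva_dma_info_t for a hypothetical app that uses one DMA channel and two
 * descriptors. 'descriptor_iova' is a placeholder for the IOVA of a 64-byte
 * aligned pva_dtd_t array; all other values are examples only.
 *
 * @code
 * pva_dma_info_t info = {0};
 *
 * info.dma_info_size       = (uint16_t)sizeof(pva_dma_info_t);
 * info.dma_info_version    = (uint16_t)PVA_DMA_INFO_VERSION_ID;
 * info.num_channels        = 1U;
 * info.num_descriptors     = 2U;
 * info.descriptor_id       = 0U;
 * info.dma_channels[0].ch_number = 1U;
 * info.dma_descriptor_base = descriptor_iova;
 * @endcode
 */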
/**
* @brief DMA descriptor.
*
* PVA DMA Descriptor in packed HW format.
* The individual fields can be found from
* the DMA IAS document (Document 6 in Supporting Documentation and References)
* section 4.1.3.2
*/
typedef struct PVA_PACKED {
/** @brief TRANSFER_CONTROL0 byte has DSTM in lower 2 bits, SRC_TF in 3rd bit,
* DDTM in 4th to 6th bit, DST_TF in 7th bit */
uint8_t transfer_control0;
/** @brief Next descriptor ID to be executed*/
uint8_t link_did;
/** @brief Highest 8 bits of the 40 bit source address*/
uint8_t src_adr1;
/** @brief Highest 8 bits of the 40 bit destination address*/
uint8_t dst_adr1;
/** @brief Lower 32 bits of the 40 bit source address*/
uint32_t src_adr0;
/** @brief Lower 32 bits of the 40 bit destination address*/
uint32_t dst_adr0;
/** @brief Length of tile line*/
uint16_t tx;
/** @brief Number of tile lines*/
uint16_t ty;
/** @brief Source Line pitch to advance to every line of 2D tile.*/
uint16_t slp_adv;
/** @brief Destination Line Pitch to advance to every line of 2D tile.*/
uint16_t dlp_adv;
/** @brief SRC PT1 CNTL has st1_adv in low 24 bits and ns1_adv in high 8 bits. */
uint32_t srcpt1_cntl;
/** @brief DST PT1 CNTL has dt1_adv in low 24 bits and nd1_adv in high 8 bits. */
uint32_t dstpt1_cntl;
/** @brief SRC PT2 CNTL has st2_adv in low 24 bits and ns2_adv in high 8 bits. */
uint32_t srcpt2_cntl;
/** @brief DST PT2 CNTL has dt2_adv in low 24 bits and nd2_adv in high 8 bits. */
uint32_t dstpt2_cntl;
/** @brief SRC PT3 CNTL has st3_adv in low 24 bits and ns3_adv in high 8 bits. */
uint32_t srcpt3_cntl;
/** @brief DST PT3 CNTL has dt3_adv in low 24 bits and nd3_adv in high 8 bits. */
uint32_t dstpt3_cntl;
/** @brief Source circular buffer Start address offset */
uint16_t sb_start;
/** @brief Destination circular buffer Start address offset*/
uint16_t db_start;
/** @brief Source buffer size in bytes for circular buffer mode from Source address.*/
uint16_t sb_size;
/** @brief Destination buffer size in bytes for circular buffer mode from destination address.*/
uint16_t db_size;
/** @brief currently reserved*/
uint16_t trig_ch_events;
/** @brief SW or HW events used for triggering the channel*/
uint16_t hw_sw_trig_events;
/** @brief Tile x coordinates, for boundary padding in pixels*/
uint8_t px;
/** @brief Tile y coordinates, for boundary padding in pixels*/
uint8_t py;
/** @brief Transfer control byte has lower 2 bits as BPP data, bit 2 with PXDIR, bit 3 as PYDIR,
* bit 4 as BPE, bit 5 as TTS, bit 6 RSVD, Bit 7 ITC.
*/
uint8_t transfer_control1;
/** @brief Transfer control 2 has bit 0 as PREFEN, bit 1 as DCBM, bit 2 as SCBM, and bit 3 as SBADR. */
uint8_t transfer_control2;
/** @brief Circular buffer upper bits for start address and size*/
uint8_t cb_ext;
/** @brief Reserved*/
uint8_t rsvd;
/** @brief Full replicated destination base address in VMEM aligned to 64 byte atom*/
uint16_t frda;
} pva_dtd_t;
/**
*
* @brief DMA MISR configuration information. This information is used by R5
* to program MISR registers if a task requests MISR computation on its
* output DMA channels.
*
*/
typedef struct PVA_PACKED {
/** @brief Reference value for CRC computed on write addresses, i.e., MISR 1 */
uint32_t ref_addr;
/** @brief Seed value for address CRC*/
uint32_t seed_crc0;
/** @brief Reference value for CRC computed on first 256-bits of AXI write data */
uint32_t ref_data_1;
/** @brief Seed value for write data CRC*/
uint32_t seed_crc1;
/** @brief Reference value for CRC computed on second 256-bits of AXI write data */
uint32_t ref_data_2;
/**
* @brief MISR timeout value configured in DMA common register
* @ref PVA_DMA_COMMON_MISR_ENABLE. Timeout is calculated as
* number of AXI clock cycles.
*/
uint32_t misr_timeout;
} pva_dma_misr_config_t;
/**
* @defgroup PVA_DMA_TC0_BITS
*
* @brief PVA Transfer Control 0 Bitfields
*
* @{
*/
/**
* @brief The shift value for extracting DSTM field
*/
#define PVA_DMA_TC0_DSTM_SHIFT (0U)
/**
* @brief The mask to be used to extract DSTM field
*/
#define PVA_DMA_TC0_DSTM_MASK (7U)
/**
* @brief The shift value for extracting DDTM field
*/
#define PVA_DMA_TC0_DDTM_SHIFT (4U)
/**
* @brief The mask to be used to extract DDTM field
*/
#define PVA_DMA_TC0_DDTM_MASK (7U)
/** @} */
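/*
 * Illustrative sketch (not part of the original interface): decoding the
 * source (DSTM) and destination (DDTM) transfer modes from a descriptor's
 * transfer_control0 byte using the shift and mask values above. 'desc' is a
 * placeholder pointer to a pva_dtd_t.
 *
 * @code
 * uint8_t tc0  = desc->transfer_control0;
 * uint8_t dstm = (uint8_t)((tc0 >> PVA_DMA_TC0_DSTM_SHIFT) & PVA_DMA_TC0_DSTM_MASK);
 * uint8_t ddtm = (uint8_t)((tc0 >> PVA_DMA_TC0_DDTM_SHIFT) & PVA_DMA_TC0_DDTM_MASK);
 * @endcode
 */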
/**
* @defgroup PVA_DMA_TM
*
* @brief DMA Transfer Modes. These can be used for both
* Source (DSTM) and Destination (DDTM) transfer modes
*
* @note : For more information on transfer modes, refer section 4.1.3.1 in
* the DMA IAS document (Document 6 in Supporting Documentation and References)
*
* @{
*/
/**
* @brief To indicate invalid transfer mode
*/
#define PVA_DMA_TM_INVALID (0U)
/**
* @brief To indicate MC transfer mode
*/
#define PVA_DMA_TM_MC (1U)
/**
* @brief To indicate VMEM transfer mode
*/
#define PVA_DMA_TM_VMEM (2U)
#if ENABLE_UNUSED == 1U
#define PVA_DMA_TM_CVNAS (3U)
#endif
/**
* @brief To indicate L2SRAM transfer mode
*/
#define PVA_DMA_TM_L2RAM (3U)
/**
* @brief To indicate TCM transfer mode
*/
#define PVA_DMA_TM_TCM (4U)
/**
* @brief To indicate MMIO transfer mode
*/
#define PVA_DMA_TM_MMIO (5U)
/**
* @brief To indicate Reserved transfer mode
*/
#define PVA_DMA_TM_RSVD (6U)
/**
* @brief To indicate VPU configuration transfer mode.
* This is only available in Source transfer mode or
* (DSTM). In Destination transfer mode, this value is
* reserved.
*/
#define PVA_DMA_TM_VPU (7U)
/** @} */
#if (ENABLE_UNUSED == 1U)
/**
* @brief The macro defines the number of
* bits to shift right to get the PXDIR field
* in Transfer Control 1 register in DMA
* Descriptor
*/
#define PVA_DMA_TC1_PXDIR_SHIFT (2U)
/**
* @brief The macro defines the number of
* bits to shift right to get the PYDIR field
* in Transfer Control 1 register in DMA
* Descriptor
*/
#define PVA_DMA_TC1_PYDIR_SHIFT (3U)
#endif
/**
* @defgroup PVA_DMA_BPP
*
* @brief PVA DMA Bits per Pixel
*
* @{
*/
/**
* @brief To indicate that the size of pixel data
* is 1 byte
*/
#define PVA_DMA_BPP_INT8 (0U)
#if ENABLE_UNUSED == 1U
#define PVA_DMA_BPP_INT16 (1U)
#endif
/** @} */
/**
* @brief PVA DMA Pad X direction set to right
*/
#define PVA_DMA_PXDIR_RIGHT (1U)
/**
* @brief PVA DMA Pad Y direction set to bottom
*/
#define PVA_DMA_PYDIR_BOT (1U)
#endif /* PVA_SYS_DMA_H */

View File

@@ -0,0 +1,150 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2023 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Task Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
/**
* @file pva-sys-params.h
*
* @brief Types and constants related to VPU application parameters.
*/
#ifndef PVA_SYS_PARAMS_H
#define PVA_SYS_PARAMS_H
#include <stdint.h>
#include <pva-packed.h>
#include <pva-types.h>
/** @brief VPU app parameters provided by the kernel-user, which are to be copied to
* VMEM at runtime.
*
* The VPU app parameters contain kernel-user-provided data to be
* copied into the VMEM before executing the VPU app. The parameter
* headers are stored in the IOVA address stored in the param_base
* member of this structure.
*
* The FW can also initialize complex datatypes, which are marked by
* special param_base outside the normal IOVA space. See the structure
* pva_vpu_instance_data_t for an example.
*/
typedef struct PVA_PACKED {
/** @brief IOVA address of the parameter data */
pva_iova param_base;
/** @brief VMEM offset where parameter data is to be copied */
uint32_t addr;
/** @brief Size of the parameter data in bytes */
uint32_t size;
} pva_vpu_parameter_list_t;
/**
 * @brief This structure holds information about the various
 * VMEM parameters that are submitted with the task.
*/
typedef struct PVA_PACKED {
/**
* @brief The IOVA address of the parameter data.
* This should point to an array of type @ref pva_vpu_parameter_list_t .
* If no parameters are present this should be set to 0
*/
pva_iova parameter_data_iova;
/**
* @brief The starting IOVA address of the parameter data whose size
* is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This data is copied
* from DRAM to TCM using DMA, and then memcopied to VMEM.
* If no small parameters are present this should be set to 0.
*/
pva_iova small_vpu_param_data_iova;
/**
	 * @brief The number of bytes of small VPU parameter data, i.e., the
	 * data whose size is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . If no small
	 * parameters are present, this should be set to 0
*/
uint32_t small_vpu_parameter_data_size;
/**
	 * @brief The index in the array of type @ref pva_vpu_parameter_list_t at which
	 * the large VPU parameters start, i.e., the VPU parameters whose size is greater
	 * than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This value always points to the index
	 * immediately after the small parameters. If no large parameter is present, this
	 * field will have the same value as the
	 * @ref pva_vpu_parameter_info_t.vpu_instance_parameter_list_start_index field
*/
uint32_t large_vpu_parameter_list_start_index;
/**
	 * @brief The index in the array of type @ref pva_vpu_parameter_list_t at which
	 * the VPU instance parameters start. This value always points to the index
	 * immediately after the large parameters if large parameters are present;
	 * otherwise it has the same value as the
	 * @ref pva_vpu_parameter_info_t.large_vpu_parameter_list_start_index field.
*/
uint32_t vpu_instance_parameter_list_start_index;
} pva_vpu_parameter_info_t;
/** @brief Special marker for the IOVA address of a task's parameter data, used to
 * distinguish whether the parameter data specified in the task should be used or
 * whether FW should create a supported parameter data instance. If the IOVA address
 * of the parameter data is less than this special marker, the parameter data
 * specified in the task is used; otherwise FW creates the parameter data.
 */
#define PVA_COMPLEX_IOVA (0xDA7AULL << 48ULL)
/** @brief Macro used to create new parameter base markers
* from the special marker address @ref PVA_COMPLEX_IOVA
*/
#define PVA_COMPLEX_IOVA_V(v) (PVA_COMPLEX_IOVA | ((uint64_t)(v) << 32ULL))
/** @brief Special Marker for @ref pva_vpu_instance_data_t */
#define PVA_SYS_INSTANCE_DATA_V1_IOVA (PVA_COMPLEX_IOVA_V(1) | 0x00000001ULL)
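/*
 * Illustrative sketch (not part of this interface): a task-side check of the
 * marker scheme above could look as follows; the helper name is hypothetical.
 *
 *   static inline bool pva_param_base_is_complex(pva_iova param_base)
 *   {
 *       // Bases at or above the marker request FW-generated (complex) data.
 *       return param_base >= PVA_COMPLEX_IOVA;
 *   }
 *
 *   // Requesting FW-provided V1 instance data for one parameter entry:
 *   //   list[i].param_base = PVA_SYS_INSTANCE_DATA_V1_IOVA;
 */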
/**
 * @brief The minimum size of a VPU parameter for it to be considered
 * a large parameter
*/
#define PVA_DMA_VMEM_COPY_THRESHOLD (uint32_t)(256U)
/**
* @brief The maximum combined size of all VMEM parameters
* that will be supported by PVA
*/
#define VMEM_PARAMETER_BUFFER_MAX_SIZE (uint32_t)(8192U)
/**
* @brief The maximum number of symbols that will be supported
* for one task
*/
#define TASK_VMEM_PARAMETER_MAX_SYMBOLS (uint32_t)(128U)
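/*
 * Illustrative sketch (an assumed layout, derived from the field descriptions
 * above): a task with three small parameters, two large parameters and one
 * instance parameter would populate pva_vpu_parameter_info_t roughly as:
 *
 *   entries [0..2] of the pva_vpu_parameter_list_t array: small parameters
 *   entries [3..4]: large parameters  -> large_vpu_parameter_list_start_index = 3
 *   entry   [5]: instance parameter   -> vpu_instance_parameter_list_start_index = 5
 *
 *   small_vpu_param_data_iova points at the packed small-parameter bytes and
 *   small_vpu_parameter_data_size is their combined size (each parameter being
 *   below PVA_DMA_VMEM_COPY_THRESHOLD).
 */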
/**
* @brief Information of the VPU instance data passed to VPU kernel.
*/
typedef struct PVA_PACKED {
/** @brief ID of the VPU assigned to the task */
uint16_t pve_id;
	/** @brief Flag to indicate whether a PPE task was launched or not */
uint16_t ppe_task_launched;
/** @brief Base of the VMEM memory */
uint32_t vmem_base;
/** @brief Base of the DMA descriptor SRAM memory */
uint32_t dma_descriptor_base;
/** @brief Base of L2SRAM allocated for the task executed */
uint32_t l2ram_base;
/** @brief Size of L2SRAM allocated for the task executed */
uint32_t l2ram_size;
} pva_vpu_instance_data_t;
#endif /* PVA_SYS_PARAMS_H */

View File

@@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Utility Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_TYPES_H
#define PVA_TYPES_H
#include <stdint.h>
/**
* @brief Used to represent address (IOVA) in PVA system.
*/
typedef uint64_t pva_iova;
/**
 * @brief Used to store queue IDs, which represent the
 * actual hardware queue ID shared between FW and KMD.
*/
typedef uint8_t pva_queue_id_t;
/**
 * @brief Used to store a PVE ID, which identifies the
 * PVE being referred to.
*/
typedef uint8_t pva_pve_id_t;
/**
 * @brief Used to store a status interface ID, which identifies
 * the interface through which status needs to be written.
*/
typedef uint8_t pva_status_interface_id_t;
#endif

View File

@@ -0,0 +1,69 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2021 NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* Unit: Host Interface Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_VERSION_H
#define PVA_VERSION_H
#include <stdint.h>
#include <pva-bit.h>
#include <pva-fw-version.h>
/**
* @brief Calculate a 32-bit build version with @ref PVA_VERSION_SUBMINOR,
* @ref PVA_VERSION_MINOR, @ref PVA_VERSION_MAJOR and @ref VERSION_TYPE macros.
*
* @param [in] \_type\_ an 8-bit bitfield containing flags indicating which compilation
* features were enabled when the firmware was compiled.
*
* @param [in] \_major\_ an unsigned, 8-bit value containing the major version of the
* compiled firmware.
*
* @param [in] \_minor\_ an unsigned, 8-bit value containing the minor version of the
* compiled firmware.
*
* @param [in] \_subminor\_ an unsigned, 8-bit value containing the sub-minor version
* of the compiled firmware.
@verbatim
| ------------- | ---------------------|
| Bit Ranges | Function |
| ------------- | ---------------------|
| 7-0 | subminor version |
| 15-8 | minor version |
| 23-16 | major version |
| 31-24 | version type |
----------------------------------------
@endverbatim
*/
#define PVA_MAKE_VERSION(_type_, _major_, _minor_, _subminor_) \
(PVA_INSERT(_type_, 31, 24) | PVA_INSERT(_major_, 23, 16) | \
PVA_INSERT(_minor_, 15, 8) | PVA_INSERT(_subminor_, 7, 0))
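/*
 * Example (illustrative): with the bit layout shown above,
 *   PVA_MAKE_VERSION(0x01U, 2U, 7U, 0U)
 * evaluates to 0x01020700U (type 0x01, major 2, minor 7, subminor 0).
 */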
/**
* @brief Calculate PVA R5 FW binary version by calling @ref PVA_MAKE_VERSION macro.
*
* @param [in] \_type\_ an 8-bit bitfield containing flags indicating which compilation
* features were enabled when the firmware was compiled.
*
* @see VERSION_TYPE For details on how to construct the @p \_type\_ field.
*
* @see PVA_VERSION_MAJOR, PVA_VERSION_MINOR, PVA_VERSION_SUBMINOR for details
* on the values used at the time this documentation was produced.
*/
#define PVA_VERSION(_type_) \
PVA_MAKE_VERSION(_type_, PVA_VERSION_MAJOR, PVA_VERSION_MINOR, \
PVA_VERSION_SUBMINOR)
#endif

View File

@@ -0,0 +1,309 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/*
* Unit: VPU Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
/**
* @file pva-vpu-syscall-interface.h
*
* @brief Syscall command specification
*
* VPU uses syscall commands to request services from R5. A syscall command is a
 * 32-bit value that consists of an 8-bit syscall ID and a 24-bit parameter. If more
* information needs to be passed to R5, the parameter field will be a pointer
* to a VMEM location.
*/
#ifndef PVA_VPU_SYSCALL_INTERFACE_H
#define PVA_VPU_SYSCALL_INTERFACE_H
#include <stdint.h>
/**
* @defgroup PVA_VPU_SYSCALL
*
 * @brief PVA VPU syscall IDs for each type of
 * syscall.
* @{
*/
//! @cond DISABLE_DOCUMENTATION
/**
* @brief VPU Syscall id for vpu printf write.
*/
#define PVA_FW_PE_SYSCALL_ID_WRITE (1U)
//! @endcond
/**
* @brief VPU Syscall id for Icache prefetch.
*/
#define PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH (2U)
/**
* @brief VPU Syscall id for masking exceptions.
*/
#define PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION (3U)
/**
* @brief VPU Syscall id for unmasking exceptions.
*/
#define PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION (4U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief VPU Syscall id for sampling VPU performance counters
*/
#define PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE (5U)
//! @endcond
/** @} */
/**
* @defgroup PVA_VPU_SYSCALL_WRITE_PARAM_GROUP
*
* @brief Parameter specification for syscall write
*/
/**
* @defgroup PVA_VPU_SYSCALL_COMMAND_FIELDS_GROUP
*
* @brief The command format to be used while issuing vpu syscall command from VPU kernel to R5.
 * The fields mentioned in this group are used for submitting the command
* through the Signal_R5 interface from VPU kernel.
*
* @{
*/
/**
* @brief The most significant bit of the vpu syscall ID field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_ID_MSB (31U)
/**
* @brief The least significant bit of the vpu syscall ID field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_ID_LSB (24U)
/**
* @brief The most significant bit of the vpu syscall parameter field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PARAM_MSB (23U)
/**
* @brief The least significant bit of the vpu syscall parameter field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PARAM_LSB (0U)
/** @} */
/**
* @defgroup PVA_VPU_SYSCALL_ICACHE_PREFETCH_PARAM_FIELDS_GROUP
*
* @brief The parameter format to be used while issuing vpu syscall command from VPU kernel to R5 for syscall icache prefetch.
 * The fields mentioned in this group are used for submitting the icache prefetch command
* through the Signal_R5 interface from VPU kernel.
*
* @{
*/
/**
* @brief The most significant bit of the prefetch cache line count field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_MSB (23U)
/**
* @brief The least significant bit of the prefetch cache line count field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_LSB (16U)
/**
* @brief The most significant bit of the prefetch address field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_ADDR_MSB (15U)
/**
* @brief The least significant bit of the prefetch address field in
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_ADDR_LSB (0U)
/** @} */
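/*
 * Illustrative sketch: a VPU kernel could assemble an icache prefetch syscall
 * command from the fields above. The local variable names are hypothetical;
 * only the macros come from this header.
 *
 *   uint32_t line_count = 4U;         // number of cache lines to prefetch
 *   uint32_t prefetch_addr = 0x1234U; // address field payload
 *   uint32_t cmd =
 *       ((uint32_t)PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH << PVA_FW_PE_SYSCALL_ID_LSB) |
 *       (line_count << PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_LSB) |
 *       (prefetch_addr << PVA_FW_PE_SYSCALL_PREFETCH_ADDR_LSB);
 *   // 'cmd' is then issued to R5 through the Signal_R5 interface.
 */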
/**
* @defgroup PVA_VPU_SYSCALL_MASK_UNMASK_PARAM_FIELDS_GROUP
*
* @brief The parameter format to be used while issuing vpu syscall command from VPU kernel
* to R5 for masking or unmasking FP NaN Exception.
 * The fields mentioned in this group are used for submitting the mask and unmask FP NaN exception command
* through the Signal_R5 interface from VPU kernel.
*
* @{
*/
/**
* @brief Parameter specification for syscall mask/unmask exceptions
*/
#define PVA_FW_PE_MASK_FP_INV_NAN (1U << 2U)
/** @} */
/**
 * @brief Write syscall parameter will be a pointer to this struct
* @{
*/
typedef union {
struct {
uint32_t addr;
uint32_t size;
} in;
struct {
uint32_t written_size;
} out;
} pva_fw_pe_syscall_write;
/** @} */
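/*
 * Illustrative sketch: the write syscall parameter points to this union in
 * VMEM. 'msg_vmem_addr' and 'msg_len' are hypothetical values.
 *
 *   pva_fw_pe_syscall_write req;
 *   req.in.addr = msg_vmem_addr;   // VMEM address of the bytes to write
 *   req.in.size = msg_len;         // number of bytes to write
 *   // Issue PVA_FW_PE_SYSCALL_ID_WRITE with a pointer to 'req'; on return,
 *   // req.out.written_size holds the number of bytes actually written.
 */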
/**
* @defgroup PVA_VPU_SYSCALL_PERFMON_SAMPLE_PARAM_GROUP
*
* @brief Parameter specification for syscall perfmon_sample
*
* @{
*/
/**
* @brief Perfmon sample syscall parameter will be a pointer to this struct
*/
typedef struct {
/** counter_mask[0] is for ID: 0-31; counter_mask[1] is for ID: 32-63 */
uint32_t counter_mask[2];
uint32_t output_addr;
} pva_fw_pe_syscall_perfmon_sample;
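/*
 * Illustrative sketch: selecting two counters by ID (IDs 0-31 map to bits in
 * counter_mask[0], IDs 32-63 to counter_mask[1]). The counter ID macros are
 * defined below; 'sample_buf_vmem_addr' is hypothetical.
 *
 *   pva_fw_pe_syscall_perfmon_sample req = { { 0U, 0U }, 0U };
 *   req.counter_mask[0] = (1U << PERFMON_COUNTER_ID_VPS_ID_VALID_T26X) |
 *                         (1U << PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X);
 *   req.output_addr = sample_buf_vmem_addr;
 *   // Issue PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE with a pointer to 'req'.
 */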
/**
* @brief Index for t26x performance counters for VPU
*/
#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T26X (0U)
#define PERFMON_COUNTER_ID_VPS_ID_VALID_T26X (1U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T26X (2U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T26X (3U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T26X (4U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T26X (5U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T26X (6U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T26X (7U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T26X (8U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T26X (9U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T26X (10U)
#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T26X (11U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T26X (12U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T26X (13U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T26X (14U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T26X (15U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T26X (16U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T26X (17U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T26X (18U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T26X (19U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_FETCH_REQ_T26X (20U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X (21U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREEMPT_T26X (22U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_LINES_T26X (23U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_DUR_T26X (24U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_DUR_T26X (25U)
#define PERFMON_COUNTER_ID_DLUT_BUSY_T26X (26U)
#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T26X (27U)
#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T26X (28U)
#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T26X (29U)
#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T26X (30U)
#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T26X (31U)
#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T26X (32U)
#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T26X (33U)
#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T26X (34U)
#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T26X (35U)
#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T26X (36U)
/**
* @brief Index for t23x performance counters
*/
#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T23X (0U)
#define PERFMON_COUNTER_ID_VPS_ID_VALID_T23X (1U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T23X (2U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T23X (3U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T23X (4U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T23X (5U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T23X (6U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T23X (7U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T23X (8U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T23X (9U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T23X (10U)
#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T23X (11U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T23X (12U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T23X (13U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T23X (14U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T23X (15U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T23X (16U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T23X (17U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T23X (18U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T23X (19U)
#define PERFMON_COUNTER_ID_ICACHE_FETCH_REQ_T23X (20U)
#define PERFMON_COUNTER_ID_ICACHE_MISS_T23X (21U)
#define PERFMON_COUNTER_ID_ICACHE_PREEMP_T23X (22U)
#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_LINES_T23X (23U)
#define PERFMON_COUNTER_ID_ICACHE_MISS_DUR_T23X (24U)
#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_DUR_T23X (25U)
#define PERFMON_COUNTER_ID_DLUT_BUSY_T23X (26U)
#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T23X (27U)
#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T23X (28U)
#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T23X (29U)
#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T23X (30U)
#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T23X (31U)
#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T23X (32U)
#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T23X (33U)
#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T23X (34U)
#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T23X (35U)
#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T23X (36U)
/**
* @brief Index for t26x performance counters for PPE
*/
#define PERFMON_COUNTER_ID_PPS_STALL_ID_NO_VAL_INSTR_T26X (0U)
#define PERFMON_COUNTER_ID_PPS_ID_VALID_T26X (1U)
#define PERFMON_COUNTER_ID_PPS_STALL_ID_REG_DEPEND_T26X (2U)
#define PERFMON_COUNTER_ID_PPS_STALL_ID_ONLY_T26X (3U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX1_ONLY_T26X (4U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_LD_DEPENDENCY_T26X (5U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_ST_DEPENDENCY_T26X (6U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_DEPENDENCY_T26X (7U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STRM_STORE_FLUSH_T26X (8U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_STORE_FLUSH_T26X (9U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STORE_FLUSH_T26X (10U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_LD_T26X (11U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_ST_T26X (12U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_T26X (13U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LD_T26X (14U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_ST_T26X (15U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LDST_T26X (16U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_PUSHBACK_T26X (17U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STQ_PUSHBACK_T26X (18U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_FLUSH_T26X (19U)
#define PERFMON_COUNTER_ID_PPS_WFE_GPI_EX_STATE_T26X (20U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_FETCH_REQ_T26X (21U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_T26X (22U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREEMPT_T26X (23U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_LINES_T26X (24U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_DUR_T26X (25U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_DUR_T26X (26U)
/** @} */
#endif /*PVA_VPU_SYSCALL_INTERFACE_H*/

View File

@@ -0,0 +1,295 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_FW_H
#define PVA_FW_H
#include "pva_api.h"
#include "pva_bit.h"
#include "pva_constants.h"
#include "pva_fw_address_map.h"
#include "pva_math_utils.h"
/* The sizes of these structs must be explicitly padded to align to 4 bytes */
struct pva_fw_prefence {
uint8_t offset_hi;
uint8_t pad0[3];
uint32_t offset_lo;
uint32_t resource_id;
uint32_t value;
};
struct pva_fw_postfence {
uint8_t offset_hi;
uint8_t ts_offset_hi;
/** A privileged user queue may need to trigger a fence that exists in the user's own
 * resource table. Set this flag to tell FW to use the user's resource table when
 * writing this post fence. This also applies to the timestamp resource ID. */
#define PVA_FW_POSTFENCE_FLAGS_USER_FENCE (1 << 0)
uint8_t flags;
uint8_t pad0;
uint32_t offset_lo;
uint32_t resource_id;
uint32_t value;
/* Timestamp part */
uint32_t ts_resource_id;
uint32_t ts_offset_lo;
};
struct pva_fw_memory_addr {
uint8_t offset_hi;
uint8_t pad0[3];
uint32_t resource_id;
uint32_t offset_lo;
};
struct pva_fw_cmdbuf_submit_info {
uint8_t num_prefence;
uint8_t num_postfence;
uint8_t num_input_status;
uint8_t num_output_status;
#define PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_MSB (1)
#define PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_LSB (0)
uint8_t flags;
uint8_t first_chunk_offset_hi;
/** First chunk size*/
uint16_t first_chunk_size;
struct pva_fw_prefence prefences[PVA_MAX_NUM_PREFENCES];
struct pva_fw_memory_addr input_statuses[PVA_MAX_NUM_INPUT_STATUS];
/** Resource ID of the first chunk */
uint32_t first_chunk_resource_id;
/** First chunk offset within the resource*/
uint32_t first_chunk_offset_lo;
/** Execution Timeout */
uint32_t execution_timeout_ms;
struct pva_fw_memory_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS];
struct pva_fw_postfence postfences[PVA_MAX_NUM_POSTFENCES];
};
/* This is the header of the circular buffer */
struct pva_fw_submit_queue_header {
/**
* Head index of the circular buffer. Updated by R5, read by CCPLEX
* (UMD/KMD).
*/
volatile uint32_t cb_head;
/**
* Tail index of the circular buffer. Updated by CCPLEX.
*
	 * CCPLEX informs R5 of the tail index through CCQ. In case KMD needs to
	 * flush the queue, it may need to read the tail from here.
*/
volatile uint32_t cb_tail;
/* Immediately followed by an array of struct pva_cmdbuf_submit_info */
};
static inline uint32_t pva_fw_queue_count(uint32_t head, uint32_t tail,
uint32_t size)
{
if (tail >= head) {
return safe_subu32(tail, head);
} else {
return safe_addu32(safe_subu32(size, head), tail);
}
}
static inline uint32_t pva_fw_queue_space(uint32_t head, uint32_t tail,
uint32_t size)
{
return safe_subu32(
safe_subu32(size, pva_fw_queue_count(head, tail, size)), 1u);
}
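/*
 * Illustrative sketch: before queuing a new submission into a circular buffer
 * with 'n_entries' slots, CCPLEX can check the free space. 'hdr' and
 * 'n_entries' are hypothetical.
 *
 *   uint32_t head = hdr->cb_head;
 *   uint32_t tail = hdr->cb_tail;
 *   if (pva_fw_queue_space(head, tail, n_entries) > 0U) {
 *       // Write the next pva_fw_cmdbuf_submit_info at index 'tail', then
 *       // advance the tail modulo n_entries and inform R5 through CCQ.
 *   }
 */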
/* CCQ commands: KMD -> R5, through CCQ FIFO */
/*
* Most CCQ commands are meant to be used at init time.
* During runtime, only use PVA_FW_CCQ_OP_UPDATE_TAIL
*/
#define PVA_FW_CCQ_OPCODE_MSB 63
#define PVA_FW_CCQ_OPCODE_LSB 60
/*
* tail value bit field: 31 - 0
* queue id bit field: 40 - 32
*/
#define PVA_FW_CCQ_OP_UPDATE_TAIL 0
#define PVA_FW_CCQ_TAIL_MSB 31
#define PVA_FW_CCQ_TAIL_LSB 0
#define PVA_FW_CCQ_QUEUE_ID_MSB 40
#define PVA_FW_CCQ_QUEUE_ID_LSB 32
/*
* resource table IOVA addr bit field: 39 - 0
* resource table number of entries bit field: 59 - 40
*/
#define PVA_FW_CCQ_OP_SET_RESOURCE_TABLE 1
#define PVA_FW_CCQ_RESOURCE_TABLE_ADDR_MSB 39
#define PVA_FW_CCQ_RESOURCE_TABLE_ADDR_LSB 0
#define PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_MSB 59
#define PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_LSB 40
/*
* submission queue IOVA addr bit field: 39 - 0
* submission queue number of entries bit field: 59 - 40
*/
#define PVA_FW_CCQ_OP_SET_SUBMISSION_QUEUE 2
#define PVA_FW_CCQ_QUEUE_ADDR_MSB 39
#define PVA_FW_CCQ_QUEUE_ADDR_LSB 0
#define PVA_FW_CCQ_QUEUE_N_ENTRIES_MSB 59
#define PVA_FW_CCQ_QUEUE_N_ENTRIES_LSB 40
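/*
 * Illustrative sketch: composing a 64-bit UPDATE_TAIL CCQ word with plain
 * shifts (an equivalent bit-insert helper may be used instead). 'queue_id'
 * and 'new_tail' are hypothetical.
 *
 *   uint64_t ccq_cmd =
 *       ((uint64_t)PVA_FW_CCQ_OP_UPDATE_TAIL << PVA_FW_CCQ_OPCODE_LSB) |
 *       ((uint64_t)queue_id << PVA_FW_CCQ_QUEUE_ID_LSB) |
 *       ((uint64_t)new_tail << PVA_FW_CCQ_TAIL_LSB);
 *   // KMD pushes 'ccq_cmd' into the CCQ FIFO.
 */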
/* KMD and FW communicate using messages.
*
* Message can contain up to 6 uint32_t.
*
* The first uint32_t is the header that contains message type and length.
*/
#define PVA_FW_MSG_MAX_LEN 6
/* KMD sends messages to R5 using the CCQ FIFO. The message length is always 64 bits. */
/* When R5 sends messages to KMD using CCQ statuses, we use statuses 3 - 8
*
* msg[0] = STATUS8 -> generate interrupt to KMD
* msg[1] = STATUS3
* msg[2] = STATUS4
* msg[3] = STATUS5
* msg[4] = STATUS6
* msg[5] = STATUS7
*/
#define PVA_FW_MSG_STATUS_BASE 3
#define PVA_FW_MSG_STATUS_LAST 8
#define PVA_FW_MSG_TYPE_MSB 30
#define PVA_FW_MSG_TYPE_LSB 25
#define PVA_FW_MSG_LEN_MSB 24
#define PVA_FW_MSG_LEN_LSB 22
/* The remaining bits (0 - 21) of msg[0] can be used for message specific
* payload */
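/*
 * Illustrative sketch: extracting the type and length from a received message
 * header with plain shifts and masks. 'msg0' (the first word of the message)
 * is hypothetical.
 *
 *   uint32_t type = (msg0 >> PVA_FW_MSG_TYPE_LSB) &
 *                   ((1U << (PVA_FW_MSG_TYPE_MSB - PVA_FW_MSG_TYPE_LSB + 1U)) - 1U);
 *   uint32_t len  = (msg0 >> PVA_FW_MSG_LEN_LSB) &
 *                   ((1U << (PVA_FW_MSG_LEN_MSB - PVA_FW_MSG_LEN_LSB + 1U)) - 1U);
 */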
/* Message types: R5 -> CCPLEX */
#define PVA_FW_MSG_TYPE_ABORT 1
#define PVA_FW_MSG_TYPE_BOOT_DONE 2
#define PVA_FW_MSG_TYPE_FLUSH_PRINT 3
#define PVA_FW_MSG_TYPE_RESOURCE_UNREGISTER 3
/* Message types: CCPLEX -> R5 */
#define PVA_FW_MSG_TYPE_UPDATE_TAIL 32
/* Parameters for message ABORT
* ABORT message contains a short string (up to 22 chars).
 * The first two characters are in the message header (bits 15 - 0).
*/
#define PVA_FW_MSG_ABORT_STR_MAX_LEN 22
/* Parameters for message BOOT_DONE */
#define PVA_FW_MSG_R5_START_TIME_LO_IDX 1
#define PVA_FW_MSG_R5_START_TIME_HI_IDX 2
#define PVA_FW_MSG_R5_READY_TIME_LO_IDX 3
#define PVA_FW_MSG_R5_READY_TIME_HI_IDX 4
/* Parameters for message FLUSH PRINT */
struct pva_fw_print_buffer_header {
#define PVA_FW_PRINT_BUFFER_OVERFLOWED (1 << 0)
#define PVA_FW_PRINT_FAILURE (1 << 1)
uint32_t flags;
uint32_t tail;
/* Followed by print content */
};
/* Parameters for message resource unregister */
/* Table ID is stored in msg[0], bit: 0 - 7 */
#define PVA_FW_MSG_RESOURCE_TABLE_ID_MSB 7
#define PVA_FW_MSG_RESOURCE_TABLE_ID_LSB 0
/* Followed by up to 5 resource IDs. The actual number of resource ID is
* indicated by the message length. */
/** @brief Circular buffer based data channel to share data between R5 and CCPLEX */
struct pva_data_channel {
uint32_t size;
#define PVA_DATA_CHANNEL_OVERFLOW (1U << 0U)
uint32_t flags;
uint32_t head;
/**
	 * Offset location in the circular buffer at which VPU printf data will be written by FW
*/
uint32_t tail;
/* Immediately followed by circular buffer data */
};
/* PVA FW Event profiling definitions */
// Event identifiers
#define PVA_FW_EVENT_DO_CMD PVA_BIT8(1)
#define PVA_FW_EVENT_SCAN_QUEUES PVA_BIT8(2)
#define PVA_FW_EVENT_SCAN_SLOTS PVA_BIT8(3)
#define PVA_FW_EVENT_RUN_VPU PVA_BIT8(4)
// Event message format
struct pva_fw_event_message {
uint32_t event : 5;
uint32_t type : 3;
uint32_t arg1 : 8;
uint32_t arg2 : 8;
uint32_t arg3 : 8;
};
// Each event is one of the following types. This should fit within 3 bits
enum pva_fw_events_type {
EVENT_TRY = 0U,
EVENT_START,
EVENT_YIELD,
EVENT_DONE,
EVENT_ERROR,
EVENT_TYPE_MAX = 7U
};
static inline const char *event_type_to_string(enum pva_fw_events_type status)
{
switch (status) {
case EVENT_TRY:
return "TRY";
case EVENT_START:
return "START";
case EVENT_YIELD:
return "YIELD";
case EVENT_DONE:
return "DONE";
case EVENT_ERROR:
return "ERROR";
default:
return "";
}
}
enum pva_fw_timestamp_t {
TIMESTAMP_TYPE_TSE = 0,
TIMESTAMP_TYPE_CYCLE_COUNT = 1
};
struct pva_fw_profiling_buffer_header {
#define PVA_FW_PROFILING_BUFFER_OVERFLOWED (1 << 0)
#define PVA_FW_PROFILING_FAILURE (1 << 1)
uint32_t flags;
uint32_t tail;
/* Followed by print content */
};
/* End of PVA FW Event profiling definitions */
struct pva_kmd_fw_tegrastats {
uint64_t window_start_time;
uint64_t window_end_time;
uint64_t total_utilization[PVA_NUM_PVE];
};
#endif // PVA_FW_H

View File

@@ -0,0 +1,178 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
/*
* Unit: Boot Unit
* SWUD Document:
* p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf
*/
#ifndef PVA_FW_ADDRESS_MAP_H
#define PVA_FW_ADDRESS_MAP_H
/**
* @brief Starting R5 address where FW code and data is placed.
* This address is expected to be programmed in PVA_CFG_AR1PRIV_START by KMD.
* This address is also expected to be used as offset where PVA_CFG_R5PRIV_LSEGREG1
* and PVA_CFG_R5PRIV_USEGREG1 registers would point.
*/
#define FW_CODE_DATA_START_ADDR 0x60000000
/**
* @brief R5 address where FW code and data is expected to end.
* This address is expected to be programmed in PVA_CFG_AR1PRIV_END by KMD.
*/
#if PVA_DEV_MAIN_COMPATIBLE == 1
#define FW_CODE_DATA_END_ADDR 0x60220000
#else
#define FW_CODE_DATA_END_ADDR 0x62000000
#endif
/**
* @defgroup PVA_EXCEPTION_VECTORS
*
* @brief Following macros define R5 addresses that are expected to be
* programmed by KMD in EVP registers as is.
* @{
*/
/**
* @brief R5 address of reset exception vector
*/
#define EVP_RESET_VECTOR 0x60040C00
/**
* @brief R5 address of undefined instruction exception vector
*/
#define EVP_UNDEFINED_INSTRUCTION_VECTOR (EVP_RESET_VECTOR + 0x400 * 1)
/**
* @brief R5 address of svc exception vector
*/
#define EVP_SVC_VECTOR (EVP_RESET_VECTOR + 0x400 * 2)
/**
* @brief R5 address of prefetch abort exception vector
*/
#define EVP_PREFETCH_ABORT_VECTOR (EVP_RESET_VECTOR + 0x400 * 3)
/**
* @brief R5 address of data abort exception vector
*/
#define EVP_DATA_ABORT_VECTOR (EVP_RESET_VECTOR + 0x400 * 4)
/**
* @brief R5 address of reserved exception vector.
* It points to a dummy handler.
*/
#define EVP_RESERVED_VECTOR (EVP_RESET_VECTOR + 0x400 * 5)
/**
* @brief R5 address of IRQ exception vector
*/
#define EVP_IRQ_VECTOR (EVP_RESET_VECTOR + 0x400 * 6)
/**
* @brief R5 address of FIQ exception vector
*/
#define EVP_FIQ_VECTOR (EVP_RESET_VECTOR + 0x400 * 7)
/** @} */
/**
* @defgroup PVA_DEBUG_BUFFERS
*
* @brief These buffers are arranged in the following order:
* TRACE_BUFFER followed by CODE_COVERAGE_BUFFER followed by DEBUG_LOG_BUFFER.
* @{
*/
/**
* @brief Maximum size of trace buffer in bytes.
*/
#define FW_TRACE_BUFFER_SIZE 0x40000
/**
* @brief Maximum size of code coverage buffer in bytes.
*/
#define FW_CODE_COVERAGE_BUFFER_SIZE 0x80000
/**
* @brief Maximum size of debug log buffer in bytes.
*/
#if PVA_DEV_MAIN_COMPATIBLE == 1
#define FW_DEBUG_LOG_BUFFER_SIZE 0x40000
#else
#define FW_DEBUG_LOG_BUFFER_SIZE 0x400000
#endif
/** @} */
/**
* @brief Total size of buffers used for FW debug in bytes.
 * TBD: Update this value based on the build configuration once KMD changes are merged.
*/
#define FW_DEBUG_DATA_TOTAL_SIZE \
(FW_TRACE_BUFFER_SIZE + FW_DEBUG_LOG_BUFFER_SIZE + \
FW_CODE_COVERAGE_BUFFER_SIZE)
/**
* @brief Starting R5 address where FW debug related data is placed.
* This address is expected to be programmed in PVA_CFG_AR2PRIV_START by KMD.
* This address is also expected to be used as offset where PVA_CFG_R5PRIV_LSEGREG2
* and PVA_CFG_R5PRIV_USEGREG2 registers would point.
*/
#define FW_DEBUG_DATA_START_ADDR (0x70000000) //1879048192 0x70000000
/**
* @brief R5 address where FW debug related data is expected to end.
* This address is expected to be programmed in PVA_CFG_AR2PRIV_END by KMD.
*/
#define FW_DEBUG_DATA_END_ADDR \
(FW_DEBUG_DATA_START_ADDR + FW_DEBUG_DATA_TOTAL_SIZE)
/**
* @brief Starting R5 address where FW expects shared buffers between KMD and FW to be placed.
* This is to be used as offset when programming PVA_CFG_R5USER_LSEGREG and PVA_CFG_R5USER_USEGREG.
*/
#define FW_SHARED_MEMORY_START (0x80000000U) //2147483648 0x80000000
/**
* @defgroup PVA_HYP_SCR_VALUES
*
* @brief Following macros specify SCR firewall values that are expected to be
* programmed by Hypervisor.
* @{
*/
/**
* @brief EVP SCR firewall to enable only CCPLEX read/write access.
*/
#define PVA_EVP_SCR_VAL 0x19000202
/**
* @brief PRIV SCR firewall to enable only CCPLEX and R5 read/write access.
*/
#define PVA_PRIV_SCR_VAL 0x1F008282
/**
* @brief CCQ SCR firewall to enable only CCPLEX write access and R5 read access.
*/
#define PVA_CCQ_SCR_VAL 0x19000280
/**
* @brief Status Ctl SCR firewall to enable only CCPLEX read access and R5 read/write access.
*/
#define PVA_STATUS_CTL_SCR_VAL 0x1f008082
/** @} */
/**
* @defgroup PVA_KMD_SCR_VALUES
*
* @brief Following macros specify SCR firewall values that are expected to be
* programmed by KMD.
* @{
*/
/**
* @brief SECEXT_INTR SCR firewall to enable only CCPLEX and R5 read/write access.
*/
#define PVA_SEC_SCR_SECEXT_INTR_EVENT_VAL 0x39008282U
/**
* @brief PROC SCR firewall to enable only CCPLEX read/write access and R5 read only access.
*/
#define PVA_PROC_SCR_PROC_VAL 0x39000282U
/** @} */
#endif

View File

@@ -0,0 +1,120 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_FW_HYP_H
#define PVA_FW_HYP_H
/**
* @defgroup PVA_BOOT_TIME_MBOX
*
* @brief This group defines the mailboxes used by KMD to pass start iovas required for
* user segment and priv2 segment configuration during boot.
* @{
*/
/**
* @brief Used to pass bits 31-0 of start iova of user segment.
*/
#define PVA_MBOXID_USERSEG_L (1U)
/**
* @brief Used to pass bits 39-32 of start iova of user segment.
*/
#define PVA_MBOXID_USERSEG_H (2U)
/**
* @brief Used to pass bits 31-0 of start iova of priv2 segment.
*/
#define PVA_MBOXID_PRIV2SEG_L (3U)
/**
* @brief Used to pass bits 39-32 of start iova of priv2 segment.
*/
#define PVA_MBOXID_PRIV2SEG_H (4U)
/** @} */
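/*
 * Illustrative sketch: splitting a 40-bit user-segment start IOVA across the
 * two mailboxes above. 'userseg_iova' and the mailbox write helper are
 * hypothetical.
 *
 *   write_mailbox(PVA_MBOXID_USERSEG_L, (uint32_t)(userseg_iova & 0xFFFFFFFFULL));
 *   write_mailbox(PVA_MBOXID_USERSEG_H, (uint32_t)((userseg_iova >> 32U) & 0xFFULL));
 */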
/**
* @defgroup PVA_SHARED_SEMAPHORE_STATUS_GROUP
*
* @brief The status bits for the shared semaphore which are mentioned in
* the group are used to communicate various information between KMD and
* PVA R5 FW. The highest 16 bits are used to send information from KMD to
* R5 FW and the lower 16 bits are used to send information from R5 FW to KMD by
* writing to the @ref PVA_BOOT_SEMA semaphore
*
* The bit-mapping of the semaphore is described below. The table below shows the mapping which
* is sent by KMD to FW.
*
* | Bit Position | Bit Field Name | Description |
* |:------------:|:---------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------:|
* | 31 | BOOT INT | To indicate that KMD is expecting an interrupt from R5 once boot is complete |
* | 30 | Reserved | Reserved for future use |
* | 27-25 | Reserved | Reserved for future use |
* | 23-21 | Reserved | Reserved for future use |
* | 20 | CG DISABLE | To indicate the PVA R5 FW should disable the clock gating feature |
* | 19 | VMEM RD WAR DISABLE | To disable the VMEM Read fail workaround feature |
* | 18-16 | Reserved | Reserved for future use |
*
* The table below shows the mapping which is sent by FW to KMD
*
* | Bit Position | Bit Field Name | Description |
* |:------------:|:---------------------:|:-----------------------------------------------------------------------------------------------------------:|
* | 15-11 | Reserved | Reserved for future use |
* | 07-03 | Reserved | Reserved for future use |
* | 02 | HALTED | To indicate to KMD that the PVA R5 FW has halted execution |
* | 01 | BOOT DONE | To indicate to KMD that the PVA R5 FW booting is complete |
*
* @{
*/
//! @endcond
/**
* @brief This field is used to indicate that the R5 FW should
* disable the clock gating feature
*/
#define PVA_BOOT_SEMA_CG_DISABLE PVA_BIT(20U)
//! @cond DISABLE_DOCUMENTATION
/** Tell firmware that block linear surfaces are in XBAR_RAW format instead of
* TEGRA_RAW format */
#define PVA_BOOT_SEMA_USE_XBAR_RAW PVA_BIT(17U)
#define PVA_BOOT_SEMA 0U
/**
* @brief This macro has the value to be set by KMD in the shared semaphores
* @ref PVA_PREFENCE_SYNCPT_REGION_IOVA_SEM or @ref PVA_POSTFENCE_SYNCPT_REGION_IOVA_SEM
* if the syncpoint reserved region must not be configured as uncached
* in R5 MPU.
*/
#define PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET (0xFFFFFFFFU)
/** @} */
/* Runtime mailbox messages between firmware and hypervisor */
/* When the hypervisor sends messages to R5 through mailboxes, we use mailboxes 0 - 1
* msg[0] = mailbox 1 -> generate interrupt to R5
* msg[1] = mailbox 0
*/
#define PVA_FW_MBOX_TO_R5_BASE 0
#define PVA_FW_MBOX_TO_R5_LAST 1
/* When R5 sends messages to the hypervisor through mailboxes, we use mailboxes 2 - 7
* msg[0] = mailbox 7 -> generate interrupt to hypervisor
* msg[1] = mailbox 2
* msg[2] = mailbox 3
* msg[3] = mailbox 4
* msg[4] = mailbox 5
* msg[5] = mailbox 6
*/
#define PVA_FW_MBOX_TO_HYP_BASE 2
#define PVA_FW_MBOX_TO_HYP_LAST 7
#define PVA_FW_MBOX_FULL_BIT PVA_BIT(31)
#endif // PVA_FW_HYP_H

View File

@@ -0,0 +1,340 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_RESOURCE_H
#define PVA_RESOURCE_H
#include "pva_api.h"
#include "pva_api_dma.h"
#include "pva_bit.h"
#include "pva_constants.h"
#include "pva_utils.h"
#include "pva_math_utils.h"
/* The sizes of these structs must be explicitly padded to align to 4 bytes */
struct pva_fw_dma_descriptor {
uint8_t transfer_control0;
uint8_t link_did;
uint8_t src_adr1;
uint8_t dst_adr1;
uint32_t src_adr0;
uint32_t dst_adr0;
uint16_t tx;
uint16_t ty;
uint16_t slp_adv;
uint16_t dlp_adv;
uint32_t srcpt1_cntl;
uint32_t dstpt1_cntl;
uint32_t srcpt2_cntl;
uint32_t dstpt2_cntl;
uint32_t srcpt3_cntl;
uint32_t dstpt3_cntl;
uint16_t sb_start;
uint16_t db_start;
uint16_t sb_size;
uint16_t db_size;
uint16_t trig_ch_events;
uint16_t hw_sw_trig_events;
uint8_t px;
uint8_t py;
uint8_t transfer_control1;
uint8_t transfer_control2;
uint8_t cb_ext;
uint8_t rsvd;
uint16_t frda;
};
/** Each slot is mapped to <reloc_count> pva_fw_dma_reloc entries. When a
 * bind_dram/vmem_slot command is executed, the slot_id will be an index into
 * the slot array. The slot contains the starting index and count of reloc structs.
 * All descriptor fields identified by the reloc structs will be patched; a
 * simplified sketch of this patching step follows the pva_fw_dma_reloc definition below.
*/
struct pva_fw_dma_slot {
/** This slot can be bound to a DRAM buffer */
#define PVA_FW_DMA_SLOT_FLAG_DRAM (1u << 0u)
/** This slot can be bound to a L2SRAM buffer */
#define PVA_FW_DMA_SLOT_FLAG_L2SRAM (1u << 1u)
/** This slot can be bound to a VMEM DATA buffer */
#define PVA_FW_DMA_SLOT_FLAG_VMEM_DATA (1u << 2u)
/** This slot can be bound to a VMEM VPU config table buffer */
#define PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE (1u << 3u)
/** This slot has the circular buffer enabled. A slot with this flag cannot be bound
 * to a block-linear surface. */
#define PVA_FW_DMA_SLOT_FLAG_CB (1u << 4u)
#define PVA_FW_DMA_SLOT_FLAG_BOUND (1u << 5u)
uint8_t flags;
uint8_t pad;
/** Bitmask of channels that use this slot */
uint16_t ch_use_mask;
/** The number of descriptor fields that share this slot. Each field
* will have a pva_fw_dma_reloc struct
*/
uint16_t reloc_count;
/** Starting index in the pva_fw_dma_reloc array */
uint16_t reloc_start_idx;
int64_t start_addr;
int64_t end_addr;
};
static inline uint32_t get_slot_size(struct pva_fw_dma_slot const *slot)
{
uint32_t size = UINT32_MAX;
int64_t tmp_size = 0;
if (slot->end_addr < slot->start_addr) {
return size;
}
tmp_size = slot->end_addr - slot->start_addr;
if (tmp_size > (int64_t)UINT32_MAX) {
return size;
}
size = (uint32_t)tmp_size;
return size;
}
/**
* A relocate struct identifies an address field (src, dst or dst2) in
* the descriptor. The identified address field contains an offset instead of
* absolute address. The base address will be added to the offset during
* binding.
*
* This struct only has 2 bytes, so an array of this struct must have an even
* number of elements to satisfy alignment requirement.
*/
struct pva_fw_dma_reloc {
uint8_t desc_index;
/** This relocation is for source field */
#define PVA_FW_DMA_RELOC_FIELD_SRC 1u
/** This relocation is for destination field */
#define PVA_FW_DMA_RELOC_FIELD_DST 2u
/** This relocation is for destination 2 field */
#define PVA_FW_DMA_RELOC_FIELD_DST2 3u
uint8_t field;
};
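/*
 * Illustrative sketch of how a slot binding may be applied, based on the
 * comments above: each reloc entry names one address field of one descriptor;
 * the field initially holds an offset and binding adds the buffer base
 * address to it. This is a simplified illustration, not the firmware code
 * (only the low 32 address bits are shown).
 *
 *   for (uint16_t i = 0U; i < slot->reloc_count; i++) {
 *       struct pva_fw_dma_reloc const *r = &relocs[slot->reloc_start_idx + i];
 *       struct pva_fw_dma_descriptor *d = &descs[r->desc_index];
 *       if (r->field == PVA_FW_DMA_RELOC_FIELD_SRC)
 *           d->src_adr0 += (uint32_t)base_addr;
 *       else if (r->field == PVA_FW_DMA_RELOC_FIELD_DST)
 *           d->dst_adr0 += (uint32_t)base_addr;
 *       // PVA_FW_DMA_RELOC_FIELD_DST2 would be handled similarly.
 *   }
 */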
struct pva_fw_dma_channel {
uint32_t cntl0;
uint32_t cntl1;
uint32_t boundary_pad;
uint32_t hwseqcntl;
uint32_t hwseqfscntl;
};
struct pva_fw_data_section_info {
	uint32_t data_buf_off; /**< Offset into the data section's data byte array */
uint32_t vmem_addr;
uint32_t size;
};
struct pva_dma_resource_map {
// TODO: These macros should be derived using the maximum limits across platforms
// Today, they are being hardcoded. Make it automatic
#define PVA_DMA_NUM_CHANNEL_PARTITIONS \
((PVA_MAX_NUM_DMA_CHANNELS) / (PVA_DMA_CHANNEL_ALIGNMENT))
#define PVA_DMA_NUM_DESCRIPTOR_PARTITIONS \
((PVA_MAX_NUM_DMA_DESC) / (PVA_DMA_DESCRIPTOR_ALIGNMENT))
#define PVA_DMA_NUM_ADB_PARTITIONS \
((PVA_MAX_NUM_ADB_BUFFS) / (PVA_DMA_ADB_ALIGNMENT))
#define PVA_DMA_NUM_HWSEQ_WORD_PARTITIONS \
((PVA_MAX_NUM_HWSEQ_WORDS) / (PVA_DMA_HWSEQ_WORD_ALIGNMENT))
uint64_t channels : PVA_DMA_NUM_CHANNEL_PARTITIONS;
uint64_t descriptors : PVA_DMA_NUM_DESCRIPTOR_PARTITIONS;
uint64_t adbs : PVA_DMA_NUM_ADB_PARTITIONS;
uint64_t hwseq_words : PVA_DMA_NUM_HWSEQ_WORD_PARTITIONS;
uint64_t triggers : 1;
};
static inline void
pva_dma_resource_map_reset(struct pva_dma_resource_map *resource_map)
{
resource_map->channels = 0u;
resource_map->descriptors = 0u;
resource_map->adbs = 0u;
resource_map->hwseq_words = 0u;
resource_map->triggers = 0u;
}
// Note: the following pva_dma_resource_map_* APIs assume an alignment requirement
// on the 'start' index. We do not enforce it here though. If this requirement
// is not met, the FW may falsely predict resource conflicts between commands.
// However, this will not impact functionality or correctness.
static inline void
pva_dma_resource_map_add_channels(struct pva_dma_resource_map *map,
uint16_t start, uint16_t count)
{
map->channels |= pva_mask64(start, count, PVA_DMA_CHANNEL_ALIGNMENT);
}
static inline void
pva_dma_resource_map_add_descriptors(struct pva_dma_resource_map *map,
uint16_t start, uint16_t count)
{
map->descriptors |=
pva_mask64(start, count, PVA_DMA_DESCRIPTOR_ALIGNMENT);
}
static inline void
pva_dma_resource_map_add_adbs(struct pva_dma_resource_map *map, uint16_t start,
uint16_t count)
{
map->adbs |= pva_mask64(start, count, PVA_DMA_ADB_ALIGNMENT);
}
static inline void
pva_dma_resource_map_add_hwseq_words(struct pva_dma_resource_map *map,
uint16_t start, uint16_t count)
{
map->hwseq_words |=
pva_mask64(start, count, PVA_DMA_HWSEQ_WORD_ALIGNMENT);
}
static inline void
pva_dma_resource_map_add_triggers(struct pva_dma_resource_map *map)
{
// If an application is running on VPU, it has access to all the triggers
// Only FW and DMA-only workloads can initiate transfers in parallel to
// a running VPU application, but they do not require triggers.
map->triggers |= 1;
}
static inline void
pva_dma_resource_map_copy_channels(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->channels |= src_map->channels;
}
static inline void
pva_dma_resource_map_copy_descriptors(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->descriptors |= src_map->descriptors;
}
static inline void
pva_dma_resource_map_copy_adbs(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->adbs |= src_map->adbs;
}
static inline void
pva_dma_resource_map_copy_triggers(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->triggers |= src_map->triggers;
}
static inline void
pva_dma_resource_map_copy_hwseq_words(struct pva_dma_resource_map *dst_map,
struct pva_dma_resource_map *src_map)
{
dst_map->hwseq_words |= src_map->hwseq_words;
}
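/*
 * Illustrative sketch: building a resource map for a command that uses
 * channels 0-1 and descriptors 0-7, then testing it against another map
 * ('other', hypothetical) for conflicts with a plain bitwise AND.
 *
 *   struct pva_dma_resource_map map;
 *   pva_dma_resource_map_reset(&map);
 *   pva_dma_resource_map_add_channels(&map, 0U, 2U);
 *   pva_dma_resource_map_add_descriptors(&map, 0U, 8U);
 *   bool conflict = ((map.channels & other.channels) != 0U) ||
 *                   ((map.descriptors & other.descriptors) != 0U);
 */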
struct pva_dma_config_resource {
uint8_t base_descriptor;
uint8_t base_channel;
uint8_t num_descriptors;
uint8_t num_channels;
uint16_t num_dynamic_slots;
/** Must be an even number to satisfy padding requirement. */
uint16_t num_relocs;
	/** Bitmask of channels whose block height is fixed. Once the corresponding bit is set,
	 * the block height of that channel should not be changed. */
uint16_t ch_block_height_fixed_mask;
uint16_t base_hwseq_word;
uint16_t num_hwseq_words;
uint16_t pad;
uint32_t vpu_exec_resource_id;
uint32_t common_config;
uint32_t output_enable[PVA_NUM_DMA_TRIGGERS];
struct pva_dma_resource_map dma_resource_map;
/* Followed by <num_dynamic_slots> of pva_fw_dma_slot */
/* Followed by <num_reloc_infos> of pva_fw_dma_reloc */
/* Followed by an array of pva_fw_dma_channel */
/* Followed by an array of pva_fw_dma_descriptor */
/* =====================================================================
* The following fields do not need to be fetched into TCM. The DMA config
* resource size (as noted in the resource table) does not include these
* fields */
/* Followed by an array of hwseq words */
};
struct pva_fw_vmem_buffer {
#define PVA_FW_SYM_TYPE_MSB 31
#define PVA_FW_SYM_TYPE_LSB 29
#define PVA_FW_VMEM_ADDR_MSB 28
#define PVA_FW_VMEM_ADDR_LSB 0
uint32_t addr;
uint32_t size;
};
struct pva_exec_bin_resource {
uint8_t code_addr_hi;
uint8_t data_section_addr_hi;
uint8_t num_data_sections;
uint8_t pad;
uint32_t code_addr_lo;
uint32_t data_section_addr_lo;
uint32_t code_size;
uint32_t num_vmem_buffers;
/* Followed by <num_data_sections> number of pva_fw_data_section_info */
/* Followed by <num_vmem_buffers> number of pva_fw_vmem_buffer */
};
static inline struct pva_fw_dma_slot *
pva_dma_config_get_slots(struct pva_dma_config_resource *dma_config)
{
return (struct pva_fw_dma_slot
*)((uint8_t *)dma_config +
sizeof(struct pva_dma_config_resource));
}
static inline struct pva_fw_dma_reloc *
pva_dma_config_get_relocs(struct pva_dma_config_resource *dma_config)
{
return (struct pva_fw_dma_reloc
*)((uint8_t *)pva_dma_config_get_slots(dma_config) +
sizeof(struct pva_fw_dma_slot) *
dma_config->num_dynamic_slots);
}
static inline struct pva_fw_dma_channel *
pva_dma_config_get_channels(struct pva_dma_config_resource *dma_config)
{
return (struct pva_fw_dma_channel *)((uint8_t *)
pva_dma_config_get_relocs(
dma_config) +
sizeof(struct pva_fw_dma_reloc) *
dma_config->num_relocs);
}
static inline struct pva_fw_dma_descriptor *
pva_dma_config_get_descriptors(struct pva_dma_config_resource *dma_config)
{
return (struct pva_fw_dma_descriptor
*)((uint8_t *)pva_dma_config_get_channels(dma_config) +
sizeof(struct pva_fw_dma_channel) *
dma_config->num_channels);
}
#endif // PVA_RESOURCE_H

View File

@@ -0,0 +1,349 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_H
#define PVA_API_H
#ifdef __cplusplus
extern "C" {
#endif
#include "pva_api_types.h"
#include "pva_api_dma.h"
#include "pva_api_vpu.h"
#include "pva_api_cmdbuf.h"
/* Core APIs */
/**
* @brief Create a PVA context.
*
* @param[in] pva_index Select which PVA instance to use if there are multiple PVAs
* in the SOC.
* @param[in] max_resource_count Maximum number of resources this context can have.
* @param[out] ctx Pointer to the created context.
*/
enum pva_error pva_context_create(uint32_t pva_index,
uint32_t max_resource_count,
struct pva_context **ctx);
/**
* @brief Destroy a PVA context.
*
* A context can only be destroyed after all queues are destroyed.
*
* @param[in] ctx Pointer to the context to destroy.
*/
void pva_context_destroy(struct pva_context *ctx);
/**
* @brief Create a PVA queue.
*
* @param[in] ctx Pointer to the context.
* @param[in] max_submission_count Max number of submissions that can be queued.
* @param[out] queue Pointer to the created queue.
*/
enum pva_error pva_queue_create(struct pva_context *ctx,
uint32_t max_submission_count,
struct pva_queue **queue);
/**
* @brief Destroy a PVA queue.
*
* @param[in] queue Pointer to the queue to destroy.
*/
void pva_queue_destroy(struct pva_queue *queue);
/**
 * @brief Allocate DRAM memory that can be mapped into PVA's device space
*
* @param[in] size Size of the memory to allocate.
* @param[out] out_mem Pointer to the allocated memory.
*/
enum pva_error pva_memory_alloc(uint64_t size, struct pva_memory **out_mem);
/**
* @brief Map the memory to CPU's virtual space.
*
* @param[in] mem Pointer to the memory to map.
* @param[in] access_mode Access mode for the memory. PVA_ACCESS_RD or
* PVA_ACCESS_RW.
* @param[out] out_va Pointer to the virtual address of the mapped memory.
*/
enum pva_error pva_memory_cpu_map(struct pva_memory *mem, uint32_t access_mode,
void **out_va);
/**
* @brief Unmap the memory from CPU's virtual space.
*
* @param[in] mem Pointer to the memory to unmap.
* @param[in] va Previously mapped virtual address.
*/
enum pva_error pva_memory_cpu_unmap(struct pva_memory *mem, void *va);
/**
* @brief Free the memory.
*
* Freeing a registered memory is okay since KMD holds a reference to the memory.
*
* @param mem Pointer to the memory to free.
*/
void pva_memory_free(struct pva_memory *mem);
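/*
 * Illustrative sketch: a typical CPU-side lifecycle of a PVA memory object
 * (error handling elided). Registration via the ops API below is still
 * required before command buffers can reference the memory.
 *
 *   struct pva_memory *mem = NULL;
 *   void *va = NULL;
 *   pva_memory_alloc(4096U, &mem);
 *   pva_memory_cpu_map(mem, PVA_ACCESS_RW, &va);
 *   // ... fill 'va' with input data ...
 *   pva_memory_cpu_unmap(mem, va);
 *   pva_memory_free(mem);   // safe even after registration; KMD keeps a reference
 */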
/**
* @brief Wait for a syncpoint to reach a value.
*
* @param[in] ctx Pointer to the context.
 * @param[in] syncpoint_id Syncpoint ID to wait on.
* @param[in] value Value to wait for.
* @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite.
*/
enum pva_error pva_syncpoint_wait(struct pva_context *ctx,
				  uint32_t syncpoint_id, uint32_t value,
uint64_t timeout_us);
/**
* @brief Submit a batch of command buffers.
*
* @param[in] queue Pointer to the queue.
* @param[in] submit_infos Array of submit info structures.
* @param[in] count Number of submit info structures.
* @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite.
*
* @note Concurrent submission to the same queue needs to be serialized by the
* caller.
*/
enum pva_error
pva_cmdbuf_batch_submit(struct pva_queue *queue,
struct pva_cmdbuf_submit_info *submit_infos,
uint32_t count, uint64_t timeout_us);
/**
* @brief Get the symbol table for a registered executable.
*
* @param[in] ctx Pointer to the context.
* @param[in] exe_resource_id Resource ID of the executable.
* @param[out] out_info Pointer to the symbol info array.
* @param[in] max_num_symbols Maximum number of symbols to return.
*/
enum pva_error pva_executable_get_symbols(struct pva_context *ctx,
uint32_t exe_resource_id,
struct pva_symbol_info *out_info,
uint32_t max_num_symbols);
/**
* @brief Submit a list of asynchronous registration operations to KMD.
*
* The operations can be:
* - Memory registration
* - Executable registration
* - DMA config registration
*
* The response buffer will contain the resource IDs of the registered
* resources. Any command buffers that use these resources should wait on the
* returned post fence.
*
* @param[in] ctx Pointer to the context.
* @param[in] fence Pointer to the post fence to wait on. If NULL, it means the
* caller is not interested in waiting. This usually only applies to unregister
* operations.
 * @param[in] input_buffer Input buffer containing the list of operations.
 * @param[out] output_buffer Output buffer to store the response.
*
* @note Input and output buffer may be the same buffer.
*/
enum pva_error pva_ops_submit_async(struct pva_context *ctx,
struct pva_fence *fence,
struct pva_ops_buffer const *input_buffer,
struct pva_ops_buffer *output_buffer);
/**
* @brief Perform a list of registration operations synchronously.
*
* The operations can be:
* - Memory registration
* - Executable registration
* - DMA config registration
*
* The response buffer will contain the resource IDs of the registered
* resources.
*
* @param[in] ctx Pointer to the context.
 * @param[in] input_buffer Input buffer containing the list of operations.
 * @param[out] output_buffer Output buffer to store the response.
*
* @note Input and output buffer may be the same buffer.
*
*/
enum pva_error pva_ops_submit(struct pva_context *ctx,
struct pva_ops_buffer const *input_buffer,
struct pva_ops_buffer *output_buffer);
/** Size of the ops buffer header. When user allocates memory for ops buffer,
* this size needs to be added. */
#define PVA_OPS_BUFFER_HEADER_SIZE 64
/**
* @brief Initialize pva_ops_buffer to keep track of the state of
* operations buffer during preparation.
*
* @param[out] buf_handle Pointer to the pva_ops_buffer object to initialize.
* @param[in] buf Pointer to the buffer that will store the operations.
* @param[in] size Size of the buffer.
*/
enum pva_error pva_ops_buffer_init(struct pva_ops_buffer *buf_handle, void *buf,
uint32_t size);
#define PVA_OPS_MEMORY_REG_SIZE 64
/**
* @brief Append a memory registration operation to the operations buffer.
*
* @param[in] ctx Pointer to the context.
* @param[in] mem Pointer to the memory to register.
* @param[in] segment Memory segment to register.
* @param[in] access_flags Access flags for the memory.
* @param[out] op_buf Pointer to the operations buffer.
*/
enum pva_error pva_ops_append_memory_register(struct pva_context *ctx,
struct pva_memory *mem,
enum pva_memory_segment segment,
uint32_t access_flags,
struct pva_ops_buffer *op_buf);
#define PVA_OPS_EXEC_REG_HEADER_SIZE 16
/**
* @brief Append an executable registration operation to the operations.
*
* @param[in] ctx Pointer to the context.
* @param[in] executable Pointer to the executable binary content.
* @param[in] executable_size Size of the executable.
* @param[out] op_buf Pointer to the operations buffer.
*/
enum pva_error pva_ops_append_executable_register(
struct pva_context *ctx, void const *executable,
uint32_t executable_size, struct pva_ops_buffer *op_buf);
#define PVA_OPS_DMA_CONFIG_REG_SIZE (24 * 1024)
/**
* @brief Append a DMA config registration operation to the operations.
* @param[in] ctx Pointer to the context.
* @param[in] dma_config Pointer to the DMA config.
* @param[out] op_buf Pointer to the operations buffer.
*/
enum pva_error
pva_ops_append_dma_config_register(struct pva_context *ctx,
struct pva_dma_config const *dma_config,
struct pva_ops_buffer *op_buf);
#define PVA_OPS_UNREG_SIZE 16
enum pva_error pva_ops_append_unregister(struct pva_context *ctx,
uint32_t resource_id,
struct pva_ops_buffer *op_buf);
/**
* @brief Parse the response buffer to get the resource ID of the registered
* memory or DMA configuration.
*
* @param[in] resp_buf Pointer to the response buffer.
* @param[out] resource_id output resource ID.
*/
enum pva_error pva_ops_parse_register_resp(struct pva_ops_buffer *resp_buf,
uint32_t *resource_id);
/**
* @brief Parse the response buffer to get the resource ID of the registered
* executable.
*
 * @param[in] op_buf Pointer to the response buffer.
* @param[out] num_symbols Number of symbols in the executable.
* @param[out] resource_id output resource ID.
*/
enum pva_error pva_ops_parse_exec_register_resp(struct pva_ops_buffer *op_buf,
uint32_t *num_symbols,
uint32_t *resource_id);
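/*
 * Illustrative sketch: registering one memory object synchronously with the
 * ops helpers above (error handling elided). 'ctx', 'mem', 'segment' (a
 * pva_memory_segment value) and 'access_flags' are assumed to exist already.
 *
 *   uint8_t storage[PVA_OPS_BUFFER_HEADER_SIZE + PVA_OPS_MEMORY_REG_SIZE];
 *   struct pva_ops_buffer ops;
 *   uint32_t resource_id = 0U;
 *
 *   pva_ops_buffer_init(&ops, storage, sizeof(storage));
 *   pva_ops_append_memory_register(ctx, mem, segment, access_flags, &ops);
 *   pva_ops_submit(ctx, &ops, &ops);          // input and output may be the same
 *   pva_ops_parse_register_resp(&ops, &resource_id);
 */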
#define PVA_DATA_CHANNEL_HEADER_SIZE 32
/**
* @brief Initialize VPU print buffer
*
* @param[in] data Pointer to VPU print buffer.
 * @param[in] size Size of VPU print buffer.
 * @param[out] data_channel Pointer to the initialized data channel handle.
*/
struct pva_data_channel;
enum pva_error pva_init_data_channel(void *data, uint32_t size,
struct pva_data_channel **data_channel);
/**
* @brief Read VPU print buffer
*
 * @param[in] data_channel Pointer to the VPU print data channel.
* @param[out] read_buffer Pointer to output buffer in which data will be read.
* @param[in] bufferSize Size of output buffer.
* @param[out] read_size Size of actual data read in output buffer.
*/
enum pva_error pva_read_data_channel(struct pva_data_channel *data_channel,
uint8_t *read_buffer, uint32_t bufferSize,
uint32_t *read_size);
/**
* @brief Duplicate PVA memory object.
*
* This function duplicates a PVA memory object. The new object will have shared
* ownership of the memory.
*
* @param[in] src Pointer to the source memory object.
* @param[in] access_mode Access mode for the new memory object. It should be
 * no less restrictive than the source memory's. Passing 0 will use the same access
* mode as the source memory.
* @param[out] dst Resulting duplicated memory object.
*/
enum pva_error pva_memory_duplicate(struct pva_memory *src,
uint32_t access_mode,
struct pva_memory **dst);
/**
* @brief Get memory attributes.
*
* @param[in] mem Pointer to the memory.
* @param[out] out_attrs Pointer to the memory attributes.
*/
void pva_memory_get_attrs(struct pva_memory const *mem,
struct pva_memory_attrs *out_attrs);
/** \brief Specifies the PVA system software major version. */
#define PVA_SYSSW_MAJOR_VERSION (2U)
/** \brief Specifies the PVA system software minor version. */
#define PVA_SYSSW_MINOR_VERSION (7U)
/**
* @brief Get PVA system software version.
*
* PVA system software version is defined as the latest version of cuPVA which is fully supported
* by this version of the PVA system software.
*
* @param[out] version Version of the currently running system SW, computed as
* (PVA_SYSSW_MAJOR_VERSION * 1000) + PVA_SYSSW_MINOR_VERSION.
* @return PVA_SUCCESS on success, else error code indicating the failure.
*/
enum pva_error pva_get_version(uint32_t *version);
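/**
* Minimal sketch of decoding the combined version value documented above; the
* printf-based reporting is illustrative only.
*
* @code
* #include <stdio.h>
*
* static void report_syssw_version(void)
* {
*     uint32_t version = 0U;
*
*     if (pva_get_version(&version) == PVA_SUCCESS) {
*         // Inverse of (PVA_SYSSW_MAJOR_VERSION * 1000) + PVA_SYSSW_MINOR_VERSION.
*         printf("PVA system SW %u.%u\n", (unsigned)(version / 1000U),
*                (unsigned)(version % 1000U));
*     }
* }
* @endcode
*/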
/**
* @brief Get the hardware characteristics of the PVA.
*
* @param[out] pva_hw_char Pointer to the hardware characteristics.
*/
enum pva_error
pva_get_hw_characteristics(struct pva_characteristics *pva_hw_char);
#ifdef __cplusplus
}
#endif
#endif // PVA_API_H

View File

@@ -0,0 +1,627 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_CMDBUF_H
#define PVA_API_CMDBUF_H
#include "pva_api_types.h"
// Maximum number of slots for maintaining timestamps
#define PVA_MAX_QUERY_SLOTS_COUNT 32U
/** The common header for all commands.
*/
struct pva_cmd_header {
#define PVA_CMD_PRIV_OPCODE_FLAG (1U << 7U)
/** Opcode for the command. MSB of opcode indicates whether this command is
* privileged or not */
uint8_t opcode;
/** Command specific flags */
uint8_t flags;
/**
* For pva_cmd_barrier: barrier_group specifies which group this barrier
* waits for.
* For pva_cmd_retire_barrier_group: barrier_group specifies which id will
* be retired. Retired ids can be re-used by future commands and will refer
* to a new logical group.
* For all other commands: barrier_group specifies which barrier group this
* command belongs to. Other commands are able to defer execution until all
* commands in the barrier group have completed, or stall the cmd buffer
* until such a time. Note that asynchronous commands may complete in an
* order different from the order in which they appear in the command
* buffer.
*/
uint8_t barrier_group;
/** Length in 4-bytes, including this header. */
uint8_t len;
};
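/**
* Illustrative helper (an assumption, not part of the command set): fill a
* command header with its length expressed in 4-byte units, as the len field
* above requires. The caller is assumed to pass the full command size in
* bytes, already padded to a 4-byte boundary.
*
* @code
* static inline void example_fill_header(struct pva_cmd_header *hdr,
*                                        uint8_t opcode, uint8_t flags,
*                                        uint8_t barrier_group,
*                                        uint32_t total_size_bytes)
* {
*     hdr->opcode = opcode;
*     hdr->flags = flags;
*     hdr->barrier_group = barrier_group;
*     hdr->len = (uint8_t)(total_size_bytes / 4U);  // length in 4-byte units
* }
* @endcode
*/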
struct pva_user_dma_allowance {
#define PVA_USER_DMA_ALLOWANCE_ADB_STEP_SIZE 8
uint32_t channel_idx : 4;
uint32_t desc_start_idx : 7;
uint32_t desc_count : 7;
uint32_t adb_start_idx : 6;
uint32_t adb_count : 6;
};
/* Basic Commands */
/** Does nothing. It can be used as a placeholder in the command buffer. */
struct pva_cmd_noop {
#define PVA_CMD_OPCODE_NOOP 0U
struct pva_cmd_header header;
};
/** Link next chunk. This command can be placed anywhere in the command buffer.
* Firmware will start fetching the next chunk when this command is executed. */
struct pva_cmd_link_chunk {
#define PVA_CMD_OPCODE_LINK_CHUNK 1U
struct pva_cmd_header header;
uint8_t next_chunk_offset_hi;
uint8_t pad;
uint16_t next_chunk_size; /**< Size of next chunk in bytes */
uint32_t next_chunk_resource_id;
uint32_t next_chunk_offset_lo;
struct pva_user_dma_allowance user_dma;
};
/** Barrier command. The user can assign a barrier group to any asynchronous
* command. The barrier command blocks FW execution until the specified group of
* asynchronous commands have completed. Up to 8 barrier groups are supported.
*
* @note A barrier command is not typically required since FW stalls
* automatically in the event of hardware conflicts or when issuing a command is
* deemed unsafe according to the state machines. However, if a stall is needed
* for other reasons, the barrier command can be utilized.
*/
struct pva_cmd_barrier {
#define PVA_CMD_OPCODE_BARRIER 2U
struct pva_cmd_header header;
};
/** Acquire one or more PVE systems, each of which includes a VPS, DMA and PPE.
* It blocks until the specified number of engines has been acquired.
* By default, the lowest engine ID acquired is set as the current engine.
* Acquired engines will be automatically released when this command buffer finishes.
* They can also be released using the release_engine command.
*/
struct pva_cmd_acquire_engine {
#define PVA_CMD_OPCODE_ACQUIRE_ENGINE 3U
struct pva_cmd_header header;
uint8_t engine_count;
uint8_t pad[3];
};
/** Release all acquired PVE systems. It is legal to release an engine while it
* is still running. The released engine won't be available to be acquired until
* it finishes and becomes idle again. */
struct pva_cmd_release_engine {
#define PVA_CMD_OPCODE_RELEASE_ENGINE 4U
struct pva_cmd_header header;
};
/** Set a PVE engine as current. Following commands will modify this engine. The
* zero-based engine index must be less than the acquired engine number. */
struct pva_cmd_set_current_engine {
#define PVA_CMD_OPCODE_SET_CURRENT_ENGINE 5U
struct pva_cmd_header header;
uint8_t engine_index;
uint8_t pad[3];
};
/** This command specifies the executable to use for the following VPU launches.
* It doesn't do anything other than set the context for the following
* commands.
*
* Note: This command cannot be initiated if any of the DMA sets (that access
* VMEM) are in a running state, in order to prevent mismatches between DMA sets
* and VPU executables. The command buffer will stall until these DMA sets are
* finished. */
struct pva_cmd_set_vpu_executable {
#define PVA_CMD_OPCODE_SET_VPU_EXECUTABLE 6U
struct pva_cmd_header header;
uint32_t vpu_exec_resource_id;
};
/** This command clears the entire VMEM. User may choose to skip VMEM clear if
* there are no bss sections in the VPU executable. Since VMEM can be accessed
* by both VPU and PPE, this command drives both the VPU state machine and the
* PPE state machine. As a result, it can only be started if both VPU state
* machine and PPE state machine are in valid states (Idle or Binded). */
struct pva_cmd_clear_vmem {
#define PVA_CMD_OPCODE_CLEAR_VMEM 7U
struct pva_cmd_header header;
};
/** This command configures VPU hardware. Specifically, it configures code
* segment register and copies data sections. */
struct pva_cmd_init_vpu_executable {
#define PVA_CMD_OPCODE_INIT_VPU_EXECUTABLE 8U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
};
/** Start VPU instruction prefetch from specified entry point. Currently, the
* entry point index must be 0. More entry points will be supported in the
* future. Note that this command merely triggers the prefetch but does not wait
* for the prefetch to complete. Therefore, this command is synchronous. */
struct pva_cmd_prefetch_vpu_code {
#define PVA_CMD_OPCODE_PREFETCH_VPU_CODE 9U
struct pva_cmd_header header;
uint32_t entry_point_index;
};
/** Run the VPU program from the specified entry point until it finishes. The
* lifetime of this command covers the entire VPU program execution. Since this
* command is asynchronous, it doesn't block the following commands from
* executing. */
struct pva_cmd_run_vpu {
#define PVA_CMD_OPCODE_RUN_VPU 10U
struct pva_cmd_header header;
uint32_t entry_point_index;
};
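/**
* Illustrative sketch (an assumption drawn from the command descriptions
* above, not a normative example) of a typical VPU launch sequence: acquire an
* engine, select and initialize an executable, then run entry point 0. The
* structures are assumed to be zero-initialized by the caller; serialization
* into a command buffer chunk and the submit path are not shown.
*
* @code
* static void fill_vpu_launch_cmds(uint32_t vpu_exec_resource_id,
*                                  struct pva_cmd_acquire_engine *acq,
*                                  struct pva_cmd_set_vpu_executable *set_exec,
*                                  struct pva_cmd_clear_vmem *clear,
*                                  struct pva_cmd_init_vpu_executable *init,
*                                  struct pva_cmd_run_vpu *run)
* {
*     acq->header.opcode = PVA_CMD_OPCODE_ACQUIRE_ENGINE;
*     acq->header.len = (uint8_t)(sizeof(*acq) / 4U);
*     acq->engine_count = 1U;
*
*     set_exec->header.opcode = PVA_CMD_OPCODE_SET_VPU_EXECUTABLE;
*     set_exec->header.len = (uint8_t)(sizeof(*set_exec) / 4U);
*     set_exec->vpu_exec_resource_id = vpu_exec_resource_id;
*
*     clear->header.opcode = PVA_CMD_OPCODE_CLEAR_VMEM;
*     clear->header.len = (uint8_t)(sizeof(*clear) / 4U);
*
*     init->header.opcode = PVA_CMD_OPCODE_INIT_VPU_EXECUTABLE;
*     init->header.len = (uint8_t)(sizeof(*init) / 4U);
*
*     run->header.opcode = PVA_CMD_OPCODE_RUN_VPU;
*     run->header.len = (uint8_t)(sizeof(*run) / 4U);
*     run->entry_point_index = 0U;  // only entry point 0 is supported today
* }
* @endcode
*/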
/** Copy data from opaque payload to a VPU variable. Firmware may choose to copy
* with R5 or DMA. If using DMA, channel 0 will be used. */
struct pva_cmd_set_vpu_parameter {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER 11U
struct pva_cmd_header header;
uint16_t data_size;
uint16_t pad;
uint32_t symbol_id;
uint32_t vmem_offset;
/* Followed by <data_size> number of bytes, padded to 4 bytes */
};
/** Copy data from a DRAM buffer to a VPU variable. DMA will be used to perform
* the copy. The user can optionally provide a user channel, a descriptor and
* ADBs to speed up the copy. */
struct pva_cmd_set_vpu_parameter_with_buffer {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_WITH_BUFFER 12U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
uint8_t src_dram_offset_hi;
uint8_t pad[3];
uint32_t data_size;
uint32_t dst_symbol_id;
uint32_t dst_vmem_offset;
uint32_t src_dram_resource_id;
uint32_t src_dram_offset_lo;
};
/** For set_vpu_parameter_with_address command, set this flag in header.flags to
* indicate that the target symbol is the legacy pointer symbol type:
* pva_fw_vpu_legacy_ptr_symbol, which only supports a 32-bit offset and a
* 32-bit size. */
#define PVA_CMD_FLAGS_USE_LEGACY_POINTER 0x1
/** Copy the address of a DRAM buffer to a VPU variable. The variable must be
* laid out exactly according to pva_fw_vpu_ptr_symbol
*/
struct pva_cmd_set_vpu_parameter_with_address {
#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_WITH_ADDRESS 13U
struct pva_cmd_header header;
uint8_t dram_offset_hi;
uint8_t pad[3];
uint32_t symbol_id;
uint32_t dram_resource_id;
uint32_t dram_offset_lo;
};
#define PVA_MAX_DMA_SETS_PER_DMA_ENGINE 4
#define PVA_DMA_CONFIG_FETCH_BUFFER_PER_DMA_ENGINE 1
/** This command first acquires the TCM scratch and then fetches DMA configuration
* into the scratch. The command does not modify DMA
* hardware, allowing FW to continue using user channels for data transfer after
* its execution. This command only uses channel 0 to fetch the DMA
* configuration. However, user can still help speed up the process by
* providing additional ADBs. This command will block if theres no TCM scratch
* available. If theres no pending commands AND theres no TCM scratch, then it
* means we encountered a dead lock, the command buffer will be aborted. */
struct pva_cmd_fetch_dma_configuration {
#define PVA_CMD_OPCODE_FETCH_DMA_CONFIGURATION 14U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t pad[3];
uint32_t resource_id;
struct pva_user_dma_allowance user_dma;
};
/** Setup DMA hardware registers using previously fetched DMA configuration. FW
* uses channel 0 to copy DMA descriptors into descriptor RAM. The user can
* provide additional ADBs to speed up the process. The command will block until
* the needed channels, descriptors and hwseq words are acquired. The command must
* also validate that all source and destination fields of each DMA descriptor
* being programmed are bound to a resource.
*/
struct pva_cmd_setup_dma {
#define PVA_CMD_OPCODE_SETUP_DMA 15U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
uint8_t dma_set_id;
uint8_t pad[3];
};
/** Run DMA channels according to the current DMA configuration until they are
* finished. The lifetime of this command covers the entire DMA transfer. The
* command shall block until the needed VDBs/ADBs and triggers (GPIOs) are
* acquired.
* @note This command checks that the DMA set to be started is indeed paired
* with the currently bound VPU executable. If not, this constitutes a
* programming error, and the command buffer will be aborted. */
struct pva_cmd_run_dma {
#define PVA_CMD_OPCODE_RUN_DMA 16U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t pad[3];
};
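/**
* Illustrative sketch (an assumption based on the descriptions above): the
* usual fetch -> setup -> run ordering for one DMA set. The structures are
* assumed to be zero-initialized by the caller, and the user_dma allowance
* fields are left at zero (no user-provided channels/descriptors/ADBs).
*
* @code
* static void fill_dma_set_cmds(uint32_t dma_config_resource_id,
*                               uint8_t dma_set_id,
*                               struct pva_cmd_fetch_dma_configuration *fetch,
*                               struct pva_cmd_setup_dma *setup,
*                               struct pva_cmd_run_dma *run)
* {
*     fetch->header.opcode = PVA_CMD_OPCODE_FETCH_DMA_CONFIGURATION;
*     fetch->header.len = (uint8_t)(sizeof(*fetch) / 4U);
*     fetch->dma_set_id = dma_set_id;
*     fetch->resource_id = dma_config_resource_id;
*
*     setup->header.opcode = PVA_CMD_OPCODE_SETUP_DMA;
*     setup->header.len = (uint8_t)(sizeof(*setup) / 4U);
*     setup->dma_set_id = dma_set_id;
*
*     run->header.opcode = PVA_CMD_OPCODE_RUN_DMA;
*     run->header.len = (uint8_t)(sizeof(*run) / 4U);
*     run->dma_set_id = dma_set_id;
* }
* @endcode
*/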
/** This command specifies the executable to use for the following PPE launches.
* It doesn't do anything other than set the context for the following
* commands. */
struct pva_cmd_set_ppe_executable {
#define PVA_CMD_OPCODE_SET_PPE_EXECUTABLE 17U
struct pva_cmd_header header;
uint32_t ppe_exec_resource_id;
};
/** Start PPE instruction prefetch from specified entry point. Currently, the
* entry point index must be 0. Note that this command merely triggers the
* prefetch but does not wait for the prefetch to complete. Therefore, this
* command is synchronous. */
struct pva_cmd_prefetch_ppe_code {
#define PVA_CMD_OPCODE_PREFETCH_PPE_CODE 18U
struct pva_cmd_header header;
uint32_t entry_point_index;
};
/** Setup PPE code segment and data sections. */
struct pva_cmd_init_ppe_executable {
#define PVA_CMD_OPCODE_INIT_PPE_EXECUTABLE 19U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
};
/** Run the PPE program until it finishes. The lifetime of this command covers
* the entire PPE program execution. */
struct pva_cmd_run_ppe {
#define PVA_CMD_OPCODE_RUN_PPE 20U
struct pva_cmd_header header;
uint32_t entry_point_index;
};
#define PVA_BARRIER_GROUP_0 0U
#define PVA_BARRIER_GROUP_1 1U
#define PVA_BARRIER_GROUP_2 2U
#define PVA_BARRIER_GROUP_3 3U
#define PVA_BARRIER_GROUP_4 4U
#define PVA_BARRIER_GROUP_5 5U
#define PVA_BARRIER_GROUP_6 6U
#define PVA_BARRIER_GROUP_7 7U
#define PVA_MAX_BARRIER_GROUPS 8U
#define PVA_BARRIER_GROUP_INVALID 0xFFU
/**
* @brief Captures a timestamp to DRAM
*
* This command allows you to capture a timestamp using one of four modes:
*
* - **IMMEDIATE_MODE**: Captures the timestamp immediately.
* - **VPU START MODE**: Enqueue a timestamp to be captured the next time the
* current VPU starts. Up to 8 VPU start timestamps may be active at a time
* for a given engine.
* - **VPU DONE MODE**: Enqueue a timestamp to be captured the next time the
* current VPU enters done state. Up to 8 VPU done timestamps may be active at
* a time for a given engine.
* - **DEFER MODE**: Defers the timestamp capture by specifying a barrier group.
* The timestamp will be captured once the commands in the specified barrier
* group have completed. Each barrier group allows one timestamp to be active
* at a time.
*
* The timestamp will be available in DRAM after waiting on any postfence.
*
* @note This command is asynchronous, ensuring it does not block the next command.
*/
struct pva_cmd_capture_timestamp {
#define PVA_CMD_OPCODE_CAPTURE_TIMESTAMP 21U
struct pva_cmd_header header;
uint8_t offset_hi;
uint8_t defer_barrier_group;
#define PVA_CMD_CAPTURE_MODE_IMMEDIATE 0U
#define PVA_CMD_CAPTURE_MODE_VPU_START 1U
#define PVA_CMD_CAPTURE_MODE_VPU_DONE 2U
#define PVA_CMD_CAPTURE_MODE_DEFER 3U
uint8_t capture_mode;
uint8_t pad;
uint32_t resource_id;
uint32_t offset_lo;
};
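/**
* Illustrative sketch (an assumption, not from the original documentation) of
* an immediate-mode timestamp capture into the start of a DRAM resource. The
* structure is assumed to be zero-initialized; a zero offset is used so the
* offset_hi/offset_lo split for large offsets does not come into play.
*
* @code
* static void fill_immediate_timestamp(struct pva_cmd_capture_timestamp *cmd,
*                                      uint32_t dram_resource_id)
* {
*     cmd->header.opcode = PVA_CMD_OPCODE_CAPTURE_TIMESTAMP;
*     cmd->header.len = (uint8_t)(sizeof(*cmd) / 4U);
*     cmd->capture_mode = PVA_CMD_CAPTURE_MODE_IMMEDIATE;
*     cmd->defer_barrier_group = PVA_BARRIER_GROUP_INVALID;  // unused in this mode
*     cmd->resource_id = dram_resource_id;
*     cmd->offset_lo = 0U;
*     cmd->offset_hi = 0U;
* }
* @endcode
*/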
/** Set the address of the status buffer. FW will output detailed command buffer
* status in case of command buffer abort. */
struct pva_cmd_request_status {
#define PVA_CMD_OPCODE_CAPTURE_STATUS 22U
struct pva_cmd_header header;
uint8_t offset_hi;
uint8_t pad[3];
uint32_t resource_id;
uint32_t offset_lo;
};
/** Blocks until l2ram is available. To prevent deadlock with other command
* buffers, l2ram must be acquired prior to acquiring any engine. It will be
* automatically freed when this command buffer finishes. If persistence is
* required, it must be saved to DRAM. One command buffer may only hold one
* L2SRAM allocation at a time. */
struct pva_cmd_bind_l2sram {
#define PVA_CMD_OPCODE_BIND_L2SRAM 23U
struct pva_cmd_header header;
uint8_t dram_offset_hi;
#define FILL_ON_MISS (1U << 0U)
#define FLUSH_ON_EVICTION (1U << 1U)
uint8_t access_policy;
uint8_t pad[2];
uint32_t dram_resource_id;
uint32_t dram_offset_lo;
uint32_t l2sram_size;
struct pva_user_dma_allowance user_dma;
};
/** Free previously allocated l2ram. This command is asynchronous because it
* needs to wait for all commands that are started before it to complete. */
struct pva_cmd_release_l2sram {
#define PVA_CMD_OPCODE_RELEASE_L2SRAM 24U
struct pva_cmd_header header;
};
/**
* This command writes data to a DRAM region. The DRAM region is described
* by the resource ID, offset and size fields. The data to be written is placed
* right after the command struct. For this command to execute successfully,
* the following conditions must be met:
* 1. 'resource_id' must point to a valid resource in DRAM.
* 2. The offset and size fields must add up to a value less than or equal to
* the size of the DRAM resource.
*/
struct pva_cmd_write_dram {
#define PVA_CMD_OPCODE_WRITE_DRAM 25U
struct pva_cmd_header header;
uint8_t offset_hi;
uint8_t pad;
uint16_t write_size;
uint32_t resource_id;
uint32_t offset_lo;
/* Followed by write_size bytes, padded to 4 bytes boundary */
};
/** Set this bit in header.flags to a @ref pva_surface_format value to indicate
* whether the surface is block linear or pitch linear.
*
* For block linear surfaces, the starting address for a descriptor is:
* IOVA_OF(resource_id) + surface_base_offset + PL2BL(slot_offset + desc_offset).
*
* For pitch linear surfaces, the starting address for a descriptor is:
* IOVA_OF(resource_id) + surface_base_offset + slot_offset + desc_offset
*/
#define PVA_CMD_FLAGS_SURFACE_FORMAT_MSB 0U
#define PVA_CMD_FLAGS_SURFACE_FORMAT_LSB 0U
/** MSB of log2 block height in flags field of the command header */
#define PVA_CMD_FLAGS_LOG2_BLOCK_HEIGHT_MSB 3U
/** LSB of log2 block height in flags field of the command header */
#define PVA_CMD_FLAGS_LOG2_BLOCK_HEIGHT_LSB 1U
/** Bind a DRAM surface to a slot. The surface can be block linear or pitch
* linear. */
struct pva_cmd_bind_dram_slot {
#define PVA_CMD_OPCODE_BIND_DRAM_SLOT 26U
/** flags field will contain block linear flag and block height */
struct pva_cmd_header header;
uint8_t dma_set_id; /**< ID of the DMA set */
uint8_t slot_offset_hi;
uint8_t surface_base_offset_hi;
uint8_t slot_id; /**< ID of slot to bind */
uint32_t resource_id; /**< Resource ID of the DRAM allocation for the surface */
uint32_t slot_offset_lo; /**< Per-slot offset in pitch linear domain, from slot base to surface base */
uint32_t surface_base_offset_lo; /**< Surface base offset in bytes, from surface base to allocation base */
};
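/**
* Illustrative sketch (an assumption derived from the bit positions defined
* above): packing the surface format and log2 block height into header.flags
* for a block linear surface bound at offset 0. Only the *_MSB/*_LSB defines
* and the structure come from this header.
*
* @code
* static void fill_bind_bl_slot(struct pva_cmd_bind_dram_slot *cmd,
*                               uint8_t dma_set_id, uint8_t slot_id,
*                               uint32_t resource_id, uint8_t log2_block_height)
* {
*     uint8_t flags = 0U;
*
*     flags |= (uint8_t)(PVA_SURF_FMT_BLOCK_LINEAR
*                        << PVA_CMD_FLAGS_SURFACE_FORMAT_LSB);
*     flags |= (uint8_t)(log2_block_height
*                        << PVA_CMD_FLAGS_LOG2_BLOCK_HEIGHT_LSB);
*
*     cmd->header.opcode = PVA_CMD_OPCODE_BIND_DRAM_SLOT;
*     cmd->header.len = (uint8_t)(sizeof(*cmd) / 4U);
*     cmd->header.flags = flags;
*     cmd->dma_set_id = dma_set_id;
*     cmd->slot_id = slot_id;
*     cmd->resource_id = resource_id;
*     cmd->slot_offset_lo = 0U;
*     cmd->slot_offset_hi = 0U;
*     cmd->surface_base_offset_lo = 0U;
*     cmd->surface_base_offset_hi = 0U;
* }
* @endcode
*/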
struct pva_cmd_bind_vmem_slot {
#define PVA_CMD_OPCODE_BIND_VMEM_SLOT 27U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t slot_id;
uint8_t pad[2];
uint32_t symbol_id;
uint32_t offset;
};
/** @brief Unregisters a resource.
*
* This command immediately removes the specified resource from the resource
* table upon execution. However, FW does not immediately notify KMD to
* deallocate the resource as it may still be in use by other concurrently
* running command buffers in the same context.
*
* The FW takes note of the currently running command buffers and notifies the
* KMD to deallocate the resource once these command buffers have completed
* their execution.
*
* @note If a command buffer in the same context either hangs or executes for an
* extended period, no resources can be effectively freed, potentially leading
* to resource exhaustion.
*/
struct pva_cmd_unregister_resource {
#define PVA_CMD_OPCODE_UNREGISTER_RESOURCE 28U
struct pva_cmd_header header;
uint32_t resource_id;
};
/** Write instance parameter to a VMEM symbol. */
struct pva_cmd_set_vpu_instance_parameter {
#define PVA_CMD_OPCODE_SET_VPU_INSTANCE_PARAMETER 29U
struct pva_cmd_header header;
uint32_t symbol_id;
};
struct pva_cmd_run_unit_tests {
#define PVA_CMD_OPCODE_RUN_UNIT_TESTS 30U
struct pva_cmd_header header;
#define PVA_FW_UTESTS_MAX_ARGC 16U
uint8_t argc;
uint8_t pad[3];
uint32_t in_resource_id;
uint32_t in_offset;
uint32_t in_size;
uint32_t out_resource_id;
uint32_t out_offset;
uint32_t out_size;
};
struct pva_cmd_set_vpu_print_cb {
#define PVA_CMD_OPCODE_SET_VPU_PRINT_CB 31U
struct pva_cmd_header header;
uint32_t cb_resource_id;
uint32_t cb_offset;
};
struct pva_cmd_invalidate_l2sram {
#define PVA_CMD_OPCODE_INVALIDATE_L2SRAM 32U
struct pva_cmd_header header;
uint8_t dram_offset_hi;
uint8_t pad[3];
uint32_t dram_resource_id;
uint32_t dram_offset_lo;
uint32_t l2sram_size;
};
struct pva_cmd_flush_l2sram {
#define PVA_CMD_OPCODE_FLUSH_L2SRAM 33U
struct pva_cmd_header header;
struct pva_user_dma_allowance user_dma;
};
struct pva_cmd_err_inject {
#define PVA_CMD_OPCODE_ERR_INJECT 34U
struct pva_cmd_header header;
enum pva_error_inject_codes err_inject_code;
};
struct pva_cmd_patch_l2sram_offset {
#define PVA_CMD_OPCODE_PATCH_L2SRAM_OFFSET 35U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t slot_id;
uint8_t pad[2];
uint32_t offset;
};
/** After retiring a barrier group, all future commands which refer to that barrier group id will be
* mapped to a new logical barrier group. This allows re-using barrier ids within a command buffer.
*/
struct pva_cmd_retire_barrier_group {
#define PVA_CMD_OPCODE_RETIRE_BARRIER_GROUP 36U
struct pva_cmd_header header;
};
#define PVA_CMD_OPCODE_COUNT 37U
struct pva_cmd_init_resource_table {
#define PVA_CMD_OPCODE_INIT_RESOURCE_TABLE (0U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
/** Resource table ID, from 0 to 7. ID 0 is the device's resource table;
* IDs 1-7 belong to users. */
uint8_t resource_table_id;
uint8_t resource_table_addr_hi;
uint8_t pad[2];
uint32_t resource_table_addr_lo;
uint32_t max_n_entries;
};
struct pva_cmd_deinit_resource_table {
#define PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE (1U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t resource_table_id;
uint8_t pad[3];
};
struct pva_cmd_update_resource_table {
#define PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE (2U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t resource_table_id;
uint8_t pad[3];
uint32_t resource_id;
struct pva_resource_entry entry;
};
struct pva_cmd_init_queue {
#define PVA_CMD_OPCODE_INIT_QUEUE (3U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t ccq_id;
uint8_t queue_id;
uint8_t queue_addr_hi;
uint8_t pad;
uint32_t queue_addr_lo;
uint32_t max_n_submits;
};
struct pva_cmd_deinit_queue {
#define PVA_CMD_OPCODE_DEINIT_QUEUE (4U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t ccq_id;
uint8_t queue_id;
uint8_t pad[2];
};
struct pva_cmd_enable_fw_profiling {
#define PVA_CMD_OPCODE_ENABLE_FW_PROFILING (5U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t buffer_offset_hi;
uint8_t timestamp_type;
uint8_t pad[2];
uint32_t buffer_resource_id;
uint32_t buffer_size;
uint32_t buffer_offset_lo;
uint32_t filter;
};
struct pva_cmd_disable_fw_profiling {
#define PVA_CMD_OPCODE_DISABLE_FW_PROFILING (6U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_get_tegra_stats {
#define PVA_CMD_OPCODE_GET_TEGRA_STATS (7U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t buffer_offset_hi;
bool enabled;
uint8_t pad[2];
uint32_t buffer_resource_id;
uint32_t buffer_size;
uint32_t buffer_offset_lo;
};
struct pva_cmd_suspend_fw {
#define PVA_CMD_OPCODE_SUSPEND_FW (8U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_resume_fw {
#define PVA_CMD_OPCODE_RESUME_FW (9U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
#define PVA_CMD_PRIV_OPCODE_COUNT 10U
#define PVA_MAX_CMDBUF_CHUNK_LEN 1024
#define PVA_MAX_CMDBUF_CHUNK_SIZE (sizeof(uint32_t) * PVA_MAX_CMDBUF_CHUNK_LEN)
#endif // PVA_API_CMDBUF_H

View File

@@ -0,0 +1,222 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#ifndef PVA_API_CUDA_H
#define PVA_API_CUDA_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cuda.h"
#include "pva_api_types.h"
/**
* @brief Structure for cuExtend queue data needed for command submission.
*/
struct pva_cuextend_queue_data {
/*! Holds a pointer to pva queue object */
struct pva_queue *queue;
/*! Holds engine affinity for command submission */
uint32_t affinity;
};
/**
* @brief Function type for cuExtend register memory callback
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] mem The pointer to a \ref pva_memory object. This register memory callback shall transfer
* ownership of the memory to the client, and it is the client's responsibility to release the memory.
* @param[in] cuda_ptr CUDA device pointer.
* @param[in] cached_flags The cached flags for the memory.
* @return \ref pva_error The completion status of register memory operation.
*/
typedef enum pva_error (*pva_cuextend_memory_register)(void *callback_args,
struct pva_memory *mem,
void *cuda_ptr,
uint32_t cached_flags);
/**
* @brief Function type for cuExtend unregister memory callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] cuda_ptr CUDA device pointer.
* @return \ref pva_error The completion status of unregister memory operation.
*/
typedef enum pva_error (*pva_cuextend_memory_unregister)(void *callback_args,
void *cuda_ptr);
/**
* @brief Function type for cuExtend register stream callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[out] stream_payload Client data associated with a CUDA stream.
* @param[in] flags Reserved for future use. Must be set to 0.
* @return \ref pva_error The completion status of register stream operation.
*/
typedef enum pva_error (*pva_cuextend_stream_register)(void *callback_args,
void **stream_payload,
uint64_t flags);
/**
* @brief Function type for cuExtend unregister stream callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register.
* @param[in] flags Reserved for future use. Must be set to 0.
* @return \ref pva_error The completion status of unregister stream operation.
*/
typedef enum pva_error (*pva_cuextend_stream_unregister)(void *callback_args,
void *stream_payload,
uint64_t flags);
/**
* @brief Function type for cuExtend acquire queue callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register.
* @param[out] queue_data Output pointer to a pva_cuextend_queue_data object.
* @return \ref pva_error The completion status of acquire queue operation.
*/
typedef enum pva_error (*pva_cuextend_queue_acquire)(
void *callback_args, void *stream_payload,
struct pva_cuextend_queue_data **queue_data);
/**
* @brief Function type for cuExtend release queue callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register.
* @param[in] queue_data Pointer to the pva_cuextend_queue_data object being released.
* @return \ref pva_error The completion status of the release queue operation.
*/
typedef enum pva_error (*pva_cuextend_queue_release)(void *callback_args,
void *stream_payload,
void *queue_data);
/**
* @brief Function type for retrieving error code from cuExtend.
*
* @param[in] teardown_ctx Pointer to the cuExtend teardown context.
*/
typedef enum pva_error (*pva_cuextend_get_error)(void *teardown_ctx);
/**
* @brief Function type for cuExtend teardown callback.
*
* The client is expected to take the following action in this callback:
* block waiting for all pending tasks on all queues, periodically checking for a CUDA error
* by calling \ref pva_cuextend_get_error and exiting the wait loop if an error occurs.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] teardown_ctx Pointer to a teardown context passed by cuExtend teardown callback.
* @param[in] get_error Function pointer for retrieving the CUDA error.
* @return \ref pva_error The completion status of the teardown operation.
*/
typedef enum pva_error (*pva_cuextend_teardown)(
void *callback_args, void *teardown_ctx,
pva_cuextend_get_error get_error);
/**
* @brief Structure for cuExtend callbacks provided by the caller during cuExtend initialization.
*/
struct pva_cuextend_callbacks {
/*! Holds the register memory callback */
pva_cuextend_memory_register mem_reg;
/*! Holds the unregister memory callback */
pva_cuextend_memory_unregister mem_unreg;
/*! Holds the register stream callback */
pva_cuextend_stream_register stream_reg;
/*! Holds the unregister stream callback */
pva_cuextend_stream_unregister stream_unreg;
/*! Holds the acquire queue callback */
pva_cuextend_queue_acquire queue_acquire;
/*! Holds the release queue callback */
pva_cuextend_queue_release queue_release;
/*! Holds the teardown callback */
pva_cuextend_teardown teardown;
/*! Pointer to the callback arguments provided by client during cuExtend initialization */
void *args;
};
/**
* @brief Initialize cuExtend context.
*
* This function must be called before any other cuExtend functions. It does the following:
*
* 1. Loads the cuExtend library and retrieves function pointers to the library's exported functions.
* 2. Adds PVA to the CUDA unified context model.
* 3. Initializes the opaque cuExtend impl pointer.
*
* @param[in] ctx Pointer to a PVA context object.
* @param[in] callbacks Pointer to CUDA interop callbacks.
* @return \ref pva_error The completion status of the initialization operation.
*/
enum pva_error pva_cuextend_init(struct pva_context *ctx,
struct pva_cuextend_callbacks *callbacks);
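/**
* Illustrative sketch (an assumption, not from the original documentation) of
* wiring client callbacks into cuExtend initialization. The my_* callbacks and
* struct my_client_state are hypothetical client code; only the callbacks
* structure and pva_cuextend_init come from this header.
*
* @code
* static enum pva_error init_cuda_interop(struct pva_context *ctx,
*                                         struct my_client_state *state)
* {
*     struct pva_cuextend_callbacks cbs = { 0 };
*
*     cbs.mem_reg = my_mem_register;          // hypothetical client callbacks,
*     cbs.mem_unreg = my_mem_unregister;      // matching the typedefs above
*     cbs.stream_reg = my_stream_register;
*     cbs.stream_unreg = my_stream_unregister;
*     cbs.queue_acquire = my_queue_acquire;
*     cbs.queue_release = my_queue_release;
*     cbs.teardown = my_teardown;
*     cbs.args = state;
*
*     return pva_cuextend_init(ctx, &cbs);
* }
* @endcode
*/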
/**
* @brief De-initialize cuExtend context.
*
* This function must be called from the client's context destructor. It does the following:
*
* 1. Clears the opaque cuExtend impl pointer in the pva context object.
* 2. Removes PVA from the cuExtend context.
* 3. Unloads the cuExtend library and clears all the function pointers.
*
* @param[in] ctx Pointer to a PVA context object.
* @return \ref pva_error The completion status of the de-initialization operation.
*/
enum pva_error pva_cuextend_deinit(struct pva_context *ctx);
/**
* @brief Import a memory region from a CUDA context into a PVA context.
*
* @param[in] ctx Pointer to a PVA context structure.
* @param[in] cuda_ptr Pointer to CUDA memory provided by client.
* @param[in] size Size of the memory region.
* @param[in] access_mode Access flag provided by the client.
* @param[out] out_mem Pointer to the imported memory object.
* @param[out] cached_flags Output cached flags for the memory.
* @return \ref pva_error The completion status of the import operation.
*/
enum pva_error pva_cuextend_memory_import(struct pva_context *ctx,
void *cuda_ptr, uint64_t size,
uint32_t access_mode,
struct pva_memory **out_mem,
uint32_t *cached_flags);
/**
* @brief Submit a batch of command buffers via a CUDA stream.
*
* @param[in] queue Pointer to the queue. If queue is not NULL, this API will try to submit the client tasks to this queue directly.
* Otherwise, it will call queue_acquire callback to query a pva_queue object from stream payload, and then submit
* the tasks to the queried queue.
* @param[in] stream A CUDA stream.
* @param[in] submit_infos Array of submit info structures.
* @param[in] count Number of submit info structures.
* @param[in] timeout_ms Timeout in milliseconds. PVA_TIMEOUT_INF for infinite.
* @return \ref pva_error The completion status of the submit operation.
*
* @note Concurrent submission to the same queue needs to be serialized by the
* caller.
*/
enum pva_error
pva_cuextend_cmdbuf_batch_submit(struct pva_queue *queue, CUstream stream,
struct pva_cmdbuf_submit_info *submit_infos,
uint32_t count, uint64_t timeout_ms);
#ifdef __cplusplus
}
#endif
#endif // PVA_API_CUDA_H

View File

@@ -0,0 +1,343 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_DMA_H
#define PVA_API_DMA_H
#include "pva_api_types.h"
/** Bit indices for VPU GPIO triggers */
enum pva_gpio_bit {
GPIO_VPU_CFG_BIT = 4U,
GPIO_READ0_BIT = 16U,
GPIO_READ1_BIT = 17U,
GPIO_READ2_BIT = 18U,
GPIO_READ3_BIT = 19U,
GPIO_READ4_BIT = 20U,
GPIO_READ5_BIT = 21U,
GPIO_READ6_BIT = 22U,
GPIO_WRITE0_BIT = 23U,
GPIO_WRITE1_BIT = 24U,
GPIO_WRITE2_BIT = 25U,
GPIO_WRITE3_BIT = 26U,
GPIO_WRITE4_BIT = 27U,
GPIO_WRITE5_BIT = 28U,
GPIO_WRITE6_BIT = 29U
};
enum pva_dma_descriptor_id {
PVA_DMA_DESC_NONE = 0,
PVA_DMA_DESC0 = 1,
PVA_DMA_DESC1 = 2,
PVA_DMA_DESC2 = 3,
PVA_DMA_DESC3 = 4,
PVA_DMA_DESC4 = 5,
PVA_DMA_DESC5 = 6,
PVA_DMA_DESC6 = 7,
PVA_DMA_DESC7 = 8,
PVA_DMA_DESC8 = 9,
PVA_DMA_DESC9 = 10,
PVA_DMA_DESC10 = 11,
PVA_DMA_DESC11 = 12,
PVA_DMA_DESC12 = 13,
PVA_DMA_DESC13 = 14,
PVA_DMA_DESC14 = 15,
PVA_DMA_DESC15 = 16,
PVA_DMA_DESC16 = 17,
PVA_DMA_DESC17 = 18,
PVA_DMA_DESC18 = 19,
PVA_DMA_DESC19 = 20,
PVA_DMA_DESC20 = 21,
PVA_DMA_DESC21 = 22,
PVA_DMA_DESC22 = 23,
PVA_DMA_DESC23 = 24,
PVA_DMA_DESC24 = 25,
PVA_DMA_DESC25 = 26,
PVA_DMA_DESC26 = 27,
PVA_DMA_DESC27 = 28,
PVA_DMA_DESC28 = 29,
PVA_DMA_DESC29 = 30,
PVA_DMA_DESC30 = 31,
PVA_DMA_DESC31 = 32,
PVA_DMA_DESC32 = 33,
PVA_DMA_DESC33 = 34,
PVA_DMA_DESC34 = 35,
PVA_DMA_DESC35 = 36,
PVA_DMA_DESC36 = 37,
PVA_DMA_DESC37 = 38,
PVA_DMA_DESC38 = 39,
PVA_DMA_DESC39 = 40,
PVA_DMA_DESC40 = 41,
PVA_DMA_DESC41 = 42,
PVA_DMA_DESC42 = 43,
PVA_DMA_DESC43 = 44,
PVA_DMA_DESC44 = 45,
PVA_DMA_DESC45 = 46,
PVA_DMA_DESC46 = 47,
PVA_DMA_DESC47 = 48,
PVA_DMA_DESC48 = 49,
PVA_DMA_DESC49 = 50,
PVA_DMA_DESC50 = 51,
PVA_DMA_DESC51 = 52,
PVA_DMA_DESC52 = 53,
PVA_DMA_DESC53 = 54,
PVA_DMA_DESC54 = 55,
PVA_DMA_DESC55 = 56,
PVA_DMA_DESC56 = 57,
PVA_DMA_DESC57 = 58,
PVA_DMA_DESC58 = 59,
PVA_DMA_DESC59 = 60,
PVA_DMA_DESC60 = 61,
PVA_DMA_DESC61 = 62,
PVA_DMA_DESC62 = 63,
PVA_DMA_DESC63 = 64
};
/**
* The values of the enum members conform to the definitions of DMA descriptors'
* trig_vpu_events field. Therefore, they can be assigned to trig_vpu_events
* directly.
*/
enum pva_dma_trigger {
PVA_DMA_NO_TRIG = 0,
PVA_DMA_TRIG_READ0,
PVA_DMA_TRIG_WRITE0,
PVA_DMA_TRIG_VPU_CFG,
PVA_DMA_TRIG_READ1,
PVA_DMA_TRIG_WRITE1,
PVA_DMA_TRIG_READ2,
PVA_DMA_TRIG_WRITE2,
PVA_DMA_TRIG_READ3,
PVA_DMA_TRIG_WRITE3,
PVA_DMA_TRIG_READ4,
PVA_DMA_TRIG_WRITE4,
PVA_DMA_TRIG_READ5,
PVA_DMA_TRIG_WRITE5,
PVA_DMA_TRIG_READ6,
PVA_DMA_TRIG_WRITE6,
PVA_DMA_TRIG_HWSEQ_RD,
PVA_DMA_TRIG_HWSEQ_WR,
};
enum pva_dma_trigger_mode {
PVA_DMA_TRIG_MODE_DIS = 0,
PVA_DMA_TRIG_MODE_4TH_DIM,
PVA_DMA_TRIG_MODE_3RD_DIM,
PVA_DMA_TRIG_MODE_TILE
};
enum pva_dma_transfer_mode {
PVA_DMA_TRANS_MODE_INVALID = 0,
PVA_DMA_TRANS_MODE_DRAM = 1,
PVA_DMA_TRANS_MODE_VMEM = 2,
PVA_DMA_TRANS_MODE_L2SRAM = 3,
PVA_DMA_TRANS_MODE_TCM = 4,
/** MMIO is valid as dst in VPU config mode only */
PVA_DMA_TRANS_MODE_MMIO = 5,
PVA_DMA_TRANS_MODE_RSVD = 5,
/** VPU config mode, valid for src only */
PVA_DMA_TRANS_MODE_VPUCFG = 7
};
struct pva_dma_transfer_attr {
uint8_t rpt1;
uint8_t rpt2;
uint8_t rpt3;
uint8_t cb_enable;
uint8_t transfer_mode;
/** When dynamic slot flag is set, it means the memory location will be
* relocated by commands.
*/
#define PVA_DMA_DYNAMIC_SLOT (1 << 15)
#define PVA_DMA_STATIC_SLOT (1 << 14)
#define PVA_DMA_SLOT_INVALID 0
#define PVA_DMA_SLOT_ID_MASK 0xFF
#define PVA_DMA_MAX_NUM_SLOTS 256
uint16_t slot;
/** Line pitch in pixels */
uint16_t line_pitch;
uint32_t cb_start;
uint32_t cb_size;
int32_t adv1;
int32_t adv2;
int32_t adv3;
uint64_t offset;
};
struct pva_dma_descriptor {
/**
* Linked descriptor ID
*
* - 0: No linked descriptor
* - N (> 0): Linking to descriptor N - 1 in the descriptor array
*/
uint8_t link_desc_id;
uint8_t px;
uint8_t py;
/** enum pva_dma_trigger_mode */
uint8_t trig_event_mode;
/** Trigger from enum pva_dma_trigger */
uint8_t trig_vpu_events;
uint8_t desc_reload_enable;
/**
* Log2(number bytes per pixel).
*
* - 0: 1 byte per pixel
* - 1: 2 bytes per pixel
* - 2: 4 bytes per pixel
* - others: invalid
*/
uint8_t log2_pixel_size;
uint8_t px_direction;
uint8_t py_direction;
uint8_t boundary_pixel_extension;
/** TCM transfer size */
uint8_t tts;
/**
* - 0: transfer true completion disabled
* - 1: transfer true completion enabled
*/
uint8_t trans_true_completion;
uint8_t prefetch_enable;
uint16_t tx;
uint16_t ty;
uint16_t dst2_slot;
uint32_t dst2_offset;
struct pva_dma_transfer_attr src;
struct pva_dma_transfer_attr dst;
};
struct pva_dma_channel {
/**
* Starting descriptor index in the descriptor array
*
* Valid range is [0, max_num_descriptors - 1]. This is different from
* link_desc_id field, where 0 means no linked descriptor.
*/
uint8_t desc_index;
uint8_t vdb_count;
uint8_t vdb_offset;
uint8_t req_per_grant;
uint8_t prefetch_enable;
uint8_t ch_rep_factor;
uint8_t hwseq_enable;
uint8_t hwseq_traversal_order;
uint8_t hwseq_tx_select;
uint8_t hwseq_trigger_done;
uint8_t hwseq_frame_count;
uint8_t hwseq_con_frame_seq;
uint16_t hwseq_start;
uint16_t hwseq_end;
uint16_t adb_count;
uint16_t adb_offset;
/*!
* Holds the trigger signal this channel will react to.
*
* IAS:
* DMA_COMMON_DMA_OUTPUT_ENABLEn (4 Bytes)
*
* Mapping:
* chanId corresponding to this structure is allocated by KMD.
* DMA_COMMON_DMA_OUTPUT_ENABLE0.bit[chanId] = outputEnableMask.bit[0];
* DMA_COMMON_DMA_OUTPUT_ENABLE0.bit[16 + chanId] = outputEnableMask.bit[1];
* DMA_COMMON_DMA_OUTPUT_ENABLE1.bit[chanId] = outputEnableMask.bit[2];
* DMA_COMMON_DMA_OUTPUT_ENABLE1.bit[16 + chanId] = outputEnableMask.bit[3];
* DMA_COMMON_DMA_OUTPUT_ENABLE2.bit[chanId] = outputEnableMask.bit[4];
* DMA_COMMON_DMA_OUTPUT_ENABLE2.bit[16 + chanId] = outputEnableMask.bit[5];
* DMA_COMMON_DMA_OUTPUT_ENABLE3.bit[chanId] = outputEnableMask.bit[6];
* DMA_COMMON_DMA_OUTPUT_ENABLE3.bit[16 + chanId] = outputEnableMask.bit[7];
* DMA_COMMON_DMA_OUTPUT_ENABLE4.bit[chanId] = outputEnableMask.bit[8];
* DMA_COMMON_DMA_OUTPUT_ENABLE4.bit[16 + chanId] = outputEnableMask.bit[9];
* DMA_COMMON_DMA_OUTPUT_ENABLE5.bit[chanId] = outputEnableMask.bit[10];
* DMA_COMMON_DMA_OUTPUT_ENABLE5.bit[16 + chanId] = outputEnableMask.bit[11];
* DMA_COMMON_DMA_OUTPUT_ENABLE6.bit[chanId] = outputEnableMask.bit[12];
* DMA_COMMON_DMA_OUTPUT_ENABLE6.bit[16 + chanId] = outputEnableMask.bit[13];
* DMA_COMMON_DMA_OUTPUT_ENABLE7.bit[chanId] = outputEnableMask.bit[14];
* DMA_COMMON_DMA_OUTPUT_ENABLE8.bit[chanId] = outputEnableMask.bit[15];
* DMA_COMMON_DMA_OUTPUT_ENABLE8.bit[16 + chanId] = outputEnableMask.bit[16];
*/
uint32_t output_enable_mask;
uint32_t pad_value;
};
struct pva_dma_config_header {
/* To make the allocation and tracking of DMA resources efficient, DMA resources
* are allocated in groups. For example, descriptors may be allocated in groups of 4, which
* means that every allocation of descriptors will start at an alignment of 4. The following
* macros control the alignment/grouping requirement of DMA resources.
*/
// TODO: Add compile-time asserts to ensure the following alignment requirements don't result
// in fractional resource partitions?
#define PVA_DMA_CHANNEL_ALIGNMENT 1
#define PVA_DMA_DESCRIPTOR_ALIGNMENT 4
#define PVA_DMA_ADB_ALIGNMENT 16
#define PVA_DMA_HWSEQ_WORD_ALIGNMENT 128
uint8_t base_channel;
uint8_t base_descriptor;
uint8_t num_channels;
uint8_t num_descriptors;
uint16_t num_static_slots;
uint16_t num_dynamic_slots;
uint16_t base_hwseq_word;
uint16_t num_hwseq_words;
uint32_t vpu_exec_resource_id;
/* For serialized version of pva_dma_config, the following fields follow
* immediately after this header. The starting addresses of these fields
* must be aligned to 8 bytes */
/* An array of hwseq words */
/* An array of pva_dma_channel */
/* An array of pva_dma_descriptor */
/* An array of pva_dma_slot_buffer */
};
enum pva_dma_static_binding_type {
PVA_DMA_STATIC_BINDING_INVALID = 0,
PVA_DMA_STATIC_BINDING_DRAM,
PVA_DMA_STATIC_BINDING_VMEM,
};
/** Max block height is 32 GOB */
#define PVA_DMA_MAX_LOG2_BLOCK_HEIGHT 5
struct pva_dma_dram_binding {
/** enum pva_surface_format */
uint8_t surface_format;
uint8_t log2_block_height;
uint32_t resource_id;
uint64_t surface_base_offset;
uint64_t slot_offset;
};
struct pva_dma_vmem_binding {
struct pva_vmem_addr addr;
};
struct pva_dma_static_binding {
/** enum pva_dma_static_binding_type */
uint8_t type;
union {
struct pva_dma_dram_binding dram;
struct pva_dma_vmem_binding vmem;
};
};
struct pva_dma_config {
struct pva_dma_config_header header;
uint32_t *hwseq_words;
struct pva_dma_channel *channels;
struct pva_dma_descriptor *descriptors;
struct pva_dma_static_binding *static_bindings;
};
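/**
* Illustrative sketch (an assumption, not from the original documentation): a
* minimal DMA configuration with one channel driving one descriptor and no
* hardware sequencer words. Tile sizes are placeholders and memset is assumed
* to be available through the existing includes; only the structure layouts
* come from this header.
*
* @code
* static void build_minimal_dma_config(struct pva_dma_config *cfg,
*                                      struct pva_dma_channel *chan,
*                                      struct pva_dma_descriptor *desc,
*                                      uint32_t vpu_exec_resource_id)
* {
*     memset(cfg, 0, sizeof(*cfg));
*     memset(chan, 0, sizeof(*chan));
*     memset(desc, 0, sizeof(*desc));
*
*     cfg->header.num_channels = 1U;
*     cfg->header.num_descriptors = 1U;
*     cfg->header.vpu_exec_resource_id = vpu_exec_resource_id;
*
*     chan->desc_index = 0U;       // start at descriptor 0 of the array below
*     desc->link_desc_id = 0U;     // no linked descriptor
*     desc->log2_pixel_size = 0U;  // 1 byte per pixel
*     desc->tx = 64U;              // placeholder tile width in pixels
*     desc->ty = 16U;              // placeholder tile height
*
*     cfg->hwseq_words = NULL;
*     cfg->channels = chan;
*     cfg->descriptors = desc;
*     cfg->static_bindings = NULL;
* }
* @endcode
*/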
#endif // PVA_API_DMA_H

View File

@@ -0,0 +1,202 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_NVSCI_H
#define PVA_API_NVSCI_H
#ifdef __cplusplus
extern "C" {
#endif
#include "pva_api_types.h"
#include "nvscibuf.h"
#include "nvscisync.h"
/**
* @brief Fill NvSciBuf attributes required by PVA.
*
* @param[out] scibuf_attr The NvSciBuf attribute list to be filled with PVA-specific attributes.
*/
enum pva_error pva_nvsci_buf_fill_attrs(NvSciBufAttrList scibuf_attr);
/**
* @brief Fill NvSciSync attributes required by PVA.
*
* @param[in] access_mode Access mode for the sync object, determining how PVA
* will interact with the sync object (read, write, etc.)
* @param[out] attr_list The NvSciSync attribute list to be populated with attributes.
*/
enum pva_error pva_nvsci_sync_fill_attrs(uint32_t access_mode,
NvSciSyncAttrList attr_list);
/**
* @brief Holds the metadata for a NvSci plane.
*/
struct pva_plane_attrs {
uint32_t line_pitch;
uint32_t width_in_bytes;
uint32_t height;
uint64_t offset;
};
#define PVA_SURFACE_ATTRS_MAX_NUM_PLANES 6U
/**
* @brief Holds the metadata for a NvSci surface.
*/
struct pva_surface_attrs {
bool is_surface;
enum pva_surface_format format;
uint32_t n_planes;
uint64_t size;
struct pva_plane_attrs planes[PVA_SURFACE_ATTRS_MAX_NUM_PLANES];
uint8_t log2_gobs_per_block_y[PVA_SURFACE_ATTRS_MAX_NUM_PLANES];
};
/**
* @brief Import an NvSciBuf object into PVA.
*
* This function imports an NvSciBuf buffer object into PVA for further
* operations. It creates a PVA memory object representing the buffer and
* retrieves surface information about the buffer.
*
* The caller is responsible for freeing the PVA memory object.
*
* @param[in] obj The NvSciBuf object to be imported.
* @param[in] access_mode Access mode for the buffer, determining the PVA's permissions for interaction.
* @param[out] out_obj A pointer to the PVA memory object representing the imported buffer.
* @param[out] out_surf_info Surface metadata of the buffer
*/
enum pva_error pva_nvsci_buf_import(NvSciBufObj obj, uint32_t access_mode,
struct pva_memory **out_obj,
struct pva_surface_attrs *out_surf_info);
/**
* @brief An opaque object representing an imported NvSciSync object.
*/
struct pva_nvsci_syncobj;
/**
* @brief Describes the attributes of an imported NvSciSync object.
*
* This structure contains details about the memory buffers associated with the
* imported NvSciSync object.
*/
struct pva_nvsci_syncobj_attrs {
struct pva_memory *
semaphore_buf; /**< Pointer to the semaphore memory buffer; NULL if syncpoints are used. */
struct pva_memory *
timestamp_buf; /**< Pointer to the timestamp memory buffer; NULL if unused. */
struct pva_memory
*status_buf; /**< Pointer to the status memory buffer. */
};
/**
* @brief Import an NvSciSync object into the PVA.
*
* This function imports an NvSciSync object into PVA, enabling it to be used
* for synchronization of operations.
*
* @param[in] ctx The PVA context in which the sync object is to be used.
* @param[in] nvsci_obj The NvSciSync object to be imported.
* @param[in] access_mode The access mode for the sync object, indicating how PVA will use it.
* @param[out] out_obj A pointer to the resulting PVA sync object handle.
*/
enum pva_error pva_nvsci_syncobj_import(struct pva_context *ctx,
NvSciSyncObj nvsci_obj,
uint32_t access_mode,
struct pva_nvsci_syncobj **out_obj);
/**
* @brief Retrieve the attributes of an imported NvSciSync object.
*
* This function fills in the provided attribute structure with details from
* the imported NvSciSync object, including information relevant for semaphores,
* timestamps, and status.
*
* @param[in] syncobj The NvSciSync object whose attributes are to be retrieved.
* @param[out] out_attrs The structure to be filled with the sync object's attributes.
*/
void pva_nvsci_syncobj_get_attrs(struct pva_nvsci_syncobj const *syncobj,
struct pva_nvsci_syncobj_attrs *out_attrs);
/**
* @brief Free an imported NvSciSync object.
*
* This function releases the resources associated with a PVA NvSciSync object,
* including PVA memory objects for semaphores, timestamps and statuses.
*
* @param[in] syncobj The PVA sync object to be freed.
*/
void pva_nvsci_syncobj_free(struct pva_nvsci_syncobj *syncobj);
/**
* @brief Get the next status slot for a new fence.
*
* @param[in] syncobj The imported NvSciSyncObj
* @param[out] out_status_slot The status slot index for the next fence.
*/
enum pva_error pva_nvsci_syncobj_next_status(struct pva_nvsci_syncobj *syncobj,
uint32_t *out_status_slot);
/**
* @brief Get the next timestamp slot for a new fence.
*
* @param[in] syncobj The imported NvSciSyncObj
* @param[out] out_timestamp_slot The timestamp slot index for the next fence.
*/
enum pva_error
pva_nvsci_syncobj_next_timestamp(struct pva_nvsci_syncobj *syncobj,
uint32_t *out_timestamp_slot);
/**
* @brief Fence data for import and export.
*/
struct pva_nvsci_fence_info {
uint32_t index; /**< The index of the fence. */
uint32_t value; /**< The value of the fence. */
uint32_t status_slot; /**< The slot index for the status. */
uint32_t timestamp_slot; /**< The slot index for the timestamp. */
};
/**
* @brief Import a NvSciSync fence into a PVA fence.
*
* @param[in] nvsci_fence The NvSciSync fence to be imported.
* @param[in] pva_syncobj The previously imported NvSciSyncObj that's associated with the fence.
* @param[out] out_fence_info The information about the NvSci fence. It can be used to fill a pva_fence.
*
* @note This function only fills the index and value field of the pva_fence.
* The user needs to set the semaphore resource ID if the sync object is a
* semaphore.
*
*/
enum pva_error
pva_nvsci_fence_import(NvSciSyncFence const *nvsci_fence,
struct pva_nvsci_syncobj const *pva_syncobj,
struct pva_nvsci_fence_info *out_fence_info);
/**
* @brief Export a PVA fence into an NvSciSync fence.
*
* @param[in] fence_info The information about the fence to be exported.
* @param[in] syncobj The previously imported NvSciSyncObj that's associated with the fence.
* @param[out] out_nvsci_fence The resulting NvSciSync fence object.
*/
enum pva_error
pva_nvsci_fence_export(struct pva_nvsci_fence_info const *fence_info,
struct pva_nvsci_syncobj const *syncobj,
NvSciSyncFence *out_nvsci_fence);
#ifdef __cplusplus
}
#endif
#endif // PVA_API_NVSCI_H

View File

@@ -0,0 +1,396 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_TYPES_H
#define PVA_API_TYPES_H
#if !defined(__KERNEL__)
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#define container_of(ptr, type, member) \
(type *)((char *)(ptr) - (char *)&((type *)0)->member)
#else
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#define UINT64_MAX U64_MAX
#define UINT32_MAX U32_MAX
#endif
#ifndef NULL
#define NULL ((void *)0)
#endif
#define FOREACH_ERR(ACT) \
ACT(PVA_SUCCESS) \
ACT(PVA_UNKNOWN_ERROR) \
ACT(PVA_BAD_PARAMETER_ERROR) \
ACT(PVA_NOT_IMPL) \
ACT(PVA_NOENT) \
ACT(PVA_NOMEM) \
ACT(PVA_INVAL) \
ACT(PVA_TIMEDOUT) \
ACT(PVA_INTERNAL) \
ACT(PVA_CMDBUF_NOT_FOUND) \
ACT(PVA_CMDBUF_INVALID) \
ACT(PVA_CMDBUF_TOO_LARGE) \
ACT(PVA_RES_OUT_OF_RANGE) \
ACT(PVA_AGAIN) \
ACT(PVA_NO_RESOURCE_ID) \
ACT(PVA_INVALID_RESOURCE) \
ACT(PVA_INVALID_RESOURCE_SIZE) \
ACT(PVA_INVALID_RESOURCE_ALIGNMENT) \
ACT(PVA_QUEUE_FULL) \
ACT(PVA_INVALID_IOVA) \
ACT(PVA_NO_PERM) \
ACT(PVA_INVALID_CMD_OPCODE) \
ACT(PVA_BUF_OUT_OF_RANGE) \
ACT(PVA_CMDBUF_NO_BEGIN) \
ACT(PVA_NO_CCQ) \
ACT(PVA_INPUT_STATUS_ERROR) \
ACT(PVA_ENOSPC) \
ACT(PVA_EACCES) \
ACT(PVA_ERANGE) \
ACT(PVA_BAD_SURFACE_BASE_ALIGNMENT) \
ACT(PVA_BAD_DESC_ADDR_ALIGNMENT) \
ACT(PVA_INVALID_DMA_CONFIG) \
ACT(PVA_INVALID_SYMBOL) \
ACT(PVA_INVALID_BINDING) \
ACT(PVA_EINTR) \
ACT(PVA_FILL_NVSCIBUF_ATTRS_FAILED) \
ACT(PVA_NVSCIBUF_SET_ATTR_FAILED) \
ACT(PVA_IMPORT_FROM_NVSCIBUF_FAILED) \
ACT(PVA_NVSCISYNC_SET_ATTR_FAILED) \
ACT(PVA_RETRIEVE_DATA_FROM_NVSCISYNC_FAILED) \
ACT(PVA_UPDATE_DATA_TO_NVSCISYNC_FAILED) \
ACT(PVA_UNSUPPORTED_NVSCISYNC_TIMESTAMP_FORMAT) \
ACT(PVA_INVALID_NVSCISYNC_FENCE) \
ACT(PVA_ERR_CMD_NOT_SUPPORTED) \
ACT(PVA_CUDA_INITIALIZED) \
ACT(PVA_CUDA_LOAD_LIBRARY_FAILED) \
ACT(PVA_CUDA_ADD_CLIENT_FAILED) \
ACT(PVA_CUDA_REMOVE_CLIENT_FAILED) \
ACT(PVA_CUDA_INIT_FAILED) \
ACT(PVA_CUDA_SUBMIT_FAILED) \
ACT(PVA_CUDA_GET_RM_HANDLE_FAILED) \
ACT(PVA_CUDA_INTERNAL_ERROR) \
ACT(PVA_ERR_CMD_INVALID_VPU_STATE) \
ACT(PVA_ERR_CMD_VMEM_BUF_OUT_OF_RANGE) \
ACT(PVA_ERR_CMD_L2SRAM_BUF_OUT_OF_RANGE) \
ACT(PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE) \
ACT(PVA_ERR_CMD_INVALID_BLOCK_HEIGHT) \
ACT(PVA_ERR_CMD_PAYLOAD_TOO_SMALL) \
ACT(PVA_ERR_CMD_ENGINE_NOT_ACQUIRED) \
ACT(PVA_ERR_CMD_INVALID_SYMBOL_TYPE) \
ACT(PVA_ERR_CMD_INVALID_ENGINE) \
ACT(PVA_ERR_CMD_INVALID_DMA_SET_ID) \
ACT(PVA_ERR_CMD_INVALID_DMA_SLOT_ID) \
ACT(PVA_ERR_CMD_INVALID_DMA_SLOT_TYPE) \
ACT(PVA_ERR_CMD_INVALID_USER_ALLOWANCE) \
ACT(PVA_ERR_CMD_INCOMPATIBLE_RESOURCE) \
ACT(PVA_ERR_CMD_INSUFFICIENT_PRIVILEGE) \
ACT(PVA_ERR_CMD_INVALID_BARRIER_ID) \
ACT(PVA_ERR_CMD_CAPTURE_SLOTS_EXCEEDED) \
ACT(PVA_ERR_CMD_INVALID_CAPTURE_MODE) \
ACT(PVA_ERR_CMD_INVALID_L2SRAM_POLICY) \
ACT(PVA_ERR_FW_DMA0_IRQ_ENABLE_FAILED) \
ACT(PVA_ERR_FW_DMA1_IRQ_ENABLE_FAILED) \
ACT(PVA_ERR_FW_BAD_DMA_STATE) \
ACT(PVA_ERR_FW_RESOURCE_IN_USE) \
ACT(PVA_ERR_FW_VPU_ERROR_STATE) \
ACT(PVA_ERR_FW_VPU_RETCODE_NONZERO) \
ACT(PVA_ERR_FW_INVALID_CMD_OPCODE) \
ACT(PVA_ERR_FW_INVALID_VPU_CMD_SEQ) \
ACT(PVA_ERR_FW_INVALID_DMA_CMD_SEQ) \
ACT(PVA_ERR_FW_INVALID_L2SRAM_CMD_SEQ) \
ACT(PVA_ERR_FW_ENGINE_NOT_RELEASED) \
ACT(PVA_ERR_FW_UTEST) \
ACT(PVA_ERR_VPU_ERROR_STATE) \
ACT(PVA_ERR_VPU_RETCODE_NONZERO) \
ACT(PVA_ERR_VPU_ILLEGAL_INSTR) \
ACT(PVA_ERR_VPU_DIVIDE_BY_0) \
ACT(PVA_ERR_VPU_FP_NAN) \
ACT(PVA_ERR_VPU_IN_DEBUG) \
ACT(PVA_ERR_VPU_DLUT_CFG) \
ACT(PVA_ERR_VPU_DLUT_MISS) \
ACT(PVA_ERR_VPU_CP_ACCESS) \
ACT(PVA_ERR_PPE_ILLEGAL_INSTR) \
ACT(PVA_ERR_MATH_OP) \
ACT(PVA_ERR_HWSEQ_INVALID) \
ACT(PVA_ERR_CODE_COUNT)
enum pva_error {
#define ADD_COMMA(name) name,
FOREACH_ERR(ADD_COMMA)
#undef ADD_COMMA
};
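/**
* Illustrative sketch: the FOREACH_ERR X-macro above can also expand into a
* name lookup, which is convenient for logging. This helper is an example,
* not part of the API.
*
* @code
* static inline const char *pva_error_name(enum pva_error err)
* {
*     switch (err) {
* #define ADD_CASE(name) case name: return #name;
*     FOREACH_ERR(ADD_CASE)
* #undef ADD_CASE
*     default:
*         return "UNKNOWN_PVA_ERROR";
*     }
* }
* @endcode
*/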
enum pva_chip_id {
PVA_CHIP_T19X,
PVA_CHIP_T23X,
PVA_CHIP_T26X,
PVA_CHIP_OTHERS
};
enum pva_hw_gen {
PVA_HW_GEN1,
PVA_HW_GEN2,
PVA_HW_GEN3,
};
/* Opaque API data types */
struct pva_context;
struct pva_queue;
struct pva_memory;
struct pva_memory_attrs {
uint32_t access_mode;
uint64_t offset;
uint64_t size;
};
/**
* @brief A memory address accessible by PVA.
*/
struct pva_dram_addr {
uint32_t resource_id;
uint64_t offset;
};
struct pva_vmem_addr {
uint32_t symbol_id;
uint32_t offset;
};
/**
* @brief Represents a synchronization fence, which can be associated with
* either a memory semaphore or a syncpoint for signaling or waiting operations.
*
* The UMD handles semaphores and syncpoints differently when used as
* postfences:
* - Semaphores: UMD does not track future values.
* - Syncpoints: UMD tracks future values.
*
* To use a semaphore for either prefences or postfences:
* - Set `semaphore_resource_id` to the resource ID of the memory backing the semaphore.
* - Set `index` to the byte offset divided by the semaphore size (`sizeof(uint32_t)`).
* - Set `value` to the semaphore's signaling or waiting value.
*
* To use syncpoint for prefences:
* - Set `semaphore_resource_id` to `PVA_RESOURCE_ID_INVALID`.
* - Set `index` to the syncpoint ID to wait for.
* - Set `value` to the waiting value.
*
* To use syncpoint for postfences:
* - Set `semaphore_resource_id` to `PVA_RESOURCE_ID_INVALID`.
* - Do not set `index` or `value`.
* - After submission, UMD will assign `index` to the queue syncpoint ID and `value` to the expected future value.
*/
struct pva_fence {
/** Resource ID of the memory semaphore. If resource ID is
* PVA_RESOURCE_ID_INVALID, then the sync object primitive is assumed to
* be syncpoint. */
uint32_t semaphore_resouce_id;
/** Represents either the semaphore index or the syncpoint ID, depending
* on the sync object primitive type.
*/
uint32_t index;
/** Represents the semaphore or syncpoint value used for signaling or
* waiting. */
uint32_t value;
};
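/**
* Illustrative sketch of filling a semaphore-backed fence following the rules
* above; the byte_offset parameter is a placeholder chosen by the caller.
*
* @code
* static void fill_semaphore_fence(struct pva_fence *fence,
*                                  uint32_t semaphore_resource_id,
*                                  uint64_t byte_offset, uint32_t value)
* {
*     fence->semaphore_resouce_id = semaphore_resource_id;
*     fence->index = (uint32_t)(byte_offset / sizeof(uint32_t));
*     fence->value = value;
* }
* @endcode
*/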
struct pva_fw_vpu_ptr_symbol {
uint64_t base;
uint64_t offset;
uint64_t size;
};
struct pva_fw_vpu_legacy_ptr_symbol {
uint64_t base;
uint32_t offset;
uint32_t size;
};
enum pva_surface_format {
PVA_SURF_FMT_PITCH_LINEAR = 0,
PVA_SURF_FMT_BLOCK_LINEAR
};
enum pva_memory_segment {
/** Memory segment directly reachable by R5. Command buffer chunk
* memories need to be allocated from this segment */
PVA_MEMORY_SEGMENT_R5 = 1,
/** Memory segment reachable only by DMA. User buffers should be
* allocated from this segment */
PVA_MEMORY_SEGMENT_DMA = 2,
};
enum pva_symbol_type {
/*! Specifies an invalid symbol type */
PVA_SYM_TYPE_INVALID = 0,
/*! Specifies a data symbol */
PVA_SYM_TYPE_DATA,
/*! Specifies a VPU config table symbol */
PVA_SYM_TYPE_VPUC_TABLE,
/*! Specifies a Pointer symbol */
PVA_SYM_TYPE_POINTER,
/*! Specifies a System symbol */
PVA_SYM_TYPE_SYSTEM,
/*! Specifies an extended Pointer symbol */
PVA_SYM_TYPE_POINTER_EX,
PVA_SYM_TYPE_MAX,
};
/**
* \brief Holds PVA Sync Client Type.
* Currently NvSciSync supports NvSciSyncFences with syncpoint primitive type only.
*/
enum pva_sync_client_type {
/*! For a given SyncObj PVA acts as a signaler. This type corresponds to
* postfences from PVA. */
PVA_SYNC_CLIENT_TYPE_SIGNALER,
/*! For a given SyncObj PVA acts as a waiter. This type corresponds to
* prefences to PVA. */
PVA_SYNC_CLIENT_TYPE_WAITER,
/*! For a given SyncObj PVA acts as both signaler and waiter. */
PVA_SYNC_CLIENT_TYPE_SIGNALER_WAITER,
/*! Specifies the non inclusive upper bound of valid values. */
PVA_SYNC_CLIENT_TYPE_MAX,
/*! Reserved bound of valid values. */
PVA_SYNC_CLIENT_TYPE_RESERVED = 0x7FFFFFFF,
};
#define PVA_SYMBOL_ID_INVALID 0U
#define PVA_SYMBOL_ID_BASE 1U
#define PVA_MAX_SYMBOL_NAME_LEN 64U
struct pva_symbol_info {
char name[PVA_MAX_SYMBOL_NAME_LEN + 1U];
enum pva_symbol_type symbol_type;
uint32_t size;
uint32_t vmem_addr;
/** Symbol ID local to this executable */
uint32_t symbol_id; /**< Starting from PVA_SYMBOL_ID_BASE */
};
#define PVA_RESOURCE_ID_INVALID 0U
#define PVA_RESOURCE_ID_BASE 1U
struct pva_resource_entry {
#define PVA_RESOURCE_TYPE_INVALID 0U
#define PVA_RESOURCE_TYPE_DRAM 1U
#define PVA_RESOURCE_TYPE_EXEC_BIN 2U
#define PVA_RESOURCE_TYPE_DMA_CONFIG 3U
uint8_t type;
uint8_t smmu_context_id;
uint8_t addr_hi;
uint8_t size_hi;
uint32_t addr_lo;
uint32_t size_lo;
};
/** \brief Maximum number of queues per context */
#define PVA_MAX_QUEUES_PER_CONTEXT (8)
/** \brief Specifies the memory is GPU CACHED. */
#define PVA_GPU_CACHED_MEMORY (1u << 1u)
#define PVA_ACCESS_RO (1U << 0) /**< Read only access */
#define PVA_ACCESS_WO (1U << 1) /**< Write only access */
#define PVA_ACCESS_RW \
(PVA_ACCESS_RO | PVA_ACCESS_WO) /**< Read and write access */
#define PVA_TIMEOUT_INF UINT64_MAX /**< Infinite timeout */
#define PVA_MAX_NUM_INPUT_STATUS 2 /**< Maximum number of input statuses */
#define PVA_MAX_NUM_OUTPUT_STATUS 2 /**< Maximum number of output statuses */
#define PVA_MAX_NUM_PREFENCES 2 /**< Maximum number of pre-fences */
#define PVA_MAX_NUM_POSTFENCES 2 /**< Maximum number of post-fences */
/** Maximum number of timestamps */
#define PVA_MAX_NUM_TIMESTAMPS PVA_MAX_NUM_POSTFENCES
struct pva_cmdbuf_submit_info {
uint8_t num_prefences;
uint8_t num_postfences;
uint8_t num_input_status;
uint8_t num_output_status;
uint8_t num_timestamps;
#define PVA_ENGINE_AFFINITY_NONE 0
#define PVA_ENGINE_AFFINITY_ENGINE0 (1 << 0)
#define PVA_ENGINE_AFFINITY_ENGINE1 (1 << 1)
#define PVA_ENGINE_AFFINITY_ANY \
(PVA_ENGINE_AFFINITY_ENGINE0 | PVA_ENGINE_AFFINITY_ENGINE1)
uint8_t engine_affinity;
/** Size of the first chunk */
uint16_t first_chunk_size;
/** Resource ID of the first chunk */
uint32_t first_chunk_resource_id;
/** Offset of the first chunk within the resource */
uint64_t first_chunk_offset;
#define PVA_EXEC_TIMEOUT_REUSE 0xFFFFFFFFU
#define PVA_EXEC_TIMEOUT_INF 0U
/** Execution Timeout */
uint32_t execution_timeout_ms;
struct pva_fence prefences[PVA_MAX_NUM_PREFENCES];
struct pva_fence postfences[PVA_MAX_NUM_POSTFENCES];
struct pva_dram_addr input_statuses[PVA_MAX_NUM_INPUT_STATUS];
struct pva_dram_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS];
struct pva_dram_addr timestamps[PVA_MAX_NUM_TIMESTAMPS];
};
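/**
* Illustrative sketch (an assumption, not from the original documentation) of
* a minimal submit descriptor: one command buffer chunk, no fences or status
* buffers, any engine, infinite execution timeout. memset is assumed to be
* available through the existing includes.
*
* @code
* static void fill_minimal_submit(struct pva_cmdbuf_submit_info *info,
*                                 uint32_t chunk_resource_id,
*                                 uint16_t chunk_size_bytes)
* {
*     memset(info, 0, sizeof(*info));
*     info->engine_affinity = PVA_ENGINE_AFFINITY_ANY;
*     info->first_chunk_resource_id = chunk_resource_id;
*     info->first_chunk_offset = 0U;
*     info->first_chunk_size = chunk_size_bytes;
*     info->execution_timeout_ms = PVA_EXEC_TIMEOUT_INF;
* }
* @endcode
*/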
struct pva_ops_buffer {
void *base; /**< Buffer holding a list of async operations */
uint32_t offset; /**< First unused byte in the buffer */
uint32_t size; /**< Size of the buffer */
};
struct pva_cmdbuf_status {
/** Timestamp reflecting when the status was updated, in nanoseconds */
uint64_t timestamp;
/** Additional status information for the engine state */
uint32_t info32;
/** Additional status information for the engine state */
uint16_t info16;
/** Error code. Type: enum pva_error */
uint16_t status;
};
/** \brief Holds the PVA capabilities. */
struct pva_characteristics {
/*! Holds the number of PVA engines. */
uint32_t pva_engine_count;
/*! Holds the number of VPUs per PVA engine. */
uint32_t pva_pve_count;
/*! Holds the PVA generation information */
enum pva_hw_gen hw_version;
uint16_t max_desc_count;
uint16_t max_ch_count;
uint16_t max_adb_count;
uint16_t max_hwseq_word_count;
uint16_t max_vmem_region_count;
uint16_t reserved_desc_start;
uint16_t reserved_desc_count;
uint16_t reserved_adb_start;
uint16_t reserved_adb_count;
};
enum pva_error_inject_codes {
PVA_ERR_INJECT_WDT_HW_ERR, // Watchdog hardware error
PVA_ERR_INJECT_WDT_TIMEOUT, // Watchdog timeout error
};
/*
* !!!! DO NOT MODIFY !!!!!!
* These values are defined as per DriveOS guidelines
*/
#define PVA_INPUT_STATUS_SUCCESS (0)
#define PVA_INPUT_STATUS_INVALID (0xFFFF)
#endif // PVA_API_TYPES_H

View File

@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_API_VPU_H
#define PVA_API_VPU_H
#include "pva_api_types.h"
/**
 * @brief VPU instance data passed to the VPU kernel.
*/
struct pva_vpu_instance_data {
/** @brief ID of the VPU assigned to the task */
uint16_t engine_id;
/** @brief Indicates whether a PPE task was launched */
uint16_t ppe_task_launched;
/** @brief Base of the VMEM memory */
uint32_t vmem_base;
/** @brief Base of the DMA descriptor SRAM memory */
uint32_t dma_descriptor_base;
/** @brief Base of the L2SRAM allocated for the executed task */
uint32_t l2ram_base;
/** @brief Size of the L2SRAM allocated for the executed task */
uint32_t l2ram_size;
};
#endif // PVA_API_VPU_H

View File

@@ -0,0 +1,125 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_utils.h"
#include "pva_api.h"
#define INVALID_ID 0xFFFFFFFF
enum pva_error
pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator,
void *block_mem, uint32_t base_id,
uint32_t block_size, uint32_t max_num_blocks)
{
enum pva_error err = PVA_SUCCESS;
allocator->free_slot_head = INVALID_ID;
allocator->next_free_slot = 0;
allocator->max_num_blocks = max_num_blocks;
allocator->block_size = block_size;
allocator->base_id = base_id;
allocator->blocks = block_mem;
allocator->slot_in_use = pva_kmd_zalloc(
sizeof(*allocator->slot_in_use) * max_num_blocks);
if (!allocator->slot_in_use) {
err = PVA_NOMEM;
goto err_out;
}
return PVA_SUCCESS;
err_out:
return err;
}
void pva_kmd_block_allocator_deinit(struct pva_kmd_block_allocator *allocator)
{
pva_kmd_free(allocator->slot_in_use);
}
static inline void *get_block(struct pva_kmd_block_allocator *allocator,
uint32_t slot)
{
uintptr_t base = (uintptr_t)allocator->blocks;
uintptr_t addr = base + (slot * allocator->block_size);
return (void *)addr;
}
static inline uint32_t next_slot(struct pva_kmd_block_allocator *allocator,
uint32_t slot)
{
uint32_t *next = (uint32_t *)get_block(allocator, slot);
return *next;
}
void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id)
{
void *block = NULL;
uint32_t slot = INVALID_ID;
if (allocator->free_slot_head != INVALID_ID) {
slot = allocator->free_slot_head;
allocator->free_slot_head =
next_slot(allocator, allocator->free_slot_head);
} else {
if (allocator->next_free_slot < allocator->max_num_blocks) {
slot = allocator->next_free_slot;
allocator->next_free_slot++;
} else {
goto err_out;
}
}
allocator->slot_in_use[slot] = true;
*out_id = slot + allocator->base_id;
block = get_block(allocator, slot);
return block;
err_out:
return NULL;
}
static bool is_slot_valid(struct pva_kmd_block_allocator *allocator,
uint32_t slot)
{
if (slot >= allocator->max_num_blocks) {
return false;
}
return allocator->slot_in_use[slot];
}
void *pva_kmd_get_block(struct pva_kmd_block_allocator *allocator, uint32_t id)
{
uint32_t slot = id - allocator->base_id;
if (!is_slot_valid(allocator, slot)) {
return NULL;
}
return get_block(allocator, slot);
}
enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator,
uint32_t id)
{
uint32_t slot = id - allocator->base_id;
uint32_t *next;
if (!is_slot_valid(allocator, slot)) {
return PVA_INVAL;
}
allocator->slot_in_use[slot] = false;
next = (uint32_t *)get_block(allocator, slot);
*next = allocator->free_slot_head;
allocator->free_slot_head = slot;
return PVA_SUCCESS;
}
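/*
 * A minimal usage sketch of the allocator above, assuming pva_kmd_zalloc()
 * provides the backing storage; the object type, count and base ID are
 * illustrative examples. Each block must be large enough to hold the
 * free-list link (a uint32_t) while it is free.
 */
static enum pva_error example_block_allocator_usage(void)
{
        struct pva_kmd_block_allocator alloc;
        struct example_obj {
                uint32_t payload;
        } *obj;
        void *mem;
        uint32_t id;
        enum pva_error err;

        mem = pva_kmd_zalloc(sizeof(*obj) * 16U);
        if (mem == NULL) {
                return PVA_NOMEM;
        }
        err = pva_kmd_block_allocator_init(&alloc, mem, 1U /* base_id */,
                                           sizeof(*obj), 16U);
        if (err != PVA_SUCCESS) {
                goto free_mem;
        }
        obj = pva_kmd_zalloc_block(&alloc, &id); /* zero-initialized block */
        if (obj != NULL) {
                obj->payload = 42U;
                obj = pva_kmd_get_block(&alloc, id); /* look it up by ID */
                (void)pva_kmd_free_block(&alloc, id);
        }
        pva_kmd_block_allocator_deinit(&alloc);
free_mem:
        pva_kmd_free(mem);
        return err;
}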

View File

@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_BLOCK_ALLOCATOR_H
#define PVA_KMD_BLOCK_ALLOCATOR_H
#include "pva_api.h"
struct pva_kmd_block_allocator {
uint32_t free_slot_head;
uint32_t base_id;
uint32_t max_num_blocks;
uint32_t next_free_slot;
uint32_t block_size;
void *blocks;
bool *slot_in_use;
};
enum pva_error
pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator,
void *chunk_mem, uint32_t base_id,
uint32_t chunk_size, uint32_t max_num_chunks);
void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id);
static inline void *
pva_kmd_zalloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id)
{
void *ptr = pva_kmd_alloc_block(allocator, out_id);
if (ptr != NULL) {
memset(ptr, 0, allocator->block_size);
}
return ptr;
}
void *pva_kmd_get_block(struct pva_kmd_block_allocator *allocator, uint32_t id);
enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator,
uint32_t id);
void pva_kmd_block_allocator_deinit(struct pva_kmd_block_allocator *allocator);
#endif // PVA_KMD_BLOCK_ALLOCATOR_H

View File

@@ -0,0 +1,280 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_cmdbuf.h"
#include "pva_api_cmdbuf.h"
#include "pva_kmd_utils.h"
#include "pva_math_utils.h"
#define CHUNK_STATE_INVALID 0
#define CHUNK_STATE_FENCE_TRIGGERED 1
static uint32_t *
get_chunk_states(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool)
{
return (uint32_t *)pva_offset_pointer(
cmdbuf_chunk_pool->mem_base_va,
cmdbuf_chunk_pool->chunk_states_offset);
}
static void *get_chunk(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t chunk_id)
{
return pva_offset_pointer(cmdbuf_chunk_pool->mem_base_va,
cmdbuf_chunk_pool->chunk_size * chunk_id);
}
static uint32_t get_chunk_id_from_res_offset(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint64_t offset)
{
ASSERT(offset >= cmdbuf_chunk_pool->mem_offset);
offset -= cmdbuf_chunk_pool->mem_offset;
return offset / cmdbuf_chunk_pool->chunk_size;
}
enum pva_error pva_kmd_cmdbuf_chunk_pool_init(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t mem_resource_id, uint64_t mem_offset, uint32_t mem_size,
uint16_t chunk_size, uint32_t num_chunks, void *mem_base_va)
{
uint32_t *chunk_states;
uint32_t i;
enum pva_error err;
ASSERT(mem_size >= pva_kmd_cmdbuf_pool_get_required_mem_size(
chunk_size, num_chunks));
cmdbuf_chunk_pool->mem_resource_id = mem_resource_id;
cmdbuf_chunk_pool->mem_offset = mem_offset;
cmdbuf_chunk_pool->mem_size = mem_size;
cmdbuf_chunk_pool->chunk_size = chunk_size;
cmdbuf_chunk_pool->num_chunks = num_chunks;
cmdbuf_chunk_pool->mem_base_va = mem_base_va;
cmdbuf_chunk_pool->chunk_states_offset = chunk_size * num_chunks;
chunk_states = get_chunk_states(cmdbuf_chunk_pool);
for (i = 0; i < num_chunks; i++) {
chunk_states[i] = CHUNK_STATE_INVALID;
}
err = pva_kmd_block_allocator_init(&cmdbuf_chunk_pool->block_allocator,
mem_base_va, 0, chunk_size,
num_chunks);
return err;
}
void pva_kmd_cmdbuf_chunk_pool_deinit(struct pva_kmd_cmdbuf_chunk_pool *pool)
{
pva_kmd_block_allocator_deinit(&pool->block_allocator);
}
void pva_kmd_free_linked_cmdbuf_chunks(struct pva_kmd_cmdbuf_chunk_pool *pool,
uint32_t chunk_id)
{
struct pva_cmd_link_chunk *begin;
uint32_t *chunk_states;
uint64_t offset;
uint32_t resource_id;
chunk_states = get_chunk_states(pool);
while (true) {
begin = get_chunk(pool, chunk_id);
chunk_states[chunk_id] = CHUNK_STATE_INVALID;
offset = assemble_addr(begin->next_chunk_offset_hi,
begin->next_chunk_offset_lo);
resource_id = begin->next_chunk_resource_id;
pva_kmd_free_block(&pool->block_allocator, chunk_id);
if (resource_id == PVA_RESOURCE_ID_INVALID) {
break;
}
ASSERT(resource_id == pool->mem_resource_id);
/* Free next chunk */
chunk_id = get_chunk_id_from_res_offset(pool, offset);
}
}
static bool recycle_chunks(struct pva_kmd_cmdbuf_chunk_pool *pool)
{
uint32_t *chunk_states;
uint32_t i;
bool freed = false;
chunk_states = get_chunk_states(pool);
for (i = 0; i < pool->num_chunks; i++) {
if (chunk_states[i] == CHUNK_STATE_FENCE_TRIGGERED) {
pva_kmd_free_linked_cmdbuf_chunks(pool, i);
freed = true;
break;
}
}
return freed;
}
enum pva_error
pva_kmd_alloc_cmdbuf_chunk(struct pva_kmd_cmdbuf_chunk_pool *pool,
uint32_t *out_chunk_id)
{
enum pva_error err = PVA_SUCCESS;
void *chunk;
chunk = pva_kmd_alloc_block(&pool->block_allocator, out_chunk_id);
if (chunk == NULL) {
if (recycle_chunks(pool)) {
chunk = pva_kmd_alloc_block(&pool->block_allocator,
out_chunk_id);
ASSERT(chunk != NULL);
} else {
err = PVA_NOMEM;
}
}
return err;
}
void pva_kmd_get_free_notifier_fence(struct pva_kmd_cmdbuf_chunk_pool *pool,
uint32_t chunk_id,
struct pva_fw_postfence *fence)
{
uint64_t offset_sum =
safe_addu64(pool->mem_offset, pool->chunk_states_offset);
uint64_t chunk_size =
(uint64_t)safe_mulu32((uint32_t)sizeof(uint32_t), chunk_id);
uint64_t state_offset = safe_addu64(offset_sum, chunk_size);
memset(fence, 0, sizeof(*fence));
fence->resource_id = pool->mem_resource_id;
fence->offset_lo = iova_lo(state_offset);
fence->offset_hi = iova_hi(state_offset);
fence->value = CHUNK_STATE_FENCE_TRIGGERED;
fence->ts_resource_id = PVA_RESOURCE_ID_INVALID;
}
static void *current_cmd(struct pva_kmd_cmdbuf_builder *builder)
{
return pva_offset_pointer(
pva_kmd_get_cmdbuf_chunk_va(builder->pool,
builder->current_chunk_id),
builder->current_chunk_offset);
}
static void begin_chunk(struct pva_kmd_cmdbuf_builder *builder)
{
struct pva_cmd_link_chunk *cmd = pva_kmd_get_cmdbuf_chunk_va(
builder->pool, builder->current_chunk_id);
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_LINK_CHUNK;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->next_chunk_resource_id = PVA_RESOURCE_ID_INVALID;
builder->current_chunk_offset = sizeof(*cmd);
}
static void end_chunk(struct pva_kmd_cmdbuf_builder *builder)
{
/* Size of this chunk is now known. Update the header of the previous chunk. */
*builder->chunk_size_ptr = builder->current_chunk_offset;
}
static void link_chunk(struct pva_kmd_cmdbuf_builder *builder,
uint32_t new_chunk_id)
{
struct pva_cmd_link_chunk *old_link;
uint64_t new_chunk_offset;
old_link = (struct pva_cmd_link_chunk *)pva_kmd_get_cmdbuf_chunk_va(
builder->pool, builder->current_chunk_id);
new_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset(builder->pool,
new_chunk_id);
old_link->next_chunk_resource_id = builder->pool->mem_resource_id;
old_link->next_chunk_offset_lo = iova_lo(new_chunk_offset);
old_link->next_chunk_offset_hi = iova_hi(new_chunk_offset);
/* The new chunk size is still unknown. We record the pointer here. */
builder->chunk_size_ptr = &old_link->next_chunk_size;
}
void *pva_kmd_reserve_cmd_space(struct pva_kmd_cmdbuf_builder *builder,
uint16_t size)
{
uint16_t max_size;
enum pva_error err;
void *cmd_start;
max_size = safe_subu16(builder->pool->chunk_size,
(uint16_t)sizeof(struct pva_cmd_link_chunk));
ASSERT(size <= max_size);
if ((builder->current_chunk_offset + size) >
builder->pool->chunk_size) {
/* Not enough space in the current chunk. Allocate a new one. */
uint32_t new_chunk_id;
err = pva_kmd_alloc_cmdbuf_chunk(builder->pool, &new_chunk_id);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("No more chunk in the pool");
goto err_out;
}
end_chunk(builder);
link_chunk(builder, new_chunk_id);
builder->current_chunk_id = new_chunk_id;
builder->current_chunk_offset = 0;
begin_chunk(builder);
}
cmd_start = current_cmd(builder);
(void)memset(cmd_start, 0, size);
builder->current_chunk_offset += size;
return cmd_start;
err_out:
return NULL;
}
enum pva_error
pva_kmd_cmdbuf_builder_init(struct pva_kmd_cmdbuf_builder *builder,
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool)
{
enum pva_error err = PVA_SUCCESS;
uint32_t const min_chunk_size = sizeof(struct pva_cmd_link_chunk);
ASSERT(chunk_pool->chunk_size >= min_chunk_size);
builder->pool = chunk_pool;
err = pva_kmd_alloc_cmdbuf_chunk(chunk_pool,
&builder->current_chunk_id);
if (err != PVA_SUCCESS) {
goto err_out;
}
builder->current_chunk_offset = 0;
builder->first_chunk_size = 0;
builder->first_chunk_id = builder->current_chunk_id;
builder->chunk_size_ptr = &builder->first_chunk_size;
begin_chunk(builder);
return PVA_SUCCESS;
err_out:
return err;
}
void pva_kmd_cmdbuf_builder_finalize(struct pva_kmd_cmdbuf_builder *builder,
uint32_t *out_first_chunk_id,
uint16_t *out_first_chunk_size)
{
end_chunk(builder);
*out_first_chunk_id = builder->first_chunk_id;
*out_first_chunk_size = builder->first_chunk_size;
}
void pva_kmd_cmdbuf_builder_cancel(struct pva_kmd_cmdbuf_builder *builder)
{
pva_kmd_free_linked_cmdbuf_chunks(builder->pool,
builder->first_chunk_id);
}

View File

@@ -0,0 +1,265 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_CMDBUF_H
#define PVA_KMD_CMDBUF_H
#include "pva_fw.h"
#include "pva_resource.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_mutex.h"
#include "pva_api_cmdbuf.h"
#include "pva_utils.h"
#include "pva_math_utils.h"
struct pva_kmd_queue;
/**
* A fixed-size pool of command buffer chunks.
*
 * Chunks are allocated from this pool. When submitting a chain of chunks,
 * request a free-notifier post fence from the pool for the first chunk; once
 * that fence is triggered, the whole chain is considered free by the pool.
*/
struct pva_kmd_cmdbuf_chunk_pool {
uint16_t chunk_size;
uint32_t num_chunks;
uint32_t mem_resource_id;
uint64_t mem_size;
uint64_t mem_offset; /**< Starting offset in the resource that can be
* used by this pool */
uint64_t chunk_states_offset;
void *mem_base_va;
struct pva_kmd_block_allocator block_allocator;
};
static inline uint64_t
pva_kmd_cmdbuf_pool_get_required_mem_size(uint16_t chunk_size,
uint32_t num_chunks)
{
/* Add storage required for free notifier fences */
return (chunk_size + sizeof(uint32_t)) * num_chunks;
}
/**
* Initialize the chunk pool.
*
 * @param[out] cmdbuf_chunk_pool Pointer to the pool.
*
* @param[in] mem_resource_id Resource ID of the memory to be used for the pool.
*
* @param[in] mem_offset Offset of the memory to be used for the pool.
* @param[in] mem_size Size of the memory to be used for the pool.
*
* @param[in] chunk_size Size of each chunk in the pool.
*
* @param[in] num_chunks Number of chunks in the pool.
*
* @param[in] mem_base_va Virtual address of the memory to be used for the pool.
* The virtual address is the base address of the resource.
*/
enum pva_error pva_kmd_cmdbuf_chunk_pool_init(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t mem_resource_id, uint64_t mem_offset, uint32_t mem_size,
uint16_t chunk_size, uint32_t num_chunks, void *mem_base_va);
void pva_kmd_cmdbuf_chunk_pool_deinit(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool);
/**
* Allocate a chunk from the pool.
*
 * If the chunk is submitted, it is freed automatically when the
 * free-notifier fence is triggered.
*/
enum pva_error
pva_kmd_alloc_cmdbuf_chunk(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t *out_chunk_id);
/**
* Free a linked list of chunks.
*
* We only need to call this function if we decide not to submit the chunks,
 * usually in an error path.
*/
void pva_kmd_free_linked_cmdbuf_chunks(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id);
/**
* Get the free-notifier fence.
*
 * @param[in] chunk_id The first chunk of the command buffer to be submitted.
 *
 * @param[out] fence The free-notifier fence that should be submitted with the command buffer.
*/
void pva_kmd_get_free_notifier_fence(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id,
struct pva_fw_postfence *fence);
static inline void *
pva_kmd_get_cmdbuf_chunk_va(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool,
uint32_t chunk_id)
{
return (void *)((uintptr_t)cmdbuf_chunk_pool->mem_base_va +
chunk_id * cmdbuf_chunk_pool->chunk_size);
}
static inline uint64_t pva_kmd_get_cmdbuf_chunk_res_offset(
struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id)
{
uint64_t chunk_size = (uint64_t)safe_mulu32(
chunk_id, (uint32_t)cmdbuf_chunk_pool->chunk_size);
return safe_addu64(cmdbuf_chunk_pool->mem_offset, chunk_size);
}
/**
* Utility for building a command buffer with multiple chunks.
*
* The builder will automatically allocate chunks from the pool when the current
* chunk is full.
*/
struct pva_kmd_cmdbuf_builder {
uint16_t first_chunk_size;
uint16_t current_chunk_offset;
uint32_t first_chunk_id;
uint32_t current_chunk_id;
struct pva_kmd_cmdbuf_chunk_pool *pool;
uint16_t *chunk_size_ptr; /**< Pointer to the chunk size field of the previous link_chunk command */
};
enum pva_error
pva_kmd_cmdbuf_builder_init(struct pva_kmd_cmdbuf_builder *builder,
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool);
void *pva_kmd_reserve_cmd_space(struct pva_kmd_cmdbuf_builder *builder,
uint16_t size);
void pva_kmd_cmdbuf_builder_finalize(struct pva_kmd_cmdbuf_builder *builder,
uint32_t *out_first_chunk_id,
uint16_t *out_first_chunk_size);
void pva_kmd_cmdbuf_builder_cancel(struct pva_kmd_cmdbuf_builder *builder);
static inline void pva_kmd_set_cmd_init_resource_table(
struct pva_cmd_init_resource_table *cmd, uint8_t resource_table_id,
uint64_t iova_addr, uint32_t max_num_entries)
{
cmd->header.opcode = PVA_CMD_OPCODE_INIT_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
cmd->resource_table_addr_lo = iova_lo(iova_addr);
cmd->resource_table_addr_hi = iova_hi(iova_addr);
cmd->max_n_entries = max_num_entries;
}
static inline void
pva_kmd_set_cmd_deinit_resource_table(struct pva_cmd_deinit_resource_table *cmd,
uint8_t resource_table_id)
{
cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
}
static inline void pva_kmd_set_cmd_init_queue(struct pva_cmd_init_queue *cmd,
uint8_t ccq_id, uint8_t queue_id,
uint64_t iova_addr,
uint32_t max_num_submit)
{
cmd->header.opcode = PVA_CMD_OPCODE_INIT_QUEUE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->ccq_id = ccq_id;
cmd->queue_id = queue_id;
cmd->queue_addr_lo = iova_lo(iova_addr);
cmd->queue_addr_hi = iova_hi(iova_addr);
cmd->max_n_submits = max_num_submit;
}
static inline void
pva_kmd_set_cmd_deinit_queue(struct pva_cmd_deinit_queue *cmd, uint8_t ccq_id,
uint8_t queue_id)
{
cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_QUEUE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->ccq_id = ccq_id;
cmd->queue_id = queue_id;
}
static inline void pva_kmd_set_cmd_update_resource_table(
struct pva_cmd_update_resource_table *cmd, uint32_t resource_table_id,
uint32_t resource_id, struct pva_resource_entry const *entry)
{
cmd->header.opcode = PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
cmd->resource_id = resource_id;
cmd->entry = *entry;
}
static inline void
pva_kmd_set_cmd_unregister_resource(struct pva_cmd_unregister_resource *cmd,
uint32_t resource_id)
{
cmd->header.opcode = PVA_CMD_OPCODE_UNREGISTER_RESOURCE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_id = resource_id;
}
static inline void
pva_kmd_set_cmd_enable_fw_profiling(struct pva_cmd_enable_fw_profiling *cmd,
uint32_t buffer_resource_id,
uint32_t buffer_size, uint64_t offset,
uint32_t filter, uint8_t timestamp_type)
{
cmd->header.opcode = PVA_CMD_OPCODE_ENABLE_FW_PROFILING;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->buffer_resource_id = buffer_resource_id;
cmd->buffer_offset_hi = iova_hi(offset);
cmd->buffer_offset_lo = iova_lo(offset);
cmd->buffer_size = buffer_size;
cmd->filter = filter;
cmd->timestamp_type = timestamp_type;
}
static inline void
pva_kmd_set_cmd_disable_fw_profiling(struct pva_cmd_disable_fw_profiling *cmd)
{
cmd->header.opcode = PVA_CMD_OPCODE_DISABLE_FW_PROFILING;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
}
static inline void pva_kmd_set_cmd_get_tegra_stats(
struct pva_cmd_get_tegra_stats *cmd, uint32_t buffer_resource_id,
uint32_t buffer_size, uint64_t offset, bool enabled)
{
cmd->header.opcode = PVA_CMD_OPCODE_GET_TEGRA_STATS;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->buffer_resource_id = buffer_resource_id;
cmd->buffer_offset_hi = iova_hi(offset);
cmd->buffer_offset_lo = iova_lo(offset);
cmd->buffer_size = buffer_size;
cmd->enabled = enabled;
}
static inline void pva_kmd_set_cmd_suspend_fw(struct pva_cmd_suspend_fw *cmd)
{
uint64_t len = (sizeof(*cmd) / sizeof(uint32_t));
cmd->header.opcode = PVA_CMD_OPCODE_SUSPEND_FW;
ASSERT(len <= 255u);
cmd->header.len = (uint8_t)(len);
}
static inline void pva_kmd_set_cmd_resume_fw(struct pva_cmd_resume_fw *cmd)
{
uint64_t len = (sizeof(*cmd) / sizeof(uint32_t));
cmd->header.opcode = PVA_CMD_OPCODE_RESUME_FW;
ASSERT(len <= 255u);
cmd->header.len = (uint8_t)(len);
}
#endif // PVA_KMD_CMDBUF_H
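/*
 * A minimal sketch of building a one-command buffer with the builder API
 * above, assuming an initialized chunk pool; the deinit-queue command and the
 * function name are illustrative examples. In the driver proper, the
 * submitter paths wrap these steps together with the actual submission.
 */
static inline enum pva_error
example_build_cmdbuf(struct pva_kmd_cmdbuf_chunk_pool *pool,
                     uint32_t *out_chunk_id, uint16_t *out_chunk_size,
                     struct pva_fw_postfence *free_fence)
{
        struct pva_kmd_cmdbuf_builder builder;
        struct pva_cmd_deinit_queue *cmd;
        enum pva_error err;

        err = pva_kmd_cmdbuf_builder_init(&builder, pool);
        if (err != PVA_SUCCESS) {
                return err;
        }
        cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
        if (cmd == NULL) {
                pva_kmd_cmdbuf_builder_cancel(&builder);
                return PVA_NOMEM;
        }
        pva_kmd_set_cmd_deinit_queue(cmd, 0U /* ccq_id */, 0U /* queue_id */);
        pva_kmd_cmdbuf_builder_finalize(&builder, out_chunk_id,
                                        out_chunk_size);
        /* The free-notifier fence accompanies the submission so the pool can
         * reclaim the chunk chain once the FW has consumed it. */
        pva_kmd_get_free_notifier_fence(pool, *out_chunk_id, free_fence);
        return PVA_SUCCESS;
}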

View File

@@ -0,0 +1,62 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_CONSTANTS_H
#define PVA_KMD_CONSTANTS_H
#include "pva_constants.h"
/* Limits related to KMD's own submission*/
#define PVA_KMD_MAX_NUM_KMD_RESOURCES 32
#define PVA_KMD_MAX_NUM_KMD_DMA_CONFIGS 1
#define PVA_KMD_MAX_NUM_KMD_CHUNKS 32
#define PVA_KMD_MAX_NUM_KMD_SUBMITS 32
/* Limits related to User's privileged submission */
#define PVA_KMD_MAX_NUM_PRIV_CHUNKS 256
#define PVA_KMD_MAX_NUM_PRIV_SUBMITS 256
#define PVA_KMD_USER_CONTEXT_ID_BASE 1u
#define PVA_KMD_PVA0_T23x_REG_BASE 0x16000000
#define PVA_KMD_PVA0_T23x_REG_SIZE 0x800000
#define PVA_KMD_TIMEOUT_INF UINT64_MAX
// clang-format off
#if PVA_BUILD_MODE == PVA_BUILD_MODE_SIM
#define PVA_KMD_TIMEOUT_FACTOR 100
#else
#define PVA_KMD_TIMEOUT_FACTOR 1
#endif
// clang-format on
#define PVA_KMD_TIMEOUT(val) ((val) * PVA_KMD_TIMEOUT_FACTOR)
#define PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS PVA_KMD_TIMEOUT(100) /**< 100 ms */
#define PVA_KMD_WAIT_FW_TIMEOUT_US PVA_KMD_TIMEOUT(1000000) /**< 1 second */
#define PVA_KMD_WAIT_FW_POLL_INTERVAL_US PVA_KMD_TIMEOUT(100) /**< 100 us */
#define PVA_KMD_FW_BOOT_TIMEOUT_MS PVA_KMD_TIMEOUT(1000) /**< 1 second */
#define PVA_NUM_RW_SYNCPTS 56
// clang-format off
#if PVA_DEV_MAIN_COMPATIBLE == 1
#define PVA_KMD_LOAD_FROM_GSC_DEFAULT true
#if PVA_SAFETY == 1
#define PVA_KMD_APP_AUTH_DEFAULT true
#else
#define PVA_KMD_APP_AUTH_DEFAULT false
#endif
#else
#define PVA_KMD_LOAD_FROM_GSC_DEFAULT false
#define PVA_KMD_APP_AUTH_DEFAULT false
#endif
// clang-format on
#endif // PVA_KMD_CONSTANTS_H

View File

@@ -0,0 +1,363 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_utils.h"
#include "pva_constants.h"
#include "pva_api_cmdbuf.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_context.h"
#include "pva_kmd_constants.h"
struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva)
{
uint32_t alloc_id;
enum pva_error err;
struct pva_kmd_context *ctx;
ctx = pva_kmd_zalloc_block(&pva->context_allocator, &alloc_id);
if (ctx == NULL) {
goto err_out;
}
ctx->ccq_id = alloc_id;
ctx->resource_table_id = ctx->ccq_id;
ctx->smmu_ctx_id = ctx->ccq_id;
ctx->pva = pva;
ctx->max_n_queues = PVA_MAX_NUM_QUEUES_PER_CONTEXT;
ctx->ccq0_lock_ptr = &pva->ccq0_lock;
pva_kmd_mutex_init(&ctx->ccq_lock);
pva_kmd_mutex_init(&ctx->resource_table_lock);
ctx->queue_allocator_mem = pva_kmd_zalloc(sizeof(struct pva_kmd_queue) *
ctx->max_n_queues);
if (ctx->queue_allocator_mem == NULL) {
goto free_ctx;
}
err = pva_kmd_block_allocator_init(&ctx->queue_allocator,
ctx->queue_allocator_mem, 0,
sizeof(struct pva_kmd_queue),
ctx->max_n_queues);
if (err != PVA_SUCCESS) {
goto free_queue_mem;
}
return ctx;
free_queue_mem:
pva_kmd_free(ctx->queue_allocator_mem);
free_ctx:
pva_kmd_free(ctx);
err_out:
return NULL;
}
static enum pva_error notify_fw_context_init(struct pva_kmd_context *ctx)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter;
struct pva_cmd_init_resource_table *res_cmd;
struct pva_cmd_init_queue *queue_cmd;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
uint32_t fence_val;
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
res_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*res_cmd));
ASSERT(res_cmd != NULL);
pva_kmd_set_cmd_init_resource_table(
res_cmd, ctx->resource_table_id,
ctx->ctx_resource_table.table_mem->iova,
ctx->ctx_resource_table.n_entries);
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
ASSERT(queue_cmd != NULL);
pva_kmd_set_cmd_init_queue(
queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/
ctx->ctx_queue.queue_memory->iova,
ctx->ctx_queue.max_num_submit);
update_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(update_cmd,
0, /* KMD's resource table ID */
ctx->submit_memory_resource_id,
&entry);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
// Error is either QUEUE_FULL or TIMEDOUT
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
goto err_out;
}
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
return err;
}
static enum pva_error notify_fw_context_deinit(struct pva_kmd_context *ctx)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter;
struct pva_cmd_deinit_resource_table *deinit_table_cmd;
struct pva_cmd_deinit_queue *deinit_queue_cmd;
uint32_t fence_val;
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
deinit_queue_cmd =
pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_queue_cmd));
ASSERT(deinit_queue_cmd != NULL);
pva_kmd_set_cmd_deinit_queue(
deinit_queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id /* For privileged queues, queue ID == user CCQ ID*/
);
deinit_table_cmd =
pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_table_cmd));
ASSERT(deinit_table_cmd != NULL);
pva_kmd_set_cmd_deinit_resource_table(deinit_table_cmd,
ctx->resource_table_id);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when deinitializing context");
goto err_out;
}
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
return err;
}
enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
uint32_t res_table_capacity)
{
enum pva_error err;
uint32_t queue_mem_size;
uint64_t chunk_mem_size;
struct pva_fw_postfence post_fence = { 0 };
struct pva_syncpt_rw_info *syncpts;
uint64_t size;
/* Power on PVA if not already */
err = pva_kmd_device_busy(ctx->pva);
if (err != PVA_SUCCESS) {
goto err_out;
}
/* Allocate RW syncpoints for this context */
syncpts = (struct pva_syncpt_rw_info *)pva_kmd_alloc_block(
&ctx->pva->syncpt_allocator, &ctx->syncpt_block_index);
ASSERT(syncpts != NULL);
/* Init resource table for this context */
err = pva_kmd_resource_table_init(&ctx->ctx_resource_table, ctx->pva,
ctx->smmu_ctx_id, res_table_capacity,
res_table_capacity);
if (err != PVA_SUCCESS) {
goto drop_device;
}
/* Init privileged queue for this context */
queue_mem_size = pva_get_submission_queue_memory_size(
PVA_KMD_MAX_NUM_PRIV_SUBMITS);
ctx->ctx_queue_mem =
pva_kmd_device_memory_alloc_map(queue_mem_size, ctx->pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
if (ctx->ctx_queue_mem == NULL) {
err = PVA_NOMEM;
goto deinit_table;
}
pva_kmd_queue_init(
&ctx->ctx_queue, ctx->pva, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* Context's PRIV queue ID is identical to CCQ ID */
&ctx->pva->ccq0_lock, ctx->ctx_queue_mem,
PVA_KMD_MAX_NUM_PRIV_SUBMITS);
/* Allocate memory for submission */
chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size(
PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_PRIV_CHUNKS);
/* Allocate one post fence at the end. This memory will be added to
* KMD's own resource table. We don't need to explicitly free it. It
* will be freed after we drop the resource. */
size = safe_addu64(chunk_mem_size, (uint64_t)sizeof(uint32_t));
ctx->submit_memory = pva_kmd_device_memory_alloc_map(
size, ctx->pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
if (ctx->submit_memory == NULL) {
err = PVA_NOMEM;
goto queue_deinit;
}
/* Add submit memory to resource table */
pva_kmd_mutex_lock(&ctx->pva->resource_table_lock);
err = pva_kmd_add_dram_buffer_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory,
&ctx->submit_memory_resource_id);
pva_kmd_mutex_unlock(&ctx->pva->resource_table_lock);
if (err != PVA_SUCCESS) {
goto free_submit_memory;
}
/* Init chunk pool */
err = pva_kmd_cmdbuf_chunk_pool_init(
&ctx->chunk_pool, ctx->submit_memory_resource_id,
0 /* offset */, chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE,
PVA_KMD_MAX_NUM_PRIV_CHUNKS, ctx->submit_memory->va);
if (err != PVA_SUCCESS) {
goto free_dram_buffer_resource;
}
/* Init fence */
ctx->fence_offset = chunk_mem_size;
/* Init submitter */
pva_kmd_mutex_init(&ctx->submit_lock);
pva_kmd_mutex_init(&ctx->chunk_pool_lock);
post_fence.resource_id = ctx->submit_memory_resource_id;
post_fence.offset_lo = iova_lo(ctx->fence_offset);
post_fence.offset_hi = iova_hi(ctx->fence_offset);
post_fence.ts_resource_id = PVA_RESOURCE_ID_INVALID;
pva_kmd_submitter_init(
&ctx->submitter, &ctx->ctx_queue, &ctx->submit_lock,
&ctx->chunk_pool, &ctx->chunk_pool_lock,
pva_offset_pointer(ctx->submit_memory->va, ctx->fence_offset),
&post_fence);
/* Use KMD's queue to inform FW */
err = notify_fw_context_init(ctx);
if (err != PVA_SUCCESS) {
goto deinit_submitter;
}
ctx->inited = true;
return PVA_SUCCESS;
deinit_submitter:
pva_kmd_mutex_deinit(&ctx->chunk_pool_lock);
pva_kmd_mutex_deinit(&ctx->submit_lock);
pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool);
free_dram_buffer_resource:
pva_kmd_drop_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id);
free_submit_memory:
pva_kmd_device_memory_free(ctx->submit_memory);
queue_deinit:
pva_kmd_queue_deinit(&ctx->ctx_queue);
pva_kmd_device_memory_free(ctx->ctx_queue_mem);
deinit_table:
pva_kmd_resource_table_deinit(&ctx->ctx_resource_table);
drop_device:
pva_kmd_device_idle(ctx->pva);
err_out:
return err;
}
void pva_kmd_context_deinit(struct pva_kmd_context *ctx)
{
enum pva_error err;
if (ctx->inited) {
err = notify_fw_context_deinit(ctx);
ASSERT(err == PVA_SUCCESS);
pva_kmd_verify_all_resources_free(&ctx->ctx_resource_table);
pva_kmd_device_idle(ctx->pva);
pva_kmd_mutex_deinit(&ctx->submit_lock);
pva_kmd_mutex_deinit(&ctx->chunk_pool_lock);
pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool);
pva_kmd_mutex_lock(&ctx->pva->resource_table_lock);
pva_kmd_drop_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id);
pva_kmd_mutex_unlock(&ctx->pva->resource_table_lock);
pva_kmd_queue_deinit(&ctx->ctx_queue);
pva_kmd_device_memory_free(ctx->ctx_queue_mem);
pva_kmd_resource_table_deinit(&ctx->ctx_resource_table);
pva_kmd_free_block(&ctx->pva->syncpt_allocator,
ctx->syncpt_block_index);
ctx->inited = false;
}
}
static void pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx)
{
enum pva_error err;
struct pva_kmd_queue_destroy_in_args args;
for (uint32_t queue_id = 0u; queue_id < ctx->max_n_queues; queue_id++) {
struct pva_kmd_queue *queue =
pva_kmd_get_block(&ctx->queue_allocator, queue_id);
if (queue != NULL) {
args.queue_id = queue_id;
err = pva_kmd_queue_destroy(ctx, &args);
ASSERT(err == PVA_SUCCESS);
}
}
}
void pva_kmd_context_destroy(struct pva_kmd_context *ctx)
{
enum pva_error err;
pva_kmd_destroy_all_queues(ctx);
pva_kmd_context_deinit(ctx);
pva_kmd_block_allocator_deinit(&ctx->queue_allocator);
pva_kmd_free(ctx->queue_allocator_mem);
pva_kmd_mutex_deinit(&ctx->ccq_lock);
pva_kmd_mutex_deinit(&ctx->resource_table_lock);
err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id);
ASSERT(err == PVA_SUCCESS);
}
struct pva_kmd_context *pva_kmd_get_context(struct pva_kmd_device *pva,
uint8_t alloc_id)
{
return pva_kmd_get_block(&pva->context_allocator, alloc_id);
}

View File

@@ -0,0 +1,104 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_CONTEXT_H
#define PVA_KMD_CONTEXT_H
#include "pva_api.h"
#include "pva_constants.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_submitter.h"
struct pva_kmd_device;
/**
* @brief This struct manages a user context in KMD.
*
* One KMD user context is uniquely mapped to a UMD user context. Each context
* is assigned a unique CCQ block and, on QNX and Linux, a unique file
* descriptor.
*/
struct pva_kmd_context {
struct pva_kmd_device *pva;
uint8_t resource_table_id;
uint8_t ccq_id;
uint8_t smmu_ctx_id;
bool inited;
pva_kmd_mutex_t resource_table_lock;
struct pva_kmd_resource_table ctx_resource_table;
struct pva_kmd_submitter submitter;
/** The lock protects the submission to the queue, including
* incrementing the post fence */
pva_kmd_mutex_t submit_lock;
/** Privileged queue owned by this context. It uses the privileged
* resource table (ID 0). */
struct pva_kmd_device_memory *ctx_queue_mem;
/** Privileged queue owned by the context */
struct pva_kmd_queue ctx_queue;
/** Pointer to the ccq0 lock owned by device*/
pva_kmd_mutex_t *ccq0_lock_ptr;
/** memory needed for submission: including command buffer chunks and fences */
struct pva_kmd_device_memory *submit_memory;
/** Resource ID of the submission memory, registered with the privileged resource table (ID 0) */
uint32_t submit_memory_resource_id;
uint64_t fence_offset; /**< fence offset within submit_memory*/
pva_kmd_mutex_t chunk_pool_lock;
struct pva_kmd_cmdbuf_chunk_pool chunk_pool;
uint32_t max_n_queues;
void *queue_allocator_mem;
struct pva_kmd_block_allocator queue_allocator;
/** This lock protects the context's own CCQ access. It is currently unused
 * because KMD does not perform user-queue submissions.
*/
pva_kmd_mutex_t ccq_lock;
void *plat_data;
uint64_t ccq_shm_handle;
/** Index of block of syncpoints allocated for this context */
uint32_t syncpt_block_index;
uint32_t syncpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT];
};
/**
* @brief Allocate a KMD context.
*/
struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva);
/**
* @brief Destroy a KMD context.
*/
void pva_kmd_context_destroy(struct pva_kmd_context *client);
/**
* @brief Initialize a KMD context.
*
 * Sets up the context's resource table (sized by res_table_capacity), its
 * privileged queue and submission state, and notifies the FW.
*/
enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
uint32_t res_table_capacity);
void pva_kmd_context_deinit(struct pva_kmd_context *ctx);
struct pva_kmd_context *pva_kmd_get_context(struct pva_kmd_device *pva,
uint8_t alloc_id);
#endif // PVA_KMD_CONTEXT_H
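/*
 * A minimal sketch of the context lifecycle exposed above, assuming a valid
 * device pointer; the resource table capacity is an arbitrary example and
 * error handling is abbreviated.
 */
static inline enum pva_error
example_context_lifecycle(struct pva_kmd_device *pva)
{
        struct pva_kmd_context *ctx;
        enum pva_error err;

        ctx = pva_kmd_context_create(pva);
        if (ctx == NULL) {
                return PVA_NOMEM;
        }
        err = pva_kmd_context_init(ctx, 64U /* res_table_capacity */);
        if (err != PVA_SUCCESS) {
                pva_kmd_context_destroy(ctx);
                return err;
        }
        /* ...queue creation and submissions happen here... */
        pva_kmd_context_destroy(ctx); /* tears down queues and frees the context */
        return PVA_SUCCESS;
}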

View File

@@ -0,0 +1,142 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_device.h"
#include "pva_kmd_debugfs.h"
#include "pva_kmd_fw_profiler.h"
#include "pva_kmd_silicon_utils.h"
#include "pva_kmd_vpu_ocd.h"
#include "pva_kmd_tegra_stats.h"
#include "pva_kmd_vpu_app_auth.h"
void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva)
{
static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0_v3",
"ocd_vpu1_v3" };
pva_kmd_debugfs_create_bool(pva, "stats_enable",
&pva->debugfs_context.stats_enable);
pva_kmd_debugfs_create_bool(pva, "vpu_debug",
&pva->debugfs_context.vpu_debug);
pva_kmd_debugfs_create_u32(pva, "profile_level",
&pva->debugfs_context.profile_level);
pva->debugfs_context.vpu_fops.read = &update_vpu_stats;
pva->debugfs_context.vpu_fops.pdev = pva;
pva_kmd_debugfs_create_file(pva, "vpu_stats",
&pva->debugfs_context.vpu_fops);
for (uint32_t i = 0; i < NUM_VPU_BLOCKS; i++) {
pva->debugfs_context.vpu_ocd_fops[i].open =
&pva_kmd_vpu_ocd_open;
pva->debugfs_context.vpu_ocd_fops[i].release =
&pva_kmd_vpu_ocd_release;
pva->debugfs_context.vpu_ocd_fops[i].read =
&pva_kmd_vpu_ocd_read;
pva->debugfs_context.vpu_ocd_fops[i].write =
&pva_kmd_vpu_ocd_write;
pva->debugfs_context.vpu_ocd_fops[i].pdev = pva;
pva->debugfs_context.vpu_ocd_fops[i].file_data =
(void *)&pva->regspec.vpu_dbg_instr_reg_offset[i];
pva_kmd_debugfs_create_file(
pva, vpu_ocd_names[i],
&pva->debugfs_context.vpu_ocd_fops[i]);
}
pva->debugfs_context.allowlist_fops.write = &update_vpu_allowlist;
pva->debugfs_context.allowlist_fops.pdev = pva;
pva_kmd_debugfs_create_file(pva, "vpu_app_authentication",
&pva->debugfs_context.allowlist_fops);
pva_kmd_device_init_profiler(pva);
pva_kmd_device_init_tegra_stats(pva);
}
void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *pva)
{
pva_kmd_device_deinit_tegra_stats(pva);
pva_kmd_device_deinit_profiler(pva);
pva_kmd_debugfs_remove_nodes(pva);
}
static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats,
uint8_t *out_buffer, uint64_t len)
{
char kernel_buffer[256];
int64_t formatted_len;
formatted_len = snprintf(
kernel_buffer, sizeof(kernel_buffer),
"%llu\n%llu\n%llu\n%llu\n",
(long long unsigned int)(kmd_tegra_stats->window_start_time),
(long long unsigned int)(kmd_tegra_stats->window_end_time),
(long long unsigned int)
kmd_tegra_stats->average_vpu_utilization[0],
(long long unsigned int)
kmd_tegra_stats->average_vpu_utilization[1]);
if (formatted_len <= 0) {
return 0;
}
formatted_len++; // account for the terminating null character
if (len < (uint64_t)formatted_len) {
return 0;
}
// Copy the formatted string from kernel buffer to user buffer
if (pva_kmd_copy_data_to_user(out_buffer, kernel_buffer,
formatted_len)) {
pva_kmd_log_err("failed to copy read buffer to user");
return 0;
}
return formatted_len;
}
int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset, uint64_t size)
{
uint64_t size_read = 0U;
struct pva_kmd_tegrastats kmd_tegra_stats;
kmd_tegra_stats.window_start_time = 0;
kmd_tegra_stats.window_end_time = 0;
kmd_tegra_stats.average_vpu_utilization[0] = 0;
kmd_tegra_stats.average_vpu_utilization[1] = 0;
pva_kmd_log_err("Reading VPU stats");
pva_kmd_notify_fw_get_tegra_stats(dev, &kmd_tegra_stats);
size_read = print_vpu_stats(&kmd_tegra_stats, out_buffer, size);
return size_read;
}
int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size)
{
char strbuf[3] = { 0 }; // '0' or '1', optional newline, and a terminating null
uint32_t pva_auth_enable;
unsigned long retval;
uint64_t copy_len = (size < (sizeof(strbuf) - 1U)) ? size : (sizeof(strbuf) - 1U);
retval = pva_kmd_copy_data_from_user(strbuf, in_buffer, copy_len);
if (retval != 0u) {
pva_kmd_log_err("Failed to copy write buffer from user");
return -1;
}
pva_auth_enable = pva_kmd_strtol(strbuf, 16);
pva->pva_auth->pva_auth_enable = (pva_auth_enable == 1) ? true : false;
if (pva->pva_auth->pva_auth_enable)
pva->pva_auth->pva_auth_allow_list_parsed = false;
return 2;
}

View File

@@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_DEBUGFS_H
#define PVA_KMD_DEBUGFS_H
#include "pva_kmd.h"
#include "pva_kmd_shim_debugfs.h"
#include "pva_kmd_fw_profiler.h"
#define NUM_VPU_BLOCKS 2U
/**
 * Maximum length of a debugfs file operation buffer, in bytes
*/
#define MAX_FILE_LEN 256U
struct pva_kmd_file_ops {
int (*open)(struct pva_kmd_device *dev);
int (*release)(struct pva_kmd_device *dev);
int64_t (*read)(struct pva_kmd_device *dev, void *file_data,
uint8_t *data, uint64_t offset, uint64_t size);
int64_t (*write)(struct pva_kmd_device *dev, void *file_data,
const uint8_t *data, uint64_t offset, uint64_t size);
void *pdev;
void *file_data;
};
struct pva_kmd_debugfs_context {
bool stats_enable;
bool vpu_debug;
bool vpu_print_enable;
char *allowlist_path;
uint32_t profile_level;
struct pva_kmd_file_ops vpu_fops;
struct pva_kmd_file_ops allowlist_fops;
struct pva_kmd_file_ops hwpm_fops;
void *data_hwpm;
struct pva_kmd_file_ops vpu_ocd_fops[NUM_VPU_BLOCKS];
struct pva_kmd_fw_profiling_config g_fw_profiling_config;
};
void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev);
void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *dev);
int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset, uint64_t size);
int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size);
#endif //PVA_KMD_DEBUGFS_H
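/*
 * A minimal sketch of wiring a custom debugfs node through pva_kmd_file_ops,
 * assuming a device-lifetime fops instance and that the shim declaration of
 * pva_kmd_debugfs_create_file() is visible; the node name and handler are
 * illustrative examples.
 */
static int64_t example_read(struct pva_kmd_device *dev, void *file_data,
                            uint8_t *out_buffer, uint64_t offset,
                            uint64_t size)
{
        /* Fill out_buffer (copied to the user) and return the byte count. */
        return 0;
}

static inline void example_register_node(struct pva_kmd_device *pva,
                                         struct pva_kmd_file_ops *fops)
{
        fops->read = &example_read;
        fops->pdev = pva;
        pva_kmd_debugfs_create_file(pva, "example_node", fops);
}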

View File

@@ -0,0 +1,338 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api_types.h"
#include "pva_kmd_fw_debug.h"
#include "pva_kmd_utils.h"
#include "pva_api_cmdbuf.h"
#include "pva_api.h"
#include "pva_kmd_constants.h"
#include "pva_fw.h"
#include "pva_bit.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device.h"
#include "pva_kmd_context.h"
#include "pva_kmd_t23x.h"
#include "pva_kmd_t26x.h"
#include "pva_kmd_regs.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_fw_profiler.h"
#include "pva_kmd_vpu_app_auth.h"
#include "pva_utils.h"
#include "pva_kmd_debugfs.h"
#include "pva_kmd_tegra_stats.h"
#include "pva_kmd_shim_silicon.h"
/**
* @brief Send address and size of the resource table to FW through CCQ.
*
* Initialization through CCQ is only intended for KMD's own resource table (the
* first resource table created).
*/
void pva_kmd_send_resource_table_info_by_ccq(
struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table)
{
enum pva_error err;
uint64_t addr = res_table->table_mem->iova;
uint32_t n_entries = res_table->n_entries;
uint64_t ccq_entry =
PVA_INSERT64(PVA_FW_CCQ_OP_SET_RESOURCE_TABLE,
PVA_FW_CCQ_OPCODE_MSB, PVA_FW_CCQ_OPCODE_LSB) |
PVA_INSERT64(addr, PVA_FW_CCQ_RESOURCE_TABLE_ADDR_MSB,
PVA_FW_CCQ_RESOURCE_TABLE_ADDR_LSB) |
PVA_INSERT64(n_entries, PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_MSB,
PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_LSB);
pva_kmd_mutex_lock(&pva->ccq0_lock);
err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
ASSERT(err == PVA_SUCCESS);
pva_kmd_mutex_unlock(&pva->ccq0_lock);
}
/**
* @brief Send address and size of the queue to FW through CCQ.
*
* Initialization through CCQ is only intended for KMD's own queue (the first
* queue created).
*/
void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
struct pva_kmd_queue *queue)
{
enum pva_error err;
uint64_t addr = queue->queue_memory->iova;
uint32_t max_submit = queue->max_num_submit;
uint64_t ccq_entry =
PVA_INSERT64(PVA_FW_CCQ_OP_SET_SUBMISSION_QUEUE,
PVA_FW_CCQ_OPCODE_MSB, PVA_FW_CCQ_OPCODE_LSB) |
PVA_INSERT64(addr, PVA_FW_CCQ_QUEUE_ADDR_MSB,
PVA_FW_CCQ_QUEUE_ADDR_LSB) |
PVA_INSERT64(max_submit, PVA_FW_CCQ_QUEUE_N_ENTRIES_MSB,
PVA_FW_CCQ_QUEUE_N_ENTRIES_LSB);
pva_kmd_mutex_lock(&pva->ccq0_lock);
err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
ASSERT(err == PVA_SUCCESS);
pva_kmd_mutex_unlock(&pva->ccq0_lock);
}
/**
 * Initialize submission-related data structures for this device.
*
* - Create a resource table.
* - Add DRAM resources to the resource table. These are used for command buffer
* chunks and post fences.
* - Create a queue.
*/
static void pva_kmd_device_init_submission(struct pva_kmd_device *pva)
{
uint32_t queue_mem_size;
uint64_t chunk_mem_size;
uint64_t size;
enum pva_error err;
struct pva_fw_postfence post_fence = { 0 };
/* Init KMD's queue */
queue_mem_size = pva_get_submission_queue_memory_size(
PVA_KMD_MAX_NUM_KMD_SUBMITS);
pva->queue_memory = pva_kmd_device_memory_alloc_map(
queue_mem_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(pva->queue_memory != NULL);
pva_kmd_queue_init(&pva->dev_queue, pva, PVA_PRIV_CCQ_ID,
0 /* KMD's queue ID is 0 */, &pva->ccq0_lock,
pva->queue_memory, PVA_KMD_MAX_NUM_KMD_SUBMITS);
/* Init KMD's resource table */
err = pva_kmd_resource_table_init(&pva->dev_resource_table, pva,
PVA_R5_SMMU_CONTEXT_ID,
PVA_KMD_MAX_NUM_KMD_RESOURCES,
PVA_KMD_MAX_NUM_KMD_DMA_CONFIGS);
ASSERT(err == PVA_SUCCESS);
/* Allocate memory for submission*/
chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size(
PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_KMD_CHUNKS);
size = safe_addu64(chunk_mem_size, (uint64_t)sizeof(uint32_t));
/* Allocate one post fence at the end. We don't need to free this memory
* explicitly as it will be freed after we drop the resource. */
pva->submit_memory = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(pva->submit_memory != NULL);
/* Add submit memory to resource table */
err = pva_kmd_add_dram_buffer_resource(&pva->dev_resource_table,
pva->submit_memory,
&pva->submit_memory_resource_id);
ASSERT(err == PVA_SUCCESS);
pva_kmd_update_fw_resource_table(&pva->dev_resource_table);
/* Init chunk pool */
pva_kmd_cmdbuf_chunk_pool_init(
&pva->chunk_pool, pva->submit_memory_resource_id, 0,
chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE,
PVA_KMD_MAX_NUM_KMD_CHUNKS, pva->submit_memory->va);
/* Init fence */
pva->fence_offset = chunk_mem_size;
/* Init submitter */
pva_kmd_mutex_init(&pva->submit_lock);
pva_kmd_mutex_init(&pva->chunk_pool_lock);
post_fence.resource_id = pva->submit_memory_resource_id;
post_fence.offset_lo = iova_lo(pva->fence_offset);
post_fence.offset_hi = iova_hi(pva->fence_offset);
post_fence.ts_resource_id = PVA_RESOURCE_ID_INVALID;
pva_kmd_submitter_init(
&pva->submitter, &pva->dev_queue, &pva->submit_lock,
&pva->chunk_pool, &pva->chunk_pool_lock,
pva_offset_pointer(pva->submit_memory->va, pva->fence_offset),
&post_fence);
}
static void pva_kmd_device_deinit_submission(struct pva_kmd_device *pva)
{
pva_kmd_mutex_deinit(&pva->chunk_pool_lock);
pva_kmd_mutex_deinit(&pva->submit_lock);
pva_kmd_cmdbuf_chunk_pool_deinit(&pva->chunk_pool);
/* Submit memory will be freed after dropping the resource */
pva_kmd_drop_resource(&pva->dev_resource_table,
pva->submit_memory_resource_id);
pva_kmd_resource_table_deinit(&pva->dev_resource_table);
pva_kmd_queue_deinit(&pva->dev_queue);
pva_kmd_device_memory_free(pva->queue_memory);
}
struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
uint32_t device_index,
bool app_authenticate)
{
struct pva_kmd_device *pva;
enum pva_error err;
uint32_t chunk_size;
uint32_t size;
pva = pva_kmd_zalloc_nofail(sizeof(*pva));
pva->device_index = device_index;
pva->load_from_gsc = false;
pva->is_hv_mode = true;
pva->max_n_contexts = PVA_MAX_NUM_USER_CONTEXTS;
pva_kmd_mutex_init(&pva->powercycle_lock);
pva_kmd_mutex_init(&pva->ccq0_lock);
pva_kmd_mutex_init(&pva->resource_table_lock);
pva_kmd_sema_init(&pva->fw_boot_sema, 0);
size = safe_mulu32((uint32_t)sizeof(struct pva_kmd_context),
pva->max_n_contexts);
pva->context_mem = pva_kmd_zalloc(size);
ASSERT(pva->context_mem != NULL);
err = pva_kmd_block_allocator_init(&pva->context_allocator,
pva->context_mem,
PVA_KMD_USER_CONTEXT_ID_BASE,
sizeof(struct pva_kmd_context),
pva->max_n_contexts);
ASSERT(err == PVA_SUCCESS);
if (chip_id == PVA_CHIP_T23X) {
pva_kmd_device_init_t23x(pva);
} else if (chip_id == PVA_CHIP_T26X) {
pva_kmd_device_init_t26x(pva);
} else {
FAULT("SOC not supported");
}
pva_kmd_device_plat_init(pva);
chunk_size = safe_mulu32((uint32_t)sizeof(struct pva_syncpt_rw_info),
(uint32_t)PVA_NUM_RW_SYNCPTS_PER_CONTEXT);
err = pva_kmd_block_allocator_init(&pva->syncpt_allocator,
pva->syncpt_rw, 0, chunk_size,
PVA_MAX_NUM_USER_CONTEXTS);
ASSERT(err == PVA_SUCCESS);
pva_kmd_device_init_submission(pva);
err = pva_kmd_init_vpu_app_auth(pva, app_authenticate);
ASSERT(err == PVA_SUCCESS);
pva->is_suspended = false;
return pva;
}
static void pva_kmd_wait_for_active_contexts(struct pva_kmd_device *pva)
{
uint8_t allocated = 0;
/* Make sure no context is active by allocating all contexts here. */
while (allocated < pva->max_n_contexts) {
uint32_t unused_id;
struct pva_kmd_context *ctx;
ctx = pva_kmd_alloc_block(&pva->context_allocator, &unused_id);
if (ctx != NULL) {
allocated = safe_addu32(allocated, 1U);
} else {
pva_kmd_sleep_us(1000);
}
}
}
void pva_kmd_device_destroy(struct pva_kmd_device *pva)
{
pva_kmd_wait_for_active_contexts(pva);
pva_kmd_device_deinit_submission(pva);
pva_kmd_device_plat_deinit(pva);
pva_kmd_block_allocator_deinit(&pva->syncpt_allocator);
pva_kmd_block_allocator_deinit(&pva->context_allocator);
pva_kmd_free(pva->context_mem);
pva_kmd_mutex_deinit(&pva->ccq0_lock);
pva_kmd_mutex_deinit(&pva->resource_table_lock);
pva_kmd_mutex_deinit(&pva->powercycle_lock);
pva_kmd_free(pva->pva_auth);
pva_kmd_free(pva);
}
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0) {
pva_kmd_allocate_syncpts(pva);
err = pva_kmd_power_on(pva);
if (err != PVA_SUCCESS) {
goto unlock;
}
err = pva_kmd_init_fw(pva);
if (err != PVA_SUCCESS) {
goto unlock;
}
/* Reset KMD queue */
pva->dev_queue.queue_header->cb_head = 0;
pva->dev_queue.queue_header->cb_tail = 0;
pva_kmd_send_resource_table_info_by_ccq(
pva, &pva->dev_resource_table);
pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
pva_kmd_notify_fw_enable_profiling(pva);
}
pva->refcount = safe_addu32(pva->refcount, 1U);
unlock:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
}
void pva_kmd_device_idle(struct pva_kmd_device *pva)
{
pva_kmd_mutex_lock(&pva->powercycle_lock);
ASSERT(pva->refcount > 0);
pva->refcount--;
if (pva->refcount == 0) {
/* Disable FW profiling */
/* TODO: once debugfs is up, move these calls */
// pva_kmd_notify_fw_disable_profiling(pva);
// pva_kmd_drain_fw_profiling_buffer(pva,
// &pva->fw_profiling_buffer);
pva_kmd_deinit_fw(pva);
pva_kmd_power_off(pva);
}
pva_kmd_mutex_unlock(&pva->powercycle_lock);
}
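/*
 * A minimal sketch of the intended busy/idle pairing, assuming the caller
 * needs the FW up for the duration of an operation; every successful
 * pva_kmd_device_busy() must be balanced by one pva_kmd_device_idle().
 */
static enum pva_error example_with_device_powered(struct pva_kmd_device *pva)
{
        enum pva_error err;

        err = pva_kmd_device_busy(pva); /* powers on and boots FW on 0 -> 1 */
        if (err != PVA_SUCCESS) {
                return err;
        }
        /* ...interact with the FW here... */
        pva_kmd_device_idle(pva); /* deinits FW and powers off on 1 -> 0 */
        return PVA_SUCCESS;
}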
enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva,
uint8_t ccq_id, uint64_t ccq_entry,
uint64_t sleep_interval_us,
uint64_t timeout_us)
{
/* spin until we have space or timeout reached */
while (pva_kmd_get_ccq_space(pva, ccq_id) == 0) {
if (timeout_us == 0) {
pva_kmd_log_err(
"pva_kmd_ccq_push_with_timeout Timed out");
return PVA_TIMEDOUT;
}
pva_kmd_sleep_us(sleep_interval_us);
timeout_us = sat_sub64(timeout_us, sleep_interval_us);
}
/* TODO: memory write barrier is needed here */
pva_kmd_ccq_push(pva, ccq_id, ccq_entry);
return PVA_SUCCESS;
}

View File

@@ -0,0 +1,158 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_DEVICE_H
#define PVA_KMD_DEVICE_H
#include "pva_constants.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_submitter.h"
#include "pva_kmd_regs.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_fw_debug.h"
#include "pva_kmd_shim_init.h"
#include "pva_kmd_shim_ccq.h"
#include "pva_kmd_fw_profiler.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_debugfs.h"
struct pva_syncpt_rw_info {
/** Don't change the field order: syncpt_id and syncpt_iova are prefilled during
 * KMD boot, and the first field is overwritten by the block allocator every time
 * the block is freed */
uint32_t syncpt_value;
uint32_t syncpt_id;
uint64_t syncpt_iova;
};
/** A struct to maintain start and end address of vmem region */
struct vmem_region {
	/** Start address of vmem region */
	uint32_t start;
	/** End address of vmem region */
	uint32_t end;
};
struct pva_kmd_hw_constants {
enum pva_hw_gen hw_gen;
uint8_t n_vmem_regions;
uint32_t n_dma_descriptors;
uint32_t n_user_dma_channels;
uint32_t n_hwseq_words;
uint32_t n_dynamic_adb_buffs;
uint32_t n_smmu_contexts;
};
/**
* @brief This struct manages a single PVA cluster.
*
 * Fields in this struct should be common across all platforms.
 * Platform-specific data is stored in the plat_data field.
*/
struct pva_kmd_device {
uint32_t device_index;
uint32_t r5_image_smmu_context_id;
uint32_t stream_ids[PVA_MAX_NUM_SMMU_CONTEXTS];
struct pva_kmd_hw_constants hw_consts;
uint64_t reg_phy_base[PVA_KMD_APERTURE_COUNT];
uint64_t reg_size[PVA_KMD_APERTURE_COUNT];
struct pva_kmd_regspec regspec;
uint8_t max_n_contexts;
void *context_mem;
struct pva_kmd_block_allocator context_allocator;
pva_kmd_mutex_t resource_table_lock;
struct pva_kmd_resource_table dev_resource_table;
struct pva_kmd_submitter submitter;
/** The lock protects the submission to the queue, including
* incrementing the post fence */
pva_kmd_mutex_t submit_lock;
struct pva_kmd_device_memory *queue_memory;
struct pva_kmd_queue dev_queue;
pva_kmd_mutex_t ccq0_lock;
/** memory needed for submission: including command buffer chunks and fences */
struct pva_kmd_device_memory *submit_memory;
uint32_t submit_memory_resource_id;
	uint64_t fence_offset; /**< fence offset within submit_memory */
pva_kmd_mutex_t chunk_pool_lock;
struct pva_kmd_cmdbuf_chunk_pool chunk_pool;
pva_kmd_mutex_t powercycle_lock;
uint32_t refcount;
	/** ISR posts this semaphore when FW completes boot */
pva_kmd_sema_t fw_boot_sema;
struct pva_kmd_device_memory *fw_debug_mem;
struct pva_kmd_device_memory *fw_bin_mem;
struct pva_kmd_device_memory *fw_profiling_buffer_memory;
uint32_t fw_profiling_buffer_resource_id;
struct pva_kmd_fw_profiling_buffer fw_profiling_buffer;
struct pva_kmd_fw_print_buffer fw_print_buffer;
struct pva_kmd_device_memory *tegra_stats_memory;
uint32_t tegra_stats_resource_id;
uint32_t tegra_stats_buf_size;
bool load_from_gsc;
bool is_hv_mode;
struct pva_kmd_debugfs_context debugfs_context;
/** Sector packing format for block linear surfaces */
uint8_t bl_sector_pack_format;
/** Offset between 2 syncpoints */
uint32_t syncpt_offset;
uint64_t syncpt_ro_iova;
uint64_t syncpt_rw_iova;
uint32_t num_syncpts;
struct pva_syncpt_rw_info syncpt_rw[PVA_NUM_RW_SYNCPTS];
struct pva_kmd_block_allocator syncpt_allocator;
struct vmem_region *vmem_regions_tab;
bool support_hwseq_frame_linking;
void *plat_data;
void *fw_handle;
struct pva_vpu_auth *pva_auth;
bool is_suspended;
};
struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
uint32_t device_index,
bool app_authenticate);
void pva_kmd_device_destroy(struct pva_kmd_device *pva);
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva);
void pva_kmd_device_idle(struct pva_kmd_device *pva);
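/**
 * Power-management usage sketch (illustrative only, not part of this change;
 * chip_id and the work submission are placeholders, error handling omitted):
 *
 * @code
 *   struct pva_kmd_device *pva = pva_kmd_device_create(chip_id, 0U, true);
 *
 *   if (pva_kmd_device_busy(pva) == PVA_SUCCESS) {
 *           // cluster is powered and FW is booted; submit work here
 *           pva_kmd_device_idle(pva);
 *   }
 *   pva_kmd_device_destroy(pva);
 * @endcode
 */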
enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva,
uint8_t ccq_id, uint64_t ccq_entry,
uint64_t sleep_interval_us,
uint64_t timeout_us);
void pva_kmd_send_resource_table_info_by_ccq(
struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table);
void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
struct pva_kmd_queue *queue);
#endif // PVA_KMD_DEVICE_H

View File

@@ -0,0 +1,148 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_dma_cfg.h"
#include "pva_utils.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device.h"
#define PVA_KMD_INVALID_CH_IDX 0xFF
void pva_kmd_unload_dma_config(struct pva_kmd_dma_resource_aux *dma_aux)
{
uint32_t i;
for (i = 0; i < dma_aux->dram_res_count; i++) {
pva_kmd_drop_resource(dma_aux->res_table,
dma_aux->static_dram_res_ids[i]);
}
if (dma_aux->vpu_bin_res_id != PVA_RESOURCE_ID_INVALID) {
pva_kmd_drop_resource(dma_aux->res_table,
dma_aux->vpu_bin_res_id);
}
}
static void trace_dma_channels(struct pva_dma_config const *dma_config,
uint8_t *desc_to_ch)
{
uint32_t ch_index;
struct pva_dma_config_header const *cfg_hdr = &dma_config->header;
struct pva_dma_channel *channel;
uint32_t num_descs = dma_config->header.num_descriptors;
for (ch_index = 0; ch_index < cfg_hdr->num_channels; ch_index++) {
uint8_t desc_index;
channel = &dma_config->channels[ch_index];
desc_index = channel->desc_index;
for (uint32_t i = 0; i < PVA_MAX_NUM_DMA_DESC; i++) {
desc_index = array_index_nospec(desc_index, num_descs);
if (desc_to_ch[desc_index] != PVA_KMD_INVALID_CH_IDX) {
//Already traced this descriptor
break;
}
desc_to_ch[desc_index] = ch_index;
desc_index = sat_sub8(
dma_config->descriptors[desc_index].link_desc_id,
1);
}
}
}
enum pva_error
pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table,
void *dma_config_payload, uint32_t dma_config_size,
struct pva_kmd_dma_resource_aux *dma_aux,
void *fw_dma_cfg, uint32_t *out_fw_fetch_size)
{
enum pva_error err = PVA_SUCCESS;
uint32_t fw_fetch_size;
struct pva_dma_config dma_config;
struct pva_fw_dma_slot *dyn_slots;
struct pva_fw_dma_reloc *dyn_relocs;
struct pva_fw_dma_slot *static_slots = dma_aux->static_slots;
struct pva_fw_dma_reloc *static_relocs = dma_aux->static_relocs;
struct pva_kmd_dma_access *access_sizes = dma_aux->access_sizes;
// Mapping descriptor index to channel index
uint8_t desc_to_ch[PVA_MAX_NUM_DMA_DESC];
for (uint32_t i = 0; i < PVA_MAX_NUM_DMA_DESC; i++) {
desc_to_ch[i] = PVA_KMD_INVALID_CH_IDX;
}
//set access_sizes to 0 by default
(void)memset(
access_sizes, 0,
(PVA_MAX_NUM_DMA_DESC * sizeof(struct pva_kmd_dma_access)));
err = pva_kmd_parse_dma_config(dma_config_payload, dma_config_size,
&dma_config,
&resource_table->pva->hw_consts);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_validate_dma_config(&dma_config,
&resource_table->pva->hw_consts,
access_sizes,
dma_aux->hw_dma_descs_mask);
if (err != PVA_SUCCESS) {
goto err_out;
}
trace_dma_channels(&dma_config, desc_to_ch);
err = pva_kmd_compute_dma_access(&dma_config, access_sizes,
dma_aux->hw_dma_descs_mask);
if (err != PVA_SUCCESS) {
goto err_out;
}
dyn_slots = pva_offset_pointer(fw_dma_cfg,
sizeof(struct pva_dma_config_resource));
dyn_relocs = pva_offset_pointer(dyn_slots,
dma_config.header.num_dynamic_slots *
sizeof(*dyn_slots));
pva_kmd_collect_relocs(&dma_config, access_sizes, static_slots,
dma_config.header.num_static_slots,
static_relocs, dyn_slots,
dma_config.header.num_dynamic_slots, dyn_relocs,
desc_to_ch);
pva_kmd_write_fw_dma_config(
&dma_config, fw_dma_cfg, &fw_fetch_size,
resource_table->pva->support_hwseq_frame_linking);
dma_aux->res_table = resource_table;
err = pva_kmd_dma_use_resources(&dma_config, dma_aux);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_bind_static_buffers(fw_dma_cfg, dma_aux, static_slots,
dma_config.header.num_static_slots,
static_relocs,
dma_config.static_bindings,
dma_config.header.num_static_slots);
if (err != PVA_SUCCESS) {
goto drop_res;
}
*out_fw_fetch_size = fw_fetch_size;
return PVA_SUCCESS;
drop_res:
pva_kmd_unload_dma_config(dma_aux);
err_out:
return err;
}

View File

@@ -0,0 +1,139 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_DMA_CFG_H
#define PVA_KMD_DMA_CFG_H
#include "pva_kmd.h"
#include "pva_resource.h"
/* Mask to extract the GOB offset from the Surface address */
#define PVA_DMA_BL_GOB_OFFSET_MASK 0x3E00U
/* Right shift value for moving GOB offset value extracted from surface address to LSB */
#define PVA_DMA_BL_GOB_OFFSET_MASK_RSH 6U
struct pva_kmd_dma_access_entry {
int64_t start_addr;
int64_t end_addr;
};
struct pva_kmd_dma_access {
struct pva_kmd_dma_access_entry src;
struct pva_kmd_dma_access_entry dst;
struct pva_kmd_dma_access_entry dst2;
};
struct pva_kmd_resource_table;
struct pva_kmd_hw_constants;
/** Auxiliary information needed for managing DMA resources:
*
* - Hold references to DRAM buffers and VPU bin used by the DMA configuration.
* - Scratch buffers needed during DMA configuration loading.
*/
struct pva_kmd_dma_resource_aux {
struct pva_kmd_resource_table *res_table;
uint32_t vpu_bin_res_id;
uint32_t dram_res_count;
/** DRAM buffers statically referenced by the DMA configuration */
uint32_t static_dram_res_ids[PVA_KMD_MAX_NUM_DMA_DRAM_SLOTS];
	/* Below are work buffers needed during DMA configuration loading. They
	 * don't fit on the stack. */
struct pva_fw_dma_slot static_slots[PVA_KMD_MAX_NUM_DMA_SLOTS];
struct pva_fw_dma_reloc static_relocs[PVA_KMD_MAX_NUM_DMA_SLOTS];
struct pva_kmd_dma_access access_sizes[PVA_MAX_NUM_DMA_DESC];
uint64_t hw_dma_descs_mask[((PVA_MAX_NUM_DMA_DESC / 64ULL) + 1ULL)];
};
enum pva_error
pva_kmd_parse_dma_config(void *dma_config, uint32_t dma_config_size,
struct pva_dma_config *out_cfg,
struct pva_kmd_hw_constants const *hw_consts);
enum pva_error
pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_resource_aux *dma_aux);
enum pva_error
pva_kmd_validate_dma_config(struct pva_dma_config const *dma_cfg,
struct pva_kmd_hw_constants const *hw_consts,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask);
enum pva_error
pva_kmd_compute_dma_access(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask);
void pva_kmd_collect_relocs(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access const *access_sizes,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_reloc *out_static_relocs,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots,
struct pva_fw_dma_reloc *out_dyn_relocs,
uint8_t const *desc_to_ch);
/**
* @brief Bind static buffers to the DMA configuration.
*
* When binding static buffers, we edit pva_dma_config in-place and replace the
* offset field with the final addresses of static buffers.
*
* We also validate that the DMA configuration does not access those static
* buffers out of range.
*/
enum pva_error pva_kmd_bind_static_buffers(
struct pva_dma_config_resource *fw_dma_cfg,
struct pva_kmd_dma_resource_aux *dma_aux,
struct pva_fw_dma_slot const *static_slots, uint16_t num_static_slots,
struct pva_fw_dma_reloc const *static_relocs,
struct pva_dma_static_binding const *static_bindings,
uint32_t num_static_bindings);
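/**
 * Worked example (illustrative numbers, not from this change): a pitch-linear
 * DRAM binding whose buffer iova is 0x80000000, with surface_base_offset
 * 0x100, slot_offset 0x40 and a descriptor-level offset of 0x10, rewrites the
 * descriptor address to 0x80000000 + 0x100 + 0x40 + 0x10 = 0x80000150. The
 * slot's recorded access range, shifted by slot_offset + surface_base_offset,
 * must stay within the size of the bound buffer.
 */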
/**
* @brief Convert user DMA configuration to firmware format.
*/
void pva_kmd_write_fw_dma_config(struct pva_dma_config const *dma_cfg,
void *fw_dma_config,
uint32_t *out_fw_fetch_size,
bool support_hwseq_frame_linking);
/**
* @brief Load DMA configuration into firmware format.
*
 * This function does the following:
*
* - Validate the DMA configuration.
* - Bind static resources (buffers) and embed their addresses directly in the
* firmware DMA configuration.
* - Hold references to DRAM buffers and VPU bin used by the DMA configuration.
* - Convert the DMA configuration into firmware format.
*
* @param resource_table the resource table for the context.
* @param dma_config DMA configuration from user space.
* @param dma_config_size Size of the dma_config buffer.
* @param dma_aux Auxiliary information needed for loading the DMA
* configuration.
* @param fw_dma_cfg Output buffer for the firmware DMA configuration.
* @param out_fw_fetch_size Size of the firmware DMA configuration that needs to
* be fetched into TCM.
*/
enum pva_error
pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table,
void *dma_config, uint32_t dma_config_size,
struct pva_kmd_dma_resource_aux *dma_aux,
void *fw_dma_cfg, uint32_t *out_fw_fetch_size);
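/**
 * Usage sketch (a minimal illustration, not part of this change; ctx_res_table,
 * user_cfg, user_cfg_size, aux and fw_cfg_buf are assumed to be provided by
 * the caller, and error handling is abbreviated):
 *
 * @code
 *   uint32_t fetch_size;
 *   enum pva_error err;
 *
 *   err = pva_kmd_load_dma_config(ctx_res_table, user_cfg, user_cfg_size,
 *                                 aux, fw_cfg_buf, &fetch_size);
 *   if (err == PVA_SUCCESS) {
 *           // fw_cfg_buf now holds the firmware-format DMA configuration;
 *           // the first fetch_size bytes are what the FW fetches into TCM.
 *           // When the configuration is torn down:
 *           pva_kmd_unload_dma_config(aux);
 *   }
 * @endcode
 */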
void pva_kmd_unload_dma_config(struct pva_kmd_dma_resource_aux *dma_aux);
#endif // PVA_KMD_DMA_CFG_H

View File

@@ -0,0 +1,369 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device_memory.h"
#include "pva_api.h"
#include "pva_kmd_dma_cfg.h"
#include "pva_api_dma.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_device.h"
static uint32_t get_slot_line_pitch(struct pva_fw_dma_descriptor *descs,
struct pva_fw_dma_reloc const *relocs,
struct pva_fw_dma_slot const *slot)
{
struct pva_fw_dma_reloc const *reloc = &relocs[slot->reloc_start_idx];
uint32_t first_desc_index = reloc->desc_index;
struct pva_fw_dma_descriptor *first_desc = &descs[first_desc_index];
uint8_t log2_bpp =
PVA_EXTRACT(first_desc->transfer_control1, 1, 0, uint8_t);
if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) {
return first_desc->slp_adv << log2_bpp;
} else {
return first_desc->dlp_adv << log2_bpp;
}
}
static enum pva_error
set_channel_block_height(struct pva_dma_config_resource *dma_config,
uint16_t ch_mask, uint8_t log2_block_height)
{
struct pva_fw_dma_channel *channels =
pva_dma_config_get_channels(dma_config);
// max block height is 32 GOB
if (log2_block_height > PVA_DMA_MAX_LOG2_BLOCK_HEIGHT) {
pva_kmd_log_err("Invalid block height");
return PVA_ERR_CMD_INVALID_BLOCK_HEIGHT;
}
while (ch_mask > 0) {
uint8_t ch_index = __builtin_ctz(ch_mask);
if (dma_config->ch_block_height_fixed_mask & (1 << ch_index)) {
/* If this bit is already set, it means block height cannot be changed. */
uint8_t set_bh = PVA_EXTRACT(channels[ch_index].cntl0,
27, 25, uint8_t);
if (set_bh != log2_block_height) {
pva_kmd_log_err("Conflicting block height");
return PVA_INVAL;
}
} else {
channels[ch_index].cntl0 &= ~PVA_MASK(27, 25);
channels[ch_index].cntl0 |=
PVA_INSERT(log2_block_height, 27, 25);
dma_config->ch_block_height_fixed_mask |=
(1 << ch_index);
}
ch_mask &= ~(1 << ch_index);
}
return PVA_SUCCESS;
}
static enum pva_error
bind_static_dram_slot(struct pva_dma_config_resource *dma_config,
struct pva_kmd_dma_resource_aux *dma_aux,
struct pva_fw_dma_slot const *slot,
struct pva_fw_dma_reloc const *static_relocs,
struct pva_dma_dram_binding const *dram_bd)
{
struct pva_fw_dma_descriptor *descs =
pva_dma_config_get_descriptors(dma_config);
enum pva_error err = PVA_SUCCESS;
struct pva_fw_dma_reloc const *relocs;
bool is_block_linear =
(dram_bd->surface_format == PVA_SURF_FMT_BLOCK_LINEAR);
uint32_t line_pitch = get_slot_line_pitch(descs, static_relocs, slot);
uint8_t log2_block_height = dram_bd->log2_block_height;
struct pva_kmd_dram_resource *dram_res =
&pva_kmd_peek_resource(dma_aux->res_table, dram_bd->resource_id)
->dram;
uint64_t slot_offset_pl = dram_bd->slot_offset;
uint64_t surface_base_addr =
sat_add64(dram_bd->surface_base_offset, dram_res->mem->iova);
/* When binding a buffer, we add the binding->surface_base_offset to the
* buffer base address. Therefore, the effective buffer size is
* reduced by the offset. */
uint64_t max_surface_size =
sat_sub64(dram_res->mem->size, dram_bd->surface_base_offset);
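	/* For example (illustrative numbers): a 0x1000-byte buffer bound with
	 * surface_base_offset 0x100 leaves max_surface_size = 0xF00 for the
	 * slot, before the offset is added back for the range check below. */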
uint64_t sector_pack_format = 0;
int64_t slot_access_start_addr = 0LL;
int64_t slot_access_end_addr = 0LL;
uint64_t slot_surface_combined_offset = 0ULL;
pva_math_error math_error = MATH_OP_SUCCESS;
if ((slot->flags & PVA_FW_DMA_SLOT_FLAG_DRAM) == 0) {
pva_kmd_log_err("Binding DRAM buffer to incompatible slot");
err = PVA_INVALID_BINDING;
goto out;
}
if (is_block_linear) {
if (slot->flags & PVA_FW_DMA_SLOT_FLAG_CB) {
pva_kmd_log_err(
"Block linear surface is not compatible with circular buffer");
err = PVA_INVALID_BINDING;
goto out;
}
max_surface_size =
pva_max_bl_surface_size(max_surface_size,
log2_block_height, line_pitch,
&math_error);
if (math_error != MATH_OP_SUCCESS) {
pva_kmd_log_err(
"bind_static_dram_slot pva_max_bl_surface_size triggered a math error");
err = PVA_ERR_MATH_OP;
goto out;
}
if (!pva_is_512B_aligned(surface_base_addr)) {
pva_kmd_log_err(
"BL surface base address is not 512B aligned");
err = PVA_BAD_SURFACE_BASE_ALIGNMENT;
goto out;
}
err = set_channel_block_height(dma_config, slot->ch_use_mask,
dram_bd->log2_block_height);
if (err != PVA_SUCCESS) {
goto out;
}
sector_pack_format =
dma_aux->res_table->pva->bl_sector_pack_format;
}
slot_surface_combined_offset = addu64(
slot_offset_pl, dram_bd->surface_base_offset, &math_error);
if (slot_surface_combined_offset >= (uint64_t)MAX_INT64) {
pva_kmd_log_err("Slot surface offset too large");
return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE;
}
slot_access_start_addr =
adds64(slot->start_addr, (int64_t)slot_surface_combined_offset,
&math_error);
slot_access_end_addr =
adds64(slot->end_addr, (int64_t)slot_surface_combined_offset,
&math_error);
max_surface_size = addu64(max_surface_size,
dram_bd->surface_base_offset, &math_error);
if (max_surface_size >= (uint64_t)MAX_INT64) {
pva_kmd_log_err("DRAM buffer too large for slot binding");
return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE;
}
if (math_error != MATH_OP_SUCCESS) {
pva_kmd_log_err("Math error during slot binding");
return PVA_ERR_MATH_OP;
}
if (slot_access_start_addr < 0LL) {
pva_kmd_log_err(
"DRAM buffer offset underflows for slot binding");
return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE;
}
if (slot_access_end_addr > (int64_t)max_surface_size) {
pva_kmd_log_err("DRAM buffer too small for slot binding");
return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE;
}
relocs = &static_relocs[slot->reloc_start_idx];
for (uint32_t i = 0; i < slot->reloc_count; i++) {
struct pva_fw_dma_reloc const *reloc = &relocs[i];
struct pva_fw_dma_descriptor *desc = &descs[reloc->desc_index];
uint8_t *addr_hi_ptr;
uint32_t *addr_lo_ptr;
uint32_t format_field_shift = 0;
uint64_t addr;
uint64_t desc_offset_pl;
uint64_t offset;
if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) {
addr_hi_ptr = &desc->src_adr1;
addr_lo_ptr = &desc->src_adr0;
format_field_shift = 3; //SRC_TF in TRANSFER_CONTROL0
} else if (reloc->field == PVA_FW_DMA_RELOC_FIELD_DST) {
addr_hi_ptr = &desc->dst_adr1;
addr_lo_ptr = &desc->dst_adr0;
format_field_shift = 7; //DST_TF in TRANSFER_CONTROL0
} else { /* PVA_FW_DMA_RELOC_FIELD_DST2 */
pva_kmd_log_err("Binding DRAM buffer to DST2 slot");
err = PVA_INVAL;
goto out;
}
desc_offset_pl = assemble_addr(*addr_hi_ptr, *addr_lo_ptr);
offset = sat_add64(slot_offset_pl, desc_offset_pl);
desc->transfer_control0 &= ~(1 << format_field_shift);
if (is_block_linear) {
			/* We need to insert bits surface_base_addr[13:9] into
			 * transfer_control2[7:3] as specified by the DMA IAS. This
			 * helps the HW identify the starting GOB index inside a block. */
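			/* For example (illustrative value): surface_base_addr = 0x2600
			 * has bits [13:9] = 0x13, so transfer_control2[7:3] is set
			 * to 0x13. */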
desc->transfer_control2 &= ~PVA_MASK(7, 3);
desc->transfer_control2 |=
PVA_INSERT8(PVA_EXTRACT64(surface_base_addr, 13,
9, uint8_t),
7, 3);
desc->transfer_control0 |= 1 << format_field_shift;
offset = pva_pl_to_bl_offset(offset, line_pitch,
log2_block_height,
&math_error);
if (math_error != MATH_OP_SUCCESS) {
pva_kmd_log_err(
"pva_fw_do_cmd_bind_dram_slot pva_pl_to_bl_offset triggered a math error");
err = PVA_ERR_MATH_OP;
goto out;
}
if (!pva_is_64B_aligned(offset)) {
pva_kmd_log_err(
"Descriptor starting address is not aligned to 64 bytes");
err = PVA_BAD_DESC_ADDR_ALIGNMENT;
goto out;
}
}
addr = sat_add64(surface_base_addr, offset);
addr |= (sector_pack_format << PVA_BL_SECTOR_PACK_BIT_SHIFT);
*addr_hi_ptr = iova_hi(addr);
*addr_lo_ptr = iova_lo(addr);
}
out:
return err;
}
static enum pva_error
bind_static_vmem_slot(struct pva_dma_config_resource *dma_config,
struct pva_kmd_dma_resource_aux *dma_aux,
struct pva_fw_dma_slot const *slot,
struct pva_fw_dma_reloc const *static_relocs,
struct pva_dma_vmem_binding const *vmem_bd)
{
enum pva_error err = PVA_SUCCESS;
struct pva_fw_dma_descriptor *descs =
pva_dma_config_get_descriptors(dma_config);
struct pva_kmd_vpu_bin_resource *vpu_bin;
struct pva_symbol_info *sym;
uint32_t buffer_size, buffer_addr;
struct pva_fw_dma_reloc const *relocs;
enum pva_symbol_type needed_sym_type;
if (slot->flags & PVA_FW_DMA_SLOT_FLAG_VMEM_DATA) {
needed_sym_type = PVA_SYM_TYPE_DATA;
} else if (slot->flags & PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE) {
needed_sym_type = PVA_SYM_TYPE_VPUC_TABLE;
} else {
pva_kmd_log_err("Unexpected VMEM slot flags");
err = PVA_INTERNAL;
goto out;
}
#if defined(WAR_PVAAS16267)
needed_sym_type = PVA_SYM_TYPE_DATA;
#endif
vpu_bin = &pva_kmd_peek_resource(dma_aux->res_table,
dma_aux->vpu_bin_res_id)
->vpu_bin;
sym = pva_kmd_get_symbol_with_type(&vpu_bin->symbol_table,
vmem_bd->addr.symbol_id,
needed_sym_type);
if (sym == NULL) {
err = PVA_INVALID_SYMBOL;
goto out;
}
buffer_size = sat_sub32(sym->size, vmem_bd->addr.offset);
buffer_addr = sat_add32(sym->vmem_addr, vmem_bd->addr.offset);
if (buffer_size < get_slot_size(slot)) {
pva_kmd_log_err("VMEM buffer too small for slot binding");
err = PVA_RES_OUT_OF_RANGE;
goto out;
}
relocs = &static_relocs[slot->reloc_start_idx];
for (uint32_t i = 0; i < slot->reloc_count; i++) {
struct pva_fw_dma_reloc const *reloc = &relocs[i];
struct pva_fw_dma_descriptor *desc = &descs[reloc->desc_index];
if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) {
desc->src_adr0 = sat_add32(buffer_addr, desc->src_adr0);
} else if (reloc->field == PVA_FW_DMA_RELOC_FIELD_DST) {
desc->dst_adr0 = sat_add32(buffer_addr, desc->dst_adr0);
} else {
if (!pva_is_64B_aligned(buffer_addr)) {
pva_kmd_log_err(
"VMEM replication address not aligned to 64 bytes");
err = PVA_INVAL;
goto out;
}
desc->frda =
((uint16_t)(buffer_addr >> 6U) + desc->frda) &
0x3FFF;
}
}
out:
return err;
}
enum pva_error pva_kmd_bind_static_buffers(
struct pva_dma_config_resource *fw_dma_cfg_hdr,
struct pva_kmd_dma_resource_aux *dma_aux,
struct pva_fw_dma_slot const *static_slots, uint16_t num_static_slots,
struct pva_fw_dma_reloc const *static_relocs,
struct pva_dma_static_binding const *static_bindings,
uint32_t num_static_bindings)
{
uint32_t slot_id;
enum pva_error err = PVA_SUCCESS;
if (num_static_bindings != num_static_slots) {
pva_kmd_log_err("Invalid number of static bindings");
err = PVA_INVAL;
goto out;
}
// Reset BL status for each channel
fw_dma_cfg_hdr->ch_block_height_fixed_mask = 0U;
for (slot_id = 0U; slot_id < num_static_slots; slot_id++) {
struct pva_fw_dma_slot const *st_slot = &static_slots[slot_id];
struct pva_dma_static_binding const *binding =
&static_bindings[slot_id];
if (binding->type == PVA_DMA_STATIC_BINDING_DRAM) {
err = bind_static_dram_slot(fw_dma_cfg_hdr, dma_aux,
st_slot, static_relocs,
&binding->dram);
} else { // PVA_FW_DMA_SLOT_FLAG_VMEM
err = bind_static_vmem_slot(fw_dma_cfg_hdr, dma_aux,
st_slot, static_relocs,
&binding->vmem);
}
if (err != PVA_SUCCESS) {
goto out;
}
}
out:
return err;
}

View File

@@ -0,0 +1,821 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_hwseq_validate.h"
#include "pva_api.h"
#include "pva_kmd_dma_cfg.h"
#include "pva_api_dma.h"
#include "pva_kmd_device.h"
#include "pva_math_utils.h"
struct pva_fw_dma_reloc_slot_info {
struct pva_fw_dma_slot *slots;
struct pva_fw_dma_reloc *relocs;
uint16_t num_slots;
uint8_t *reloc_off;
};
struct pva_fw_dma_reloc_slots {
struct pva_fw_dma_reloc_slot_info dyn_slot;
struct pva_fw_dma_reloc_slot_info static_slot;
};
static enum pva_error
validate_channel_mapping(struct pva_dma_config const *out_cfg,
struct pva_kmd_hw_constants const *hw_consts)
{
struct pva_dma_channel *channel;
struct pva_dma_config_header const *cfg_hdr = &out_cfg->header;
pva_math_error math_err = MATH_OP_SUCCESS;
for (uint8_t i = 0U; i < cfg_hdr->num_channels; i++) {
channel = &out_cfg->channels[i];
if ((channel->desc_index >= out_cfg->header.num_descriptors) ||
(pva_is_reserved_desc(channel->desc_index))) {
pva_kmd_log_err(
"ERR: Invalid Channel Descriptor Index");
return PVA_INVAL;
}
if (addu8(channel->vdb_count, channel->vdb_offset, &math_err) >
PVA_NUM_DYNAMIC_VDB_BUFFS) {
pva_kmd_log_err("ERR: Invalid Channel control data");
return PVA_INVAL;
}
if (addu16(channel->adb_count, channel->adb_offset, &math_err) >
hw_consts->n_dynamic_adb_buffs) {
pva_kmd_log_err("ERR: Invalid ADB Buff Size or Offset");
return PVA_INVAL;
}
}
if (math_err != MATH_OP_SUCCESS) {
pva_kmd_log_err("validate_channel_mapping math error");
return PVA_ERR_MATH_OP;
}
return PVA_SUCCESS;
}
static enum pva_error validate_padding(struct pva_dma_descriptor *desc)
{
if ((desc->px != 0U) && (desc->px >= desc->tx)) {
return PVA_INVAL;
}
if ((desc->py != 0U) && (desc->py >= desc->ty)) {
return PVA_INVAL;
}
return PVA_SUCCESS;
}
static bool is_valid_vpu_trigger_mode(struct pva_dma_descriptor *desc)
{
bool valid = true;
if (desc->trig_event_mode != 0U) {
switch (desc->trig_vpu_events) {
case PVA_DMA_NO_TRIG:
//HW Sequencer check
break;
case PVA_DMA_TRIG_VPU_CFG:
if (desc->src.transfer_mode !=
PVA_DMA_TRANS_MODE_VPUCFG) {
valid = false;
}
break;
case PVA_DMA_TRIG_READ0:
case PVA_DMA_TRIG_READ1:
case PVA_DMA_TRIG_READ2:
case PVA_DMA_TRIG_READ3:
case PVA_DMA_TRIG_READ4:
case PVA_DMA_TRIG_READ5:
case PVA_DMA_TRIG_READ6:
if ((desc->src.transfer_mode !=
(uint8_t)PVA_DMA_TRANS_MODE_VPUCFG) &&
(desc->dst.transfer_mode !=
(uint8_t)PVA_DMA_TRANS_MODE_VMEM)) {
valid = false;
}
break;
case PVA_DMA_TRIG_WRITE0:
case PVA_DMA_TRIG_WRITE1:
case PVA_DMA_TRIG_WRITE2:
case PVA_DMA_TRIG_WRITE3:
case PVA_DMA_TRIG_WRITE4:
case PVA_DMA_TRIG_WRITE5:
case PVA_DMA_TRIG_WRITE6:
if ((desc->src.transfer_mode !=
(uint8_t)PVA_DMA_TRANS_MODE_VPUCFG) &&
(desc->src.transfer_mode !=
(uint8_t)PVA_DMA_TRANS_MODE_VMEM)) {
valid = false;
}
break;
default:
valid = false;
break;
}
}
return valid;
}
static bool validate_src_dst_adv_val(struct pva_dma_descriptor *desc,
bool relax_dim3_check)
{
uint8_t is_any_rpt_zero = 0U;
is_any_rpt_zero = desc->src.rpt1 & desc->src.rpt2 & desc->dst.rpt1 &
desc->dst.rpt2;
if ((desc->trig_event_mode == (uint8_t)PVA_DMA_TRIG_MODE_4TH_DIM) &&
(is_any_rpt_zero == 0U)) {
return false;
}
if (desc->trig_event_mode == ((uint8_t)PVA_DMA_TRIG_MODE_3RD_DIM)) {
if (false == relax_dim3_check) {
if (((desc->src.rpt1 == 0U) &&
(desc->dst.rpt1 == 0U))) {
return false;
}
} else {
if (((desc->dst.rpt1 == 0U) ||
(desc->src.rpt1 > desc->dst.rpt1))) {
return false;
}
}
}
return true;
}
static enum pva_error
validate_dma_desc_trans_cntl2(struct pva_dma_descriptor *desc)
{
if ((desc->prefetch_enable != 0U) &&
((desc->tx == 0U) || (desc->ty == 0U) ||
(desc->src.transfer_mode != (uint32_t)PVA_DMA_TRANS_MODE_DRAM) ||
(desc->dst.transfer_mode != (uint32_t)PVA_DMA_TRANS_MODE_VMEM))) {
return PVA_INVAL;
}
return PVA_SUCCESS;
}
static enum pva_error
validate_descriptor(struct pva_dma_descriptor *desc,
struct pva_dma_config_header const *cfg_hdr)
{
enum pva_error err = PVA_SUCCESS;
err = validate_padding(desc);
if ((desc->dst.transfer_mode == PVA_DMA_TRANS_MODE_VMEM) &&
(err != PVA_SUCCESS)) {
return err;
}
if (!(is_valid_vpu_trigger_mode(desc))) {
pva_kmd_log_err("Bad trigger");
return PVA_INVAL;
}
	/* Check src/dst ADV values with respect to ECET bits */
if (false == validate_src_dst_adv_val(desc, false)) {
pva_kmd_log_err(
"Invalid src/dst ADV values with respect to ECET");
return PVA_INVAL;
}
/* DMA_DESC_TRANS CNTL2 */
if (PVA_SUCCESS != validate_dma_desc_trans_cntl2(desc)) {
pva_kmd_log_err("Bad trans cntl 2");
return PVA_INVAL;
}
/* DMA_DESC_LDID */
if ((desc->link_desc_id > cfg_hdr->num_descriptors) ||
((desc->link_desc_id != 0) &&
pva_is_reserved_desc(desc->link_desc_id - PVA_DMA_DESC0))) {
pva_kmd_log_err("ERR: Invalid linker Desc ID");
return PVA_INVAL;
}
return PVA_SUCCESS;
}
static bool
is_dma_config_header_valid(struct pva_dma_config_header const *cfg_hdr,
struct pva_kmd_hw_constants const *hw_consts)
{
if (((cfg_hdr->base_descriptor + cfg_hdr->num_descriptors) >
hw_consts->n_dma_descriptors) ||
((cfg_hdr->base_channel + cfg_hdr->num_channels) >
(hw_consts->n_user_dma_channels + 1U)) ||
((cfg_hdr->base_hwseq_word + cfg_hdr->num_hwseq_words) >
hw_consts->n_hwseq_words) ||
(cfg_hdr->num_static_slots > PVA_KMD_MAX_NUM_DMA_SLOTS) ||
(cfg_hdr->num_dynamic_slots > PVA_KMD_MAX_NUM_DMA_RELOCS) ||
(cfg_hdr->base_channel == 0U)) {
return false;
}
return true;
}
enum pva_error
pva_kmd_parse_dma_config(void *dma_config, uint32_t dma_config_size,
struct pva_dma_config *out_cfg,
struct pva_kmd_hw_constants const *hw_consts)
{
struct pva_dma_config_header const *cfg_hdr = dma_config;
uintptr_t offset = 0;
if (dma_config_size < sizeof(*cfg_hdr)) {
pva_kmd_log_err("DMA configuration too small");
return PVA_INVAL;
}
out_cfg->header = *cfg_hdr;
if (!(is_dma_config_header_valid(cfg_hdr, hw_consts))) {
pva_kmd_log_err("Invalid PVA DMA Configuration Header");
return PVA_INVAL;
}
offset += PVA_ALIGN8(sizeof(*cfg_hdr));
out_cfg->hwseq_words = pva_offset_pointer(dma_config, offset);
offset += PVA_ALIGN8(cfg_hdr->num_hwseq_words *
sizeof(*out_cfg->hwseq_words));
out_cfg->channels = pva_offset_pointer(dma_config, offset);
offset +=
PVA_ALIGN8(cfg_hdr->num_channels * sizeof(*out_cfg->channels));
out_cfg->descriptors = pva_offset_pointer(dma_config, offset);
offset += PVA_ALIGN8(cfg_hdr->num_descriptors *
sizeof(*out_cfg->descriptors));
out_cfg->static_bindings = pva_offset_pointer(dma_config, offset);
offset += PVA_ALIGN8(cfg_hdr->num_static_slots *
sizeof(*out_cfg->static_bindings));
if (offset > dma_config_size) {
pva_kmd_log_err("DMA configuration is smaller than expected");
return PVA_INVAL;
}
return PVA_SUCCESS;
}
static enum pva_error
validate_descriptors(struct pva_dma_config const *dma_config)
{
uint32_t i = 0U;
enum pva_error err = PVA_SUCCESS;
struct pva_dma_config_header const *cfg_hdr = &dma_config->header;
struct pva_dma_descriptor *desc;
for (i = 0; i < cfg_hdr->num_descriptors; i++) {
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
desc = &dma_config->descriptors[i];
err = validate_descriptor(desc, cfg_hdr);
if (err != PVA_SUCCESS) {
return err;
}
}
return err;
}
enum pva_error
pva_kmd_validate_dma_config(struct pva_dma_config const *dma_config,
struct pva_kmd_hw_constants const *hw_consts,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask)
{
enum pva_error err = PVA_SUCCESS;
err = validate_channel_mapping(dma_config, hw_consts);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Bad Channels");
return err;
}
err = validate_descriptors(dma_config);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Bad Descriptors");
return err;
}
if (dma_config->header.num_hwseq_words != 0U) {
err = validate_hwseq(dma_config, hw_consts, access_sizes,
hw_dma_descs_mask);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Bad HW Sequencer Blob");
return err;
}
}
return err;
}
enum pva_error
pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_resource_aux *dma_aux)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_vpu_bin_resource *vpu_bin = NULL;
uint32_t i;
/* Increment reference count for VPU bin */
if (dma_cfg->header.vpu_exec_resource_id != PVA_RESOURCE_ID_INVALID) {
struct pva_kmd_resource_record *vpu_bin_rec;
vpu_bin_rec = pva_kmd_use_resource(
dma_aux->res_table,
dma_cfg->header.vpu_exec_resource_id);
if (vpu_bin_rec == NULL) {
pva_kmd_log_err(
"VPU exec resource id used by DMA config does not exist");
err = PVA_INVAL;
goto err_out;
}
if (vpu_bin_rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) {
pva_kmd_log_err(
"Invalid VPU exec resource id used by DMA config");
err = PVA_INVAL;
goto drop_vpu_bin;
}
vpu_bin = &vpu_bin_rec->vpu_bin;
}
dma_aux->vpu_bin_res_id = dma_cfg->header.vpu_exec_resource_id;
dma_aux->dram_res_count = 0;
/* Increment reference count for all static DRAM buffers; For static
* VMEM buffers, check that symbol ID is valid. */
for (i = 0; i < dma_cfg->header.num_static_slots; i++) {
struct pva_dma_static_binding const *slot_buf =
&dma_cfg->static_bindings[i];
if (slot_buf->type == PVA_DMA_STATIC_BINDING_DRAM) {
struct pva_kmd_resource_record *rec;
rec = pva_kmd_use_resource(dma_aux->res_table,
slot_buf->dram.resource_id);
if (rec == NULL) {
pva_kmd_log_err(
"DRAM buffers used by DMA config do not exist");
err = PVA_INVAL;
goto drop_dram;
}
dma_aux->static_dram_res_ids[dma_aux->dram_res_count] =
slot_buf->dram.resource_id;
dma_aux->dram_res_count += 1;
if (rec->type != PVA_RESOURCE_TYPE_DRAM) {
				pva_kmd_log_err(
					"Invalid DRAM resource id used by DMA config");
err = PVA_INVAL;
goto drop_dram;
}
} else if (slot_buf->type == PVA_DMA_STATIC_BINDING_VMEM) {
if (vpu_bin == NULL) {
pva_kmd_log_err(
"VPU bin resource not found for static VMEM buffer");
err = PVA_INVAL;
goto drop_dram;
}
if (pva_kmd_get_symbol(&vpu_bin->symbol_table,
slot_buf->vmem.addr.symbol_id) ==
NULL) {
pva_kmd_log_err("Invalid VMEM symbol ID");
err = PVA_INVAL;
goto drop_dram;
}
} else {
pva_kmd_log_err("Invalid slot buffer type");
err = PVA_INVAL;
goto drop_dram;
}
}
return PVA_SUCCESS;
drop_dram:
for (i = 0; i < dma_aux->dram_res_count; i++) {
pva_kmd_drop_resource(dma_aux->res_table,
dma_aux->static_dram_res_ids[i]);
}
drop_vpu_bin:
if (dma_aux->vpu_bin_res_id != PVA_RESOURCE_ID_INVALID) {
pva_kmd_drop_resource(dma_aux->res_table,
dma_aux->vpu_bin_res_id);
}
err_out:
return err;
}
static uint16_t get_slot_id(uint16_t slot)
{
return slot & PVA_DMA_SLOT_ID_MASK;
}
static uint8_t get_slot_flag(uint8_t transfer_mode, bool cb_enable)
{
uint8_t flags = 0;
if (transfer_mode == PVA_DMA_TRANS_MODE_VMEM) {
flags |= PVA_FW_DMA_SLOT_FLAG_VMEM_DATA;
} else if (transfer_mode == PVA_DMA_TRANS_MODE_L2SRAM) {
flags |= PVA_FW_DMA_SLOT_FLAG_L2SRAM;
} else if (transfer_mode == PVA_DMA_TRANS_MODE_DRAM) {
flags |= PVA_FW_DMA_SLOT_FLAG_DRAM;
} else if (transfer_mode == PVA_DMA_TRANS_MODE_VPUCFG) {
flags |= PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE;
}
if (cb_enable) {
flags |= PVA_FW_DMA_SLOT_FLAG_CB;
}
return flags;
}
static void update_reloc_count(uint16_t slot, uint8_t transfer_mode,
bool cb_enable,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots)
{
uint8_t slot_id = get_slot_id(slot);
if (slot & PVA_DMA_DYNAMIC_SLOT) {
out_dyn_slots[slot_id].reloc_count =
safe_addu16(out_dyn_slots[slot_id].reloc_count, 1U);
out_dyn_slots[slot_id].flags |=
get_slot_flag(transfer_mode, cb_enable);
} else if (slot & PVA_DMA_STATIC_SLOT) {
		out_static_slots[slot_id].reloc_count =
			safe_addu16(out_static_slots[slot_id].reloc_count, 1U);
out_static_slots[slot_id].flags |=
get_slot_flag(transfer_mode, cb_enable);
}
}
static void count_relocs(struct pva_dma_config const *dma_cfg,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots)
{
uint8_t i;
struct pva_dma_descriptor *desc;
for (i = 0U; i < dma_cfg->header.num_descriptors; i++) {
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
desc = &dma_cfg->descriptors[i];
update_reloc_count(desc->src.slot, desc->src.transfer_mode,
desc->src.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
update_reloc_count(desc->dst.slot, desc->dst.transfer_mode,
desc->dst.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
update_reloc_count(desc->dst2_slot, desc->dst.transfer_mode,
desc->dst.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
}
}
static void write_one_reloc(uint8_t ch_index, uint32_t desc_index,
uint16_t slot, uint8_t transfer_mode,
uint8_t reloc_field,
struct pva_fw_dma_reloc_slot_info *info,
struct pva_kmd_dma_access_entry const *access_entry)
{
uint16_t slot_id = get_slot_id(slot);
uint16_t reloc_id = safe_addu16(info->slots[slot_id].reloc_start_idx,
info->reloc_off[slot_id]);
int64_t old_start_addr = info->slots[slot_id].start_addr;
int64_t old_end_addr = info->slots[slot_id].end_addr;
info->slots[slot_id].start_addr =
mins64(access_entry->start_addr, old_start_addr);
info->slots[slot_id].end_addr =
maxs64(access_entry->end_addr, old_end_addr);
info->slots[slot_id].ch_use_mask |= (1U << (ch_index & 0x1FU));
info->relocs[reloc_id].desc_index = desc_index;
info->relocs[reloc_id].field = reloc_field;
info->reloc_off[slot_id] = safe_addu8(info->reloc_off[slot_id], 1U);
}
static void handle_reloc(uint16_t slot, uint8_t transfer_mode,
struct pva_kmd_dma_access_entry const *access_entry,
struct pva_fw_dma_reloc_slots *rel_info,
uint8_t reloc_field, uint8_t ch_index,
uint8_t desc_index)
{
if (slot & PVA_DMA_DYNAMIC_SLOT) {
write_one_reloc(ch_index, desc_index, slot, transfer_mode,
reloc_field, &rel_info->dyn_slot, access_entry);
} else if (slot & PVA_DMA_STATIC_SLOT) {
write_one_reloc(ch_index, desc_index, slot, transfer_mode,
reloc_field, &rel_info->static_slot,
access_entry);
}
}
static void write_relocs(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access const *access_sizes,
struct pva_fw_dma_reloc_slots *rel_info,
uint8_t const *desc_to_ch)
{
uint32_t i;
uint16_t start_idx = 0U;
struct pva_dma_descriptor *desc = NULL;
uint8_t ch_index = 0U;
for (i = 0U; i < rel_info->dyn_slot.num_slots; i++) {
rel_info->dyn_slot.slots[i].reloc_start_idx = start_idx;
start_idx = safe_addu16(
start_idx, rel_info->dyn_slot.slots[i].reloc_count);
}
for (i = 0U; i < rel_info->static_slot.num_slots; i++) {
rel_info->static_slot.slots[i].reloc_start_idx = start_idx;
start_idx = safe_addu16(
start_idx, rel_info->static_slot.slots[i].reloc_count);
}
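	/* For example (illustrative counts): with two dynamic slots holding 2
	 * and 3 relocs and one static slot holding 1, reloc_start_idx becomes
	 * 0 and 2 for the dynamic slots and 5 for the static slot. */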
for (i = 0U; i < dma_cfg->header.num_descriptors; i++) {
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
desc = &dma_cfg->descriptors[i];
ch_index = desc_to_ch[i];
handle_reloc(desc->src.slot, desc->src.transfer_mode,
&access_sizes[i].src, rel_info,
PVA_FW_DMA_RELOC_FIELD_SRC, ch_index, i);
handle_reloc(desc->dst.slot, desc->dst.transfer_mode,
&access_sizes[i].dst, rel_info,
PVA_FW_DMA_RELOC_FIELD_DST, ch_index, i);
handle_reloc(desc->dst2_slot, desc->dst.transfer_mode,
&access_sizes[i].dst2, rel_info,
PVA_FW_DMA_RELOC_FIELD_DST2, ch_index, i);
}
}
static enum pva_error
validate_descriptor_tile_and_padding(struct pva_dma_descriptor *desc,
bool is_dst)
{
enum pva_error err = PVA_SUCCESS;
if (desc->ty == 0U) {
err = PVA_INVALID_DMA_CONFIG;
return err;
}
if (!is_dst) {
if ((desc->tx <= desc->px) || (desc->ty <= desc->py)) {
// invalid tile size/padding config
err = PVA_INVALID_DMA_CONFIG;
return err;
}
}
return PVA_SUCCESS;
}
static enum pva_error get_access_size(struct pva_dma_descriptor *desc,
struct pva_kmd_dma_access_entry *entry,
bool is_dst,
struct pva_kmd_dma_access_entry *dst2)
{
struct pva_dma_transfer_attr *attr = NULL;
uint32_t tx = 0U;
uint32_t ty = 0U;
uint64_t tile_size = 0U;
int64_t start = 0;
int64_t end = 0;
int32_t dim_offset = 0;
uint32_t dim_offset_U = 0U;
uint32_t num_bytes = 0U;
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
// early out for empty tiles
if (desc->tx == 0U) {
return err;
}
err = validate_descriptor_tile_and_padding(desc, is_dst);
if (err != PVA_SUCCESS) {
return err;
}
if (is_dst) {
attr = &desc->dst;
tx = desc->tx;
ty = desc->ty;
} else {
attr = &desc->src;
tx = subu32((uint32_t)desc->tx, (uint32_t)desc->px, &math_err);
ty = subu32((uint32_t)desc->ty, (uint32_t)desc->py, &math_err);
}
if (attr->offset > (uint64_t)(MAX_INT64)) {
err = PVA_INVALID_DMA_CONFIG;
pva_kmd_log_err("Offset is too large");
goto err_out;
}
dim_offset_U = mulu32((uint32_t)(attr->line_pitch),
subu32(ty, 1U, &math_err), &math_err);
if (attr->cb_enable != 0U) {
tile_size = addu32(dim_offset_U, tx, &math_err);
tile_size = tile_size
<< (desc->log2_pixel_size & MAX_BYTES_PER_PIXEL);
if (tile_size > attr->cb_size) {
pva_kmd_log_err(
"Tile size is bigger than circular buffer size");
err = PVA_INVALID_DMA_CONFIG;
}
start = 0LL;
end = (int64_t)attr->cb_size;
goto end;
}
end += adds64((int64_t)dim_offset_U, (int64_t)tx, &math_err);
	// 3rd dim
dim_offset = muls32((attr->adv1), (int32_t)(attr->rpt1), &math_err);
start += mins32(dim_offset, 0);
end += maxs32(dim_offset, 0);
// 4th dim
dim_offset = muls32((attr->adv2), (int32_t)(attr->rpt2), &math_err);
start += mins32(dim_offset, 0);
end += maxs32(dim_offset, 0);
// 5th dim
dim_offset = muls32((attr->adv3), (int32_t)(attr->rpt3), &math_err);
start += mins32(dim_offset, 0);
end += maxs32(dim_offset, 0);
// convert to byte range
num_bytes =
((uint32_t)1U << (desc->log2_pixel_size & MAX_BYTES_PER_PIXEL));
start *= (int64_t)num_bytes;
end *= (int64_t)num_bytes;
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("get_access_size math error");
goto err_out;
}
end:
entry->start_addr =
adds64(mins64(start, end), convert_to_signed_s64(attr->offset),
&math_err);
entry->end_addr =
adds64(maxs64(start, end), convert_to_signed_s64(attr->offset),
&math_err);
if (is_dst) {
dst2->start_addr =
adds64(mins64(start, end), (int64_t)desc->dst2_offset,
&math_err);
dst2->end_addr = adds64(maxs64(start, end),
(int64_t)desc->dst2_offset, &math_err);
}
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("get_access_size math error");
}
err_out:
return err;
}
enum pva_error
pva_kmd_compute_dma_access(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask)
{
uint32_t i;
struct pva_dma_descriptor *desc = NULL;
enum pva_error err = PVA_SUCCESS;
bool skip_swseq_size_compute = false;
for (i = 0; i < dma_cfg->header.num_descriptors; i++) {
		/*
		 * Check whether this DMA descriptor is already driven by the HW
		 * Sequencer. If it is, skip_swseq_size_compute is true and its
		 * access_sizes entry stays zeroed; otherwise go ahead with the
		 * access_sizes calculation.
		 */
		skip_swseq_size_compute = ((hw_dma_descs_mask[i / 64ULL] &
					    (1ULL << (i & 0x3FU))) != 0ULL);
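		/* For example, descriptor index 70 tests hw_dma_descs_mask[1]
		 * bit 6 (70 & 0x3F). */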
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
if (skip_swseq_size_compute == true) {
continue;
}
desc = &dma_cfg->descriptors[i];
//Calculate src_size
err = get_access_size(desc, &access_sizes[i].src, false,
&access_sizes[i].dst2);
if (err != PVA_SUCCESS) {
goto out;
}
//Calculate dst_size
err = get_access_size(desc, &access_sizes[i].dst, true,
&access_sizes[i].dst2);
if (err != PVA_SUCCESS) {
goto out;
}
}
out:
return err;
}
void pva_kmd_collect_relocs(struct pva_dma_config const *dma_cfg,
struct pva_kmd_dma_access const *access_sizes,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_reloc *out_static_relocs,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots,
struct pva_fw_dma_reloc *out_dyn_relocs,
uint8_t const *desc_to_ch)
{
struct pva_fw_dma_reloc_slots rel_info = { 0 };
uint8_t static_reloc_off[PVA_MAX_NUM_DMA_DESC * 3];
uint8_t dyn_reloc_off[PVA_MAX_NUM_DMA_DESC * 3];
memset(out_static_slots, 0,
num_static_slots * sizeof(*out_static_slots));
memset(out_dyn_slots, 0, num_dyn_slots * sizeof(*out_dyn_slots));
	/* First pass: count the number of relocations for each slot */
count_relocs(dma_cfg, out_static_slots, num_static_slots, out_dyn_slots,
num_dyn_slots);
memset(static_reloc_off, 0U, sizeof(static_reloc_off));
memset(dyn_reloc_off, 0U, sizeof(dyn_reloc_off));
rel_info.dyn_slot.slots = out_dyn_slots;
rel_info.dyn_slot.relocs = out_dyn_relocs;
rel_info.dyn_slot.num_slots = num_dyn_slots;
rel_info.dyn_slot.reloc_off = dyn_reloc_off;
rel_info.static_slot.slots = out_static_slots;
rel_info.static_slot.relocs = out_static_relocs;
rel_info.static_slot.num_slots = num_static_slots;
rel_info.static_slot.reloc_off = static_reloc_off;
/* Second pass: write reloc info */
write_relocs(dma_cfg, access_sizes, &rel_info, desc_to_ch);
}

View File

@@ -0,0 +1,294 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device_memory.h"
#include "pva_api.h"
#include "pva_api_types.h"
#include "pva_kmd_dma_cfg.h"
#include "pva_resource.h"
#include "pva_kmd_hwseq_validate.h"
static void write_dma_channel(struct pva_dma_channel const *ch,
uint8_t base_desc_index,
struct pva_fw_dma_channel *fw_ch,
struct pva_dma_resource_map *dma_resource_map,
bool support_hwseq_frame_linking)
{
/* DMA_CHANNEL_CNTL0_CHSDID: DMA_CHANNEL_CNTL0[0] = descIndex + 1;*/
fw_ch->cntl0 =
(((ch->desc_index + base_desc_index + 1U) & 0xFFU) << 0U);
/* DMA_CHANNEL_CNTL0_CHVMEMOREQ */
fw_ch->cntl0 |= ((ch->vdb_count & 0xFFU) << 8U);
/* DMA_CHANNEL_CNTL0_CHBH */
fw_ch->cntl0 |= ((ch->adb_count & 0x1FFU) << 16U);
/* DMA_CHANNEL_CNTL0_CHPREF */
fw_ch->cntl0 |= ((ch->prefetch_enable & 1U) << 30U);
/* DMA_CHANNEL_CNTL1_CHPWT */
fw_ch->cntl1 = ((ch->req_per_grant & 0x7U) << 2U);
/* DMA_CHANNEL_CNTL1_CHVDBSTART */
fw_ch->cntl1 |= ((ch->vdb_offset & 0x7FU) << 16U);
/* DMA_CHANNEL_CNTL1_CHADBSTART */
fw_ch->cntl1 |= ((ch->adb_offset & 0x1FFU) << 23U);
fw_ch->boundary_pad = ch->pad_value;
fw_ch->cntl1 |= ((ch->ch_rep_factor & 0x7U) << 8U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQSTART */
fw_ch->hwseqcntl = ((ch->hwseq_start & 0x1FFU) << 0U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEND */
fw_ch->hwseqcntl |= ((ch->hwseq_end & 0x1FFU) << 12U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTD */
fw_ch->hwseqcntl |= ((ch->hwseq_trigger_done & 0x3U) << 24U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTS */
fw_ch->hwseqcntl |= ((ch->hwseq_tx_select & 0x1U) << 27U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTO */
fw_ch->hwseqcntl |= ((ch->hwseq_traversal_order & 0x1U) << 30U);
/* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEN */
fw_ch->hwseqcntl |= ((ch->hwseq_enable & 0x1U) << 31U);
/* DMA_CHANNEL_HWSEQFSCNTL_CHHWSEQFCNT*/
fw_ch->hwseqfscntl |=
(((uint32_t)ch->hwseq_con_frame_seq & 0x1U) << 0U);
/* DMA_CHANNEL_HWSEQFSCNTL_CHHWSEQCFS*/
fw_ch->hwseqfscntl |=
(((uint32_t)ch->hwseq_frame_count & 0x3FU) << 16U);
pva_dma_resource_map_add_adbs(dma_resource_map, ch->adb_offset,
ch->adb_count);
}
static uint32_t assemble_rpt_cntl(uint8_t rpt, uint32_t adv)
{
return PVA_INSERT(rpt, 31, 24) | PVA_INSERT(adv, 23, 0);
}
static void write_dma_descriptor(struct pva_dma_descriptor const *desc,
struct pva_fw_dma_descriptor *fw_desc)
{
fw_desc->src_adr0 = iova_lo(desc->src.offset);
fw_desc->src_adr1 = iova_hi(desc->src.offset);
fw_desc->dst_adr0 = iova_lo(desc->dst.offset);
fw_desc->dst_adr1 = iova_hi(desc->dst.offset);
/* DMA_DESC_TRANS CNTL0 */
fw_desc->transfer_control0 = PVA_INSERT(desc->src.transfer_mode, 2, 0) |
PVA_INSERT(desc->dst.transfer_mode, 6, 4);
/* DMA_DESC_TRANS CNTL1 */
fw_desc->transfer_control1 =
PVA_INSERT(desc->log2_pixel_size, 1, 0) |
PVA_INSERT(desc->px_direction, 2, 2) |
PVA_INSERT(desc->py_direction, 3, 3) |
PVA_INSERT(desc->boundary_pixel_extension, 4, 4) |
PVA_INSERT(desc->tts, 5, 5) |
PVA_INSERT(desc->trans_true_completion, 7, 7);
/* DMA_DESC_TRANS CNTL2 */
fw_desc->transfer_control2 = PVA_INSERT(desc->prefetch_enable, 0, 0) |
PVA_INSERT(desc->dst.cb_enable, 1, 1) |
PVA_INSERT(desc->src.cb_enable, 2, 2);
fw_desc->link_did = desc->link_desc_id;
/* DMA_DESC_TX */
fw_desc->tx = desc->tx;
/* DMA_DESC_TY */
fw_desc->ty = desc->ty;
/* DMA_DESC_DLP_ADV */
fw_desc->dlp_adv = desc->dst.line_pitch;
/* DMA_DESC_SLP_ADV */
fw_desc->slp_adv = desc->src.line_pitch;
/* DMA_DESC_DB_START */
fw_desc->db_start = desc->dst.cb_start;
/* DMA_DESC_DB_SIZE */
fw_desc->db_size = desc->dst.cb_size;
/* DMA_DESC_SB_START */
fw_desc->sb_start = desc->src.cb_start;
/* DMA_DESC_SB_SIZE */
fw_desc->sb_size = desc->src.cb_size;
/* DMA_DESC_TRIG_CH */
/* Channel events are not supported */
fw_desc->trig_ch_events = 0U;
/* DMA_DESC_HW_SW_TRIG */
fw_desc->hw_sw_trig_events =
PVA_INSERT(desc->trig_event_mode, 1, 0) |
PVA_INSERT(desc->trig_vpu_events, 5, 2) |
PVA_INSERT(desc->desc_reload_enable, 12, 12);
/* DMA_DESC_PX */
fw_desc->px = desc->px;
/* DMA_DESC_PY */
fw_desc->py = desc->py;
/* DMA_DESC_FRDA */
fw_desc->frda = ((desc->dst2_offset >> 6U) & 0x3FFF);
/* DMA_DESC_NDTM_CNTL0 */
fw_desc->cb_ext = (((desc->src.cb_start >> 16) & 0x1) << 0) |
(((desc->dst.cb_start >> 16) & 0x1) << 2) |
(((desc->src.cb_size >> 16) & 0x1) << 4) |
(((desc->dst.cb_size >> 16) & 0x1) << 6);
/* DMA_DESC_NS1_ADV & DMA_DESC_ST1_ADV */
fw_desc->srcpt1_cntl =
assemble_rpt_cntl(desc->src.rpt1, desc->src.adv1);
fw_desc->srcpt2_cntl =
assemble_rpt_cntl(desc->src.rpt2, desc->src.adv2);
fw_desc->srcpt3_cntl =
assemble_rpt_cntl(desc->src.rpt3, desc->src.adv3);
fw_desc->dstpt1_cntl =
assemble_rpt_cntl(desc->dst.rpt1, desc->dst.adv1);
fw_desc->dstpt2_cntl =
assemble_rpt_cntl(desc->dst.rpt2, desc->dst.adv2);
fw_desc->dstpt3_cntl =
assemble_rpt_cntl(desc->dst.rpt3, desc->dst.adv3);
}
static void write_triggers(struct pva_dma_config const *dma_cfg,
struct pva_dma_config_resource *fw_cfg,
struct pva_dma_resource_map *dma_resource_map)
{
uint32_t i, j;
bool trigger_required = false;
memset(fw_cfg->output_enable, 0, sizeof(fw_cfg->output_enable));
for (i = 0; i < dma_cfg->header.num_channels; i++) {
struct pva_dma_channel const *ch = &dma_cfg->channels[i];
uint8_t ch_num = i + dma_cfg->header.base_channel;
uint32_t mask;
mask = ch->output_enable_mask;
/* READ/STORE triggers */
for (j = 0; j < 7; j++) {
fw_cfg->output_enable[j] |=
(((mask >> 2 * j) & 1U) << ch_num);
fw_cfg->output_enable[j] |=
(((mask >> (2 * j + 1)) & 1U)
<< (ch_num + 16U));
}
/* VPU config trigger */
fw_cfg->output_enable[7] |= (((mask >> 14) & 1U) << ch_num);
		/* HWSEQ trigger */
fw_cfg->output_enable[8] |= (((mask >> 15) & 1U) << ch_num);
fw_cfg->output_enable[8] |=
(((mask >> 16) & 1U) << (ch_num + 16U));
if (mask != 0) {
trigger_required = true;
}
}
if (trigger_required) {
pva_dma_resource_map_add_triggers(dma_resource_map);
}
}
void pva_kmd_write_fw_dma_config(struct pva_dma_config const *dma_cfg,
void *fw_dma_config,
uint32_t *out_fw_fetch_size,
bool support_hwseq_frame_linking)
{
struct pva_dma_config_resource *hdr;
struct pva_fw_dma_channel *fw_channels;
struct pva_fw_dma_descriptor *fw_descs;
struct pva_fw_dma_slot *fw_slots, *last_slot;
struct pva_dma_resource_map *dma_resource_map;
uint32_t *hwseq_words;
uintptr_t offset;
uint32_t i;
hdr = fw_dma_config;
hdr->base_channel = dma_cfg->header.base_channel;
hdr->base_descriptor = dma_cfg->header.base_descriptor;
hdr->base_hwseq_word = dma_cfg->header.base_hwseq_word;
hdr->num_channels = dma_cfg->header.num_channels;
hdr->num_descriptors = dma_cfg->header.num_descriptors;
hdr->num_hwseq_words = dma_cfg->header.num_hwseq_words;
hdr->vpu_exec_resource_id = dma_cfg->header.vpu_exec_resource_id;
hdr->num_dynamic_slots = dma_cfg->header.num_dynamic_slots;
dma_resource_map = &hdr->dma_resource_map;
pva_dma_resource_map_reset(dma_resource_map);
pva_dma_resource_map_add_channels(dma_resource_map,
dma_cfg->header.base_channel,
dma_cfg->header.num_channels);
pva_dma_resource_map_add_descriptors(dma_resource_map,
dma_cfg->header.base_descriptor,
dma_cfg->header.num_descriptors);
pva_dma_resource_map_add_hwseq_words(dma_resource_map,
dma_cfg->header.base_hwseq_word,
dma_cfg->header.num_hwseq_words);
offset = sizeof(*hdr);
fw_slots = pva_offset_pointer(fw_dma_config, offset);
if (hdr->num_dynamic_slots > 0) {
last_slot = &fw_slots[hdr->num_dynamic_slots - 1];
hdr->num_relocs = safe_addu16(last_slot->reloc_start_idx,
last_slot->reloc_count);
		/* Round up the number of relocs to satisfy the alignment requirement */
hdr->num_relocs = safe_pow2_roundup_u16(hdr->num_relocs, 2U);
offset += sizeof(struct pva_fw_dma_slot) *
hdr->num_dynamic_slots +
sizeof(struct pva_fw_dma_reloc) * hdr->num_relocs;
} else {
hdr->num_relocs = 0;
}
fw_channels = pva_offset_pointer(fw_dma_config, offset);
offset += sizeof(*fw_channels) * hdr->num_channels;
fw_descs = pva_offset_pointer(fw_dma_config, offset);
offset += sizeof(*fw_descs) * hdr->num_descriptors;
/* Do not include fields beyond descriptors as they are not fetched to
* TCM */
*out_fw_fetch_size = offset;
for (i = 0; i < hdr->num_channels; i++) {
write_dma_channel(&dma_cfg->channels[i],
dma_cfg->header.base_descriptor,
&fw_channels[i], dma_resource_map,
support_hwseq_frame_linking);
}
for (i = 0; i < dma_cfg->header.num_descriptors; i++) {
if (pva_is_reserved_desc(i)) {
// skip over the reserved descriptor range
i = PVA_RESERVED_DESCRIPTORS_END;
continue;
}
write_dma_descriptor(&dma_cfg->descriptors[i], &fw_descs[i]);
}
write_triggers(dma_cfg, fw_dma_config, dma_resource_map);
hwseq_words = pva_offset_pointer(fw_dma_config, offset);
memcpy(hwseq_words, dma_cfg->hwseq_words,
sizeof(*hwseq_words) * hdr->num_hwseq_words);
/*TODO: write hdr->common_config for hwseq and MISR*/
}

View File

@@ -0,0 +1,74 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_EXECUTABLE_H
#define PVA_KMD_EXECUTABLE_H
#include "pva_kmd.h"
#include "pva_resource.h"
#include "pva_kmd_utils.h"
struct pva_kmd_device;
struct pva_kmd_device_memory;
struct pva_kmd_exec_symbol_table {
uint32_t n_symbols;
struct pva_symbol_info *symbols;
};
static inline struct pva_symbol_info *
pva_kmd_get_symbol(struct pva_kmd_exec_symbol_table *symbol_table,
uint32_t symbol_id)
{
struct pva_symbol_info *symbol = NULL;
uint32_t idx = symbol_id - PVA_SYMBOL_ID_BASE;
if (idx >= symbol_table->n_symbols) {
pva_kmd_log_err("Symbol ID out of range\n");
return NULL;
}
symbol = &symbol_table->symbols[idx];
return symbol;
}
static inline struct pva_symbol_info *
pva_kmd_get_symbol_with_type(struct pva_kmd_exec_symbol_table *symbol_table,
uint32_t symbol_id,
enum pva_symbol_type symbol_type)
{
struct pva_symbol_info *symbol = NULL;
symbol = pva_kmd_get_symbol(symbol_table, symbol_id);
if (!symbol) {
return NULL;
}
#if !defined(PVA_SKIP_SYMBOL_TYPE_CHECK)
if (symbol->symbol_type != symbol_type) {
pva_kmd_log_err("Unexpected symbol type\n");
return NULL;
}
#endif
return symbol;
}
enum pva_error
pva_kmd_load_executable(void *executable_data, uint32_t executable_size,
struct pva_kmd_device *pva, uint8_t dma_smmu_id,
struct pva_kmd_exec_symbol_table *out_symbol_table,
struct pva_kmd_device_memory **out_metainfo,
struct pva_kmd_device_memory **out_sections);
void pva_kmd_unload_executable(struct pva_kmd_exec_symbol_table *symbol_table,
struct pva_kmd_device_memory *metainfo,
struct pva_kmd_device_memory *sections);
#endif // PVA_KMD_EXECUTABLE_H

View File

@@ -0,0 +1,52 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_fw_debug.h"
#include "pva_kmd_utils.h"
#include "pva_api.h"
void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer)
{
uint32_t tail = print_buffer->buffer_info->tail;
if (tail > print_buffer->size) {
pva_kmd_log_err(
"Firmware print tail is out of bounds! Refusing to print\n");
pva_dbg_printf("Tail %u vs size %u\n", tail,
print_buffer->size);
return;
}
while (print_buffer->head < tail) {
uint32_t max_len = tail - print_buffer->head;
const char *str = print_buffer->content + print_buffer->head;
uint32_t print_size;
		/* It must be null terminated */
		if (print_buffer->content[tail - 1] != '\0') {
			pva_kmd_log_err(
				"Firmware print is not null terminated! Refusing to print");
			break;
		}
print_size = strnlen(str, max_len);
pva_kmd_print_str(str);
/* +1 for null terminator */
print_buffer->head += print_size + 1;
}
if (print_buffer->buffer_info->flags & PVA_FW_PRINT_BUFFER_OVERFLOWED) {
pva_kmd_log_err("Firmware print buffer overflowed!");
}
if (print_buffer->buffer_info->flags & PVA_FW_PRINT_FAILURE) {
pva_kmd_log_err("Firmware print failed!");
}
}

View File

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_FW_DEBUG_H
#define PVA_KMD_FW_DEBUG_H
#include "pva_api.h"
#include "pva_fw.h"
struct pva_kmd_fw_print_buffer {
struct pva_fw_print_buffer_header *buffer_info;
char const *content;
uint32_t size;
uint32_t head;
};
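/*
 * Summary of the drain protocol as implemented in pva_kmd_drain_fw_print():
 * the firmware appends NUL-terminated strings to 'content' and advances
 * buffer_info->tail; the KMD prints each string from 'head' up to 'tail' and
 * advances 'head' past the string and its terminator.
 */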
void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer);
#endif // PVA_KMD_FW_DEBUG_H

View File

@@ -0,0 +1,338 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api_cmdbuf.h"
#include "pva_api_types.h"
#include "pva_bit.h"
#include "pva_fw.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
#include "pva_utils.h"
#include "pva_kmd_fw_profiler.h"
// TODO: This is here temporarily just for testing. Should be moved to a common header
#define CMD_ID(x) PVA_EXTRACT(x, 6, 0, uint8_t)
#define CMD(name) [CMD_ID(PVA_CMD_OPCODE_##name)] = #name
static const char *cmd_names[PVA_CMD_OPCODE_COUNT] = {
CMD(LINK_CHUNK),
CMD(BARRIER),
CMD(ACQUIRE_ENGINE),
CMD(RELEASE_ENGINE),
CMD(SET_CURRENT_ENGINE),
CMD(CLEAR_VMEM),
CMD(BIND_L2SRAM),
CMD(RELEASE_L2SRAM),
CMD(INVALIDATE_L2SRAM),
CMD(FLUSH_L2SRAM),
CMD(PATCH_L2SRAM_OFFSET),
CMD(SET_VPU_EXECUTABLE),
CMD(INIT_VPU_EXECUTABLE),
CMD(PREFETCH_VPU_CODE),
CMD(SET_VPU_PARAMETER),
CMD(SET_VPU_PARAMETER_WITH_ADDRESS),
CMD(SET_VPU_INSTANCE_PARAMETER),
CMD(SET_VPU_PARAMETER_WITH_BUFFER),
CMD(RUN_VPU),
CMD(SET_PPE_EXECUTABLE),
CMD(INIT_PPE_EXECUTABLE),
CMD(PREFETCH_PPE_CODE),
CMD(RUN_PPE),
CMD(FETCH_DMA_CONFIGURATION),
CMD(SETUP_DMA),
CMD(RUN_DMA),
CMD(BIND_DRAM_SLOT),
CMD(BIND_VMEM_SLOT),
CMD(UNREGISTER_RESOURCE),
CMD(WRITE_DRAM),
CMD(CAPTURE_TIMESTAMP),
CMD(RUN_UNIT_TESTS)
};
static const char *priv_cmd_names[PVA_CMD_PRIV_OPCODE_COUNT] = {
CMD(INIT_RESOURCE_TABLE),
CMD(DEINIT_RESOURCE_TABLE),
CMD(UPDATE_RESOURCE_TABLE),
CMD(INIT_QUEUE),
CMD(DEINIT_QUEUE),
CMD(ENABLE_FW_PROFILING),
CMD(DISABLE_FW_PROFILING),
CMD(SUSPEND_FW),
CMD(RESUME_FW)
};
static inline const char *pva_fw_get_cmd_name(uint32_t opcode)
{
uint32_t cmd_id;
const char *name;
cmd_id = CMD_ID(opcode);
if (opcode & PVA_CMD_PRIV_OPCODE_FLAG) {
if (cmd_id >= PVA_CMD_PRIV_OPCODE_COUNT) {
return "INVALID";
}
name = priv_cmd_names[cmd_id];
} else {
if (cmd_id >= PVA_CMD_OPCODE_COUNT) {
return "INVALID";
}
name = cmd_names[cmd_id];
}
if (name == NULL) {
return "UNKNOWN";
} else {
return name;
}
}
void pva_kmd_device_init_profiler(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
const uint32_t profiling_buffer_size = PVA_KMD_FW_PROFILING_BUFFER_SIZE;
struct pva_kmd_fw_profiling_buffer *fw_profiling_buffer =
&pva->fw_profiling_buffer;
// Event message should be 32-bit to keep logging latency low
ASSERT(sizeof(struct pva_fw_event_message) == sizeof(uint32_t));
pva->fw_profiling_buffer_memory =
pva_kmd_device_memory_alloc_map(profiling_buffer_size, pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
ASSERT(pva->fw_profiling_buffer_memory != NULL);
/* Add profiling memory to resource table */
err = pva_kmd_add_dram_buffer_resource(
&pva->dev_resource_table, pva->fw_profiling_buffer_memory,
&pva->fw_profiling_buffer_resource_id);
ASSERT(err == PVA_SUCCESS);
pva_kmd_update_fw_resource_table(&pva->dev_resource_table);
fw_profiling_buffer->buffer_info =
(struct pva_fw_profiling_buffer_header *)
pva->fw_profiling_buffer_memory->va;
fw_profiling_buffer->content =
pva_offset_pointer(pva->fw_profiling_buffer_memory->va,
sizeof(*fw_profiling_buffer->buffer_info));
fw_profiling_buffer->size = pva->fw_profiling_buffer_memory->size;
fw_profiling_buffer->head = 0U;
fw_profiling_buffer->buffer_info->flags = 0U;
fw_profiling_buffer->buffer_info->tail = 0U;
pva->debugfs_context.g_fw_profiling_config.enabled = false;
pva->debugfs_context.g_fw_profiling_config.filter = 0x0;
}
void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva)
{
pva_kmd_drop_resource(&pva->dev_resource_table,
pva->fw_profiling_buffer_resource_id);
pva->debugfs_context.g_fw_profiling_config.enabled = false;
}
enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_enable_fw_profiling *cmd;
uint64_t buffer_offset = 0U;
uint32_t filter = 0U;
uint8_t timestamp_type = TIMESTAMP_TYPE_CYCLE_COUNT;
uint32_t fence_val;
enum pva_error err;
// filter |= PVA_FW_EVENT_DO_CMD;
filter |= PVA_FW_EVENT_RUN_VPU;
if (pva->debugfs_context.g_fw_profiling_config.enabled) {
return PVA_SUCCESS;
}
pva->fw_profiling_buffer.head = 0U;
pva->fw_profiling_buffer.buffer_info->flags = 0U;
pva->fw_profiling_buffer.buffer_info->tail = 0U;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_enable_fw_profiling(
cmd, pva->fw_profiling_buffer_resource_id,
pva->fw_profiling_buffer.size, buffer_offset, filter,
timestamp_type);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
goto err_out;
}
pva->debugfs_context.g_fw_profiling_config.enabled = true;
pva->debugfs_context.g_fw_profiling_config.filter = filter;
pva->debugfs_context.g_fw_profiling_config.timestamp_type =
timestamp_type;
pva->debugfs_context.g_fw_profiling_config.timestamp_size =
(pva->debugfs_context.g_fw_profiling_config.timestamp_type ==
TIMESTAMP_TYPE_TSE) ?
8 :
4;
return PVA_SUCCESS;
err_out:
return err;
}
enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_disable_fw_profiling *cmd;
uint32_t fence_val;
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_disable_fw_profiling(cmd);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
goto err_out;
}
pva->debugfs_context.g_fw_profiling_config.enabled = false;
pva->debugfs_context.g_fw_profiling_config.filter = 0x0;
return PVA_SUCCESS;
err_out:
return err;
}
static void decode_and_print_event(unsigned long walltime,
unsigned long relative_time,
struct pva_fw_event_message message,
char *msg_string)
{
/* message.event holds the event index; PVA_BIT() maps it to its filter bit */
switch (PVA_BIT(message.event)) {
case PVA_FW_EVENT_DO_CMD: {
sprintf(msg_string,
"pva_fw@%lu: [%8lu] event=%-12s type=%-7s slot=%u idx=%-5u opcode=%s",
walltime, relative_time, "DO_CMD",
event_type_to_string(message.type), message.arg2,
message.arg3, pva_fw_get_cmd_name(message.arg1));
} break;
case PVA_FW_EVENT_SCAN_QUEUES: {
sprintf(msg_string,
"pva_fw@%lu: [%8lu] event=%-12s type=%-7s found=%u ccq_id=%-5u queue_id=%u",
walltime, relative_time, "SCAN_QUEUES",
event_type_to_string(message.type), message.arg1,
message.arg2, message.arg3);
} break;
case PVA_FW_EVENT_SCAN_SLOTS: {
sprintf(msg_string,
"pva_fw@%lu: [%8lu] event=%-12s type=%-7s state=%u slot=%u",
walltime, relative_time, "SCAN_SLOTS",
event_type_to_string(message.type), message.arg1,
message.arg2);
} break;
case PVA_FW_EVENT_RUN_VPU: {
sprintf(msg_string,
"pva_fw@%lu: [%8lu] event=%-12s type=%-7s slot=%u idx=%-5u opcode=%s",
walltime, relative_time, "RUN_VPU",
event_type_to_string(message.type), message.arg2,
message.arg3, pva_fw_get_cmd_name(message.arg1));
} break;
default:
pva_dbg_printf("Unknown event type\n");
break;
}
}
void pva_kmd_drain_fw_profiling_buffer(
struct pva_kmd_device *pva,
struct pva_kmd_fw_profiling_buffer *profiling_buffer)
{
char msg_string[200] = { '\0' };
struct pva_fw_event_message message;
uint64_t prev_walltime = 0U;
uint64_t timestamp = 0U;
uint64_t relative_time = 0U;
uint32_t buffer_space;
// TODO: R5 frequency is hard-coded for now. Get this at runtime.
static const uint32_t r5_freq = 716800000U;
static const unsigned long r5_cycle_duration = 1000000000000 / r5_freq; /* picoseconds per R5 cycle (~1395 ps at 716.8 MHz) */
unsigned long walltime = 0U; // in nanoseconds
uint64_t walltime_diff;
/* Each record is a 32-bit event message followed by a 4- or 8-byte timestamp */
const uint32_t message_size =
sizeof(message) +
pva->debugfs_context.g_fw_profiling_config.timestamp_size;
uint32_t *profiling_buffer_head = &profiling_buffer->head;
uint32_t profiling_buffer_tail = profiling_buffer->buffer_info->tail;
while (*profiling_buffer_head < profiling_buffer_tail) {
buffer_space = safe_addu32(*profiling_buffer_head,
safe_subu32(message_size, 1U));
ASSERT(buffer_space <= profiling_buffer_tail);
memcpy(&message,
&profiling_buffer->content[*profiling_buffer_head],
sizeof(message));
memcpy(&timestamp,
&profiling_buffer->content[*profiling_buffer_head +
sizeof(message)],
pva->debugfs_context.g_fw_profiling_config
.timestamp_size);
if (pva->debugfs_context.g_fw_profiling_config.timestamp_type ==
TIMESTAMP_TYPE_TSE) {
walltime = (timestamp << 5);
} else if (pva->debugfs_context.g_fw_profiling_config
.timestamp_type ==
TIMESTAMP_TYPE_CYCLE_COUNT) {
timestamp = PVA_LOW32(timestamp);
walltime = (r5_cycle_duration * timestamp) / 1000U;
}
walltime_diff = safe_subu64((uint64_t)walltime, prev_walltime);
relative_time = (prev_walltime == 0U) ? 0U : walltime_diff;
decode_and_print_event(walltime, relative_time, message,
&msg_string[0]);
pva_kmd_print_str(msg_string);
*profiling_buffer_head = *profiling_buffer_head + message_size;
prev_walltime = walltime;
}
return;
}

View File

@@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_FW_PROFILER_H
#define PVA_KMD_FW_PROFILER_H
#include "pva_kmd_device.h"
struct pva_kmd_fw_profiling_buffer {
#define PVA_KMD_FW_PROFILING_BUFFER_SIZE (512 * 1024)
struct pva_fw_profiling_buffer_header *buffer_info;
char const *content;
uint32_t size;
uint32_t head;
};
struct pva_kmd_fw_profiling_config {
uint32_t filter;
enum pva_fw_timestamp_t timestamp_type;
uint8_t timestamp_size;
uint8_t enabled;
};
void pva_kmd_device_init_profiler(struct pva_kmd_device *pva);
void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva);
void pva_kmd_drain_fw_profiling_buffer(
struct pva_kmd_device *pva,
struct pva_kmd_fw_profiling_buffer *profiling_buffer);
enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva);
enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva);
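/*
 * Illustrative call order only (a minimal sketch, not a prescribed API flow;
 * in the driver these calls are driven from device init and the debugfs
 * profiling controls):
 *
 *	pva_kmd_device_init_profiler(pva);
 *	pva_kmd_notify_fw_enable_profiling(pva);
 *	// ... submit work to the PVA ...
 *	pva_kmd_drain_fw_profiling_buffer(pva, &pva->fw_profiling_buffer);
 *	pva_kmd_notify_fw_disable_profiling(pva);
 *	pva_kmd_device_deinit_profiler(pva);
 */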
#endif

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,336 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_HWSEQ_VALIDATE_H
#define PVA_KMD_HWSEQ_VALIDATE_H
#include "pva_api_dma.h"
#include "pva_kmd_device.h"
#define PVA_HWSEQ_RRA_MAX_NOCR 31U
#define PVA_HWSEQ_RRA_MAX_FRAME_COUNT 63U
/**
* List of valid Addressing Modes in HW Sequencer Header
*/
enum pva_dma_hwseq_fid {
PVA_DMA_HWSEQ_RRA_MODE = 0xC0DA, /*!< RRA addressing */
PVA_DMA_HWSEQ_FRAME_MODE = 0xC0DE, /*!< frame addressing */
PVA_DMA_HWSEQ_DESC_MODE = 0xDEAD /*!< descriptor addressing */
};
/**
* Combine three headers common in HW Sequencer
*
* ----------------------------------------------------------------------------
* | | byte 3 | byte 2 | byte 1 | byte 0 |
* |--------|---------------|--------------|-----------------|----------------|
* | Head 1 | NOCR | FR | FID1 | FID0 |
* | Head 2 | FO in LP 15:8 | FO in LP 7:0 | TO in P/LP 15:8 | TO in P/LP 7:0 |
* | Head 3 | padB | padL | padT | padR |
* ----------------------------------------------------------------------------
**/
struct pva_dma_hwseq_hdr {
//hdr_1
uint16_t fid; /*!< addressing type: frame or descriptor */
uint8_t fr; /*!< frame repetition factor */
uint8_t nocr; /*!< number of descriptor column/row */
//hdr_2
int16_t to; /*!< tile offset in pixel/Line Pitch */
int16_t fo; /*!< frame offset in Line Pitch */
//hdr_3
uint8_t padr; /*!< pad right */
uint8_t padt; /*!< pad top */
uint8_t padl; /*!< pad left */
uint8_t padb; /*!< pad bottom */
};
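/*
 * Illustrative only: a minimal sketch of how the three header words (hdr_1,
 * hdr_2, hdr_3 in the table above) could be unpacked into this struct,
 * assuming the blob is read as little-endian 32-bit words. The helper name is
 * hypothetical and is not referenced by the driver.
 */
static inline void
pva_dma_hwseq_hdr_unpack_example(uint32_t hdr1, uint32_t hdr2, uint32_t hdr3,
struct pva_dma_hwseq_hdr *out)
{
out->fid = (uint16_t)(hdr1 & 0xFFFFU); /* bytes 0..1: FID0, FID1 */
out->fr = (uint8_t)((hdr1 >> 16) & 0xFFU); /* byte 2: frame repetition */
out->nocr = (uint8_t)((hdr1 >> 24) & 0xFFU); /* byte 3: number of col/row */
out->to = (int16_t)(hdr2 & 0xFFFFU); /* tile offset */
out->fo = (int16_t)((hdr2 >> 16) & 0xFFFFU); /* frame offset */
out->padr = (uint8_t)(hdr3 & 0xFFU);
out->padt = (uint8_t)((hdr3 >> 8) & 0xFFU);
out->padl = (uint8_t)((hdr3 >> 16) & 0xFFU);
out->padb = (uint8_t)((hdr3 >> 24) & 0xFFU);
}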
/**
* A struct which represents Column/Row Header in HW Sequencer
*/
struct pva_dma_hwseq_colrow_hdr {
uint8_t dec; /*!< descriptor entry count */
uint8_t crr; /*!< col/row repetition factor */
int16_t cro; /*!< col/row offset in pixel/line pitch */
};
/**
* A struct which represents a DMA Descriptor Header in HW Sequencer
*/
struct pva_dma_hwseq_desc_entry {
uint8_t did; /*!< desc id */
uint8_t dr; /*!< desc repetition */
};
/**
* A struct which represents a Column/Row Header Entry in HW Sequencer
*/
struct pva_dma_hwseq_colrow_entry_hdr {
struct pva_dma_hwseq_colrow_hdr hdr; /*!< Col/Row Header */
};
/**
* A struct representing Grid Information
*/
struct pva_hwseq_grid_info {
/**
* tile co-ordinates
* In Raster Mode:
* - tile_x[0] = Tile width of the first tile in HW Seq DMA Transfer
* - tile_x[1] = Tile width of the last tile in HW Seq DMA Transfer
* In Vertical Mining Mode:
* - tile_x[0] = Tile height of the first tile in HW Seq DMA Transfer
* - tile_x[1] = Tile height of the last tile in HW Seq DMA Transfer
*/
int32_t tile_x[2];
/**
* tile co-ordinates
* In Raster Mode:
* - tile_y[0] = Tile height of the first tile in HW Seq DMA Transfer
* - tile_y[1] = Tile height of the last tile in HW Seq DMA Transfer
* In Vertical Mining Mode:
* - tile_y[0] = Tile width of the first tile in HW Seq DMA Transfer
* - tile_y[1] = Tile width of the last tile in HW Seq DMA Transfer
*/
int32_t tile_y[2];
/**
* tile co-ordinates
* In Tensor Data Flow Mode:
*/
int32_t tile_z;
/**
* Padding values
* In Raster Mode:
* - pad_x[0] = Left Padding
* - pad_x[1] = Right Padding
* In Vertical Mining Mode:
* - pad_x[0] = Top Padding
* - pad_x[1] = Bottom Padding
*/
int32_t pad_x[2];
/**
* Padding values
* In Raster Mode:
* - pad_y[0] = Top Padding
* - pad_y[1] = Bottom Padding
* In Vertical Mining Mode:
* - pad_y[0] = Left Padding
* - pad_y[1] = Right Padding
*/
int32_t pad_y[2];
/**
* Tiles per packet. Grid size in X dimension
*/
uint32_t grid_size_x;
/**
* Repeat Count
*/
uint32_t grid_size_y;
/**
* Grid Size in Z dimension for Tensor Data Flow
*/
uint32_t grid_size_z;
/**
* Tile Offset as specified in the HW Sequencer Header
*/
int32_t grid_step_x;
/**
* Col/Row Offset as specified in the HW Sequencer Col/Row Header
*/
int32_t grid_step_y;
/**
* Repetition factor for Head Descriptor in HW Sequencer Blob
*/
uint32_t head_tile_count;
/**
* Boolean value to indicate if HW Sequencer has split padding
*/
bool is_split_padding;
};
/**
* A struct representing a valid Frame Information
*/
struct pva_hwseq_frame_info {
/**
* X co-ordinate of start of Frame
*/
int64_t start_x;
/**
* Y co-ordinate of start of Frame
*/
int64_t start_y;
/**
* Z co-ordinate of start of Frame
*/
int64_t start_z;
/**
* X co-ordinate of end of Frame
*/
int64_t end_x;
/**
* Y co-ordinate of end of Frame
*/
int64_t end_y;
/**
* Z co-ordinate of end of Frame
*/
int64_t end_z;
};
/**
* Struct which holds the HW Sequencer Buffer as received from User Space
*/
struct pva_hwseq_buffer {
/**
* Pointer to HW Sequencer Blob in Buffer
*/
const uint8_t *data;
/**
* Number of bytes left to be read from the data buffer
*/
uint32_t bytes_left;
};
/**
* @struct hw_seq_blob_entry
* @brief Structure to hold information about a hardware sequence blob entry.
*
* This structure stores the details of a DMA channel and the range of hardware
* sequencer entries associated with it, along with the number of frames involved.
*/
struct hw_seq_blob_entry {
/**
* Pointer to a const \ref pva_dma_channel which holds the current DMA Channel Information
* in which current HW Sequencer Blob is present
*/
struct pva_dma_channel const *ch;
/**
* The starting index of the hardware sequencer.
*/
uint16_t hwseq_start;
/**
* The ending index of the hardware sequencer.
*/
uint16_t hwseq_end;
/**
* The number of frames associated with the hardware sequencer.
*/
uint32_t num_frames;
};
/**
* TODO: Separate out pva_hwseq_priv to be more modular
*
* Items in pva_hwseq_main
* - dma_config
* - hw_gen
* - blob
* - num_hwseq_words
* Items per segment of main i.e. pva_hwseq_segment
* - hwseq_start, hwseq_end
* - channel id
* - hwseq_header,
* - desc_count
* - num_frames
* - head_desc, tail_desc
* - is_split_padding
* - is_raster_scan
*/
/**
* A struct holding private data to HW Sequencer Blob being parsed
*/
struct pva_hwseq_priv {
/**
* Number of descriptors in the HW Sequencer Blob
*/
uint32_t desc_count;
/**
* Number of tiles in the packet
* This is the sum total of descriptor repetition factors
* present in the HW Sequencer Blob
*/
uint32_t tiles_per_packet;
int32_t max_tx;
int32_t max_ty;
/**
* Struct that holds the entry info of HW Sequencer Blob
*/
struct hw_seq_blob_entry entry;
/**
* Struct that holds HW Sequencer Blob to be read
*/
struct pva_hwseq_buffer blob;
/**
* Boolean to indicate if split padding is present in the HW Sequencer Blob
*/
bool is_split_padding;
/**
* Bool to indicate if HW Sequencer uses raster scan or Vertical mining
* TRUE: Raster Scan
* FALSE: Vertical Mining
*/
bool is_raster_scan;
/**
* @brief Indicates the generation of PVA HW.
* Allowed values: 0 (GEN 1), 1 (GEN 2), 2 (GEN 3)
*/
enum pva_hw_gen hw_gen;
/**
* @brief Pointer to the DMA configuration header.
*/
const struct pva_dma_config *dma_config;
/**
* Pointer to \ref pva_dma_hwseq_hdr which holds the HW Sequencer Header
*/
const struct pva_dma_hwseq_hdr *hdr;
/**
* Pointer to \ref pva_dma_hwseq_colrow_hdr which holds the Header of the
* Col/Row inside HW Sequencer
*/
const struct pva_dma_hwseq_colrow_hdr *colrow;
/**
* Pointer to the Head Descriptor of type \ref pva_dma_descriptor in the HW Sequencer
*/
struct pva_dma_descriptor *head_desc;
/**
* Pointer to the Tail Descriptor of type \ref pva_dma_descriptor in the HW Sequencer
*/
struct pva_dma_descriptor *tail_desc;
/**
* DMA Descriptor information obtained from HW Sequencer Blob of type
* \ref pva_dma_hwseq_desc_entry
*/
struct pva_dma_hwseq_desc_entry dma_descs[2];
/**
* Access Sizes are calculated and stored here from HW Sequencer Blob
*/
struct pva_kmd_dma_access *access_sizes;
};
struct pva_hwseq_per_frame_info {
uint32_t seq_tile_count;
uint32_t vmem_tiles_per_frame;
};
enum pva_error validate_hwseq(struct pva_dma_config const *dma_config,
struct pva_kmd_hw_constants const *hw_consts,
struct pva_kmd_dma_access *access_sizes,
uint64_t *hw_dma_descs_mask);
#endif

View File

@@ -0,0 +1,98 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_msg.h"
#include "pva_fw.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_fw_debug.h"
#include "pva_kmd_device.h"
#include "pva_kmd_context.h"
static uint8_t get_msg_type(uint32_t hdr)
{
return PVA_EXTRACT(hdr, PVA_FW_MSG_TYPE_MSB, PVA_FW_MSG_TYPE_LSB,
uint32_t);
}
void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len)
{
struct pva_kmd_device *pva = pva_dev;
uint8_t type = get_msg_type(data[0]);
uint8_t updated_len = safe_subu8(len, 1U);
uint8_t size = safe_mulu8((uint8_t)sizeof(uint32_t), updated_len);
switch (type) {
case PVA_FW_MSG_TYPE_BOOT_DONE: {
uint64_t r5_start_time =
pack64(data[PVA_FW_MSG_R5_START_TIME_HI_IDX],
data[PVA_FW_MSG_R5_START_TIME_LO_IDX]);
uint64_t r5_ready_time =
pack64(data[PVA_FW_MSG_R5_READY_TIME_HI_IDX],
data[PVA_FW_MSG_R5_READY_TIME_LO_IDX]);
pva_kmd_log_err("Firmware boot completes");
pva_kmd_log_err_u64("R5 start time (us)",
tsc_to_us(r5_start_time));
pva_kmd_log_err_u64("R5 ready time (us)",
tsc_to_us(r5_ready_time));
pva_kmd_sema_post(&pva->fw_boot_sema);
} break;
case PVA_FW_MSG_TYPE_ABORT: {
char abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN + 1];
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
pva_kmd_log_err("Firmware aborted! The abort message is: ");
abort_msg[0] = PVA_EXTRACT(data[0], 7, 0, uint32_t);
abort_msg[1] = PVA_EXTRACT(data[0], 15, 8, uint32_t);
memcpy(abort_msg + 2, &data[1], size);
abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN] = '\0';
pva_kmd_log_err(abort_msg);
} break;
case PVA_FW_MSG_TYPE_FLUSH_PRINT:
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
break;
default:
FAULT("Unknown message type from firmware");
}
}
void pva_kmd_handle_msg(void *pva_dev, uint32_t const *data, uint8_t len)
{
struct pva_kmd_device *pva = pva_dev;
uint8_t type = get_msg_type(data[0]);
switch (type) {
case PVA_FW_MSG_TYPE_RESOURCE_UNREGISTER: {
uint8_t table_id =
PVA_EXTRACT(data[0], PVA_FW_MSG_RESOURCE_TABLE_ID_MSB,
PVA_FW_MSG_RESOURCE_TABLE_ID_LSB, uint8_t);
/* Resource table ID equals context id */
struct pva_kmd_context *ctx =
pva_kmd_get_context(pva, table_id);
uint32_t i;
pva_kmd_mutex_lock(&ctx->resource_table_lock);
for (i = 1; i < len; i++) {
pva_kmd_drop_resource(&ctx->ctx_resource_table,
data[i]);
}
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
break;
}
default:
FAULT("Unexpected CCQ msg type from FW");
break;
}
}

View File

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api.h"
/**
* @brief Handle messages from FW to hypervisor.
*
* This is a provision for future hypervisor support. For now, it handles all
* messages delivered through the mailboxes.
*/
void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len);
/**
* @brief Handle messages from FW to KMD.
*
* These messages come from the CCQ0 status registers.
*/
void pva_kmd_handle_msg(void *pva_dev, uint32_t const *data, uint8_t len);

View File

@@ -0,0 +1,35 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_MUTEX_H
#define PVA_KMD_MUTEX_H
#include "pva_api.h"
#if defined(__KERNEL__) /* For Linux */
#include <linux/mutex.h>
typedef struct mutex pva_kmd_mutex_t;
#else /* For user space code, including QNX KMD */
#include <pthread.h>
/* Mutex */
typedef pthread_mutex_t pva_kmd_mutex_t;
#endif
enum pva_error pva_kmd_mutex_init(pva_kmd_mutex_t *m);
void pva_kmd_mutex_lock(pva_kmd_mutex_t *m);
void pva_kmd_mutex_unlock(pva_kmd_mutex_t *m);
void pva_kmd_mutex_deinit(pva_kmd_mutex_t *m);
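/*
 * Typical usage (illustrative sketch only; the PVA_NOMEM mapping below is a
 * hypothetical choice, callers propagate whatever error policy they need):
 *
 *	pva_kmd_mutex_t lock;
 *
 *	if (pva_kmd_mutex_init(&lock) != PVA_SUCCESS)
 *		return PVA_NOMEM;
 *	pva_kmd_mutex_lock(&lock);
 *	// ... touch the state guarded by 'lock' ...
 *	pva_kmd_mutex_unlock(&lock);
 *	pva_kmd_mutex_deinit(&lock);
 */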
#endif // PVA_KMD_MUTEX_H

View File

@@ -0,0 +1,814 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_op_handler.h"
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_device.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_constants.h"
#include "pva_fw.h"
#include "pva_kmd_vpu_app_auth.h"
#include "pva_math_utils.h"
struct pva_kmd_buffer {
void const *base;
uint32_t offset;
uint32_t size;
};
/* Offset is always kept a multiple of 8 bytes (e.g. 20 + 13 = 33 rounds up to 40) */
static void incr_offset(struct pva_kmd_buffer *buf, uint32_t incr)
{
buf->offset = safe_addu32(buf->offset, incr);
buf->offset =
safe_pow2_roundup_u32(buf->offset, (uint32_t)sizeof(uint64_t));
}
static bool access_ok(struct pva_kmd_buffer const *buf, uint32_t size)
{
return safe_addu32(buf->offset, size) <= buf->size;
}
static void *read_data(struct pva_kmd_buffer *buf, uint32_t size)
{
void *data = (void *)((uint8_t *)buf->base + buf->offset);
incr_offset(buf, size);
return data;
}
static void write_data(struct pva_kmd_buffer *buf, void const *data,
uint32_t size)
{
memcpy((uint8_t *)buf->base + buf->offset, data, size);
incr_offset(buf, size);
}
static enum pva_error
pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_memory_register_in_args *args;
struct pva_kmd_register_out_args out_args = { 0 };
struct pva_kmd_device_memory *dev_mem;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
uint8_t smmu_ctx_id;
uint32_t resource_id = 0;
if (!access_ok(out_buffer, sizeof(struct pva_kmd_register_out_args))) {
return PVA_INVAL;
}
if (!access_ok(in_buffer,
sizeof(struct pva_kmd_memory_register_in_args))) {
err = PVA_INVAL;
goto err_out;
}
args = read_data(in_buffer,
sizeof(struct pva_kmd_memory_register_in_args));
dev_mem = pva_kmd_device_memory_acquire(args->memory_handle,
args->offset, args->size, ctx);
if (dev_mem == NULL) {
err = PVA_NOMEM;
goto err_out;
}
if (args->segment == PVA_MEMORY_SEGMENT_R5) {
smmu_ctx_id = PVA_R5_SMMU_CONTEXT_ID;
} else {
smmu_ctx_id = ctx->smmu_ctx_id;
}
err = pva_kmd_device_memory_iova_map(dev_mem, ctx->pva,
args->access_flags, smmu_ctx_id);
if (err != PVA_SUCCESS) {
goto release;
}
if ((smmu_ctx_id == PVA_R5_SMMU_CONTEXT_ID) &&
(dev_mem->iova < FW_SHARED_MEMORY_START)) {
pva_kmd_log_err(
"Not able to map memory in the R5 shared region");
err = PVA_NOMEM;
goto unmap;
}
pva_kmd_mutex_lock(&ctx->resource_table_lock);
err = pva_kmd_add_dram_buffer_resource(&ctx->ctx_resource_table,
dev_mem, &resource_id);
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto unmap;
}
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
if (update_cmd == NULL) {
pva_kmd_log_err("Unable to reserve command buffer space");
err = PVA_NOMEM;
goto free_dram_buffer_resource;
}
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
if (err != PVA_SUCCESS) {
goto free_cmdbuf;
}
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;
write_data(out_buffer, &out_args, sizeof(out_args));
return err;
free_cmdbuf:
pva_kmd_cmdbuf_builder_cancel(cmdbuf_builder);
free_dram_buffer_resource:
pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id);
unmap:
pva_kmd_device_memory_iova_unmap(dev_mem);
release:
pva_kmd_device_memory_free(dev_mem);
err_out:
out_args.error = err;
write_data(out_buffer, &out_args, sizeof(out_args));
return err;
}
static enum pva_error pva_kmd_op_executable_register_async(
struct pva_kmd_context *ctx, struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_executable_register_in_args *args;
struct pva_kmd_exec_register_out_args out_args = { 0 };
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
struct pva_kmd_resource_record *rec;
uint32_t num_symbols = 0;
void *exec_data;
uint32_t resource_id = 0;
if (!access_ok(out_buffer,
sizeof(struct pva_kmd_exec_register_out_args))) {
return PVA_INVAL;
}
if (!access_ok(in_buffer,
sizeof(struct pva_kmd_executable_register_in_args))) {
err = PVA_INVAL;
goto err_out;
}
args = read_data(in_buffer,
sizeof(struct pva_kmd_executable_register_in_args));
if (!access_ok(in_buffer, args->size)) {
err = PVA_INVAL;
goto err_out;
}
exec_data = read_data(in_buffer, args->size);
err = pva_kmd_verify_exectuable_hash(ctx->pva, (uint8_t *)exec_data,
args->size);
if (err != PVA_SUCCESS) {
goto err_out;
}
pva_kmd_mutex_lock(&ctx->resource_table_lock);
err = pva_kmd_add_vpu_bin_resource(&ctx->ctx_resource_table, exec_data,
args->size, &resource_id);
if (err == PVA_SUCCESS) {
rec = pva_kmd_use_resource(&ctx->ctx_resource_table,
resource_id);
ASSERT(rec != NULL);
num_symbols = rec->vpu_bin.symbol_table.n_symbols;
pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id);
}
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
if (update_cmd == NULL) {
pva_kmd_log_err("Unable to reserve memory in command buffer");
err = PVA_NOMEM;
goto drop_resource;
}
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;
out_args.num_symbols = num_symbols;
write_data(out_buffer, &out_args, sizeof(out_args));
return err;
drop_resource:
pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id);
err_out:
out_args.error = err;
write_data(out_buffer, &out_args, sizeof(out_args));
return err;
}
static enum pva_error
pva_kmd_op_dma_register_async(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_dma_config_register_in_args *args;
struct pva_kmd_register_out_args out_args = { 0 };
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
void *dma_cfg_data;
uint32_t dma_cfg_payload_size;
uint32_t resource_id = 0;
uint32_t dma_config_size = 0;
if (!access_ok(out_buffer, sizeof(struct pva_kmd_register_out_args))) {
return PVA_INVAL;
}
if (!access_ok(in_buffer,
sizeof(struct pva_kmd_dma_config_register_in_args))) {
return PVA_INVAL;
}
args = read_data(in_buffer,
sizeof(struct pva_kmd_dma_config_register_in_args));
dma_cfg_data = &args->dma_config_header;
dma_cfg_payload_size = in_buffer->size - in_buffer->offset;
// Advance past the DMA config payload; it is handed to pva_kmd_add_dma_config_resource below
read_data(in_buffer, dma_cfg_payload_size);
pva_kmd_mutex_lock(&ctx->resource_table_lock);
dma_config_size =
safe_addu32(dma_cfg_payload_size,
(uint32_t)sizeof(args->dma_config_header));
err = pva_kmd_add_dma_config_resource(&ctx->ctx_resource_table,
dma_cfg_data, dma_config_size,
&resource_id);
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
if (update_cmd == NULL) {
err = PVA_NOMEM;
goto drop_dma_config;
}
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
out_args.error = PVA_SUCCESS;
out_args.resource_id = resource_id;
write_data(out_buffer, &out_args, sizeof(out_args));
return PVA_SUCCESS;
drop_dma_config:
pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id);
err_out:
out_args.error = err;
write_data(out_buffer, &out_args, sizeof(out_args));
/* Error is reported in the output buffer. So we return success here. */
return PVA_SUCCESS;
}
static enum pva_error
pva_kmd_op_unregister_async(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_unregister_in_args *args;
struct pva_cmd_unregister_resource *unreg_cmd;
if (!access_ok(in_buffer, sizeof(struct pva_kmd_unregister_in_args))) {
err = PVA_INVAL;
goto err_out;
}
args = read_data(in_buffer, sizeof(struct pva_kmd_unregister_in_args));
unreg_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*unreg_cmd));
if (unreg_cmd == NULL) {
pva_kmd_log_err(
"Unable to reserve memory for unregister command");
err = PVA_NOMEM;
goto err_out;
}
pva_kmd_set_cmd_unregister_resource(unreg_cmd, args->resource_id);
return PVA_SUCCESS;
err_out:
return err;
}
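/*
 * Layout of the async ops input buffer as consumed below (summary derived
 * from the handlers above): after the leading struct pva_kmd_operations, the
 * buffer carries a sequence of records, each a struct pva_kmd_op_header
 * followed by that op's input arguments (plus a variable-size payload for
 * executable and DMA-config registration). Every read advances the offset and
 * rounds it up to the next 8-byte boundary via incr_offset().
 */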
static enum pva_error pva_kmd_async_ops_handler(
struct pva_kmd_context *ctx, struct pva_fw_postfence *post_fence,
struct pva_kmd_buffer *in_arg, struct pva_kmd_buffer *out_arg)
{
struct pva_kmd_cmdbuf_builder cmdbuf_builder;
enum pva_error err = PVA_SUCCESS;
uint32_t wait_time = 0;
// First check if we have space in the queue
while (pva_kmd_queue_space(&ctx->ctx_queue) == 0) {
pva_kmd_sleep_us(PVA_KMD_WAIT_FW_POLL_INTERVAL_US);
wait_time += PVA_KMD_WAIT_FW_POLL_INTERVAL_US;
if (wait_time > PVA_KMD_WAIT_FW_TIMEOUT_US) {
err = PVA_TIMEDOUT;
goto out;
}
}
err = pva_kmd_submitter_prepare(&ctx->submitter, &cmdbuf_builder);
if (err != PVA_SUCCESS) {
goto out;
}
while (access_ok(in_arg, sizeof(struct pva_kmd_op_header))) {
struct pva_kmd_op_header *header =
read_data(in_arg, sizeof(struct pva_kmd_op_header));
if (header->op_type >= PVA_KMD_OP_MAX) {
err = PVA_INVAL;
goto out;
}
switch (header->op_type) {
case PVA_KMD_OP_MEMORY_REGISTER:
err = pva_kmd_op_memory_register_async(
ctx, in_arg, out_arg, &cmdbuf_builder);
break;
case PVA_KMD_OP_EXECUTABLE_REGISTER:
err = pva_kmd_op_executable_register_async(
ctx, in_arg, out_arg, &cmdbuf_builder);
break;
case PVA_KMD_OP_DMA_CONFIG_REGISTER:
err = pva_kmd_op_dma_register_async(
ctx, in_arg, out_arg, &cmdbuf_builder);
break;
case PVA_KMD_OP_UNREGISTER:
err = pva_kmd_op_unregister_async(ctx, in_arg, out_arg,
&cmdbuf_builder);
break;
default:
err = PVA_INVAL;
break;
}
if (err != PVA_SUCCESS) {
break;
}
}
/* This fence comes from user, so set the flag to inform FW */
post_fence->flags |= PVA_FW_POSTFENCE_FLAGS_USER_FENCE;
err = pva_kmd_submitter_submit_with_fence(&ctx->submitter,
&cmdbuf_builder, post_fence);
ASSERT(err == PVA_SUCCESS);
out:
return err;
}
static enum pva_error pva_kmd_op_context_init(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer)
{
struct pva_kmd_context_init_in_args *ctx_init_args;
struct pva_kmd_context_init_out_args ctx_init_out = { 0 };
enum pva_error err;
if (!access_ok(in_buffer,
sizeof(struct pva_kmd_context_init_in_args))) {
return PVA_INVAL;
}
if (!access_ok(out_buffer,
sizeof(struct pva_kmd_context_init_out_args))) {
return PVA_INVAL;
}
ctx_init_args = read_data(in_buffer,
sizeof(struct pva_kmd_context_init_in_args));
err = pva_kmd_context_init(ctx, ctx_init_args->resource_table_capacity);
ctx_init_out.error = err;
ctx_init_out.ccq_shm_hdl = (uint64_t)ctx->ccq_shm_handle;
write_data(out_buffer, &ctx_init_out, sizeof(ctx_init_out));
return err;
}
static enum pva_error
pva_kmd_op_syncpt_register_async(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err;
struct pva_syncpt_rw_info *syncpts;
struct pva_kmd_device_memory dev_mem;
uint32_t resource_id = 0;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
struct pva_kmd_syncpt_register_out_args syncpt_register_out = { 0 };
/* Register RO syncpts */
dev_mem.iova = ctx->pva->syncpt_ro_iova;
dev_mem.va = 0;
dev_mem.size = ctx->pva->syncpt_offset * ctx->pva->num_syncpts;
dev_mem.pva = ctx->pva;
dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID;
pva_kmd_mutex_lock(&ctx->resource_table_lock);
err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem,
&resource_id);
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
syncpt_register_out.syncpt_ro_res_id = resource_id;
syncpt_register_out.num_ro_syncpoints = ctx->pva->num_syncpts;
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
/* Register RW syncpts */
syncpts = (struct pva_syncpt_rw_info *)pva_kmd_get_block(
&ctx->pva->syncpt_allocator, ctx->syncpt_block_index);
ASSERT(syncpts != NULL);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS_PER_CONTEXT; i++) {
ctx->syncpt_ids[i] = syncpts[i].syncpt_id;
syncpt_register_out.synpt_ids[i] = syncpts[i].syncpt_id;
}
dev_mem.iova = syncpts[0].syncpt_iova;
dev_mem.va = 0;
dev_mem.size = ctx->pva->syncpt_offset * PVA_NUM_RW_SYNCPTS_PER_CONTEXT;
dev_mem.pva = ctx->pva;
dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID;
pva_kmd_mutex_lock(&ctx->resource_table_lock);
err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem,
&resource_id);
pva_kmd_mutex_unlock(&ctx->resource_table_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
syncpt_register_out.syncpt_rw_res_id = resource_id;
syncpt_register_out.synpt_size = ctx->pva->syncpt_offset;
ctx->ctx_resource_table.syncpt_allocator = &ctx->pva->syncpt_allocator;
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
err_out:
syncpt_register_out.error = err;
write_data(out_buffer, &syncpt_register_out,
sizeof(syncpt_register_out));
return err;
}
static enum pva_error pva_kmd_op_queue_create(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_arg,
struct pva_kmd_buffer *out_arg)
{
struct pva_kmd_queue_create_in_args *queue_create_args;
struct pva_kmd_queue_create_out_args queue_out_args = { 0 };
uint32_t queue_id = PVA_INVALID_QUEUE_ID;
enum pva_error err = PVA_SUCCESS;
if (!access_ok(in_arg, sizeof(struct pva_kmd_queue_create_in_args))) {
return PVA_INVAL;
}
if (!access_ok(out_arg, sizeof(struct pva_kmd_queue_create_out_args))) {
return PVA_INVAL;
}
queue_create_args =
read_data(in_arg, sizeof(struct pva_kmd_queue_create_in_args));
queue_out_args.error =
pva_kmd_queue_create(ctx, queue_create_args, &queue_id);
if (queue_out_args.error == PVA_SUCCESS) {
queue_out_args.queue_id = queue_id;
}
if (queue_id >= PVA_MAX_NUM_QUEUES_PER_CONTEXT) {
pva_kmd_log_err("pva_kmd_op_queue_create invalid queue id");
err = PVA_INVAL;
goto err_out;
}
pva_kmd_read_syncpt_val(ctx->pva, ctx->syncpt_ids[queue_id],
&queue_out_args.syncpt_fence_counter);
write_data(out_arg, &queue_out_args,
sizeof(struct pva_kmd_queue_create_out_args));
err_out:
return err;
}
static enum pva_error pva_kmd_op_queue_destroy(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_arg,
struct pva_kmd_buffer *out_arg)
{
struct pva_kmd_queue_destroy_in_args *queue_destroy_args;
struct pva_kmd_queue_destroy_out_args queue_out_args = { 0 };
if (!access_ok(in_arg, sizeof(struct pva_kmd_queue_destroy_in_args))) {
return PVA_INVAL;
}
if (!access_ok(out_arg,
sizeof(struct pva_kmd_queue_destroy_out_args))) {
return PVA_INVAL;
}
queue_destroy_args =
read_data(in_arg, sizeof(struct pva_kmd_queue_destroy_in_args));
queue_out_args.error = pva_kmd_queue_destroy(ctx, queue_destroy_args);
write_data(out_arg, &queue_out_args,
sizeof(struct pva_kmd_queue_destroy_out_args));
return PVA_SUCCESS;
}
static enum pva_error
pva_kmd_op_executable_get_symbols(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_arg,
struct pva_kmd_buffer *out_arg)
{
struct pva_kmd_executable_get_symbols_in_args *sym_in_args;
struct pva_kmd_executable_get_symbols_out_args sym_out_args = { 0 };
struct pva_kmd_resource_record *rec;
enum pva_error err = PVA_SUCCESS;
uint32_t table_size = 0;
uint32_t size = 0;
if (!access_ok(in_arg,
sizeof(struct pva_kmd_executable_get_symbols_in_args))) {
return PVA_INVAL;
}
if (!access_ok(out_arg,
sizeof(struct pva_kmd_executable_get_symbols_out_args))) {
return PVA_INVAL;
}
sym_in_args = read_data(
in_arg, sizeof(struct pva_kmd_executable_get_symbols_in_args));
rec = pva_kmd_use_resource(&ctx->ctx_resource_table,
sym_in_args->exec_resource_id);
if (rec == NULL) {
err = PVA_INVAL;
pva_kmd_log_err("pva_kmd_use_resource failed");
goto err_out;
}
if (rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) {
err = PVA_INVAL;
pva_kmd_log_err("Not an executable resource");
goto err_drop;
}
table_size = safe_mulu32(rec->vpu_bin.symbol_table.n_symbols,
sizeof(struct pva_symbol_info));
size = safe_addu32(
table_size,
sizeof(struct pva_kmd_executable_get_symbols_out_args));
if (!access_ok(out_arg, size)) {
err = PVA_INVAL;
goto err_drop;
}
sym_out_args.error = err;
sym_out_args.num_symbols = rec->vpu_bin.symbol_table.n_symbols;
write_data(out_arg, &sym_out_args, sizeof(sym_out_args));
write_data(out_arg, rec->vpu_bin.symbol_table.symbols, table_size);
pva_kmd_drop_resource(&ctx->ctx_resource_table,
sym_in_args->exec_resource_id);
return PVA_SUCCESS;
err_drop:
pva_kmd_drop_resource(&ctx->ctx_resource_table,
sym_in_args->exec_resource_id);
err_out:
sym_out_args.error = err;
write_data(out_arg, &sym_out_args, sizeof(sym_out_args));
return err;
}
typedef enum pva_error (*pva_kmd_async_op_func_t)(
struct pva_kmd_context *ctx, struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder);
static enum pva_error
pva_kmd_op_synced_submit(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_buffer,
struct pva_kmd_buffer *out_buffer,
pva_kmd_async_op_func_t async_op_func)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_cmdbuf_builder cmdbuf_builder;
uint32_t fence_val;
err = pva_kmd_submitter_prepare(&ctx->submitter, &cmdbuf_builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = async_op_func(ctx, in_buffer, out_buffer, &cmdbuf_builder);
if (err != PVA_SUCCESS) {
goto cancel_submit;
}
err = pva_kmd_submitter_submit(&ctx->submitter, &cmdbuf_builder,
&fence_val);
/* TODO: handle this error */
ASSERT(err == PVA_SUCCESS);
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
/* TODO: handle this error when FW reboot is supported */
ASSERT(err == PVA_SUCCESS);
return PVA_SUCCESS;
cancel_submit:
pva_kmd_cmdbuf_builder_cancel(&cmdbuf_builder);
err_out:
return err;
}
static enum pva_error pva_kmd_sync_ops_handler(struct pva_kmd_context *ctx,
struct pva_kmd_buffer *in_arg,
struct pva_kmd_buffer *out_arg)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_op_header *header;
if (!access_ok(in_arg, sizeof(struct pva_kmd_op_header))) {
err = PVA_INVAL;
goto out;
}
header = read_data(in_arg, sizeof(struct pva_kmd_op_header));
switch (header->op_type) {
case PVA_KMD_OP_CONTEXT_INIT:
err = pva_kmd_op_context_init(ctx, in_arg, out_arg);
break;
case PVA_KMD_OP_QUEUE_CREATE:
err = pva_kmd_op_queue_create(ctx, in_arg, out_arg);
break;
case PVA_KMD_OP_QUEUE_DESTROY:
err = pva_kmd_op_queue_destroy(ctx, in_arg, out_arg);
break;
case PVA_KMD_OP_EXECUTABLE_GET_SYMBOLS:
err = pva_kmd_op_executable_get_symbols(ctx, in_arg, out_arg);
break;
case PVA_KMD_OP_MEMORY_REGISTER:
err = pva_kmd_op_synced_submit(
ctx, in_arg, out_arg, pva_kmd_op_memory_register_async);
break;
case PVA_KMD_OP_SYNPT_REGISTER:
err = pva_kmd_op_synced_submit(
ctx, in_arg, out_arg, pva_kmd_op_syncpt_register_async);
break;
case PVA_KMD_OP_EXECUTABLE_REGISTER:
err = pva_kmd_op_synced_submit(
ctx, in_arg, out_arg,
pva_kmd_op_executable_register_async);
break;
case PVA_KMD_OP_DMA_CONFIG_REGISTER:
err = pva_kmd_op_synced_submit(ctx, in_arg, out_arg,
pva_kmd_op_dma_register_async);
break;
case PVA_KMD_OP_UNREGISTER:
err = pva_kmd_op_synced_submit(ctx, in_arg, out_arg,
pva_kmd_op_unregister_async);
break;
default:
err = PVA_INVAL;
break;
}
out:
return err;
}
enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx,
void const *ops_buffer, uint32_t ops_size,
void *response,
uint32_t response_buffer_size,
uint32_t *out_response_size)
{
struct pva_kmd_operations *ops;
struct pva_kmd_buffer in_buffer = { 0 }, out_buffer = { 0 };
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_response_header *resp_hdr;
in_buffer.base = ops_buffer;
in_buffer.size = ops_size;
out_buffer.base = response;
out_buffer.size = response_buffer_size;
if (!access_ok(&in_buffer, sizeof(struct pva_kmd_operations))) {
err = PVA_INVAL;
goto out;
}
if (!access_ok(&out_buffer, sizeof(struct pva_kmd_response_header))) {
err = PVA_INVAL;
goto out;
}
resp_hdr =
read_data(&out_buffer, sizeof(struct pva_kmd_response_header));
ops = read_data(&in_buffer, sizeof(struct pva_kmd_operations));
if (ops->mode == PVA_KMD_OPS_MODE_SYNC) {
/* Process one sync operation */
err = pva_kmd_sync_ops_handler(ctx, &in_buffer, &out_buffer);
} else {
/* Process async operations:
* - memory register
* - executable register
* - DMA configuration registration
* - unregister
*/
err = pva_kmd_async_ops_handler(ctx, &ops->postfence,
&in_buffer, &out_buffer);
}
//Update the size of the responses in the response header.
// This size also includes the header size.
resp_hdr->rep_size = out_buffer.offset;
out:
*out_response_size = out_buffer.offset;
return err;
}

View File

@@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_OP_HANDLER_H
#define PVA_KMD_OP_HANDLER_H
#include "pva_kmd_context.h"
#include "pva_fw.h"
#include "pva_kmd.h"
/** @brief Handler for PVA KMD operations.
*
* This function implements the only runtime interface with UMD. Shim layers
* receive the input data from UMD and call this function to execute the
* operations. Then, shim layers send the response back to UMD.
*
* @param ctx The KMD context.
* @param ops Pointer to the input buffer containing the operations to be
* executed. The common layer assumes that this buffer is private to
* KMD and will dereference it directly without making a copy.
* Specifically on Linux, this parameter should point to a private
* kernel space buffer instead of the user space buffer.
* @param ops_size Size of the input buffer.
* @param response Pointer to the buffer where the response will be written.
* @param response_buffer_size Size of the response buffer.
* @param out_response_size Pointer to a variable where the actual size of the
* response will be written.
*
* @return pva_error indicating the success or failure of the operation.
*/
enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx, void const *ops,
uint32_t ops_size, void *response,
uint32_t response_buffer_size,
uint32_t *out_response_size);
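/*
 * Example (illustrative sketch only, not part of the driver): a Linux shim
 * might bounce the user buffers through private kernel copies before calling
 * pva_kmd_ops_handler(). The names uops, uresp, ops_size and resp_size are
 * hypothetical.
 *
 *	void *ops = kzalloc(ops_size, GFP_KERNEL);
 *	void *resp = kzalloc(resp_size, GFP_KERNEL);
 *	uint32_t out_size = 0;
 *	enum pva_error err = PVA_INVAL;
 *
 *	if (ops != NULL && resp != NULL &&
 *	    copy_from_user(ops, uops, ops_size) == 0) {
 *		err = pva_kmd_ops_handler(ctx, ops, ops_size, resp,
 *					  resp_size, &out_size);
 *		if (err == PVA_SUCCESS &&
 *		    copy_to_user(uresp, resp, out_size) != 0)
 *			err = PVA_INVAL;
 *	}
 *	kfree(resp);
 *	kfree(ops);
 */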
#endif // PVA_KMD_OP_HANDLER_H

View File

@@ -0,0 +1,210 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_utils.h"
#include "pva_fw.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_device.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_context.h"
#include "pva_kmd_block_allocator.h"
#include "pva_utils.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_pm.h"
enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
enum pva_error err = PVA_SUCCESS;
struct pva_cmd_suspend_fw *fw_suspend;
uint32_t fence_val;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0u) {
pva_dbg_printf("PVA: Nothing to prepare for suspend");
err = PVA_SUCCESS;
goto err_out;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Prepare submitter for FW suspend command failed\n");
goto err_out;
}
//Build args
fw_suspend = pva_kmd_reserve_cmd_space(&builder, sizeof(*fw_suspend));
if (fw_suspend == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for FW suspend command failed\n");
err = PVA_NOMEM;
goto cancel_submit;
}
pva_kmd_set_cmd_suspend_fw(fw_suspend);
//Submit
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Submission for FW suspend command failed\n");
goto cancel_submit;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Waiting for FW timed out when preparing for suspend state\n");
goto err_out;
}
cancel_submit:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
}
enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_init_resource_table *res_cmd;
struct pva_cmd_init_queue *queue_cmd;
struct pva_cmd_resume_fw *fw_resume;
enum pva_error err;
uint32_t fence_val;
struct pva_kmd_queue *queue;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0u) {
pva_dbg_printf(
"PVA : Nothing to check for completion in resume");
err = PVA_SUCCESS;
goto err_out;
}
pva_kmd_send_resource_table_info_by_ccq(pva, &pva->dev_resource_table);
pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Prepare submitter for FW resume command failed\n");
goto err_out;
}
fw_resume = pva_kmd_reserve_cmd_space(&builder, sizeof(*fw_resume));
if (fw_resume == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_kmd_set_cmd_resume_fw(fw_resume);
for (uint8_t i = 0; i < pva->max_n_contexts; i++) {
struct pva_kmd_context *ctx = pva_kmd_get_context(
pva, sat_add8(i, PVA_KMD_USER_CONTEXT_ID_BASE));
if (ctx != NULL) {
/**Initialize resource table */
res_cmd = pva_kmd_reserve_cmd_space(&builder,
sizeof(*res_cmd));
if (res_cmd == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for context registration in FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_dbg_printf(
"PVA: Resume init resource table for context %d\n",
ctx->ccq_id);
pva_kmd_set_cmd_init_resource_table(
res_cmd, ctx->resource_table_id,
ctx->ctx_resource_table.table_mem->iova,
ctx->ctx_resource_table.n_entries);
queue_cmd = pva_kmd_reserve_cmd_space(
&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for queue registration in FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_dbg_printf(
"PVA: Resume priv queue for context %d\n",
ctx->ccq_id);
pva_kmd_set_cmd_init_queue(
queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/
ctx->ctx_queue.queue_memory->iova,
ctx->ctx_queue.max_num_submit);
/**Initialize resource table */
for (uint32_t j = 0; j < ctx->max_n_queues; j++) {
queue = pva_kmd_get_block(&ctx->queue_allocator,
j);
if (queue != NULL) {
pva_dbg_printf(
"PVA: Resume queue for context %d, queue %d\n",
queue->ccq_id, queue->queue_id);
queue_cmd = pva_kmd_reserve_cmd_space(
&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for queue registration in FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_kmd_set_cmd_init_queue(
queue_cmd, queue->ccq_id,
queue->queue_id,
queue->queue_memory->iova,
queue->max_num_submit);
}
}
}
}
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
// Error is either QUEUE_FULL or TIMEDOUT
pva_kmd_log_err(
"PVA: Submission for FW resume command failed\n");
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when resuming from suspend state");
goto err_out;
}
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
}

View File

@@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_PM_H
#define PVA_KMD_PM_H
struct pva_kmd_device;
enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva);
enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva);
#endif

View File

@@ -0,0 +1,252 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_utils.h"
#include "pva_fw.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_queue.h"
#include "pva_kmd_context.h"
#include "pva_kmd_block_allocator.h"
#include "pva_utils.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva,
uint8_t ccq_id, uint8_t queue_id,
pva_kmd_mutex_t *ccq_lock,
struct pva_kmd_device_memory *queue_memory,
uint32_t max_num_submit)
{
queue->pva = pva;
queue->queue_memory = queue_memory;
queue->ccq_id = ccq_id;
queue->queue_id = queue_id;
queue->max_num_submit = max_num_submit;
queue->queue_header = queue_memory->va;
queue->ccq_lock = ccq_lock;
}
uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue)
{
uint32_t head = queue->queue_header->cb_head;
uint32_t tail = queue->queue_header->cb_tail;
uint32_t size = queue->max_num_submit;
return pva_fw_queue_space(head, tail, size);
}
enum pva_error
pva_kmd_queue_submit(struct pva_kmd_queue *queue,
struct pva_fw_cmdbuf_submit_info const *submit_info)
{
uint32_t head = queue->queue_header->cb_head;
uint32_t tail = queue->queue_header->cb_tail;
uint32_t size = queue->max_num_submit;
uint64_t ccq_entry;
enum pva_error err;
struct pva_fw_cmdbuf_submit_info *items = pva_offset_pointer(
queue->queue_header, sizeof(*queue->queue_header));
if (pva_fw_queue_space(head, tail, size) == 0) {
return PVA_QUEUE_FULL;
}
items[tail] = *submit_info;
/* Update tail */
tail = wrap_add(tail, 1, size);
ccq_entry =
PVA_INSERT64(PVA_FW_CCQ_OP_UPDATE_TAIL, PVA_FW_CCQ_OPCODE_MSB,
PVA_FW_CCQ_OPCODE_LSB) |
PVA_INSERT64(queue->queue_id, PVA_FW_CCQ_QUEUE_ID_MSB,
PVA_FW_CCQ_QUEUE_ID_LSB) |
PVA_INSERT64(tail, PVA_FW_CCQ_TAIL_MSB, PVA_FW_CCQ_TAIL_LSB);
pva_kmd_mutex_lock(queue->ccq_lock);
/* TODO: memory write barrier is needed here */
err = pva_kmd_ccq_push_with_timeout(queue->pva, queue->ccq_id,
ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err == PVA_SUCCESS) {
queue->queue_header->cb_tail = tail;
}
pva_kmd_mutex_unlock(queue->ccq_lock);
return err;
}
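/*
 * Illustrative sketch, not part of the patch: how a caller might pair
 * pva_kmd_queue_space() with pva_kmd_queue_submit() and back off while the
 * circular buffer is full. The helper name example_try_submit() and the
 * retry count are hypothetical.
 */
static enum pva_error
example_try_submit(struct pva_kmd_queue *queue,
                   struct pva_fw_cmdbuf_submit_info const *info)
{
        enum pva_error err = PVA_QUEUE_FULL;
        uint32_t attempt;

        for (attempt = 0U; attempt < 3U; attempt++) {
                if (pva_kmd_queue_space(queue) == 0U) {
                        /* FW has not consumed any entry yet; retry */
                        continue;
                }
                err = pva_kmd_queue_submit(queue, info);
                if (err != PVA_QUEUE_FULL) {
                        break;
                }
        }
        return err;
}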
void pva_kmd_queue_deinit(struct pva_kmd_queue *queue)
{
queue->queue_memory = NULL;
queue->ccq_id = PVA_INVALID_QUEUE_ID;
queue->max_num_submit = 0;
}
static enum pva_error notify_fw_queue_deinit(struct pva_kmd_context *ctx,
struct pva_kmd_queue *queue)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_cmdbuf_builder builder;
struct pva_cmd_deinit_queue *queue_cmd;
uint32_t fence_val;
err = pva_kmd_submitter_prepare(&ctx->submitter, &builder);
if (err != PVA_SUCCESS) {
goto end;
}
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
err = PVA_NOMEM;
goto cancel_submitter;
}
pva_kmd_set_cmd_deinit_queue(queue_cmd, queue->ccq_id, queue->queue_id);
err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
ASSERT(err == PVA_SUCCESS);
return PVA_SUCCESS;
cancel_submitter:
pva_kmd_cmdbuf_builder_cancel(&builder);
end:
return err;
}
enum pva_error
pva_kmd_queue_create(struct pva_kmd_context *ctx,
struct pva_kmd_queue_create_in_args *in_args,
uint32_t *queue_id)
{
struct pva_kmd_device_memory *submission_mem_kmd = NULL;
struct pva_kmd_queue *queue = NULL;
struct pva_kmd_cmdbuf_builder builder;
struct pva_cmd_init_queue *queue_cmd;
uint32_t fence_val;
enum pva_error err, tmperr;
queue = pva_kmd_zalloc_block(&ctx->queue_allocator, queue_id);
if (queue == NULL) {
err = PVA_NOMEM;
goto err_out;
}
/* Get handle from mapped memory */
submission_mem_kmd = pva_kmd_device_memory_acquire(
in_args->queue_memory_handle, in_args->queue_memory_offset,
pva_get_submission_queue_memory_size(
in_args->max_submission_count),
ctx);
if (submission_mem_kmd == NULL) {
err = PVA_INVAL;
goto err_free_queue;
}
pva_kmd_queue_init(queue, ctx->pva, ctx->ccq_id, *queue_id,
&ctx->ccq_lock, submission_mem_kmd,
in_args->max_submission_count);
/* Get device mapped IOVA to share with FW */
err = pva_kmd_device_memory_iova_map(submission_mem_kmd, ctx->pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
if (err != PVA_SUCCESS) {
goto err_free_kmd_memory;
}
if (submission_mem_kmd->iova < FW_SHARED_MEMORY_START) {
pva_kmd_log_err(
"Not able to map memory in the R5 shared region");
err = PVA_NOMEM;
goto unmap_iova;
}
err = pva_kmd_submitter_prepare(&ctx->submitter, &builder);
if (err != PVA_SUCCESS) {
goto unmap_iova;
}
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
err = PVA_NOMEM;
goto cancel_submitter;
}
pva_kmd_set_cmd_init_queue(queue_cmd, queue->ccq_id, queue->queue_id,
queue->queue_memory->iova,
queue->max_num_submit);
err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
return PVA_SUCCESS;
cancel_submitter:
pva_kmd_cmdbuf_builder_cancel(&builder);
unmap_iova:
pva_kmd_device_memory_iova_unmap(submission_mem_kmd);
err_free_kmd_memory:
pva_kmd_device_memory_free(queue->queue_memory);
pva_kmd_queue_deinit(queue);
err_free_queue:
tmperr = pva_kmd_free_block(&ctx->queue_allocator, *queue_id);
ASSERT(tmperr == PVA_SUCCESS);
*queue_id = PVA_INVALID_QUEUE_ID;
err_out:
return err;
}
enum pva_error
pva_kmd_queue_destroy(struct pva_kmd_context *ctx,
struct pva_kmd_queue_destroy_in_args *in_args)
{
struct pva_kmd_queue *queue;
enum pva_error err = PVA_SUCCESS;
/*
* TODO :
* Send command to FW to stop queue usage. Wait for ack.
* This call needs to be added after syncpoint and ccq functions are ready.
*/
queue = pva_kmd_get_block(&ctx->queue_allocator, in_args->queue_id);
if (queue == NULL) {
return PVA_INVAL;
}
err = notify_fw_queue_deinit(ctx, queue);
if (err != PVA_SUCCESS) {
return err;
}
pva_kmd_device_memory_iova_unmap(queue->queue_memory);
pva_kmd_device_memory_free(queue->queue_memory);
pva_kmd_queue_deinit(queue);
err = pva_kmd_free_block(&ctx->queue_allocator, in_args->queue_id);
ASSERT(err == PVA_SUCCESS);
return PVA_SUCCESS;
}
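/*
 * Illustrative sketch, not part of the patch: the expected create/destroy
 * pairing from a context's point of view. example_queue_roundtrip() is a
 * hypothetical helper and the handle/offset/count values below are
 * placeholders only.
 */
static enum pva_error example_queue_roundtrip(struct pva_kmd_context *ctx)
{
        struct pva_kmd_queue_create_in_args create_args = { 0 };
        struct pva_kmd_queue_destroy_in_args destroy_args = { 0 };
        uint32_t queue_id;
        enum pva_error err;

        create_args.queue_memory_handle = 0;    /* hypothetical handle */
        create_args.queue_memory_offset = 0;
        create_args.max_submission_count = 16U; /* hypothetical depth */
        err = pva_kmd_queue_create(ctx, &create_args, &queue_id);
        if (err != PVA_SUCCESS) {
                return err;
        }
        destroy_args.queue_id = queue_id;
        return pva_kmd_queue_destroy(ctx, &destroy_args);
}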

View File

@@ -0,0 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_QUEUE_H
#define PVA_KMD_QUEUE_H
#include "pva_fw.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_mutex.h"
struct pva_kmd_queue {
struct pva_kmd_device *pva;
struct pva_kmd_device_memory *queue_memory;
struct pva_fw_submit_queue_header *queue_header;
pva_kmd_mutex_t *ccq_lock;
uint8_t ccq_id;
uint8_t queue_id;
uint32_t max_num_submit;
};
void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva,
uint8_t ccq_id, uint8_t queue_id,
pva_kmd_mutex_t *ccq_lock,
struct pva_kmd_device_memory *queue_memory,
uint32_t max_num_submit);
enum pva_error
pva_kmd_queue_create(struct pva_kmd_context *ctx,
struct pva_kmd_queue_create_in_args *in_args,
uint32_t *queue_id);
enum pva_error
pva_kmd_queue_destroy(struct pva_kmd_context *ctx,
struct pva_kmd_queue_destroy_in_args *in_args);
enum pva_error
pva_kmd_queue_submit(struct pva_kmd_queue *queue,
struct pva_fw_cmdbuf_submit_info const *submit_info);
uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue);
void pva_kmd_queue_deinit(struct pva_kmd_queue *queue);
#endif // PVA_KMD_QUEUE_H

View File

@@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_REGS_H
#define PVA_KMD_REGS_H
#include "pva_api.h"
#include "pva_constants.h"
/* Exception vectors */
#define PVA_REG_EVP_RESET_ADDR 0x20
#define PVA_REG_EVP_UNDEF_ADDR 0x24
#define PVA_REG_EVP_SWI_ADDR 0x28
#define PVA_REG_EVP_PREFETCH_ABORT_ADDR 0x2c
#define PVA_REG_EVP_DATA_ABORT_ADDR 0x30
#define PVA_REG_EVP_RSVD_ADDR 0x34
#define PVA_REG_EVP_IRQ_ADDR 0x38
#define PVA_REG_EVP_FIQ_ADDR 0x3c
/* R5 */
#define PVA_REG_PROC_CPUHALT_ADDR 0x30000
/* SCRs */
#define PVA_SEC_SCR_SECEXT_INTR_EVENT 0x28804
#define PVA_PROC_SCR_PROC 0x30800
#define PVA_REG_EVP_SCR_ADDR 0x40 //PVA_EVP_SCR_EVP_0
#define PVA_CFG_SCR_STATUS_CNTL 0x258000 //PVA_CFG_SCR_STATUS_CNTL_0
#define PVA_CFG_SCR_PRIV 0x258008 //PVA_CFG_SCR_PRIV_0
#define PVA_CFG_SCR_CCQ_CNTL 0x258010 //PVA_CFG_SCR_CCQ_CNTL_0
/* HSP */
#define PVA_REG_HSP_COMMON_ADDR 0x160000
#define PVA_REG_HSP_INT_IE0_ADDR 0x160100
#define PVA_REG_HSP_INT_IE1_ADDR 0x160104
#define PVA_REG_HSP_INT_IE2_ADDR 0x160108
#define PVA_REG_HSP_INT_IE3_ADDR 0x16010c
#define PVA_REG_HSP_INT_IE4_ADDR 0x160110
#define PVA_REG_HSP_INT_EXTERNAL_ADDR 0x160300
#define PVA_REG_HSP_INT_INTERNAL_ADDR 0x160304
#define PVA_REG_HSP_SM0_ADDR 0x170000
#define PVA_REG_HSP_SM1_ADDR 0x178000
#define PVA_REG_HSP_SM2_ADDR 0x180000
#define PVA_REG_HSP_SM3_ADDR 0x188000
#define PVA_REG_HSP_SM4_ADDR 0x190000
#define PVA_REG_HSP_SM5_ADDR 0x198000
#define PVA_REG_HSP_SM6_ADDR 0x1a0000
#define PVA_REG_HSP_SM7_ADDR 0x1a8000
#define PVA_REG_HSP_SS0_STATE_ADDR 0x1b0000
#define PVA_REG_HSP_SS0_SET_ADDR 0x1b0004
#define PVA_REG_HSP_SS0_CLR_ADDR 0x1b0008
#define PVA_REG_HSP_SS1_STATE_ADDR 0x1c0000
#define PVA_REG_HSP_SS1_SET_ADDR 0x1c0004
#define PVA_REG_HSP_SS1_CLR_ADDR 0x1c0008
#define PVA_REG_HSP_SS2_STATE_ADDR 0x1d0000
#define PVA_REG_HSP_SS2_SET_ADDR 0x1d0004
#define PVA_REG_HSP_SS2_CLR_ADDR 0x1d0008
#define PVA_REG_HSP_SS3_STATE_ADDR 0x1e0000
#define PVA_REG_HSP_SS3_SET_ADDR 0x1e0004
#define PVA_REG_HSP_SS3_CLR_ADDR 0x1e0008
/* SEC */
#define PVA_REG_SEC_ERRSLICE0_MISSIONERR_ENABLE_ADDR 0x20030
#define PVA_REG_SEC_ERRSLICE1_MISSIONERR_ENABLE_ADDR 0x20060
#define PVA_REG_SEC_ERRSLICE2_MISSIONERR_ENABLE_ADDR 0x20090
#define PVA_REG_SEC_ERRSLICE3_MISSIONERR_ENABLE_ADDR 0x200c0
#define PVA_REG_SEC_ERRSLICE0_LATENTERR_ENABLE_ADDR 0x20040
#define PVA_REG_SEC_ERRSLICE1_LATENTERR_ENABLE_ADDR 0x20070
#define PVA_REG_SEC_ERRSLICE2_LATENTERR_ENABLE_ADDR 0x200a0
#define PVA_REG_SEC_ERRSLICE3_LATENTERR_ENABLE_ADDR 0x200d0
/* SEC_LIC_INTR_STATUS */
#define PVA_REG_SEC_LIC_INTR_H1X_MSB 7
#define PVA_REG_SEC_LIC_INTR_H1X_LSB 5
#define PVA_REG_SEC_LIC_INTR_HSP_MSB 4
#define PVA_REG_SEC_LIC_INTR_HSP_LSB 1
#define PVA_REG_SEC_LIC_INTR_WDT_MSB 0
#define PVA_REG_SEC_LIC_INTR_WDT_LSB 0
/* CCQ status 2 */
#define PVA_REG_CCQ_STATUS2_INTR_OVERFLOW_BIT PVA_BIT(28)
#define PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT PVA_BIT(24)
#define PVA_REG_CCQ_STATUS2_INTR_STATUS7_BIT PVA_BIT(20)
#define PVA_REG_CCQ_STATUS2_INTR_ALL_BITS \
(PVA_REG_CCQ_STATUS2_INTR_OVERFLOW_BIT | \
PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT | \
PVA_REG_CCQ_STATUS2_INTR_STATUS7_BIT)
#define PVA_REG_CCQ_STATUS2_NUM_ENTRIES_MSB 4
#define PVA_REG_CCQ_STATUS2_NUM_ENTRIES_LSB 0
struct pva_kmd_ccq_regspec {
uint32_t status_count;
uint32_t status[PVA_CFG_CCQ_STATUS_COUNT];
uint32_t fifo;
};
struct pva_kmd_regspec {
uint32_t sec_lic_intr_enable;
uint32_t sec_lic_intr_status;
uint32_t cfg_r5user_lsegreg;
uint32_t cfg_r5user_usegreg;
uint32_t cfg_priv_ar1_lsegreg;
uint32_t cfg_priv_ar1_usegreg;
uint32_t cfg_priv_ar2_lsegreg;
uint32_t cfg_priv_ar2_usegreg;
uint32_t cfg_priv_ar1_start;
uint32_t cfg_priv_ar1_end;
uint32_t cfg_priv_ar2_start;
uint32_t cfg_priv_ar2_end;
uint32_t cfg_user_sid_base;
uint32_t cfg_priv_sid;
uint32_t cfg_vps_sid;
uint32_t cfg_perf_mon;
uint32_t cfg_scr_priv_0;
uint32_t ccq_count;
uint32_t vpu_dbg_instr_reg_offset[PVA_NUM_ENGINES];
struct pva_kmd_ccq_regspec ccq_regs[PVA_MAX_NUM_CCQ];
};
enum pva_kmd_reg_aperture {
PVA_KMD_APERTURE_PVA_CLUSTER = 0,
PVA_KMD_APERTURE_VPU_DEBUG,
PVA_KMD_APERTURE_COUNT,
};
#endif // PVA_KMD_REGS_H

View File

@@ -0,0 +1,477 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_resource_table.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
static uint32_t get_max_dma_config_size(struct pva_kmd_device *pva)
{
uint32_t max_num_dyn_slots = PVA_DMA_MAX_NUM_SLOTS;
uint32_t max_num_reloc_infos =
safe_pow2_roundup_u32(max_num_dyn_slots, 2U);
uint32_t max_dma_cfg_size =
(uint32_t)sizeof(struct pva_dma_config_resource);
max_dma_cfg_size = safe_addu32(
max_dma_cfg_size,
safe_mulu32(max_num_dyn_slots,
(uint32_t)sizeof(struct pva_fw_dma_slot)));
max_dma_cfg_size = safe_addu32(
max_dma_cfg_size,
safe_mulu32(max_num_reloc_infos,
(uint32_t)sizeof(struct pva_fw_dma_reloc)));
max_dma_cfg_size = safe_addu32(
max_dma_cfg_size,
safe_mulu32(pva->hw_consts.n_user_dma_channels,
(uint32_t)sizeof(struct pva_dma_channel)));
max_dma_cfg_size = safe_addu32(
max_dma_cfg_size,
safe_mulu32(pva->hw_consts.n_dma_descriptors,
(uint32_t)sizeof(struct pva_dma_descriptor)));
max_dma_cfg_size = safe_addu32(max_dma_cfg_size,
safe_mulu32(pva->hw_consts.n_hwseq_words,
(uint32_t)sizeof(uint32_t)));
//Must be aligned to 8 to form array
return safe_pow2_roundup_u32(max_dma_cfg_size,
(uint32_t)sizeof(uint64_t));
}
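/*
 * Summary of the computation above (no new behaviour; assumes
 * safe_pow2_roundup_u32(x, a) aligns x up to a multiple of a):
 *
 *   block_size = align8(sizeof(struct pva_dma_config_resource)
 *                       + MAX_SLOTS           * sizeof(struct pva_fw_dma_slot)
 *                       + align2(MAX_SLOTS)   * sizeof(struct pva_fw_dma_reloc)
 *                       + n_user_dma_channels * sizeof(struct pva_dma_channel)
 *                       + n_dma_descriptors   * sizeof(struct pva_dma_descriptor)
 *                       + n_hwseq_words       * sizeof(uint32_t))
 *
 * where MAX_SLOTS = PVA_DMA_MAX_NUM_SLOTS and the final 8-byte alignment lets
 * the per-config blocks be packed back-to-back into an array.
 */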
enum pva_error
pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table,
struct pva_kmd_device *pva,
uint8_t user_smmu_ctx_id, uint32_t n_entries,
uint32_t max_num_dma_configs)
{
uint32_t max_dma_config_size = get_max_dma_config_size(pva);
enum pva_error err;
uint64_t size;
res_table->pva = pva;
res_table->n_entries = n_entries;
res_table->user_smmu_ctx_id = user_smmu_ctx_id;
size = (uint64_t)safe_mulu32(
n_entries, (uint32_t)sizeof(struct pva_resource_entry));
res_table->table_mem = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(res_table->table_mem != NULL);
pva_kmd_sema_init(&res_table->resource_semaphore, n_entries);
size = (uint64_t)safe_mulu32(sizeof(struct pva_kmd_resource_record),
n_entries);
res_table->records_mem = pva_kmd_zalloc(size);
ASSERT(res_table->records_mem != NULL);
err = pva_kmd_block_allocator_init(
&res_table->resource_record_allocator, res_table->records_mem,
PVA_RESOURCE_ID_BASE, sizeof(struct pva_kmd_resource_record),
n_entries);
ASSERT(err == PVA_SUCCESS);
size = (uint64_t)safe_mulu32(max_num_dma_configs, max_dma_config_size);
res_table->dma_config_mem = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(res_table->dma_config_mem != NULL);
err = pva_kmd_block_allocator_init(&res_table->dma_config_allocator,
res_table->dma_config_mem->va, 0,
max_dma_config_size,
max_num_dma_configs);
ASSERT(err == PVA_SUCCESS);
res_table->dma_aux = pva_kmd_zalloc(
safe_mulu32((uint32_t)sizeof(struct pva_kmd_dma_resource_aux),
max_num_dma_configs));
ASSERT(res_table->dma_aux != NULL);
return PVA_SUCCESS;
}
void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table)
{
pva_kmd_free(res_table->dma_aux);
pva_kmd_block_allocator_deinit(&res_table->dma_config_allocator);
pva_kmd_device_memory_free(res_table->dma_config_mem);
pva_kmd_block_allocator_deinit(&res_table->resource_record_allocator);
pva_kmd_free(res_table->records_mem);
pva_kmd_sema_deinit(&res_table->resource_semaphore);
pva_kmd_device_memory_free(res_table->table_mem);
}
static struct pva_kmd_resource_record *
pva_kmd_alloc_resource(struct pva_kmd_resource_table *resource_table,
uint32_t *out_resource_id)
{
enum pva_error err;
struct pva_kmd_resource_record *rec = NULL;
err = pva_kmd_sema_wait_timeout(&resource_table->resource_semaphore,
PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS);
if (err == PVA_TIMEDOUT) {
pva_kmd_log_err("pva_kmd_alloc_resource Timed out");
}
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to wait for resource IDs");
goto out;
}
rec = (struct pva_kmd_resource_record *)pva_kmd_alloc_block(
&resource_table->resource_record_allocator, out_resource_id);
ASSERT(rec != NULL);
out:
return rec;
}
static void pva_kmd_free_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id)
{
enum pva_error err;
err = pva_kmd_free_block(&resource_table->resource_record_allocator,
resource_id);
ASSERT(err == PVA_SUCCESS);
pva_kmd_sema_post(&resource_table->resource_semaphore);
}
enum pva_error
pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
uint32_t *out_resource_id)
{
struct pva_kmd_resource_record *rec =
pva_kmd_alloc_resource(resource_table, out_resource_id);
if (rec == NULL) {
pva_kmd_log_err("No more resource id");
return PVA_NO_RESOURCE_ID;
}
if (*out_resource_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = *out_resource_id;
}
rec->type = PVA_RESOURCE_TYPE_DRAM;
rec->dram.mem = dev_mem;
rec->dram.syncpt = true;
rec->ref_count = 1;
return PVA_SUCCESS;
}
enum pva_error
pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
uint32_t *out_resource_id)
{
struct pva_kmd_resource_record *rec =
pva_kmd_alloc_resource(resource_table, out_resource_id);
if (rec == NULL) {
pva_kmd_log_err("No more resource id");
return PVA_NO_RESOURCE_ID;
}
if (*out_resource_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = *out_resource_id;
}
rec->type = PVA_RESOURCE_TYPE_DRAM;
rec->dram.mem = dev_mem;
rec->dram.syncpt = false;
rec->ref_count = 1;
return PVA_SUCCESS;
}
static struct pva_resource_entry *
get_fw_resource(struct pva_kmd_resource_table *res_table, uint32_t resource_id)
{
struct pva_resource_entry *entries = res_table->table_mem->va;
uint32_t index;
ASSERT(resource_id >= PVA_RESOURCE_ID_BASE);
index = safe_subu32(resource_id, PVA_RESOURCE_ID_BASE);
return &entries[index];
}
void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table)
{
uint32_t id;
for (id = PVA_RESOURCE_ID_BASE; id <= res_table->curr_max_resource_id;
id++) {
struct pva_resource_entry *entry =
get_fw_resource(res_table, id);
struct pva_kmd_resource_record *rec = pva_kmd_get_block(
&res_table->resource_record_allocator, id);
if (rec == NULL) {
continue;
}
entry->type = rec->type;
switch (rec->type) {
case PVA_RESOURCE_TYPE_DRAM:
entry->addr_lo = iova_lo(rec->dram.mem->iova);
entry->addr_hi = iova_hi(rec->dram.mem->iova);
entry->size_lo = iova_lo(rec->dram.mem->size);
entry->size_hi = iova_hi(rec->dram.mem->size);
entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx;
break;
case PVA_RESOURCE_TYPE_INVALID:
break;
default:
pva_kmd_log_err("Unsupported resource type");
pva_kmd_fault();
}
}
}
struct pva_kmd_resource_record *
pva_kmd_use_resource(struct pva_kmd_resource_table *res_table,
uint32_t resource_id)
{
struct pva_kmd_resource_record *rec = pva_kmd_get_block(
&res_table->resource_record_allocator, resource_id);
if (rec == NULL) {
return NULL;
}
rec->ref_count = safe_addu32(rec->ref_count, 1U);
return rec;
}
struct pva_kmd_resource_record *
pva_kmd_peek_resource(struct pva_kmd_resource_table *res_table,
uint32_t resource_id)
{
struct pva_kmd_resource_record *rec = pva_kmd_get_block(
&res_table->resource_record_allocator, resource_id);
return rec;
}
void pva_kmd_drop_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id)
{
struct pva_kmd_resource_record *rec;
rec = pva_kmd_get_block(&resource_table->resource_record_allocator,
resource_id);
ASSERT(rec != NULL);
rec->ref_count = safe_subu32(rec->ref_count, 1U);
if (rec->ref_count == 0) {
pva_dbg_printf("Dropping resource %u of type %u\n", resource_id,
rec->type);
switch (rec->type) {
case PVA_RESOURCE_TYPE_DRAM:
if (rec->dram.syncpt != true) {
pva_kmd_device_memory_free(rec->dram.mem);
}
break;
case PVA_RESOURCE_TYPE_EXEC_BIN:
pva_kmd_unload_executable(&rec->vpu_bin.symbol_table,
rec->vpu_bin.metainfo_mem,
rec->vpu_bin.sections_mem);
break;
case PVA_RESOURCE_TYPE_DMA_CONFIG: {
struct pva_kmd_dma_resource_aux *dma_aux;
dma_aux =
&resource_table
->dma_aux[rec->dma_config.block_index];
pva_kmd_unload_dma_config(dma_aux);
pva_kmd_free_block(
&resource_table->dma_config_allocator,
rec->dma_config.block_index);
break;
}
default:
pva_kmd_log_err("Unsupported resource type");
pva_kmd_fault();
}
pva_kmd_free_resource(resource_table, resource_id);
}
}
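/*
 * Illustrative sketch, not part of the patch: the intended reference-counting
 * pattern around a registered resource. example_touch_resource() is a
 * hypothetical caller: it pins the record with pva_kmd_use_resource() while
 * it inspects the backing memory and releases it with pva_kmd_drop_resource(),
 * which frees the record once the count drops to zero.
 */
static enum pva_error
example_touch_resource(struct pva_kmd_resource_table *table,
                       uint32_t resource_id)
{
        struct pva_kmd_resource_record *rec;

        rec = pva_kmd_use_resource(table, resource_id);
        if (rec == NULL) {
                return PVA_NO_RESOURCE_ID;
        }
        /* ... rec (e.g. rec->dram.mem) is safe to dereference here ... */
        pva_kmd_drop_resource(table, resource_id);
        return PVA_SUCCESS;
}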
enum pva_error
pva_kmd_add_vpu_bin_resource(struct pva_kmd_resource_table *resource_table,
void *executable, uint32_t executable_size,
uint32_t *out_resource_id)
{
uint32_t res_id;
struct pva_kmd_resource_record *rec =
pva_kmd_alloc_resource(resource_table, &res_id);
enum pva_error err;
struct pva_kmd_vpu_bin_resource *vpu_bin;
if (rec == NULL) {
err = PVA_NO_RESOURCE_ID;
goto err_out;
}
vpu_bin = &rec->vpu_bin;
err = pva_kmd_load_executable(
executable, executable_size, resource_table->pva,
resource_table->user_smmu_ctx_id, &vpu_bin->symbol_table,
&vpu_bin->metainfo_mem, &vpu_bin->sections_mem);
if (err != PVA_SUCCESS) {
goto free_block;
}
if (res_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = res_id;
}
rec->type = PVA_RESOURCE_TYPE_EXEC_BIN;
rec->ref_count = 1;
*out_resource_id = res_id;
return PVA_SUCCESS;
free_block:
pva_kmd_free_resource(resource_table, res_id);
err_out:
return err;
}
enum pva_error
pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id,
struct pva_resource_entry *entry)
{
struct pva_kmd_resource_record *rec =
pva_kmd_use_resource(resource_table, resource_id);
if (rec == NULL) {
return PVA_NO_RESOURCE_ID;
}
switch (rec->type) {
case PVA_RESOURCE_TYPE_DRAM:
entry->type = rec->type;
entry->addr_lo = iova_lo(rec->dram.mem->iova);
entry->addr_hi = iova_hi(rec->dram.mem->iova);
entry->size_lo = iova_lo(rec->dram.mem->size);
entry->size_hi = iova_hi(rec->dram.mem->size);
entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx;
break;
case PVA_RESOURCE_TYPE_EXEC_BIN:
entry->type = rec->type;
entry->addr_lo = iova_lo(rec->vpu_bin.metainfo_mem->iova);
entry->addr_hi = iova_hi(rec->vpu_bin.metainfo_mem->iova);
entry->size_lo = iova_lo(rec->vpu_bin.metainfo_mem->size);
entry->size_hi = iova_hi(rec->vpu_bin.metainfo_mem->size);
entry->smmu_context_id =
rec->vpu_bin.metainfo_mem->smmu_ctx_idx;
break;
case PVA_RESOURCE_TYPE_DMA_CONFIG:
entry->type = rec->type;
entry->addr_lo = iova_lo(rec->dma_config.iova_addr);
entry->addr_hi = iova_hi(rec->dma_config.iova_addr);
entry->size_lo = iova_lo(rec->dma_config.size);
entry->size_hi = iova_hi(rec->dma_config.size);
entry->smmu_context_id = PVA_R5_SMMU_CONTEXT_ID;
break;
default:
pva_kmd_log_err("Unsupported resource type");
pva_kmd_fault();
}
pva_kmd_drop_resource(resource_table, resource_id);
return PVA_SUCCESS;
}
enum pva_error pva_kmd_add_dma_config_resource(
struct pva_kmd_resource_table *resource_table, void *dma_config_payload,
uint32_t dma_config_size, uint32_t *out_resource_id)
{
enum pva_error err = PVA_SUCCESS;
uint32_t block_idx, fw_fetch_size;
void *fw_dma_cfg;
struct pva_kmd_dma_resource_aux *dma_aux;
struct pva_kmd_resource_record *rec;
uint32_t res_id;
fw_dma_cfg = pva_kmd_zalloc_block(&resource_table->dma_config_allocator,
&block_idx);
if (fw_dma_cfg == NULL) {
err = PVA_NOMEM;
goto err_out;
}
// Must satisfy alignment requirement for converting to struct
// pva_dma_config_resource*
ASSERT(((uintptr_t)fw_dma_cfg) % sizeof(uint64_t) == 0);
dma_aux = &resource_table->dma_aux[block_idx];
err = pva_kmd_load_dma_config(resource_table, dma_config_payload,
dma_config_size, dma_aux, fw_dma_cfg,
&fw_fetch_size);
if (err != PVA_SUCCESS) {
goto free_block;
}
rec = pva_kmd_alloc_resource(resource_table, &res_id);
if (rec == NULL) {
err = PVA_NO_RESOURCE_ID;
goto unload_dma;
}
if (res_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = res_id;
}
rec->type = PVA_RESOURCE_TYPE_DMA_CONFIG;
rec->ref_count = 1;
rec->dma_config.block_index = block_idx;
rec->dma_config.iova_addr = safe_addu64(
resource_table->dma_config_mem->iova,
(uint64_t)safe_mulu32(
block_idx,
resource_table->dma_config_allocator.block_size));
rec->dma_config.size = fw_fetch_size;
*out_resource_id = res_id;
return PVA_SUCCESS;
unload_dma:
pva_kmd_unload_dma_config(dma_aux);
free_block:
pva_kmd_free_block(&resource_table->dma_config_allocator, block_idx);
err_out:
return err;
}
void pva_kmd_verify_all_resources_free(
struct pva_kmd_resource_table *resource_table)
{
enum pva_error err;
for (uint32_t i = 0; i < resource_table->n_entries; i++) {
err = pva_kmd_sema_wait_timeout(
&resource_table->resource_semaphore,
PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS);
ASSERT(err == PVA_SUCCESS);
}
}

View File

@@ -0,0 +1,153 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_RESOURCE_TABLE_H
#define PVA_KMD_RESOURCE_TABLE_H
#include "pva_fw.h"
#include "pva_bit.h"
#include "pva_resource.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_executable.h"
#include "pva_constants.h"
#include "pva_kmd_dma_cfg.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_thread_sema.h"
struct pva_kmd_device;
struct pva_kmd_dram_resource {
struct pva_kmd_device_memory *mem;
bool syncpt;
};
struct pva_kmd_vpu_bin_resource {
struct pva_kmd_device_memory *metainfo_mem;
struct pva_kmd_device_memory *sections_mem;
struct pva_kmd_exec_symbol_table symbol_table;
};
struct pva_kmd_dma_config_resource {
uint32_t block_index;
uint64_t size;
uint64_t iova_addr;
};
struct pva_kmd_resource_record {
/**
* Possible types:
* PVA_RESOURCE_TYPE_DRAM
* PVA_RESOURCE_TYPE_EXEC_BIN
* PVA_RESOURCE_TYPE_DMA_CONFIG
*/
uint8_t type;
uint32_t ref_count;
union {
struct pva_kmd_dram_resource dram;
struct pva_kmd_vpu_bin_resource vpu_bin;
struct pva_kmd_dma_config_resource dma_config;
};
};
/**
* Bookkeeping for resources registered with the PVA firmware: the FW-visible
* entry table plus the KMD-side records, DMA configuration blocks, and the
* allocators that back them.
*/
struct pva_kmd_resource_table {
/** @brief User smmu context ID.
*
* - DRAM memory, VPU data/text sections will be mapped to this space.
* - VPU metadata, DMA configurations will always be mapped to R5 SMMU
* context. */
uint8_t user_smmu_ctx_id;
uint32_t n_entries;
/** Maximum resource ID we have seen so far */
uint32_t curr_max_resource_id;
/** Semaphore to keep track of resources in use*/
pva_kmd_sema_t resource_semaphore;
/** Memory for resource table entries, in R5 segment */
struct pva_kmd_device_memory *table_mem;
/** Memory for fw dma configs, in DMA segment */
struct pva_kmd_device_memory *dma_config_mem;
struct pva_kmd_block_allocator dma_config_allocator;
/** Memory for tracking resources used by DMA configuration. Single
* allocation shared by all DMA configs */
struct pva_kmd_dma_resource_aux *dma_aux;
/** Pointer to syncpt_allocator in pva_kmd_device created during kmd boot */
struct pva_kmd_block_allocator *syncpt_allocator;
/** Memory for resource records */
void *records_mem;
struct pva_kmd_block_allocator resource_record_allocator;
struct pva_kmd_device *pva;
};
enum pva_error
pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table,
struct pva_kmd_device *pva,
uint8_t user_smmu_ctx_id, uint32_t n_entries,
uint32_t max_num_dma_configs);
void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table);
/** KMD only writes to FW resource table during init time. Once the address of
* the resource table is sent to FW, all updates should be done through commands.
*/
void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table);
enum pva_error
pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
uint32_t *out_resource_id);
enum pva_error
pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *memory,
uint32_t *out_resource_id);
enum pva_error
pva_kmd_add_vpu_bin_resource(struct pva_kmd_resource_table *resource_table,
void *executable, uint32_t executable_size,
uint32_t *out_resource_id);
enum pva_error
pva_kmd_add_dma_config_resource(struct pva_kmd_resource_table *resource_table,
void *dma_config, uint32_t dma_config_size,
uint32_t *out_resource_id);
/**
* Increment reference count of the resources
*
* TODO: make use and drop thread safe.
* */
struct pva_kmd_resource_record *
pva_kmd_use_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id);
struct pva_kmd_resource_record *
pva_kmd_peek_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id);
void pva_kmd_drop_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id);
enum pva_error
pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id,
struct pva_resource_entry *entry);
void pva_kmd_verify_all_resources_free(
struct pva_kmd_resource_table *resource_table);
#endif // PVA_KMD_RESOURCE_TABLE_H

View File

@@ -0,0 +1,185 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved.
*/
#include "pva_kmd_sha256.h"
#define ROTLEFT(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
#define ROTRIGHT(a, b) (((a) >> (b)) | ((a) << (32 - (b))))
#define CH(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
#define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define SHA_EP0(x) (ROTRIGHT(x, 2) ^ ROTRIGHT(x, 13) ^ ROTRIGHT(x, 22))
#define SHA_EP1(x) (ROTRIGHT(x, 6) ^ ROTRIGHT(x, 11) ^ ROTRIGHT(x, 25))
#define SIG0(x) (ROTRIGHT(x, 7) ^ ROTRIGHT(x, 18) ^ ((x) >> 3))
#define SIG1(x) (ROTRIGHT(x, 17) ^ ROTRIGHT(x, 19) ^ ((x) >> 10))
#define SWAP32(x) __builtin_bswap32(x)
#define SWAP64(x) __builtin_bswap64(x)
/**
* This variable is used internally by \ref sha256_transform()
*/
static const uint32_t k[64] = {
U32(0x428a2f98U), U32(0x71374491U), U32(0xb5c0fbcfU), U32(0xe9b5dba5U),
U32(0x3956c25bU), U32(0x59f111f1U), U32(0x923f82a4U), U32(0xab1c5ed5U),
U32(0xd807aa98U), U32(0x12835b01U), U32(0x243185beU), U32(0x550c7dc3U),
U32(0x72be5d74U), U32(0x80deb1feU), U32(0x9bdc06a7U), U32(0xc19bf174U),
U32(0xe49b69c1U), U32(0xefbe4786U), U32(0x0fc19dc6U), U32(0x240ca1ccU),
U32(0x2de92c6fU), U32(0x4a7484aaU), U32(0x5cb0a9dcU), U32(0x76f988daU),
U32(0x983e5152U), U32(0xa831c66dU), U32(0xb00327c8U), U32(0xbf597fc7U),
U32(0xc6e00bf3U), U32(0xd5a79147U), U32(0x06ca6351U), U32(0x14292967U),
U32(0x27b70a85U), U32(0x2e1b2138U), U32(0x4d2c6dfcU), U32(0x53380d13U),
U32(0x650a7354U), U32(0x766a0abbU), U32(0x81c2c92eU), U32(0x92722c85U),
U32(0xa2bfe8a1U), U32(0xa81a664bU), U32(0xc24b8b70U), U32(0xc76c51a3U),
U32(0xd192e819U), U32(0xd6990624U), U32(0xf40e3585U), U32(0x106aa070U),
U32(0x19a4c116U), U32(0x1e376c08U), U32(0x2748774cU), U32(0x34b0bcb5U),
U32(0x391c0cb3U), U32(0x4ed8aa4aU), U32(0x5b9cca4fU), U32(0x682e6ff3U),
U32(0x748f82eeU), U32(0x78a5636fU), U32(0x84c87814U), U32(0x8cc70208U),
U32(0x90befffaU), U32(0xa4506cebU), U32(0xbef9a3f7U), U32(0xc67178f2U)
};
/**
* \brief
* This helper is used by \ref sha256_update to hash 512-bit blocks and forms
* the core of the algorithm.
* Use \ref sha256_init(), \ref sha256_update(), and
* \ref sha256_finalize() instead of calling sha256_transform() directly.
* \param[in] ctx pointer of struct sha256_ctx context.
* \param[in] data_in pointer to the data block to be hashed.
* \return Void
*/
static void sha256_transform(struct sha256_ctx *ctx, const void *data_in)
{
uint32_t a, b, c, d, e, f, g, h, t1, t2, m[64];
const uint32_t *const data = data_in;
size_t i;
for (i = 0; i < U32(16); i++) {
m[i] = SWAP32(data[i]);
}
for (i = 0; i < U32(64) - U32(16); ++i) {
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
m[i + U32(16)] = SIG1(m[U32(14) + i]) + m[U32(9) + i] +
SIG0(m[U32(1) + i]) + m[i];
}
a = ctx->state[0];
b = ctx->state[1];
c = ctx->state[2];
d = ctx->state[3];
e = ctx->state[4];
f = ctx->state[5];
g = ctx->state[6];
h = ctx->state[7];
for (i = 0; i < U32(64); ++i) {
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
t1 = h + SHA_EP1(e) + CH(e, f, g) + k[i] + m[i];
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
t2 = SHA_EP0(a) + MAJ(a, b, c);
h = g;
g = f;
f = e;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
e = d + t1;
d = c;
c = b;
b = a;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
a = t1 + t2;
}
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[0] += a;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[1] += b;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[2] += c;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[3] += d;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[4] += e;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[5] += f;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[6] += g;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[7] += h;
}
void sha256_init(struct sha256_ctx *ctx)
{
ctx->bitlen = 0;
ctx->state[0] = U32(0x6a09e667);
ctx->state[1] = U32(0xbb67ae85);
ctx->state[2] = U32(0x3c6ef372);
ctx->state[3] = U32(0xa54ff53a);
ctx->state[4] = U32(0x510e527f);
ctx->state[5] = U32(0x9b05688c);
ctx->state[6] = U32(0x1f83d9ab);
ctx->state[7] = U32(0x5be0cd19);
}
void sha256_update(struct sha256_ctx *ctx, const void *data, size_t len)
{
uint32_t i;
for (i = 0; i < len; i += U32(64)) {
ctx->bitlen &= U32(0xffffffff);
sha256_transform(ctx, ((const uint8_t *)data) + i);
ctx->bitlen += U32(512);
}
}
void sha256_copy(const struct sha256_ctx *ctx_in, struct sha256_ctx *ctx_out)
{
*ctx_out = *ctx_in;
}
void sha256_finalize(struct sha256_ctx *ctx, const void *input,
size_t input_size, uint32_t out[8])
{
uint8_t data[64];
void *p = data;
uint32_t t;
input_size &= U32(0xffffffff);
ctx->bitlen &= U32(0xffffffff);
/* input_size >= 64 is illegal per the API contract. */
/* The masks above exist only to satisfy Coverity INT30-C. */
ctx->bitlen += input_size * U32(8);
(void)memcpy(p, input, input_size);
data[input_size] = 0x80;
if (input_size < U32(56)) { /* can we fit an 8-byte counter? */
/* Pad whatever data is left in the buffer. */
(void)memset(data + input_size + U32(1), 0,
U32(56) - input_size - U32(1));
} else { /* Go into another block. We are here only for message hashing */
if (input_size + U32(1) < U32(64)) {
(void)memset(data + input_size + U32(1), 0,
U32(64) - input_size - U32(1));
}
sha256_transform(ctx, data);
(void)memset(data, 0, 56);
}
t = ctx->bitlen_low;
*(uint32_t *)(void *)(data + 56) = 0;
*(uint32_t *)(void *)(data + 60) = SWAP32(t);
sha256_transform(ctx, data);
out[0] = SWAP32(ctx->state[0]);
out[1] = SWAP32(ctx->state[1]);
out[2] = SWAP32(ctx->state[2]);
out[3] = SWAP32(ctx->state[3]);
out[4] = SWAP32(ctx->state[4]);
out[5] = SWAP32(ctx->state[5]);
out[6] = SWAP32(ctx->state[6]);
out[7] = SWAP32(ctx->state[7]);
}
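/*
 * Illustrative sketch, not part of the patch: hashing an arbitrary buffer
 * with this API. Full 64-byte blocks go through sha256_update(); the tail
 * (fewer than 64 bytes) goes to sha256_finalize(). example_sha256() is a
 * hypothetical helper.
 */
static void example_sha256(const uint8_t *buf, size_t len, uint32_t digest[8])
{
        struct sha256_ctx ctx;
        size_t full = len - (len % 64U);

        sha256_init(&ctx);
        sha256_update(&ctx, buf, full);              /* whole 64-byte blocks */
        sha256_finalize(&ctx, buf + full, len % 64U, digest);
}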

View File

@@ -0,0 +1,76 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved.
*/
#ifndef PVA_KMD_SHA256_H
#define PVA_KMD_SHA256_H
#include "pva_api_types.h"
#define U32(x) ((uint32_t)(x))
struct sha256_ctx {
/*
* On bitlen:
*
* While we don't exceed 2^32 bit (2^29 byte) length for the input buffer,
* size_t is more efficient at least on RISC-V. This particular
* structure is needed to make Coverity happy.
*/
union {
size_t bitlen;
uint32_t bitlen_low;
};
uint32_t state[8];
};
/**
* Initializes struct sha256_ctx
*
* \param[in] ctx pointer of struct sha256_ctx context
*
* \return void
*/
void sha256_init(struct sha256_ctx *ctx);
/**
* \brief
* Hash full 64-byte blocks. May be called repeatedly with successive chunks
* of the message to be hashed (len bytes at data).
*
* \param[in] ctx pointer of struct sha256_ctx context
* \param[in] data pointer to the data block to be hashed
* \param[in] len length in bytes of the data to be hashed; must be a
* multiple of 64.
*
* \return void
*/
void sha256_update(struct sha256_ctx *ctx, const void *data, size_t len);
/**
* \brief
* Finalize the hash and store the calculated digest in out.
* Requires input_size < 64; pass any remaining full 64-byte blocks to
* sha256_update() first.
*
* \param[in] ctx pointer of struct sha256_ctx context
* \param[in] input pointer to the data block
* (left over from \ref sha256_update) to be hashed
* \param[in] input_size size of the data block to be hashed
* (left over from \ref sha256_update)
* \param[out] out receives the calculated SHA-256 digest.
*
* \return void
*/
void sha256_finalize(struct sha256_ctx *ctx, const void *input,
size_t input_size, uint32_t out[8]);
/**
* \brief
* copy state information to ctx_out from ctx_in
* \param[in] ctx_in input struct sha256_ctx
* \param[out] ctx_out output struct sha256_ctx
* \return void
*/
void sha256_copy(const struct sha256_ctx *ctx_in, struct sha256_ctx *ctx_out);
#endif /* PVA_KMD_SHA256_H */

View File

@@ -0,0 +1,317 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_device.h"
#include "pva_fw_address_map.h"
#include "pva_fw_hyp.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_silicon_isr.h"
#include "pva_kmd_silicon_boot.h"
#include "pva_kmd_shim_silicon.h"
static inline void pva_kmd_set_sema(struct pva_kmd_device *pva,
uint32_t sema_idx, uint32_t val)
{
uint32_t gap = PVA_REG_HSP_SS1_SET_ADDR - PVA_REG_HSP_SS0_SET_ADDR;
gap = safe_mulu32(gap, sema_idx);
pva_kmd_write(pva, safe_addu32(PVA_REG_HSP_SS0_SET_ADDR, gap), val);
}
static void init_fw_print_buffer(struct pva_kmd_fw_print_buffer *print_buffer,
void *debug_buffer_va)
{
print_buffer->buffer_info = pva_offset_pointer(
debug_buffer_va,
FW_TRACE_BUFFER_SIZE + FW_CODE_COVERAGE_BUFFER_SIZE);
print_buffer->size =
FW_DEBUG_LOG_BUFFER_SIZE - sizeof(*print_buffer->buffer_info);
print_buffer->head = 0;
print_buffer->content = pva_offset_pointer(
print_buffer->buffer_info, sizeof(*print_buffer->buffer_info));
}
static void disable_sec_mission_error_reporting(struct pva_kmd_device *pva)
{
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE0_MISSIONERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE1_MISSIONERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE2_MISSIONERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE3_MISSIONERR_ENABLE_ADDR, 0U);
}
static void disable_sec_latent_error_reporting(struct pva_kmd_device *pva)
{
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE0_LATENTERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE1_LATENTERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE2_LATENTERR_ENABLE_ADDR, 0U);
pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE3_LATENTERR_ENABLE_ADDR, 0U);
}
void pva_kmd_config_evp_seg_regs(struct pva_kmd_device *pva)
{
uint64_t seg_reg_value;
/* EVP */
pva_kmd_write(pva, PVA_REG_EVP_RESET_ADDR, EVP_RESET_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_UNDEF_ADDR,
EVP_UNDEFINED_INSTRUCTION_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_SWI_ADDR, EVP_SVC_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_PREFETCH_ABORT_ADDR,
EVP_PREFETCH_ABORT_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_DATA_ABORT_ADDR, EVP_DATA_ABORT_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_RSVD_ADDR, EVP_RESERVED_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_IRQ_ADDR, EVP_IRQ_VECTOR);
pva_kmd_write(pva, PVA_REG_EVP_FIQ_ADDR, EVP_FIQ_VECTOR);
/* R5 regions are defined as:
* - PRIV1 region for firmware code and data.
* - PRIV2 region for debug printf data.
* - Remaining region for resource table, queues, etc.
*/
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_start,
FW_CODE_DATA_START_ADDR);
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_end,
FW_CODE_DATA_END_ADDR);
pva_kmd_write(pva, pva->regspec.cfg_priv_ar2_start,
FW_DEBUG_DATA_START_ADDR);
pva_kmd_write(pva, pva->regspec.cfg_priv_ar2_end,
FW_DEBUG_DATA_END_ADDR);
/* Firmware expects R5 virtual address FW_CODE_DATA_START_ADDR to be
* mapped to the beginning of firmware binary. Therefore, we adjust
* segment registers accordingly
*
* */
if (pva->load_from_gsc) {
if (pva->is_hv_mode) {
/* Loading from GSC with HV (i.e., the AV+L or AV+Q case).
 * This write will be trapped by the HV.
*/
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg,
0xFFFFFFFFU);
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg,
0xFFFFFFFFU);
} else {
/* Loading from GSC without HV, i.e., the L4T case.
 * TODO: Program the segment registers using the GSC carveout
 * fetched from the DT. Until then, ASSERT here.
*/
ASSERT(false);
}
} else {
/* Loading from file.
* In HV case, traps should be bypassed in HV
*/
seg_reg_value =
pva->fw_bin_mem->iova -
FW_CODE_DATA_START_ADDR; /* underflow is totally OK */
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg,
iova_lo(seg_reg_value));
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg,
iova_hi(seg_reg_value));
}
}
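/*
 * Worked example for the PRIV1 segment programming above (the numbers are
 * hypothetical, not taken from the patch): if the firmware image sits at
 * IOVA 0x9000_0000 and FW_CODE_DATA_START_ADDR is 0x4000_0000, the segment
 * value written is 0x9000_0000 - 0x4000_0000 = 0x5000_0000. Assuming the
 * hardware adds the segment value to the R5 address, an R5 access to
 * FW_CODE_DATA_START_ADDR + x then resolves to 0x9000_0000 + x, i.e. byte x
 * of the firmware binary, which is the mapping described in the comment
 * inside pva_kmd_config_evp_seg_regs(). If the IOVA were below
 * FW_CODE_DATA_START_ADDR the subtraction would wrap, which the same
 * addition makes harmless ("underflow is totally OK").
 */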
void pva_kmd_config_scr_regs(struct pva_kmd_device *pva)
{
pva_kmd_write(pva, PVA_REG_EVP_SCR_ADDR, PVA_EVP_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_STATUS_CNTL, PVA_STATUS_CTL_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_PRIV, PVA_PRIV_SCR_VAL);
pva_kmd_write(pva, PVA_CFG_SCR_CCQ_CNTL, PVA_CCQ_SCR_VAL);
}
void pva_kmd_config_sid(struct pva_kmd_device *pva)
{
uint32_t addr;
uint32_t i;
uint32_t offset;
uint8_t priv1_sid;
uint8_t priv_sid;
priv_sid = pva->stream_ids[PVA_R5_SMMU_CONTEXT_ID] & 0xFF;
priv1_sid = pva->stream_ids[pva->r5_image_smmu_context_id] & 0xFF;
/* Priv SIDs */
if (pva->load_from_gsc) {
pva_kmd_write(pva, pva->regspec.cfg_priv_sid,
PVA_INSERT(priv_sid, 7, 0) |
PVA_INSERT(priv1_sid, 15, 8) |
PVA_INSERT(priv_sid, 23, 16));
} else {
pva_kmd_write(pva, pva->regspec.cfg_priv_sid,
PVA_INSERT(priv_sid, 7, 0) |
PVA_INSERT(priv_sid, 15, 8) |
PVA_INSERT(priv_sid, 23, 16));
}
/* VPS SIDs */
if ((pva->hw_consts.hw_gen == PVA_HW_GEN3) && pva->load_from_gsc) {
pva_kmd_write(pva, pva->regspec.cfg_vps_sid,
PVA_INSERT(priv1_sid, 7, 0) |
PVA_INSERT(priv1_sid, 15, 8));
} else {
pva_kmd_write(pva, pva->regspec.cfg_vps_sid,
PVA_INSERT(priv_sid, 7, 0) |
PVA_INSERT(priv_sid, 15, 8));
}
/* User SIDs */
offset = 0;
for (i = 1; i < pva->hw_consts.n_smmu_contexts - 1; i++) {
addr = safe_addu32(pva->regspec.cfg_user_sid_base, offset);
pva_kmd_write(pva, addr, pva->stream_ids[i]);
offset = safe_addu32(offset, 4U);
}
}
enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva)
{
uint64_t seg_reg_value;
uint32_t debug_data_size;
uint32_t boot_sema = 0;
enum pva_error err = PVA_SUCCESS;
/* Load firmware */
if (!pva->load_from_gsc) {
err = pva_kmd_read_fw_bin(pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to read firmware from filesystem");
goto out;
}
}
debug_data_size = (uint32_t)safe_pow2_roundup_u32(
FW_DEBUG_DATA_TOTAL_SIZE, SIZE_4KB);
pva->fw_debug_mem = pva_kmd_device_memory_alloc_map(
debug_data_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
if (pva->fw_debug_mem == NULL) {
err = PVA_NOMEM;
goto free_fw_mem;
}
init_fw_print_buffer(&pva->fw_print_buffer, pva->fw_debug_mem->va);
/* Program SCRs */
pva_kmd_write(pva, PVA_SEC_SCR_SECEXT_INTR_EVENT,
PVA_SEC_SCR_SECEXT_INTR_EVENT_VAL);
pva_kmd_write(pva, PVA_PROC_SCR_PROC, PVA_PROC_SCR_PROC_VAL);
pva_kmd_config_evp_seg_scr_regs(pva);
/* Write IOVA address of debug buffer to mailbox and FW will program
* PRIV2 segment register properly such that the debug buffer is located
* at R5 virtual address FW_DEBUG_DATA_START_ADDR */
seg_reg_value = pva->fw_debug_mem->iova;
/* When GSC is enabled, KMD cannot write directly to segment registers,
* therefore we write to mailbox registers and FW will program by
* itself.
* pva_kmd_writel(pva, pva->regspec.cfg_priv_ar2_lsegreg,
* iova_lo(seg_reg_value));
* pva_kmd_writel(pva, pva->regspec.cfg_priv_ar2_usegreg,
* iova_hi(seg_reg_value));
*/
pva_kmd_write_mailbox(pva, PVA_MBOXID_PRIV2SEG_L,
iova_lo(seg_reg_value));
pva_kmd_write_mailbox(pva, PVA_MBOXID_PRIV2SEG_H,
iova_hi(seg_reg_value));
/* Write shared memory allocation start address to mailbox and FW will
* program user segment register accordingly so that virtual address
* PVA_SHARED_MEMORY_START will point to the allocation start address.
*
* We deliberately also choose PVA_SHARED_MEMORY_START as the allocation
* start address so that the net result is that user segment register
* will be programmed to 0.
*/
seg_reg_value = FW_SHARED_MEMORY_START;
pva_kmd_write_mailbox(pva, PVA_MBOXID_USERSEG_L,
iova_lo(seg_reg_value));
pva_kmd_write_mailbox(pva, PVA_MBOXID_USERSEG_H,
iova_hi(seg_reg_value));
/* Boot parameters */
if (pva->bl_sector_pack_format == PVA_BL_XBAR_RAW) {
boot_sema = PVA_BOOT_SEMA_USE_XBAR_RAW;
}
pva_kmd_set_sema(pva, PVA_BOOT_SEMA, boot_sema);
pva_kmd_write(pva, PVA_REG_HSP_SS2_SET_ADDR,
pva_kmd_get_syncpt_ro_offset(pva));
pva_kmd_write(pva, PVA_REG_HSP_SS3_SET_ADDR,
pva_kmd_get_syncpt_rw_offset(pva));
pva_kmd_config_sid_regs(pva);
/* Enable LIC INTR line for HSP1 and WDT */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_enable,
PVA_BIT(0) /*Watchdog*/
| PVA_INSERT(0x1, 4, 1) /* HSP1 */
| PVA_INSERT(0x7, 7, 5) /* All H1X errors */);
/* Bind interrupts */
err = pva_kmd_bind_intr_handler(pva, PVA_KMD_INTR_LINE_SEC_LIC,
pva_kmd_hyp_isr, pva);
if (err != PVA_SUCCESS) {
goto free_fw_debug_mem;
}
err = pva_kmd_bind_intr_handler(pva, PVA_KMD_INTR_LINE_CCQ0,
pva_kmd_isr, pva);
if (err != PVA_SUCCESS) {
goto free_sec_lic;
}
/* Take R5 out of reset */
pva_kmd_write(pva, PVA_REG_PROC_CPUHALT_ADDR, 0x1);
/* Wait until fw boots */
err = pva_kmd_sema_wait_timeout(&pva->fw_boot_sema,
PVA_KMD_FW_BOOT_TIMEOUT_MS);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Waiting for FW boot timed out.");
goto free_ccq0;
}
return err;
free_ccq0:
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_CCQ0);
free_sec_lic:
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC);
free_fw_debug_mem:
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
pva_kmd_device_memory_free(pva->fw_debug_mem);
free_fw_mem:
if (!pva->load_from_gsc) {
pva_kmd_device_memory_free(pva->fw_bin_mem);
}
out:
return err;
}
void pva_kmd_deinit_fw(struct pva_kmd_device *pva)
{
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_CCQ0);
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC);
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
/*
* Before powering off PVA, disable SEC error reporting.
 * While powering off, PVA might generate (unexplained) error interrupts.
 * These cause HSM to read some PVA SEC registers. However, since PVA might
 * already be powergated by this time, access to PVA SEC registers from HSM
 * fails. This was discussed in Bug 3785498.
 *
 * Note: we do not explicitly enable these errors during power on since
 * 'enable' is their reset value.
*/
disable_sec_mission_error_reporting(pva);
disable_sec_latent_error_reporting(pva);
pva_kmd_device_memory_free(pva->fw_debug_mem);
if (!pva->load_from_gsc) {
pva_kmd_device_memory_free(pva->fw_bin_mem);
}
}

View File

@@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_BOOT_H
#define PVA_KMD_SILICON_BOOT_H
#include "pva_kmd_device.h"
/**
* @brief Configure EVP and Segment config registers
*
* This function configures the EVP and Segment config registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_evp_seg_regs(struct pva_kmd_device *pva);
/**
* @brief Configure SCR registers.
*
* This function configures the SCR registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_scr_regs(struct pva_kmd_device *pva);
/**
* @brief Configure SID registers.
*
* This function configures the SID registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_sid(struct pva_kmd_device *pva);
#endif /* PVA_KMD_SILICON_BOOT_H */

View File

@@ -0,0 +1,414 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_silicon_elf_parser.h"
#include "pva_kmd_utils.h"
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef UINT8_MAX
#define UINT8_MAX 0xFF
#endif
// CERT complains about casts from const uint8_t*, so do intermediate cast to void*
static inline const void *uint_8_to_void(const uint8_t *const p)
{
return (const void *)p;
}
bool elf_header_check(const elf_ct e)
{
const elfFileHeader *efh = (const elfFileHeader *)e;
if ((ELFCLASS32 == efh->oclass) &&
(ELFMAGIC_LSB == *(const elfWord *)e)) {
return true;
}
return false;
}
/**
* @brief Return pointer to ELF file header
*
* Cast the elf image data to \ref elfFileHeader*
*
* @param [in] e pointer to elf image data
* @return Pointer to the ELF file header at the start of the image. The
* caller must ensure \a e is non-NULL and carries a valid ELF magic
* (see \ref elf_header_check); this function performs no validation itself.
*
*/
static const elfFileHeader *elf_file_header(const elf_ct e)
{
return (const elfFileHeader *)e;
}
/**
* @brief Get start address of the section table.
*
* @param[in] e pointer to elf image
* @return const elfSectionHeader*
* - Valid address of the section header table.
* - NULL if the section header offset lies outside the ELF image.
*/
static inline const elfSectionHeader *elf_section_table(const elf_parser_ctx e)
{
const elfFileHeader *efh = elf_file_header(e.elf_file);
const char *p = (const char *)e.elf_file;
if (efh->shoff > e.size) {
pva_kmd_log_err("Invalid Section header Offset");
return NULL;
}
p = &p[efh->shoff];
// A proper ELF should always have aligned offsets,
// but stay defensive in case it does not.
return (const elfSectionHeader *)(const void *)(p);
}
/**
* @brief Get the size of ELF section
*
* @param esh pointer to ELF section header
* @return elfWord
* - size of the corresponding section header.
* - 0, if \a esh is NULL.
*
*/
static elfWord elf_section_size(const elfSectionHeader *esh)
{
if (NULL == esh) {
return UZERO;
}
return (elfWord)esh->size;
}
elfWord elf_shnum(const elf_parser_ctx e)
{
const elfFileHeader *efh = elf_file_header(e.elf_file);
if (NULL == efh) {
return UZERO;
}
if (UZERO == efh->shnum) {
/* get value from size of first (empty) section */
/* to avoid recursion, don't call elf_section_header(0) */
const elfSectionHeader *esh = elf_section_table(e);
// if esh is somehow NULL, section_size will return UZERO
elfWord size = elf_section_size(esh);
if (size > e.size) { // make sure we don't lose precision
return UZERO;
} else {
return size;
}
} else {
return (elfWord)efh->shnum;
}
}
const elfSectionHeader *elf_section_header(const elf_parser_ctx e,
unsigned int index)
{
const elfSectionHeader *esh = elf_section_table(e);
if (NULL == esh) {
return NULL;
}
if (index >= elf_shnum(e)) {
return NULL;
}
esh = &esh[index];
return esh;
}
static inline elfOff get_table_end(elfWord num, elfHalf entsize, elfOff off)
{
elfOff end;
elfWord tablesize = 0;
/**
 * tablesize is guaranteed to stay below UINT32_MAX and the multiply cannot
 * overflow: when num comes from efh->shnum it is at most UINT16_MAX, and when
 * it comes from the first section header's size it is bounded by the ELF file
 * size, which is limited to 2 MB.
*/
tablesize = safe_mulu32(num, (uint32_t)entsize);
end = off + tablesize;
if (end < off) {
return UZERO; //Wrap around error
}
return end;
}
bool elf_has_valid_sections(const elf_parser_ctx e)
{
elfOff max_size = UZERO;
uint32_t i;
elfOff ph_end, sh_end;
const elfFileHeader *efh = elf_file_header(e.elf_file);
if (efh == NULL) {
return false;
}
ph_end = get_table_end(efh->phnum, efh->phentsize, efh->phoff);
sh_end = get_table_end(elf_shnum(e), efh->shentsize, efh->shoff);
max_size = max(ph_end, sh_end);
if ((max_size == UZERO) || (max_size > e.size)) {
return false;
}
for (i = UZERO; i < elf_shnum(e); ++i) {
elfOff esh_end;
const elfSectionHeader *esh = elf_section_header(e, i);
/* We have already validated that the whole section header array lies within the file */
ASSERT(esh != NULL);
esh_end = esh->offset + esh->size;
if (esh_end < esh->offset) {
return false; //WRAP around error;
}
if ((esh->type != SHT_NOBITS) && (esh_end > e.size)) {
return false;
}
}
return true;
}
/**
* @brief Get the index of the section header string table.
*
* Reads efh->shstrndx from the file header; if it equals SHN_XINDEX, the real
* index is taken from the link field of the first (empty) section header.
*
* @param[in] e elf context
*
* @return index of the section header string table, or 0 on failure
*/
static elfWord elf_shstrndx(const elf_parser_ctx e)
{
const elfFileHeader *efh = elf_file_header(e.elf_file);
if (NULL == efh) {
return UZERO;
}
if (efh->shstrndx == SHN_XINDEX) {
/* get value from link field of first (empty) section */
/* to avoid recursion, don't call elf_section_header(0) */
const elfSectionHeader *esh = elf_section_table(e);
if (NULL == esh) {
return UZERO;
}
return esh->link;
}
return efh->shstrndx;
}
/**
* @brief Get a string from a string table section.
*
* Verifies that \a eshstr is an SHT_STRTAB section, that \a offset lies
* within it, and that the resulting file offset does not wrap before
* returning a pointer into the table.
*
* @param[in] e elf context
* @param[in] eshstr pointer to the string table section header
* @param[in] offset byte offset into the string table
* Valid range: 0 to eshstr->size - 1
*
* @return pointer to the string in section \a eshstr at \a offset, or NULL on
* error
*/
static const char *elf_string_at_offset(const elf_parser_ctx e,
const elfSectionHeader *eshstr,
unsigned int offset)
{
const char *strtab;
elfOff stroffset;
if (SHT_STRTAB != eshstr->type) {
return NULL;
}
if (offset >= eshstr->size) {
return NULL;
}
strtab = (const char *)e.elf_file;
stroffset = eshstr->offset + offset;
if (stroffset < eshstr->offset) { // check that stroffset doesn't wrap
return NULL;
}
strtab = &strtab[stroffset];
return strtab;
}
const char *elf_section_name(const elf_parser_ctx e,
const elfSectionHeader *esh)
{
const char *name;
const elfSectionHeader *eshstr;
elfWord shstrndx;
/* get section header string table */
shstrndx = elf_shstrndx(e);
if (shstrndx == UZERO) {
return NULL;
}
eshstr = elf_section_header(e, shstrndx);
if ((NULL == esh) || (NULL == eshstr)) {
return NULL;
}
name = elf_string_at_offset(e, eshstr, esh->name);
return name;
}
const elfSectionHeader *elf_named_section_header(const elf_parser_ctx e,
const char *name)
{
const elfSectionHeader *esh;
unsigned int i;
if (NULL == name) {
return NULL;
}
esh = elf_section_table(e);
if (NULL == esh) {
return NULL;
}
/* iterate through sections till find matching name */
for (i = UZERO; i < elf_shnum(e); ++i) {
const char *secname = elf_section_name(e, esh);
if (NULL != secname) {
size_t seclen = strlen(secname);
// Use strncmp() to avoid problems if the input is not NUL-terminated,
// but then check for a false partial match.
if ((ZERO == strncmp(secname, name, seclen)) &&
(UZERO == (uint8_t)name[seclen])) {
return esh;
}
}
++esh;
}
return NULL;
}
/**
* @brief Get the first section header of a given type.
*
* Walks the section table and returns the first section whose type matches.
*
* @param[in] e elf context
* @param[in] type section type to search for (e.g. SHT_SYMTAB)
*
* @return pointer to the first matching section header, or NULL if none is
* found
*/
static const elfSectionHeader *elf_typed_section_header(const elf_parser_ctx e,
elfWord type)
{
unsigned int i;
const elfSectionHeader *esh = elf_section_table(e);
if (NULL == esh) {
return NULL;
}
/* iterate through sections till find matching type */
for (i = UZERO; i < elf_shnum(e); ++i) {
if (esh->type == type) {
return esh;
}
++esh;
}
return NULL;
}
const elfByte *elf_section_contents(const elf_parser_ctx e,
const elfSectionHeader *esh)
{
const elfByte *p;
if ((NULL == e.elf_file) || (NULL == esh)) {
return NULL;
}
p = (const elfByte *)e.elf_file;
if ((esh->offset > e.size) ||
((uint64_t)((uint64_t)esh->offset + (uint64_t)esh->size) >
e.size)) {
return NULL;
}
return &p[esh->offset];
}
const elfSymbol *elf_symbol(const elf_parser_ctx e, unsigned int index)
{
const elfSectionHeader *esh;
const elfSymbol *esymtab;
const uint8_t *p = e.elf_file;
uint8_t align = 0;
/* get symbol table */
esh = elf_typed_section_header(e, SHT_SYMTAB);
if ((NULL == esh) || (UZERO == esh->entsize)) {
return NULL;
}
if (index >= (esh->size / esh->entsize)) {
return NULL;
}
if (esh->addralign <= (uint8_t)UINT8_MAX) {
align = (uint8_t)esh->addralign;
} else {
return NULL;
}
if ((uint64_t)((uint64_t)esh->size + (uint64_t)esh->offset) > e.size) {
return NULL;
}
p = &p[esh->offset];
esymtab = (const elfSymbol *)uint_8_to_void(p);
if ((align != 0U) && ((((uintptr_t)(esymtab) % align) != UZERO))) {
return NULL;
}
return &esymtab[index];
}
const char *elf_symbol_name(const elf_parser_ctx e, const elfSectionHeader *esh,
unsigned int index)
{
const elfSectionHeader *eshstr;
const elfSymbol *esymtab;
const elfSymbol *esym;
const char *name;
const char *p;
uint8_t align = 0;
if ((NULL == esh) || (UZERO == esh->entsize)) {
return NULL;
}
if (SHT_SYMTAB != esh->type) {
return NULL;
}
if (index >= (esh->size / esh->entsize)) {
return NULL;
}
/* get string table */
eshstr = elf_section_header(e, esh->link);
if (NULL == eshstr) {
return NULL;
}
p = (const char *)e.elf_file;
if (esh->addralign <= (uint8_t)UINT8_MAX) {
align = (uint8_t)esh->addralign;
} else {
return NULL;
}
if (esh->offset > e.size) {
return NULL;
}
p = &p[esh->offset];
esymtab = (const elfSymbol *)(const void *)(p);
if ((align != 0U) && ((((uintptr_t)(esymtab) % align) != UZERO))) {
return NULL;
}
esym = &esymtab[index];
name = elf_string_at_offset(e, eshstr, esym->name);
return name;
}

View File

@@ -0,0 +1,363 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_ELF_PARSER_H
#define PVA_KMD_SILICON_ELF_PARSER_H
#include "pva_api.h"
#define ZERO 0
#define UZERO 0U
#define ULLZERO 0ULL
/*
* Define mapping from VPU data, rodata and program sections into
* corresponding segment types.
*/
typedef const void *elf_ct; /* points to const image of elf file */
/**
* Struct containing the ELF Buffer and size of the buffer.
*/
typedef struct {
/** Pointer to buffer containing ELF File */
elf_ct elf_file;
/** Size of the buffer containing ELF File */
uint64_t size;
} elf_parser_ctx;
/*--------------------------------- Types ----------------------------------*/
/** unsigned 8-bit data type */
typedef uint8_t elfByte;
/** unsigned 16-bit data type */
typedef uint16_t elfHalf;
/** unsigned 32-bit data type */
typedef uint32_t elfWord;
/** unsigned 32-bit data type */
typedef uint32_t elfAddr;
/** unsigned 32-bit data type */
typedef uint32_t elfOff;
/**
* @brief ELF File Header
*
*/
typedef struct {
/** ELF magic number : 0x7f,0x45,0x4c,0x46 */
elfWord magic;
/** Object file class */
elfByte oclass;
/** Data encoding */
elfByte data;
/** Object format version */
elfByte formatVersion;
/** OS application binary interface */
elfByte abi;
/** Version of abi */
elfByte abiVersion;
/** Elf ident padding */
elfByte padd[7];
/** Object file type */
elfHalf type;
/** Architecture */
elfHalf machine;
/** Object file version */
elfWord version;
/** Entry point virtual address */
elfAddr entry;
/** Program header table file offset */
elfOff phoff;
/** Section header table file offset */
elfOff shoff;
/** Processor-specific flags */
elfWord flags;
/** ELF header size in bytes */
elfHalf ehsize;
/** Program header table entry size */
elfHalf phentsize;
/** Program header table entry count */
elfHalf phnum;
/** Section header table entry size */
elfHalf shentsize;
/** Section header table entry count */
elfHalf shnum;
/** Section header string table index */
elfHalf shstrndx;
} elfFileHeader;
/** ELF magic number in big endian */
#define ELFMAGIC 0x7f454c46U
#define ELFMAGIC_LSB 0x464c457fU // ELF magic number in little endian
#define ELFCLASS32 1U // 32 bit object file
#define EV_NONE 0 // Invalid version
#define EV_CURRENT 1 // Current version
/**
* @brief ELF Section Header
*
*/
typedef struct {
/** Section name, string table index */
elfWord name;
/** Type of section */
elfWord type;
/** Miscellaneous section attributes */
elfWord flags;
/** Section virtual addr at execution */
elfAddr addr;
/** Section file offset */
elfOff offset;
/** Size of section in bytes */
elfWord size;
/** Index of another section */
elfWord link;
/** Additional section information */
elfWord info;
/** Section alignment */
elfWord addralign;
/** Entry size if section holds table */
elfWord entsize;
} elfSectionHeader;
/*
* Section Header Type
*/
#define SHT_NULL 0x00U /// NULL section (entry unused)
#define SHT_PROGBITS 0x01U /// Loadable program data
#define SHT_SYMTAB 0x02U /// Symbol table
#define SHT_STRTAB 0x03U /// String table
#define SHT_RELA 0x04U /// Relocation table with addends
#define SHT_HASH 0x05U /// Hash table
#define SHT_DYNAMIC 0x06U /// Information for dynamic linking
#define SHT_NOTE 0x07U /// Information that marks file
#define SHT_NOBITS 0x08U /// Section does not have data in file
#define SHT_REL 0x09U /// Relocation table without addends
#define SHT_SHLIB 0x0aU /// Reserved
#define SHT_DYNSYM 0x0bU /// Dynamic linker symbol table
#define SHT_INIT_ARRAY 0x0eU /// Array of pointers to init funcs
#define SHT_FINI_ARRAY 0x0fU /// Array of pointers to termination funcs
#define SHT_PREINIT_ARRAY 0x10U /// Array of pointers to pre-init functions
#define SHT_GROUP 0x11U /// Section group
#define SHT_SYMTAB_SHNDX 0x12U /// Table of 32bit symtab shndx
#define SHT_LOOS 0x60000000U /// Start OS-specific.
#define SHT_HIOS 0x6fffffffU /// End OS-specific type
#define SHT_LOPROC 0x70000000U /// Start of processor-specific
#define SHT_HIPROC 0x7fffffffU /// End of processor-specific
#define SHT_LOUSER 0x80000000U /// Start of application-specific
#define SHT_HIUSER 0x8fffffffU /// End of application-specific
/*
* Special section index
*/
#define SHN_UNDEF 0U // Undefined section
#define SHN_LORESERVE 0xff00U // lower bound of reserved indexes
#define SHN_ABS 0xfff1U // Associated symbol is absolute
#define SHN_COMMON 0xfff2U // Associated symbol is common
#define SHN_XINDEX 0xffffU // Index is in symtab_shndx
/*
* Special section names
*/
#define SHNAME_SHSTRTAB ".shstrtab" /// section string table
#define SHNAME_STRTAB ".strtab" /// string table
#define SHNAME_SYMTAB ".symtab" /// symbol table
#define SHNAME_SYMTAB_SHNDX ".symtab_shndx" /// symbol table shndx array
#define SHNAME_TEXT ".text." /// suffix with entry name
/**
* @brief Symbol's information
*
*/
typedef struct {
/** Symbol name, index in string tbl */
elfWord name;
/** Value of the symbol */
elfAddr value;
/** Associated symbol size */
elfWord size;
/** Type and binding attributes */
elfByte info;
/** Extra flags */
elfByte other;
/** Associated section index */
elfHalf shndx;
} elfSymbol;
/** Get the \a binding info of the symbol */
#define ELF_ST_BIND(s) ((elfWord)((s)->info) >> 4)
/** Get the \a type info of the symbol */
#define ELF_ST_TYPE(s) ((elfWord)((s)->info) & 0xfU)
/*
* ELF symbol type
*/
#define STT_NOTYPE 0U // No type known
#define STT_OBJECT 1U // Data symbol
#define STT_FUNC 2U // Code symbol
#define STT_SECTION 3U // Section
#define STT_FILE 4U // File
#define STT_COMMON 5U // Common symbol
#define STT_LOOS 10U // Start of OS-specific
/*
* ELF symbol scope (binding)
*/
#define STB_LOCAL 0U /// Symbol not visible outside object
#define STB_GLOBAL 1U /// Symbol visible outside object
#define STB_WEAK 2U /// Weak symbol
/*
* The following routines that return file/program/section headers
* all return NULL when not found.
*/
/*
* Typical elf readers create a table of information that is passed
* to the different routines. For simplicity, we're going to just
* keep the image of the whole file and pass that around. Later, if we see
* a need to speed this up, we could consider changing elf_parser_ctx to be something
* more complicated.
*/
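/*
 * Minimal usage sketch (illustrative only; "image" and "image_size" stand
 * for a caller-provided buffer holding the whole ELF file):
 *
 *   elf_parser_ctx ctx = { .elf_file = image, .size = image_size };
 *
 *   if (elf_header_check(ctx.elf_file) && elf_has_valid_sections(ctx)) {
 *           const elfSectionHeader *symtab =
 *                   elf_named_section_header(ctx, SHNAME_SYMTAB);
 *           // ... query sections, symbols and their contents ...
 *   }
 */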
/**
* @brief Checks if the file stored in \a e is a 32-bit elf file
* and if the first 4 bytes contain elf magic ID.
*
* @param[in] e elf context containing complete ELF in a const buffer
*
* @return
* - TRUE if valid 32-bit elf file and correct elf magic ID present
* in first 4 bytes of elf file
 * - FALSE if either of the above conditions is not met
*/
bool elf_header_check(const elf_ct e);
/**
 * @brief Provide the number of entries in the section header table.
 * Gets the elf file header and checks that it is not NULL; the count is
 * read from the size field of the first (empty) section header, and zero is
 * returned if that section header is NULL.
*
* @param[in] e elf context containing complete ELF in a const buffer
*
 * @return number of section headers
*/
elfWord elf_shnum(const elf_parser_ctx e);
/**
* @brief This function checks all sections in the elf to be valid
*
* The function validates all sections as follows:
* - Valid section offset i.e. within file bounds.
* - Valid section size i.e. non-zero section size
* and offset + section size is within file bounds
*
 * @param[in] e elf context containing complete ELF in a const buffer
*
* @return
* - TRUE if all sections are valid
* - FALSE if any invalid section found
*/
bool elf_has_valid_sections(const elf_parser_ctx e);
/**
* @brief This function traverses the elf and
* returns a valid \ref elfSectionHeader if present
* at the index provided
*
* @param[in] e elf context containing complete ELF in a const buffer
* @param[in] index The index of the elfSectionHeader that is requested
* Valid range : 0 to elf_shnum(e)
*
* @return
* - valid elfSectionHeader from elf if index is valid and if sectionHeader is present
* - NULL if invalid or out of bounds index
*/
const elfSectionHeader *elf_section_header(const elf_parser_ctx e,
unsigned int index);
/**
* @brief This function obtains the name of the \ref elfSectionHeader
* by going to the index specified by elfSectionHeader->name in the string table
* of the elf
*
* @param[in] e elf context
*
* @param[in] esh Valid \ref elfSectionHeader whose name is requested
*
* @return
* - Non NULL character array containing name of the elfSectionHeader
* if found in elf String Table
* - NULL if invalid elfSectionHeader or invalid index in elfSectionHeader->name
* going out of bounds of string table of elf
*/
const char *elf_section_name(const elf_parser_ctx e,
const elfSectionHeader *esh);
/**
 * @brief Provide the elf section header with the given "name".
 * Checks that the elf context is not NULL, gets the section table, and
 * iterates through the sections until a matching name is found.
*
* @param[in] e elf context
* @param[in] name name of section
*
* @return
* - elf section header with given "name"
* - NULL if @a name is NULL or invalid elfSectionHeader is found
*/
const elfSectionHeader *elf_named_section_header(const elf_parser_ctx e,
const char *name);
/**
 * @brief Provide the contents of a section.
 * Checks that the elf context and section header are not NULL and that the
 * section lies within the file bounds, then returns a pointer to the section
 * data inside the elf image.
 * @param[in] e elf context
 * @param[in] esh section header
 *
 * @return byte pointer to the section contents (NULL if e or esh is NULL or
 *         the section is out of bounds)
*/
const elfByte *elf_section_contents(const elf_parser_ctx e,
const elfSectionHeader *esh);
/**
 * @brief Get an ELF symbol by index.
 * Finds the SHT_SYMTAB section header and checks that the header and its
 * entsize are valid, that the index is within the table, and that the
 * symbol table is correctly aligned before indexing into it.
*
* @param[in] e elf context
* @param[in] index unsigned index
* Valid range: 0 to number of entries in SHT_SYMTAB of e
*
* @return elf symbol at given index (NULL if not found).
*/
const elfSymbol *elf_symbol(const elf_parser_ctx e, unsigned int index);
/**
 * @brief Get the name of a symbol from a symbol table section.
 * Checks that the section header and its entsize are valid, that the index
 * is within the table, fetches the linked string table section header, and
 * verifies that the symbol table is correctly aligned.
*
* @param[in] e elf context
* @param[in] esh pointer to structure elfSectionHeader
* @param[in] index unsigned index
* Valid range: 0 to number of entries in SHT_SYMTAB of e
*
* @return name of symbol from symtab section "esh" at "index".
*/
const char *elf_symbol_name(const elf_parser_ctx e, const elfSectionHeader *esh,
unsigned int index);
#endif // PVA_KMD_SILICON_ELF_PARSER_H

View File

@@ -0,0 +1,920 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_executable.h"
#include "pva_kmd_silicon_elf_parser.h"
#include "pva_kmd_utils.h"
#include "pva_resource.h"
#include "pva_kmd_device.h"
#include "pva_api_types.h"
#include "pva_kmd_t23x.h"
#include "pva_kmd_t26x.h"
#include "pva_math_utils.h"
/**
* enum to identify different segments of VPU ELF
*/
enum pva_elf_seg_type {
/** Code segment in VPU ELF */
PVA_SEG_VPU_CODE = 0U,
/** DATA segment in VPU ELF */
PVA_SEG_VPU_DATA,
/** DATA segment in VPU ELF containing symbol information*/
PVA_SEG_VPU_IN_PARAMS,
/** Not a valid segment in VPU ELF */
PVA_SEG_VPU_MAX_TYPE
};
/** Maximum number of characters in symbol name */
#define ELF_MAXIMUM_SYMBOL_LENGTH 64U
/** Maximum number of characters in section name */
#define ELF_MAXIMUM_SECTION_NAME 64
/** Section name of EXPORTS section */
#define ELF_EXPORTS_SECTION "EXPORTS"
/** Section name of EXPORTS section name length */
#define ELF_EXPORTS_SECTION_NAME_LENGTH 7
/** Alignment needed for Data section of ELFs */
#define DATA_SECTION_ALIGNMENT 32U
/** Alignment needed for Text section of ELFs */
#define TEXT_SECTION_ALIGNMENT 128U
/** VPU icache size: 16KB */
#define VPU_ICACHE_SIZE (16U * 1024U)
/** This value indicates that the current symbol can be ignored in the VPU ELF */
#define SYM_IGNORE 1
#define SIZE_EXPORTS_TABLE_ENTRY (3U * sizeof(uint32_t))
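/*
 * Reverse the byte order of a 32-bit word. Text-section words are
 * byte-swapped when they are copied out of the ELF (see copy_text_section).
 */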
static uint32_t change_byte_order(uint32_t word)
{
uint32_t out_word = 0U;
out_word = PVA_INSERT(PVA_EXTRACT(word, 31, 24, uint32_t), 7, 0);
out_word |= PVA_INSERT(PVA_EXTRACT(word, 23, 16, uint32_t), 15, 8);
out_word |= PVA_INSERT(PVA_EXTRACT(word, 15, 8, uint32_t), 23, 16);
out_word |= PVA_INSERT(PVA_EXTRACT(word, 7, 0, uint32_t), 31, 24);
return out_word;
}
/*
* Define mapping from VPU data, rodata and program sections into
* corresponding segment types.
*/
static const struct pack_rule {
const char *elf_sec_name;
int32_t pva_type;
} pack_rules[] = { {
.elf_sec_name = ".data",
.pva_type = (int32_t)PVA_SEG_VPU_DATA,
},
{
.elf_sec_name = ".rodata",
.pva_type = (int32_t)PVA_SEG_VPU_DATA,
},
{
.elf_sec_name = ".text",
.pva_type = (int32_t)PVA_SEG_VPU_CODE,
} };
/**
* \brief Compares the \a section_name with all
* vpu elf section names until it finds a match and
 * then returns the corresponding segment type.
 * If the segment type is \ref PVA_SEG_VPU_DATA, it further
 * checks whether it is PVA_SEG_VPU_IN_PARAMS.
* \param[in] section_name Name of the section to be searched for, in VPU ELF
* \return returns corresponding value from enum pva_elf_seg_type.
*/
static int32_t find_pva_ucode_segment_type(const char *section_name)
{
uint32_t i;
int32_t ret = (int32_t)PVA_SEG_VPU_MAX_TYPE;
for (i = 0; i < PVA_ARRAY_SIZE(pack_rules); i += 1U) {
/* Ignore the suffix of the section name */
if (strncmp(section_name, pack_rules[i].elf_sec_name,
strlen(pack_rules[i].elf_sec_name)) == 0) {
ret = pack_rules[i].pva_type;
break;
}
}
if (ret == (int32_t)PVA_SEG_VPU_DATA) {
uint64_t section_name_len =
strnlen(section_name, ELF_MAXIMUM_SECTION_NAME);
uint64_t exports_section_name_len =
ELF_EXPORTS_SECTION_NAME_LENGTH;
// Check for an EXPORTS section within the DATA segment; only EXPORTS sections carry symbol info.
if ((section_name_len >= exports_section_name_len) &&
(strncmp((section_name +
(section_name_len - exports_section_name_len)),
ELF_EXPORTS_SECTION,
(size_t)exports_section_name_len)) == 0) {
ret = (int32_t)PVA_SEG_VPU_IN_PARAMS;
}
}
return ret;
}
static enum pva_error validate_elf(const elf_parser_ctx elf)
{
enum pva_error err = PVA_SUCCESS;
if (!elf_header_check(elf.elf_file)) {
pva_kmd_log_err("Invalid 32 bit VPU ELF");
err = PVA_INVAL;
goto done;
}
if (!elf_has_valid_sections(elf)) {
pva_kmd_log_err("ELF has invalid sections");
err = PVA_INVAL;
}
done:
return err;
}
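/*
 * A symbol is kept only if it is a global, non-function symbol with a
 * non-zero size that lives in a PVA_SEG_VPU_IN_PARAMS (EXPORTS) section;
 * everything else is reported as SYM_IGNORE.
 */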
static int32_t validate_symbol(elf_parser_ctx elf, uint32_t symbol_entry_id,
const elfSymbol **sym)
{
const elfSectionHeader *sym_scn;
const char *section_name = NULL;
int32_t section_type = (int32_t)PVA_SEG_VPU_MAX_TYPE;
int32_t err = 0;
*sym = elf_symbol(elf, symbol_entry_id);
if ((*sym == NULL) || ((*sym)->size == 0U) ||
(ELF_ST_BIND(*sym) != STB_GLOBAL) ||
(ELF_ST_TYPE(*sym) == STT_FUNC)) {
err = SYM_IGNORE;
goto end;
}
sym_scn = elf_section_header(elf, (*sym)->shndx);
section_name = elf_section_name(elf, sym_scn);
if (section_name == NULL) {
err = SYM_IGNORE;
goto end;
}
section_type = find_pva_ucode_segment_type(section_name);
if (section_type != (int32_t)PVA_SEG_VPU_IN_PARAMS) {
err = SYM_IGNORE;
goto end;
}
err = 0;
end:
if (err != 0) {
*sym = NULL;
}
return err;
}
static enum pva_error count_symbols(const elf_parser_ctx elf,
uint32_t *out_num_symbols)
{
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
const elfSectionHeader *section_header;
uint32_t i, ent_count;
const elfSymbol *sym;
int32_t ret;
uint32_t num_symbols = 0;
section_header = elf_named_section_header(elf, ".symtab");
if (section_header == NULL) {
err = PVA_INVAL;
pva_kmd_log_err("No symbol table found");
goto done;
}
ent_count = section_header->size / section_header->entsize;
for (i = 0; i < ent_count; i++) {
ret = validate_symbol(elf, i, &sym);
if (ret < 0) {
err = PVA_INVAL;
pva_kmd_log_err("Validation of symbol failed");
goto done;
}
if (ret == SYM_IGNORE) {
continue;
}
num_symbols = addu32(num_symbols, 1U, &math_err);
}
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("count_symbols math error");
goto done;
}
*out_num_symbols = num_symbols;
done:
return err;
}
/**
* @brief updates symbol information (type, addr and size) from
* VPU ELF PVA_SEG_VPU_IN_PARAMS segment.
*
 * Each entry in the EXPORTS section of the ELF is laid out as follows:
 * struct {
 *     uint32_t type;        // from VMEM_TYPE enums
 *     uint32_t addr_offset; // offset from VMEM base
 *     uint32_t size;        // size of VMEM region in bytes
 * };
* @param[in] elf pointer to const image of elf file.
* @param[in] section_header pointer to VPU ELF PVA_SEG_VPU_IN_PARAMS section header
* @param[in, out] symbol_info pointer to ELF image symbol which needs to be updated.
*/
static enum pva_error
update_exports_symbol(elf_parser_ctx elf,
const elfSectionHeader *section_header,
struct pva_symbol_info *symbol_info)
{
const elfByte *data;
uint32_t symOffset = 0U;
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
if ((section_header == NULL) ||
(symbol_info->vmem_addr < section_header->addr) ||
(addu32(symbol_info->vmem_addr, (uint32_t)SIZE_EXPORTS_TABLE_ENTRY,
&math_err) >
addu32(section_header->addr, section_header->size, &math_err))) {
err = PVA_INVAL;
goto done;
} else {
symOffset = subu32(symbol_info->vmem_addr, section_header->addr,
&math_err);
}
data = elf_section_contents(elf, section_header);
if (data == NULL) {
pva_kmd_log_err("Export section in ELF is NULL");
err = PVA_INVAL;
goto done;
}
symbol_info->symbol_type = *(uint8_t *)((uintptr_t)&data[symOffset]);
if ((symbol_info->symbol_type == (uint8_t)PVA_SYM_TYPE_INVALID) ||
(symbol_info->symbol_type >= (uint8_t)PVA_SYM_TYPE_MAX)) {
pva_kmd_log_err("Invalid symbol type found");
err = PVA_INVAL;
goto done;
}
symbol_info->vmem_addr =
*(uint32_t *)((uintptr_t)&data[symOffset + sizeof(uint32_t)]);
symbol_info->size = *(uint32_t *)((
uintptr_t)&data[symOffset + (2UL * sizeof(uint32_t))]);
if (math_err != MATH_OP_SUCCESS) {
pva_kmd_log_err("update_exports_symbol math error");
err = PVA_ERR_MATH_OP;
goto done;
}
done:
return err;
}
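/*
 * Check that [vmem_offset, vmem_offset + size) lies entirely within one VMEM
 * region. The lookup relies on vmem_regions_tab being sorted by ascending
 * start address, as the T23x/T26x tables in this driver are.
 */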
static bool validate_vmem_offset(const uint32_t vmem_offset,
const uint32_t size,
const uint8_t vmem_region_count,
const struct vmem_region *vmem_regions_tab)
{
bool valid = false;
uint32_t i = 0U;
uint32_t prev_idx;
pva_math_error math_err = MATH_OP_SUCCESS;
for (i = vmem_region_count; i > 0U; i--) {
prev_idx = subu32(i, 1U, &math_err);
if (vmem_offset >= vmem_regions_tab[prev_idx].start) {
break;
}
}
if ((i > 0U) && (addu32(vmem_offset, size, &math_err) <=
vmem_regions_tab[prev_idx].end)) {
valid = true;
}
return (math_err != MATH_OP_SUCCESS) ? false : valid;
}
static enum pva_error copy_symbol(elf_parser_ctx elf, const elfSymbol *sym,
const char *symname,
struct pva_symbol_info *symbol_info,
const uint8_t vmem_region_count,
const struct vmem_region *vmem_regions_tab)
{
const elfSectionHeader *sym_scn;
enum pva_error err = PVA_SUCCESS;
size_t symname_len = strnlen(symname, PVA_MAX_SYMBOL_NAME_LEN);
if (symname_len > 0U) {
(void)memcpy(symbol_info->name, symname, symname_len);
}
symbol_info->name[PVA_MAX_SYMBOL_NAME_LEN] = '\0';
symbol_info->size = sym->size;
symbol_info->vmem_addr = sym->value;
sym_scn = elf_section_header(elf, sym->shndx);
err = update_exports_symbol(elf, sym_scn, symbol_info);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Updating symbol from EXPORTS table failed");
goto out;
}
if (!validate_vmem_offset(symbol_info->vmem_addr, symbol_info->size,
vmem_region_count, vmem_regions_tab)) {
pva_kmd_log_err("Invalid symbol vmem offset in ELF");
err = PVA_INVAL;
goto out;
}
out:
return err;
}
static enum pva_error
fill_symbol_table(const elf_parser_ctx elf,
struct pva_kmd_exec_symbol_table *sym_table,
const uint8_t vmem_region_count,
const struct vmem_region *vmem_regions_tab)
{
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
const elfSectionHeader *section_header;
uint32_t i, ent_count;
const elfSymbol *sym;
const char *symname;
int32_t ret;
uint32_t export_sym_idx = 0;
section_header = elf_named_section_header(elf, ".symtab");
if (section_header == NULL) {
err = PVA_INVAL;
pva_kmd_log_err("No symbol table found");
goto done;
}
ent_count = section_header->size / section_header->entsize;
for (i = 0; i < ent_count; i++) {
struct pva_symbol_info *symbol_info;
ret = validate_symbol(elf, i, &sym);
if (ret < 0) {
err = PVA_INVAL;
pva_kmd_log_err("Validation of symbol failed");
goto done;
}
if (ret == SYM_IGNORE) {
continue;
}
symbol_info = &sym_table->symbols[export_sym_idx];
ASSERT(symbol_info != NULL);
symname = elf_symbol_name(elf, section_header, i);
if (symname == NULL) {
err = PVA_INVAL;
pva_kmd_log_err("elf_symbol_name failed");
goto done;
}
err = copy_symbol(elf, sym, symname, symbol_info,
vmem_region_count, vmem_regions_tab);
if (err != PVA_SUCCESS) {
goto done;
}
symbol_info->symbol_id =
addu32(export_sym_idx, PVA_SYMBOL_ID_BASE, &math_err);
export_sym_idx = addu32(export_sym_idx, 1U, &math_err);
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("fill_symbol_table math error");
goto done;
}
}
done:
return err;
}
/**
 * To simplify the caller's life: the input ptr should always be considered
 * freed after this call. The returned pointer should always be treated as a
 * new allocation and must be freed by the caller if it is not NULL.
*/
static void *pva_realloc(void *ptr, uint32_t old_size, uint32_t new_size)
{
void *new_buffer;
if (ptr == NULL) {
return pva_kmd_zalloc(new_size);
}
if (new_size <= old_size) {
return ptr;
}
new_buffer = pva_kmd_zalloc(new_size);
if (new_buffer == NULL) {
goto out;
}
memcpy(new_buffer, ptr, old_size);
out:
pva_kmd_free(ptr);
return new_buffer;
}
static void *copy_text_section(const elf_parser_ctx elf,
const elfSectionHeader *section_header,
void *out_buffer, uint32_t *buffer_size)
{
const elfByte *elf_data;
uint32_t const *word;
uint32_t *dst_word;
uint32_t wi;
/* The load address in section header is in words (uint32_t) */
uint32_t load_addr_bytes =
safe_mulu32(section_header->addr, (uint32_t)sizeof(uint32_t));
uint32_t needed_size =
safe_addu32(load_addr_bytes, section_header->size);
// Align required text section size
needed_size =
safe_pow2_roundup_u32(needed_size, TEXT_SECTION_ALIGNMENT);
if (needed_size > *buffer_size) {
out_buffer = pva_realloc(out_buffer, *buffer_size, needed_size);
*buffer_size = needed_size;
}
if (out_buffer == NULL) {
return NULL;
}
elf_data = elf_section_contents(elf, section_header);
if (elf_data == NULL) {
pva_kmd_log_err("copy_text_section elf_data error");
return NULL;
}
word = (uint32_t const *)elf_data;
dst_word = (uint32_t *)((uintptr_t)out_buffer + load_addr_bytes);
for (wi = 0; wi < (section_header->size / sizeof(uint32_t)); wi++) {
dst_word[wi] = change_byte_order(word[wi]);
}
return out_buffer;
}
/**
* @brief Aggregate all text sections into a single, dynamically
* allocated buffer.
*
 * The placement of the text sections needs to take the load addresses into
 * account.
 *
 * The endianness of each text-section word needs to be swapped.
*
* Caller is responsible for freeing the returned buffer.
*/
static void *aggregate_text_sections(const elf_parser_ctx elf,
uint32_t *out_size)
{
const elfSectionHeader *section_header;
uint32_t index = 0;
const char *section_name;
const elfWord sectionCount = elf_shnum(elf);
void *sections_content = NULL;
uint32_t sections_size = 0;
for (index = 0; index < sectionCount; index++) {
int32_t segment_type;
section_header = elf_section_header(elf, index);
if (section_header == NULL) {
pva_kmd_log_err(
"aggregate_text_sections elf_section_header error");
goto out;
}
section_name = elf_section_name(elf, section_header);
if (section_name == NULL) {
pva_kmd_log_err(
"aggregate_text_sections elf_section_name error");
goto out;
}
segment_type = find_pva_ucode_segment_type(section_name);
if ((section_header->type == SHT_PROGBITS) &&
(segment_type == (int32_t)PVA_SEG_VPU_CODE)) {
sections_content =
copy_text_section(elf, section_header,
sections_content,
&sections_size);
if (sections_content == NULL) {
pva_kmd_log_err(
"aggregate_text_sections copy_text_section error");
goto out;
}
}
}
out:
*out_size = sections_size;
return sections_content;
}
static void copy_data_section(const elf_parser_ctx elf,
const elfSectionHeader *section_header,
void *out_buffer, uint32_t *buffer_offset,
uint32_t buffer_size)
{
const elfByte *elf_data;
void *dst;
uint32_t aligned_size = safe_pow2_roundup_u32(section_header->size,
DATA_SECTION_ALIGNMENT);
uint32_t size = safe_addu32(*buffer_offset, aligned_size);
ASSERT(size <= buffer_size);
dst = pva_offset_pointer(out_buffer, *buffer_offset);
elf_data = elf_section_contents(elf, section_header);
ASSERT(elf_data != NULL);
memcpy(dst, elf_data, section_header->size);
*buffer_offset = safe_addu32(*buffer_offset, aligned_size);
}
static enum pva_error count_data_sections(const elf_parser_ctx elf,
uint32_t *out_n_data_sections,
uint32_t *out_total_size)
{
const elfSectionHeader *section_header;
uint32_t index = 0;
const char *section_name;
const elfWord sectionCount = elf_shnum(elf);
uint32_t n_data_sections = 0;
uint32_t total_size = 0;
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
for (index = 0; index < sectionCount; index++) {
int32_t segment_type;
section_header = elf_section_header(elf, index);
if (section_header == NULL) {
err = PVA_INVAL;
goto out;
}
section_name = elf_section_name(elf, section_header);
if (section_name == NULL) {
err = PVA_INVAL;
goto out;
}
segment_type = find_pva_ucode_segment_type(section_name);
if ((section_header->type == SHT_PROGBITS) &&
(segment_type == (int32_t)PVA_SEG_VPU_DATA)) {
n_data_sections =
addu32(n_data_sections, 1U, &math_err);
total_size += safe_pow2_roundup_u32(
section_header->size, DATA_SECTION_ALIGNMENT);
}
}
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("count_data_sections math error");
goto out;
}
*out_n_data_sections = n_data_sections;
*out_total_size = total_size;
out:
return err;
}
/**
* @brief Aggregate all data sections into a single, dynamically
* allocated buffer.
*
 * The offset of each data section must be aligned to DATA_SECTION_ALIGNMENT.
*
* The caller must free the returned data buffer and out_section_infos.
*
*/
static void *
aggregate_data_sections(const elf_parser_ctx elf, uint32_t n_data_sections,
uint32_t total_sections_size,
struct pva_fw_data_section_info **out_section_infos)
{
const elfSectionHeader *section_header;
uint32_t index = 0;
const char *section_name;
const elfWord sectionCount = elf_shnum(elf);
void *sections_content = NULL;
struct pva_fw_data_section_info *section_infos;
uint32_t buffer_offset = 0;
uint32_t sec_idx = 0;
sections_content = pva_kmd_zalloc(total_sections_size);
if (sections_content == NULL) {
goto err_out;
}
section_infos =
pva_kmd_zalloc(sizeof(*section_infos) * n_data_sections);
if (section_infos == NULL) {
goto free_content;
}
for (index = 0; index < sectionCount; index++) {
int32_t segment_type;
section_header = elf_section_header(elf, index);
/* Already checked when counting data sections */
ASSERT(section_header != NULL);
section_name = elf_section_name(elf, section_header);
ASSERT(section_name != NULL);
segment_type = find_pva_ucode_segment_type(section_name);
if ((section_header->type == SHT_PROGBITS) &&
(segment_type == (int32_t)PVA_SEG_VPU_DATA)) {
section_infos[sec_idx].data_buf_off = buffer_offset;
section_infos[sec_idx].vmem_addr = section_header->addr;
section_infos[sec_idx].size = section_header->size;
sec_idx = safe_addu32(sec_idx, 1U);
copy_data_section(elf, section_header, sections_content,
&buffer_offset, total_sections_size);
}
}
*out_section_infos = section_infos;
return sections_content;
free_content:
pva_kmd_free(sections_content);
err_out:
return NULL;
}
/**
 * @brief Place the text and data sections in a single contiguous buffer that
 * is mapped into the PVA IOVA space (user SID), laid out as
 * [ text | VPU_ICACHE_SIZE guard | data sections ].
 *
 * The text size is padded by an entire VPU icache size to avoid SMMU faults
 * from instruction prefetch past the end of the text.
*/
static struct pva_kmd_device_memory *
load_sections(struct pva_kmd_device *pva, uint8_t smmu_id,
const void *text_section_buf, uint32_t text_size,
const void *data_section_buf, uint32_t data_size,
uint32_t *out_data_begin_offset)
{
uint32_t size = safe_addu32(text_size, (uint32_t)VPU_ICACHE_SIZE);
uint32_t alloc_size = safe_addu32(size, data_size);
uint32_t data_begin = safe_addu32(text_size, (uint32_t)VPU_ICACHE_SIZE);
struct pva_kmd_device_memory *dev_mem;
ASSERT(TEXT_SECTION_ALIGNMENT >= DATA_SECTION_ALIGNMENT);
/* This is guaranteed to be true as TEXT_SECTION_ALIGNMENT is more strict */
ASSERT(data_begin % DATA_SECTION_ALIGNMENT == 0);
/* Map it as read-only. TODO: when VPU debugger is supported, we may
* need to map text as READ_WRITE conditionally. */
dev_mem = pva_kmd_device_memory_alloc_map(alloc_size, pva,
PVA_ACCESS_RO, smmu_id);
if (dev_mem == NULL) {
goto out;
}
memcpy(dev_mem->va, text_section_buf, text_size);
memcpy(pva_offset_pointer(dev_mem->va, data_begin), data_section_buf,
data_size);
*out_data_begin_offset = data_begin;
out:
return dev_mem;
}
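/*
 * Metainfo buffer layout: a struct pva_exec_bin_resource header, followed by
 * n_data_sections pva_fw_data_section_info entries, followed by n_symbols
 * pva_fw_vmem_buffer entries describing the exported VMEM buffers.
 */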
static struct pva_kmd_device_memory *
load_metainfo(struct pva_kmd_device *pva, uint64_t section_iova,
uint32_t text_size, uint32_t data_begin_off, uint32_t data_size,
struct pva_fw_data_section_info const *section_infos,
uint32_t n_data_sections, struct pva_symbol_info *symbol_table,
uint32_t n_symbols)
{
struct pva_kmd_device_memory *dev_mem;
struct pva_exec_bin_resource *metainfo;
struct pva_fw_vmem_buffer *vmem_buffers_mem;
struct pva_fw_data_section_info *data_sections_mem;
uint32_t i;
uint32_t alloc_size = (uint32_t)sizeof(struct pva_exec_bin_resource);
pva_math_error math_err = MATH_OP_SUCCESS;
alloc_size =
addu32(alloc_size,
mulu32(n_data_sections,
(uint32_t)sizeof(struct pva_fw_data_section_info),
&math_err),
&math_err);
alloc_size = addu32(alloc_size,
mulu32(n_symbols,
(uint32_t)sizeof(struct pva_fw_vmem_buffer),
&math_err),
&math_err);
dev_mem = pva_kmd_device_memory_alloc_map(
alloc_size, pva, PVA_ACCESS_RO, PVA_R5_SMMU_CONTEXT_ID);
if (dev_mem == NULL) {
goto out;
}
metainfo = dev_mem->va;
metainfo->code_addr_hi = iova_hi(section_iova);
metainfo->code_addr_lo = iova_lo(section_iova);
metainfo->code_size = text_size;
metainfo->data_section_addr_hi =
iova_hi(addu64(section_iova, data_begin_off, &math_err));
metainfo->data_section_addr_lo =
iova_lo(addu64(section_iova, data_begin_off, &math_err));
metainfo->num_data_sections = n_data_sections;
metainfo->num_vmem_buffers = n_symbols;
data_sections_mem = pva_offset_pointer(metainfo, sizeof(*metainfo));
memcpy(data_sections_mem, section_infos,
mulu32(n_data_sections, (uint32_t)sizeof(*section_infos),
&math_err));
vmem_buffers_mem = pva_offset_pointer(
data_sections_mem,
mulu32(n_data_sections, (uint32_t)sizeof(*section_infos),
&math_err));
if (math_err != MATH_OP_SUCCESS) {
dev_mem = NULL;
goto out;
}
for (i = 0; i < n_symbols; i++) {
vmem_buffers_mem[i].addr =
PVA_INSERT(symbol_table[i].vmem_addr,
PVA_FW_VMEM_ADDR_MSB, PVA_FW_VMEM_ADDR_LSB) |
PVA_INSERT((uint32_t)symbol_table[i].symbol_type,
PVA_FW_SYM_TYPE_MSB, PVA_FW_SYM_TYPE_LSB);
vmem_buffers_mem[i].size = symbol_table[i].size;
}
out:
return dev_mem;
}
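/*
 * Load a VPU executable: validate the ELF, build the exported-symbol table
 * from the EXPORTS data, aggregate the text and data sections, map them into
 * the PVA IOVA space and emit the metainfo block consumed by the firmware.
 */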
enum pva_error
pva_kmd_load_executable(void *executable_data, uint32_t executable_size,
struct pva_kmd_device *pva, uint8_t dma_smmu_id,
struct pva_kmd_exec_symbol_table *out_symbol_table,
struct pva_kmd_device_memory **out_metainfo,
struct pva_kmd_device_memory **out_sections)
{
enum pva_error err = PVA_SUCCESS;
pva_math_error math_err = MATH_OP_SUCCESS;
elf_parser_ctx elf = { 0 };
uint32_t num_symbols = 0;
uint32_t n_data_sections;
uint32_t total_data_section_size = 0;
struct pva_fw_data_section_info *section_infos = NULL;
void *data_section_buf = NULL;
void *text_section_buf = NULL;
uint32_t total_text_section_size = 0;
struct pva_kmd_device_memory *metainfo_mem = NULL;
struct pva_kmd_device_memory *sections_mem = NULL;
uint32_t data_begin_off;
elf.elf_file = executable_data;
elf.size = executable_size;
err = validate_elf(elf);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = count_symbols(elf, &num_symbols);
if (err != PVA_SUCCESS) {
goto err_out;
}
out_symbol_table->n_symbols = num_symbols;
if (num_symbols > 0) {
out_symbol_table->symbols = pva_kmd_zalloc(
mulu32((uint32_t)sizeof(struct pva_symbol_info),
num_symbols, &math_err));
if (out_symbol_table->symbols == NULL) {
err = PVA_NOMEM;
goto err_out;
}
if (math_err != MATH_OP_SUCCESS) {
err = PVA_ERR_MATH_OP;
pva_kmd_log_err("pva_kmd_load_executable math error");
goto err_out;
}
}
err = fill_symbol_table(elf, out_symbol_table,
pva->hw_consts.n_vmem_regions,
pva->vmem_regions_tab);
if (err != PVA_SUCCESS) {
goto free_syms;
}
text_section_buf =
aggregate_text_sections(elf, &total_text_section_size);
/* Must have text sections */
if (text_section_buf == NULL) {
pva_kmd_log_err(
"pva_kmd_load_executable aggregate_text_sections error");
goto free_syms;
}
err = count_data_sections(elf, &n_data_sections,
&total_data_section_size);
if (err != PVA_SUCCESS) {
goto free_text_buf;
}
/* It's OK to not have data sections */
if (total_data_section_size != 0) {
data_section_buf =
aggregate_data_sections(elf, n_data_sections,
total_data_section_size,
&section_infos);
ASSERT(data_section_buf != NULL);
}
sections_mem = load_sections(pva, dma_smmu_id, text_section_buf,
total_text_section_size, data_section_buf,
total_data_section_size, &data_begin_off);
if (sections_mem == NULL) {
goto free_data_buf;
}
metainfo_mem =
load_metainfo(pva, sections_mem->iova, total_text_section_size,
data_begin_off, total_data_section_size,
section_infos, n_data_sections,
out_symbol_table->symbols, num_symbols);
if (metainfo_mem == NULL) {
goto free_sec_mem;
}
/* Success. Now clean up temporary allocations */
if (data_section_buf != NULL) {
pva_kmd_free(data_section_buf);
}
if (section_infos != NULL) {
pva_kmd_free(section_infos);
}
pva_kmd_free(text_section_buf);
*out_metainfo = metainfo_mem;
*out_sections = sections_mem;
return PVA_SUCCESS;
free_sec_mem:
pva_kmd_device_memory_free(sections_mem);
free_data_buf:
if (data_section_buf != NULL) {
pva_kmd_free(data_section_buf);
}
if (section_infos != NULL) {
pva_kmd_free(section_infos);
}
free_text_buf:
pva_kmd_free(text_section_buf);
free_syms:
pva_kmd_free(out_symbol_table->symbols);
err_out:
return err;
}
void pva_kmd_unload_executable(struct pva_kmd_exec_symbol_table *symbol_table,
struct pva_kmd_device_memory *metainfo,
struct pva_kmd_device_memory *sections)
{
pva_kmd_device_memory_free(metainfo);
pva_kmd_device_memory_free(sections);
if (symbol_table->symbols != NULL) {
pva_kmd_free(symbol_table->symbols);
symbol_table->symbols = NULL;
}
}

View File

@@ -0,0 +1,63 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_device.h"
#include "pva_kmd_silicon_hwpm.h"
#include "pva_kmd_silicon_utils.h"
#ifndef TEGRA_SOC_HWPM_IP_REG_OP_READ
#define TEGRA_SOC_HWPM_IP_REG_OP_READ 0x1
#endif
#ifndef TEGRA_SOC_HWPM_IP_REG_OP_WRITE
#define TEGRA_SOC_HWPM_IP_REG_OP_WRITE 0x2
#endif
int pva_kmd_hwpm_ip_reg_op(void *ip_dev, uint32_t reg_op,
uint32_t inst_element_index, uint64_t reg_offset,
uint32_t *reg_data)
{
struct pva_kmd_device *pva = ip_dev;
if (reg_offset > UINT32_MAX)
return PVA_INVAL;
switch (reg_op) {
case TEGRA_SOC_HWPM_IP_REG_OP_READ:
*reg_data =
pva_kmd_read(pva, safe_addu32(pva->regspec.cfg_perf_mon,
reg_offset));
break;
case TEGRA_SOC_HWPM_IP_REG_OP_WRITE:
pva_kmd_write(
pva, safe_addu32(pva->regspec.cfg_perf_mon, reg_offset),
*reg_data);
break;
default:
pva_kmd_log_err("Invalid HWPM operation");
return PVA_INVAL;
}
return PVA_SUCCESS;
}
int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable)
{
struct pva_kmd_device *dev = ip_dev;
enum pva_error err = PVA_SUCCESS;
if (disable) {
err = pva_kmd_device_busy(dev);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to busy");
}
} else {
pva_kmd_device_idle(dev);
}
return err;
}

View File

@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_HWPM_H
#define PVA_KMD_SILICON_HWPM_H
#include "pva_kmd.h"
#include "pva_kmd_shim_debugfs.h"
/**
 * @brief pva_kmd_hwpm_ip_pm
 *
 * This function is called from the Tegra HWPM driver to
 * power the PVA device on or off.
 *
 * @param ip_dev Pointer to PVA device
 * @param disable when true, power management is disabled and the PVA is
 *                kept powered on; when false, the busy reference is
 *                released and the PVA may power off.
 * @return 0 on Success or negative error code
*
*/
int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable);
/**
 * @brief pva_kmd_hwpm_ip_reg_op
 *
 * This function is called from the Tegra HWPM driver to
 * access PVA HWPM registers.
*
* @param ip_dev Pointer to PVA device
* @param reg_op access operation and can be one of
* TEGRA_SOC_HWPM_IP_REG_OP_READ
* TEGRA_SOC_HWPM_IP_REG_OP_WRITE
* @param inst_element_index element index within PVA instance
 * @param reg_offset offset of the register relative to the PVA HWPM base
* @param reg_data pointer to where data is to be placed or read.
* @return 0 on Success or negative error code
*
*/
int pva_kmd_hwpm_ip_reg_op(void *ip_dev, uint32_t reg_op,
uint32_t inst_element_index, uint64_t reg_offset,
uint32_t *reg_data);
#endif //PVA_KMD_SILICON_HWPM_H

View File

@@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_silicon_isr.h"
#include "pva_kmd_device.h"
#include "pva_fw_hyp.h"
#include "pva_kmd_msg.h"
struct pva_fw_msg {
uint8_t len;
uint32_t data[PVA_FW_MSG_MAX_LEN];
};
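/*
 * Read one firmware message from the hypervisor mailboxes: the word in
 * PVA_FW_MBOX_TO_HYP_LAST carries the header (including the length field);
 * the remaining words are read from PVA_FW_MBOX_TO_HYP_BASE onwards.
 */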
static void read_hyp_msg(struct pva_kmd_device *pva, struct pva_fw_msg *msg)
{
uint32_t i;
msg->data[0] = pva_kmd_read_mailbox(pva, PVA_FW_MBOX_TO_HYP_LAST);
msg->len = PVA_EXTRACT(msg->data[0], PVA_FW_MSG_LEN_MSB,
PVA_FW_MSG_LEN_LSB, uint8_t);
ASSERT(msg->len <= PVA_ARRAY_SIZE(msg->data));
for (i = 1; i < msg->len; i++) {
msg->data[i] = pva_kmd_read_mailbox(
pva, PVA_FW_MBOX_TO_HYP_BASE + i - 1);
}
}
void pva_kmd_hyp_isr(void *data)
{
struct pva_kmd_device *pva = data;
uint32_t intr_status;
uint32_t wdt_val, hsp_val, h1x_val;
intr_status = pva_kmd_read(pva, pva->regspec.sec_lic_intr_status);
wdt_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_WDT_MSB,
PVA_REG_SEC_LIC_INTR_WDT_LSB, uint32_t);
hsp_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_HSP_MSB,
PVA_REG_SEC_LIC_INTR_HSP_LSB, uint32_t);
h1x_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_H1X_MSB,
PVA_REG_SEC_LIC_INTR_H1X_LSB, uint32_t);
if (wdt_val != 0) {
/* Clear interrupt status */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status,
intr_status &
PVA_MASK(PVA_REG_SEC_LIC_INTR_WDT_MSB,
PVA_REG_SEC_LIC_INTR_WDT_LSB));
/* TODO: reboot firmware when we can */
FAULT("PVA watchdog timeout!");
}
if (h1x_val != 0) {
pva_kmd_log_err_u64("Host1x errors", h1x_val);
/* Clear interrupt status */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status,
intr_status &
PVA_MASK(PVA_REG_SEC_LIC_INTR_H1X_MSB,
PVA_REG_SEC_LIC_INTR_H1X_LSB));
}
if (hsp_val != 0) {
struct pva_fw_msg msg = { 0 };
read_hyp_msg(pva, &msg);
pva_kmd_handle_hyp_msg(pva, &msg.data[0], msg.len);
msg.data[0] &= ~PVA_FW_MBOX_FULL_BIT;
/* Clear interrupt bit in mailbox */
pva_kmd_write_mailbox(pva, PVA_FW_MBOX_TO_HYP_LAST,
msg.data[0]);
}
}
static uint32_t read_ccq0_status(struct pva_kmd_device *pva, uint8_t status_id)
{
return pva_kmd_read(pva, pva->regspec.ccq_regs[0].status[status_id]);
}
static void write_ccq0_status(struct pva_kmd_device *pva, uint8_t status_id,
uint32_t value)
{
pva_kmd_write(pva, pva->regspec.ccq_regs[0].status[status_id], value);
}
static void read_ccq_msg(struct pva_kmd_device *pva, struct pva_fw_msg *msg)
{
uint32_t i;
msg->data[0] = read_ccq0_status(pva, PVA_FW_MSG_STATUS_LAST);
msg->len = PVA_EXTRACT(msg->data[0], PVA_FW_MSG_LEN_MSB,
PVA_FW_MSG_LEN_LSB, uint8_t);
ASSERT(msg->len <= PVA_ARRAY_SIZE(msg->data));
for (i = 1; i < msg->len; i++) {
msg->data[i] =
read_ccq0_status(pva, PVA_FW_MSG_STATUS_BASE + i - 1);
}
}
/* Handle interrupt from CCQ0 */
void pva_kmd_isr(void *data)
{
struct pva_kmd_device *pva = data;
uint32_t intr_status;
intr_status =
read_ccq0_status(pva, 2) & PVA_REG_CCQ_STATUS2_INTR_ALL_BITS;
pva_dbg_printf("CCQ0_INTR_STATUS 0x%x\n", intr_status);
/* Clear interrupt status. This must be done prior to acking CCQ messages;
 * otherwise we risk losing CCQ messages.
*/
write_ccq0_status(pva, 2, intr_status);
if (intr_status & PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT) {
struct pva_fw_msg msg;
read_ccq_msg(pva, &msg);
pva_kmd_handle_msg(pva, &msg.data[0], msg.len);
/* Ack through status1 write. */
write_ccq0_status(pva, 1, 0 /* Value doesn't matter for now */);
}
/* We don't care about Status7 or CCQ overflow interrupt */
}

View File

@@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_ISR_H
#define PVA_KMD_SILICON_ISR_H
#include "pva_kmd_silicon_utils.h"
#include "pva_kmd_device.h"
void pva_kmd_hyp_isr(void *data);
void pva_kmd_isr(void *data);
#endif // PVA_KMD_SILICON_ISR_H

View File

@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_silicon_utils.h"
#include "pva_kmd_device.h"
#include "pva_math_utils.h"
void pva_kmd_ccq_push(struct pva_kmd_device *pva, uint8_t ccq_id,
uint64_t ccq_entry)
{
pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo,
PVA_EXTRACT64(ccq_entry, 31, 0, uint32_t));
pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo,
PVA_EXTRACT64(ccq_entry, 63, 32, uint32_t));
}
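/* Each 64-bit CCQ entry occupies two 32-bit FIFO words (see
 * pva_kmd_ccq_push), so the free word count is halved to report whole
 * entries.
 */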
uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id)
{
uint32_t status2 =
pva_kmd_read(pva, pva->regspec.ccq_regs[ccq_id].status[2]);
uint32_t len =
PVA_EXTRACT(status2, PVA_REG_CCQ_STATUS2_NUM_ENTRIES_MSB,
PVA_REG_CCQ_STATUS2_NUM_ENTRIES_LSB, uint32_t);
return safe_subu32((uint32_t)PVA_CCQ_DEPTH, len) / 2U;
}

View File

@@ -0,0 +1,52 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SILICON_UTILS_H
#define PVA_KMD_SILICON_UTILS_H
#include "pva_utils.h"
#include "pva_kmd_regs.h"
#include "pva_kmd_shim_silicon.h"
#include "pva_math_utils.h"
static inline void pva_kmd_write(struct pva_kmd_device *pva, uint32_t addr,
uint32_t val)
{
pva_dbg_printf("pva_kmd_write: addr=0x%x, val=0x%x\n", addr, val);
pva_kmd_aperture_write(pva, PVA_KMD_APERTURE_PVA_CLUSTER, addr, val);
}
static inline uint32_t pva_kmd_read(struct pva_kmd_device *pva, uint32_t addr)
{
uint32_t val;
val = pva_kmd_aperture_read(pva, PVA_KMD_APERTURE_PVA_CLUSTER, addr);
return val;
}
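/* Shared mailboxes are laid out at a fixed stride: the address of mailbox N
 * is PVA_REG_HSP_SM0_ADDR + N * (PVA_REG_HSP_SM1_ADDR - PVA_REG_HSP_SM0_ADDR).
 */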
static inline void pva_kmd_write_mailbox(struct pva_kmd_device *pva,
uint32_t mailbox_idx, uint32_t val)
{
uint32_t gap = PVA_REG_HSP_SM1_ADDR - PVA_REG_HSP_SM0_ADDR;
uint32_t offset = safe_mulu32(gap, mailbox_idx);
uint32_t addr = safe_addu32(PVA_REG_HSP_SM0_ADDR, offset);
pva_kmd_write(pva, addr, val);
}
static inline uint32_t pva_kmd_read_mailbox(struct pva_kmd_device *pva,
uint32_t mailbox_idx)
{
uint32_t gap = PVA_REG_HSP_SM1_ADDR - PVA_REG_HSP_SM0_ADDR;
uint32_t offset = safe_mulu32(gap, mailbox_idx);
uint32_t addr = safe_addu32(PVA_REG_HSP_SM0_ADDR, offset);
return pva_kmd_read(pva, addr);
}
#endif // PVA_KMD_SILICON_UTILS_H

View File

@@ -0,0 +1,156 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_submitter.h"
#include "pva_kmd_utils.h"
void pva_kmd_submitter_init(struct pva_kmd_submitter *submitter,
struct pva_kmd_queue *queue,
pva_kmd_mutex_t *submit_lock,
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool,
pva_kmd_mutex_t *chunk_pool_lock,
uint32_t *post_fence_va,
struct pva_fw_postfence const *post_fence)
{
submitter->queue = queue;
submitter->submit_lock = submit_lock;
submitter->post_fence_va = post_fence_va;
submitter->post_fence = *post_fence;
submitter->fence_future_value = 0;
submitter->chunk_pool = chunk_pool;
submitter->chunk_pool_lock = chunk_pool_lock;
*submitter->post_fence_va = submitter->fence_future_value;
}
enum pva_error pva_kmd_submitter_prepare(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder)
{
enum pva_error err;
err = pva_kmd_cmdbuf_builder_init(builder, submitter->chunk_pool);
if (err != PVA_SUCCESS) {
goto err_out;
}
return PVA_SUCCESS;
err_out:
return err;
}
enum pva_error
pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder,
struct pva_fw_postfence *fence)
{
enum pva_error err;
uint32_t first_chunk_id;
uint16_t first_chunk_size;
uint64_t first_chunk_offset;
struct pva_fw_cmdbuf_submit_info submit_info = { 0 };
struct pva_fw_postfence free_notifier_fence;
pva_kmd_cmdbuf_builder_finalize(builder, &first_chunk_id,
&first_chunk_size);
pva_kmd_get_free_notifier_fence(submitter->chunk_pool, first_chunk_id,
&free_notifier_fence);
first_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset(
submitter->chunk_pool, first_chunk_id);
submit_info.postfences[0] = free_notifier_fence;
submit_info.num_postfence = 1;
if (fence->resource_id != PVA_RESOURCE_ID_INVALID) {
submit_info.postfences[1] = *fence;
submit_info.num_postfence = 2;
}
submit_info.first_chunk_resource_id =
submitter->chunk_pool->mem_resource_id;
submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset);
submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset);
submit_info.first_chunk_size = first_chunk_size;
pva_kmd_mutex_lock(submitter->submit_lock);
err = pva_kmd_queue_submit(submitter->queue, &submit_info);
if (err != PVA_SUCCESS) {
pva_kmd_cmdbuf_builder_cancel(builder);
}
pva_kmd_mutex_unlock(submitter->submit_lock);
return err;
}
enum pva_error pva_kmd_submitter_submit(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder,
uint32_t *out_fence_val)
{
enum pva_error err;
uint32_t first_chunk_id;
uint16_t first_chunk_size;
uint64_t first_chunk_offset;
struct pva_fw_cmdbuf_submit_info submit_info = { 0 };
struct pva_fw_postfence free_notifier_fence;
pva_kmd_cmdbuf_builder_finalize(builder, &first_chunk_id,
&first_chunk_size);
pva_kmd_get_free_notifier_fence(submitter->chunk_pool, first_chunk_id,
&free_notifier_fence);
first_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset(
submitter->chunk_pool, first_chunk_id);
submit_info.num_postfence = 2;
submit_info.postfences[0] = submitter->post_fence;
submit_info.postfences[1] = free_notifier_fence;
submit_info.first_chunk_resource_id =
submitter->chunk_pool->mem_resource_id;
submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset);
submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset);
submit_info.first_chunk_size = first_chunk_size;
/* TODO: remove these flags once FW can execute command buffers with no engines. */
submit_info.flags =
PVA_INSERT8(0x3, PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_MSB,
PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_LSB);
pva_kmd_mutex_lock(submitter->submit_lock);
submitter->fence_future_value += 1U;
submit_info.postfences[0].value = submitter->fence_future_value;
err = pva_kmd_queue_submit(submitter->queue, &submit_info);
if (err == PVA_SUCCESS) {
*out_fence_val = submitter->fence_future_value;
} else {
submitter->fence_future_value -= 1U;
pva_kmd_cmdbuf_builder_cancel(builder);
}
pva_kmd_mutex_unlock(submitter->submit_lock);
return err;
}
enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter,
uint32_t fence_val,
uint32_t poll_interval_us,
uint32_t timeout_us)
{
uint32_t volatile *fence_addr = submitter->post_fence_va;
uint32_t time_spent = 0;
while (*fence_addr < fence_val) {
pva_kmd_sleep_us(poll_interval_us);
time_spent = safe_addu32(time_spent, poll_interval_us);
if (time_spent >= timeout_us) {
pva_kmd_log_err("pva_kmd_submitter_wait Timed out");
return PVA_TIMEDOUT;
}
}
return PVA_SUCCESS;
}

View File

@@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SUBMITTER_H
#define PVA_KMD_SUBMITTER_H
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_queue.h"
/** A thread-safe submitter utility */
struct pva_kmd_submitter {
/** The lock protects the submission to the queue, including
* incrementing the post fence */
pva_kmd_mutex_t *submit_lock;
struct pva_kmd_queue *queue;
uint32_t *post_fence_va;
struct pva_fw_postfence post_fence;
uint32_t fence_future_value;
/** This lock protects the use of the chunk_pool*/
pva_kmd_mutex_t *chunk_pool_lock;
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool;
};
void pva_kmd_submitter_init(struct pva_kmd_submitter *submitter,
struct pva_kmd_queue *queue,
pva_kmd_mutex_t *submit_lock,
struct pva_kmd_cmdbuf_chunk_pool *chunk_pool,
pva_kmd_mutex_t *chunk_pool_lock,
uint32_t *post_fence_va,
struct pva_fw_postfence const *post_fence);
enum pva_error
pva_kmd_submitter_prepare(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder);
enum pva_error pva_kmd_submitter_submit(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder,
uint32_t *out_fence_val);
enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter,
uint32_t fence_val,
uint32_t poll_interval_us,
uint32_t timeout_us);
enum pva_error
pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter,
struct pva_kmd_cmdbuf_builder *builder,
struct pva_fw_postfence *fence);
/*
 * Typical usage:
 *
 * Internal-fence flow:
 *   prepare submission -> add cmd(s) -> submit (returns a fence value)
 *   -> wait for fence.
 *
 * Caller-provided fence flow:
 *   prepare submission -> add cmd(s) -> submit with fence (provide a fence).
 */
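/*
 * Illustrative sketch of the internal-fence flow (error handling omitted;
 * "sub" is assumed to be a submitter already initialized with
 * pva_kmd_submitter_init, and the command-append step is elided):
 *
 *   struct pva_kmd_cmdbuf_builder builder;
 *   uint32_t fence_val;
 *
 *   if (pva_kmd_submitter_prepare(sub, &builder) == PVA_SUCCESS) {
 *           // ... append commands to the builder ...
 *           if (pva_kmd_submitter_submit(sub, &builder, &fence_val) ==
 *               PVA_SUCCESS)
 *                   (void)pva_kmd_submitter_wait(sub, fence_val, 100U,
 *                                                1000000U);
 *   }
 */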
#endif // PVA_KMD_SUBMITTER_H

View File

@@ -0,0 +1,88 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_t23x.h"
#include "pva_kmd_constants.h"
struct vmem_region vmem_regions_tab_t23x[PVA_VMEM_REGION_COUNT_T23X] = {
{ .start = T23x_VMEM0_START, .end = T23x_VMEM0_END },
{ .start = T23x_VMEM1_START, .end = T23x_VMEM1_END },
{ .start = T23x_VMEM2_START, .end = T23x_VMEM2_END },
};
void pva_kmd_device_init_t23x(struct pva_kmd_device *pva)
{
uint32_t ccq;
uint32_t st_idx;
pva->hw_consts.hw_gen = PVA_HW_GEN2;
pva->hw_consts.n_smmu_contexts = PVA_NUM_SMMU_CONTEXTS_T23X;
pva->r5_image_smmu_context_id = PVA_NUM_SMMU_CONTEXTS_T23X - 1;
pva->hw_consts.n_dma_descriptors = PVA_NUM_DMA_DESC_T23X;
pva->hw_consts.n_user_dma_channels = PVA_DMA_NUM_CHANNELS_T23X - 1U;
pva->hw_consts.n_hwseq_words = PVA_NUM_HWSEQ_WORDS_T23X;
pva->hw_consts.n_dynamic_adb_buffs = PVA_NUM_DYNAMIC_ADB_BUFFS_T23X;
pva->hw_consts.n_vmem_regions = PVA_VMEM_REGION_COUNT_T23X;
pva->support_hwseq_frame_linking = false;
pva->vmem_regions_tab = vmem_regions_tab_t23x;
pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T23x_REG_BASE;
pva->reg_size[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T23x_REG_SIZE;
pva->reg_phy_base[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_BASE;
pva->reg_size[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_SIZE;
pva->regspec.sec_lic_intr_enable = 0x28064;
pva->regspec.sec_lic_intr_status = 0x2806C;
pva->regspec.cfg_user_sid_base = 0x240000;
pva->regspec.cfg_priv_sid = 0x240020;
pva->regspec.cfg_vps_sid = 0x240024;
pva->regspec.cfg_r5user_lsegreg = 0x250008;
pva->regspec.cfg_r5user_usegreg = 0x25001c;
pva->regspec.cfg_priv_ar1_lsegreg = 0x25000c;
pva->regspec.cfg_priv_ar1_usegreg = 0x250020;
pva->regspec.cfg_priv_ar2_lsegreg = 0x250010;
pva->regspec.cfg_priv_ar2_usegreg = 0x250024;
pva->regspec.cfg_priv_ar1_start = 0x250028;
pva->regspec.cfg_priv_ar1_end = 0x25002c;
pva->regspec.cfg_priv_ar2_start = 0x250030;
pva->regspec.cfg_priv_ar2_end = 0x250034;
pva->regspec.cfg_scr_priv_0 = 0x18004;
pva->regspec.cfg_perf_mon = 0x200000;
pva->regspec.ccq_count = 8U;
/* For VPU 0*/
pva->regspec.vpu_dbg_instr_reg_offset[0] = 0x50000U;
/* For VPU 1*/
pva->regspec.vpu_dbg_instr_reg_offset[1] = 0x70000U;
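/* CCQ register blocks start at 0x260000 and are PVA_CFG_CCQ_BLOCK_SIZE bytes
 * apart; within a block the FIFO register sits at the base and the status
 * registers follow at a 4-byte stride.
 */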
for (ccq = 0; ccq < pva->regspec.ccq_count; ccq++) {
uint32_t n_st = PVA_CFG_CCQ_STATUS_COUNT;
uint32_t ccq_base = safe_addu32(
(uint32_t)0x260000,
safe_mulu32((uint32_t)PVA_CFG_CCQ_BLOCK_SIZE, ccq));
pva->regspec.ccq_regs[ccq].status_count = n_st;
pva->regspec.ccq_regs[ccq].fifo = ccq_base;
for (st_idx = 0; st_idx < n_st; st_idx++) {
pva->regspec.ccq_regs[ccq].status[st_idx] = safe_addu32(
ccq_base,
safe_addu32((uint32_t)0x4U,
safe_mulu32((uint32_t)0x4U,
st_idx)));
}
}
#if PVA_SUPPORT_XBAR_RAW == 1
pva->bl_sector_pack_format = PVA_BL_XBAR_RAW;
#else
pva->bl_sector_pack_format = PVA_BL_TEGRA_RAW;
#endif
}

View File

@@ -0,0 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_T23X_H
#define PVA_KMD_T23X_H
#include "pva_kmd_device.h"
/** Number of VMEM regions */
#define PVA_VMEM_REGION_COUNT_T23X 3U
/** Start Address of VMEM0 Bank in T23X */
#define T23x_VMEM0_START 0x40U
/** End Address of VMEM0 Bank in T23X */
#define T23x_VMEM0_END 0x20000U
/** Start Address of VMEM1 Bank in T23X */
#define T23x_VMEM1_START 0x40000U
/** End Address of VMEM1 Bank in T23X */
#define T23x_VMEM1_END 0x60000U
/** Start Address of VMEM2 Bank in T23X */
#define T23x_VMEM2_START 0x80000U
/** End Address of VMEM2 Bank in T23X */
#define T23x_VMEM2_END 0xA0000U
/** @brief Base address for PVA0 VPU Debug Register space (CSITE_PVA0VPU) */
#define TEGRA_PVA0_VPU_DBG_BASE 0x24740000U
/** @brief Size (in bytes) of the PVA0 VPU Debug Register space (CSITE_PVA0VPU) */
#define TEGRA_PVA0_VPU_DBG_SIZE 0x40000U
void pva_kmd_device_init_t23x(struct pva_kmd_device *pva);
#endif // PVA_KMD_T23X_H

View File

@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_t26x.h"
#include "pva_kmd_constants.h"
struct vmem_region vmem_regions_tab_t26x[PVA_VMEM_REGION_COUNT_T26X] = {
{ .start = T26x_VMEM0_START, .end = T26x_VMEM0_END },
{ .start = T26x_VMEM1_START, .end = T26x_VMEM1_END },
{ .start = T26x_VMEM2_START, .end = T26x_VMEM2_END },
{ .start = T26x_VMEM3_START, .end = T26x_VMEM3_END },
};
void pva_kmd_device_init_t26x(struct pva_kmd_device *pva)
{
uint32_t ccq;
uint32_t st_idx;
pva->hw_consts.hw_gen = PVA_HW_GEN3;
pva->hw_consts.n_smmu_contexts = PVA_NUM_SMMU_CONTEXTS_T26X;
pva->r5_image_smmu_context_id = PVA_NUM_SMMU_CONTEXTS_T26X - 1;
pva->hw_consts.n_dma_descriptors = PVA_NUM_DMA_DESC_T26X;
pva->hw_consts.n_user_dma_channels = PVA_DMA_NUM_CHANNELS_T26X - 1U;
pva->hw_consts.n_hwseq_words = PVA_NUM_HWSEQ_WORDS_T26X;
pva->hw_consts.n_dynamic_adb_buffs = PVA_NUM_DYNAMIC_ADB_BUFFS_T26X;
pva->hw_consts.n_vmem_regions = PVA_VMEM_REGION_COUNT_T26X;
pva->vmem_regions_tab = vmem_regions_tab_t26x;
pva->support_hwseq_frame_linking = true;
pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T26x_REG_BASE;
pva->reg_size[PVA_KMD_APERTURE_PVA_CLUSTER] =
PVA_KMD_PVA0_T26x_REG_SIZE;
pva->reg_phy_base[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_BASE;
pva->reg_size[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_SIZE;
pva->regspec.sec_lic_intr_enable = 0x28064;
pva->regspec.sec_lic_intr_status = 0x2806C;
pva->regspec.cfg_user_sid_base = 0x240000;
pva->regspec.cfg_priv_sid = 0x240020;
pva->regspec.cfg_vps_sid = 0x240024;
pva->regspec.cfg_r5user_lsegreg = 0x250008;
pva->regspec.cfg_r5user_usegreg = 0x25001c;
pva->regspec.cfg_priv_ar1_lsegreg = 0x25000c;
pva->regspec.cfg_priv_ar1_usegreg = 0x250020;
pva->regspec.cfg_priv_ar2_lsegreg = 0x250010;
pva->regspec.cfg_priv_ar2_usegreg = 0x250024;
pva->regspec.cfg_priv_ar1_start = 0x250028;
pva->regspec.cfg_priv_ar1_end = 0x25002c;
pva->regspec.cfg_priv_ar2_start = 0x250030;
pva->regspec.cfg_priv_ar2_end = 0x250034;
pva->regspec.cfg_scr_priv_0 = 0x18004;
pva->regspec.cfg_perf_mon = 0x200000;
pva->regspec.ccq_count = 8U;
/* For VPU 0 */
pva->regspec.vpu_dbg_instr_reg_offset[0] = 0x50000U;
/* For VPU 1 */
pva->regspec.vpu_dbg_instr_reg_offset[1] = 0x70000U;
for (ccq = 0; ccq < pva->regspec.ccq_count; ccq++) {
uint32_t n_st = PVA_CFG_CCQ_STATUS_COUNT;
uint32_t ccq_base = safe_addu32(
(uint32_t)0x260000,
safe_mulu32((uint32_t)PVA_CFG_CCQ_BLOCK_SIZE, ccq));
pva->regspec.ccq_regs[ccq].status_count = n_st;
pva->regspec.ccq_regs[ccq].fifo = ccq_base;
for (st_idx = 0; st_idx < n_st; st_idx++) {
pva->regspec.ccq_regs[ccq].status[st_idx] = safe_addu32(
ccq_base,
safe_addu32((uint32_t)0x4U,
safe_mulu32((uint32_t)0x4U,
st_idx)));
}
}
pva->bl_sector_pack_format = PVA_BL_TEGRA_RAW;
}

View File

@@ -0,0 +1,46 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_T26X_H
#define PVA_KMD_T26X_H
#include "pva_kmd_device.h"
#define PVA_KMD_PVA0_T26x_REG_BASE 0x818c000000
#define PVA_KMD_PVA0_T26x_REG_SIZE 0x900000
/** Number of VMEM regions in T26X */
#define PVA_VMEM_REGION_COUNT_T26X 4U
/** Start Address of VMEM0 Bank in T26X */
#define T26x_VMEM0_START 0x40U
/** End Address of VMEM0 Bank in T26X */
#define T26x_VMEM0_END 0x20000U
/** Start Address of VMEM1 Bank in T26X */
#define T26x_VMEM1_START 0x40000U
/** End Address of VMEM1 Bank in T26X */
#define T26x_VMEM1_END 0x60000U
/** Start Address of VMEM2 Bank in T26X */
#define T26x_VMEM2_START 0x80000U
/** End Address of VMEM2 Bank in T26X */
#define T26x_VMEM2_END 0xA0000U
/** Start Address of VMEM3 Bank in T26X */
#define T26x_VMEM3_START 0xC0000U
/** End Address of VMEM3 Bank in T26X */
#define T26x_VMEM3_END 0xE0000U
/** @brief Base address for PVA0 VPU Debug Register space (CSITE_PVA0VPU) */
#define TEGRA_PVA0_VPU_DBG_BASE 0x24740000U
/** @brief Size (in bytes) of the PVA0 VPU Debug Register space (CSITE_PVA0VPU) */
#define TEGRA_PVA0_VPU_DBG_SIZE 0x40000U
void pva_kmd_device_init_t26x(struct pva_kmd_device *pva);
#endif // PVA_KMD_T26X_H

View File

@@ -0,0 +1,141 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api_cmdbuf.h"
#include "pva_api_types.h"
#include "pva_bit.h"
#include "pva_fw.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_device.h"
#include "pva_kmd_constants.h"
#include "pva_utils.h"
#include "pva_kmd_tegra_stats.h"
void pva_kmd_device_init_tegra_stats(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
pva->tegra_stats_buf_size = sizeof(struct pva_kmd_fw_tegrastats);
pva->tegra_stats_memory =
pva_kmd_device_memory_alloc_map(pva->tegra_stats_buf_size, pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
ASSERT(pva->tegra_stats_memory != NULL);
err = pva_kmd_add_dram_buffer_resource(&pva->dev_resource_table,
pva->tegra_stats_memory,
&pva->tegra_stats_resource_id);
ASSERT(err == PVA_SUCCESS);
pva_kmd_update_fw_resource_table(&pva->dev_resource_table);
}
void pva_kmd_device_deinit_tegra_stats(struct pva_kmd_device *pva)
{
pva_kmd_drop_resource(&pva->dev_resource_table,
pva->tegra_stats_resource_id);
}
enum pva_error
pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva,
struct pva_kmd_tegrastats *kmd_tegra_stats)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_get_tegra_stats *cmd;
uint64_t buffer_offset = 0U;
uint32_t fence_val;
enum pva_error err;
struct pva_kmd_fw_tegrastats *fw_tegra_stats;
bool stats_enabled = pva->debugfs_context.stats_enable;
uint64_t duration = 0U;
/* Power on PVA if not already */
err = pva_kmd_device_busy(pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"pva_kmd_device_busy failed when submitting tegra stats cmd");
return err;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_get_tegra_stats(cmd, pva->tegra_stats_resource_id,
pva->tegra_stats_buf_size,
buffer_offset, stats_enabled);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("tegra stats cmd submission failed");
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when getting tegra stats");
goto err_out;
}
if (stats_enabled == false)
goto err_out;
fw_tegra_stats =
(struct pva_kmd_fw_tegrastats *)(pva->tegra_stats_memory->va);
duration = safe_subu64(fw_tegra_stats->window_end_time,
fw_tegra_stats->window_start_time);
if (duration == 0) {
pva_kmd_print_str("VPU Stats: Duration is zero");
goto err_out;
}
pva_kmd_print_str("VPU Stats");
pva_kmd_print_str_u64("Window Start Time",
fw_tegra_stats->window_start_time);
pva_kmd_print_str_u64("Window End Time",
fw_tegra_stats->window_end_time);
pva_kmd_print_str_u64("Total utilization VPU 0",
fw_tegra_stats->total_utilization[0]);
pva_kmd_print_str_u64("Total utilization VPU 1",
fw_tegra_stats->total_utilization[1]);
pva_kmd_print_str_u64(
"VPU 0 percent utilization",
safe_mulu64(100ULL, fw_tegra_stats->total_utilization[0]) /
duration);
pva_kmd_print_str_u64(
"VPU 1 percent utilization",
safe_mulu64(100ULL, fw_tegra_stats->total_utilization[1]) /
duration);
kmd_tegra_stats->average_vpu_utilization[0] =
safe_mulu64(100ULL, fw_tegra_stats->total_utilization[0]) /
duration;
kmd_tegra_stats->average_vpu_utilization[1] =
safe_mulu64(100ULL, fw_tegra_stats->total_utilization[1]) /
duration;
kmd_tegra_stats->window_start_time = fw_tegra_stats->window_start_time;
kmd_tegra_stats->window_end_time = fw_tegra_stats->window_end_time;
err = PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
pva_kmd_device_idle(pva);
return err;
}

View File

@@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_TEGRA_STATS_H
#define PVA_KMD_TEGRA_STATS_H
#include "pva_kmd_device.h"
/**
* @brief Structure which holds vpu stats information
*/
struct pva_kmd_tegrastats {
/** Holds vpu utilization as a percentage for each VPU in the PVA */
uint64_t average_vpu_utilization[PVA_NUM_PVE];
/** Start and end timestamps of the stats sampling window */
uint64_t window_start_time;
uint64_t window_end_time;
};
void pva_kmd_device_init_tegra_stats(struct pva_kmd_device *pva);
void pva_kmd_device_deinit_tegra_stats(struct pva_kmd_device *pva);
enum pva_error
pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva,
struct pva_kmd_tegrastats *kmd_tegra_stats);
#endif
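
A minimal sketch of a caller for the interface above, assuming PVA_NUM_PVE is at least 2 (the implementation prints VPU 0 and VPU 1); the function name and the choice of logging helpers are illustrative, not part of this change.

/* Illustrative only: query FW VPU utilization and log the percentages. */
static void example_log_vpu_stats(struct pva_kmd_device *pva)
{
	struct pva_kmd_tegrastats stats;

	if (pva_kmd_notify_fw_get_tegra_stats(pva, &stats) != PVA_SUCCESS) {
		pva_kmd_log_err("failed to fetch tegra stats");
		return;
	}

	/* average_vpu_utilization[] is already a percentage of the sampling window. */
	pva_kmd_print_str_u64("VPU 0 percent utilization",
			      stats.average_vpu_utilization[0]);
	pva_kmd_print_str_u64("VPU 1 percent utilization",
			      stats.average_vpu_utilization[1]);
}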

View File

@@ -0,0 +1,148 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_mutex.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_device_memory.h"
#include <pthread.h>
#include <time.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
enum pva_error pva_kmd_mutex_init(pva_kmd_mutex_t *m)
{
int ret = pthread_mutex_init(m, NULL);
ASSERT(ret == 0);
return PVA_SUCCESS;
}
void pva_kmd_mutex_lock(pva_kmd_mutex_t *m)
{
int ret = pthread_mutex_lock(m);
ASSERT(ret == 0);
}
void pva_kmd_mutex_unlock(pva_kmd_mutex_t *m)
{
int ret = pthread_mutex_unlock(m);
ASSERT(ret == 0);
}
void pva_kmd_mutex_deinit(pva_kmd_mutex_t *m)
{
int ret = pthread_mutex_destroy(m);
ASSERT(ret == 0);
}
void *pva_kmd_zalloc(uint64_t size)
{
return calloc(1, size);
}
void pva_kmd_free(void *ptr)
{
free(ptr);
}
void pva_kmd_fault(void)
{
abort();
}
void pva_kmd_sema_init(pva_kmd_sema_t *sem, uint32_t val)
{
int ret;
ret = sem_init(sem, 0 /* Only sharing in threads */, val);
ASSERT(ret == 0);
}
enum pva_error pva_kmd_sema_wait_timeout(pva_kmd_sema_t *sem,
uint32_t timeout_ms)
{
struct timespec ts;
int ret;
ret = clock_gettime(CLOCK_REALTIME, &ts);
ASSERT(ret == 0);
/* Add timeout (specified in milliseconds) to the current time */
ts.tv_sec += timeout_ms / 1000;
ts.tv_nsec += (timeout_ms % 1000) * 1000000;
/* Handle case where nanoseconds exceed 1 second */
if (ts.tv_nsec >= 1000000000) {
ts.tv_nsec -= 1000000000;
ts.tv_sec += 1;
}
wait_again:
ret = sem_timedwait(sem, &ts);
if (ret != 0) {
if (errno == ETIMEDOUT) {
pva_kmd_log_err("pva_kmd_sema_wait_timeout Timed out");
return PVA_TIMEDOUT;
} else if (errno == EINTR) {
goto wait_again;
} else {
FAULT("Unexpected sem_timedwait error");
}
}
return PVA_SUCCESS;
}
void pva_kmd_sema_deinit(pva_kmd_sema_t *sem)
{
int ret = sem_destroy(sem);
ASSERT(ret == 0);
}
void pva_kmd_sema_post(pva_kmd_sema_t *sem)
{
int ret = sem_post(sem);
ASSERT(ret == 0);
}
struct pva_kmd_device_memory *
pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva,
uint32_t iova_access_flags,
uint32_t smmu_ctx_idx)
{
struct pva_kmd_device_memory *mem;
enum pva_error err;
mem = pva_kmd_device_memory_alloc(size);
if (mem == NULL) {
goto err_out;
}
err = pva_kmd_device_memory_iova_map(mem, pva, iova_access_flags,
smmu_ctx_idx);
if (err != PVA_SUCCESS) {
goto free_mem;
}
err = pva_kmd_device_memory_cpu_map(mem);
if (err != PVA_SUCCESS) {
goto iova_unmap;
}
return mem;
iova_unmap:
pva_kmd_device_memory_iova_unmap(mem);
free_mem:
pva_kmd_device_memory_free(mem);
err_out:
return NULL;
}

View File

@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_utils.h"
void *pva_kmd_zalloc_nofail(uint64_t size)
{
void *ptr = pva_kmd_zalloc(size);
ASSERT(ptr != NULL);
return ptr;
}
void pva_kmd_log_err(const char *msg)
{
pva_kmd_print_str(msg);
}
void pva_kmd_log_err_u64(const char *msg, uint64_t val)
{
pva_kmd_print_str_u64(msg, val);
}

View File

@@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_UTILS_H
#define PVA_KMD_UTILS_H
#include "pva_kmd.h"
#include "pva_api.h"
#include "pva_kmd_shim_utils.h"
#include "pva_bit.h"
#include "pva_utils.h"
#include "pva_plat_faults.h"
#include "pva_math_utils.h"
#define SIZE_4KB (4 * 1024)
void pva_kmd_log_err(const char *msg);
void pva_kmd_log_err_u64(const char *msg, uint64_t val);
void *pva_kmd_zalloc_nofail(uint64_t size);
#endif // PVA_KMD_UTILS_H

View File

@@ -0,0 +1,368 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_api_types.h"
#include "pva_kmd_vpu_app_auth.h"
#include "pva_kmd_device.h"
#include "pva_kmd_sha256.h"
#include "pva_kmd_utils.h"
enum pva_error pva_kmd_init_vpu_app_auth(struct pva_kmd_device *pva, bool ena)
{
enum pva_error err = PVA_SUCCESS;
const char *default_path = pva_kmd_get_default_allowlist();
size_t default_path_len;
struct pva_vpu_auth *pva_auth = pva_kmd_zalloc(sizeof(*pva_auth));
if (pva_auth == NULL) {
pva_kmd_log_err("Unable to allocate memory");
return PVA_NOMEM;
}
pva->pva_auth = pva_auth;
ASSERT(pva_auth != NULL);
pva_auth->vpu_hash_keys = NULL;
pva_auth->pva_auth_allow_list_parsed = false;
/* TODO: This is disabled by default. Authentication will be enabled when
 * either of the following is set:
 * 1. A debugfs node (for non-production builds)
 * 2. A device tree property (for production builds)
 */
pva_auth->pva_auth_enable = ena;
default_path_len = strnlen(default_path, ALLOWLIST_FILE_LEN);
if (default_path_len > 0U) {
(void)memcpy(pva_auth->pva_auth_allowlist_path, default_path,
default_path_len);
}
return err;
}
/**
 * \brief
 * is_key_match calculates the SHA-256 hash of the ELF data and checks whether it matches key.
 * \param[in] dataptr Pointer to the data over which the SHA-256 hash is calculated.
 * \param[in] size Length in bytes of the data over which the SHA-256 hash is calculated.
 * \param[in] key The key against which the calculated hash is compared.
 * \return The completion status of the operation. Possible values are:
 * \ref PVA_SUCCESS Success. The passed-in key matches the calculated hash.
 * \ref PVA_EACCES The passed-in key does not match the calculated hash.
 */
static enum pva_error is_key_match(uint8_t *dataptr, size_t size,
struct shakey key)
{
enum pva_error err = PVA_SUCCESS;
int32_t status = 0;
uint32_t calc_key[8];
size_t off;
struct sha256_ctx ctx1;
struct sha256_ctx ctx2;
sha256_init(&ctx1);
off = (size / 64U) * 64U;
if (off > 0U) {
sha256_update(&ctx1, dataptr, off);
}
/* clone */
sha256_copy(&ctx1, &ctx2);
/* finalize with leftover, if any */
sha256_finalize(&ctx2, dataptr + off, size % 64U, calc_key);
status = memcmp((void *)&(key.sha_key), (void *)calc_key,
NVPVA_SHA256_DIGEST_SIZE);
if (status != 0) {
err = PVA_EACCES;
}
return err;
}
/**
 * \brief
 * Checks every key associated with match_hash against the calculated
 * SHA-256 hash of dataptr until a match is found.
 * \param[in] pallkeys Pointer to the array of SHA keys, \ref shakey
 * \param[in] dataptr Pointer to the ELF data
 * \param[in] size Length (in bytes) of the ELF data
 * \param[in] match_hash Pointer to the matching hash structure, \ref struct vpu_hash_vector.
 * \return Matching status of the calculated hash against the keys associated
 * with match_hash. Possible values:
 * - PVA_SUCCESS if one of the keys associated with match_hash matches the
 *   calculated SHA-256 hash.
 * - PVA_EACCES if no match is found.
 * - PVA_ERANGE if the key index range in match_hash overflows.
 */
static enum pva_error
check_all_keys_for_match(struct shakey *pallkeys, uint8_t *dataptr, size_t size,
const struct vpu_hash_vector *match_hash)
{
enum pva_error err = PVA_SUCCESS;
uint32_t idx;
uint32_t count;
uint32_t end;
struct shakey key;
uint32_t i;
idx = match_hash->index;
count = match_hash->count;
end = idx + count;
if (end < idx) {
err = PVA_ERANGE;
goto fail;
}
for (i = 0; i < count; i++) {
key = pallkeys[idx + i];
err = is_key_match(dataptr, size, key);
if (err == PVA_SUCCESS) {
break;
}
}
fail:
return err;
}
/**
* @brief
* Helper function for \ref binary_search.
* Uses a specific field in @ref pkey to compare with the same field in @ref pbase.
* @param[in] pkey pointer to the object that needs to be compared.
* @param[in] pbase pointer to the starting element of the array.
* @retval
* - -1 when @ref pkey is less than starting element of array pointed to by @ref pbase.
* - 1 when @ref pkey is greater than starting element of array pointed to by @ref pbase.
* - 0 when @ref pkey is equal to starting element of array pointed to by @ref pbase.
*/
static int32_t compare_hash_value(const struct vpu_hash_vector *pkey,
const struct vpu_hash_vector *pbase)
{
int32_t ret;
if (pkey->crc32_hash < pbase->crc32_hash) {
ret = -1;
} else if (pkey->crc32_hash > pbase->crc32_hash) {
ret = 1;
} else {
ret = 0;
}
return ret;
}
/**
* @brief
* calculates crc32.
* @param[in] crc initial crc value. usually 0.
* @param[in] buf pointer to the buffer whose crc32 to be calculated.
* @param[in] len length (in bytes) of data at @ref buf.
* @retval value of calculated crc32.
*/
static uint32_t pva_crc32(uint32_t crc, uint8_t *buf, size_t len)
{
int32_t k;
size_t count;
count = len;
crc = ~crc;
while (count != 0U) {
crc ^= *buf++;
for (k = 0; k < 8; k++) {
crc = ((crc & 1U) == 1U) ? (crc >> 1U) ^ 0xedb88320U :
crc >> 1U;
}
count--;
}
return ~crc;
}
static const struct vpu_hash_vector *
binary_search(const struct vpu_hash_vector *key,
const struct vpu_hash_vector *base, size_t num_elems,
int32_t (*compare)(const struct vpu_hash_vector *pkey,
const struct vpu_hash_vector *pbase))
{
size_t low = 0U;
size_t high;
if (num_elems == 0U) {
return NULL;
}
high = num_elems - 1U;
for (;;) {
const struct vpu_hash_vector *mid_elem;
int32_t r;
size_t mid = low + ((high - low) / 2U);
mid_elem = &(base[mid]);
r = compare(key, mid_elem);
if (r < 0) {
if (mid == 0U) {
return NULL;
}
high = mid - 1U;
} else if (r > 0) {
low = mid + 1U;
if (low < mid || low > high) {
return NULL;
}
} else {
return mid_elem;
}
}
}
static enum pva_error
pva_kmd_vpu_check_sha256_key(struct vpu_hash_key_pair *vpu_hash_keys,
uint8_t *dataptr, size_t size)
{
enum pva_error err = PVA_SUCCESS;
struct vpu_hash_vector cal_Hash;
const struct vpu_hash_vector *match_Hash;
cal_Hash.crc32_hash = pva_crc32(0L, dataptr, size);
match_Hash = (const struct vpu_hash_vector *)binary_search(
&cal_Hash, vpu_hash_keys->pvpu_hash_vector,
vpu_hash_keys->num_hashes, compare_hash_value);
if (match_Hash == NULL) {
pva_kmd_log_err("No Hash Match Found");
err = PVA_EACCES;
goto fail;
}
err = check_all_keys_for_match(vpu_hash_keys->psha_key, dataptr, size,
match_Hash);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Match key not found");
}
fail:
return err;
}
enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva,
uint8_t *dataptr, size_t size)
{
enum pva_error err = PVA_SUCCESS;
struct pva_vpu_auth *pva_auth;
ASSERT(pva != NULL);
ASSERT(dataptr != NULL);
pva_auth = pva->pva_auth;
ASSERT(pva_auth != NULL);
pva_kmd_mutex_lock(&pva_auth->allow_list_lock);
if (pva_auth->pva_auth_enable) {
pva_dbg_printf("App authentication enabled");
if (pva_auth->pva_auth_allow_list_parsed == false) {
err = pva_kmd_allowlist_parse(pva);
if (err == PVA_SUCCESS) {
pva_dbg_printf(
"App authentication allowlist parsing successfull");
} else {
pva_dbg_printf(
"App authentication allowlist parsing failed");
}
}
if (err == PVA_SUCCESS) {
err = pva_kmd_vpu_check_sha256_key(
pva_auth->vpu_hash_keys, (uint8_t *)dataptr,
size);
if (err == PVA_SUCCESS) {
pva_dbg_printf(
"App authentication successfull");
} else {
pva_dbg_printf("App authentication failed : %d",
err);
}
}
} else {
pva_dbg_printf("App authentication disabled");
}
pva_kmd_mutex_unlock(&pva_auth->allow_list_lock);
return err;
}
static void pva_kmd_allowlist_destroy(struct pva_vpu_auth *pva_auth)
{
if (pva_auth->vpu_hash_keys != NULL) {
pva_kmd_free(pva_auth->vpu_hash_keys->ptr_file_data);
pva_kmd_free(pva_auth->vpu_hash_keys);
pva_auth->vpu_hash_keys = NULL;
}
}
enum pva_error pva_kmd_allowlist_parse(struct pva_kmd_device *pva)
{
struct pva_vpu_auth *pva_auth = pva->pva_auth;
enum pva_error err = PVA_SUCCESS;
uint8_t *data = NULL;
uint64_t size = 0;
struct vpu_hash_key_pair *vhashk;
size_t vkey_size = 0;
size_t vhash_size = 0;
ASSERT(pva_auth != NULL);
/* Destroy previously parsed allowlist data */
pva_kmd_allowlist_destroy(pva_auth);
err = pva_kmd_auth_allowlist_load(
pva, pva_auth->pva_auth_allowlist_path, &data, &size);
if (err != PVA_SUCCESS) {
if (data != NULL) {
pva_kmd_free(data);
}
goto fail;
}
vhashk = (struct vpu_hash_key_pair *)pva_kmd_zalloc(
sizeof(struct vpu_hash_key_pair));
if (vhashk == NULL) {
pva_kmd_log_err("Unable to allocate memory");
pva_kmd_free(data);
err = PVA_NOMEM;
goto fail;
}
vhashk->ptr_file_data = data;
vhashk->num_keys = ((uint32_t *)(uintptr_t)data)[0];
vhashk->psha_key =
(struct shakey *)(uintptr_t)(data + sizeof(uint32_t));
vkey_size = sizeof(struct shakey) * (vhashk->num_keys);
vhashk->num_hashes = ((uint32_t *)(uintptr_t)((char *)vhashk->psha_key +
vkey_size))[0];
vhashk->pvpu_hash_vector =
(struct vpu_hash_vector
*)(uintptr_t)((char *)(vhashk->psha_key) + vkey_size +
sizeof(uint32_t));
vhash_size = sizeof(struct vpu_hash_vector) * (vhashk->num_hashes);
if ((sizeof(uint32_t) + sizeof(uint32_t) + vkey_size + vhash_size) !=
size) {
pva_kmd_free(data);
pva_kmd_free(vhashk);
err = PVA_EACCES;
goto fail;
}
pva_auth->pva_auth_allow_list_parsed = true;
pva_auth->vpu_hash_keys = vhashk;
fail:
return err;
}
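
For reference, the allowlist blob layout implied by the pointer arithmetic in pva_kmd_allowlist_parse() above can be summarized as follows; this is derived from the parser only and is not an authoritative file-format specification.

/*
 * Allowlist blob layout implied by pva_kmd_allowlist_parse():
 *
 *   offset 0                 : uint32_t num_keys
 *   offset 4                 : struct shakey keys[num_keys]        (32 bytes each)
 *   offset 4 + 32 * num_keys : uint32_t num_hashes
 *   offset 8 + 32 * num_keys : struct vpu_hash_vector hashes[num_hashes]
 *
 * The total blob size must equal
 *   2 * sizeof(uint32_t) + num_keys * sizeof(struct shakey)
 *                        + num_hashes * sizeof(struct vpu_hash_vector),
 * otherwise parsing fails with PVA_EACCES.
 */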

View File

@@ -0,0 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef PVA_KMD_VPU_APP_AUTH_H
#define PVA_KMD_VPU_APP_AUTH_H
#include "pva_kmd_shim_vpu_app_auth.h"
#include "pva_kmd_mutex.h"
/**
* Maximum length of allowlist file path
*/
#define ALLOWLIST_FILE_LEN 128U
/**
* Size of sha256 keys in bytes.
*/
#define NVPVA_SHA256_DIGEST_SIZE 32U
struct pva_kmd_device;
/**
 * An entry in the array of VPU hashes: a CRC32 hash plus the index range of
 * SHA keys associated with it.
 */
struct vpu_hash_vector {
/*! Number of Keys for this crc32_hash */
uint32_t count;
/*! Starting Index into Keys Array */
uint32_t index;
/*! CRC32 hash value */
uint32_t crc32_hash;
};
/**
* Stores sha256 key
*/
struct shakey {
/** 256-bit (32 Bytes) SHA Key */
uint8_t sha_key[NVPVA_SHA256_DIGEST_SIZE];
};
/**
* Stores Hash Vector and Keys vector
*/
struct vpu_hash_key_pair {
/*! Total number of Keys in binary file */
uint32_t num_keys;
/*! pointer to SHA keys Array. */
struct shakey *psha_key;
/*! Total number of Hashes in binary file */
uint32_t num_hashes;
/*! Pointer to array of hashes */
struct vpu_hash_vector *pvpu_hash_vector;
/*! pointer to data loaded from file (QNX Specific)*/
uint8_t *ptr_file_data;
};
/**
* Stores all the information related to pva vpu elf authentication.
*/
struct pva_vpu_auth {
/** Stores crc32-sha256 of ELFs */
struct vpu_hash_key_pair *vpu_hash_keys;
pva_kmd_mutex_t allow_list_lock;
/** Flag to check if authentication is enabled */
bool pva_auth_enable;
/** Flag to track if the allow list is already parsed */
bool pva_auth_allow_list_parsed;
/** Stores the path to allowlist binary file. */
char pva_auth_allowlist_path[ALLOWLIST_FILE_LEN + 1U];
};
enum pva_error pva_kmd_init_vpu_app_auth(struct pva_kmd_device *pva, bool ena);
enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva,
uint8_t *dataptr, size_t size);
enum pva_error pva_kmd_allowlist_parse(struct pva_kmd_device *pva);
#endif

View File

@@ -0,0 +1,128 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "pva_kmd_device.h"
#include "pva_math_utils.h"
#include "pva_kmd_vpu_ocd.h"
#include "pva_kmd_silicon_utils.h"
#define PVA_DEBUG_APERTURE_INDEX 1U
int pva_kmd_vpu_ocd_open(struct pva_kmd_device *dev)
{
int retval = 0;
enum pva_error err;
err = pva_kmd_device_busy(dev);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"pva_kmd_vpu_ocd_open pva_kmd_device_busy failed");
retval = -1;
goto out;
}
out:
return retval;
}
int pva_kmd_vpu_ocd_release(struct pva_kmd_device *dev)
{
pva_kmd_device_idle(dev);
return 0;
}
int64_t pva_kmd_vpu_ocd_write(struct pva_kmd_device *dev, void *file_data,
const uint8_t *data, uint64_t offset,
uint64_t size)
{
struct pva_vpu_ocd_write_param write_param;
uint32_t i;
unsigned long retval;
uint32_t reg_offset;
uint32_t const *vpu_ocd_offset = (uint32_t *)file_data;
retval = pva_kmd_copy_data_from_user(&write_param, data,
sizeof(write_param));
if (retval != 0u) {
pva_kmd_log_err("Failed to copy write buffer from user");
return -1;
}
if (write_param.n_write > VPU_OCD_MAX_NUM_DATA_ACCESS) {
pva_kmd_log_err_u64("pva: too many vpu dbg reg write",
write_param.n_write);
return -1;
}
/* Write instruction first */
pva_kmd_aperture_write(dev, PVA_DEBUG_APERTURE_INDEX, *vpu_ocd_offset,
write_param.instr);
/*
* Write data
* if there's 1 word, write to addr 0x4,
* if there's 2 words, write to addr 2 * 0x4,
* ...
*/
reg_offset = safe_addu32((uint32_t)*vpu_ocd_offset,
safe_mulu32(write_param.n_write,
(uint32_t)sizeof(uint32_t)));
for (i = 0u; i < write_param.n_write; i++) {
pva_kmd_aperture_write(dev, PVA_DEBUG_APERTURE_INDEX,
reg_offset, write_param.data[i]);
}
return 0;
}
int64_t pva_kmd_vpu_ocd_read(struct pva_kmd_device *dev, void *file_data,
uint8_t *data, uint64_t offset, uint64_t size)
{
struct pva_vpu_ocd_read_param read_param;
unsigned long retval;
uint32_t i;
uint32_t reg_offset;
uint32_t const *vpu_ocd_offset = (uint32_t *)file_data;
retval = pva_kmd_copy_data_from_user(&read_param, data,
sizeof(read_param));
if (retval != 0u) {
pva_kmd_log_err("failed to copy read buffer from user");
return -1;
}
if (read_param.n_read > VPU_OCD_MAX_NUM_DATA_ACCESS) {
pva_kmd_log_err_u64("pva: too many vpu dbg reg read",
read_param.n_read);
return -1;
}
/*
* Read data
* if there's 1 word, read from addr 0x4,
* if there's 2 words, read from addr 2 * 0x4,
* ...
*/
reg_offset = safe_addu32((uint32_t)*vpu_ocd_offset,
safe_mulu32(read_param.n_read,
(uint32_t)sizeof(uint32_t)));
for (i = 0; i < read_param.n_read; i++) {
read_param.data[i] = pva_kmd_aperture_read(
dev, PVA_DEBUG_APERTURE_INDEX, reg_offset);
}
retval = pva_kmd_copy_data_to_user(data, &read_param,
sizeof(read_param));
if (retval != 0u) {
pva_kmd_log_err("failed to copy read buffer to user");
return -1;
}
return 0;
}

View File

@@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_VPU_OCD_H
#define PVA_KMD_VPU_OCD_H
#define VPU_OCD_MAX_NUM_DATA_ACCESS 7U
struct pva_vpu_ocd_write_param {
uint32_t instr;
uint32_t n_write;
uint32_t data[VPU_OCD_MAX_NUM_DATA_ACCESS];
};
struct pva_vpu_ocd_read_param {
uint32_t n_read;
uint32_t data[VPU_OCD_MAX_NUM_DATA_ACCESS];
};
int64_t pva_kmd_vpu_ocd_read(struct pva_kmd_device *dev, void *file_data,
uint8_t *data, uint64_t offset, uint64_t size);
int64_t pva_kmd_vpu_ocd_write(struct pva_kmd_device *dev, void *file_data,
const uint8_t *data, uint64_t offset,
uint64_t size);
int pva_kmd_vpu_ocd_open(struct pva_kmd_device *dev);
int pva_kmd_vpu_ocd_release(struct pva_kmd_device *dev);
#endif

View File

@@ -0,0 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_PLAT_FAULTS_H
#define PVA_PLAT_FAULTS_H
#include "pva_kmd_shim_utils.h"
#define ASSERT(x) \
if (!(x)) { \
pva_kmd_print_str_u64("PVA KMD ASSERT at " __FILE__, \
__LINE__); \
pva_kmd_fault(); \
}
#define FAULT(msg) \
do { \
pva_kmd_print_str_u64("PVA KMD FAULT at " __FILE__, __LINE__); \
pva_kmd_print_str(msg); \
pva_kmd_fault(); \
} while (0)
#define ASSERT_WITH_LOC(x, err_file, err_line) \
if (!(x)) { \
pva_kmd_print_str_u64("Error at line", err_line); \
pva_kmd_print_str(err_file); \
pva_kmd_print_str("PVA KMD ASSERT"); \
pva_kmd_fault(); \
}
#endif

View File

@@ -0,0 +1,112 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_DEVICE_MEMORY_H
#define PVA_KMD_DEVICE_MEMORY_H
#include "pva_kmd.h"
#include "pva_api.h"
struct pva_kmd_context;
/**
* @brief KMD device memory structure.
*
* This structure is essentially a base object. More information is needed to
* manage memory allocations but the required information is platform dependent.
* Therefore, each platform will have a derived implementation and this
* structure is just part of it.
*/
struct pva_kmd_device_memory {
uint64_t iova; /**< IOVA address if mapped. Otherwise 0 */
void *va; /**< CPU address if mapped. Otherwise 0. */
uint64_t size; /**< Size of the mapping. */
struct pva_kmd_device *pva; /**< The PVA this memory is mapped to. */
uint32_t smmu_ctx_idx; /**< The SMMU context this memory is mapped to. */
};
/**
* This API is not available in Linux and should not be used by the common code.
*/
struct pva_kmd_device_memory *pva_kmd_device_memory_alloc(uint64_t size);
/**
* Allocate memory and map to both IOVA space and CPU space.
*
* @note We cannot just allocate without mapping or just mapping to one
* space. This restriction comes from the Linux dma_alloc_coherent API, which
* allocates and maps at the same time.
*
* @note iova_access_flag is only supported by QNX implementation.
*
* @param size Size of the allocation
* @param pva The PVA device to map to
* @param iova_access_flags Access flags for IOVA space. PVA_ACCESS_RO or
* PVA_ACCESS_RW. For CPU space, it's always
* read and write.
* @param smmu_ctx_idx The SMMU context to map to
*/
struct pva_kmd_device_memory *
pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva,
uint32_t iova_access_flags,
uint32_t smmu_ctx_idx);
/** @brief Acquire memory shared from UMD.
*
* This function takes a shared ownership of the memory allocation so that KMD
* can keep the allocation alive even after UMD closed the memory handle.
*
* @param memory_handle Memory handle passed from user space. On Linux, this is
* a file descriptor associated with dma_buf object. On
* QNX, this is NvRM import ID.
* @param offset Offset into the allocation. This affects the mapped address.
* @param size Size of the mapping, which can be smaller than the size of the
* allocation.
* @param ctx The user from whom we are importing the memory.
*/
struct pva_kmd_device_memory *
pva_kmd_device_memory_acquire(uint64_t memory_handle, uint64_t offset,
uint64_t size, struct pva_kmd_context *ctx);
/**
* @brief Release the memory.
*
* This function frees memory allocated from acquire or alloc_map. If there are
* active CPU mapping or IOVA mapping, this function will unmap them.
*
* @param memory Pointer to the memory to release.
*/
void pva_kmd_device_memory_free(struct pva_kmd_device_memory *memory);
/**
* @brief Map the memory to CPU space.
*/
enum pva_error
pva_kmd_device_memory_cpu_map(struct pva_kmd_device_memory *memory);
/**
* @brief Unmap the memory from CPU space.
*
* Unmapping memory that is not mapped will trigger an abort.
*/
void pva_kmd_device_memory_cpu_unmap(struct pva_kmd_device_memory *memory);
/**
* @brief Map the memory to IOVA space.
*/
enum pva_error
pva_kmd_device_memory_iova_map(struct pva_kmd_device_memory *memory,
struct pva_kmd_device *pva,
uint32_t access_flags, uint32_t smmu_ctx_idx);
/**
* @brief Unmap the memory from IOVA space.
*
* Unmapping memory that is not mapped will trigger an abort.
*/
void pva_kmd_device_memory_iova_unmap(struct pva_kmd_device_memory *memory);
#endif // PVA_KMD_DEVICE_MEMORY_H
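
A minimal sketch of the allocate/use/free pairing described above, modeled on the call in pva_kmd_device_init_tegra_stats(); the 4 KiB size is arbitrary and the use of PVA_R5_SMMU_CONTEXT_ID simply mirrors that call site.

/* Illustrative only: allocate a DMA-visible scratch buffer, use it, release it. */
static enum pva_error example_scratch_buffer(struct pva_kmd_device *pva)
{
	struct pva_kmd_device_memory *mem;

	mem = pva_kmd_device_memory_alloc_map(4096U, pva, PVA_ACCESS_RW,
					      PVA_R5_SMMU_CONTEXT_ID);
	if (mem == NULL) {
		return PVA_NOMEM;
	}

	/* ... fill mem->va on the CPU, hand mem->iova to firmware ... */

	/* Freeing also unmaps any live CPU and IOVA mappings. */
	pva_kmd_device_memory_free(mem);
	return PVA_SUCCESS;
}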

View File

@@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_CCQ_H
#define PVA_KMD_SHIM_CCQ_H
#include "pva_api.h"
struct pva_kmd_device;
/**
* @brief Push a 64 bit entry to CCQ FIFO.
*
* Push low 32 bits first and then high 32 bits.
*
* @note The caller is responsible for checking if CCQ has enough spaces.
*
*/
void pva_kmd_ccq_push(struct pva_kmd_device *pva, uint8_t ccq_id,
uint64_t ccq_entry);
/**
* @brief Get the number of available spaces in the CCQ.
*
* One CCQ entry is 64 bits. One CCQ can hold up to 4 entries. Therefore, this
* function returns values from 0 to 4.
*/
uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id);
#endif // PVA_KMD_SHIM_CCQ_H
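
Because the push API does not check for space (see the note above), callers are expected to poll pva_kmd_get_ccq_space() first. A minimal sketch under that assumption; a real caller would bound the wait rather than spin forever.

/* Illustrative only: push one 64-bit entry to CCQ 0 once the FIFO has room. */
static void example_ccq_push(struct pva_kmd_device *pva, uint64_t entry)
{
	while (pva_kmd_get_ccq_space(pva, 0U) == 0U) {
		/* busy-wait; production code would add a timeout or sleep */
	}
	pva_kmd_ccq_push(pva, 0U, entry);
}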

View File

@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_DEBUGFS_H
#define PVA_KMD_SHIM_DEBUGFS_H
#include "pva_api.h"
#include "pva_kmd_tegra_stats.h"
void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name,
bool *val);
void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name,
uint32_t *val);
void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name,
struct pva_kmd_file_ops *fops);
void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva);
unsigned long pva_kmd_copy_data_from_user(void *dst, const void *src,
uint64_t size);
unsigned long pva_kmd_copy_data_to_user(void *to, const void *from,
unsigned long size);
unsigned long pva_kmd_strtol(const char *str, int base);
#endif //PVA_KMD_SHIM_DEBUGFS_H

View File

@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_INIT_H
#define PVA_KMD_SHIM_INIT_H
#include "pva_api.h"
struct pva_kmd_device;
struct pva_kmd_file_ops;
/* TODO: remove plat_init APIs. We should just pass in plat_data directly to
* pva_kmd_device_create. */
void pva_kmd_device_plat_init(struct pva_kmd_device *pva);
void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva);
void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint32_t *syncpt_value);
void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint64_t *syncpt_iova);
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva);
/**
* @brief Power on PVA cluster.
*/
enum pva_error pva_kmd_power_on(struct pva_kmd_device *pva);
/**
* @brief Power off PVA cluster.
*/
void pva_kmd_power_off(struct pva_kmd_device *pva);
/**
* @brief Initialize firmware.
*
* This function initializes firmware. On silicon, this includes
* - power on R5,
* - load firmware,
* - bind interrupts,
* - and wait for firmware boot to complete.
*
* @param pva pointer to the PVA device to initialize
*/
enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva);
/**
* @brief De-init firmware.
*
* This function de-initializes firmware. On silicon, this includes
* - free interrupts,
* - power off R5,
* - and free firmware memories.
*
* @param pva pointer to the PVA device to de-initialize
*/
void pva_kmd_deinit_fw(struct pva_kmd_device *pva);
#endif // PVA_KMD_SHIM_INIT_H

View File

@@ -0,0 +1,142 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_SILICON_H
#define PVA_KMD_SHIM_SILICON_H
#include "pva_api.h"
#include "pva_kmd_regs.h"
struct pva_kmd_device;
/**
* @file This file defines silicon APIs.
*
* Silicon APIs are only implemented by platforms that closely resemble the
* silicon PVA, a.k.a Linux, QNX and SIM platforms. Silicon APIs are used to
* implement message APIs and some init APIs.
*
* On native platform, message APIs are implemented differently. Therefore,
* native platform does not need to implement silicon APIs.
*/
/**
* @brief Write to a register in a MMIO region.
*
* @param pva pointer to the PVA cluster.
* @param aperture the MMIO region.
* @param addr the register offset in the MMIO region.
* @param val value to write.
*/
void pva_kmd_aperture_write(struct pva_kmd_device *pva,
enum pva_kmd_reg_aperture aperture, uint32_t addr,
uint32_t val);
/**
* @brief Read from a register in a MMIO region.
*
* @param pva pointer to the PVA cluster.
* @param aperture the MMIO region.
* @param addr the register offset in the MMIO region.
*
* @return the value of the register.
*/
uint32_t pva_kmd_aperture_read(struct pva_kmd_device *pva,
enum pva_kmd_reg_aperture aperture,
uint32_t addr);
/**
* @brief PVA's interrupt lines.
*/
enum pva_kmd_intr_line {
/** Interrupt line from SEC block. We receive mailbox interrupts from
* this line. */
PVA_KMD_INTR_LINE_SEC_LIC = 0,
PVA_KMD_INTR_LINE_CCQ0,
PVA_KMD_INTR_LINE_CCQ1,
PVA_KMD_INTR_LINE_CCQ2,
PVA_KMD_INTR_LINE_CCQ3,
PVA_KMD_INTR_LINE_CCQ4,
PVA_KMD_INTR_LINE_CCQ5,
PVA_KMD_INTR_LINE_CCQ6,
PVA_KMD_INTR_LINE_CCQ7,
PVA_KMD_INTR_LINE_COUNT,
};
/**
* @brief Interrupt handler function prototype.
*/
typedef void (*pva_kmd_intr_handler_t)(void *data);
/**
* @brief Bind an interrupt handler to an interrupt line.
*
* Interrupt will be enabled after binding.
*/
enum pva_error pva_kmd_bind_intr_handler(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line,
pva_kmd_intr_handler_t handler,
void *data);
/**
* @brief Enable an interrupt line.
*/
void pva_kmd_enable_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line);
/**
* @brief Disable an interrupt line.
*/
void pva_kmd_disable_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line);
/**
* @brief Free an interrupt line.
*
* This will disable the interrupt line and unbind the handler.
*/
void pva_kmd_free_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line);
/**
* @brief Read firmware binary from file system.
*
* Firmware binary is loaded into pva->fw_bin_mem, which is directly accessible
* by R5.
*
* KMD will free pva->fw_bin_mem during firmware deinit.
*/
enum pva_error pva_kmd_read_fw_bin(struct pva_kmd_device *pva);
/**
* @brief Get base address of read only syncpoints.
*/
uint32_t pva_kmd_get_syncpt_ro_offset(struct pva_kmd_device *pva);
/**
* @brief Get base address of read write syncpoints.
*/
uint32_t pva_kmd_get_syncpt_rw_offset(struct pva_kmd_device *pva);
/**
* @brief Configure EVP, Segment config registers and SCR registers.
*
* This function configures the EVP, Segment config registers and SCR registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva);
/**
* @brief Configure SID registers.
*
* This function configures the SID registers.
*
* @param pva Pointer to the PVA device.
*/
void pva_kmd_config_sid_regs(struct pva_kmd_device *pva);
#endif // PVA_KMD_SHIM_SILICON_H
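
A minimal sketch of the interrupt API above; the handler body is a placeholder and the choice of the SEC/LIC line is only an example.

/* Illustrative only: bind and later free the SEC/LIC interrupt line. */
static void example_sec_intr_handler(void *data)
{
	struct pva_kmd_device *pva = data;

	/* ... read/clear mailbox status via pva_kmd_aperture_read()/write() ... */
	(void)pva;
}

static enum pva_error example_bind_sec_intr(struct pva_kmd_device *pva)
{
	/* Binding also enables the interrupt line. */
	return pva_kmd_bind_intr_handler(pva, PVA_KMD_INTR_LINE_SEC_LIC,
					 example_sec_intr_handler, pva);
}

/* On teardown: pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC); */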

View File

@@ -0,0 +1,72 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_SHIM_UTILS_H
#define PVA_KMD_SHIM_UTILS_H
#include "pva_api.h"
/**
* @brief Allocate memory for KMD's private use.
*
* Memory will be zero initialized.
*/
void *pva_kmd_zalloc(uint64_t size);
/**
* @brief Free memory allocated by pva_kmd_zalloc.
*/
void pva_kmd_free(void *ptr);
/**
* @brief Print a string.
*
* This function is used for logging errors, enabled even in safety environment.
* For debug print, use pva_dbg_printf.
*
* @param str The string to print.
*/
void pva_kmd_print_str(const char *str);
/**
* @brief Print a string followed by a 64-bit unsigned number.
*
* This function is used for logging errors, enabled even in safety environment.
* For debug print, use pva_dbg_printf.
*
* @param str The string to print.
* @param n The number to print.
*/
void pva_kmd_print_str_u64(const char *str, uint64_t n);
/**
* @brief Fault KMD.
*
* Abort KMD due to critical unrecoverable error.
*/
void pva_kmd_fault(void) __attribute__((noreturn));
/**
* @brief Sleep for some microseconds.
*
* @param us The number of microseconds to sleep.
*/
void pva_kmd_sleep_us(uint64_t us);
#if defined(__KERNEL__)
#include <linux/nospec.h>
#else
static inline uint32_t array_index_nospec(uint32_t index, uint32_t size)
{
return index < size ? index : 0;
}
#endif
#endif // PVA_KMD_SHIM_UTILS_H

View File

@@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved.
*/
#ifndef PVA_KMD_SHIM_VPU_APP_AUTH_H
#define PVA_KMD_SHIM_VPU_APP_AUTH_H
#include "pva_api_types.h"
struct pva_kmd_device;
const char *pva_kmd_get_default_allowlist(void);
enum pva_error pva_kmd_auth_allowlist_load(struct pva_kmd_device *pva,
const char *file_name,
uint8_t **hash_keys_data,
uint64_t *psize);
#endif

View File

@@ -0,0 +1,69 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_THREAD_SEMA_H
#define PVA_KMD_THREAD_SEMA_H
#include "pva_api.h"
#if defined(__KERNEL__) /* For Linux */
#include <linux/semaphore.h>
typedef struct semaphore pva_kmd_sema_t;
#else /* For user space code, including QNX KMD */
#include <semaphore.h>
/* Mutex */
typedef sem_t pva_kmd_sema_t;
#endif
/**
* @brief Initialize a semaphore.
*
* @param sem Pointer to the semaphore.
* @param val Initial value of the semaphore.
*/
void pva_kmd_sema_init(pva_kmd_sema_t *sem, uint32_t val);
/**
* @brief Wait on a semaphore.
*
* Decrement the semaphore count. If the count is zero, the caller will block
* until the semaphore is posted or the timeout expires.
*
* @param sem Pointer to the semaphore.
* @param timeout_ms Timeout in milliseconds.
*
* @retval PVA_SUCCESS if the semaphore was successfully acquired.
* @retval PVA_TIMEDOUT if the semaphore was not acquired within the timeout.
*/
enum pva_error pva_kmd_sema_wait_timeout(pva_kmd_sema_t *sem,
uint32_t timeout_ms);
/**
* @brief Signal a semaphore.
*
* Increment the semaphore count.
*
* @param sem Pointer to the semaphore.
*/
void pva_kmd_sema_post(pva_kmd_sema_t *sem);
/**
* @brief Deinitialize a semaphore.
*
* @param sem Pointer to the semaphore.
*/
void pva_kmd_sema_deinit(pva_kmd_sema_t *sem);
#endif // PVA_KMD_THREAD_SEMA_H
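
A minimal sketch of the init/post/wait/deinit lifecycle declared above, for a single completion signal between two threads; the 100 ms timeout is arbitrary.

/* Illustrative only: consumer waits up to 100 ms for a producer to post. */
static enum pva_error example_wait_for_completion(pva_kmd_sema_t *done)
{
	return pva_kmd_sema_wait_timeout(done, 100U);
}

/*
 * Producer side:
 *   pva_kmd_sema_init(&done, 0U);  - once, count starts at 0
 *   pva_kmd_sema_post(&done);      - wakes one waiter
 *   pva_kmd_sema_deinit(&done);    - once no waiters remain
 */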

View File

@@ -0,0 +1,183 @@
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_H
#define PVA_KMD_H
#include "pva_api.h"
#include "pva_fw.h"
#include "pva_constants.h"
#include "pva_math_utils.h"
/* KMD API: context init */
struct pva_kmd_context_init_in_args {
uint32_t resource_table_capacity;
};
struct pva_kmd_context_init_out_args {
enum pva_error error;
uint64_t ccq_shm_hdl;
};
struct pva_kmd_syncpt_register_out_args {
enum pva_error error;
uint32_t syncpt_ro_res_id;
uint32_t syncpt_rw_res_id;
uint32_t synpt_size;
uint32_t synpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT];
uint32_t num_ro_syncpoints;
};
/**
* Calculates the total memory size required for a PVA submission queue.
* This includes the size of the queue header and the combined size of all command buffer submission info structures.
*
* @param x The number of command buffer submission info structures.
* @return The total memory size in bytes.
*/
static inline uint32_t pva_get_submission_queue_memory_size(uint32_t x)
{
uint32_t submit_info_size =
(uint32_t)sizeof(struct pva_fw_cmdbuf_submit_info);
uint32_t num_submit_infos = safe_mulu32(x, submit_info_size);
uint32_t header_size =
(uint32_t)sizeof(struct pva_fw_submit_queue_header);
return safe_addu32(header_size, num_submit_infos);
}
/* KMD API: queue create */
struct pva_kmd_queue_create_in_args {
uint32_t max_submission_count;
uint64_t queue_memory_handle;
uint64_t queue_memory_offset;
};
struct pva_kmd_queue_create_out_args {
enum pva_error error;
uint32_t queue_id;
uint32_t syncpt_fence_counter;
};
/* KMD API: queue destroy */
struct pva_kmd_queue_destroy_in_args {
uint32_t queue_id;
};
struct pva_kmd_queue_destroy_out_args {
enum pva_error error;
};
struct pva_kmd_memory_register_in_args {
enum pva_memory_segment segment;
uint32_t access_flags;
uint64_t memory_handle;
uint64_t offset;
uint64_t size;
};
/* KMD API: executable */
struct pva_kmd_executable_register_in_args {
uint32_t size;
};
struct pva_kmd_executable_get_symbols_in_args {
uint32_t exec_resource_id;
};
struct pva_kmd_executable_get_symbols_out_args {
enum pva_error error;
uint32_t num_symbols;
/* Followed by <num_symbols> of struct pva_symbol_info */
};
/* KMD API: DMA config */
struct pva_kmd_dma_config_register_in_args {
struct pva_dma_config_header dma_config_header;
/* Followed by hwseq words, channels, descriptors, etc. */
};
struct pva_kmd_register_out_args {
enum pva_error error;
uint32_t resource_id;
};
struct pva_kmd_exec_register_out_args {
enum pva_error error;
uint32_t resource_id;
uint32_t num_symbols;
};
struct pva_kmd_unregister_in_args {
uint32_t resource_id;
};
enum pva_kmd_op_type {
PVA_KMD_OP_CONTEXT_INIT,
PVA_KMD_OP_QUEUE_CREATE,
PVA_KMD_OP_QUEUE_DESTROY,
PVA_KMD_OP_EXECUTABLE_GET_SYMBOLS,
PVA_KMD_OP_MEMORY_REGISTER,
PVA_KMD_OP_SYNPT_REGISTER,
PVA_KMD_OP_EXECUTABLE_REGISTER,
PVA_KMD_OP_DMA_CONFIG_REGISTER,
PVA_KMD_OP_UNREGISTER,
PVA_KMD_OP_MAX,
};
/**
* The header of a KMD operation
*/
struct pva_kmd_op_header {
enum pva_kmd_op_type op_type; /**< Type of the KMD operation */
};
/**
* The header of a KMD response
*/
struct pva_kmd_response_header {
uint32_t rep_size; /**< Size of the response, including the header */
};
enum pva_kmd_ops_mode {
/**
* Only one operation is allowed. The
* operation will be done synchronously.
* KMD will wait for the fence if
* necessary. */
PVA_KMD_OPS_MODE_SYNC,
/**
* A list of registration operations are allowed. These operations will
* trigger a post fence. KMD will not wait for the fence.
*/
PVA_KMD_OPS_MODE_ASYNC,
};
/**
* A buffer contains a list of KMD operations and a post fence.
*
* In general, the list of KMD operations contain jobs that need to be done by
* the KMD and FW. KMD will first perform its part and then submit a privileged
* command buffer to FW. FW will trigger the provided post fence when done.
*
* NOTE: Starting address of every struct/array in the buffer must be aligned to
* 8 bytes.
*/
struct pva_kmd_operations {
enum pva_kmd_ops_mode mode;
struct pva_fw_postfence postfence;
/** Followed by a list of KMD operation(s) */
};
/* Max op buffer size is 8 MB */
#define PVA_KMD_MAX_OP_BUFFER_SIZE (8 * 1024 * 1024)
/* Max response size is 8 KB */
#define PVA_KMD_MAX_RESP_BUFFER_SIZE (8 * 1024)
#endif // PVA_KMD_H
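
A minimal sketch of how the start of an asynchronous operations buffer could be populated; how individual operation records and their arguments are packed after the header is not spelled out in this header, so that part is left as a comment.

/* Illustrative only: initialize the head of an async ops buffer. */
static void example_init_ops_buffer(struct pva_kmd_operations *ops,
				    const struct pva_fw_postfence *fence)
{
	ops->mode = PVA_KMD_OPS_MODE_ASYNC;
	ops->postfence = *fence;
	/*
	 * Operation records (struct pva_kmd_op_header followed by the per-op
	 * arguments) are appended after this struct; every struct/array start
	 * must be 8-byte aligned, and the whole buffer must stay within
	 * PVA_KMD_MAX_OP_BUFFER_SIZE.
	 */
}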

View File

@@ -0,0 +1,19 @@
################################### tell Emacs this is a -*- makefile-gmake -*-
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
###############################################################################
obj-m := pva_kmd_linux.o
pva_kmd_linux-objs += ${PVA_KMD_LINUX_SRC}
ccflags-y += ${PVA_KMD_LINUX_INC}
ccflags-y += ${PVA_KMD_LINUX_DEF}
ccflags-y += ${PVA_KMD_LINUX_CFLAGS}
ccflags-y += -std=gnu11

View File

@@ -0,0 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef PVA_KMD_LINUX_H
#define PVA_KMD_LINUX_H
#include "pva_kmd.h"
#define PVA_LINUX_DEV_PATH_PREFIX "/dev/nvhost-ctrl-pva"
#define NVPVA_IOCTL_MAGIC 'Q'
#define PVA_KMD_IOCTL_GENERIC \
_IOWR(NVPVA_IOCTL_MAGIC, 1, struct pva_kmd_linux_ioctl_header)
#define NVPVA_IOCTL_MAX_SIZE 256 /* Temporary value; may be updated later */
struct nvpva_ioctl_part {
void *addr;
uint64_t size;
};
/**
* The header of request to KMD
*/
struct pva_kmd_linux_ioctl_header {
struct nvpva_ioctl_part request;
struct nvpva_ioctl_part response;
};
#endif // PVA_KMD_LINUX_H
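
A minimal user-space sketch of issuing the generic ioctl defined above; the request/response payloads, the device node suffix, and the error handling are assumptions, only the structures and PVA_KMD_IOCTL_GENERIC come from this header.

/* Illustrative only (user space): wrap a request/response pair for the generic ioctl. */
#include <sys/ioctl.h>

static int example_issue_generic_ioctl(int fd, void *req, uint64_t req_size,
				       void *resp, uint64_t resp_size)
{
	struct pva_kmd_linux_ioctl_header hdr = {
		.request = { .addr = req, .size = req_size },
		.response = { .addr = resp, .size = resp_size },
	};

	/* fd is assumed to come from opening PVA_LINUX_DEV_PATH_PREFIX "0". */
	return ioctl(fd, PVA_KMD_IOCTL_GENERIC, &hdr);
}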

View File

@@ -0,0 +1,145 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/nvhost.h>
#include <uapi/linux/tegra-soc-hwpm-uapi.h>
#include "pva_kmd_linux.h"
#include "pva_kmd_linux_device.h"
#include "pva_kmd_debugfs.h"
static int pva_handle_fops(struct seq_file *s, void *data)
{
return 0;
}
static int debugfs_node_open(struct inode *inode, struct file *file)
{
int retval;
struct pva_kmd_file_ops *fops = file_inode(file)->i_private;
retval = single_open(file, pva_handle_fops, inode->i_private);
if (retval != 0) {
pva_kmd_log_err("debugfs_node_open single_open failed");
goto out;
}
if (fops->open != NULL) {
retval = fops->open(fops->pdev);
}
out:
return retval;
}
static int debugfs_node_release(struct inode *inode, struct file *file)
{
int retval;
struct pva_kmd_file_ops *fops = file_inode(file)->i_private;
if (fops->release != NULL) {
retval = fops->release(fops->pdev);
if (retval != 0) {
pva_kmd_log_err("debugfs_node_release release failed");
goto out;
}
}
retval = single_release(inode, file);
out:
return retval;
}
static long int debugfs_node_read(struct file *file, char *data,
long unsigned int size, long long int *offset)
{
int64_t retval;
struct pva_kmd_file_ops *fops = file_inode(file)->i_private;
retval = fops->read(fops->pdev, fops->file_data, data, *offset, size);
return retval;
}
static long int debugfs_node_write(struct file *file, const char *data,
long unsigned int size,
long long int *offset)
{
long int retval;
struct pva_kmd_file_ops *fops = file_inode(file)->i_private;
retval = fops->write(fops->pdev, fops->file_data, data, *offset, size);
return retval;
}
static const struct file_operations pva_linux_debugfs_fops = {
.open = debugfs_node_open,
.read = debugfs_node_read,
.write = debugfs_node_write,
.release = debugfs_node_release,
};
void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name,
bool *pdata)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
debugfs_create_bool(name, 0644, de, pdata);
}
void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name,
uint32_t *pdata)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
debugfs_create_u32(name, 0644, de, pdata);
}
void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name,
struct pva_kmd_file_ops *pvafops)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
struct file_operations *fops =
(struct file_operations *)&pva_linux_debugfs_fops;
struct dentry *file;
file = debugfs_create_file(name, 0644, de, pvafops, fops);
ASSERT(file != NULL);
}
void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
debugfs_lookup_and_remove("stats_enable", de);
debugfs_lookup_and_remove("vpu_debug", de);
debugfs_lookup_and_remove("profile_level", de);
debugfs_lookup_and_remove("vpu_stats", de);
}
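
To show how the create helpers above are typically consumed, here is a short sketch that registers two of the nodes torn down in pva_kmd_debugfs_remove_nodes() above. The backing variables and the registration function are hypothetical; the helper signatures and node names come from this file, and the sketch assumes the includes at the top of it.

/* Hypothetical backing storage for two of the nodes removed above. */
static bool pva_example_stats_enable;
static uint32_t pva_example_profile_level;

/* Register simple debugfs knobs under the nvhost device's debugfs directory. */
static void pva_example_create_debugfs_nodes(struct pva_kmd_device *pva)
{
	pva_kmd_debugfs_create_bool(pva, "stats_enable",
				    &pva_example_stats_enable);
	pva_kmd_debugfs_create_u32(pva, "profile_level",
				   &pva_example_profile_level);
}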

View File

@@ -0,0 +1,390 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include <linux/of.h>
#include <linux/clk.h>
#include <linux/reset.h>
#include <linux/pm_runtime.h>
#include <linux/debugfs.h>
#include <linux/firmware.h>
#include <linux/version.h>
#include <linux/nvhost.h>
#include <linux/nvhost_t194.h>
#include <linux/iommu.h>
#include <linux/dma-mapping.h>
#include <soc/tegra/virt/syscalls.h>
#include <asm/io.h>
#include "pva_kmd_device.h"
#include "pva_kmd_linux_device.h"
#include "pva_kmd_device_memory.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_silicon_utils.h"
#include "pva_kmd_silicon_boot.h"
struct nvhost_device_data *
pva_kmd_linux_device_get_properties(struct platform_device *pdev)
{
struct nvhost_device_data *props = platform_get_drvdata(pdev);
return props;
}
struct pva_kmd_linux_device_data *
pva_kmd_linux_device_get_data(struct pva_kmd_device *device)
{
return (struct pva_kmd_linux_device_data *)device->plat_data;
}
void pva_kmd_linux_device_set_data(struct pva_kmd_device *device,
struct pva_kmd_linux_device_data *data)
{
device->plat_data = (void *)data;
}
void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint32_t *syncpt_value)
{
int err = 0;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
err = nvhost_syncpt_read_ext_check(props->pdev, syncpt_id,
syncpt_value);
if (err < 0) {
FAULT("Failed to read syncpoint value\n");
}
}
void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint64_t *syncpt_iova)
{
uint32_t offset = 0;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct platform_device *host_pdev =
to_platform_device(props->pdev->dev.parent);
offset = nvhost_syncpt_unit_interface_get_byte_offset_ext(host_pdev,
syncpt_id);
*syncpt_iova = safe_addu64(pva->syncpt_ro_iova, (uint64_t)offset);
}
void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
{
phys_addr_t base;
size_t size;
int err = 0;
uint32_t syncpt_page_size;
uint32_t syncpt_offset[PVA_NUM_RW_SYNCPTS];
dma_addr_t sp_start;
struct platform_device *host_pdev;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
nvhost_syncpt_unit_interface_init(props->pdev);
host_pdev = to_platform_device(props->pdev->dev.parent);
err = nvhost_syncpt_unit_interface_get_aperture(host_pdev, &base,
&size);
if (err < 0) {
FAULT("Failed to get syncpt aperture\n");
}
/** Get page size of a syncpoint */
syncpt_page_size =
nvhost_syncpt_unit_interface_get_byte_offset_ext(host_pdev, 1);
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
sp_start = dma_map_resource(dev, base, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, sp_start)) {
FAULT("Failed to pin RO syncpoints\n");
}
} else {
FAULT("Failed to pin RO syncpoints\n");
}
pva->syncpt_ro_iova = sp_start;
pva->syncpt_offset = syncpt_page_size;
pva->num_syncpts = (size / syncpt_page_size);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
pva->syncpt_rw[i].syncpt_id = nvhost_get_syncpt_client_managed(
props->pdev, "pva_syncpt");
if (pva->syncpt_rw[i].syncpt_id == 0) {
FAULT("Failed to get syncpt\n");
}
syncpt_offset[i] =
nvhost_syncpt_unit_interface_get_byte_offset_ext(
host_pdev, pva->syncpt_rw[i].syncpt_id);
err = nvhost_syncpt_read_ext_check(
props->pdev, pva->syncpt_rw[i].syncpt_id,
&pva->syncpt_rw[i].syncpt_value);
if (err < 0) {
FAULT("Failed to read syncpoint value\n");
}
}
pva->syncpt_rw_iova =
dma_map_resource(dev,
safe_addu64(base, (uint64_t)syncpt_offset[0]),
safe_mulu64((uint64_t)pva->syncpt_offset,
(uint64_t)PVA_NUM_RW_SYNCPTS),
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, pva->syncpt_rw_iova)) {
FAULT("Failed to pin RW syncpoints\n");
}
pva->syncpt_rw[0].syncpt_iova = pva->syncpt_rw_iova;
for (uint32_t i = 1; i < PVA_NUM_RW_SYNCPTS; i++) {
if (safe_addu32(syncpt_offset[i - 1], pva->syncpt_offset) !=
syncpt_offset[i]) {
FAULT("RW syncpts are not contiguous\n");
}
pva->syncpt_rw[i].syncpt_iova = safe_addu64(
pva->syncpt_rw_iova,
safe_mulu64((uint64_t)pva->syncpt_offset, (uint64_t)i));
}
}
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva)
{
}
void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva)
{
int err = 0;
phys_addr_t base;
size_t size;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
struct platform_device *host_pdev =
to_platform_device(props->pdev->dev.parent);
err = nvhost_syncpt_unit_interface_get_aperture(host_pdev, &base,
&size);
if (err < 0) {
FAULT("Failed to get syncpt aperture\n");
}
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
dma_unmap_resource(dev, pva->syncpt_ro_iova, size,
DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
dma_unmap_resource(dev, pva->syncpt_rw_iova,
safe_mulu64((uint64_t)pva->syncpt_offset,
(uint64_t)PVA_NUM_RW_SYNCPTS),
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
} else {
FAULT("Failed to unmap syncpts\n");
}
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
nvhost_syncpt_put_ref_ext(props->pdev,
pva->syncpt_rw[i].syncpt_id);
pva->syncpt_rw[i].syncpt_id = 0;
pva->syncpt_rw[i].syncpt_iova = 0;
pva->syncpt_rw[i].syncpt_value = 0;
}
pva->syncpt_ro_iova = 0;
pva->syncpt_rw_iova = 0;
pva->syncpt_offset = 0;
nvhost_syncpt_unit_interface_deinit(props->pdev);
}
void pva_kmd_device_plat_init(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *plat_data =
pva_kmd_zalloc_nofail(sizeof(struct pva_kmd_linux_device_data));
pva_kmd_linux_device_set_data(pva, plat_data);
/* Get SMMU context devices that were probed earlier and their SIDs */
pva_kmd_linux_device_smmu_contexts_init(pva);
}
void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva)
{
pva_kmd_linux_host1x_deinit(pva);
pva_kmd_free(pva_kmd_linux_device_get_data(pva));
}
enum pva_error pva_kmd_power_on(struct pva_kmd_device *pva)
{
int err = 0;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
err = pm_runtime_get_sync(&props->pdev->dev);
if (err < 0) {
pm_runtime_put_noidle(&props->pdev->dev);
goto out;
}
/* Power management operation is asynchronous. PVA may not be power
* cycled between power_off -> power_on call. Therefore, we need to
* reset it here to make sure it is in a clean state. */
reset_control_acquire(props->reset_control);
reset_control_reset(props->reset_control);
reset_control_release(props->reset_control);
out:
return kernel_err2pva_err(err);
}
void pva_kmd_power_off(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *props = device_data->pva_device_properties;
pm_runtime_mark_last_busy(&props->pdev->dev);
pm_runtime_put(&props->pdev->dev);
/* Power management operation is asynchronous. We don't control when PVA
* will really be powered down. However, we need to free memories after
* this call. Therefore, we assert the reset line to stop PVA from any
* further activity. */
reset_control_acquire(props->reset_control);
reset_control_assert(props->reset_control);
reset_control_release(props->reset_control);
}
uint32_t pva_kmd_get_syncpt_ro_offset(struct pva_kmd_device *pva)
{
return safe_subu64(pva->syncpt_ro_iova, FW_SHARED_MEMORY_START);
}
uint32_t pva_kmd_get_syncpt_rw_offset(struct pva_kmd_device *pva)
{
return safe_subu64(pva->syncpt_rw_iova, FW_SHARED_MEMORY_START);
}
enum pva_error pva_kmd_read_fw_bin(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *device_props =
device_data->pva_device_properties;
struct pva_kmd_device_memory *fw_bin_mem;
const struct firmware *fw_ucode;
int kerr = request_firmware(&fw_ucode, device_props->firmware_name,
&device_props->pdev->dev);
if (kerr < 0) {
err = kernel_err2pva_err(kerr);
goto out;
}
fw_bin_mem = pva_kmd_device_memory_alloc_map(
safe_pow2_roundup_u64(fw_ucode->size, SIZE_4KB), pva,
PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
if (fw_bin_mem == NULL) {
err = PVA_NOMEM;
goto release;
}
memcpy(fw_bin_mem->va, fw_ucode->data, fw_ucode->size);
pva->fw_bin_mem = fw_bin_mem;
release:
release_firmware(fw_ucode);
out:
return err;
}
void pva_kmd_aperture_write(struct pva_kmd_device *pva,
enum pva_kmd_reg_aperture aperture, uint32_t reg,
uint32_t val)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *device_props =
device_data->pva_device_properties;
void __iomem *addr = device_props->aperture[aperture] + reg;
writel(val, addr);
}
uint32_t pva_kmd_aperture_read(struct pva_kmd_device *pva,
enum pva_kmd_reg_aperture aperture, uint32_t reg)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvhost_device_data *device_props =
device_data->pva_device_properties;
void __iomem *addr = device_props->aperture[aperture] + reg;
return readl(addr);
}
enum pva_error kernel_err2pva_err(int err)
{
if (err >= 0) {
return PVA_SUCCESS;
}
switch (err) {
case -EINVAL:
return PVA_INVAL;
case -EINTR:
return PVA_EINTR;
default:
return PVA_UNKNOWN_ERROR;
}
}
unsigned long pva_kmd_copy_data_from_user(void *dst, const void *src,
uint64_t size)
{
return copy_from_user(dst, src, size);
}
unsigned long pva_kmd_copy_data_to_user(void __user *to, const void *from,
unsigned long size)
{
return copy_to_user(to, from, size);
}
unsigned long pva_kmd_strtol(const char *str, int base)
{
unsigned long val;
int ret;
ret = kstrtoul(str, base, &val);
if (ret < 0)
return 0;
return val;
}
/* TODO: Enable HVC call once HVC fix is available on dev-main */
//static void pva_kmd_config_regs(void)
//{
//bool hv_err = true;
//hv_err = hyp_pva_config_regs();
//ASSERT(hv_err == true);
//ASSERT(false);
//}
void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva)
{
pva_kmd_config_evp_seg_regs(pva);
pva_kmd_config_scr_regs(pva);
}
void pva_kmd_config_sid_regs(struct pva_kmd_device *pva)
{
pva_kmd_config_sid(pva);
}

Some files were not shown because too many files have changed in this diff.