pva: mirror from gitlab cv/pva-sys-sw

Gitlab commit a307885246be7 ("umd: port intf tests to umd - ...")

Changes since last deployment:

- umd: port intf tests to umd - add NegativeTest_MaxContextCreate_Single...
- Remove VPU auth default macro
- kmd: Add a null check to fix static defect
- tests: Fix sync unregistration test
- fw: Handle DMA error when fetching chunk
- umd_tests: add requirements tests
- Fix error path of Linux KMD memory API
- Add kernel code style check script
- address review comments for access flag feat
- fix memory leak in access buffer tests
- kmd: use correct formatting
- fw: update license and use macro
- tests: add tests for buffer access type
- feat: respect buffer access flags
- Update deployment document
- Add a default fence wait timeout
- Fix error path of KMD sync ops submit
- Move recovery state check to pva_kmd_device_busy
- Fix error path of profiling level update
- Increase max CCQ FIFO depth to 14
- kmd: zero initialize all commands
- Make KMD robust against firmware abort
- Add multi user stress tests
- Conditional VMEM Clear Check
- Conditional VMEM Clear Check
- Fix static defects in KMD
- Reading auth for all the PVA devices
- Add support for VPU Device Tree authentication
- UMD: Add Null and range checks
- Remove assert and propogate error
- Add error injection tests
- Bug fix - 5207608
- Update CUPVA tests in CI to 2.7.0 rc3
- tests: add register_exec_noop_with_bit_flip
- fw: Fix static defects
- kmd: freeze PVA before freeing code/data memory
- Add missing test_mode parameter for run_test_plan
- deploy: allow deploying to different branches
- pva kmd: linux: handle test mode dt entry
- fw: baremetal: bound breaklink params
- coverity: Set expiry for code quality report
- kmd: Remove PVA_IS_DEBUG from native timeout calc
- Reduce iterations of long duration UMD tests
- Fix IOVA leak in Linux KMD
- fw:common: fix order of enable/disable virt
- umd_test: add missing requirement test specs
- umd_test: add test for perf spikes
- Fix nsight fence logging
- deploy: fix GVS build failure
- Add FSP Abort Hook
- Execution timeout
- Trace fences for NSIGHT
- Fix shared buffer handling during suspend/resume
- tests: add more tests for resource unregistration
- Add MODS test support
- KMD:Fix static defect
- umd: fix double free in cuextend
- umd: Free pva_memory object on free()
- Unify VPU and PPE syscall ID
- Clean up public API
- cuextend: Cleanup implementation
- cuextend: Add API to get stream payload
- compat: Fix missing flushes of event fences
- cuExtend: Unified code path for stream submit
- cuExtend: Implementation of cuExtend Stream Submit
- cuExtend: Stream submit API definitions
- cuExtend: Sync to new cuExtend header
- Set test mode default through macro
- fw: Add PPE error codes
- Use zalloc when allocating resource record
- Allocate Temporary Buffers for DMA Config Loading
- Fix fast reset failure test
- Add DMA config allocator
- kmd: Add unsafe API for block allocator
- Add missing warning for Linux kernel build
- Set err cmd idx to zero if there's no error
- ci: Run tests for MODS test mode
- Use 1K command buffer chunk size in MODS test mode
- Allow developer to provide its own target lease
- tests: add nvsci prefence_postfence_test
- kmd: Sha calculation static defects fix
- kmd: fix INT30-c static defect
- Fix command index logging for PVA_FW_EVENT_RUN_VPU
- Enable vpucfg_destroy_after_submit
- tests: add tests spec for deterministic test
- test: add cpu_signaller_pva_waiter_deterministic
- tests: add cpu_waiter_pva_signaller_deterministic
- Disable verbosity control of FW log
- Ops free API should accept NULL ptr
- Report TCM usage for t26x as well
- Support non-contiguous syncpoints
- umd: fix new top 25 CWE
- License header update
- L2SRAM flush command changes
- debugfs: disable tests for broken nodes
- debugfs: handle 0 input size for allowlist path
- Move pva_kmd_device_idle to context destroy
- Refactor interrupt handler binding in PVA KMD
- Fix DMA registration error path
- debugfs: Add read support for fw log level
- Add stress test suites to CI
- Fix error path for context init
- Add stress test suites
- umd: add NULL checks
- ci: Perf Test Updates
- ci: perf test updates
- Enable boot from GSC in L4T GVS
- Updating comment

Change-Id: I98be7ec270ba5f6fd5bc0978d084d731a88e70b6
Signed-off-by: abhinayaa <abhinayaa@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3348508
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Mohnish Jain <mohnishj@nvidia.com>
Author:       abhinayaa <abhinayaa@nvidia.com>
Date:         2025-04-24 04:43:25 +00:00
Committed-by: Jon Hunter
Parent:       0ac4068c89
Commit:       63f6f2f159
69 changed files with 2205 additions and 1891 deletions

View File

@@ -22,6 +22,7 @@ pva_objs += \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_context.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_debugfs.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_device.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_devmem_pool.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_binding.o \
$(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_validate.o \
@@ -70,6 +71,7 @@ pva_inc_flags += \
-I$(PVA_SYS_ABSDIR)/src/kmd/include \
-I$(PVA_SYS_ABSDIR)/src/kmd/linux/include \
-I$(PVA_SYS_ABSDIR)/src/libs/pva/include \
-I$(PVA_SYS_ABSDIR)/src/private_api \
pva_def_flags += \
-DPVA_BUILD_MODE=PVA_BUILD_MODE_L4T \

View File

@@ -224,6 +224,11 @@
#define PVA_ABORT_NOC_BIST (0xfcU)
//! @endcond
/**
* @brief Minor code for abort in case of FSP abort.
*/
#define PVA_ABORT_FSP 0x42U
/** @} */
/**
@@ -299,4 +304,36 @@
#define PVA_ABORT_IRQ_TEST_HOST (0xE002U)
#endif
/** @} */
/**
* @defgroup PVA_ABORT_ARGUMENTS_FSP Argument to pva_abort() from FSP abort
* @ingroup PVA_ABORT_ARGUMENTS
* @{
*/
/**
* @brief Minor Code for FSP aborts because of safertos errors
*/
#define PVA_ABORT_FSP_SAFERTOS (0xE001U)
/**
* @brief Minor Code for FSP aborts because of asserts in fsp
*/
#define PVA_ABORT_FSP_ASSERT (0xE002U)
/**
* @brief Minor Code for FSP aborts because of exception in fsp
*/
#define PVA_ABORT_FSP_EXCEPTION (0xE003U)
/**
* @brief Minor Code for FSP aborts because of stack guard failure
*/
#define PVA_ABORT_FSP_STACK (0xE004U)
/**
* @brief Minor Code for Unknown FSP aborts
*/
#define PVA_ABORT_FSP_UNKNOWN (0xE005U)
/** @} */
#endif

View File

@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
/* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_ERRORS_H
#define PVA_ERRORS_H
@@ -154,15 +154,6 @@ typedef uint16_t pva_errors_t;
*/
//! @cond DISABLE_DOCUMENTATION
/**
* @brief Error in case of Floating point NAN.
*/
#define PVA_ERR_PPE_DIVIDE_BY_0 (0x34U)
/**
* @brief Error in case of Floating point NAN.
*/
#define PVA_ERR_PPE_ILLEGAL_DEBUG (0x36U)
#define PVA_ERR_PPE_ILLEGAL_INSTR_ALIGN (0x37U)
/**
@@ -270,40 +261,6 @@ typedef uint16_t pva_errors_t;
* more than HW Seq RAM size.
*/
#define PVA_ERR_DMA_HWSEQ_PROGRAM_TOO_LONG (0x217U)
/** @} */
/**
* @defgroup PVA_MISR_ERRORS
*
* @brief MISR error codes used across PVA.
* @{
*/
/**
* @brief Error status when DMA MISR test is not run.
*/
#define PVA_ERR_MISR_NOT_RUN (0x280U)
/**
* @brief Error status when DMA MISR test did not complete.
*/
#define PVA_ERR_MISR_NOT_DONE (0x281U)
/**
* @brief Error status when DMA MISR test timed out.
*/
#define PVA_ERR_MISR_TIMEOUT (0x282U)
/**
* @brief Error status in case of DMA MISR test address failure.
*/
#define PVA_ERR_MISR_ADDR (0x283U)
/**
* @brief Error status in case of DMA MISR test data failure.
*/
#define PVA_ERR_MISR_DATA (0x284U)
/**
* @brief Error status in case of DMA MISR test data and address failure.
*/
#define PVA_ERR_MISR_ADDR_DATA (0x285U)
/** @} */
/**
* @defgroup PVA_VPU_ISR_ERRORS
*

View File

@@ -6,150 +6,4 @@
#include <stdint.h>
/**
 * @brief Write syscall parameter will be a pointer to this struct
* @{
*/
typedef union {
struct {
uint32_t addr;
uint32_t size;
} in;
struct {
uint32_t written_size;
} out;
} pva_fw_pe_syscall_write;
/** @} */
/**
* @defgroup PVA_VPU_SYSCALL_PERFMON_SAMPLE_PARAM_GROUP
*
* @brief Parameter specification for syscall perfmon_sample
*
* @{
*/
/**
* @brief Perfmon sample syscall parameter will be a pointer to this struct
*/
typedef struct {
/** counter_mask[0] is for ID: 0-31; counter_mask[1] is for ID: 32-63 */
uint32_t counter_mask[2];
uint32_t output_addr;
} pva_fw_pe_syscall_perfmon_sample;
/**
* @brief Index for t26x performance counters for VPU
*/
#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T26X (0U)
#define PERFMON_COUNTER_ID_VPS_ID_VALID_T26X (1U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T26X (2U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T26X (3U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T26X (4U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T26X (5U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T26X (6U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T26X (7U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T26X (8U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T26X (9U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T26X (10U)
#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T26X (11U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T26X (12U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T26X (13U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T26X (14U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T26X (15U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T26X (16U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T26X (17U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T26X (18U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T26X (19U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_FETCH_REQ_T26X (20U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X (21U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREEMPT_T26X (22U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_LINES_T26X (23U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_DUR_T26X (24U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_DUR_T26X (25U)
#define PERFMON_COUNTER_ID_DLUT_BUSY_T26X (26U)
#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T26X (27U)
#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T26X (28U)
#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T26X (29U)
#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T26X (30U)
#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T26X (31U)
#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T26X (32U)
#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T26X (33U)
#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T26X (34U)
#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T26X (35U)
#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T26X (36U)
/**
* @brief Index for t23x performance counters
*/
#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T23X (0U)
#define PERFMON_COUNTER_ID_VPS_ID_VALID_T23X (1U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T23X (2U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T23X (3U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T23X (4U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T23X (5U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T23X (6U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T23X (7U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T23X (8U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T23X (9U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T23X (10U)
#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T23X (11U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T23X (12U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T23X (13U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T23X (14U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T23X (15U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T23X (16U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T23X (17U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T23X (18U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T23X (19U)
#define PERFMON_COUNTER_ID_ICACHE_FETCH_REQ_T23X (20U)
#define PERFMON_COUNTER_ID_ICACHE_MISS_T23X (21U)
#define PERFMON_COUNTER_ID_ICACHE_PREEMP_T23X (22U)
#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_LINES_T23X (23U)
#define PERFMON_COUNTER_ID_ICACHE_MISS_DUR_T23X (24U)
#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_DUR_T23X (25U)
#define PERFMON_COUNTER_ID_DLUT_BUSY_T23X (26U)
#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T23X (27U)
#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T23X (28U)
#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T23X (29U)
#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T23X (30U)
#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T23X (31U)
#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T23X (32U)
#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T23X (33U)
#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T23X (34U)
#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T23X (35U)
#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T23X (36U)
/**
* @brief Index for t26x performance counters for PPE
*/
#define PERFMON_COUNTER_ID_PPS_STALL_ID_NO_VAL_INSTR_T26X (0U)
#define PERFMON_COUNTER_ID_PPS_ID_VALID_T26X (1U)
#define PERFMON_COUNTER_ID_PPS_STALL_ID_REG_DEPEND_T26X (2U)
#define PERFMON_COUNTER_ID_PPS_STALL_ID_ONLY_T26X (3U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX1_ONLY_T26X (4U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_LD_DEPENDENCY_T26X (5U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_ST_DEPENDENCY_T26X (6U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_DEPENDENCY_T26X (7U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STRM_STORE_FLUSH_T26X (8U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_STORE_FLUSH_T26X (9U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STORE_FLUSH_T26X (10U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_LD_T26X (11U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_ST_T26X (12U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_T26X (13U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LD_T26X (14U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_ST_T26X (15U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LDST_T26X (16U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_PUSHBACK_T26X (17U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STQ_PUSHBACK_T26X (18U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_FLUSH_T26X (19U)
#define PERFMON_COUNTER_ID_PPS_WFE_GPI_EX_STATE_T26X (20U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_FETCH_REQ_T26X (21U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_T26X (22U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREEMPT_T26X (23U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_LINES_T26X (24U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_DUR_T26X (25U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_DUR_T26X (26U)
/** @} */
#endif /*PVA_VPU_SYSCALL_INTERFACE_H*/

View File

@@ -11,6 +11,138 @@
/* The sizes of these structs must be explicitly padded to align to 4 bytes */
#define PVA_CMD_PRIV_OPCODE_FLAG (1U << 7U)
#define PVA_RESOURCE_ID_BASE 1U
struct pva_resource_entry {
uint8_t access_flags : 2; // 1: RO, 2: WO, 3: RW
uint8_t reserved : 4;
#define PVA_RESOURCE_TYPE_INVALID 0U
#define PVA_RESOURCE_TYPE_DRAM 1U
#define PVA_RESOURCE_TYPE_EXEC_BIN 2U
#define PVA_RESOURCE_TYPE_DMA_CONFIG 3U
uint8_t type : 2;
uint8_t smmu_context_id;
uint8_t addr_hi;
uint8_t size_hi;
uint32_t addr_lo;
uint32_t size_lo;
};
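/*
 * Illustrative sketch (not part of the diff): one way a 40-bit IOVA and size
 * could be split into the hi/lo fields of pva_resource_entry. The helper name
 * is hypothetical; the access-flag value follows the encoding in the comment
 * above (1: RO, 2: WO, 3: RW).
 */
static inline void fill_resource_entry_sketch(struct pva_resource_entry *e,
					      uint64_t iova, uint64_t size,
					      uint8_t smmu_ctx_id)
{
	e->access_flags = 1U;                   /* read-only, per "1: RO" */
	e->type = PVA_RESOURCE_TYPE_DRAM;
	e->smmu_context_id = smmu_ctx_id;
	e->addr_hi = (uint8_t)(iova >> 32U);    /* bits 39:32 */
	e->addr_lo = (uint32_t)(iova & 0xFFFFFFFFU); /* bits 31:0 */
	e->size_hi = (uint8_t)(size >> 32U);
	e->size_lo = (uint32_t)(size & 0xFFFFFFFFU);
}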
struct pva_cmd_init_resource_table {
#define PVA_CMD_OPCODE_INIT_RESOURCE_TABLE (0U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
/**< Resource table id is from 0 to 7, 0 is the device's resource table,
* 1-7 are users'. */
uint8_t resource_table_id;
uint8_t resource_table_addr_hi;
uint8_t pad[2];
uint32_t resource_table_addr_lo;
uint32_t max_n_entries;
};
struct pva_cmd_deinit_resource_table {
#define PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE (1U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t resource_table_id;
uint8_t pad[3];
};
struct pva_cmd_update_resource_table {
#define PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE (2U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t resource_table_id;
uint8_t pad[3];
uint32_t resource_id;
struct pva_resource_entry entry;
};
struct pva_cmd_init_queue {
#define PVA_CMD_OPCODE_INIT_QUEUE (3U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t ccq_id;
uint8_t queue_id;
uint8_t queue_addr_hi;
uint8_t syncpt_addr_hi;
uint32_t queue_addr_lo;
uint32_t max_n_submits;
uint32_t syncpt_addr_lo;
uint32_t syncpt_id;
};
struct pva_cmd_deinit_queue {
#define PVA_CMD_OPCODE_DEINIT_QUEUE (4U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t ccq_id;
uint8_t queue_id;
uint8_t pad[2];
};
struct pva_cmd_enable_fw_profiling {
#define PVA_CMD_OPCODE_ENABLE_FW_PROFILING (5U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t timestamp_type;
uint8_t pad[3];
uint32_t filter;
};
struct pva_cmd_disable_fw_profiling {
#define PVA_CMD_OPCODE_DISABLE_FW_PROFILING (6U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_get_tegra_stats {
#define PVA_CMD_OPCODE_GET_TEGRA_STATS (7U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t buffer_offset_hi;
bool enabled;
uint8_t pad[2];
uint32_t buffer_resource_id;
uint32_t buffer_size;
uint32_t buffer_offset_lo;
};
struct pva_cmd_suspend_fw {
#define PVA_CMD_OPCODE_SUSPEND_FW (8U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_resume_fw {
#define PVA_CMD_OPCODE_RESUME_FW (9U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_init_shared_dram_buffer {
#define PVA_CMD_OPCODE_INIT_SHARED_DRAM_BUFFER (10U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t interface;
uint8_t buffer_iova_hi;
uint8_t pad[2];
uint32_t buffer_iova_lo;
uint32_t buffer_size;
};
struct pva_cmd_deinit_shared_dram_buffer {
#define PVA_CMD_OPCODE_DEINIT_SHARED_DRAM_BUFFER \
(11U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t interface;
uint8_t pad[3];
};
struct pva_cmd_set_debug_log_level {
#define PVA_CMD_OPCODE_SET_DEBUG_LOG_LEVEL (12U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint32_t log_level;
};
struct pva_cmd_set_profiling_level {
#define PVA_CMD_OPCODE_SET_PROFILING_LEVEL (13U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint32_t level;
};
#define PVA_CMD_PRIV_OPCODE_COUNT 14U
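/*
 * Illustrative sketch (not part of the diff): the MSB of the opcode marks a
 * privileged command, so a dispatcher can separate privileged from regular
 * opcodes with a single bit test. The function name is hypothetical.
 */
static inline bool pva_opcode_is_privileged_sketch(uint8_t opcode)
{
	/* PVA_CMD_PRIV_OPCODE_FLAG is (1U << 7U), i.e. the opcode MSB */
	return (opcode & PVA_CMD_PRIV_OPCODE_FLAG) != 0U;
}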
struct pva_fw_prefence {
uint8_t offset_hi;
uint8_t pad0[3];
@@ -301,7 +433,8 @@ struct pva_fw_shared_buffer_header {
struct pva_kmd_fw_buffer_msg_header {
#define PVA_KMD_FW_BUF_MSG_TYPE_FW_EVENT 0
#define PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE 1
#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 2
#define PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE 2
#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 3
uint32_t type : 8;
// Size of payload in bytes. Includes the size of the header.
uint32_t size : 24;
@@ -334,6 +467,27 @@ struct pva_kmd_fw_msg_vpu_trace {
uint64_t submit_id;
};
struct pva_kmd_fw_msg_fence_trace {
uint64_t submit_id;
uint64_t timestamp;
// For syncpt fences, fence_id is the syncpt index
// For semaphore fences, fence_id is the serial ID of the semaphore NvRM memory
uint64_t fence_id;
// 'offset' is the offset into the semaphore memory where the value is stored
// This is only valid for semaphore fences
// Note: Trace APIs in KMD only support 32-bit offset
uint32_t offset;
uint32_t value;
uint8_t ccq_id;
uint8_t queue_id;
#define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT 0U
#define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL 1U
uint8_t action;
#define PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT 0U
#define PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE 1U
uint8_t type;
};
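/*
 * Illustrative sketch (not part of the diff): walking the KMD/FW shared
 * buffer and dispatching on the new FENCE_TRACE message type. The reader
 * helper and the byte-stream layout assumption (payload immediately follows
 * its header, header->size covering both) are hypothetical.
 */
static void consume_fw_messages_sketch(const uint8_t *buf, uint32_t len)
{
	uint32_t off = 0U;

	while (off + sizeof(struct pva_kmd_fw_buffer_msg_header) <= len) {
		const struct pva_kmd_fw_buffer_msg_header *hdr =
			(const void *)(buf + off);

		if (hdr->size == 0U)
			break; /* malformed message; stop parsing */

		if (hdr->type == PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE) {
			const struct pva_kmd_fw_msg_fence_trace *ft =
				(const void *)(hdr + 1);
			/* e.g. forward ft->submit_id, ft->timestamp and
			 * ft->action (WAIT/SIGNAL) to a trace backend */
			(void)ft;
		}
		off += hdr->size; /* size includes the header itself */
	}
}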
// Resource unregister message
struct pva_kmd_fw_msg_res_unreg {
uint32_t resource_id;
@@ -345,4 +499,11 @@ struct pva_kmd_fw_tegrastats {
uint64_t total_utilization[PVA_NUM_PVE];
};
#define PVA_MAX_CMDBUF_CHUNK_LEN 1024
#define PVA_MAX_CMDBUF_CHUNK_SIZE (sizeof(uint32_t) * PVA_MAX_CMDBUF_CHUNK_LEN)
#define PVA_TEST_MODE_MAX_CMDBUF_CHUNK_LEN 256
#define PVA_TEST_MODE_MAX_CMDBUF_CHUNK_SIZE \
(sizeof(uint32_t) * PVA_TEST_MODE_MAX_CMDBUF_CHUNK_LEN)
#endif // PVA_FW_H

View File

@@ -49,7 +49,9 @@
* | 23-21 | Reserved | Reserved for future use |
* | 20 | CG DISABLE | To indicate the PVA R5 FW should disable the clock gating feature |
* | 19 | VMEM RD WAR DISABLE | To disable the VMEM Read fail workaround feature |
* | 18-16 | Reserved | Reserved for future use |
* | 18 | TEST_MODE_ENABLE | To enter test mode. See Documentation. |
* | 17 | USE_XBAR_RAW | Reserved for future use |
* | 16 | Reserved | Reserved for future use |
*
* The table below shows the mapping which is sent by FW to KMD
*
@@ -72,11 +74,20 @@
#define PVA_BOOT_SEMA_CG_DISABLE PVA_BIT(20U)
//! @cond DISABLE_DOCUMENTATION
/** Tell firmware to enter test mode */
#define PVA_BOOT_SEMA_TEST_MODE_ENABLE PVA_BIT(18U)
/** Tell firmware that block linear surfaces are in XBAR_RAW format instead of
* TEGRA_RAW format */
#define PVA_BOOT_SEMA_USE_XBAR_RAW PVA_BIT(17U)
/** Tell firmware to enable test mode */
#define PVA_BOOT_SEMA_TEST_MODE PVA_BIT(16U)
#define PVA_BOOT_SEMA 0U
#define PVA_RO_SYNC_BASE_SEMA 1U
#define PVA_RW_SYNC_BASE_SEMA 2U
#define PVA_RW_SYNC_SIZE_SEMA 3U
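/*
 * Illustrative sketch (not part of the diff): composing the boot semaphore
 * word using the bit definitions above. The function and parameter names are
 * hypothetical.
 */
static inline uint32_t make_boot_flags_sketch(int test_mode, int xbar_raw)
{
	uint32_t flags = 0U;

	if (test_mode != 0)
		flags |= PVA_BOOT_SEMA_TEST_MODE_ENABLE; /* bit 18 */
	if (xbar_raw != 0)
		flags |= PVA_BOOT_SEMA_USE_XBAR_RAW;     /* bit 17 */

	/* The composed word corresponds to shared semaphore index
	 * PVA_BOOT_SEMA (0) in the KMD-to-FW mapping documented above. */
	return flags;
}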
/**
* @brief This macro has the value to be set by KMD in the shared semaphores

View File

@@ -62,8 +62,10 @@ struct pva_fw_dma_slot {
* to block linear surface. */
#define PVA_FW_DMA_SLOT_FLAG_CB (1u << 4u)
#define PVA_FW_DMA_SLOT_FLAG_BOUND (1u << 5u)
uint8_t flags;
uint8_t pad;
#define PVA_FW_DMA_SLOT_FLAG_MASKED (1u << 6u)
#define PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB 7u
#define PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB 8u
uint16_t flags;
/** Bitmask of channels that use this slot */
uint16_t ch_use_mask;
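/*
 * Illustrative sketch (not part of the diff): with flags widened to 16 bits,
 * the 2-bit access field occupies bits 7..8 (ACCESS_LSB/ACCESS_MSB). The
 * helper name is hypothetical.
 */
static inline uint16_t pva_fw_dma_slot_get_access_sketch(uint16_t flags)
{
	uint16_t width = (PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB -
			  PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB) + 1u; /* 2 bits */
	uint16_t mask = (uint16_t)(((1u << width) - 1u)
				   << PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB);

	return (uint16_t)((flags & mask) >> PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB);
}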

View File

@@ -16,6 +16,7 @@ extern "C" {
/* Core APIs */
#define PVA_MAX_NUM_RESOURCES_PER_CONTEXT (16U * 1024U)
/**
* @brief Create a PVA context.
*
@@ -37,6 +38,18 @@ enum pva_error pva_context_create(uint32_t pva_index,
*/
void pva_context_destroy(struct pva_context *ctx);
/**
* @brief Get the value of a context attribute.
*
* @param[in] ctx Pointer to the context.
* @param[in] attr Attribute to get.
* @param[out] out_value Pointer to the value of the attribute.
 * @param[in] size Size of the attribute structure, in bytes.
*/
enum pva_error pva_get_attribute(struct pva_context *ctx, enum pva_attr attr,
void *out_value, uint64_t size);
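/*
 * Illustrative usage sketch (not part of the diff), assuming the attribute
 * key PVA_CONTEXT_ATTR_MAX_CMDBUF_CHUNK_SIZE and the struct
 * pva_ctx_attr_max_cmdbuf_chunk_size introduced in pva_api_types.h later in
 * this change; use_chunk_size() is a hypothetical consumer.
 *
 *   struct pva_ctx_attr_max_cmdbuf_chunk_size chunk = { 0 };
 *   enum pva_error err;
 *
 *   err = pva_get_attribute(ctx, PVA_CONTEXT_ATTR_MAX_CMDBUF_CHUNK_SIZE,
 *                           &chunk, sizeof(chunk));
 *   if (err == PVA_SUCCESS)
 *       use_chunk_size(chunk.max_size);
 */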
#define PVA_MAX_NUM_SUBMISSIONS_PER_QUEUE (8U * 1024U)
/**
* @brief Create a PVA queue.
*
@@ -97,7 +110,7 @@ void pva_memory_free(struct pva_memory *mem);
* @param[in] ctx Pointer to the context.
* @param[in] syncpiont_id Syncpoint ID to wait on.
* @param[in] value Value to wait for.
* @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite.
* @param[in] timeout_us Timeout in microseconds. PVA_SUBMIT_TIMEOUT_INF for infinite.
*/
enum pva_error pva_syncpoint_wait(struct pva_context *ctx,
uint32_t syncpiont_id, uint32_t value,
@@ -109,7 +122,7 @@ enum pva_error pva_syncpoint_wait(struct pva_context *ctx,
* @param[in] queue Pointer to the queue.
* @param[in] submit_infos Array of submit info structures.
* @param[in] count Number of submit info structures.
* @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite.
* @param[in] timeout_us Timeout in microseconds. PVA_SUBMIT_TIMEOUT_INF for infinite.
*
* @note Concurrent submission to the same queue needs to be serialized by the
* caller.
@@ -206,26 +219,6 @@ enum pva_error pva_memory_import_id_destroy(uint64_t import_id);
/** \brief Specifies the PVA system software minor version. */
#define PVA_SYSSW_MINOR_VERSION (7U)
/**
* @brief Get PVA system software version.
*
* PVA system software version is defined as the latest version of cuPVA which is fully supported
* by this version of the PVA system software.
*
* @param[out] version version of currently running system SW, computed as:
(PVA_SYSSW_MAJOR_VERSION * 1000) + PVA_SYSSW_MINOR_VERSION
* @return PVA_SUCCESS on success, else error code indicating the failure.
*/
enum pva_error pva_get_version(uint32_t *version);
/**
* @brief Get the hardware characteristics of the PVA.
*
* @param[out] pva_hw_char Pointer to the hardware characteristics.
*/
enum pva_error
pva_get_hw_characteristics(struct pva_characteristics *pva_hw_char);
#ifdef __cplusplus
}
#endif

View File

@@ -5,13 +5,9 @@
#define PVA_API_CMDBUF_H
#include "pva_api_types.h"
//Maximum number of slots for maintaining Timestamps
#define PVA_MAX_QUERY_SLOTS_COUNT 32U
/** The common header for all commands.
*/
struct pva_cmd_header {
#define PVA_CMD_PRIV_OPCODE_FLAG (1U << 7U)
/** Opcode for the command. MSB of opcode indicates whether this command is
* privileged or not */
uint8_t opcode;
@@ -35,6 +31,26 @@ struct pva_cmd_header {
uint8_t len;
};
struct pva_dma_misr_config {
#define PVA_DMA_FLAG_MISR_ENABLE 1u
uint8_t enabled;
uint8_t reserved;
uint16_t channel_mask;
uint32_t seed_crc0;
uint32_t seed_crc1;
uint32_t ref_addr;
uint32_t ref_data_1;
uint32_t ref_data_2;
uint32_t misr_timeout;
};
struct pva_dma_misr {
uint32_t slot_mask_low0;
uint32_t slot_mask_low1;
uint32_t slot_mask_high;
struct pva_dma_misr_config misr_config;
};
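/*
 * Illustrative sketch (not part of the diff): filling the MISR parameters
 * that the new setup_misr command (PVA_CMD_OPCODE_SETUP_MISR below) carries.
 * All concrete values are placeholders and the helper name is hypothetical.
 */
static inline void fill_misr_sketch(struct pva_dma_misr *m)
{
	m->slot_mask_low0 = 0x1U;   /* placeholder: slot 0 only */
	m->slot_mask_low1 = 0U;
	m->slot_mask_high = 0U;
	m->misr_config.enabled = PVA_DMA_FLAG_MISR_ENABLE;
	m->misr_config.reserved = 0U;
	m->misr_config.channel_mask = 0x1U; /* placeholder: channel 0 */
	m->misr_config.seed_crc0 = 0xFFFFFFFFU;
	m->misr_config.seed_crc1 = 0xFFFFFFFFU;
	m->misr_config.ref_addr = 0U;
	m->misr_config.ref_data_1 = 0U;
	m->misr_config.ref_data_2 = 0U;
	m->misr_config.misr_timeout = 1000U;
}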
struct pva_user_dma_allowance {
#define PVA_USER_DMA_ALLOWANCE_ADB_STEP_SIZE 8
uint32_t channel_idx : 4;
@@ -189,11 +205,6 @@ struct pva_cmd_set_vpu_parameter_with_buffer {
uint32_t src_dram_offset_lo;
};
/** For set_vpu_parameter_with_address command, set this flag in header.flags to
* indicate that the target symbol is the legacy pointer symbol type:
* pva_fw_vpu_legacy_ptr_symbol, which only supports 32bit offset and 32bit
* size. */
#define PVA_CMD_FLAGS_USE_LEGACY_POINTER 0x1
/** Copy the address of a DRAM buffer to a VPU variable. The variable must be
* laid out exactly according to pva_fw_vpu_ptr_symbol
*/
@@ -208,7 +219,6 @@ struct pva_cmd_set_vpu_parameter_with_address {
};
#define PVA_MAX_DMA_SETS_PER_DMA_ENGINE 4
#define PVA_DMA_CONFIG_FETCH_BUFFER_PER_DMA_ENGINE 1
/** This command first acquires the TCM scratch and then fetches DMA configuration
* into the scratch. The command does not modify DMA
@@ -291,17 +301,7 @@ struct pva_cmd_run_ppe {
uint32_t entry_point_index;
};
#define PVA_BARRIER_GROUP_0 0U
#define PVA_BARRIER_GROUP_1 1U
#define PVA_BARRIER_GROUP_2 2U
#define PVA_BARRIER_GROUP_3 3U
#define PVA_BARRIER_GROUP_4 4U
#define PVA_BARRIER_GROUP_5 5U
#define PVA_BARRIER_GROUP_6 6U
#define PVA_BARRIER_GROUP_7 7U
#define PVA_MAX_BARRIER_GROUPS 8U
#define PVA_BARRIER_GROUP_INVALID 0xFFU
/**
@@ -464,29 +464,15 @@ struct pva_cmd_set_vpu_instance_parameter {
uint32_t symbol_id;
};
struct pva_cmd_run_unit_tests {
#define PVA_CMD_OPCODE_RUN_UNIT_TESTS 30U
struct pva_cmd_set_vpu_print_buffer {
#define PVA_CMD_OPCODE_SET_VPU_PRINT_BUFFER 30U
struct pva_cmd_header header;
#define PVA_FW_UTESTS_MAX_ARGC 16U
uint8_t argc;
uint8_t pad[3];
uint32_t in_resource_id;
uint32_t in_offset;
uint32_t in_size;
uint32_t out_resource_id;
uint32_t out_offset;
uint32_t out_size;
};
struct pva_cmd_set_vpu_print_cb {
#define PVA_CMD_OPCODE_SET_VPU_PRINT_CB 31U
struct pva_cmd_header header;
uint32_t cb_resource_id;
uint32_t cb_offset;
uint32_t resource_id;
uint32_t offset;
};
struct pva_cmd_invalidate_l2sram {
#define PVA_CMD_OPCODE_INVALIDATE_L2SRAM 32U
#define PVA_CMD_OPCODE_INVALIDATE_L2SRAM 31U
struct pva_cmd_header header;
uint8_t dram_offset_hi;
uint8_t pad[3];
@@ -496,19 +482,18 @@ struct pva_cmd_invalidate_l2sram {
};
struct pva_cmd_flush_l2sram {
#define PVA_CMD_OPCODE_FLUSH_L2SRAM 33U
#define PVA_CMD_OPCODE_FLUSH_L2SRAM 32U
struct pva_cmd_header header;
uint8_t dram_offset_hi;
uint8_t pad[3];
uint32_t dram_resource_id;
uint32_t dram_offset_lo;
uint32_t l2sram_size;
struct pva_user_dma_allowance user_dma;
};
struct pva_cmd_err_inject {
#define PVA_CMD_OPCODE_ERR_INJECT 34U
struct pva_cmd_header header;
enum pva_error_inject_codes err_inject_code;
};
struct pva_cmd_patch_l2sram_offset {
#define PVA_CMD_OPCODE_PATCH_L2SRAM_OFFSET 35U
#define PVA_CMD_OPCODE_PATCH_L2SRAM_OFFSET 33U
struct pva_cmd_header header;
uint8_t dma_set_id;
uint8_t slot_id;
@@ -520,130 +505,16 @@ struct pva_cmd_patch_l2sram_offset {
* mapped to a new logical barrier group. This allows re-using barrier ids within a command buffer.
*/
struct pva_cmd_retire_barrier_group {
#define PVA_CMD_OPCODE_RETIRE_BARRIER_GROUP 36U
#define PVA_CMD_OPCODE_RETIRE_BARRIER_GROUP 34U
struct pva_cmd_header header;
};
struct pva_cmd_gr_check {
#define PVA_CMD_OPCODE_GR_CHECK 37U
struct pva_cmd_setup_misr {
#define PVA_CMD_OPCODE_SETUP_MISR 35U
struct pva_cmd_header header;
struct pva_dma_misr misr_params;
};
#define PVA_CMD_OPCODE_COUNT 38U
struct pva_cmd_init_resource_table {
#define PVA_CMD_OPCODE_INIT_RESOURCE_TABLE (0U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
/**< Resource table id is from 0 to 7, 0 is the device's resource table,
* 1-7 are users'. */
uint8_t resource_table_id;
uint8_t resource_table_addr_hi;
uint8_t pad[2];
uint32_t resource_table_addr_lo;
uint32_t max_n_entries;
};
struct pva_cmd_deinit_resource_table {
#define PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE (1U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t resource_table_id;
uint8_t pad[3];
};
struct pva_cmd_update_resource_table {
#define PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE (2U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t resource_table_id;
uint8_t pad[3];
uint32_t resource_id;
struct pva_resource_entry entry;
};
struct pva_cmd_init_queue {
#define PVA_CMD_OPCODE_INIT_QUEUE (3U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t ccq_id;
uint8_t queue_id;
uint8_t queue_addr_hi;
uint8_t pad;
uint32_t queue_addr_lo;
uint32_t max_n_submits;
};
struct pva_cmd_deinit_queue {
#define PVA_CMD_OPCODE_DEINIT_QUEUE (4U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t ccq_id;
uint8_t queue_id;
uint8_t pad[2];
};
struct pva_cmd_enable_fw_profiling {
#define PVA_CMD_OPCODE_ENABLE_FW_PROFILING (5U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t timestamp_type;
uint8_t pad[3];
uint32_t filter;
};
struct pva_cmd_disable_fw_profiling {
#define PVA_CMD_OPCODE_DISABLE_FW_PROFILING (6U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_get_tegra_stats {
#define PVA_CMD_OPCODE_GET_TEGRA_STATS (7U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t buffer_offset_hi;
bool enabled;
uint8_t pad[2];
uint32_t buffer_resource_id;
uint32_t buffer_size;
uint32_t buffer_offset_lo;
};
struct pva_cmd_suspend_fw {
#define PVA_CMD_OPCODE_SUSPEND_FW (8U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_resume_fw {
#define PVA_CMD_OPCODE_RESUME_FW (9U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
};
struct pva_cmd_init_shared_dram_buffer {
#define PVA_CMD_OPCODE_INIT_SHARED_DRAM_BUFFER (10U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t interface;
uint8_t buffer_iova_hi;
uint8_t pad[2];
uint32_t buffer_iova_lo;
uint32_t buffer_size;
};
struct pva_cmd_deinit_shared_dram_buffer {
#define PVA_CMD_OPCODE_DEINIT_SHARED_DRAM_BUFFER \
(11U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint8_t interface;
uint8_t pad[3];
};
struct pva_cmd_set_debug_log_level {
#define PVA_CMD_OPCODE_SET_DEBUG_LOG_LEVEL (12U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint32_t log_level;
};
struct pva_cmd_set_profiling_level {
#define PVA_CMD_OPCODE_SET_PROFILING_LEVEL (13U | PVA_CMD_PRIV_OPCODE_FLAG)
struct pva_cmd_header header;
uint32_t level;
};
#define PVA_CMD_PRIV_OPCODE_COUNT 14U
#define PVA_MAX_CMDBUF_CHUNK_LEN 1024
#define PVA_MAX_CMDBUF_CHUNK_SIZE (sizeof(uint32_t) * PVA_MAX_CMDBUF_CHUNK_LEN)
#define PVA_CMD_OPCODE_MAX 36U
#endif // PVA_API_CMDBUF_H

View File

@@ -11,14 +11,14 @@ extern "C" {
#include "cuda.h"
#include "pva_api_types.h"
/**
* @brief Structure for cuExtend queue data needed for command submission.
*/
struct pva_cuextend_queue_data {
/*! Holds a pointer to pva queue object */
struct pva_queue *queue;
/*! Holds engine affinity for command submission*/
uint32_t affinity;
#define PVA_CUEXTEND_MAX_NUM_PREFENCES 16
#define PVA_CUEXTEND_MAX_NUM_POSTFENCES 16
struct pva_cuextend_submit_events {
struct pva_fence prefences[PVA_CUEXTEND_MAX_NUM_PREFENCES];
struct pva_fence postfences[PVA_CUEXTEND_MAX_NUM_POSTFENCES];
uint32_t num_prefences;
uint32_t num_postfences;
};
/**
@@ -71,27 +71,16 @@ typedef enum pva_error (*pva_cuextend_stream_unregister)(void *callback_args,
uint64_t flags);
/**
* @brief Function type for cuExtend acquire queue callback.
* @brief Function type for submitting a batch of command buffers via a CUDA stream.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register.
* @param[out] queue_data Output pointer to a pva_cuextend_queue_data object.
* @return \ref pva_error The completion status of acquire queue operation.
* @param[in] submit_payload Pointer to the submit payload.
* @return \ref pva_error The completion status of the submit operation.
*/
typedef enum pva_error (*pva_cuextend_queue_acquire)(
void *callback_args, void *stream_payload,
struct pva_cuextend_queue_data **queue_data);
/**
* @brief Function type for cuExtend release queue callback.
*
* @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization.
* @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register.
* @return \ref pva_error The completion status of release queue operation.
*/
typedef enum pva_error (*pva_cuextend_queue_release)(void *callback_args,
void *stream_payload,
void *queue_data);
typedef enum pva_error (*pva_cuextend_stream_submit)(
void *callback_args, void *stream_payload, void *submit_payload,
struct pva_cuextend_submit_events *submit_events);
/**
* @brief Function type for retrieving error code from cuExtend.
@@ -128,12 +117,10 @@ struct pva_cuextend_callbacks {
pva_cuextend_stream_register stream_reg;
/*! Holds the unregister stream callback */
pva_cuextend_stream_unregister stream_unreg;
/*! Holds the acquire queue callback */
pva_cuextend_queue_acquire queue_acquire;
/*! Holds the release queue callback */
pva_cuextend_queue_release queue_release;
/*! Holds the teardown callback */
pva_cuextend_teardown teardown;
/*! Holds the stream submit callback */
pva_cuextend_stream_submit stream_submit;
/*! Pointer to the callback arguments provided by client during cuExtend initialization */
void *args;
};
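/*
 * Illustrative sketch (not part of the diff): a client wiring up the new
 * stream_submit callback in place of the removed queue_acquire/queue_release
 * pair. client_stream_submit, client_stream_register, client_stream_unregister,
 * client_teardown and client_args are all hypothetical.
 *
 *   static enum pva_error client_stream_submit(void *callback_args,
 *                                              void *stream_payload,
 *                                              void *submit_payload,
 *                                              struct pva_cuextend_submit_events *ev)
 *   {
 *       // translate submit_payload into PVA submissions, honoring
 *       // ev->prefences / ev->postfences
 *       return PVA_SUCCESS;
 *   }
 *
 *   struct pva_cuextend_callbacks cbs = {
 *       .stream_reg = client_stream_register,
 *       .stream_unreg = client_stream_unregister,
 *       .stream_submit = client_stream_submit,
 *       .teardown = client_teardown,
 *       .args = client_args,
 *   };
 */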
@@ -188,22 +175,32 @@ enum pva_error pva_cuextend_memory_import(struct pva_context *ctx,
/**
* @brief Submit a batch of command buffers via a CUDA stream.
*
* @param[in] queue Pointer to the queue. If queue is not NULL, this API will try to submit the client tasks to this queue directly.
* Otherwise, it will call queue_acquire callback to query a pva_queue object from stream payload, and then submit
* the tasks to the queried queue.
* @param[in] stream A CUDA stream.
* @param[in] submit_infos Array of submit info structures.
* @param[in] count Number of submit info structures.
* @param[in] timeout_ms Timeout in milliseconds. PVA_TIMEOUT_INF for infinite.
* @param[in] ctx Pointer to the PVA context.
* @param[in] cuStream A CUDA stream.
* @param[in] client_stream A client stream.
* @param[in] submit_payload Pointer to the submit payload.
* @return \ref pva_error The completion status of the submit operation.
*
* @note Concurrent submission to the same queue needs to be serialized by the
* caller.
*/
enum pva_error
pva_cuextend_cmdbuf_batch_submit(struct pva_queue *queue, CUstream stream,
struct pva_cmdbuf_submit_info *submit_infos,
uint32_t count, uint64_t timeout_ms);
enum pva_error pva_cuextend_cmdbuf_batch_submit(struct pva_context *ctx,
CUstream cuStream,
void *client_stream,
void *submit_payload);
/**
* @brief Get the payload associated with a CUDA stream.
*
 * Returns the payload that was associated with the CUDA stream by the registration callback.
*
* @param[in] ctx Pointer to the PVA context.
* @param[in] cuStream A CUDA stream.
* @param[out] stream_payload Pointer to the stream payload.
* @return PVA_SUCCESS if the stream payload is successfully retrieved
* PVA_BAD_PARAMETER_ERROR if any of the parameters are NULL
* PVA_CUDA_INIT_FAILED if the cuExtend was not initialized for the context
*/
enum pva_error pva_cuextend_get_stream_payload(struct pva_context *ctx,
CUstream cuStream,
void **stream_payload);
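/*
 * Illustrative usage sketch (not part of the diff): looking up the payload
 * registered for a CUDA stream before issuing a batch submit with the new
 * signature. Variable names are hypothetical.
 *
 *   void *payload = NULL;
 *   enum pva_error err =
 *       pva_cuextend_get_stream_payload(ctx, cuStream, &payload);
 *   if (err == PVA_SUCCESS)
 *       err = pva_cuextend_cmdbuf_batch_submit(ctx, cuStream, client_stream,
 *                                              submit_payload);
 */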
#ifdef __cplusplus
}

View File

@@ -24,73 +24,8 @@ enum pva_gpio_bit {
GPIO_WRITE6_BIT = 29U
};
enum pva_dma_descriptor_id {
PVA_DMA_DESC_NONE = 0,
PVA_DMA_DESC0 = 1,
PVA_DMA_DESC1 = 2,
PVA_DMA_DESC2 = 3,
PVA_DMA_DESC3 = 4,
PVA_DMA_DESC4 = 5,
PVA_DMA_DESC5 = 6,
PVA_DMA_DESC6 = 7,
PVA_DMA_DESC7 = 8,
PVA_DMA_DESC8 = 9,
PVA_DMA_DESC9 = 10,
PVA_DMA_DESC10 = 11,
PVA_DMA_DESC11 = 12,
PVA_DMA_DESC12 = 13,
PVA_DMA_DESC13 = 14,
PVA_DMA_DESC14 = 15,
PVA_DMA_DESC15 = 16,
PVA_DMA_DESC16 = 17,
PVA_DMA_DESC17 = 18,
PVA_DMA_DESC18 = 19,
PVA_DMA_DESC19 = 20,
PVA_DMA_DESC20 = 21,
PVA_DMA_DESC21 = 22,
PVA_DMA_DESC22 = 23,
PVA_DMA_DESC23 = 24,
PVA_DMA_DESC24 = 25,
PVA_DMA_DESC25 = 26,
PVA_DMA_DESC26 = 27,
PVA_DMA_DESC27 = 28,
PVA_DMA_DESC28 = 29,
PVA_DMA_DESC29 = 30,
PVA_DMA_DESC30 = 31,
PVA_DMA_DESC31 = 32,
PVA_DMA_DESC32 = 33,
PVA_DMA_DESC33 = 34,
PVA_DMA_DESC34 = 35,
PVA_DMA_DESC35 = 36,
PVA_DMA_DESC36 = 37,
PVA_DMA_DESC37 = 38,
PVA_DMA_DESC38 = 39,
PVA_DMA_DESC39 = 40,
PVA_DMA_DESC40 = 41,
PVA_DMA_DESC41 = 42,
PVA_DMA_DESC42 = 43,
PVA_DMA_DESC43 = 44,
PVA_DMA_DESC44 = 45,
PVA_DMA_DESC45 = 46,
PVA_DMA_DESC46 = 47,
PVA_DMA_DESC47 = 48,
PVA_DMA_DESC48 = 49,
PVA_DMA_DESC49 = 50,
PVA_DMA_DESC50 = 51,
PVA_DMA_DESC51 = 52,
PVA_DMA_DESC52 = 53,
PVA_DMA_DESC53 = 54,
PVA_DMA_DESC54 = 55,
PVA_DMA_DESC55 = 56,
PVA_DMA_DESC56 = 57,
PVA_DMA_DESC57 = 58,
PVA_DMA_DESC58 = 59,
PVA_DMA_DESC59 = 60,
PVA_DMA_DESC60 = 61,
PVA_DMA_DESC61 = 62,
PVA_DMA_DESC62 = 63,
PVA_DMA_DESC63 = 64
};
#define PVA_DMA_DESC_ID_NULL 0
#define PVA_DMA_DESC_ID_BASE 1
/**
* The values of the enum members conform to the definitions of DMA descriptors'
@@ -266,8 +201,6 @@ struct pva_dma_config_header {
* means that every allocation of descriptors will start at an alignment of 4. The following
* macros control the alignment/grouping requirement of DMA resources.
*/
// TODO: Add compile time asserts to ensure the following alignment requirements don't result
// in fractional resource partitions?
#define PVA_DMA_CHANNEL_ALIGNMENT 1
#define PVA_DMA_DESCRIPTOR_ALIGNMENT 4
#define PVA_DMA_ADB_ALIGNMENT 16

View File

@@ -11,7 +11,7 @@
*/
struct pva_ops_memory {
uint32_t handle; /**< Memory handle */
uint32_t size; /**< Size of memory */
uint64_t size; /**< Size of memory */
void *va; /**< Virtual address */
};
@@ -27,8 +27,8 @@ struct pva_ops_memory {
*/
struct pva_ops_buffer {
struct pva_ops_memory *memory; /**< Pointer to buffer memory */
uint32_t start_offset; /**< Start offset in buffer memory */
uint32_t end_offset; /**< End offset (exclusive) in buffer memory */
uint64_t start_offset; /**< Start offset in buffer memory */
uint64_t end_offset; /**< End offset (exclusive) in buffer memory */
};
/**
@@ -45,9 +45,9 @@ struct pva_ops_buffer {
* @brief Header structure for PVA operations.
*/
struct pva_ops_header {
uint32_t opcode; /**< Operation code identifying the operation type */
uint64_t opcode; /**< Operation code identifying the operation type */
/** Size of the operation in bytes. This size must be a multiple of 8 bytes. */
uint32_t size;
uint64_t size;
};
/**
@@ -56,8 +56,7 @@ struct pva_ops_header {
struct pva_ops_executable_register {
#define PVA_OPS_OPCODE_EXECUTABLE_REGISTER 1U
struct pva_ops_header header; /**< Operation header */
uint32_t exec_size; /**< Size of executable data */
uint32_t pad; /**< Padding for 8 bytes alignment */
uint64_t exec_size; /**< Size of executable data */
//followed by executable data
};
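/*
 * Illustrative sketch (not part of the diff): sizing an executable-register
 * op now that the header and size fields are 64-bit. The 8-byte rounding
 * mirrors the "multiple of 8 bytes" requirement documented on
 * pva_ops_header.size; the helper name is hypothetical.
 */
static inline uint64_t exec_register_op_size_sketch(uint64_t exec_size)
{
	uint64_t sz = sizeof(struct pva_ops_executable_register) + exec_size;

	return (sz + 7U) & ~7ULL; /* round up to a multiple of 8 bytes */
}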
@@ -144,7 +143,7 @@ enum pva_error pva_ops_parse_unregister_resp(struct pva_ops_buffer *resp_buf);
*
* @return PVA_SUCCESS on success, appropriate error code otherwise.
*/
enum pva_error pva_ops_memory_alloc(struct pva_context *ctx, uint32_t size,
enum pva_error pva_ops_memory_alloc(struct pva_context *ctx, uint64_t size,
struct pva_ops_memory *ops_buf);
/**

View File

@@ -118,6 +118,16 @@
ACT(PVA_ERR_MATH_OP) \
ACT(PVA_ERR_HWSEQ_INVALID) \
ACT(PVA_ERR_FW_ABORTED) \
ACT(PVA_ERR_PPE_DIVIDE_BY_0) \
ACT(PVA_ERR_PPE_FP_NAN) \
ACT(PVA_ERR_INVALID_ACCESS_MODE_COMBINATION) \
ACT(PVA_ERR_CMD_TCM_BUF_OUT_OF_RANGE) \
ACT(PVA_ERR_MISR_NOT_RUN) \
ACT(PVA_ERR_MISR_DATA) \
ACT(PVA_ERR_MISR_ADDR) \
ACT(PVA_ERR_MISR_NOT_DONE) \
ACT(PVA_ERR_MISR_ADDR_DATA) \
ACT(PVA_ERR_MISR_TIMEOUT) \
ACT(PVA_ERR_CODE_COUNT)
enum pva_error {
@@ -207,12 +217,6 @@ struct pva_fw_vpu_ptr_symbol {
uint64_t size;
};
struct pva_fw_vpu_legacy_ptr_symbol {
uint64_t base;
uint32_t offset;
uint32_t size;
};
enum pva_surface_format {
PVA_SURF_FMT_PITCH_LINEAR = 0,
PVA_SURF_FMT_BLOCK_LINEAR
@@ -243,25 +247,6 @@ enum pva_symbol_type {
PVA_SYM_TYPE_MAX,
};
/**
* \brief Holds PVA Sync Client Type.
* Currently NvSciSync supports NvSciSyncFences with syncpoint primitive type only.
*/
enum pva_sync_client_type {
/*! For a given SyncObj PVA acts as a signaler. This type corresponds to
* postfences from PVA. */
PVA_SYNC_CLIENT_TYPE_SIGNALER,
/*! For a given SyncObj PVA acts as a waiter. This type corresponds to
* prefences to PVA. */
PVA_SYNC_CLIENT_TYPE_WAITER,
/*! For a given SyncObj PVA acts as both signaler and waiter. */
PVA_SYNC_CLIENT_TYPE_SIGNALER_WAITER,
/*! Specifies the non inclusive upper bound of valid values. */
PVA_SYNC_CLIENT_TYPE_MAX,
/*! Reserved bound of valid values. */
PVA_SYNC_CLIENT_TYPE_RESERVED = 0x7FFFFFFF,
};
#define PVA_SYMBOL_ID_INVALID 0U
#define PVA_SYMBOL_ID_BASE 1U
#define PVA_MAX_SYMBOL_NAME_LEN 64U
@@ -275,19 +260,6 @@ struct pva_symbol_info {
};
#define PVA_RESOURCE_ID_INVALID 0U
#define PVA_RESOURCE_ID_BASE 1U
struct pva_resource_entry {
#define PVA_RESOURCE_TYPE_INVALID 0U
#define PVA_RESOURCE_TYPE_DRAM 1U
#define PVA_RESOURCE_TYPE_EXEC_BIN 2U
#define PVA_RESOURCE_TYPE_DMA_CONFIG 3U
uint8_t type;
uint8_t smmu_context_id;
uint8_t addr_hi;
uint8_t size_hi;
uint32_t addr_lo;
uint32_t size_lo;
};
/** \brief Maximum number of queues per context */
#define PVA_MAX_QUEUES_PER_CONTEXT (8)
@@ -300,7 +272,8 @@ struct pva_resource_entry {
#define PVA_ACCESS_RW \
(PVA_ACCESS_RO | PVA_ACCESS_WO) /**< Read and write access */
#define PVA_TIMEOUT_INF UINT64_MAX /**< Infinite timeout */
// unify timeout to uint64_t, in microseconds
#define PVA_SUBMIT_TIMEOUT_INF UINT64_MAX /**< Infinite timeout */
#define PVA_MAX_NUM_INPUT_STATUS 2 /**< Maximum number of input statuses */
#define PVA_MAX_NUM_OUTPUT_STATUS 2 /**< Maximum number of output statuses */
@@ -329,8 +302,9 @@ struct pva_cmdbuf_submit_info {
uint64_t submit_id;
/** Offset of the first chunk within the resource */
uint64_t first_chunk_offset;
#define PVA_EXEC_TIMEOUT_REUSE 0xFFFFFFFFU
#define PVA_EXEC_TIMEOUT_INF 0U
/** Execution timeout is in ms */
#define PVA_EXEC_TIMEOUT_INF UINT32_MAX
#define PVA_EXEC_TIMEOUT_REUSE (UINT32_MAX - 1)
/** Execution Timeout */
uint32_t execution_timeout_ms;
struct pva_fence prefences[PVA_MAX_NUM_PREFENCES];
@@ -351,13 +325,13 @@ struct pva_cmdbuf_status {
uint16_t status;
};
/** \brief Holds the PVA capabilities. */
/** @brief Holds the PVA capabilities. */
struct pva_characteristics {
/*! Holds the number of PVA engines. */
/** Holds the number of PVA engines. */
uint32_t pva_engine_count;
/*! Holds the number of VPUs per PVA engine. */
/** Holds the number of VPUs per PVA engine. */
uint32_t pva_pve_count;
/*! Holds the PVA generation information */
/** Holds the PVA generation information */
enum pva_hw_gen hw_version;
uint16_t max_desc_count;
uint16_t max_ch_count;
@@ -370,11 +344,6 @@ struct pva_characteristics {
uint16_t reserved_adb_count;
};
enum pva_error_inject_codes {
PVA_ERR_INJECT_WDT_HW_ERR, // watchdog Hardware error
PVA_ERR_INJECT_WDT_TIMEOUT, // watchdog Timeout error
};
/*
* !!!! DO NOT MODIFY !!!!!!
* These values are defined as per DriveOS guidelines
@@ -382,4 +351,20 @@ enum pva_error_inject_codes {
#define PVA_INPUT_STATUS_SUCCESS (0)
#define PVA_INPUT_STATUS_INVALID (0xFFFF)
/**
* @brief Context attribute keys.
*/
enum pva_attr {
PVA_CONTEXT_ATTR_MAX_CMDBUF_CHUNK_SIZE,
PVA_ATTR_HW_CHARACTERISTICS,
PVA_ATTR_VERSION
};
/**
* @brief Maximum size of a command buffer chunk.
*/
struct pva_ctx_attr_max_cmdbuf_chunk_size {
uint16_t max_size;
};
#endif // PVA_API_TYPES_H

View File

@@ -23,113 +23,10 @@ struct pva_vpu_instance_data {
};
/**
* @defgroup PVA_VPU_SYSCALL
*
* @brief PVA VPU SYS call IDs for each type of
* SYS call.
* @{
*/
//! @cond DISABLE_DOCUMENTATION
/**
* @brief VPU Syscall id for vpu printf write.
*/
#define PVA_FW_PE_SYSCALL_ID_WRITE (1U)
//! @endcond
/**
* @brief VPU Syscall id for Icache prefetch.
*/
#define PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH (2U)
/**
* @brief VPU Syscall id for masking exceptions.
*/
#define PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION (3U)
/**
* @brief VPU Syscall id for unmasking exceptions.
*/
#define PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION (4U)
//! @cond DISABLE_DOCUMENTATION
/**
* @brief VPU Syscall id for sampling VPU performance counters
*/
#define PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE (5U)
//! @endcond
/** @} */
/**
* @defgroup PVA_PPE_SYSCALL
*
* @brief PVA PPE SYS call IDs for each type of
* SYS call.
* @{
*/
//! @cond DISABLE_DOCUMENTATION
/**
* @brief PPE Syscall id for ppe printf write.
*/
#define PVA_FW_PPE_SYSCALL_ID_WRITE (1U)
/**
* @brief PPE Syscall id for masking exceptions.
*/
#define PVA_FW_PPE_SYSCALL_ID_MASK_EXCEPTION (2U)
/**
* @brief PPE Syscall id for unmasking exceptions.
*/
#define PVA_FW_PPE_SYSCALL_ID_UNMASK_EXCEPTION (3U)
/**
* @brief VPU Syscall id for sampling VPU performance counters
*/
#define PVA_FW_PPE_SYSCALL_ID_PERFMON_SAMPLE (4U)
/**
* @brief PPE Syscall id for Icache prefetch.
*/
#define PVA_FW_PPE_SYSCALL_ID_ICACHE_PREFETCH (5U)
//! @endcond
/** @} */
/**
* @brief Lookup table to convert PPE syscall IDs to VPU syscall IDs
* Index is PPE syscall ID, value is corresponding VPU syscall ID
*/
#define PVA_FW_PPE_TO_VPU_SYSCALL_LUT \
{ \
0U, /* Index 0: Invalid */ \
PVA_FW_PE_SYSCALL_ID_WRITE, /* Index 1: Write */ \
PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION, /* Index 2: Mask Exception */ \
PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION, /* Index 3: Unmask Exception */ \
PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE, /* Index 4: Perfmon Sample */ \
PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH /* Index 5: ICache Prefetch */ \
}
/**
* @brief Maximum valid PPE syscall ID
*/
#define PVA_FW_PPE_SYSCALL_ID_MAX PVA_FW_PPE_SYSCALL_ID_ICACHE_PREFETCH
/**
* @defgroup PVA_VPU_SYSCALL_WRITE_PARAM_GROUP
*
* @brief Parameter specification for syscall write
*/
/**
* @defgroup PVA_VPU_SYSCALL_COMMAND_FIELDS_GROUP
*
* @brief The command format to be used while issuing vpu syscall command from VPU kernel to R5.
 * The fields mentioned in this group are used for submitting the command
* through the Signal_R5 interface from VPU kernel.
*
* @{
* @brief Used to store VPU Syscall IDs, that represent the
* vpu syscall id between FW and VPU kernel.
*/
typedef uint32_t pva_vpu_syscall_id_t;
/**
* @brief The most significant bit of the vpu syscall ID field in
@@ -154,17 +51,56 @@ struct pva_vpu_instance_data {
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PARAM_LSB (0U)
/** @} */
/**
* @defgroup PVA_VPU_SYSCALL_ICACHE_PREFETCH_PARAM_FIELDS_GROUP
*
* @brief The parameter format to be used while issuing vpu syscall command from VPU kernel to R5 for syscall icache prefetch.
 * The fields mentioned in this group are used for submitting the icache prefetch command
* through the Signal_R5 interface from VPU kernel.
*
* @{
* @brief VPU Syscall id for vpu printf write.
*/
#define PVA_FW_PE_SYSCALL_ID_WRITE (1U)
/**
* @brief VPU Syscall id for Icache prefetch.
*/
#define PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH (2U)
/**
* @brief VPU Syscall id for masking exceptions.
*/
#define PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION (3U)
/**
* @brief VPU Syscall id for unmasking exceptions.
*/
#define PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION (4U)
/**
* @brief VPU Syscall id for sampling VPU performance counters
*/
#define PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE (5U)
/**
* @brief PPE Syscall id for ppe printf write.
*/
#define PVA_FW_PPE_SYSCALL_ID_WRITE (1U)
/**
* @brief PPE Syscall id for Icache prefetch.
*/
#define PVA_FW_PPE_SYSCALL_ID_ICACHE_PREFETCH (2U)
/**
* @brief PPE Syscall id for masking exceptions.
*/
#define PVA_FW_PPE_SYSCALL_ID_MASK_EXCEPTION (3U)
/**
* @brief PPE Syscall id for unmasking exceptions.
*/
#define PVA_FW_PPE_SYSCALL_ID_UNMASK_EXCEPTION (4U)
/**
* @brief PPE Syscall id for sampling PPE performance counters
*/
#define PVA_FW_PPE_SYSCALL_ID_PERFMON_SAMPLE (5U)
/**
* @brief The most significant bit of the prefetch cache line count field in
@@ -189,23 +125,146 @@ struct pva_vpu_instance_data {
* the vpu syscall command interface
*/
#define PVA_FW_PE_SYSCALL_PREFETCH_ADDR_LSB (0U)
/** @} */
/**
* @defgroup PVA_VPU_SYSCALL_MASK_UNMASK_PARAM_FIELDS_GROUP
*
* @brief The parameter format to be used while issuing vpu syscall command from VPU kernel
* to R5 for masking or unmasking FP NaN Exception.
 * The fields mentioned in this group are used for submitting the mask and unmask FP NaN exception command
* through the Signal_R5 interface from VPU kernel.
*
* @{
*/
/**
* @brief Parameter specification for syscall mask/unmask exceptions
*/
#define PVA_FW_PE_MASK_DIV_BY_0 (1U << 1U)
#define PVA_FW_PE_MASK_FP_INV_NAN (1U << 2U)
/** @} */
/**
 * @brief Write syscall parameter will be a pointer to this struct
*/
union pva_fw_pe_syscall_write {
struct {
uint32_t addr;
uint32_t size;
} in;
struct {
uint32_t written_size;
} out;
};
/**
* @brief Perfmon sample syscall parameter will be a pointer to this struct
*/
struct pva_fw_pe_syscall_perfmon_sample {
/** counter_mask[0] is for ID: 0-31; counter_mask[1] is for ID: 32-63 */
uint32_t counter_mask[2];
uint32_t output_addr;
};
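/*
 * Illustrative sketch (not part of the diff): selecting a performance counter
 * by ID in the two-word counter_mask, as described by the comment above
 * (word 0 covers IDs 0-31, word 1 covers IDs 32-63). The helper name is
 * hypothetical.
 */
static inline void perfmon_select_counter_sketch(
	struct pva_fw_pe_syscall_perfmon_sample *s, uint32_t counter_id)
{
	s->counter_mask[counter_id / 32U] |= 1U << (counter_id % 32U);
}
/* e.g. perfmon_select_counter_sketch(&s, PERFMON_COUNTER_ID_DLUT_BUSY_T26X); */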
/**
* @brief Index for t26x performance counters for VPU
*/
#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T26X (0U)
#define PERFMON_COUNTER_ID_VPS_ID_VALID_T26X (1U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T26X (2U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T26X (3U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T26X (4U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T26X (5U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T26X (6U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T26X (7U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T26X (8U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T26X (9U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T26X (10U)
#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T26X (11U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T26X (12U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T26X (13U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T26X (14U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T26X (15U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T26X (16U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T26X (17U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T26X (18U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T26X (19U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_FETCH_REQ_T26X (20U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X (21U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREEMPT_T26X (22U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_LINES_T26X (23U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_DUR_T26X (24U)
#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_DUR_T26X (25U)
#define PERFMON_COUNTER_ID_DLUT_BUSY_T26X (26U)
#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T26X (27U)
#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T26X (28U)
#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T26X (29U)
#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T26X (30U)
#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T26X (31U)
#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T26X (32U)
#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T26X (33U)
#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T26X (34U)
#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T26X (35U)
#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T26X (36U)
/**
* @brief Index for t23x performance counters
*/
#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T23X (0U)
#define PERFMON_COUNTER_ID_VPS_ID_VALID_T23X (1U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T23X (2U)
#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T23X (3U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T23X (4U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T23X (5U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T23X (6U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T23X (7U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T23X (8U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T23X (9U)
#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T23X (10U)
#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T23X (11U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T23X (12U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T23X (13U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T23X (14U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T23X (15U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T23X (16U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T23X (17U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T23X (18U)
#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T23X (19U)
#define PERFMON_COUNTER_ID_ICACHE_FETCH_REQ_T23X (20U)
#define PERFMON_COUNTER_ID_ICACHE_MISS_T23X (21U)
#define PERFMON_COUNTER_ID_ICACHE_PREEMP_T23X (22U)
#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_LINES_T23X (23U)
#define PERFMON_COUNTER_ID_ICACHE_MISS_DUR_T23X (24U)
#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_DUR_T23X (25U)
#define PERFMON_COUNTER_ID_DLUT_BUSY_T23X (26U)
#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T23X (27U)
#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T23X (28U)
#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T23X (29U)
#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T23X (30U)
#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T23X (31U)
#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T23X (32U)
#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T23X (33U)
#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T23X (34U)
#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T23X (35U)
#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T23X (36U)
/**
* @brief Index for t26x performance counters for PPE
*/
#define PERFMON_COUNTER_ID_PPS_STALL_ID_NO_VAL_INSTR_T26X (0U)
#define PERFMON_COUNTER_ID_PPS_ID_VALID_T26X (1U)
#define PERFMON_COUNTER_ID_PPS_STALL_ID_REG_DEPEND_T26X (2U)
#define PERFMON_COUNTER_ID_PPS_STALL_ID_ONLY_T26X (3U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX1_ONLY_T26X (4U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_LD_DEPENDENCY_T26X (5U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_ST_DEPENDENCY_T26X (6U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_DEPENDENCY_T26X (7U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STRM_STORE_FLUSH_T26X (8U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_STORE_FLUSH_T26X (9U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STORE_FLUSH_T26X (10U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_LD_T26X (11U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_ST_T26X (12U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_T26X (13U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LD_T26X (14U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_ST_T26X (15U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LDST_T26X (16U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_PUSHBACK_T26X (17U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STQ_PUSHBACK_T26X (18U)
#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_FLUSH_T26X (19U)
#define PERFMON_COUNTER_ID_PPS_WFE_GPI_EX_STATE_T26X (20U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_FETCH_REQ_T26X (21U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_T26X (22U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREEMPT_T26X (23U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_LINES_T26X (24U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_DUR_T26X (25U)
#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_DUR_T26X (26U)
#endif // PVA_API_VPU_H

View File

@@ -2,17 +2,17 @@
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "pva_kmd_abort.h"
#include "pva_kmd_shim_init.h"
#include "pva_kmd_device.h"
#include "pva_kmd_regs.h"
#include "pva_kmd_silicon_utils.h"
void pva_kmd_abort(struct pva_kmd_device *pva)
void pva_kmd_abort_fw(struct pva_kmd_device *pva)
{
//TODO: Report to FSI first about the SW error code.
pva_kmd_log_err("Abort: FW Reset Assert");
/* Put the FW in reset assert so user space cannot access the CCQ,
   forcing clients to destroy their contexts. Once all contexts are
   destroyed, KMD powers off the FW; on the first new context
   creation, KMD reloads the firmware image and powers the device
   back on. */
pva_kmd_fw_reset_assert(pva);
// HW watchdog may fire repeatedly if PVA is hung. Therefore, disable all
// interrupts to protect KMD from potential interrupt floods.
pva_kmd_disable_all_interrupts_nosync(pva);
// We will handle firmware reboot after all contexts are closed and a new
// one is re-opened again
pva->recovery = true;
}
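A minimal caller-side sketch of how the recovery flag set here gates later work, using the pva->recovery field and the PVA_ERR_FW_ABORTED code that appear elsewhere in these changes (illustrative only, not part of the diff):

    /* Illustrative: refuse new work once pva_kmd_abort_fw() has run. */
    static enum pva_error example_submit_gate(struct pva_kmd_device *pva)
    {
        if (pva->recovery) {
            /* FW was aborted; no new submissions until all contexts
             * close and firmware is reloaded on the next create. */
            return PVA_ERR_FW_ABORTED;
        }
        /* ...normal CCQ/command submission would follow... */
        return PVA_SUCCESS;
    }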

View File

@@ -5,6 +5,6 @@
#include "pva_kmd_device.h"
#include "pva_kmd_utils.h"
void pva_kmd_abort(struct pva_kmd_device *pva);
void pva_kmd_abort_fw(struct pva_kmd_device *pva);
#endif //PVA_KMD_ABORT_H

View File

@@ -53,13 +53,12 @@ static inline uint32_t next_slot(struct pva_kmd_block_allocator *allocator,
return *next;
}
void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator,
void *pva_kmd_alloc_block_unsafe(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id)
{
void *block = NULL;
uint32_t slot = INVALID_ID;
pva_kmd_mutex_lock(&allocator->allocator_lock);
if (allocator->free_slot_head != INVALID_ID) {
slot = allocator->free_slot_head;
allocator->free_slot_head =
@@ -69,18 +68,24 @@ void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator,
slot = allocator->next_free_slot;
allocator->next_free_slot++;
} else {
goto unlock;
return NULL;
}
}
allocator->slot_in_use[slot] = true;
pva_kmd_mutex_unlock(&allocator->allocator_lock);
*out_id = slot + allocator->base_id;
block = get_block(allocator, slot);
return block;
unlock:
}
void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id)
{
void *block = NULL;
pva_kmd_mutex_lock(&allocator->allocator_lock);
block = pva_kmd_alloc_block_unsafe(allocator, out_id);
pva_kmd_mutex_unlock(&allocator->allocator_lock);
return NULL;
return block;
}
static bool is_slot_valid(struct pva_kmd_block_allocator *allocator,
@@ -103,16 +108,15 @@ void *pva_kmd_get_block_unsafe(struct pva_kmd_block_allocator *allocator,
return get_block(allocator, slot);
}
enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator,
enum pva_error
pva_kmd_free_block_unsafe(struct pva_kmd_block_allocator *allocator,
uint32_t id)
{
uint32_t slot = id - allocator->base_id;
uint32_t *next;
enum pva_error err = PVA_SUCCESS;
pva_kmd_mutex_lock(&allocator->allocator_lock);
if (!is_slot_valid(allocator, slot)) {
err = PVA_INVAL;
goto unlock;
return PVA_INVAL;
}
allocator->slot_in_use[slot] = false;
@@ -120,7 +124,16 @@ enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator,
*next = allocator->free_slot_head;
allocator->free_slot_head = slot;
unlock:
return PVA_SUCCESS;
}
enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator,
uint32_t id)
{
enum pva_error err = PVA_SUCCESS;
pva_kmd_mutex_lock(&allocator->allocator_lock);
err = pva_kmd_free_block_unsafe(allocator, id);
pva_kmd_mutex_unlock(&allocator->allocator_lock);
return err;
}

View File

@@ -24,6 +24,8 @@ pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator,
void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id);
void *pva_kmd_alloc_block_unsafe(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id);
static inline void *
pva_kmd_zalloc_block(struct pva_kmd_block_allocator *allocator,
uint32_t *out_id)
@@ -47,6 +49,9 @@ void *pva_kmd_get_block_unsafe(struct pva_kmd_block_allocator *allocator,
uint32_t id);
enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator,
uint32_t id);
enum pva_error
pva_kmd_free_block_unsafe(struct pva_kmd_block_allocator *allocator,
uint32_t id);
void pva_kmd_block_allocator_deinit(struct pva_kmd_block_allocator *allocator);
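The new _unsafe variants skip the internal allocator_lock so a caller that already holds it can compose several operations atomically, as the devmem pool added later in this change set does. A sketch of the intended split between the locked wrappers and the unsafe calls (illustrative only):

    /* Two allocations composed under one lock via the _unsafe API.
     * The locked wrappers remain the default for standalone calls. */
    uint32_t id_a, id_b;
    void *a, *b;

    pva_kmd_mutex_lock(&allocator->allocator_lock);
    a = pva_kmd_alloc_block_unsafe(allocator, &id_a);
    b = pva_kmd_alloc_block_unsafe(allocator, &id_b);
    if ((a != NULL) && (b == NULL)) {
        /* Roll back the first allocation while still holding the lock. */
        (void)pva_kmd_free_block_unsafe(allocator, id_a);
        a = NULL;
    }
    pva_kmd_mutex_unlock(&allocator->allocator_lock);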

View File

@@ -143,6 +143,7 @@ static inline void pva_kmd_set_cmd_init_resource_table(
struct pva_cmd_init_resource_table *cmd, uint8_t resource_table_id,
uint64_t iova_addr, uint32_t max_num_entries)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_INIT_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
@@ -155,6 +156,7 @@ static inline void
pva_kmd_set_cmd_deinit_resource_table(struct pva_cmd_deinit_resource_table *cmd,
uint8_t resource_table_id)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
@@ -162,22 +164,29 @@ pva_kmd_set_cmd_deinit_resource_table(struct pva_cmd_deinit_resource_table *cmd,
static inline void pva_kmd_set_cmd_init_queue(struct pva_cmd_init_queue *cmd,
uint8_t ccq_id, uint8_t queue_id,
uint64_t iova_addr,
uint32_t max_num_submit)
uint64_t queue_addr,
uint32_t max_num_submit,
uint32_t syncpt_id,
uint64_t syncpt_addr)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_INIT_QUEUE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->ccq_id = ccq_id;
cmd->queue_id = queue_id;
cmd->queue_addr_lo = iova_lo(iova_addr);
cmd->queue_addr_hi = iova_hi(iova_addr);
cmd->queue_addr_lo = iova_lo(queue_addr);
cmd->queue_addr_hi = iova_hi(queue_addr);
cmd->max_n_submits = max_num_submit;
cmd->syncpt_id = syncpt_id;
cmd->syncpt_addr_lo = iova_lo(syncpt_addr);
cmd->syncpt_addr_hi = iova_hi(syncpt_addr);
}
static inline void
pva_kmd_set_cmd_deinit_queue(struct pva_cmd_deinit_queue *cmd, uint8_t ccq_id,
uint8_t queue_id)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_QUEUE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->ccq_id = ccq_id;
@@ -188,6 +197,7 @@ static inline void pva_kmd_set_cmd_update_resource_table(
struct pva_cmd_update_resource_table *cmd, uint32_t resource_table_id,
uint32_t resource_id, struct pva_resource_entry const *entry)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_table_id = resource_table_id;
@@ -199,6 +209,7 @@ static inline void
pva_kmd_set_cmd_unregister_resource(struct pva_cmd_unregister_resource *cmd,
uint32_t resource_id)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_UNREGISTER_RESOURCE;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->resource_id = resource_id;
@@ -208,6 +219,7 @@ static inline void
pva_kmd_set_cmd_enable_fw_profiling(struct pva_cmd_enable_fw_profiling *cmd,
uint32_t filter, uint8_t timestamp_type)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_ENABLE_FW_PROFILING;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->filter = filter;
@@ -217,6 +229,7 @@ pva_kmd_set_cmd_enable_fw_profiling(struct pva_cmd_enable_fw_profiling *cmd,
static inline void
pva_kmd_set_cmd_disable_fw_profiling(struct pva_cmd_disable_fw_profiling *cmd)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_DISABLE_FW_PROFILING;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
}
@@ -225,6 +238,7 @@ static inline void pva_kmd_set_cmd_get_tegra_stats(
struct pva_cmd_get_tegra_stats *cmd, uint32_t buffer_resource_id,
uint32_t buffer_size, uint64_t offset, bool enabled)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_GET_TEGRA_STATS;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->buffer_resource_id = buffer_resource_id;
@@ -238,6 +252,7 @@ static inline void
pva_kmd_set_cmd_set_debug_log_level(struct pva_cmd_set_debug_log_level *cmd,
uint32_t log_level)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_SET_DEBUG_LOG_LEVEL;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->log_level = log_level;
@@ -245,24 +260,23 @@ pva_kmd_set_cmd_set_debug_log_level(struct pva_cmd_set_debug_log_level *cmd,
static inline void pva_kmd_set_cmd_suspend_fw(struct pva_cmd_suspend_fw *cmd)
{
uint64_t len = (sizeof(*cmd) / sizeof(uint32_t));
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_SUSPEND_FW;
ASSERT(len <= 255u);
cmd->header.len = (uint8_t)(len);
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
}
static inline void pva_kmd_set_cmd_resume_fw(struct pva_cmd_resume_fw *cmd)
{
uint64_t len = (sizeof(*cmd) / sizeof(uint32_t));
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_RESUME_FW;
ASSERT(len <= 255u);
cmd->header.len = (uint8_t)(len);
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
}
static inline void pva_kmd_set_cmd_init_shared_dram_buffer(
struct pva_cmd_init_shared_dram_buffer *cmd, uint8_t interface,
uint32_t buffer_iova, uint32_t buffer_size)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_INIT_SHARED_DRAM_BUFFER;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->buffer_iova_hi = iova_hi(buffer_iova);
@@ -274,6 +288,7 @@ static inline void pva_kmd_set_cmd_init_shared_dram_buffer(
static inline void pva_kmd_set_cmd_deinit_shared_dram_buffer(
struct pva_cmd_deinit_shared_dram_buffer *cmd, uint8_t interface)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_SHARED_DRAM_BUFFER;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->interface = interface;
@@ -283,8 +298,12 @@ static inline void
pva_kmd_set_cmd_set_profiling_level(struct pva_cmd_set_profiling_level *cmd,
uint32_t level)
{
memset(cmd, 0, sizeof(*cmd));
cmd->header.opcode = PVA_CMD_OPCODE_SET_PROFILING_LEVEL;
cmd->header.len = sizeof(*cmd) / sizeof(uint32_t);
cmd->level = level;
}
#define CMD_LEN(cmd_type) (sizeof(cmd_type) / sizeof(uint32_t))
#endif // PVA_KMD_CMDBUF_H
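CMD_LEN() expresses a command's size in 32-bit words and, together with the unconditional memset in each setter, supports the stack-scratch submission pattern used by the context code in this change set. For example:

    /* Sizing a scratch buffer for two commands, in 32-bit words. */
    uint32_t cmd_scratch[CMD_LEN(struct pva_cmd_deinit_queue) +
                         CMD_LEN(struct pva_cmd_deinit_resource_table)];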

View File

@@ -23,7 +23,7 @@
// clang-format off
#if PVA_BUILD_MODE == PVA_BUILD_MODE_SIM
#define PVA_KMD_TIMEOUT_FACTOR 100
#elif (PVA_BUILD_MODE == PVA_BUILD_MODE_NATIVE) && (PVA_IS_DEBUG == 1)
#elif (PVA_BUILD_MODE == PVA_BUILD_MODE_NATIVE)
// On native builds, the FW calls the KMD's shared buffer handler in its
// own thread. In debug builds, if there are a large number of messages
// (prints, unregister, etc.), this handler might take a while to execute,
@@ -42,22 +42,16 @@
#define PVA_KMD_WAIT_FW_POLL_INTERVAL_US PVA_KMD_TIMEOUT(100) /*< 100 us*/
#define PVA_KMD_FW_BOOT_TIMEOUT_MS PVA_KMD_TIMEOUT(1000) /*< 1 second */
#define PVA_NUM_RW_SYNCPTS 56
#define PVA_NUM_RW_SYNCPTS (PVA_MAX_NUM_CCQ * PVA_NUM_RW_SYNCPTS_PER_CONTEXT)
// clang-format off
#if PVA_DEV_MAIN_COMPATIBLE == 1
#define PVA_KMD_LOAD_FROM_GSC_DEFAULT true
#if PVA_SAFETY == 1
#define PVA_KMD_APP_AUTH_DEFAULT true
#else
#define PVA_KMD_APP_AUTH_DEFAULT false
#endif
#else
#define PVA_KMD_LOAD_FROM_GSC_DEFAULT false
#define PVA_KMD_APP_AUTH_DEFAULT false
#endif
// clang-format on
#define PVA_KMD_MAX_NUM_USER_DMA_CONFIG 1024
#define PVA_KMD_DMA_CONFIG_POOL_INCR 256
#endif // PVA_KMD_CONSTANTS_H

View File

@@ -67,118 +67,86 @@ err_out:
static enum pva_error notify_fw_context_init(struct pva_kmd_context *ctx)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter;
struct pva_cmd_init_resource_table *res_cmd;
struct pva_cmd_init_queue *queue_cmd;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
uint32_t fence_val;
const struct pva_syncpt_rw_info *syncpt_info;
enum pva_error err;
uint32_t current_offset = 0;
uint32_t cmd_scratch[CMD_LEN(struct pva_cmd_init_resource_table) +
CMD_LEN(struct pva_cmd_init_queue) +
CMD_LEN(struct pva_cmd_update_resource_table)];
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
res_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*res_cmd));
ASSERT(res_cmd != NULL);
res_cmd = (struct pva_cmd_init_resource_table *)pva_offset_pointer(
&cmd_scratch[0], current_offset);
current_offset += sizeof(*res_cmd);
queue_cmd = (struct pva_cmd_init_queue *)pva_offset_pointer(
&cmd_scratch[0], current_offset);
current_offset += sizeof(*queue_cmd);
update_cmd = (struct pva_cmd_update_resource_table *)pva_offset_pointer(
&cmd_scratch[0], current_offset);
current_offset += sizeof(*update_cmd);
pva_kmd_set_cmd_init_resource_table(
res_cmd, ctx->resource_table_id,
ctx->ctx_resource_table.table_mem->iova,
ctx->ctx_resource_table.n_entries);
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
ASSERT(queue_cmd != NULL);
syncpt_info = pva_kmd_queue_get_rw_syncpt_info(ctx, ctx->ccq_id);
pva_kmd_set_cmd_init_queue(
queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/
ctx->ctx_queue.queue_memory->iova,
ctx->ctx_queue.max_num_submit);
update_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
ctx->ctx_queue.max_num_submit, syncpt_info->syncpt_id,
syncpt_info->syncpt_iova);
err = pva_kmd_make_resource_entry(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(update_cmd,
0, /* KMD's resource table ID */
ctx->submit_memory_resource_id,
&entry);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
// Error is either QUEUE_FULL or TIMEDOUT
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
err = pva_kmd_submit_cmd_sync(dev_submitter, cmd_scratch,
sizeof(cmd_scratch),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
goto err_out;
}
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
return err;
}
static enum pva_error notify_fw_context_deinit(struct pva_kmd_context *ctx)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter;
struct pva_cmd_deinit_resource_table *deinit_table_cmd;
struct pva_cmd_deinit_queue *deinit_queue_cmd;
uint32_t fence_val;
uint32_t cmd_scratch[CMD_LEN(struct pva_cmd_deinit_queue) +
CMD_LEN(struct pva_cmd_deinit_resource_table)];
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
deinit_queue_cmd = (struct pva_cmd_deinit_queue *)pva_offset_pointer(
&cmd_scratch[0], 0);
deinit_table_cmd =
(struct pva_cmd_deinit_resource_table *)pva_offset_pointer(
&cmd_scratch[0], sizeof(struct pva_cmd_deinit_queue));
deinit_queue_cmd =
pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_queue_cmd));
ASSERT(deinit_queue_cmd != NULL);
pva_kmd_set_cmd_deinit_queue(
deinit_queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id /* For privileged queues, queue ID == user CCQ ID*/
);
deinit_table_cmd =
pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_table_cmd));
ASSERT(deinit_table_cmd != NULL);
pva_kmd_set_cmd_deinit_resource_table(deinit_table_cmd,
ctx->resource_table_id);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
err = pva_kmd_submit_cmd_sync(dev_submitter, cmd_scratch,
sizeof(cmd_scratch),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when deinitializing context");
goto err_out;
}
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
return err;
}
@@ -189,20 +157,24 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
uint32_t queue_mem_size;
uint64_t chunk_mem_size;
struct pva_fw_postfence post_fence = { 0 };
struct pva_syncpt_rw_info *syncpts;
uint64_t size;
/* Allocate RW syncpoints for this context */
syncpts = (struct pva_syncpt_rw_info *)pva_kmd_alloc_block(
&ctx->pva->syncpt_allocator, &ctx->syncpt_block_index);
ASSERT(syncpts != NULL);
if (ctx->inited) {
err = PVA_INVAL;
goto err_out;
}
if (res_table_capacity == 0u) {
pva_kmd_log_err("Invalid resource capacity");
err = PVA_BAD_PARAMETER_ERROR;
goto err_out;
}
/* Init resource table for this context */
err = pva_kmd_resource_table_init(&ctx->ctx_resource_table, ctx->pva,
ctx->smmu_ctx_id, res_table_capacity,
PVA_KMD_MAX_NUM_USER_DMA_CONFIG);
ctx->smmu_ctx_id, res_table_capacity);
if (err != PVA_SUCCESS) {
goto drop_device;
goto err_out;
}
/* Init privileged queue for this context */
@@ -225,7 +197,8 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
/* Allocate memory for submission */
chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size(
PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_PRIV_CHUNKS);
pva_kmd_get_max_cmdbuf_chunk_size(ctx->pva),
PVA_KMD_MAX_NUM_PRIV_CHUNKS);
/* Allocate one post fence at the end. This memory will be added to
* KMD's own resource table. We don't need to explicitly free it. It
* will be freed after we drop the resource. */
@@ -242,6 +215,8 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
ctx->submit_memory,
&ctx->submit_memory_resource_id);
if (err != PVA_SUCCESS) {
// Ownership of submit memory is transferred to KMD's resource table so
// if adding to resource table fails, we need to free it here.
pva_kmd_device_memory_free(ctx->submit_memory);
goto queue_deinit;
}
@@ -249,7 +224,8 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
/* Init chunk pool */
err = pva_kmd_cmdbuf_chunk_pool_init(
&ctx->chunk_pool, ctx->submit_memory_resource_id,
0 /* offset */, chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE,
0 /* offset */, chunk_mem_size,
pva_kmd_get_max_cmdbuf_chunk_size(ctx->pva),
PVA_KMD_MAX_NUM_PRIV_CHUNKS, ctx->submit_memory->va);
if (err != PVA_SUCCESS) {
goto free_dram_buffer_resource;
@@ -283,13 +259,15 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx,
pva_kmd_resource_table_lock,
pva_kmd_resource_table_unlock);
if (err != PVA_SUCCESS) {
goto deinit_submitter;
goto deinit_fw_context;
}
ctx->inited = true;
return PVA_SUCCESS;
deinit_fw_context:
notify_fw_context_deinit(ctx);
deinit_submitter:
pva_kmd_mutex_deinit(&ctx->chunk_pool_lock);
pva_kmd_mutex_deinit(&ctx->submit_lock);
@@ -298,12 +276,10 @@ free_dram_buffer_resource:
pva_kmd_drop_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id);
queue_deinit:
pva_kmd_queue_deinit(&ctx->ctx_queue);
pva_kmd_device_memory_free(ctx->ctx_queue_mem);
deinit_table:
pva_kmd_resource_table_deinit(&ctx->ctx_resource_table);
drop_device:
pva_kmd_device_idle(ctx->pva);
err_out:
return err;
}
@@ -312,25 +288,24 @@ void pva_kmd_context_deinit(struct pva_kmd_context *ctx)
enum pva_error err;
if (ctx->inited) {
if (!ctx->pva->recovery) {
err = notify_fw_context_deinit(ctx);
ASSERT(err == PVA_SUCCESS);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to notify FW of context deinit");
}
err = pva_kmd_shared_buffer_deinit(ctx->pva, ctx->ccq_id);
ASSERT(err == PVA_SUCCESS);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to deinit FW buffer");
}
pva_kmd_device_idle(ctx->pva);
pva_kmd_mutex_deinit(&ctx->submit_lock);
pva_kmd_mutex_deinit(&ctx->chunk_pool_lock);
pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool);
pva_kmd_drop_resource(&ctx->pva->dev_resource_table,
ctx->submit_memory_resource_id);
pva_kmd_queue_deinit(&ctx->ctx_queue);
pva_kmd_device_memory_free(ctx->ctx_queue_mem);
pva_kmd_resource_table_deinit(&ctx->ctx_resource_table);
pva_kmd_free_block(&ctx->pva->syncpt_allocator,
ctx->syncpt_block_index);
ctx->inited = false;
}
}
@@ -345,14 +320,13 @@ static void pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx)
pva_kmd_mutex_lock(&ctx->queue_allocator.allocator_lock);
queue = pva_kmd_get_block_unsafe(&ctx->queue_allocator,
queue_id);
pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock);
if (queue != NULL) {
pva_kmd_mutex_unlock(
&ctx->queue_allocator.allocator_lock);
err = pva_kmd_queue_destroy(ctx, queue_id);
ASSERT(err == PVA_SUCCESS);
} else {
pva_kmd_mutex_unlock(
&ctx->queue_allocator.allocator_lock);
if (err != PVA_SUCCESS) {
pva_kmd_log_err_u64(
"Failed to destroy queue %d", queue_id);
}
}
}
}
@@ -363,11 +337,12 @@ void pva_kmd_context_destroy(struct pva_kmd_context *ctx)
pva_kmd_destroy_all_queues(ctx);
pva_kmd_context_deinit(ctx);
pva_kmd_device_idle(ctx->pva);
pva_kmd_block_allocator_deinit(&ctx->queue_allocator);
pva_kmd_free(ctx->queue_allocator_mem);
pva_kmd_mutex_deinit(&ctx->ccq_lock);
err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id);
pva_kmd_mutex_deinit(&ctx->ocb_lock);
err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id);
ASSERT(err == PVA_SUCCESS);
}
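The builder prepare/reserve/submit/wait sequence above is replaced by filling a scratch array and handing it to pva_kmd_submit_cmd_sync(). A condensed sketch of that pattern, assuming the pva_kmd_submit_cmd_sync() signature shown in this diff and with pva and log_level already in scope:

    uint32_t cmd_scratch[CMD_LEN(struct pva_cmd_set_debug_log_level)];
    struct pva_cmd_set_debug_log_level *cmd =
        (struct pva_cmd_set_debug_log_level *)&cmd_scratch[0];
    enum pva_error err;

    /* Build the command in place; setters zero-initialize via memset first. */
    pva_kmd_set_cmd_set_debug_log_level(cmd, log_level);
    /* Submit and poll until the FW consumes it or the timeout expires. */
    err = pva_kmd_submit_cmd_sync(&pva->submitter, cmd_scratch,
                                  sizeof(cmd_scratch),
                                  PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
                                  PVA_KMD_WAIT_FW_TIMEOUT_US);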

View File

@@ -63,9 +63,6 @@ struct pva_kmd_context {
void *plat_data;
uint64_t ccq_shm_handle;
/** Index of block of syncpoints allocated for this context */
uint32_t syncpt_block_index;
uint32_t syncpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT];
pva_kmd_mutex_t ocb_lock;
};

View File

@@ -27,45 +27,6 @@ static uint64_t read_from_buffer_to_user(void *to, uint64_t count,
return count;
}
static enum pva_error
pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva,
uint32_t level)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_set_profiling_level *cmd;
uint32_t fence_val;
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_set_profiling_level(cmd, level);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when setting profiling level");
goto err_out;
}
return PVA_SUCCESS;
err_out:
return err;
}
static int64_t profiling_level_read(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset,
uint64_t size)
@@ -118,92 +79,20 @@ static int64_t profiling_level_write(struct pva_kmd_device *dev,
"pva_kmd_device_busy failed when submitting set profiling level cmd");
return 0;
}
err = pva_kmd_notify_fw_set_profiling_level(dev, value);
pva_kmd_device_idle(dev);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to notify FW about profiling level change");
return 0;
}
pva_kmd_device_idle(dev);
}
return size;
}
void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva)
{
static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0_v3",
"ocd_vpu1_v3" };
struct pva_kmd_file_ops *profiling_fops;
pva_kmd_debugfs_create_bool(pva, "stats_enabled",
&pva->debugfs_context.stats_enable);
pva_kmd_debugfs_create_bool(pva, "vpu_debug",
&pva->debugfs_context.vpu_debug);
// Create profiling_level file operations
profiling_fops = &pva->debugfs_context.profiling_level_fops;
profiling_fops->read = profiling_level_read;
profiling_fops->write = profiling_level_write;
profiling_fops->open = NULL;
profiling_fops->release = NULL;
profiling_fops->pdev = pva;
pva_kmd_debugfs_create_file(pva, "profiling_level", profiling_fops);
pva->debugfs_context.vpu_fops.read = &get_vpu_stats;
pva->debugfs_context.vpu_fops.write = NULL;
pva->debugfs_context.vpu_fops.pdev = pva;
pva_kmd_debugfs_create_file(pva, "vpu_stats",
&pva->debugfs_context.vpu_fops);
for (uint32_t i = 0; i < NUM_VPU_BLOCKS; i++) {
pva->debugfs_context.vpu_ocd_fops[i].open =
&pva_kmd_vpu_ocd_open;
pva->debugfs_context.vpu_ocd_fops[i].release =
&pva_kmd_vpu_ocd_release;
pva->debugfs_context.vpu_ocd_fops[i].read =
&pva_kmd_vpu_ocd_read;
pva->debugfs_context.vpu_ocd_fops[i].write =
&pva_kmd_vpu_ocd_write;
pva->debugfs_context.vpu_ocd_fops[i].pdev = pva;
pva->debugfs_context.vpu_ocd_fops[i].file_data =
(void *)&pva->regspec.vpu_dbg_instr_reg_offset[i];
pva_kmd_debugfs_create_file(
pva, vpu_ocd_names[i],
&pva->debugfs_context.vpu_ocd_fops[i]);
}
pva->debugfs_context.allowlist_ena_fops.read =
&get_vpu_allowlist_enabled;
pva->debugfs_context.allowlist_ena_fops.write = &update_vpu_allowlist;
pva->debugfs_context.allowlist_ena_fops.pdev = pva;
pva_kmd_debugfs_create_file(pva, "vpu_app_authentication",
&pva->debugfs_context.allowlist_ena_fops);
pva->debugfs_context.allowlist_path_fops.read = &get_vpu_allowlist_path;
pva->debugfs_context.allowlist_path_fops.write =
&update_vpu_allowlist_path;
pva->debugfs_context.allowlist_path_fops.pdev = pva;
pva_kmd_debugfs_create_file(pva, "allowlist_path",
&pva->debugfs_context.allowlist_path_fops);
pva->debugfs_context.fw_debug_log_level_fops.write =
&update_fw_debug_log_level;
pva->debugfs_context.fw_debug_log_level_fops.read = NULL;
pva->debugfs_context.fw_debug_log_level_fops.pdev = pva;
pva_kmd_debugfs_create_file(
pva, "fw_debug_log_level",
&pva->debugfs_context.fw_debug_log_level_fops);
pva_kmd_device_init_profiler(pva);
pva_kmd_device_init_tegra_stats(pva);
}
void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *pva)
{
pva_kmd_device_deinit_tegra_stats(pva);
pva_kmd_device_deinit_profiler(pva);
pva_kmd_debugfs_remove_nodes(pva);
}
static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats,
uint8_t *out_buffer, uint64_t offset,
uint64_t len)
@@ -236,8 +125,9 @@ static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats,
formatted_len);
}
int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset, uint64_t size)
static int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset,
uint64_t size)
{
struct pva_kmd_tegrastats kmd_tegra_stats;
@@ -251,9 +141,9 @@ int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data,
return print_vpu_stats(&kmd_tegra_stats, out_buffer, offset, size);
}
int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva, void *file_data,
uint8_t *out_buffer, uint64_t offset,
uint64_t size)
static int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva,
void *file_data, uint8_t *out_buffer,
uint64_t offset, uint64_t size)
{
// 1 byte for '0' or '1' and another 1 byte for the Null character
char out_str[2];
@@ -267,7 +157,7 @@ int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva, void *file_data,
sizeof(out_str));
}
int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
static int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size)
{
@@ -302,9 +192,9 @@ int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
return size;
}
int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data,
uint8_t *out_buffer, uint64_t offset,
uint64_t size)
static int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva,
void *file_data, uint8_t *out_buffer,
uint64_t offset, uint64_t size)
{
uint64_t len;
pva_kmd_mutex_lock(&(pva->pva_auth->allow_list_lock));
@@ -317,13 +207,18 @@ int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data,
return len;
}
int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size)
static int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva,
void *file_data,
const uint8_t *in_buffer,
uint64_t offset, uint64_t size)
{
char buffer[ALLOWLIST_FILE_LEN];
unsigned long retval;
if (size == 0) {
return 0;
}
if (size > sizeof(buffer)) {
pva_kmd_log_err_u64(
"Length of allowlist path is too long. It must be less than ",
@@ -338,7 +233,7 @@ int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data,
}
// Replace the trailing newline with a null terminator
buffer[safe_subu64(size, 1u)] = '\0';
buffer[size - 1u] = '\0';
pva_kmd_mutex_lock(&(pva->pva_auth->allow_list_lock));
pva_kmd_update_allowlist_path(pva, buffer);
@@ -347,9 +242,10 @@ int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data,
return size;
}
int64_t update_fw_debug_log_level(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size)
static int64_t update_fw_debug_log_level(struct pva_kmd_device *pva,
void *file_data,
const uint8_t *in_buffer,
uint64_t offset, uint64_t size)
{
uint32_t log_level;
unsigned long retval;
@@ -387,10 +283,143 @@ int64_t update_fw_debug_log_level(struct pva_kmd_device *pva, void *file_data,
goto err_end;
}
pva_kmd_notify_fw_set_debug_log_level(pva, log_level);
err = pva_kmd_notify_fw_set_debug_log_level(pva, log_level);
pva_kmd_device_idle(pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to notify FW about debug log level change");
}
}
err_end:
return copy_size;
}
static int64_t get_fw_debug_log_level(struct pva_kmd_device *dev,
void *file_data, uint8_t *out_buffer,
uint64_t offset, uint64_t size)
{
char print_buffer[64];
int formatted_len;
formatted_len = snprintf(print_buffer, sizeof(print_buffer), "%u\n",
dev->fw_debug_log_level);
if (formatted_len <= 0) {
return -1;
}
return read_from_buffer_to_user(out_buffer, size, offset, print_buffer,
(uint64_t)formatted_len);
}
enum pva_error pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva)
{
static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0_v3",
"ocd_vpu1_v3" };
struct pva_kmd_file_ops *profiling_fops;
enum pva_error err;
pva_kmd_debugfs_create_bool(pva, "stats_enabled",
&pva->debugfs_context.stats_enable);
pva_kmd_debugfs_create_bool(pva, "vpu_debug",
&pva->debugfs_context.vpu_debug);
// Create profiling_level file operations
profiling_fops = &pva->debugfs_context.profiling_level_fops;
profiling_fops->read = profiling_level_read;
profiling_fops->write = profiling_level_write;
profiling_fops->open = NULL;
profiling_fops->release = NULL;
profiling_fops->pdev = pva;
err = pva_kmd_debugfs_create_file(pva, "profiling_level",
profiling_fops);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to create profiling_level debugfs file");
return err;
}
pva->debugfs_context.vpu_fops.read = &get_vpu_stats;
pva->debugfs_context.vpu_fops.write = NULL;
pva->debugfs_context.vpu_fops.pdev = pva;
err = pva_kmd_debugfs_create_file(pva, "vpu_stats",
&pva->debugfs_context.vpu_fops);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to create vpu_stats debugfs file");
return err;
}
for (uint32_t i = 0; i < NUM_VPU_BLOCKS; i++) {
pva->debugfs_context.vpu_ocd_fops[i].open =
&pva_kmd_vpu_ocd_open;
pva->debugfs_context.vpu_ocd_fops[i].release =
&pva_kmd_vpu_ocd_release;
pva->debugfs_context.vpu_ocd_fops[i].read =
&pva_kmd_vpu_ocd_read;
pva->debugfs_context.vpu_ocd_fops[i].write =
&pva_kmd_vpu_ocd_write;
pva->debugfs_context.vpu_ocd_fops[i].pdev = pva;
pva->debugfs_context.vpu_ocd_fops[i].file_data =
(void *)&pva->regspec.vpu_dbg_instr_reg_offset[i];
err = pva_kmd_debugfs_create_file(
pva, vpu_ocd_names[i],
&pva->debugfs_context.vpu_ocd_fops[i]);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to create vpu_ocd debugfs file");
return err;
}
}
pva->debugfs_context.allowlist_ena_fops.read =
&get_vpu_allowlist_enabled;
pva->debugfs_context.allowlist_ena_fops.write = &update_vpu_allowlist;
pva->debugfs_context.allowlist_ena_fops.pdev = pva;
err = pva_kmd_debugfs_create_file(
pva, "vpu_app_authentication",
&pva->debugfs_context.allowlist_ena_fops);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to create vpu_app_authentication debugfs file");
return err;
}
pva->debugfs_context.allowlist_path_fops.read = &get_vpu_allowlist_path;
pva->debugfs_context.allowlist_path_fops.write =
&update_vpu_allowlist_path;
pva->debugfs_context.allowlist_path_fops.pdev = pva;
err = pva_kmd_debugfs_create_file(
pva, "allowlist_path",
&pva->debugfs_context.allowlist_path_fops);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to create allowlist_path debugfs file");
return err;
}
pva->debugfs_context.fw_debug_log_level_fops.write =
&update_fw_debug_log_level;
pva->debugfs_context.fw_debug_log_level_fops.read =
&get_fw_debug_log_level;
pva->debugfs_context.fw_debug_log_level_fops.pdev = pva;
err = pva_kmd_debugfs_create_file(
pva, "fw_debug_log_level",
&pva->debugfs_context.fw_debug_log_level_fops);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Failed to create fw_debug_log_level debugfs file");
return err;
}
pva_kmd_device_init_profiler(pva);
pva_kmd_device_init_tegra_stats(pva);
return PVA_SUCCESS;
}
void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *pva)
{
pva_kmd_device_deinit_tegra_stats(pva);
pva_kmd_device_deinit_profiler(pva);
pva_kmd_debugfs_remove_nodes(pva);
}
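pva_kmd_debugfs_create_nodes() now reports failures instead of returning void, so the probe-time caller is expected to check the result. A minimal sketch (the exact caller is outside this diff):

    err = pva_kmd_debugfs_create_nodes(pva);
    if (err != PVA_SUCCESS) {
        pva_kmd_log_err("Failed to create PVA debugfs nodes");
        /* Debugfs is diagnostic-only; continue or propagate as appropriate. */
    }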

View File

@@ -37,24 +37,7 @@ struct pva_kmd_debugfs_context {
struct pva_kmd_file_ops fw_debug_log_level_fops;
};
void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev);
enum pva_error pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev);
void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *dev);
int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data,
uint8_t *out_buffer, uint64_t offset, uint64_t size);
int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size);
int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva, void *file_data,
uint8_t *out_buffer, uint64_t offset,
uint64_t size);
int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size);
int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data,
uint8_t *out_buffer, uint64_t offset,
uint64_t size);
int64_t update_fw_debug_log_level(struct pva_kmd_device *dev, void *file_data,
const uint8_t *in_buffer, uint64_t offset,
uint64_t size);
#endif //PVA_KMD_DEBUGFS_H

View File

@@ -33,10 +33,11 @@
* Initialization through CCQ is only intended for KMD's own resource table (the
* first resource table created).
*/
void pva_kmd_send_resource_table_info_by_ccq(
static enum pva_error pva_kmd_send_resource_table_info_by_ccq(
struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table)
{
enum pva_error err;
uint64_t addr = res_table->table_mem->iova;
uint32_t n_entries = res_table->n_entries;
uint64_t ccq_entry =
@@ -51,8 +52,9 @@ void pva_kmd_send_resource_table_info_by_ccq(
err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
ASSERT(err == PVA_SUCCESS);
pva_kmd_mutex_unlock(&pva->ccq0_lock);
return err;
}
/**
@@ -61,7 +63,8 @@ void pva_kmd_send_resource_table_info_by_ccq(
* Initialization through CCQ is only intended for KMD's own queue (the first
* queue created).
*/
void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
static enum pva_error
pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
struct pva_kmd_queue *queue)
{
enum pva_error err;
@@ -78,8 +81,9 @@ void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
ASSERT(err == PVA_SUCCESS);
pva_kmd_mutex_unlock(&pva->ccq0_lock);
return err;
}
/**
@@ -113,13 +117,13 @@ static void pva_kmd_device_init_submission(struct pva_kmd_device *pva)
/* Init KMD's resource table */
err = pva_kmd_resource_table_init(&pva->dev_resource_table, pva,
PVA_R5_SMMU_CONTEXT_ID,
PVA_KMD_MAX_NUM_KMD_RESOURCES,
PVA_KMD_MAX_NUM_KMD_DMA_CONFIGS);
PVA_KMD_MAX_NUM_KMD_RESOURCES);
ASSERT(err == PVA_SUCCESS);
/* Allocate memory for submission*/
chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size(
PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_KMD_CHUNKS);
pva_kmd_get_max_cmdbuf_chunk_size(pva),
PVA_KMD_MAX_NUM_KMD_CHUNKS);
size = safe_addu64(chunk_mem_size, (uint64_t)sizeof(uint32_t));
/* Allocate one post fence at the end. We don't need to free this memory
@@ -138,7 +142,7 @@ static void pva_kmd_device_init_submission(struct pva_kmd_device *pva)
/* Init chunk pool */
pva_kmd_cmdbuf_chunk_pool_init(
&pva->chunk_pool, pva->submit_memory_resource_id, 0,
chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE,
chunk_mem_size, pva_kmd_get_max_cmdbuf_chunk_size(pva),
PVA_KMD_MAX_NUM_KMD_CHUNKS, pva->submit_memory->va);
/* Init fence */
@@ -167,21 +171,25 @@ static void pva_kmd_device_deinit_submission(struct pva_kmd_device *pva)
pva_kmd_drop_resource(&pva->dev_resource_table,
pva->submit_memory_resource_id);
pva_kmd_resource_table_deinit(&pva->dev_resource_table);
pva_kmd_queue_deinit(&pva->dev_queue);
pva_kmd_device_memory_free(pva->queue_memory);
}
struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
uint32_t device_index,
bool app_authenticate)
bool app_authenticate,
bool test_mode)
{
struct pva_kmd_device *pva;
enum pva_error err;
uint32_t chunk_size;
uint32_t size;
if (test_mode) {
pva_kmd_log_err("Test mode is enabled");
}
pva = pva_kmd_zalloc_nofail(sizeof(*pva));
pva->test_mode = test_mode;
pva->device_index = device_index;
pva->load_from_gsc = false;
pva->is_hv_mode = true;
@@ -211,13 +219,6 @@ struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
pva_kmd_device_plat_init(pva);
chunk_size = safe_mulu32((uint32_t)sizeof(struct pva_syncpt_rw_info),
(uint32_t)PVA_NUM_RW_SYNCPTS_PER_CONTEXT);
err = pva_kmd_block_allocator_init(&pva->syncpt_allocator,
pva->syncpt_rw, 0, chunk_size,
PVA_MAX_NUM_USER_CONTEXTS);
ASSERT(err == PVA_SUCCESS);
pva_kmd_device_init_submission(pva);
err = pva_kmd_init_vpu_app_auth(pva, app_authenticate);
@@ -257,7 +258,6 @@ void pva_kmd_device_destroy(struct pva_kmd_device *pva)
pva_kmd_wait_for_active_contexts(pva);
pva_kmd_device_deinit_submission(pva);
pva_kmd_device_plat_deinit(pva);
pva_kmd_block_allocator_deinit(&pva->syncpt_allocator);
pva_kmd_block_allocator_deinit(&pva->context_allocator);
pva_kmd_free(pva->context_mem);
pva_kmd_mutex_deinit(&pva->ccq0_lock);
@@ -266,44 +266,71 @@ void pva_kmd_device_destroy(struct pva_kmd_device *pva)
pva_kmd_free(pva);
}
static enum pva_error
pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva,
uint32_t level)
static enum pva_error config_fw_by_cmds(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_set_profiling_level *cmd;
uint32_t fence_val;
enum pva_error err;
enum pva_error err = PVA_SUCCESS;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
err = pva_kmd_notify_fw_enable_profiling(pva);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_set_profiling_level(cmd, level);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
/* Set FW debug log level */
err = pva_kmd_notify_fw_set_debug_log_level(pva,
pva->fw_debug_log_level);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
// If the user had set profiling level before power-on, send the update to FW
err = pva_kmd_notify_fw_set_profiling_level(
pva, pva->debugfs_context.profiling_level);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when setting profiling level");
goto err_out;
}
return PVA_SUCCESS;
err_out:
return err;
}
enum pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
/* Reset KMD queue */
pva->dev_queue.queue_header->cb_head = 0;
pva->dev_queue.queue_header->cb_tail = 0;
err = pva_kmd_send_resource_table_info_by_ccq(pva,
&pva->dev_resource_table);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_shared_buffer_init(pva, PVA_PRIV_CCQ_ID,
PVA_KMD_FW_BUF_ELEMENT_SIZE,
PVA_KMD_FW_PROFILING_BUF_NUM_ELEMENTS,
NULL, NULL);
if (err != PVA_SUCCESS) {
pva_kmd_log_err_u64(
"pva kmd buffer initialization failed for interface ",
PVA_PRIV_CCQ_ID);
goto err_out;
}
err = config_fw_by_cmds(pva);
if (err != PVA_SUCCESS) {
goto err_out;
}
err_out:
return err;
}
enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva)
{
enum pva_error err = PVA_SUCCESS;
@@ -321,36 +348,26 @@ enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva)
if (err != PVA_SUCCESS) {
goto poweroff;
}
/* Reset KMD queue */
pva->dev_queue.queue_header->cb_head = 0;
pva->dev_queue.queue_header->cb_tail = 0;
pva_kmd_send_resource_table_info_by_ccq(
pva, &pva->dev_resource_table);
pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
// TODO: need better error handling here
err = pva_kmd_shared_buffer_init(
pva, PVA_PRIV_CCQ_ID, PVA_KMD_FW_BUF_ELEMENT_SIZE,
PVA_KMD_FW_PROFILING_BUF_NUM_ELEMENTS, NULL, NULL);
err = pva_kmd_config_fw_after_boot(pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err_u64(
"pva kmd buffer initialization failed for interface ",
PVA_PRIV_CCQ_ID);
goto deinit_fw;
}
pva_kmd_notify_fw_enable_profiling(pva);
/* Set FW debug log level */
pva_kmd_notify_fw_set_debug_log_level(pva,
pva->fw_debug_log_level);
// If the user had set profiling level before power-on, send the update to FW
pva_kmd_notify_fw_set_profiling_level(
pva, pva->debugfs_context.profiling_level);
} else {
// Once firmware is aborted, we no longer allow incrementing PVA
// refcount. This makes sure refcount will eventually reach 0 and allow
// device to be powered off.
if (pva->recovery) {
pva_kmd_log_err_u64(
"PVA firmware aborted. "
"Waiting for active PVA uses to finish. Remaining",
pva->refcount);
err = PVA_ERR_FW_ABORTED;
goto unlock;
}
}
pva->refcount = safe_addu32(pva->refcount, 1U);
pva->refcount = safe_addu32(pva->refcount, 1U);
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return PVA_SUCCESS;
@@ -371,15 +388,15 @@ void pva_kmd_device_idle(struct pva_kmd_device *pva)
ASSERT(pva->refcount > 0);
pva->refcount--;
if (pva->refcount == 0) {
if (!pva->recovery) {
/* Disable FW profiling */
/* TODO: once debugfs is up, move these calls */
pva_kmd_notify_fw_disable_profiling(pva);
err = pva_kmd_notify_fw_disable_profiling(pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"pva_kmd_notify_fw_disable_profiling failed during device idle");
}
// TODO: need better error handling here
err = pva_kmd_shared_buffer_deinit(pva, PVA_PRIV_CCQ_ID);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("pva_kmd_shared_buffer_deinit failed");
pva_kmd_log_err(
"pva_kmd_shared_buffer_deinit failed during device idle");
}
pva_kmd_deinit_fw(pva);
pva_kmd_power_off(pva);
@@ -397,9 +414,12 @@ enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva,
if (timeout_us == 0) {
pva_kmd_log_err(
"pva_kmd_ccq_push_with_timeout Timed out");
pva_kmd_abort(pva);
pva_kmd_abort_fw(pva);
return PVA_TIMEDOUT;
}
if (pva->recovery) {
return PVA_ERR_FW_ABORTED;
}
pva_kmd_sleep_us(sleep_interval_us);
timeout_us = sat_sub64(timeout_us, sleep_interval_us);
}
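Taken together, pva_kmd_device_busy() and pva_kmd_device_idle() form a refcounted power gate: the first busy call boots the FW and runs pva_kmd_config_fw_after_boot(), and the last idle call deinitializes and powers it off. A minimal caller sketch (illustrative only):

    err = pva_kmd_device_busy(pva);
    if (err != PVA_SUCCESS) {
        /* May be PVA_ERR_FW_ABORTED while recovery is pending. */
        return err;
    }
    /* ...interact with the firmware... */
    pva_kmd_device_idle(pva);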

View File

@@ -4,6 +4,7 @@
#ifndef PVA_KMD_DEVICE_H
#define PVA_KMD_DEVICE_H
#include "pva_constants.h"
#include "pva_fw.h"
#include "pva_kmd_cmdbuf.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_mutex.h"
@@ -26,9 +27,6 @@
#define NV_PVA1_CLASS_ID 0xF2
struct pva_syncpt_rw_info {
/** Don't switch the field order: syncpt_id and syncpt_iova are prefilled during KMD boot,
* and the first field is updated by pva_kmd_allocator every time it is freed */
uint32_t syncpt_value;
uint32_t syncpt_id;
uint64_t syncpt_iova;
};
@@ -127,12 +125,13 @@ struct pva_kmd_device {
uint8_t bl_sector_pack_format;
/** Offset between 2 syncpoints */
uint32_t syncpt_offset;
uint64_t syncpt_ro_iova;
uint64_t syncpt_rw_iova;
uint32_t num_syncpts;
struct pva_syncpt_rw_info syncpt_rw[PVA_NUM_RW_SYNCPTS];
struct pva_kmd_block_allocator syncpt_allocator;
uint32_t syncpt_page_size;
uint64_t ro_syncpt_base_iova;
uint32_t num_ro_syncpts;
uint64_t rw_syncpt_base_iova;
uint32_t rw_syncpt_region_size;
struct pva_syncpt_rw_info rw_syncpts[PVA_NUM_RW_SYNCPTS];
struct vmem_region *vmem_regions_tab;
bool support_hwseq_frame_linking;
@@ -145,11 +144,14 @@ struct pva_kmd_device {
/** Carveout info for FW */
struct pva_co_info fw_carveout;
bool test_mode;
};
struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id,
uint32_t device_index,
bool app_authenticate);
bool app_authenticate,
bool test_mode);
void pva_kmd_device_destroy(struct pva_kmd_device *pva);
@@ -161,11 +163,7 @@ enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva,
uint64_t sleep_interval_us,
uint64_t timeout_us);
void pva_kmd_send_resource_table_info_by_ccq(
struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table);
void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva,
struct pva_kmd_queue *queue);
enum pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva);
bool pva_kmd_device_maybe_on(struct pva_kmd_device *pva);
@@ -177,4 +175,14 @@ static inline uint32_t pva_kmd_get_device_class_id(struct pva_kmd_device *pva)
return NV_PVA1_CLASS_ID;
}
}
static inline uint16_t
pva_kmd_get_max_cmdbuf_chunk_size(struct pva_kmd_device *pva)
{
if (pva->test_mode) {
return PVA_TEST_MODE_MAX_CMDBUF_CHUNK_SIZE;
} else {
return PVA_MAX_CMDBUF_CHUNK_SIZE;
}
}
#endif // PVA_KMD_DEVICE_H
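The command-buffer chunk size is now selected per device (smaller chunks in MODS test mode), so pool sizing goes through this helper instead of the PVA_MAX_CMDBUF_CHUNK_SIZE constant. Illustrative use, matching the calls elsewhere in this change set:

    uint64_t chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size(
        pva_kmd_get_max_cmdbuf_chunk_size(pva), PVA_KMD_MAX_NUM_KMD_CHUNKS);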

View File

@@ -0,0 +1,266 @@
// SPDX-License-Identifier: GPL-2.0-only
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "pva_kmd_devmem_pool.h"
#include "pva_kmd_utils.h"
#include "pva_api.h"
#include "pva_utils.h"
static uint64_t get_devmem_offset(struct pva_kmd_devmem_element const *devmem)
{
return (uint64_t)safe_mulu32(devmem->ele_idx,
devmem->segment->owner_pool->element_size);
}
uint64_t pva_kmd_get_devmem_iova(struct pva_kmd_devmem_element const *devmem)
{
return safe_addu64(devmem->segment->mem->iova,
get_devmem_offset(devmem));
}
void *pva_kmd_get_devmem_va(struct pva_kmd_devmem_element const *devmem)
{
return pva_offset_pointer(devmem->segment->mem->va,
get_devmem_offset(devmem));
}
static struct pva_kmd_devmem_pool_segment *
allocate_segment(struct pva_kmd_devmem_pool *pool)
{
struct pva_kmd_devmem_pool_segment *segment;
struct pva_kmd_device_memory *mem = NULL;
uint64_t segment_size = safe_mulu64((uint64_t)pool->element_size,
(uint64_t)pool->n_element_incr);
void *va;
enum pva_error err;
/* Allocate the segment structure */
segment = pva_kmd_zalloc(sizeof(*segment));
if (segment == NULL) {
goto err_out;
}
/* Allocate device memory */
mem = pva_kmd_device_memory_alloc_map(
segment_size, pool->pva, PVA_ACCESS_RW, pool->smmu_ctx_idx);
if (mem == NULL) {
goto free_segment;
}
segment->mem = mem;
segment->owner_pool = pool;
segment->n_free_ele =
pool->n_element_incr; /* Initialize all elements as free */
va = mem->va;
/* Initialize the segment allocator */
err = pva_kmd_block_allocator_init(&segment->elem_allocator, va, 0,
pool->element_size,
pool->n_element_incr);
if (err != PVA_SUCCESS) {
goto free_mem;
}
/* Add segment to the pool */
segment->next = pool->segment_list_head;
pool->segment_list_head = segment;
pool->n_free_element =
safe_addu32(pool->n_free_element, pool->n_element_incr);
return segment;
free_mem:
pva_kmd_device_memory_free(mem);
free_segment:
pva_kmd_free(segment);
err_out:
return NULL;
}
enum pva_error pva_kmd_devmem_pool_init(struct pva_kmd_devmem_pool *pool,
struct pva_kmd_device *pva,
uint8_t smmu_ctx_idx,
uint32_t element_size,
uint32_t ele_incr_count)
{
struct pva_kmd_devmem_pool_segment *segment;
enum pva_error err = PVA_SUCCESS;
/* Initialize the pool structure */
memset(pool, 0, sizeof(*pool));
pool->smmu_ctx_idx = smmu_ctx_idx;
pool->element_size =
safe_pow2_roundup_u32(element_size, sizeof(uint64_t));
pool->n_element_incr = ele_incr_count;
pool->n_free_element = 0;
pool->segment_list_head = NULL;
pool->pva = pva;
err = pva_kmd_mutex_init(&pool->pool_lock);
if (err != PVA_SUCCESS) {
goto err_out;
}
/* Allocate the first segment */
segment = allocate_segment(pool);
if (segment == NULL) {
err = PVA_NOMEM;
goto deinit_mutex;
}
return PVA_SUCCESS;
deinit_mutex:
pva_kmd_mutex_deinit(&pool->pool_lock);
err_out:
return err;
}
static enum pva_error
pva_kmd_devmem_pool_alloc(struct pva_kmd_devmem_pool *pool,
struct pva_kmd_devmem_element *devmem)
{
struct pva_kmd_devmem_pool_segment *segment = NULL;
struct pva_kmd_devmem_pool_segment *new_segment = NULL;
uint32_t ele_idx = (uint32_t)-1;
enum pva_error err = PVA_SUCCESS;
pva_kmd_mutex_lock(&pool->pool_lock);
/* Check if we have any free elements */
if (pool->n_free_element == 0) {
/* Need to allocate a new segment */
new_segment = allocate_segment(pool);
if (new_segment == NULL) {
err = PVA_NOMEM;
goto unlock;
}
}
/* Try to find a free element in the pool */
segment = pool->segment_list_head;
while (segment != NULL) {
void *va = NULL;
va = pva_kmd_alloc_block_unsafe(&segment->elem_allocator,
&ele_idx);
if (va != NULL) {
/* Found a free element */
break;
}
segment = segment->next;
}
ASSERT(segment != NULL);
devmem->segment = segment;
devmem->ele_idx = ele_idx;
pool->n_free_element = safe_subu32(pool->n_free_element, 1);
segment->n_free_ele = safe_subu32(segment->n_free_ele, 1);
unlock:
pva_kmd_mutex_unlock(&pool->pool_lock);
return err;
}
enum pva_error pva_kmd_devmem_pool_zalloc(struct pva_kmd_devmem_pool *pool,
struct pva_kmd_devmem_element *devmem)
{
enum pva_error err = pva_kmd_devmem_pool_alloc(pool, devmem);
if (err != PVA_SUCCESS) {
return err;
}
memset(pva_kmd_get_devmem_va(devmem), 0, pool->element_size);
return PVA_SUCCESS;
}
static void free_segment(struct pva_kmd_devmem_pool *pool,
struct pva_kmd_devmem_pool_segment *target_segment)
{
struct pva_kmd_devmem_pool_segment *segment;
struct pva_kmd_devmem_pool_segment *prev_segment = NULL;
/* Find previous segment to update the linked list */
segment = pool->segment_list_head;
while (segment != NULL && segment != target_segment) {
prev_segment = segment;
segment = segment->next;
}
/* The target segment must be present in the list */
ASSERT(segment != NULL);
/* Remove this segment from the list */
if (prev_segment == NULL) {
/* This is the head segment */
pool->segment_list_head = target_segment->next;
} else {
prev_segment->next = target_segment->next;
}
/* Free the segment allocator */
pva_kmd_block_allocator_deinit(&target_segment->elem_allocator);
/* Free the device memory */
pva_kmd_device_memory_free(target_segment->mem);
/* Free the segment structure */
pva_kmd_free(target_segment);
/* Update the free element count */
pool->n_free_element =
safe_subu32(pool->n_free_element, pool->n_element_incr);
}
void pva_kmd_devmem_pool_free(struct pva_kmd_devmem_element *devmem)
{
struct pva_kmd_devmem_pool *pool = devmem->segment->owner_pool;
struct pva_kmd_devmem_pool_segment *current_segment = devmem->segment;
uint32_t threshold;
pva_kmd_mutex_lock(&pool->pool_lock);
/* Free the element */
pva_kmd_free_block_unsafe(&current_segment->elem_allocator,
devmem->ele_idx);
pool->n_free_element = safe_addu32(pool->n_free_element, 1);
current_segment->n_free_ele =
safe_addu32(current_segment->n_free_ele, 1);
/* Check if the current segment is now empty using n_free_ele counter */
if (current_segment->n_free_ele ==
current_segment->elem_allocator.max_num_blocks) {
/* Only release the segment if at least n_element_incr free elements
   remain in the pool afterwards */
threshold = safe_mulu32(pool->n_element_incr, 2);
if (pool->n_free_element >= threshold) {
free_segment(pool, current_segment);
}
}
pva_kmd_mutex_unlock(&pool->pool_lock);
}
void pva_kmd_devmem_pool_deinit(struct pva_kmd_devmem_pool *pool)
{
struct pva_kmd_devmem_pool_segment *segment = pool->segment_list_head;
struct pva_kmd_devmem_pool_segment *next;
/* Free all segments */
while (segment != NULL) {
next = segment->next;
/* Free the segment allocator */
pva_kmd_block_allocator_deinit(&segment->elem_allocator);
/* Free the device memory */
pva_kmd_device_memory_free(segment->mem);
/* Free the segment structure */
pva_kmd_free(segment);
segment = next;
}
pool->segment_list_head = NULL;
pva_kmd_mutex_deinit(&pool->pool_lock);
}

View File

@@ -0,0 +1,100 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_KMD_DEVMEM_POOL_H
#define PVA_KMD_DEVMEM_POOL_H
#include "pva_api.h"
#include "pva_kmd_block_allocator.h"
#include "pva_kmd_device_memory.h"
/** @brief A segment of a device memory pool.
*
* It holds a fixed size array of device memory blocks. A pool is a linked list
* of segments.
*/
struct pva_kmd_devmem_pool_segment {
/** The owner pool. */
struct pva_kmd_devmem_pool *owner_pool;
/** The next segment in the pool. */
struct pva_kmd_devmem_pool_segment *next;
/** The device memory for the segment. */
struct pva_kmd_device_memory *mem;
/** The allocator for the elements in the segment. */
struct pva_kmd_block_allocator elem_allocator;
/** The number of free elements in the segment. */
uint32_t n_free_ele;
};
/** @brief A device memory pool that holds fixed size elements.
*
* It allocates memory in segments, each segment contains n_element_incr
* elements.
* - element_size will be rounded up to the nearest 8 bytes for alignment.
* - The pool is initialized with element_size * n_element_incr capacity.
* - Once exhausted, the pool will allocate a new segment of memory and increase
* the capacity by n_element_incr.
* - When an element is freed, the pool does not immediately release the whole
* segment even if the whole segment is empty. However, if there are 2 *
* n_element_incr free elements, the pool will release a whole segment, so
* that there's still at least n_element_incr free elements.
* - The pool is thread safe.
*/
struct pva_kmd_devmem_pool {
/** The SMMU context index for the pool. */
uint8_t smmu_ctx_idx;
/** The size of each element in the pool. */
uint32_t element_size;
/** The number of elements to allocate in each segment. */
uint32_t n_element_incr;
/** The total number of free elements in the pool, across all segments. */
uint32_t n_free_element;
/** The head of the segment list. */
struct pva_kmd_devmem_pool_segment *segment_list_head;
/** The PVA device. */
struct pva_kmd_device *pva;
/** The mutex for the pool. */
pva_kmd_mutex_t pool_lock;
};
/** @brief Device memory from a pool.
*
* It is an element in a segment of a pool.
*/
struct pva_kmd_devmem_element {
/** The segment that contains the element. */
struct pva_kmd_devmem_pool_segment *segment;
/** The index of the element in the segment. */
uint32_t ele_idx;
};
/** @brief Get the IOVA of a device memory element. */
uint64_t pva_kmd_get_devmem_iova(struct pva_kmd_devmem_element const *devmem);
/** @brief Get the virtual address of a device memory element. */
void *pva_kmd_get_devmem_va(struct pva_kmd_devmem_element const *devmem);
/** @brief Initialize a device memory pool.
*
* @param pool The device memory pool to initialize.
* @param pva The PVA device.
* @param smmu_ctx_idx The SMMU context index for the pool.
* @param element_size The size of each element in the pool.
* @param ele_incr_count The number of elements to allocate in each segment.
*/
enum pva_error pva_kmd_devmem_pool_init(struct pva_kmd_devmem_pool *pool,
struct pva_kmd_device *pva,
uint8_t smmu_ctx_idx,
uint32_t element_size,
uint32_t ele_incr_count);
/** @brief Allocate a device memory element from a pool and zero-initialize it. */
enum pva_error
pva_kmd_devmem_pool_zalloc(struct pva_kmd_devmem_pool *pool,
struct pva_kmd_devmem_element *devmem);
/** @brief Free a device memory element from a pool. */
void pva_kmd_devmem_pool_free(struct pva_kmd_devmem_element *devmem);
/** @brief Deinitialize a device memory pool. */
void pva_kmd_devmem_pool_deinit(struct pva_kmd_devmem_pool *pool);
#endif
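A usage sketch for the pool API declared above; the 256-byte element size and the pva and smmu_ctx_idx variables are placeholders for illustration, while PVA_KMD_DMA_CONFIG_POOL_INCR comes from the constants header in this change set:

    struct pva_kmd_devmem_pool pool;
    struct pva_kmd_devmem_element elem;
    enum pva_error err;

    /* 256-byte elements, grown PVA_KMD_DMA_CONFIG_POOL_INCR at a time. */
    err = pva_kmd_devmem_pool_init(&pool, pva, smmu_ctx_idx, 256U,
                                   PVA_KMD_DMA_CONFIG_POOL_INCR);
    if (err == PVA_SUCCESS) {
        err = pva_kmd_devmem_pool_zalloc(&pool, &elem);
        if (err == PVA_SUCCESS) {
            /* IOVA for the FW side, VA for the CPU side. */
            uint64_t iova = pva_kmd_get_devmem_iova(&elem);
            void *va = pva_kmd_get_devmem_va(&elem);
            (void)iova;
            (void)va;
            pva_kmd_devmem_pool_free(&elem);
        }
        pva_kmd_devmem_pool_deinit(&pool);
    }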

View File

@@ -62,42 +62,41 @@ pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table,
struct pva_dma_config dma_config;
struct pva_fw_dma_slot *dyn_slots;
struct pva_fw_dma_reloc *dyn_relocs;
struct pva_fw_dma_slot *static_slots = dma_aux->static_slots;
struct pva_fw_dma_reloc *static_relocs = dma_aux->static_relocs;
struct pva_kmd_dma_access *access_sizes = dma_aux->access_sizes;
struct pva_kmd_dma_scratch_buffer *scratch_buf;
// Mapping descriptor index to channel index
uint8_t desc_to_ch[PVA_MAX_NUM_DMA_DESC];
scratch_buf = pva_kmd_zalloc(sizeof(*scratch_buf));
if (scratch_buf == NULL) {
err = PVA_NOMEM;
goto err_out;
}
for (uint32_t i = 0; i < PVA_MAX_NUM_DMA_DESC; i++) {
desc_to_ch[i] = PVA_KMD_INVALID_CH_IDX;
}
//set access_sizes to 0 by default
(void)memset(
access_sizes, 0,
(PVA_MAX_NUM_DMA_DESC * sizeof(struct pva_kmd_dma_access)));
err = pva_kmd_parse_dma_config(dma_cfg_hdr, dma_config_size,
&dma_config,
&resource_table->pva->hw_consts);
if (err != PVA_SUCCESS) {
goto err_out;
goto free_scratch_buf;
}
err = pva_kmd_validate_dma_config(&dma_config,
&resource_table->pva->hw_consts,
access_sizes,
dma_aux->hw_dma_descs_mask);
scratch_buf->access_sizes,
scratch_buf->hw_dma_descs_mask);
if (err != PVA_SUCCESS) {
goto err_out;
goto free_scratch_buf;
}
trace_dma_channels(&dma_config, desc_to_ch);
err = pva_kmd_compute_dma_access(&dma_config, access_sizes,
dma_aux->hw_dma_descs_mask);
err = pva_kmd_compute_dma_access(&dma_config, scratch_buf->access_sizes,
scratch_buf->hw_dma_descs_mask);
if (err != PVA_SUCCESS) {
goto err_out;
goto free_scratch_buf;
}
dyn_slots = pva_offset_pointer(fw_dma_cfg,
@@ -107,9 +106,10 @@ pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table,
dma_config.header.num_dynamic_slots *
sizeof(*dyn_slots));
pva_kmd_collect_relocs(&dma_config, access_sizes, static_slots,
pva_kmd_collect_relocs(&dma_config, scratch_buf->access_sizes,
scratch_buf->static_slots,
dma_config.header.num_static_slots,
static_relocs, dyn_slots,
scratch_buf->static_relocs, dyn_slots,
dma_config.header.num_dynamic_slots, dyn_relocs,
desc_to_ch);
@@ -117,26 +117,27 @@ pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table,
&dma_config, fw_dma_cfg, &fw_fetch_size,
resource_table->pva->support_hwseq_frame_linking);
dma_aux->res_table = resource_table;
err = pva_kmd_dma_use_resources(&dma_config, dma_aux);
if (err != PVA_SUCCESS) {
goto err_out;
goto free_scratch_buf;
}
err = pva_kmd_bind_static_buffers(fw_dma_cfg, dma_aux, static_slots,
dma_config.header.num_static_slots,
static_relocs,
dma_config.static_bindings,
dma_config.header.num_static_slots);
err = pva_kmd_bind_static_buffers(
fw_dma_cfg, dma_aux, scratch_buf->static_slots,
dma_config.header.num_static_slots, scratch_buf->static_relocs,
dma_config.static_bindings, dma_config.header.num_static_slots);
if (err != PVA_SUCCESS) {
goto drop_res;
}
*out_fw_fetch_size = fw_fetch_size;
pva_kmd_free(scratch_buf);
return PVA_SUCCESS;
drop_res:
pva_kmd_unload_dma_config_unsafe(dma_aux);
free_scratch_buf:
pva_kmd_free(scratch_buf);
err_out:
return err;
}
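
Disentangled from the interleaved before/after lines, the cleanup flow after
this change keeps the heap-allocated scratch buffer alive only for the
duration of the call and releases it on every exit path. A reduced sketch;
the two step functions are placeholders for the real parse/validate/bind
calls:

static enum pva_error parse_and_validate_step(void); /* placeholder */
static enum pva_error bind_step(void);               /* placeholder */

static enum pva_error load_with_scratch_example(void)
{
        struct pva_kmd_dma_scratch_buffer *scratch_buf;
        enum pva_error err;

        scratch_buf = pva_kmd_zalloc(sizeof(*scratch_buf));
        if (scratch_buf == NULL) {
                err = PVA_NOMEM;
                goto err_out;
        }
        err = parse_and_validate_step();
        if (err != PVA_SUCCESS) {
                goto free_scratch_buf;
        }
        err = bind_step();
        if (err != PVA_SUCCESS) {
                goto drop_res;
        }
        pva_kmd_free(scratch_buf);
        return PVA_SUCCESS;

drop_res:
        /* pva_kmd_unload_dma_config_unsafe(dma_aux) in the real function */
free_scratch_buf:
        pva_kmd_free(scratch_buf);
err_out:
        return err;
}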

View File

@@ -50,9 +50,10 @@ struct pva_kmd_dma_resource_aux {
uint32_t dram_res_count;
/** DRAM buffers statically referenced by the DMA configuration */
uint32_t static_dram_res_ids[PVA_KMD_MAX_NUM_DMA_DRAM_SLOTS];
};
/* Below are work buffers need during DMA configuration loading. They
* don't fit on stack. */
/* Scratch buffers needed during DMA configuration loading. They don't fit on stack. */
struct pva_kmd_dma_scratch_buffer {
struct pva_fw_dma_slot static_slots[PVA_KMD_MAX_NUM_DMA_SLOTS];
struct pva_fw_dma_reloc static_relocs[PVA_KMD_MAX_NUM_DMA_SLOTS];
struct pva_kmd_dma_access access_sizes[PVA_MAX_NUM_DMA_DESC];

View File

@@ -94,6 +94,9 @@ bind_static_dram_slot(struct pva_dma_config_resource *dma_config,
int64_t slot_access_end_addr = 0LL;
uint64_t slot_surface_combined_offset = 0ULL;
pva_math_error math_error = MATH_OP_SUCCESS;
uint8_t slot_access_flags =
PVA_EXTRACT16(slot->flags, PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB,
PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB, uint8_t);
if ((slot->flags & PVA_FW_DMA_SLOT_FLAG_DRAM) == 0) {
pva_kmd_log_err("Binding DRAM buffer to incompatible slot");
@@ -101,6 +104,14 @@ bind_static_dram_slot(struct pva_dma_config_resource *dma_config,
goto out;
}
if ((slot_access_flags & dram_res->mem->iova_access_flags) !=
slot_access_flags) {
pva_kmd_log_err(
"DRAM buffer does not have the required access permissions");
err = PVA_INVALID_BINDING;
goto out;
}
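
The check added here requires every access right demanded by the slot to be
present on the bound buffer. A standalone illustration; the flag values are
examples, not the driver's PVA_ACCESS_* encoding:

#include <stdbool.h>
#include <stdint.h>

#define EX_ACCESS_RO 0x1u /* read permitted */
#define EX_ACCESS_WO 0x2u /* write permitted */
#define EX_ACCESS_RW (EX_ACCESS_RO | EX_ACCESS_WO)

static bool access_flags_ok(uint8_t slot_access, uint8_t buffer_access)
{
        /* Every bit required by the slot must be granted on the buffer. */
        return (slot_access & buffer_access) == slot_access;
}

/* access_flags_ok(EX_ACCESS_WO, EX_ACCESS_RO) == false: a write slot cannot
 * be bound to a read-only buffer.
 * access_flags_ok(EX_ACCESS_RO, EX_ACCESS_RW) == true. */
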
if (is_block_linear) {
if (slot->flags & PVA_FW_DMA_SLOT_FLAG_CB) {
pva_kmd_log_err(

View File

@@ -218,7 +218,7 @@ validate_descriptor(const struct pva_dma_descriptor *desc,
/* DMA_DESC_LDID */
if ((desc->link_desc_id > cfg_hdr->num_descriptors) ||
((desc->link_desc_id != 0) &&
pva_is_reserved_desc(desc->link_desc_id - PVA_DMA_DESC0))) {
pva_is_reserved_desc(desc->link_desc_id - PVA_DMA_DESC_ID_BASE))) {
pva_kmd_log_err("ERR: Invalid linker Desc ID");
return PVA_INVAL;
}
@@ -423,6 +423,8 @@ pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg,
err = PVA_INVAL;
goto err_out;
}
dma_aux->vpu_bin_res_id = dma_cfg->header.vpu_exec_resource_id;
if (vpu_bin_rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) {
pva_kmd_log_err(
"Invalid VPU exec resource id used by DMA config");
@@ -432,9 +434,6 @@ pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg,
vpu_bin = &vpu_bin_rec->vpu_bin;
}
dma_aux->vpu_bin_res_id = dma_cfg->header.vpu_exec_resource_id;
dma_aux->dram_res_count = 0;
/* Increment reference count for all static DRAM buffers; For static
* VMEM buffers, check that symbol ID is valid. */
for (i = 0; i < dma_cfg->header.num_static_slots; i++) {
@@ -455,7 +454,8 @@ pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg,
dma_aux->static_dram_res_ids[dma_aux->dram_res_count] =
slot_buf->dram.resource_id;
dma_aux->dram_res_count += 1;
dma_aux->dram_res_count =
safe_addu32(dma_aux->dram_res_count, 1U);
if (rec->type != PVA_RESOURCE_TYPE_DRAM) {
pva_kmd_log_err(
@@ -505,9 +505,10 @@ static uint16_t get_slot_id(uint16_t slot)
return slot & PVA_DMA_SLOT_ID_MASK;
}
static uint8_t get_slot_flag(uint8_t transfer_mode, bool cb_enable)
static uint16_t get_slot_flag(uint8_t transfer_mode, bool cb_enable,
bool is_dst)
{
uint8_t flags = 0;
uint16_t flags = 0;
if (transfer_mode == PVA_DMA_TRANS_MODE_VMEM) {
flags |= PVA_FW_DMA_SLOT_FLAG_VMEM_DATA;
} else if (transfer_mode == PVA_DMA_TRANS_MODE_L2SRAM) {
@@ -521,6 +522,15 @@ static uint8_t get_slot_flag(uint8_t transfer_mode, bool cb_enable)
if (cb_enable) {
flags |= PVA_FW_DMA_SLOT_FLAG_CB;
}
if (is_dst) {
flags |= PVA_INSERT(PVA_ACCESS_WO,
PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB,
PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB);
} else {
flags |= PVA_INSERT(PVA_ACCESS_RO,
PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB,
PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB);
}
return flags;
}
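
get_slot_flag now also stamps the required access type into the widened
16-bit flags word: write access for destination slots, read access for
source slots. The insert/extract pair below has the same shape; the macro
names, field position and widths are assumptions, while the real PVA_INSERT
and PVA_EXTRACT16 live in the common PVA headers:

#include <stdint.h>

#define EX_FIELD_MASK(msb, lsb) \
        ((uint16_t)(((1u << ((msb) - (lsb) + 1u)) - 1u) << (lsb)))
#define EX_INSERT(val, msb, lsb) \
        ((uint16_t)(((uint16_t)(val) << (lsb)) & EX_FIELD_MASK((msb), (lsb))))
#define EX_EXTRACT(reg, msb, lsb) \
        ((uint16_t)(((uint16_t)(reg) & EX_FIELD_MASK((msb), (lsb))) >> (lsb)))

/* With an assumed access field at bits [9:8]:
 *   flags |= EX_INSERT(access, 9, 8)   when building the slot flags, and
 *   access = EX_EXTRACT(flags, 9, 8)   when binding the buffer later. */
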
@@ -529,7 +539,7 @@ static void update_reloc_count(uint16_t slot, uint8_t transfer_mode,
struct pva_fw_dma_slot *out_static_slots,
uint16_t num_static_slots,
struct pva_fw_dma_slot *out_dyn_slots,
uint16_t num_dyn_slots)
uint16_t num_dyn_slots, bool is_dst)
{
uint8_t slot_id = get_slot_id(slot);
@@ -537,13 +547,12 @@ static void update_reloc_count(uint16_t slot, uint8_t transfer_mode,
out_dyn_slots[slot_id].reloc_count =
safe_addu16(out_dyn_slots[slot_id].reloc_count, 1U);
out_dyn_slots[slot_id].flags |=
get_slot_flag(transfer_mode, cb_enable);
get_slot_flag(transfer_mode, cb_enable, is_dst);
} else if (slot & PVA_DMA_STATIC_SLOT) {
out_static_slots[slot_id].reloc_count =
safe_addu16(out_static_slots[slot_id].reloc_count, 1U);
;
out_static_slots[slot_id].flags |=
get_slot_flag(transfer_mode, cb_enable);
get_slot_flag(transfer_mode, cb_enable, is_dst);
}
}
@@ -567,17 +576,17 @@ static void count_relocs(struct pva_dma_config const *dma_cfg,
update_reloc_count(desc->src.slot, desc->src.transfer_mode,
desc->src.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
num_dyn_slots, false);
update_reloc_count(desc->dst.slot, desc->dst.transfer_mode,
desc->dst.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
num_dyn_slots, true);
update_reloc_count(desc->dst2_slot, desc->dst.transfer_mode,
desc->dst.cb_enable, out_static_slots,
num_static_slots, out_dyn_slots,
num_dyn_slots);
num_dyn_slots, true);
}
}
@@ -867,10 +876,6 @@ void pva_kmd_collect_relocs(struct pva_dma_config const *dma_cfg,
uint8_t static_reloc_off[PVA_MAX_NUM_DMA_DESC * 3];
uint8_t dyn_reloc_off[PVA_MAX_NUM_DMA_DESC * 3];
memset(out_static_slots, 0,
num_static_slots * sizeof(*out_static_slots));
memset(out_dyn_slots, 0, num_dyn_slots * sizeof(*out_dyn_slots));
/* First pass: count the number of relocates for each slot */
count_relocs(dma_cfg, out_static_slots, num_static_slots, out_dyn_slots,
num_dyn_slots);

View File

@@ -16,42 +16,23 @@
enum pva_error pva_kmd_notify_fw_set_debug_log_level(struct pva_kmd_device *pva,
uint32_t log_level)
{
struct pva_kmd_submitter *submitter = &pva->submitter;
struct pva_kmd_cmdbuf_builder builder;
struct pva_cmd_set_debug_log_level *cmd;
uint32_t fence_val;
enum pva_error err;
struct pva_cmd_set_debug_log_level cmd = { 0 };
pva_kmd_set_cmd_set_debug_log_level(&cmd, log_level);
err = pva_kmd_submitter_prepare(submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_set_debug_log_level(cmd, log_level);
err = pva_kmd_submitter_submit(submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("set debug log level cmd submission failed");
goto cancel_builder;
}
err = pva_kmd_submitter_wait(submitter, fence_val,
return pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when setting debug log level");
goto err_out;
}
}
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
enum pva_error pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva,
uint32_t level)
{
struct pva_cmd_set_profiling_level cmd = { 0 };
pva_kmd_set_cmd_set_profiling_level(&cmd, level);
err_out:
return err;
return pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
}
void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer)
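
After this refactor both notify helpers collapse to the same three steps:
zero-initialize the command on the stack, fill it in, and hand it to the
synchronous submit helper. Disentangled from the interleaved hunk above, the
debug-log-level variant reads roughly as:

enum pva_error pva_kmd_notify_fw_set_debug_log_level(struct pva_kmd_device *pva,
                                                     uint32_t log_level)
{
        struct pva_cmd_set_debug_log_level cmd = { 0 };

        pva_kmd_set_cmd_set_debug_log_level(&cmd, log_level);
        return pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
                                       PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
                                       PVA_KMD_WAIT_FW_TIMEOUT_US);
}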

View File

@@ -15,6 +15,9 @@ struct pva_kmd_fw_print_buffer {
enum pva_error pva_kmd_notify_fw_set_debug_log_level(struct pva_kmd_device *pva,
uint32_t log_level);
enum pva_error pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva,
uint32_t level);
void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer);
#endif // PVA_KMD_FW_DEBUG_H

View File

@@ -10,6 +10,7 @@
#include "pva_utils.h"
#include "pva_kmd_fw_profiler.h"
#include "pva_kmd_shared_buffer.h"
#include "pva_api_private.h"
// TODO: This is here temporarily just for testing. Should be moved to a common header
#define CMD_ID(x) PVA_EXTRACT(x, 6, 0, uint8_t)
@@ -101,13 +102,11 @@ void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva)
enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_enable_fw_profiling *cmd;
struct pva_cmd_enable_fw_profiling cmd = { 0 };
uint32_t filter = 0U;
uint8_t timestamp_type = TIMESTAMP_TYPE_CYCLE_COUNT;
uint32_t fence_val;
enum pva_error err;
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_shared_buffer *profiling_buffer =
&pva->kmd_fw_buffers[PVA_PRIV_CCQ_ID];
@@ -123,26 +122,14 @@ enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva)
return PVA_SUCCESS;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_enable_fw_profiling(cmd, filter, timestamp_type);
pva_kmd_set_cmd_enable_fw_profiling(&cmd, filter, timestamp_type);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
err = pva_kmd_submit_cmd_sync(dev_submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
goto err_out;
pva_kmd_log_err("Failed to submit command");
goto out;
}
pva->debugfs_context.g_fw_profiling_config.enabled = true;
@@ -155,38 +142,22 @@ enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva)
8 :
4;
return PVA_SUCCESS;
err_out:
out:
return err;
}
enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_disable_fw_profiling *cmd;
uint32_t fence_val;
struct pva_cmd_disable_fw_profiling cmd = { 0 };
enum pva_error err;
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_disable_fw_profiling(cmd);
pva_kmd_set_cmd_disable_fw_profiling(&cmd);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
err = pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when initializing context");
pva_kmd_log_err("Failed to submit command");
goto err_out;
}
@@ -194,6 +165,7 @@ enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva)
pva->debugfs_context.g_fw_profiling_config.filter = 0x0;
return PVA_SUCCESS;
err_out:
return err;
}

View File

@@ -50,7 +50,7 @@ void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len)
memcpy(abort_msg + 2, &data[1], size);
abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN] = '\0';
pva_kmd_log_err(abort_msg);
pva_kmd_abort(pva);
pva_kmd_abort_fw(pva);
} break;
case PVA_FW_MSG_TYPE_FLUSH_PRINT:
pva_kmd_drain_fw_print(&pva->fw_print_buffer);

View File

@@ -86,7 +86,6 @@ pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx,
err = PVA_NOMEM;
goto err_out;
}
if (args->segment == PVA_MEMORY_SEGMENT_R5) {
smmu_ctx_id = PVA_R5_SMMU_CONTEXT_ID;
} else {
@@ -168,8 +167,8 @@ static enum pva_error pva_kmd_op_executable_register_async(
}
args = (struct pva_ops_executable_register *)input_buffer;
if (args->exec_size + sizeof(struct pva_ops_executable_register) >
size) {
if (args->exec_size >
(size - sizeof(struct pva_ops_executable_register))) {
pva_kmd_log_err("Executable register payload size too small");
return PVA_INVAL;
}
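
The rewritten bound check keeps the addition off the left-hand side, where a
large exec_size could wrap and defeat the comparison; subtracting the header
size from the total instead is overflow-free once total >= header holds. The
same shape in isolation:

#include <stdbool.h>
#include <stdint.h>

static bool payload_fits(uint32_t payload_size, uint32_t total_size,
                         uint32_t header_size)
{
        return (total_size >= header_size) &&
               (payload_size <= (total_size - header_size));
}
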
@@ -404,8 +403,10 @@ exit_loop:
post_fence->flags |= PVA_FW_POSTFENCE_FLAGS_USER_FENCE;
submit_error = pva_kmd_submitter_submit_with_fence(
&ctx->submitter, &cmdbuf_builder, post_fence);
ASSERT(submit_error == PVA_SUCCESS);
if (err == PVA_SUCCESS) {
err = submit_error;
}
out:
return err;
}
@@ -434,97 +435,14 @@ pva_kmd_op_context_init(struct pva_kmd_context *ctx, const void *input_buffer,
err = pva_kmd_context_init(ctx, ctx_init_args->resource_table_capacity);
ctx_init_out.error = err;
ctx_init_out.ccq_shm_hdl = (uint64_t)ctx->ccq_shm_handle;
ctx_init_out.max_cmdbuf_chunk_size =
pva_kmd_get_max_cmdbuf_chunk_size(ctx->pva);
produce_data(out_buffer, &ctx_init_out, sizeof(ctx_init_out));
return PVA_SUCCESS;
}
static enum pva_error pva_kmd_op_syncpt_register_async(
struct pva_kmd_context *ctx, const void *input_buffer,
uint32_t input_buffer_size, struct pva_kmd_ops_buffer *out_buffer,
struct pva_kmd_cmdbuf_builder *cmdbuf_builder)
{
enum pva_error err;
struct pva_syncpt_rw_info *syncpts;
struct pva_kmd_device_memory dev_mem;
uint32_t resource_id = 0;
struct pva_cmd_update_resource_table *update_cmd;
struct pva_resource_entry entry = { 0 };
struct pva_ops_response_syncpt_register syncpt_register_out = { 0 };
if (input_buffer_size != sizeof(struct pva_ops_syncpt_register)) {
pva_kmd_log_err("Syncpt register size is not correct");
return PVA_INVAL;
}
if (!access_ok(out_buffer,
sizeof(struct pva_ops_response_syncpt_register))) {
return PVA_INVAL;
}
/* Register RO syncpts */
dev_mem.iova = ctx->pva->syncpt_ro_iova;
dev_mem.va = 0;
dev_mem.size = ctx->pva->syncpt_offset * ctx->pva->num_syncpts;
dev_mem.pva = ctx->pva;
dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID;
err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem,
&resource_id);
if (err != PVA_SUCCESS) {
goto err_out;
}
syncpt_register_out.syncpt_ro_res_id = resource_id;
syncpt_register_out.num_ro_syncpoints = ctx->pva->num_syncpts;
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
/* Register RW syncpts */
pva_kmd_mutex_lock(&ctx->pva->syncpt_allocator.allocator_lock);
syncpts = (struct pva_syncpt_rw_info *)pva_kmd_get_block_unsafe(
&ctx->pva->syncpt_allocator, ctx->syncpt_block_index);
ASSERT(syncpts != NULL);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS_PER_CONTEXT; i++) {
ctx->syncpt_ids[i] = syncpts[i].syncpt_id;
syncpt_register_out.synpt_ids[i] = syncpts[i].syncpt_id;
}
dev_mem.iova = syncpts[0].syncpt_iova;
pva_kmd_mutex_unlock(&ctx->pva->syncpt_allocator.allocator_lock);
dev_mem.va = 0;
dev_mem.size = ctx->pva->syncpt_offset * PVA_NUM_RW_SYNCPTS_PER_CONTEXT;
dev_mem.pva = ctx->pva;
dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID;
err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem,
&resource_id);
if (err != PVA_SUCCESS) {
goto err_out;
}
syncpt_register_out.syncpt_rw_res_id = resource_id;
syncpt_register_out.synpt_size = ctx->pva->syncpt_offset;
update_cmd =
pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd));
ASSERT(update_cmd != NULL);
err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id,
&entry);
ASSERT(err == PVA_SUCCESS);
pva_kmd_set_cmd_update_resource_table(
update_cmd, ctx->resource_table_id, resource_id, &entry);
err_out:
syncpt_register_out.error = err;
produce_data(out_buffer, &syncpt_register_out,
sizeof(syncpt_register_out));
return PVA_SUCCESS;
}
static enum pva_error
pva_kmd_op_queue_create(struct pva_kmd_context *ctx, const void *input_buffer,
uint32_t input_buffer_size,
@@ -532,6 +450,7 @@ pva_kmd_op_queue_create(struct pva_kmd_context *ctx, const void *input_buffer,
{
const struct pva_ops_queue_create *queue_create_args;
struct pva_ops_response_queue_create queue_out_args = { 0 };
const struct pva_syncpt_rw_info *syncpt_info;
uint32_t queue_id = PVA_INVALID_QUEUE_ID;
enum pva_error err = PVA_SUCCESS;
@@ -553,10 +472,12 @@ pva_kmd_op_queue_create(struct pva_kmd_context *ctx, const void *input_buffer,
goto out;
}
syncpt_info = pva_kmd_queue_get_rw_syncpt_info(ctx, queue_id);
queue_out_args.error = err;
queue_out_args.queue_id = queue_id;
pva_kmd_read_syncpt_val(ctx->pva, ctx->syncpt_ids[queue_id],
&queue_out_args.syncpt_fence_counter);
queue_out_args.syncpt_id = syncpt_info->syncpt_id;
pva_kmd_read_syncpt_val(ctx->pva, syncpt_info->syncpt_id,
&queue_out_args.syncpt_current_value);
out:
produce_data(out_buffer, &queue_out_args,
@@ -687,15 +608,16 @@ pva_kmd_op_synced_submit(struct pva_kmd_context *ctx, const void *input_buffer,
err = pva_kmd_submitter_submit(&ctx->submitter, &cmdbuf_builder,
&fence_val);
/* TODO: handle this error */
ASSERT(err == PVA_SUCCESS);
if (err != PVA_SUCCESS) {
goto cancel_submit;
}
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
goto err_out;
goto cancel_submit;
}
return PVA_SUCCESS;
@@ -758,11 +680,6 @@ pva_kmd_sync_ops_handler(struct pva_kmd_context *ctx,
ctx, input_buffer, input_buffer_size, out_arg,
pva_kmd_op_memory_register_async);
break;
case PVA_OPS_OPCODE_SYNCPT_REGISTER:
err = pva_kmd_op_synced_submit(
ctx, input_buffer, input_buffer_size, out_arg,
pva_kmd_op_syncpt_register_async);
break;
case PVA_OPS_OPCODE_EXECUTABLE_REGISTER:
err = pva_kmd_op_synced_submit(
ctx, input_buffer, input_buffer_size, out_arg,
@@ -798,11 +715,6 @@ enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx,
struct pva_kmd_ops_buffer in_buffer = { 0 }, out_buffer = { 0 };
enum pva_error err = PVA_SUCCESS;
if (ctx->pva->recovery) {
pva_kmd_log_err("PVA firmware aborted. No KMD ops allowed.");
return PVA_ERR_FW_ABORTED;
}
in_buffer.base = ops_buffer;
in_buffer.size = ops_size;

View File

@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "pva_kmd_utils.h"
#include "pva_fw.h"
#include "pva_kmd_device_memory.h"
@@ -14,11 +15,8 @@
enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
enum pva_error err = PVA_SUCCESS;
struct pva_cmd_suspend_fw *fw_suspend;
uint32_t fence_val;
struct pva_cmd_suspend_fw cmd = { 0 };
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0u) {
@@ -27,44 +25,16 @@ enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva)
goto err_out;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Prepare submitter for FW suspend command failed\n");
goto err_out;
}
pva_kmd_set_cmd_suspend_fw(&cmd);
//Build args
fw_suspend = pva_kmd_reserve_cmd_space(&builder, sizeof(*fw_suspend));
if (fw_suspend == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for FW suspend command failed\n");
err = PVA_NOMEM;
goto cancel_submit;
}
pva_kmd_set_cmd_suspend_fw(fw_suspend);
//Submit
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Submission for FW suspend command failed\n");
goto cancel_submit;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
err = pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"PVA: Waiting for FW timed out when preparing for suspend state\n");
pva_kmd_log_err("PVA: Failed to submit FW suspend command\n");
goto err_out;
}
cancel_submit:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return err;
@@ -77,9 +47,11 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
struct pva_cmd_init_resource_table *res_cmd;
struct pva_cmd_init_queue *queue_cmd;
struct pva_cmd_resume_fw *fw_resume;
struct pva_cmd_init_shared_dram_buffer *shared_buf_cmd;
enum pva_error err;
uint32_t fence_val;
struct pva_kmd_queue *queue;
const struct pva_syncpt_rw_info *syncpt_info;
pva_kmd_mutex_lock(&pva->powercycle_lock);
if (pva->refcount == 0u) {
@@ -89,8 +61,10 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
goto err_out;
}
pva_kmd_send_resource_table_info_by_ccq(pva, &pva->dev_resource_table);
pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue);
err = pva_kmd_config_fw_after_boot(pva);
if (err != PVA_SUCCESS) {
goto err_out;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
@@ -140,14 +114,38 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
goto cancel_builder;
}
/* Initialize shared buffer */
shared_buf_cmd = pva_kmd_reserve_cmd_space(
&builder, sizeof(*shared_buf_cmd));
if (shared_buf_cmd == NULL) {
pva_kmd_log_err(
"PVA: Memory alloc for shared buffer registration in FW resume command failed\n");
err = PVA_NOMEM;
goto cancel_builder;
}
pva_dbg_printf(
"PVA: Resume shared buffer for context %d\n",
ctx->ccq_id);
pva_kmd_set_cmd_init_shared_dram_buffer(
shared_buf_cmd, ctx->ccq_id,
pva->kmd_fw_buffers[ctx->ccq_id]
.resource_memory->iova,
pva->kmd_fw_buffers[ctx->ccq_id]
.resource_memory->size);
pva_dbg_printf(
"PVA: Resume priv queue for context %d\n",
ctx->ccq_id);
syncpt_info = pva_kmd_queue_get_rw_syncpt_info(
PVA_PRIV_CCQ_ID, ctx->ccq_id);
pva_kmd_set_cmd_init_queue(
queue_cmd, PVA_PRIV_CCQ_ID,
ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/
ctx->ctx_queue.queue_memory->iova,
ctx->ctx_queue.max_num_submit);
ctx->ctx_queue.max_num_submit,
syncpt_info->syncpt_id,
syncpt_info->syncpt_iova);
/**Initialize resource table */
for (uint32_t j = 0; j < ctx->max_n_queues; j++) {
@@ -168,11 +166,16 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
goto cancel_builder;
}
syncpt_info =
pva_kmd_queue_get_rw_syncpt_info(
ctx, queue->queue_id);
pva_kmd_set_cmd_init_queue(
queue_cmd, queue->ccq_id,
queue->queue_id,
queue->queue_memory->iova,
queue->max_num_submit);
queue->max_num_submit,
syncpt_info->syncpt_id,
syncpt_info->syncpt_iova);
}
pva_kmd_mutex_unlock(
&ctx->queue_allocator.allocator_lock);
@@ -194,9 +197,12 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva)
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when resuming from suspend state");
goto err_out;
goto cancel_builder;
}
pva_kmd_mutex_unlock(&pva->powercycle_lock);
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);

View File

@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "pva_constants.h"
#include "pva_kmd.h"
#include "pva_kmd_utils.h"
#include "pva_fw.h"
@@ -74,48 +75,23 @@ pva_kmd_queue_submit(struct pva_kmd_queue *queue,
return err;
}
void pva_kmd_queue_deinit(struct pva_kmd_queue *queue)
{
queue->queue_memory = NULL;
queue->ccq_id = PVA_INVALID_QUEUE_ID;
queue->max_num_submit = 0;
}
static enum pva_error notify_fw_queue_deinit(struct pva_kmd_context *ctx,
struct pva_kmd_queue *queue)
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_cmdbuf_builder builder;
struct pva_cmd_deinit_queue *queue_cmd;
uint32_t fence_val;
struct pva_cmd_deinit_queue cmd = { 0 };
enum pva_error err;
err = pva_kmd_submitter_prepare(&ctx->submitter, &builder);
if (err != PVA_SUCCESS) {
goto end;
}
pva_kmd_set_cmd_deinit_queue(&cmd, queue->ccq_id, queue->queue_id);
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
err = PVA_NOMEM;
goto cancel_submitter;
}
pva_kmd_set_cmd_deinit_queue(queue_cmd, queue->ccq_id, queue->queue_id);
err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
err = pva_kmd_submit_cmd_sync(&ctx->submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
goto end;
}
return PVA_SUCCESS;
cancel_submitter:
pva_kmd_cmdbuf_builder_cancel(&builder);
end:
return err;
}
@@ -126,10 +102,9 @@ enum pva_error pva_kmd_queue_create(struct pva_kmd_context *ctx,
{
struct pva_kmd_device_memory *submission_mem_kmd = NULL;
struct pva_kmd_queue *queue = NULL;
struct pva_kmd_cmdbuf_builder builder;
struct pva_cmd_init_queue *queue_cmd;
uint32_t fence_val;
struct pva_cmd_init_queue cmd = { 0 };
enum pva_error err, tmperr;
const struct pva_syncpt_rw_info *syncpt_info;
queue = pva_kmd_zalloc_block(&ctx->queue_allocator, queue_id);
if (queue == NULL) {
@@ -160,42 +135,26 @@ enum pva_error pva_kmd_queue_create(struct pva_kmd_context *ctx,
goto err_free_kmd_memory;
}
err = pva_kmd_submitter_prepare(&ctx->submitter, &builder);
syncpt_info = pva_kmd_queue_get_rw_syncpt_info(ctx, queue->queue_id);
pva_kmd_set_cmd_init_queue(&cmd, queue->ccq_id, queue->queue_id,
queue->queue_memory->iova,
queue->max_num_submit,
syncpt_info->syncpt_id,
syncpt_info->syncpt_iova);
err = pva_kmd_submit_cmd_sync(&ctx->submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
goto unmap_iova;
}
queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd));
if (queue_cmd == NULL) {
err = PVA_NOMEM;
goto cancel_submitter;
}
ASSERT(queue_cmd != NULL);
pva_kmd_set_cmd_init_queue(queue_cmd, queue->ccq_id, queue->queue_id,
queue->queue_memory->iova,
queue->max_num_submit);
err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
err = pva_kmd_submitter_wait(&ctx->submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
goto cancel_submitter;
}
return PVA_SUCCESS;
cancel_submitter:
pva_kmd_cmdbuf_builder_cancel(&builder);
unmap_iova:
pva_kmd_device_memory_iova_unmap(submission_mem_kmd);
err_free_kmd_memory:
pva_kmd_device_memory_free(queue->queue_memory);
pva_kmd_queue_deinit(queue);
err_free_queue:
tmperr = pva_kmd_free_block(&ctx->queue_allocator, *queue_id);
ASSERT(tmperr == PVA_SUCCESS);
@@ -210,35 +169,40 @@ enum pva_error pva_kmd_queue_destroy(struct pva_kmd_context *ctx,
{
struct pva_kmd_queue *queue;
enum pva_error err = PVA_SUCCESS;
enum pva_error tmp_err;
/*
* TODO :
* Send command to FW to stop queue usage. Wait for ack.
* This call needs to be added after syncpoint and ccq functions are ready.
*/
pva_kmd_mutex_lock(&ctx->queue_allocator.allocator_lock);
queue = pva_kmd_get_block_unsafe(&ctx->queue_allocator, queue_id);
if (queue == NULL) {
pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock);
return PVA_INVAL;
pva_kmd_log_err("Destroying non-existent queue");
err = PVA_INVAL;
goto unlock;
}
if (!ctx->pva->recovery) {
err = notify_fw_queue_deinit(ctx, queue);
if (err != PVA_SUCCESS) {
pva_kmd_mutex_unlock(
&ctx->queue_allocator.allocator_lock);
return err;
}
//Might happen if FW is aborted. It's safe to keep going.
pva_kmd_log_err("Failed to notify FW to destroy queue");
}
pva_kmd_device_memory_iova_unmap(queue->queue_memory);
pva_kmd_device_memory_free(queue->queue_memory);
pva_kmd_queue_deinit(queue);
tmp_err = pva_kmd_free_block_unsafe(&ctx->queue_allocator, queue_id);
// This cannot fail as we have already checked for queue existence and we
// are still holding the lock
ASSERT(tmp_err == PVA_SUCCESS);
unlock:
pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock);
err = pva_kmd_free_block(&ctx->queue_allocator, queue_id);
ASSERT(err == PVA_SUCCESS);
return PVA_SUCCESS;
return err;
}
const struct pva_syncpt_rw_info *
pva_kmd_queue_get_rw_syncpt_info(struct pva_kmd_context *ctx, uint8_t queue_id)
{
uint8_t ctx_offset =
safe_mulu32(ctx->ccq_id, PVA_NUM_RW_SYNCPTS_PER_CONTEXT);
uint32_t syncpt_index = safe_addu32(ctx_offset, queue_id);
ASSERT(syncpt_index < PVA_NUM_RW_SYNCPTS);
return &ctx->pva->rw_syncpts[syncpt_index];
}
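
pva_kmd_queue_get_rw_syncpt_info maps a (context, queue) pair to a flat index
into the device-wide RW syncpoint array: each context owns a contiguous block
of PVA_NUM_RW_SYNCPTS_PER_CONTEXT entries. With an illustrative value of 8
per context (the real constant is defined elsewhere in the tree), context 2 /
queue 3 lands on entry 19:

#include <stdint.h>

static uint32_t rw_syncpt_index_example(uint8_t ccq_id, uint8_t queue_id,
                                        uint32_t syncpts_per_context)
{
        /* Mirrors safe_mulu32(ccq_id, per_context) followed by safe_addu32. */
        return ((uint32_t)ccq_id * syncpts_per_context) + (uint32_t)queue_id;
}

/* rw_syncpt_index_example(2U, 3U, 8U) == 19U */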

View File

@@ -33,6 +33,8 @@ enum pva_error
pva_kmd_queue_submit(struct pva_kmd_queue *queue,
struct pva_fw_cmdbuf_submit_info const *submit_info);
uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue);
void pva_kmd_queue_deinit(struct pva_kmd_queue *queue);
const struct pva_syncpt_rw_info *
pva_kmd_queue_get_rw_syncpt_info(struct pva_kmd_context *ctx, uint8_t queue_id);
#endif // PVA_KMD_QUEUE_H

View File

@@ -46,8 +46,7 @@ static uint32_t get_max_dma_config_size(struct pva_kmd_device *pva)
enum pva_error
pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table,
struct pva_kmd_device *pva,
uint8_t user_smmu_ctx_id, uint32_t n_entries,
uint32_t max_num_dma_configs)
uint8_t user_smmu_ctx_id, uint32_t n_entries)
{
uint32_t max_dma_config_size = get_max_dma_config_size(pva);
enum pva_error err;
@@ -56,45 +55,55 @@ pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table,
res_table->pva = pva;
res_table->n_entries = n_entries;
res_table->user_smmu_ctx_id = user_smmu_ctx_id;
pva_kmd_sema_init(&res_table->resource_semaphore, n_entries);
pva_kmd_mutex_init(&res_table->resource_table_lock);
size = (uint64_t)safe_mulu32(
n_entries, (uint32_t)sizeof(struct pva_resource_entry));
res_table->table_mem = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(res_table->table_mem != NULL);
pva_kmd_sema_init(&res_table->resource_semaphore, n_entries);
pva_kmd_mutex_init(&res_table->resource_table_lock);
if (res_table->table_mem == NULL) {
err = PVA_NOMEM;
goto deinit_locks;
}
size = (uint64_t)safe_mulu32(sizeof(struct pva_kmd_resource_record),
n_entries);
res_table->records_mem = pva_kmd_zalloc(size);
ASSERT(res_table->records_mem != NULL);
if (res_table->records_mem == NULL) {
err = PVA_NOMEM;
goto free_table_mem;
}
err = pva_kmd_block_allocator_init(
&res_table->resource_record_allocator, res_table->records_mem,
PVA_RESOURCE_ID_BASE, sizeof(struct pva_kmd_resource_record),
n_entries);
ASSERT(err == PVA_SUCCESS);
if (err != PVA_SUCCESS) {
goto free_records_mem;
}
size = (uint64_t)safe_mulu32(max_num_dma_configs, max_dma_config_size);
res_table->dma_config_mem = pva_kmd_device_memory_alloc_map(
size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
ASSERT(res_table->dma_config_mem != NULL);
err = pva_kmd_block_allocator_init(&res_table->dma_config_allocator,
res_table->dma_config_mem->va, 0,
err = pva_kmd_devmem_pool_init(&res_table->dma_config_pool, pva,
PVA_R5_SMMU_CONTEXT_ID,
max_dma_config_size,
max_num_dma_configs);
ASSERT(err == PVA_SUCCESS);
res_table->dma_aux = pva_kmd_zalloc(
safe_mulu32((uint32_t)sizeof(struct pva_kmd_dma_resource_aux),
max_num_dma_configs));
ASSERT(res_table->dma_aux != NULL);
PVA_KMD_DMA_CONFIG_POOL_INCR);
if (err != PVA_SUCCESS) {
goto free_resource_record_allocator;
}
return PVA_SUCCESS;
free_resource_record_allocator:
pva_kmd_block_allocator_deinit(&res_table->resource_record_allocator);
free_records_mem:
pva_kmd_free(res_table->records_mem);
free_table_mem:
pva_kmd_device_memory_free(res_table->table_mem);
deinit_locks:
pva_kmd_mutex_deinit(&res_table->resource_table_lock);
pva_kmd_sema_deinit(&res_table->resource_semaphore);
return err;
}
static struct pva_kmd_resource_record *
@@ -118,7 +127,7 @@ pva_kmd_alloc_resource_id(struct pva_kmd_resource_table *resource_table,
goto out;
}
rec = (struct pva_kmd_resource_record *)pva_kmd_alloc_block(
rec = (struct pva_kmd_resource_record *)pva_kmd_zalloc_block(
&resource_table->resource_record_allocator, out_resource_id);
ASSERT(rec != NULL);
@@ -141,9 +150,8 @@ pva_kmd_free_resource_id(struct pva_kmd_resource_table *resource_table,
static void
pva_kmd_release_resource(struct pva_kmd_resource_table *resource_table,
uint32_t resource_id)
uint32_t resource_id, bool drop_dma_reference)
{
enum pva_error err;
struct pva_kmd_resource_record *rec = pva_kmd_get_block_unsafe(
&resource_table->resource_record_allocator, resource_id);
@@ -151,9 +159,7 @@ pva_kmd_release_resource(struct pva_kmd_resource_table *resource_table,
switch (rec->type) {
case PVA_RESOURCE_TYPE_DRAM:
if (rec->dram.syncpt != true) {
pva_kmd_device_memory_free(rec->dram.mem);
}
break;
case PVA_RESOURCE_TYPE_EXEC_BIN:
pva_kmd_unload_executable(&rec->vpu_bin.symbol_table,
@@ -161,12 +167,12 @@ pva_kmd_release_resource(struct pva_kmd_resource_table *resource_table,
rec->vpu_bin.sections_mem);
break;
case PVA_RESOURCE_TYPE_DMA_CONFIG: {
struct pva_kmd_dma_resource_aux *dma_aux;
dma_aux = &resource_table->dma_aux[rec->dma_config.block_index];
pva_kmd_unload_dma_config_unsafe(dma_aux);
err = pva_kmd_free_block(&resource_table->dma_config_allocator,
rec->dma_config.block_index);
ASSERT(err == PVA_SUCCESS);
if (drop_dma_reference) {
pva_kmd_unload_dma_config_unsafe(
rec->dma_config.aux_mem);
}
pva_kmd_free(rec->dma_config.aux_mem);
pva_kmd_devmem_pool_free(&rec->dma_config.devmem);
break;
}
@@ -177,33 +183,6 @@ pva_kmd_release_resource(struct pva_kmd_resource_table *resource_table,
pva_kmd_free_resource_id(resource_table, resource_id);
}
enum pva_error
pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
uint32_t *out_resource_id)
{
struct pva_kmd_resource_record *rec =
pva_kmd_alloc_resource_id(resource_table, out_resource_id);
if (rec == NULL) {
pva_kmd_log_err("No more resource id");
return PVA_NO_RESOURCE_ID;
}
pva_kmd_mutex_lock(&resource_table->resource_table_lock);
if (*out_resource_id > resource_table->curr_max_resource_id) {
resource_table->curr_max_resource_id = *out_resource_id;
}
pva_kmd_mutex_unlock(&resource_table->resource_table_lock);
rec->type = PVA_RESOURCE_TYPE_DRAM;
rec->dram.mem = dev_mem;
rec->dram.syncpt = true;
rec->ref_count = 1;
return PVA_SUCCESS;
}
enum pva_error
pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
@@ -225,7 +204,6 @@ pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table,
rec->type = PVA_RESOURCE_TYPE_DRAM;
rec->dram.mem = dev_mem;
rec->dram.syncpt = false;
rec->ref_count = 1;
return PVA_SUCCESS;
@@ -271,6 +249,7 @@ void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table)
entry->size_lo = iova_lo(rec->dram.mem->size);
entry->size_hi = iova_hi(rec->dram.mem->size);
entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx;
entry->access_flags = rec->dram.mem->iova_access_flags;
break;
case PVA_RESOURCE_TYPE_INVALID:
break;
@@ -349,7 +328,7 @@ void pva_kmd_drop_resource_unsafe(struct pva_kmd_resource_table *resource_table,
rec->ref_count = safe_subu32(rec->ref_count, 1U);
if (rec->ref_count == 0) {
pva_kmd_release_resource(resource_table, resource_id);
pva_kmd_release_resource(resource_table, resource_id, true);
}
}
@@ -414,6 +393,7 @@ pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table,
entry->size_lo = iova_lo(rec->dram.mem->size);
entry->size_hi = iova_hi(rec->dram.mem->size);
entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx;
entry->access_flags = rec->dram.mem->iova_access_flags;
break;
case PVA_RESOURCE_TYPE_EXEC_BIN:
entry->type = rec->type;
@@ -423,6 +403,7 @@ pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table,
entry->size_hi = iova_hi(rec->vpu_bin.metainfo_mem->size);
entry->smmu_context_id =
rec->vpu_bin.metainfo_mem->smmu_ctx_idx;
entry->access_flags = PVA_ACCESS_RO;
break;
case PVA_RESOURCE_TYPE_DMA_CONFIG:
entry->type = rec->type;
@@ -431,6 +412,7 @@ pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table,
entry->size_lo = iova_lo(rec->dma_config.size);
entry->size_hi = iova_hi(rec->dma_config.size);
entry->smmu_context_id = PVA_R5_SMMU_CONTEXT_ID;
entry->access_flags = PVA_ACCESS_RO;
break;
default:
pva_kmd_log_err("Unsupported resource type");
@@ -447,24 +429,30 @@ enum pva_error pva_kmd_add_dma_config_resource(
uint32_t dma_config_size, uint32_t *out_resource_id)
{
enum pva_error err = PVA_SUCCESS;
uint32_t block_idx, fw_fetch_size;
uint32_t fw_fetch_size;
void *fw_dma_cfg;
struct pva_kmd_dma_resource_aux *dma_aux;
struct pva_kmd_resource_record *rec;
uint32_t res_id;
struct pva_kmd_devmem_element dma_cfg_mem = { 0 };
fw_dma_cfg = pva_kmd_zalloc_block(&resource_table->dma_config_allocator,
&block_idx);
if (fw_dma_cfg == NULL) {
err = PVA_NOMEM;
err = pva_kmd_devmem_pool_zalloc(&resource_table->dma_config_pool,
&dma_cfg_mem);
if (err != PVA_SUCCESS) {
goto err_out;
}
fw_dma_cfg = pva_kmd_get_devmem_va(&dma_cfg_mem);
// Must satisfy alignment requirement for converting to struct
// pva_dma_config_resource*
ASSERT(((uintptr_t)fw_dma_cfg) % sizeof(uint64_t) == 0);
dma_aux = &resource_table->dma_aux[block_idx];
dma_aux = pva_kmd_zalloc(sizeof(struct pva_kmd_dma_resource_aux));
if (dma_aux == NULL) {
err = PVA_NOMEM;
goto free_dma_cfg_mem;
}
dma_aux->res_table = resource_table;
pva_kmd_mutex_lock(&resource_table->resource_table_lock);
err = pva_kmd_load_dma_config(resource_table, dma_cfg_hdr,
@@ -472,7 +460,7 @@ enum pva_error pva_kmd_add_dma_config_resource(
&fw_fetch_size);
pva_kmd_mutex_unlock(&resource_table->resource_table_lock);
if (err != PVA_SUCCESS) {
goto free_block;
goto free_dma_aux;
}
rec = pva_kmd_alloc_resource_id(resource_table, &res_id);
@@ -489,12 +477,9 @@ enum pva_error pva_kmd_add_dma_config_resource(
rec->type = PVA_RESOURCE_TYPE_DMA_CONFIG;
rec->ref_count = 1;
rec->dma_config.block_index = block_idx;
rec->dma_config.iova_addr = safe_addu64(
resource_table->dma_config_mem->iova,
(uint64_t)safe_mulu32(
block_idx,
resource_table->dma_config_allocator.block_size));
rec->dma_config.devmem = dma_cfg_mem;
rec->dma_config.aux_mem = dma_aux;
rec->dma_config.iova_addr = pva_kmd_get_devmem_iova(&dma_cfg_mem);
rec->dma_config.size = fw_fetch_size;
*out_resource_id = res_id;
@@ -504,8 +489,10 @@ unload_dma:
pva_kmd_mutex_lock(&resource_table->resource_table_lock);
pva_kmd_unload_dma_config_unsafe(dma_aux);
pva_kmd_mutex_unlock(&resource_table->resource_table_lock);
free_block:
pva_kmd_free_block(&resource_table->dma_config_allocator, block_idx);
free_dma_aux:
pva_kmd_free(dma_aux);
free_dma_cfg_mem:
pva_kmd_devmem_pool_free(&dma_cfg_mem);
err_out:
return err;
}
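
Skeleton of the new allocation/cleanup pairing in
pva_kmd_add_dma_config_resource, with the load, validation and record setup
elided; on success the resource record takes ownership of both allocations,
and on failure each label releases exactly what was acquired before it:

static enum pva_error
add_dma_config_skeleton(struct pva_kmd_resource_table *rt)
{
        struct pva_kmd_devmem_element dma_cfg_mem = { 0 };
        struct pva_kmd_dma_resource_aux *dma_aux;
        enum pva_error err;

        err = pva_kmd_devmem_pool_zalloc(&rt->dma_config_pool, &dma_cfg_mem);
        if (err != PVA_SUCCESS) {
                goto err_out;
        }
        dma_aux = pva_kmd_zalloc(sizeof(*dma_aux));
        if (dma_aux == NULL) {
                err = PVA_NOMEM;
                goto free_dma_cfg_mem;
        }
        /* ... pva_kmd_load_dma_config() and record setup go here; later
         * failures also pass through the unload_dma and free_dma_aux
         * labels in the real function ... */
        return PVA_SUCCESS;

free_dma_cfg_mem:
        pva_kmd_devmem_pool_free(&dma_cfg_mem);
err_out:
        return err;
}
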
@@ -523,7 +510,7 @@ pva_kmd_release_all_resources(struct pva_kmd_resource_table *res_table)
struct pva_kmd_resource_record *rec =
pva_kmd_peek_resource(res_table, id);
if (rec != NULL) {
pva_kmd_release_resource(res_table, id);
pva_kmd_release_resource(res_table, id, false);
}
}
pva_kmd_mutex_unlock(&res_table->resource_table_lock);
@@ -533,11 +520,9 @@ pva_kmd_release_all_resources(struct pva_kmd_resource_table *res_table)
void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table)
{
pva_kmd_release_all_resources(res_table);
pva_kmd_free(res_table->dma_aux);
pva_kmd_block_allocator_deinit(&res_table->dma_config_allocator);
pva_kmd_device_memory_free(res_table->dma_config_mem);
pva_kmd_block_allocator_deinit(&res_table->resource_record_allocator);
pva_kmd_free(res_table->records_mem);
pva_kmd_devmem_pool_deinit(&res_table->dma_config_pool);
pva_kmd_mutex_deinit(&res_table->resource_table_lock);
pva_kmd_sema_deinit(&res_table->resource_semaphore);
pva_kmd_device_memory_free(res_table->table_mem);

View File

@@ -14,12 +14,12 @@
#include "pva_kmd_dma_cfg.h"
#include "pva_kmd_mutex.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_devmem_pool.h"
struct pva_kmd_device;
struct pva_kmd_dram_resource {
struct pva_kmd_device_memory *mem;
bool syncpt;
};
struct pva_kmd_vpu_bin_resource {
@@ -29,7 +29,8 @@ struct pva_kmd_vpu_bin_resource {
};
struct pva_kmd_dma_config_resource {
uint32_t block_index;
struct pva_kmd_devmem_element devmem;
struct pva_kmd_dma_resource_aux *aux_mem;
uint64_t size;
uint64_t iova_addr;
};
@@ -70,13 +71,8 @@ struct pva_kmd_resource_table {
/** Memory for resource table entries, in R5 segment */
struct pva_kmd_device_memory *table_mem;
/** Memory for fw dma configs, in DMA segment */
struct pva_kmd_device_memory *dma_config_mem;
struct pva_kmd_block_allocator dma_config_allocator;
/** Memory for tracking resources used by DMA configuration. Single
* allocation shared by all DMA configs */
struct pva_kmd_dma_resource_aux *dma_aux;
/** Pool for FW DMA configurations */
struct pva_kmd_devmem_pool dma_config_pool;
/** Memory for resource records */
void *records_mem;
@@ -88,8 +84,7 @@ struct pva_kmd_resource_table {
enum pva_error
pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table,
struct pva_kmd_device *pva,
uint8_t user_smmu_ctx_id, uint32_t n_entries,
uint32_t max_num_dma_configs);
uint8_t user_smmu_ctx_id, uint32_t n_entries);
void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table);
/** KMD only writes to FW resource table during init time. Once the address of
@@ -97,11 +92,6 @@ void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table);
*/
void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table);
enum pva_error
pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *dev_mem,
uint32_t *out_resource_id);
enum pva_error
pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table,
struct pva_kmd_device_memory *memory,

View File

@@ -2,6 +2,7 @@
// SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "pva_kmd_sha256.h"
#include "pva_math_utils.h"
#define ROTLEFT(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
#define ROTRIGHT(a, b) (((a) >> (b)) | ((a) << (32 - (b))))
@@ -58,9 +59,11 @@ static void sha256_transform(struct sha256_ctx *ctx, const void *data_in)
m[i] = SWAP32(data[i]);
}
for (i = 0; i < U32(64) - U32(16); ++i) {
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
m[i + U32(16)] = SIG1(m[U32(14) + i]) + m[U32(9) + i] +
SIG0(m[U32(1) + i]) + m[i];
m[i + U32(16)] = safe_wrap_add_u32(
safe_wrap_add_u32(safe_wrap_add_u32(SIG1(m[U32(14) + i]),
m[U32(9) + i]),
SIG0(m[U32(1) + i])),
m[i]);
}
a = ctx->state[0];
@@ -73,38 +76,32 @@ static void sha256_transform(struct sha256_ctx *ctx, const void *data_in)
h = ctx->state[7];
for (i = 0; i < U32(64); ++i) {
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
t1 = h + SHA_EP1(e) + CH(e, f, g) + k[i] + m[i];
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
t2 = SHA_EP0(a) + MAJ(a, b, c);
t1 = safe_wrap_add_u32(
safe_wrap_add_u32(
safe_wrap_add_u32(safe_wrap_add_u32(h,
SHA_EP1(e)),
CH(e, f, g)),
k[i]),
m[i]);
t2 = safe_wrap_add_u32(SHA_EP0(a), MAJ(a, b, c));
h = g;
g = f;
f = e;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
e = d + t1;
e = safe_wrap_add_u32(d, t1);
d = c;
c = b;
b = a;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
a = t1 + t2;
a = safe_wrap_add_u32(t1, t2);
}
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[0] += a;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[1] += b;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[2] += c;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[3] += d;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[4] += e;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[5] += f;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[6] += g;
/* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */
ctx->state[7] += h;
ctx->state[0] = safe_wrap_add_u32(ctx->state[0], a);
ctx->state[1] = safe_wrap_add_u32(ctx->state[1], b);
ctx->state[2] = safe_wrap_add_u32(ctx->state[2], c);
ctx->state[3] = safe_wrap_add_u32(ctx->state[3], d);
ctx->state[4] = safe_wrap_add_u32(ctx->state[4], e);
ctx->state[5] = safe_wrap_add_u32(ctx->state[5], f);
ctx->state[6] = safe_wrap_add_u32(ctx->state[6], g);
ctx->state[7] = safe_wrap_add_u32(ctx->state[7], h);
}
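
The rewrite drops the per-line Coverity deviation comments in favour of
safe_wrap_add_u32, whose intent is plain modulo-2^32 addition made explicit.
The helper is presumably provided by pva_math_utils.h (included above); a
possible equivalent, shown here only to document the assumed semantics:

#include <stdint.h>

static inline uint32_t wrap_add_u32_example(uint32_t a, uint32_t b)
{
        /* Widen, add, truncate: the wraparound is intentional, which is
         * exactly what SHA-256 word arithmetic requires. */
        return (uint32_t)(((uint64_t)a + (uint64_t)b) & 0xffffffffu);
}
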
void sha256_init(struct sha256_ctx *ctx)
@@ -127,7 +124,8 @@ void sha256_update(struct sha256_ctx *ctx, const void *data, size_t len)
for (i = 0; i < len; i += U32(64)) {
ctx->bitlen &= U32(0xffffffff);
sha256_transform(ctx, ((const uint8_t *)data) + i);
ctx->bitlen += U32(512);
ctx->bitlen =
safe_wrap_add_u32((uint32_t)ctx->bitlen, U32(512));
}
}
@@ -148,7 +146,9 @@ void sha256_finalize(struct sha256_ctx *ctx, const void *input,
/* the false of this condition is illegal for this API agreement */
/* this check is here only for Coverity INT30-C */
ctx->bitlen += input_size * U32(8);
ctx->bitlen = safe_wrap_add_u32((uint32_t)ctx->bitlen,
safe_wrap_mul_u32((uint32_t)input_size,
U32(8)));
(void)memcpy(p, input, input_size);
data[input_size] = 0x80;

View File

@@ -7,82 +7,6 @@
#include "pva_kmd_shim_trace_event.h"
#include "pva_kmd_shared_buffer.h"
static void
setup_cmd_init_shared_dram_buffer(void *cmd, uint8_t interface,
struct pva_kmd_shared_buffer *fw_buffer)
{
struct pva_cmd_init_shared_dram_buffer *init_cmd =
(struct pva_cmd_init_shared_dram_buffer *)cmd;
pva_kmd_set_cmd_init_shared_dram_buffer(
init_cmd, interface, fw_buffer->resource_memory->iova,
fw_buffer->resource_memory->size);
}
static void
setup_cmd_deinit_shared_dram_buffer(void *cmd, uint8_t interface,
struct pva_kmd_shared_buffer *fw_buffer)
{
struct pva_cmd_deinit_shared_dram_buffer *deinit_cmd =
(struct pva_cmd_deinit_shared_dram_buffer *)cmd;
pva_kmd_set_cmd_deinit_shared_dram_buffer(deinit_cmd, interface);
}
static enum pva_error
notify_fw(struct pva_kmd_device *pva, uint8_t interface,
void (*setup_cmd_cb)(void *cmd, uint8_t interface,
struct pva_kmd_shared_buffer *fw_buffer),
size_t cmd_size)
{
enum pva_error err;
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_kmd_shared_buffer *fw_buffer;
void *cmd_space;
uint32_t fence_val;
ASSERT(interface < PVA_MAX_NUM_CCQ);
fw_buffer = &pva->kmd_fw_buffers[interface];
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
// Make sure FW buffer was allocated
ASSERT(fw_buffer->header != NULL);
cmd_space = pva_kmd_reserve_cmd_space(&builder, cmd_size);
ASSERT(cmd_space != NULL);
// Let the setup callback configure the specific command
setup_cmd_cb(cmd_space, interface, fw_buffer);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
// Error is either QUEUE_FULL or TIMEDOUT
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out while processing buffer command");
goto err_out;
}
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
return err;
}
enum pva_error pva_kmd_shared_buffer_init(struct pva_kmd_device *pva,
uint8_t interface,
uint32_t element_size,
@@ -95,17 +19,24 @@ enum pva_error pva_kmd_shared_buffer_init(struct pva_kmd_device *pva,
struct pva_kmd_device_memory *device_memory;
struct pva_kmd_shared_buffer *buffer;
uint64_t buffer_size;
struct pva_cmd_init_shared_dram_buffer init_cmd = { 0 };
ASSERT(interface < PVA_MAX_NUM_CCQ);
buffer = &pva->kmd_fw_buffers[interface];
// If the buffer is already initialized, skip buffer allocation and just notify FW.
// This is needed to support suspend/resume.
if (buffer->header == NULL) {
// Ensure that the buffer body is a multiple of 'element size'
buffer_size = safe_mulu64(num_entries, element_size);
buffer_size = safe_addu64(buffer_size,
buffer_size =
safe_addu64(buffer_size,
sizeof(struct pva_fw_shared_buffer_header));
device_memory = pva_kmd_device_memory_alloc_map(
buffer_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID);
device_memory =
pva_kmd_device_memory_alloc_map(buffer_size, pva,
PVA_ACCESS_RW,
PVA_R5_SMMU_CONTEXT_ID);
if (device_memory == NULL) {
return PVA_NOMEM;
}
@@ -116,8 +47,8 @@ enum pva_error pva_kmd_shared_buffer_init(struct pva_kmd_device *pva,
buffer->header->element_size = element_size;
buffer->header->head = 0U;
buffer->header->tail = 0U;
buffer->body =
(pva_offset_pointer(buffer->header, sizeof(*buffer->header)));
buffer->body = (pva_offset_pointer(buffer->header,
sizeof(*buffer->header)));
buffer->lock_cb = lock_cb;
buffer->unlock_cb = unlock_cb;
buffer->resource_offset = 0U;
@@ -125,12 +56,24 @@ enum pva_error pva_kmd_shared_buffer_init(struct pva_kmd_device *pva,
err = pva_kmd_bind_shared_buffer_handler(pva, interface, pva);
if (err != PVA_SUCCESS) {
pva_kmd_log_err_u64(
"Failed to bind shared buffer handler for interface",
interface);
goto free_buffer_memory;
}
} else {
device_memory = buffer->resource_memory;
}
err = notify_fw(pva, interface, setup_cmd_init_shared_dram_buffer,
sizeof(struct pva_cmd_init_shared_dram_buffer));
pva_kmd_set_cmd_init_shared_dram_buffer(
&init_cmd, interface, device_memory->iova, device_memory->size);
err = pva_kmd_submit_cmd_sync(&pva->submitter, &init_cmd,
sizeof(init_cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to submit command");
goto release_handler;
}
@@ -140,6 +83,8 @@ release_handler:
pva_kmd_release_shared_buffer_handler(pva, interface);
free_buffer_memory:
pva_kmd_device_memory_free(device_memory);
buffer->header = NULL;
buffer->resource_memory = NULL;
return err;
}
@@ -148,22 +93,26 @@ enum pva_error pva_kmd_shared_buffer_deinit(struct pva_kmd_device *pva,
{
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_shared_buffer *buffer;
struct pva_cmd_deinit_shared_dram_buffer deinit_cmd = { 0 };
ASSERT(interface < PVA_MAX_NUM_CCQ);
buffer = &pva->kmd_fw_buffers[interface];
if (!pva->recovery) {
err = notify_fw(
pva, interface, setup_cmd_deinit_shared_dram_buffer,
sizeof(struct pva_cmd_deinit_shared_dram_buffer));
pva_kmd_set_cmd_deinit_shared_dram_buffer(&deinit_cmd, interface);
err = pva_kmd_submit_cmd_sync(&pva->submitter, &deinit_cmd,
sizeof(deinit_cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("Failed to deinit FW buffer");
}
// This might happen if FW is aborted. It's safe to keep going.
pva_kmd_log_err("Failed to notify FW of buffer deinit");
}
pva_kmd_release_shared_buffer_handler(pva, interface);
pva_kmd_shared_buffer_process(pva, interface);
buffer->header = NULL;
pva_kmd_device_memory_free(buffer->resource_memory);
buffer->resource_memory = NULL;
@@ -176,6 +125,7 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva,
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_fw_buffer_msg_header header;
struct pva_kmd_fw_msg_vpu_trace vpu_trace;
struct pva_kmd_fw_msg_fence_trace fence_trace;
struct pva_kmd_fw_msg_res_unreg unreg_data;
struct pva_kmd_context *ctx = NULL;
void *msg_body;
@@ -214,6 +164,12 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva,
pva_kmd_shim_add_trace_vpu_exec(pva, &vpu_trace);
break;
}
case PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE: {
ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_fence_trace));
memcpy(&fence_trace, msg_body, sizeof(fence_trace));
pva_kmd_shim_add_trace_fence(pva, &fence_trace);
break;
}
case PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG: {
ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_res_unreg));
memcpy(&unreg_data, msg_body, sizeof(unreg_data));
@@ -281,7 +237,7 @@ void pva_kmd_shared_buffer_process(void *pva_dev, uint8_t interface)
// Note that ideally this should never happen as the buffer is expected to be
// the same size as the resource table.
// TODO: abort only the user context, not the device.
pva_kmd_abort(pva);
pva_kmd_abort_fw(pva);
}
// Buffer corresponding to CCQ 0 is used for sending messages common to a VM.
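
The KMD/FW shared buffer managed in this file is a ring of fixed-size
elements behind a small header (element_size, head, tail, as set up during
init above). A simplified consumer loop under that assumption; the struct
layout, producer/consumer roles and wrap handling are guesses made for
illustration, and the lock/unlock callbacks are omitted:

#include <stddef.h>
#include <stdint.h>

struct example_shared_buffer_header {
        uint32_t element_size;
        uint32_t head; /* advanced by the producer (FW) */
        uint32_t tail; /* advanced by the consumer (KMD) */
};

static void drain_example(struct example_shared_buffer_header *hdr,
                          uint8_t *body, uint32_t num_entries,
                          void (*handle)(const void *msg, uint32_t size))
{
        while (hdr->tail != hdr->head) {
                handle(body + ((size_t)hdr->tail * hdr->element_size),
                       hdr->element_size);
                hdr->tail = (hdr->tail + 1U) % num_entries;
        }
}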

View File

@@ -4,6 +4,7 @@
#include "pva_kmd_device.h"
#include "pva_fw_address_map.h"
#include "pva_fw_hyp.h"
#include "pva_kmd_shim_init.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_constants.h"
#include "pva_kmd_silicon_isr.h"
@@ -153,27 +154,12 @@ void pva_kmd_config_sid(struct pva_kmd_device *pva)
}
}
static uint32_t pva_kmd_get_syncpt_ro_offset(struct pva_kmd_device *pva)
static uint32_t get_syncpt_offset(struct pva_kmd_device *pva,
uint64_t syncpt_iova)
{
if (pva->num_syncpts > 0U) {
if (pva->num_ro_syncpts > 0U) {
uint64_t offset;
offset = safe_subu64(pva->syncpt_ro_iova,
pva_kmd_get_r5_iova_start());
ASSERT(offset <= UINT32_MAX);
return (uint32_t)offset;
} else {
// This is only for SIM mode where syncpoints are not supported.
return PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET;
}
}
static uint32_t pva_kmd_get_syncpt_rw_offset(struct pva_kmd_device *pva)
{
if (pva->num_syncpts > 0U) {
uint64_t offset;
offset = safe_subu64(pva->syncpt_rw_iova,
pva_kmd_get_r5_iova_start());
offset = safe_subu64(syncpt_iova, pva_kmd_get_r5_iova_start());
ASSERT(offset <= UINT32_MAX);
return (uint32_t)offset;
@@ -249,12 +235,17 @@ enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva)
if (pva->bl_sector_pack_format == PVA_BL_XBAR_RAW) {
boot_sema = PVA_BOOT_SEMA_USE_XBAR_RAW;
}
if (pva->test_mode) {
boot_sema |= PVA_BOOT_SEMA_TEST_MODE;
}
pva_kmd_set_sema(pva, PVA_BOOT_SEMA, boot_sema);
pva_kmd_write(pva, PVA_REG_HSP_SS2_SET_ADDR,
pva_kmd_get_syncpt_ro_offset(pva));
pva_kmd_write(pva, PVA_REG_HSP_SS3_SET_ADDR,
pva_kmd_get_syncpt_rw_offset(pva));
pva_kmd_set_sema(pva, PVA_RO_SYNC_BASE_SEMA,
get_syncpt_offset(pva, pva->ro_syncpt_base_iova));
pva_kmd_set_sema(pva, PVA_RW_SYNC_BASE_SEMA,
get_syncpt_offset(pva, pva->rw_syncpt_base_iova));
pva_kmd_set_sema(pva, PVA_RW_SYNC_SIZE_SEMA,
pva->rw_syncpt_region_size);
pva_kmd_config_sid_regs(pva);
@@ -290,6 +281,7 @@ free_sec_lic:
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC);
free_fw_debug_mem:
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
pva_kmd_freeze_fw(pva);
pva_kmd_device_memory_free(pva->fw_debug_mem);
free_fw_mem:
if (!pva->load_from_gsc) {
@@ -299,17 +291,14 @@ out:
return err;
}
void pva_kmd_deinit_fw(struct pva_kmd_device *pva)
void pva_kmd_freeze_fw(struct pva_kmd_device *pva)
{
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC);
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
/*
* Before powering off PVA, disable SEC error reporting.
* While powering off, PVA might generate (unexplained) error interrupts
* This causes HSM to read some PVA SEC registers. However, since PVA might
* already be powergated by this time, access to PVA SEC registers from HSM
* fails. This was discussed in Bug 3785498.
* Before freezing PVA, disable SEC error reporting.
* While setting the reset line, PVA might generate (unexplained) error
* interrupts This causes HSM to read some PVA SEC registers. However,
* since PVA might already be powergated by this time, access to PVA SEC
* registers from HSM fails. This was discussed in Bug 3785498.
*
* Note: we do not explicitly enable these errors during power on since
* 'enable' is their reset value
@@ -317,6 +306,17 @@ void pva_kmd_deinit_fw(struct pva_kmd_device *pva)
disable_sec_mission_error_reporting(pva);
disable_sec_latent_error_reporting(pva);
pva_kmd_set_reset_line(pva);
}
void pva_kmd_deinit_fw(struct pva_kmd_device *pva)
{
pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC);
pva_kmd_drain_fw_print(&pva->fw_print_buffer);
// Freeze FW so that we can free memory
pva_kmd_freeze_fw(pva);
pva_kmd_device_memory_free(pva->fw_debug_mem);
if (!pva->load_from_gsc) {
pva_kmd_device_memory_free(pva->fw_bin_mem);

View File

@@ -748,9 +748,11 @@ load_metainfo(struct pva_kmd_device *pva, uint64_t section_iova,
metainfo->num_vmem_buffers = n_symbols;
data_sections_mem = pva_offset_pointer(metainfo, sizeof(*metainfo));
if (n_data_sections > 0U && section_infos != NULL) {
memcpy(data_sections_mem, section_infos,
mulu32(n_data_sections, (uint32_t)sizeof(*section_infos),
&math_err));
}
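
The added guard keeps memcpy from being invoked with a NULL source pointer
when there are no data sections, which is undefined behaviour and a common
static-analysis finding. In isolation:

#include <stdint.h>
#include <string.h>

static void copy_sections_example(void *dst, const void *src, uint32_t count,
                                  uint32_t elem_size)
{
        if ((count > 0U) && (src != NULL)) {
                (void)memcpy(dst, src, (size_t)count * elem_size);
        }
}
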
vmem_buffers_mem = pva_offset_pointer(
data_sections_mem,

View File

@@ -42,6 +42,7 @@ int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable)
{
struct pva_kmd_device *dev = ip_dev;
enum pva_error err = PVA_SUCCESS;
int ret = 0;
if (disable) {
err = pva_kmd_device_busy(dev);
@@ -51,5 +52,10 @@ int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable)
} else {
pva_kmd_device_idle(dev);
}
return err;
if (err != PVA_SUCCESS) {
ret = -1;
}
return ret;
}

View File

@@ -45,22 +45,16 @@ void pva_kmd_hyp_isr(void *data, enum pva_kmd_intr_line intr_line)
if (wdt_val != 0) {
/* Clear interrupt status */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status,
intr_status &
PVA_MASK(PVA_REG_SEC_LIC_INTR_WDT_MSB,
PVA_REG_SEC_LIC_INTR_WDT_LSB));
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, wdt_val);
pva_kmd_log_err("PVA watchdog timeout!");
pva_kmd_abort(pva);
pva_kmd_abort_fw(pva);
}
if (h1x_val != 0) {
pva_kmd_log_err_u64("Host1x errors", h1x_val);
/* Clear interrupt status */
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status,
intr_status &
PVA_MASK(PVA_REG_SEC_LIC_INTR_H1X_MSB,
PVA_REG_SEC_LIC_INTR_H1X_LSB));
pva_kmd_abort(pva);
pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, h1x_val);
pva_kmd_abort_fw(pva);
}
if (hsp_val != 0) {

View File

@@ -23,3 +23,10 @@ uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id)
PVA_REG_CCQ_STATUS2_NUM_ENTRIES_LSB, uint32_t);
return safe_subu32((uint32_t)PVA_CCQ_DEPTH, len) / 2U;
}
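/* For illustration (assumed values, not part of this change): with
 * PVA_CCQ_DEPTH raised to 14, a CCQ that already holds len == 4 entries
 * makes pva_kmd_get_ccq_space() return (14 - 4) / 2 == 5. */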
void pva_kmd_disable_all_interrupts_nosync(struct pva_kmd_device *pva)
{
for (int i = 0; i < PVA_KMD_INTR_LINE_COUNT; i++) {
pva_kmd_disable_intr_nosync(pva, (enum pva_kmd_intr_line)i);
}
}

View File

@@ -2,6 +2,7 @@
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "pva_kmd_submitter.h"
#include "pva_api_types.h"
#include "pva_kmd_utils.h"
#include "pva_kmd_abort.h"
@@ -70,6 +71,7 @@ pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter,
submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset);
submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset);
submit_info.first_chunk_size = first_chunk_size;
submit_info.execution_timeout_ms = PVA_EXEC_TIMEOUT_INF;
pva_kmd_mutex_lock(submitter->submit_lock);
err = pva_kmd_queue_submit(submitter->queue, &submit_info);
@@ -108,6 +110,7 @@ enum pva_error pva_kmd_submitter_submit(struct pva_kmd_submitter *submitter,
submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset);
submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset);
submit_info.first_chunk_size = first_chunk_size;
submit_info.execution_timeout_ms = PVA_EXEC_TIMEOUT_INF;
/* TODO: remove these flags after FW execute command buffer with no engines. */
submit_info.flags =
PVA_INSERT8(0x3, PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_MSB,
@@ -137,16 +140,63 @@ enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter,
{
uint32_t volatile *fence_addr = submitter->post_fence_va;
uint32_t time_spent = 0;
struct pva_kmd_device *pva = submitter->queue->pva;
while (*fence_addr < fence_val) {
if (pva->recovery) {
return PVA_ERR_FW_ABORTED;
}
pva_kmd_sleep_us(poll_interval_us);
time_spent = safe_addu32(time_spent, poll_interval_us);
if (time_spent >= timeout_us) {
pva_kmd_log_err("pva_kmd_submitter_wait Timed out");
pva_kmd_abort(submitter->queue->pva);
pva_kmd_abort_fw(submitter->queue->pva);
return PVA_TIMEDOUT;
}
}
return PVA_SUCCESS;
}
enum pva_error pva_kmd_submit_cmd_sync(struct pva_kmd_submitter *submitter,
void *cmds, uint32_t size,
uint32_t poll_interval_us,
uint32_t timeout_us)
{
struct pva_kmd_cmdbuf_builder builder = { 0 };
enum pva_error err;
void *cmd_dst = NULL;
uint32_t fence_val = 0;
err = pva_kmd_submitter_prepare(submitter, &builder);
if (err != PVA_SUCCESS) {
goto err_out;
}
cmd_dst = pva_kmd_reserve_cmd_space(&builder, size);
if (cmd_dst == NULL) {
err = PVA_INVAL;
pva_kmd_log_err(
"Trying to submit too many commands using pva_kmd_submit_cmd_sync.");
goto cancel_builder;
}
memcpy(cmd_dst, cmds, size);
err = pva_kmd_submitter_submit(submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
goto cancel_builder;
}
err = pva_kmd_submitter_wait(submitter, fence_val, poll_interval_us,
timeout_us);
if (err != PVA_SUCCESS) {
goto cancel_builder;
}
return err;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
err_out:
return err;
}

View File

@@ -57,4 +57,11 @@ pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter,
/* add cmd */
/* do submit with fence (provide a fence) */
/* Helper function to submit several commands and wait for them to complete.
Total size must be smaller than a chunk. */
enum pva_error pva_kmd_submit_cmd_sync(struct pva_kmd_submitter *submitter,
void *cmds, uint32_t size,
uint32_t poll_interval_us,
uint32_t timeout_us);
#endif // PVA_KMD_SUBMITTER_H
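A minimal usage sketch for the new helper (illustrative only, modeled on the tegra-stats refactor further below; the wrapper function is hypothetical and the get-tegra-stats command is just an example payload):
static enum pva_error example_sync_cmd(struct pva_kmd_device *pva,
                                       uint32_t resource_id, uint32_t size)
{
        /* Build a small command on the stack, then copy + submit + wait. */
        struct pva_cmd_get_tegra_stats cmd = { 0 };
        pva_kmd_set_cmd_get_tegra_stats(&cmd, resource_id, size,
                                        0U /* buffer offset */, true);
        /* The total command size must stay below one cmdbuf chunk. */
        return pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
                                       PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
                                       PVA_KMD_WAIT_FW_TIMEOUT_US);
}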

View File

@@ -59,11 +59,8 @@ enum pva_error
pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva,
struct pva_kmd_tegrastats *kmd_tegra_stats)
{
struct pva_kmd_cmdbuf_builder builder;
struct pva_kmd_submitter *dev_submitter = &pva->submitter;
struct pva_cmd_get_tegra_stats *cmd;
struct pva_cmd_get_tegra_stats cmd = { 0 };
uint64_t buffer_offset = 0U;
uint32_t fence_val;
enum pva_error err = PVA_SUCCESS;
struct pva_kmd_fw_tegrastats fw_tegra_stats = { 0 };
bool stats_enabled = pva->debugfs_context.stats_enable;
@@ -86,29 +83,15 @@ pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva,
goto err_out;
}
err = pva_kmd_submitter_prepare(dev_submitter, &builder);
if (err != PVA_SUCCESS) {
goto dev_idle;
}
cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd));
ASSERT(cmd != NULL);
pva_kmd_set_cmd_get_tegra_stats(cmd, pva->tegra_stats_resource_id,
pva_kmd_set_cmd_get_tegra_stats(&cmd, pva->tegra_stats_resource_id,
pva->tegra_stats_buf_size,
buffer_offset, stats_enabled);
err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val);
if (err != PVA_SUCCESS) {
pva_kmd_log_err("tegra stats cmd submission failed");
goto cancel_builder;
}
err = pva_kmd_submitter_wait(dev_submitter, fence_val,
err = pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd),
PVA_KMD_WAIT_FW_POLL_INTERVAL_US,
PVA_KMD_WAIT_FW_TIMEOUT_US);
if (err != PVA_SUCCESS) {
pva_kmd_log_err(
"Waiting for FW timed out when getting tegra stats");
pva_kmd_log_err("tegra stats cmd submission failed");
goto dev_idle;
}
@@ -129,8 +112,7 @@ out:
kmd_tegra_stats->window_end_time = fw_tegra_stats.window_end_time;
return PVA_SUCCESS;
cancel_builder:
pva_kmd_cmdbuf_builder_cancel(&builder);
dev_idle:
pva_kmd_device_idle(pva);
err_out:

View File

@@ -21,6 +21,7 @@ struct pva_kmd_device_memory {
uint64_t size; /**< Size of the mapping. */
struct pva_kmd_device *pva; /**< The PVA this memory is mapped to. */
uint32_t smmu_ctx_idx; /**< The SMMU context this memory is mapped to. */
uint32_t iova_access_flags; /**< Access flags for the memory. RO - 1/WO - 2/RW - 3 */
};
/**

View File

@@ -9,7 +9,8 @@ void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name,
bool *val);
void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name,
uint32_t *val);
void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name,
enum pva_error pva_kmd_debugfs_create_file(struct pva_kmd_device *pva,
const char *name,
struct pva_kmd_file_ops *fops);
void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva);
unsigned long pva_kmd_copy_data_from_user(void *dst, const void *src,

View File

@@ -14,9 +14,6 @@ void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva);
void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint32_t *syncpt_value);
void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint64_t *syncpt_iova);
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva);
/**
@@ -34,7 +31,7 @@ void pva_kmd_power_off(struct pva_kmd_device *pva);
* user submission halted. This is required for host1x
* watchdog, or kmd submission timeout failures.
*/
void pva_kmd_fw_reset_assert(struct pva_kmd_device *pva);
void pva_kmd_freeze_fw(struct pva_kmd_device *pva);
/**
* @brief Initialize firmware.
@@ -60,4 +57,18 @@ enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva);
* @param pva pointer to the PVA device to de-initialize
*/
void pva_kmd_deinit_fw(struct pva_kmd_device *pva);
/**
* @brief Disable all interrupts without waiting for running interrupt handlers
* to complete.
*
* We don't wait for running interrupt handlers to complete because we want to
* be able to call this function from interrupt handlers themselves.
*
* This function is to be called when PVA enters a bad state and we want to
* protect KMD from potential interrupt floods from PVA (particularly the
* watchdog interrupt, which HW will trigger repeatedly).
*/
void pva_kmd_disable_all_interrupts_nosync(struct pva_kmd_device *pva);
#endif // PVA_KMD_SHIM_INIT_H
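A hedged sketch of the intended call pattern for the new nosync helper (the surrounding recovery routine is hypothetical; only pva_kmd_disable_all_interrupts_nosync comes from this change):
static void example_enter_recovery(struct pva_kmd_device *pva)
{
        /* May be reached from an interrupt handler, so use the _nosync
         * variant: it must not wait for in-flight handlers (including the
         * one we are running in). */
        pva_kmd_disable_all_interrupts_nosync(pva);
        /* ...then mark the device as in recovery, freeze FW, fail waiters... */
}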

View File

@@ -81,9 +81,9 @@ void pva_kmd_enable_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line);
/**
* @brief Disable an interrupt line.
* @brief Disable an interrupt line without waiting for running interrupt handlers to complete.
*/
void pva_kmd_disable_intr(struct pva_kmd_device *pva,
void pva_kmd_disable_intr_nosync(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line);
/**
@@ -104,13 +104,6 @@ void pva_kmd_free_intr(struct pva_kmd_device *pva,
*/
enum pva_error pva_kmd_read_fw_bin(struct pva_kmd_device *pva);
/**
* @brief Reset assert FW so it can be in recovery and
* user submission halted. This is required for host1x
* watchdog, or kmd submission timeout failures.
*/
void pva_kmd_fw_reset_assert(struct pva_kmd_device *pva);
/**
* @brief Get starting IOVA of the memory shared by R5 and KMD.
*
@@ -141,4 +134,9 @@ void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva);
*/
void pva_kmd_config_sid_regs(struct pva_kmd_device *pva);
/**
* @brief Set the PVA HW reset line.
*/
void pva_kmd_set_reset_line(struct pva_kmd_device *pva);
#endif // PVA_KMD_SHIM_SILICON_H

View File

@@ -9,4 +9,8 @@ void pva_kmd_shim_add_trace_vpu_exec(
struct pva_kmd_device *pva,
struct pva_kmd_fw_msg_vpu_trace const *trace_info);
void pva_kmd_shim_add_trace_fence(
struct pva_kmd_device *pva,
struct pva_kmd_fw_msg_fence_trace const *trace_info);
#endif // PVA_KMD_SHIM_TRACE_EVENT_H

View File

@@ -20,24 +20,10 @@ struct pva_ops_context_init {
struct pva_ops_response_context_init {
enum pva_error error;
uint16_t max_cmdbuf_chunk_size;
uint64_t ccq_shm_hdl;
};
struct pva_ops_syncpt_register {
#define PVA_OPS_OPCODE_SYNCPT_REGISTER (2U | PVA_OPS_PRIVATE_OPCODE_FLAG)
struct pva_ops_header header;
};
struct pva_ops_response_syncpt_register {
enum pva_error error;
uint32_t syncpt_ro_res_id;
uint32_t syncpt_rw_res_id;
uint32_t synpt_size;
uint32_t synpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT];
uint32_t num_ro_syncpoints;
uint32_t pad;
};
/**
* Calculates the total memory size required for a PVA submission queue.
* This includes the size of the queue header and the combined size of all command buffer submission info structures.
@@ -67,7 +53,8 @@ struct pva_ops_queue_create {
struct pva_ops_response_queue_create {
enum pva_error error;
uint32_t queue_id;
uint32_t syncpt_fence_counter;
uint32_t syncpt_id;
uint32_t syncpt_current_value;
};
/* KMD API: queue destroy */

View File

@@ -130,7 +130,8 @@ void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name,
debugfs_create_u32(name, 0644, de, pdata);
}
void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name,
enum pva_error pva_kmd_debugfs_create_file(struct pva_kmd_device *pva,
const char *name,
struct pva_kmd_file_ops *pvafops)
{
struct pva_kmd_linux_device_data *device_data =
@@ -142,7 +143,12 @@ void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name,
struct dentry *file;
file = debugfs_create_file(name, 0644, de, pvafops, fops);
ASSERT(file != NULL);
if (file == NULL) {
pva_kmd_log_err("Failed to create debugfs file");
return PVA_INVAL;
}
return PVA_SUCCESS;
}
void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva)

View File

@@ -53,15 +53,6 @@ void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
}
}
void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id,
uint64_t *syncpt_iova)
{
uint32_t offset = 0;
offset = nvpva_syncpt_unit_interface_get_byte_offset_ext(syncpt_id);
*syncpt_iova = safe_addu64(pva->syncpt_ro_iova, (uint64_t)offset);
}
void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
{
phys_addr_t base;
@@ -69,7 +60,6 @@ void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
int err = 0;
uint32_t stride, num_syncpts;
uint32_t syncpt_page_size;
uint32_t syncpt_offset[PVA_NUM_RW_SYNCPTS];
dma_addr_t sp_start;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
@@ -92,53 +82,38 @@ void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
syncpt_page_size = nvpva_syncpt_unit_interface_get_byte_offset_ext(1);
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
sp_start = dma_map_resource(dev, base, size, DMA_TO_DEVICE,
sp_start = dma_map_resource(dev, base, size, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, sp_start)) {
FAULT("Failed to pin RO syncpoints\n");
FAULT("Failed to pin syncpoints\n");
}
} else {
FAULT("Failed to pin RO syncpoints\n");
FAULT("Failed to pin syncpoints\n");
}
pva->syncpt_ro_iova = sp_start;
pva->syncpt_offset = syncpt_page_size;
pva->num_syncpts = (size / syncpt_page_size);
pva->ro_syncpt_base_iova = sp_start;
pva->syncpt_page_size = syncpt_page_size;
pva->num_ro_syncpts = num_syncpts;
// The same region is also used for RW syncpts...
pva->rw_syncpt_base_iova = sp_start;
pva->rw_syncpt_region_size = size;
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
pva->syncpt_rw[i].syncpt_id = nvpva_get_syncpt_client_managed(
props->pdev, "pva_syncpt");
if (pva->syncpt_rw[i].syncpt_id == 0) {
uint32_t syncpt_id;
uint64_t syncpt_iova;
syncpt_id = nvpva_get_syncpt_client_managed(props->pdev,
"pva_syncpt");
if (syncpt_id == 0) {
FAULT("Failed to get syncpt\n");
}
syncpt_offset[i] =
syncpt_iova = safe_addu64(
sp_start,
nvpva_syncpt_unit_interface_get_byte_offset_ext(
pva->syncpt_rw[i].syncpt_id);
err = nvpva_syncpt_read_ext_check(
props->pdev, pva->syncpt_rw[i].syncpt_id,
&pva->syncpt_rw[i].syncpt_value);
if (err < 0) {
FAULT("Failed to read syncpoint value\n");
}
}
syncpt_id));
pva->syncpt_rw_iova =
dma_map_resource(dev,
safe_addu64(base, (uint64_t)syncpt_offset[0]),
safe_mulu64((uint64_t)pva->syncpt_offset,
(uint64_t)PVA_NUM_RW_SYNCPTS),
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, pva->syncpt_rw_iova)) {
FAULT("Failed to pin RW syncpoints\n");
}
pva->syncpt_rw[0].syncpt_iova = pva->syncpt_rw_iova;
for (uint32_t i = 1; i < PVA_NUM_RW_SYNCPTS; i++) {
if (safe_addu32(syncpt_offset[i - 1], pva->syncpt_offset) !=
syncpt_offset[i]) {
FAULT("RW syncpts are not contiguous\n");
}
pva->syncpt_rw[i].syncpt_iova = safe_addu64(
pva->syncpt_rw_iova,
safe_mulu64((uint64_t)pva->syncpt_offset, (uint64_t)i));
pva->rw_syncpts[i].syncpt_iova = syncpt_iova;
pva->rw_syncpts[i].syncpt_id = syncpt_id;
}
}
@@ -166,25 +141,19 @@ void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva)
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
dma_unmap_resource(dev, pva->syncpt_ro_iova, size,
DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
dma_unmap_resource(dev, pva->syncpt_rw_iova,
safe_mulu64((uint64_t)pva->syncpt_offset,
(uint64_t)PVA_NUM_RW_SYNCPTS),
dma_unmap_resource(dev, pva->ro_syncpt_base_iova, size,
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
} else {
FAULT("Failed to unmap syncpts\n");
}
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
nvpva_syncpt_put_ref_ext(props->pdev,
pva->syncpt_rw[i].syncpt_id);
pva->syncpt_rw[i].syncpt_id = 0;
pva->syncpt_rw[i].syncpt_iova = 0;
pva->syncpt_rw[i].syncpt_value = 0;
pva->rw_syncpts[i].syncpt_id);
pva->rw_syncpts[i].syncpt_id = 0;
pva->rw_syncpts[i].syncpt_iova = 0;
}
pva->syncpt_ro_iova = 0;
pva->syncpt_rw_iova = 0;
pva->syncpt_offset = 0;
pva->ro_syncpt_base_iova = 0;
pva->syncpt_page_size = 0;
nvpva_syncpt_unit_interface_deinit(props->pdev);
}
@@ -235,21 +204,11 @@ void pva_kmd_power_off(struct pva_kmd_device *pva)
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
// Set reset line before cutting off power
/* Power management operation is asynchronous. We don't control when PVA
* will really be powered down. However, we need to free memory after
* this call. Therefore, we assert the reset line to stop PVA from any
* further activity. */
reset_control_acquire(props->reset_control);
reset_control_assert(props->reset_control);
reset_control_release(props->reset_control);
pm_runtime_mark_last_busy(&props->pdev->dev);
pm_runtime_put(&props->pdev->dev);
}
void pva_kmd_fw_reset_assert(struct pva_kmd_device *pva)
void pva_kmd_set_reset_line(struct pva_kmd_device *pva)
{
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);

View File

@@ -23,7 +23,7 @@ struct pva_kmd_device_memory_impl {
struct pva_kmd_device_memory dev_mem;
struct dma_buf *dmabuf;
struct iosys_map iosysmap;
struct dma_buf_attachment *dmabuf_attch;
struct dma_buf_attachment *dmabuf_attach;
struct sg_table *sgt;
uint64_t offset;
};
@@ -36,11 +36,20 @@ pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva,
struct device *dev = get_context_device(pva, smmu_ctx_idx);
dma_addr_t pa = 0U;
void *va = NULL;
struct pva_kmd_device_memory_impl *mem_impl;
mem_impl = pva_kmd_zalloc(sizeof(struct pva_kmd_device_memory_impl));
if (mem_impl == NULL) {
goto err_out;
}
if (size == 0u) {
pva_kmd_log_err("Invalid allocation size");
goto free_mem;
}
struct pva_kmd_device_memory_impl *mem_impl =
pva_kmd_zalloc(sizeof(struct pva_kmd_device_memory_impl));
va = dma_alloc_coherent(dev, size, &pa, GFP_KERNEL);
if (va == NULL) {
if (IS_ERR_OR_NULL(va)) {
pva_kmd_log_err("dma_alloc_coherent failed");
goto free_mem;
}
@@ -49,12 +58,13 @@ pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva,
mem_impl->dev_mem.size = size;
mem_impl->dev_mem.pva = pva;
mem_impl->dev_mem.smmu_ctx_idx = smmu_ctx_idx;
mem_impl->dev_mem.iova_access_flags = iova_access_flags;
mem_impl->dmabuf = NULL;
return &mem_impl->dev_mem;
free_mem:
pva_kmd_free(mem_impl);
err_out:
return NULL;
}
@@ -66,13 +76,16 @@ struct pva_kmd_device_memory *
pva_kmd_device_memory_acquire(uint64_t memory_handle, uint64_t offset,
uint64_t size, struct pva_kmd_context *ctx)
{
struct pva_kmd_device_memory_impl *mem_impl =
(struct pva_kmd_device_memory_impl *)pva_kmd_zalloc(
sizeof(struct pva_kmd_device_memory_impl));
struct dma_buf *dma_buf;
struct pva_kmd_device_memory_impl *mem_impl;
mem_impl = pva_kmd_zalloc(sizeof(struct pva_kmd_device_memory_impl));
if (mem_impl == NULL) {
goto err_out;
}
dma_buf = dma_buf_get(memory_handle);
if (dma_buf == NULL) {
if (IS_ERR_OR_NULL(dma_buf)) {
pva_kmd_log_err("Failed to acquire memory");
goto free_mem;
}
@@ -92,6 +105,7 @@ put_dmabuf:
dma_buf_put(dma_buf);
free_mem:
pva_kmd_free(mem_impl);
err_out:
return NULL;
}
@@ -103,7 +117,7 @@ void pva_kmd_device_memory_free(struct pva_kmd_device_memory *mem)
if (mem_impl->dmabuf != NULL) {
/* This memory comes from dma_buf_get */
if (mem->iova != 0U) {
if (mem_impl->dmabuf_attach != NULL) {
pva_kmd_device_memory_iova_unmap(mem);
}
@@ -160,14 +174,28 @@ pva_kmd_device_memory_iova_map(struct pva_kmd_device_memory *memory,
pva_math_error math_err = MATH_OP_SUCCESS;
struct pva_kmd_device_memory_impl *mem_impl = container_of(
memory, struct pva_kmd_device_memory_impl, dev_mem);
// struct pva_kmd_linux_device_plat_data *plat_data =
// pva_kmd_linux_device_get_plat_data(pva);
// struct device *dev = plat_data->dev[smmu_ctx_idx];
struct device *dev = get_context_device(pva, smmu_ctx_idx);
struct dma_buf_attachment *attach;
struct sg_table *sgt;
enum pva_error err = PVA_SUCCESS;
enum dma_data_direction dma_direction;
uint64_t iova;
switch (access_flags) {
case PVA_ACCESS_RO: // Read-Only
dma_direction = DMA_TO_DEVICE;
break;
case PVA_ACCESS_WO: // Write-Only
dma_direction = DMA_FROM_DEVICE;
break;
case PVA_ACCESS_RW: // Read-Write
dma_direction = DMA_BIDIRECTIONAL;
break;
default:
pva_kmd_log_err("Invalid access flags\n");
err = PVA_INVAL;
goto err_out;
}
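/* Note (illustrative): the DMA direction is named from the CPU's point of
 * view -- a buffer PVA only reads (PVA_ACCESS_RO) carries data to the
 * device, hence DMA_TO_DEVICE; a write-only buffer carries data back from
 * the device, hence DMA_FROM_DEVICE. */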
attach = dma_buf_attach(mem_impl->dmabuf, dev);
if (IS_ERR_OR_NULL(attach)) {
@@ -176,28 +204,32 @@ pva_kmd_device_memory_iova_map(struct pva_kmd_device_memory *memory,
goto err_out;
}
mem_impl->dmabuf_attch = attach;
sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
sgt = dma_buf_map_attachment(attach, dma_direction);
if (IS_ERR_OR_NULL(sgt)) {
err = PVA_INVAL;
pva_kmd_log_err("Failed to map attachment\n");
goto detach;
}
mem_impl->sgt = sgt;
mem_impl->dev_mem.iova =
addu64(sg_dma_address(sgt->sgl), mem_impl->offset, &math_err);
iova = addu64(sg_dma_address(sgt->sgl), mem_impl->offset, &math_err);
if (math_err != MATH_OP_SUCCESS) {
err = PVA_INVAL;
pva_kmd_log_err(
"pva_kmd_device_memory_iova_map Invalid DMA address\n");
goto detach;
goto unmap;
}
mem_impl->sgt = sgt;
mem_impl->dmabuf_attach = attach;
mem_impl->dev_mem.iova = iova;
mem_impl->dev_mem.pva = pva;
mem_impl->dev_mem.smmu_ctx_idx = smmu_ctx_idx;
mem_impl->dev_mem.iova_access_flags = access_flags;
return PVA_SUCCESS;
unmap:
dma_buf_unmap_attachment(attach, sgt, dma_direction);
detach:
dma_buf_detach(mem_impl->dmabuf, mem_impl->dmabuf_attch);
dma_buf_detach(mem_impl->dmabuf, attach);
err_out:
return err;
}
@@ -209,10 +241,11 @@ void pva_kmd_device_memory_iova_unmap(struct pva_kmd_device_memory *memory)
ASSERT(mem_impl->dmabuf != NULL);
dma_buf_unmap_attachment(mem_impl->dmabuf_attch, mem_impl->sgt,
dma_buf_unmap_attachment(mem_impl->dmabuf_attach, mem_impl->sgt,
DMA_BIDIRECTIONAL);
dma_buf_detach(mem_impl->dmabuf, mem_impl->dmabuf_attch);
memory->iova = 0;
dma_buf_detach(mem_impl->dmabuf, mem_impl->dmabuf_attach);
mem_impl->sgt = NULL;
mem_impl->dmabuf_attach = NULL;
}
uint64_t pva_kmd_get_r5_iova_start(void)

View File

@@ -50,13 +50,13 @@
extern struct platform_driver pva_kmd_linux_smmu_context_driver;
extern atomic_t g_num_smmu_ctxs;
static bool load_from_gsc = PVA_KMD_LOAD_FROM_GSC_DEFAULT;
static bool app_authenticate = PVA_KMD_APP_AUTH_DEFAULT;
static bool pva_test_mode; //false by default
module_param(load_from_gsc, bool, 0);
MODULE_PARM_DESC(load_from_gsc, "Load V3 FW from GSC");
module_param(app_authenticate, bool, 0);
MODULE_PARM_DESC(app_authenticate, "Enable app authentication");
module_param(pva_test_mode, bool, 0);
MODULE_PARM_DESC(pva_test_mode, "Enable test mode");
struct nvpva_device_data t23x_pva0_props = {
.version = PVA_CHIP_T23X,
@@ -112,11 +112,15 @@ static int pva_get_gsc_priv_hwid(struct platform_device *pdev)
return fwspec->ids[0] & 0xffff;
}
static void pva_kmd_linux_register_hwpm(struct pva_kmd_device *pva)
static int pva_kmd_linux_register_hwpm(struct pva_kmd_device *pva)
{
struct tegra_soc_hwpm_ip_ops *hwpm_ip_ops =
pva_kmd_zalloc(sizeof(*hwpm_ip_ops));
if (hwpm_ip_ops == NULL) {
return -ENOMEM;
}
hwpm_ip_ops->ip_dev = pva;
hwpm_ip_ops->ip_base_address = safe_addu64(
pva->reg_phy_base[0], (uint64_t)pva->regspec.cfg_perf_mon);
@@ -125,6 +129,7 @@ static void pva_kmd_linux_register_hwpm(struct pva_kmd_device *pva)
hwpm_ip_ops->hwpm_ip_reg_op = &pva_kmd_hwpm_ip_reg_op;
tegra_soc_hwpm_ip_register(hwpm_ip_ops);
pva->debugfs_context.data_hwpm = hwpm_ip_ops;
return 0;
}
static void pva_kmd_linux_unregister_hwpm(struct pva_kmd_device *pva)
@@ -256,10 +261,57 @@ static void pva_kmd_free_co_mem(struct platform_device *pdev)
}
}
static bool pva_kmd_in_test_mode(struct device *dev, bool param_test_mode)
{
const char *dt_test_mode = NULL;
if (of_property_read_string(dev->of_node, "nvidia,test_mode_enable",
&dt_test_mode)) {
return param_test_mode;
}
if (strcmp(dt_test_mode, "true")) {
return param_test_mode;
}
return true;
}
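/* Resolution order (illustrative): a DT entry nvidia,test_mode_enable =
 * "true" forces test mode on; if the property is absent or holds any other
 * string, the pva_test_mode module parameter decides. */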
static struct kobj_type nvpva_kobj_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
};
/**
* Read VPU authentication property from device tree
*
* @param dev Pointer to the device structure
* @return true if authentication should be enabled, false otherwise
*/
static bool pva_kmd_linux_read_vpu_auth(const struct device *dev)
{
bool auth_enabled = false;
int len;
const __be32 *val;
val = of_get_property(dev->of_node, "nvidia,vpu-auth", &len);
if ((val != NULL) && (len >= (int)sizeof(__be32))) {
u32 value = (u32)be32_to_cpu(*val);
if (value != 0U) {
auth_enabled = true;
dev_dbg(dev, "VPU authentication enabled\n");
} else {
auth_enabled = false;
dev_dbg(dev, "VPU authentication disabled\n");
}
} else {
dev_dbg(dev,
"No VPU authentication property found, using default: %d\n",
auth_enabled);
}
return auth_enabled;
}
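/* Example (illustrative): nvidia,vpu-auth = <1>; in the device tree enables
 * authentication, <0> disables it, and a missing property falls back to the
 * default (disabled). */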
static int pva_probe(struct platform_device *pdev)
{
int err = 0U;
@@ -273,6 +325,9 @@ static int pva_probe(struct platform_device *pdev)
struct clk_bulk_data *clks;
struct clk *c;
bool pva_enter_test_mode = false;
bool app_authenticate;
device_id = of_match_device(tegra_pva_of_match, dev);
if (!device_id) {
dev_err(dev, "no match for pva dev\n");
@@ -286,6 +341,8 @@ static int pva_probe(struct platform_device *pdev)
return -ENODATA;
}
app_authenticate = pva_kmd_linux_read_vpu_auth(dev);
/* Create devices for child nodes of this device */
of_platform_default_populate(dev->of_node, NULL, dev);
@@ -300,17 +357,12 @@ static int pva_probe(struct platform_device *pdev)
pva_props->pdev = pdev;
mutex_init(&pva_props->lock);
pva_device =
pva_kmd_device_create(pva_props->version, 0, app_authenticate);
pva_enter_test_mode = pva_kmd_in_test_mode(dev, pva_test_mode);
pva_device = pva_kmd_device_create(
pva_props->version, 0, app_authenticate, pva_enter_test_mode);
pva_device->is_hv_mode = is_tegra_hypervisor_mode();
/* On L4T, forcing boot from file */
/* If needed to load from GSC, remove the below block */
if (!pva_device->is_hv_mode) {
load_from_gsc = false;
}
pva_device->load_from_gsc = load_from_gsc;
pva_device->stream_ids[pva_device->r5_image_smmu_context_id] =
pva_get_gsc_priv_hwid(pdev);
@@ -352,8 +404,17 @@ static int pva_probe(struct platform_device *pdev)
pva_kmd_linux_host1x_init(pva_device);
pva_kmd_debugfs_create_nodes(pva_device);
pva_kmd_linux_register_hwpm(pva_device);
err = pva_kmd_debugfs_create_nodes(pva_device);
if (err != PVA_SUCCESS) {
dev_err(dev, "debugfs creation failed\n");
goto err_cdev_init;
}
err = pva_kmd_linux_register_hwpm(pva_device);
if (err != PVA_SUCCESS) {
dev_err(dev, "pva_kmd_linux_register_hwpm failed\n");
goto err_cdev_init;
}
if (!pva_device->is_hv_mode && pva_device->load_from_gsc) {
err = pva_kmd_get_co_info(pdev);

View File

@@ -6,6 +6,11 @@
#include "trace/events/nvpva_ftrace.h"
#include <linux/nvhost.h>
static uint32_t get_job_id(uint32_t queue_id, uint64_t submit_id)
{
return (queue_id & 0x000000FF) << 24 | (submit_id & 0xFFFFFFU);
}
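/* For illustration: get_job_id(0x12U, 0x345678U) packs to 0x12345678 --
 * queue ID in the top 8 bits, the low 24 bits of the submit ID below it. */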
void pva_kmd_shim_add_trace_vpu_exec(
struct pva_kmd_device *pva,
struct pva_kmd_fw_msg_vpu_trace const *trace_info)
@@ -38,7 +43,8 @@ void pva_kmd_shim_add_trace_vpu_exec(
// In V2, Job ID is a 32-bit value with the top 8 bits being the queue ID
// and the bottom 24 bits being a per-task counter. In V3, we only use the
// queue ID.
uint32_t job_id = (trace_info->queue_id & 0x000000FF) << 24;
uint32_t job_id =
get_job_id(trace_info->queue_id, trace_info->submit_id);
trace_pva_job_ext_event(job_id, trace_info->ccq_id,
0, // syncpt_thresh,
@@ -50,3 +56,42 @@ void pva_kmd_shim_add_trace_vpu_exec(
trace_info->num_prefences, trace_info->prog_id,
trace_info->submit_id, vpu_start);
}
void pva_kmd_shim_add_trace_fence(
struct pva_kmd_device *pva,
struct pva_kmd_fw_msg_fence_trace const *trace_info)
{
uint32_t job_id;
// We want to log events only for user workloads
if (trace_info->ccq_id == PVA_PRIV_CCQ_ID) {
return;
}
job_id = get_job_id(trace_info->queue_id, trace_info->submit_id);
if (trace_info->action == PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT) {
if (trace_info->type == PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT) {
trace_job_prefence(job_id, trace_info->fence_id,
trace_info->value);
} else if (trace_info->type ==
PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) {
trace_job_prefence_semaphore(job_id,
trace_info->fence_id,
trace_info->offset,
trace_info->value);
}
} else if (trace_info->action ==
PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL) {
if (trace_info->type == PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT) {
trace_job_postfence(job_id, trace_info->fence_id,
trace_info->value);
} else if (trace_info->type ==
PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) {
trace_job_postfence_semaphore(job_id,
trace_info->fence_id,
trace_info->offset,
trace_info->value);
}
}
}

View File

@@ -14,9 +14,13 @@ static struct pva_kmd_isr_data *get_isr(struct pva_kmd_device *pva,
struct pva_kmd_isr_data *isr_data;
ASSERT(intr_line < PVA_KMD_INTR_LINE_COUNT);
isr_data = &plat_data->isr[intr_line];
ASSERT(isr_data->binded);
if (!isr_data->binded) {
return NULL;
}
return isr_data;
}
static irqreturn_t pva_isr(int irq, void *dev_id)
{
struct pva_kmd_isr_data *isr_data = (struct pva_kmd_isr_data *)dev_id;
@@ -35,40 +39,60 @@ enum pva_error pva_kmd_bind_intr_handler(struct pva_kmd_device *pva,
pva_kmd_linux_device_get_data(pva);
struct pva_kmd_isr_data *isr_data = &plat_data->isr[intr_line];
struct nvpva_device_data *props = plat_data->pva_device_properties;
enum pva_error pva_err = PVA_SUCCESS;
int irq;
isr_data->irq = platform_get_irq(props->pdev, intr_line);
ASSERT(isr_data->binded == false);
irq = platform_get_irq(props->pdev, intr_line);
if (irq < 0) {
pva_kmd_log_err("Failed to get irq number");
pva_err = kernel_err2pva_err(irq);
goto err_out;
}
isr_data->irq = irq;
isr_data->handler = handler;
isr_data->handler_data = data;
isr_data->binded = true;
isr_data->intr_line = intr_line;
err = request_threaded_irq(isr_data->irq, NULL, pva_isr, IRQF_ONESHOT,
"pva-isr", isr_data);
if (err != 0) {
pva_kmd_log_err("Failed to bind interrupt handler");
pva_err = kernel_err2pva_err(err);
goto err_out;
}
return kernel_err2pva_err(err);
isr_data->binded = true;
return PVA_SUCCESS;
err_out:
return pva_err;
}
void pva_kmd_enable_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line)
{
struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line);
if (isr_data != NULL) {
enable_irq(isr_data->irq);
}
}
void pva_kmd_disable_intr(struct pva_kmd_device *pva,
void pva_kmd_disable_intr_nosync(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line)
{
struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line);
disable_irq(isr_data->irq);
if (isr_data != NULL) {
disable_irq_nosync(isr_data->irq);
}
}
void pva_kmd_free_intr(struct pva_kmd_device *pva,
enum pva_kmd_intr_line intr_line)
{
struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line);
free_irq(isr_data->irq, isr_data);
ASSERT(isr_data != NULL);
(void)free_irq(isr_data->irq, isr_data);
isr_data->binded = false;
}

View File

@@ -11,7 +11,12 @@
void *pva_kmd_zalloc(uint64_t size)
{
return kvzalloc(size, GFP_KERNEL);
void *ptr = kvzalloc(size, GFP_KERNEL);
if (IS_ERR_OR_NULL(ptr)) {
return NULL;
}
return ptr;
}
void pva_kmd_free(void *ptr)

View File

@@ -132,12 +132,16 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device)
sid_idx < safe_subu32(pva_device->hw_consts.n_smmu_contexts, 2U);
sid_idx++) {
uint32_t smmu_ctx_idx = safe_addu32(sid_idx, 1U);
pva_device->stream_ids[smmu_ctx_idx] = g_smmu_ctxs[sid_idx].sid;
device_data->smmu_contexts[smmu_ctx_idx] =
g_smmu_ctxs[sid_idx].pdev;
dma_set_mask_and_coherent(
&device_data->smmu_contexts[smmu_ctx_idx]->dev,
struct pva_kmd_linux_smmu_ctx *smmu_ctx = &g_smmu_ctxs[sid_idx];
pva_device->stream_ids[smmu_ctx_idx] = smmu_ctx->sid;
device_data->smmu_contexts[smmu_ctx_idx] = smmu_ctx->pdev;
dma_set_mask_and_coherent(&smmu_ctx->pdev->dev,
DMA_BIT_MASK(39));
//set max segment size to UINT_MAX to avoid creating scatterlist >= 4GB
//during IOVA mapping, which will overflow the scatterlist length field,
//causing IOVA leak
dma_set_max_seg_size(&smmu_ctx->pdev->dev, UINT_MAX);
}
/* Configure SMMU contexts for privileged operations */

View File

@@ -6,7 +6,7 @@
#define PVA_NUM_ENGINES 2U
#define PVA_MAX_NUM_CCQ 8
#define PVA_CCQ_DEPTH 8U
#define PVA_CCQ_DEPTH 14U
#define PVA_USER_CCQ_BASE 1
#define PVA_INVALID_CCQ_ID 0xFF
#define PVA_INVALID_ENGINE_ID 0xFFU
@@ -138,4 +138,6 @@
#define PVA_KMD_CHIP_ID_T26X "GEN3"
#define PVA_KMD_CHIP_ID_DEFAULT PVA_KMD_CHIP_ID_T23X
#define PVA_KMD_TEST_MODE_ENV_VAR "PVA_TEST_MODE"
#endif // PVA_CONSTANTS_H

View File

@@ -689,6 +689,21 @@ static inline uint32_t safe_wraparound_dec_u32(uint32_t counter)
return result;
}
static inline uint32_t safe_wrap_add_u32(uint32_t a, uint32_t b)
{
return (uint32_t)(((uint64_t)a + (uint64_t)b) & 0xFFFFFFFFU);
}
static inline uint32_t safe_wrap_sub_u32(uint32_t a, uint32_t b)
{
return (uint32_t)(((uint64_t)a - (uint64_t)b) & 0xFFFFFFFFU);
}
static inline uint32_t safe_wrap_mul_u32(uint32_t a, uint32_t b)
{
return (uint32_t)(((uint64_t)a * (uint64_t)b) & 0xFFFFFFFFU);
}
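/* For illustration: these helpers make the modulo-2^32 wraparound explicit,
 * e.g. safe_wrap_add_u32(0xFFFFFFFFU, 2U) == 1U and
 * safe_wrap_sub_u32(0U, 1U) == 0xFFFFFFFFU. */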
#define SAT_ADD_DEFINE(a, b, name, type) \
static inline type sat_add##name(type a, type b) \
{ \

View File

@@ -0,0 +1,51 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef PVA_API_PRIVATE_H
#define PVA_API_PRIVATE_H
#include "pva_api.h"
//For legacy support not exposed by public API
#define PVA_CMD_FLAGS_USE_LEGACY_POINTER 0x1
struct pva_fw_vpu_legacy_ptr_symbol {
uint64_t base;
uint32_t offset;
uint32_t size;
};
enum pva_error_inject_codes {
PVA_ERR_INJECT_WDT_HW_ERR, // watchdog Hardware error
PVA_ERR_INJECT_WDT_TIMEOUT, // watchdog Timeout error
PVA_ERR_INJECT_VMEM_CLEAR, // vmem clear
PVA_ERR_INJECT_ASSERT_CHECK, // assert check
PVA_ERR_INJECT_ARMV7_EXCEPTION, // ARMv7 exception
};
struct pva_cmd_run_unit_tests {
#define PVA_CMD_OPCODE_RUN_UNIT_TESTS (PVA_CMD_OPCODE_MAX + 0U)
struct pva_cmd_header header;
#define PVA_FW_UTESTS_MAX_ARGC 16U
uint8_t argc;
uint8_t pad[3];
uint32_t in_resource_id;
uint32_t in_offset;
uint32_t in_size;
uint32_t out_resource_id;
uint32_t out_offset;
uint32_t out_size;
};
struct pva_cmd_err_inject {
#define PVA_CMD_OPCODE_ERR_INJECT (PVA_CMD_OPCODE_MAX + 1U)
struct pva_cmd_header header;
uint32_t err_inject_code; // enum pva_error_inject_codes
};
struct pva_cmd_gr_check {
#define PVA_CMD_OPCODE_GR_CHECK (PVA_CMD_OPCODE_MAX + 2U)
struct pva_cmd_header header;
};
#define PVA_CMD_OPCODE_COUNT (PVA_CMD_OPCODE_MAX + 3U)
#endif // PVA_API_PRIVATE_H