From 63f6f2f15956e7d6b827c9d07071aa2d0fac6f9d Mon Sep 17 00:00:00 2001 From: abhinayaa Date: Thu, 24 Apr 2025 04:43:25 +0000 Subject: [PATCH] pva: mirror from gitlab cv/pva-sys-sw Gitlab commit a307885246be7 ("umd: port intf tests to umd - ...") Changes since last deployment: - umd: port intf tests to umd - add NegativeTest_MaxContextCreate_Single... - Remove VPU auth default macro - kmd: Add a null check to fix static defect - tests: Fix sync unregistration test - fw: Handle DMA error when fetching chunk - umd_tests: add requirements tests - Fix error path of Linux KMD memory API - Add kernel code style check script - address review comments for access flag feat - fix memory leak in access buffer tests - kmd: use correct formatting - fw: update license and use macro - tests: add tests for buffer access type - feat: respect buffer access flags - Update deployment document - Add a default fence wait timeout - Fix error path of KMD sync ops submit - Move recovery state check to pva_kmd_device_busy - Fix error path of profiling level update - Increase max CCQ FIFO depth to 14 - kmd: zero initialize all commands - Make KMD robust against firmware abort - Add multi user stress tests - Conditional VMEM Clear Check - Conditional VMEM Clear Check - Fix static defects in KMD - Reading auth for all the PVA devices - Add support for VPU Device Tree authentication - UMD: Add Null and range checks - Remove assert and propagate error - Add error injection tests - Bug fix - 5207608 - Update CUPVA tests in CI to 2.7.0 rc3 - tests: add register_exec_noop_with_bit_flip - fw: Fix static defects - kmd: freeze PVA before freeing code/data memory - Add missing test_mode parameter for run_test_plan - deploy: allow deploying to different branches - pva kmd: linux: handle test mode dt entry - fw: baremetal: bound breaklink params - coverity: Set expiry for code quality report - kmd: Remove PVA_IS_DEBUG from native timeout calc - Reduce iterations of long duration UMD tests - Fix IOVA leak in Linux KMD - fw:common: fix order of enable/disable virt - umd_test: add missing requirement test specs - umd_test: add test for perf spikes - Fix nsight fence logging - deploy: fix GVS build failure - Add FSP Abort Hook - Execution timeout - Trace fences for NSIGHT - Fix shared buffer handling during suspend/resume - tests: add more tests for resource unregistration - Add MODS test support - KMD: Fix static defect - umd: fix double free in cuextend - umd: Free pva_memory object on free() - Unify VPU and PPE syscall ID - Clean up public API - cuextend: Cleanup implementation - cuextend: Add API to get stream payload - compat: Fix missing flushes of event fences - cuExtend: Unified code path for stream submit - cuExtend: Implementation of cuExtend Stream Submit - cuExtend: Stream submit API definitions - cuExtend: Sync to new cuExtend header - Set test mode default through macro - fw: Add PPE error codes - Use zalloc when allocating resource record - Allocate Temporary Buffers for DMA Config Loading - Fix fast reset failure test - Add DMA config allocator - kmd: Add unsafe API for block allocator - Add missing warning for Linux kernel build - Set err cmd idx to zero if there's no error - ci: Run tests for MODS test mode - Use 1K command buffer chunk size in MODS test mode - Allow developer to provide their own target lease - tests: add nvsci prefence_postfence_test - kmd: Sha calculation static defects fix - kmd: fix INT30-c static defect - Fix command index logging for PVA_FW_EVENT_RUN_VPU - Enable vpucfg_destroy_after_submit - 
tests: add tests spec for deterministic test - test: add cpu_signaller_pva_waiter_deterministic - tests: add cpu_waiter_pva_signaller_deterministic - Disable verbosity control of FW log - Ops free API should accept NULL ptr - Report TCM usage for t26x as well - Support non-contiguous syncpoints - umd: fix new top 25 CWE - License header update - L2SRAM flush command changes - debugfs: disable tests for broken nodes - debugfs: handle 0 input size for allowlist path - Move pva_kmd_device_idle to context destroy - Refactor interrupt handler binding in PVA KMD - Fix DMA registration error path - debugfs: Add read support for fw log level - Add stress test suites to CI - Fix error path for context init - Add stress test suites - umd: add NULL checks - ci: Perf Test Updates - ci: perf test updates - Enable boot from GSC in L4T GVS - Updating comment Change-Id: I98be7ec270ba5f6fd5bc0978d084d731a88e70b6 Signed-off-by: abhinayaa Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3348508 GVS: buildbot_gerritrpt Reviewed-by: Mohnish Jain --- drivers/video/tegra/host/pva/Makefile | 2 + .../src/fw/baremetal/include/pva-checkpoint.h | 37 +++ .../pva/src/fw/baremetal/include/pva-errors.h | 45 +-- .../include/pva-vpu-syscall-interface.h | 146 -------- .../tegra/host/pva/src/fw/include/pva_fw.h | 163 ++++++++- .../host/pva/src/fw/include/pva_fw_hyp.h | 13 +- .../host/pva/src/fw/include/pva_resource.h | 6 +- .../tegra/host/pva/src/include/pva_api.h | 37 +-- .../host/pva/src/include/pva_api_cmdbuf.h | 203 +++--------- .../tegra/host/pva/src/include/pva_api_cuda.h | 83 +++-- .../tegra/host/pva/src/include/pva_api_dma.h | 71 +--- .../tegra/host/pva/src/include/pva_api_ops.h | 15 +- .../host/pva/src/include/pva_api_types.h | 85 ++--- .../tegra/host/pva/src/include/pva_api_vpu.h | 313 +++++++++++------- .../host/pva/src/kmd/common/pva_kmd_abort.c | 22 +- .../host/pva/src/kmd/common/pva_kmd_abort.h | 4 +- .../src/kmd/common/pva_kmd_block_allocator.c | 43 ++- .../src/kmd/common/pva_kmd_block_allocator.h | 5 + .../host/pva/src/kmd/common/pva_kmd_cmdbuf.h | 39 ++- .../pva/src/kmd/common/pva_kmd_constants.h | 12 +- .../host/pva/src/kmd/common/pva_kmd_context.c | 165 ++++----- .../host/pva/src/kmd/common/pva_kmd_context.h | 3 - .../host/pva/src/kmd/common/pva_kmd_debugfs.c | 299 +++++++++-------- .../host/pva/src/kmd/common/pva_kmd_debugfs.h | 19 +- .../host/pva/src/kmd/common/pva_kmd_device.c | 164 +++++---- .../host/pva/src/kmd/common/pva_kmd_device.h | 38 ++- .../pva/src/kmd/common/pva_kmd_devmem_pool.c | 266 +++++++++++++++ .../pva/src/kmd/common/pva_kmd_devmem_pool.h | 100 ++++++ .../host/pva/src/kmd/common/pva_kmd_dma_cfg.c | 49 +-- .../host/pva/src/kmd/common/pva_kmd_dma_cfg.h | 5 +- .../src/kmd/common/pva_kmd_dma_cfg_binding.c | 11 + .../src/kmd/common/pva_kmd_dma_cfg_validate.c | 41 ++- .../pva/src/kmd/common/pva_kmd_fw_debug.c | 47 +-- .../pva/src/kmd/common/pva_kmd_fw_debug.h | 3 + .../pva/src/kmd/common/pva_kmd_fw_profiler.c | 62 +--- .../host/pva/src/kmd/common/pva_kmd_msg.c | 2 +- .../pva/src/kmd/common/pva_kmd_op_handler.c | 120 +------ .../host/pva/src/kmd/common/pva_kmd_pm.c | 92 ++--- .../host/pva/src/kmd/common/pva_kmd_queue.c | 130 +++----- .../host/pva/src/kmd/common/pva_kmd_queue.h | 4 +- .../src/kmd/common/pva_kmd_resource_table.c | 151 ++++----- .../src/kmd/common/pva_kmd_resource_table.h | 22 +- .../host/pva/src/kmd/common/pva_kmd_sha256.c | 58 ++-- .../src/kmd/common/pva_kmd_shared_buffer.c | 178 ++++------ .../pva/src/kmd/common/pva_kmd_silicon_boot.c | 64 ++-- 
.../kmd/common/pva_kmd_silicon_executable.c | 8 +- .../pva/src/kmd/common/pva_kmd_silicon_hwpm.c | 8 +- .../pva/src/kmd/common/pva_kmd_silicon_isr.c | 14 +- .../pva/src/kmd/common/pva_kmd_silicon_misc.c | 7 + .../pva/src/kmd/common/pva_kmd_submitter.c | 52 ++- .../pva/src/kmd/common/pva_kmd_submitter.h | 7 + .../pva/src/kmd/common/pva_kmd_tegra_stats.c | 30 +- .../kmd/common/shim/pva_kmd_device_memory.h | 1 + .../kmd/common/shim/pva_kmd_shim_debugfs.h | 5 +- .../src/kmd/common/shim/pva_kmd_shim_init.h | 19 +- .../kmd/common/shim/pva_kmd_shim_silicon.h | 18 +- .../common/shim/pva_kmd_shim_trace_event.h | 4 + .../tegra/host/pva/src/kmd/include/pva_kmd.h | 19 +- .../pva/src/kmd/linux/pva_kmd_linux_debugfs.c | 12 +- .../pva/src/kmd/linux/pva_kmd_linux_device.c | 97 ++---- .../kmd/linux/pva_kmd_linux_device_memory.c | 85 +++-- .../pva/src/kmd/linux/pva_kmd_linux_driver.c | 89 ++++- .../src/kmd/linux/pva_kmd_linux_event_trace.c | 47 ++- .../pva/src/kmd/linux/pva_kmd_linux_isr.c | 44 ++- .../pva/src/kmd/linux/pva_kmd_linux_misc.c | 7 +- .../pva/src/kmd/linux/pva_kmd_linux_smmu.c | 16 +- .../pva/src/libs/pva/include/pva_constants.h | 4 +- .../pva/src/libs/pva/include/pva_math_utils.h | 15 + .../pva/src/private_api/pva_api_private.h | 51 +++ 69 files changed, 2205 insertions(+), 1891 deletions(-) create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_devmem_pool.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_devmem_pool.h create mode 100644 drivers/video/tegra/host/pva/src/private_api/pva_api_private.h diff --git a/drivers/video/tegra/host/pva/Makefile b/drivers/video/tegra/host/pva/Makefile index 5a950acc..88cb7169 100644 --- a/drivers/video/tegra/host/pva/Makefile +++ b/drivers/video/tegra/host/pva/Makefile @@ -22,6 +22,7 @@ pva_objs += \ $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_context.o \ $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_debugfs.o \ $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_device.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_devmem_pool.o \ $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg.o \ $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_binding.o \ $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_validate.o \ @@ -70,6 +71,7 @@ pva_inc_flags += \ -I$(PVA_SYS_ABSDIR)/src/kmd/include \ -I$(PVA_SYS_ABSDIR)/src/kmd/linux/include \ -I$(PVA_SYS_ABSDIR)/src/libs/pva/include \ + -I$(PVA_SYS_ABSDIR)/src/private_api \ pva_def_flags += \ -DPVA_BUILD_MODE=PVA_BUILD_MODE_L4T \ diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-checkpoint.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-checkpoint.h index 869efb25..1d4b9d22 100644 --- a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-checkpoint.h +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-checkpoint.h @@ -224,6 +224,11 @@ #define PVA_ABORT_NOC_BIST (0xfcU) //! @endcond +/** + * @brief Minor code for abort in case of FSP abort. 
+ */ +#define PVA_ABORT_FSP 0x42U + /** @} */ /** @@ -299,4 +304,36 @@ #define PVA_ABORT_IRQ_TEST_HOST (0xE002U) #endif /** @} */ + +/** + * @defgroup PVA_ABORT_ARGUMENTS_FSP Argument to pva_abort() from FSP abort + * @ingroup PVA_ABORT_ARGUMENTS + * @{ + */ + +/** + * @brief Minor code for FSP aborts caused by SafeRTOS errors + */ +#define PVA_ABORT_FSP_SAFERTOS (0xE001U) + +/** + * @brief Minor code for FSP aborts caused by assertion failures in FSP + */ +#define PVA_ABORT_FSP_ASSERT (0xE002U) + +/** + * @brief Minor code for FSP aborts caused by exceptions in FSP + */ +#define PVA_ABORT_FSP_EXCEPTION (0xE003U) + +/** + * @brief Minor code for FSP aborts caused by stack guard failures + */ +#define PVA_ABORT_FSP_STACK (0xE004U) + +/** + * @brief Minor code for unknown FSP aborts + */ +#define PVA_ABORT_FSP_UNKNOWN (0xE005U) +/** @} */ #endif diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-errors.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-errors.h index bdc0ab74..b25fde33 100644 --- a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-errors.h +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-errors.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ +/* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #ifndef PVA_ERRORS_H #define PVA_ERRORS_H @@ -154,15 +154,6 @@ typedef uint16_t pva_errors_t; */ //! @cond DISABLE_DOCUMENTATION -/** - * @brief Error in case of Floating point NAN. - */ -#define PVA_ERR_PPE_DIVIDE_BY_0 (0x34U) -/** - * @brief Error in case of Floating point NAN. - */ -#define PVA_ERR_PPE_ILLEGAL_DEBUG (0x36U) - #define PVA_ERR_PPE_ILLEGAL_INSTR_ALIGN (0x37U) /** @@ -270,40 +261,6 @@ typedef uint16_t pva_errors_t; * more than HW Seq RAM size. */ #define PVA_ERR_DMA_HWSEQ_PROGRAM_TOO_LONG (0x217U) -/** @} */ - -/** - * @defgroup PVA_MISR_ERRORS - * - * @brief MISR error codes used across PVA. - * @{ - */ -/** - * @brief Error status when DMA MISR test is not run. - */ -#define PVA_ERR_MISR_NOT_RUN (0x280U) -/** - * @brief Error status when DMA MISR test did not complete. - */ -#define PVA_ERR_MISR_NOT_DONE (0x281U) -/** - * @brief Error status when DMA MISR test timed out. - */ -#define PVA_ERR_MISR_TIMEOUT (0x282U) -/** - * @brief Error status in case of DMA MISR test address failure. - */ -#define PVA_ERR_MISR_ADDR (0x283U) -/** - * @brief Error status in case of DMA MISR test data failure. - */ -#define PVA_ERR_MISR_DATA (0x284U) -/** - * @brief Error status in case of DMA MISR test data and address failure. 
- */ -#define PVA_ERR_MISR_ADDR_DATA (0x285U) -/** @} */ - /** * @defgroup PVA_VPU_ISR_ERRORS * diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-vpu-syscall-interface.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-vpu-syscall-interface.h index 91755f29..27e14cec 100644 --- a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-vpu-syscall-interface.h +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-vpu-syscall-interface.h @@ -6,150 +6,4 @@ #include -/** - * @breif Write syscall parameter will be a pointer to this struct - * @{ - */ -typedef union { - struct { - uint32_t addr; - uint32_t size; - } in; - struct { - uint32_t written_size; - } out; -} pva_fw_pe_syscall_write; -/** @} */ - -/** - * @defgroup PVA_VPU_SYSCALL_PERFMON_SAMPLE_PARAM_GROUP - * - * @brief Parameter specification for syscall perfmon_sample - * - * @{ - */ - -/** - * @brief Perfmon sample syscall parameter will be a pointer to this struct - */ -typedef struct { - /** counter_mask[0] is for ID: 0-31; counter_mask[1] is for ID: 32-63 */ - uint32_t counter_mask[2]; - uint32_t output_addr; -} pva_fw_pe_syscall_perfmon_sample; - -/** - * @brief Index for t26x performance counters for VPU - */ -#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T26X (0U) -#define PERFMON_COUNTER_ID_VPS_ID_VALID_T26X (1U) -#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T26X (2U) -#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T26X (3U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T26X (4U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T26X (5U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T26X (6U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T26X (7U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T26X (8U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T26X (9U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T26X (10U) -#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T26X (11U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T26X (12U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T26X (13U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T26X (14U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T26X (15U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T26X (16U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T26X (17U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T26X (18U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T26X (19U) -#define PERFMON_COUNTER_ID_VPS_ICACHE_FETCH_REQ_T26X (20U) -#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X (21U) -#define PERFMON_COUNTER_ID_VPS_ICACHE_PREEMPT_T26X (22U) -#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_LINES_T26X (23U) -#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_DUR_T26X (24U) -#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_DUR_T26X (25U) -#define PERFMON_COUNTER_ID_DLUT_BUSY_T26X (26U) -#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T26X (27U) -#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T26X (28U) -#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T26X (29U) -#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T26X (30U) -#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T26X (31U) -#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T26X (32U) -#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T26X (33U) -#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T26X (34U) -#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T26X (35U) -#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T26X (36U) - -/** - * @brief Index for t23x performance counters - */ -#define 
PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T23X (0U) -#define PERFMON_COUNTER_ID_VPS_ID_VALID_T23X (1U) -#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T23X (2U) -#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T23X (3U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T23X (4U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T23X (5U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T23X (6U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T23X (7U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T23X (8U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T23X (9U) -#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T23X (10U) -#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T23X (11U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T23X (12U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T23X (13U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T23X (14U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T23X (15U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T23X (16U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T23X (17U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T23X (18U) -#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T23X (19U) -#define PERFMON_COUNTER_ID_ICACHE_FETCH_REQ_T23X (20U) -#define PERFMON_COUNTER_ID_ICACHE_MISS_T23X (21U) -#define PERFMON_COUNTER_ID_ICACHE_PREEMP_T23X (22U) -#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_LINES_T23X (23U) -#define PERFMON_COUNTER_ID_ICACHE_MISS_DUR_T23X (24U) -#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_DUR_T23X (25U) -#define PERFMON_COUNTER_ID_DLUT_BUSY_T23X (26U) -#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T23X (27U) -#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T23X (28U) -#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T23X (29U) -#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T23X (30U) -#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T23X (31U) -#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T23X (32U) -#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T23X (33U) -#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T23X (34U) -#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T23X (35U) -#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T23X (36U) - -/** - * @brief Index for t26x performance counters for PPE - */ -#define PERFMON_COUNTER_ID_PPS_STALL_ID_NO_VAL_INSTR_T26X (0U) -#define PERFMON_COUNTER_ID_PPS_ID_VALID_T26X (1U) -#define PERFMON_COUNTER_ID_PPS_STALL_ID_REG_DEPEND_T26X (2U) -#define PERFMON_COUNTER_ID_PPS_STALL_ID_ONLY_T26X (3U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX1_ONLY_T26X (4U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_LD_DEPENDENCY_T26X (5U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_ST_DEPENDENCY_T26X (6U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_DEPENDENCY_T26X (7U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STRM_STORE_FLUSH_T26X (8U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_STORE_FLUSH_T26X (9U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STORE_FLUSH_T26X (10U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_LD_T26X (11U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_ST_T26X (12U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_T26X (13U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LD_T26X (14U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_ST_T26X (15U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LDST_T26X (16U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_PUSHBACK_T26X (17U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STQ_PUSHBACK_T26X (18U) -#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_FLUSH_T26X (19U) -#define 
PERFMON_COUNTER_ID_PPS_WFE_GPI_EX_STATE_T26X (20U) -#define PERFMON_COUNTER_ID_PPS_ICACHE_FETCH_REQ_T26X (21U) -#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_T26X (22U) -#define PERFMON_COUNTER_ID_PPS_ICACHE_PREEMPT_T26X (23U) -#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_LINES_T26X (24U) -#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_DUR_T26X (25U) -#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_DUR_T26X (26U) -/** @} */ - #endif /*PVA_VPU_SYSCALL_INTERFACE_H*/ diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h b/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h index f59b3b21..2f645bc0 100644 --- a/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h @@ -11,6 +11,138 @@ /* The sizes of these structs must be explicitly padded to align to 4 bytes */ +#define PVA_CMD_PRIV_OPCODE_FLAG (1U << 7U) + +#define PVA_RESOURCE_ID_BASE 1U +struct pva_resource_entry { + uint8_t access_flags : 2; // 1: RO, 2: WO, 3: RW + uint8_t reserved : 4; +#define PVA_RESOURCE_TYPE_INVALID 0U +#define PVA_RESOURCE_TYPE_DRAM 1U +#define PVA_RESOURCE_TYPE_EXEC_BIN 2U +#define PVA_RESOURCE_TYPE_DMA_CONFIG 3U + uint8_t type : 2; + uint8_t smmu_context_id; + uint8_t addr_hi; + uint8_t size_hi; + uint32_t addr_lo; + uint32_t size_lo; +}; + +struct pva_cmd_init_resource_table { +#define PVA_CMD_OPCODE_INIT_RESOURCE_TABLE (0U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + /**< Resource table id is from 0 to 7, 0 is the device's resource table, + * 1-7 are users'. */ + uint8_t resource_table_id; + uint8_t resource_table_addr_hi; + uint8_t pad[2]; + uint32_t resource_table_addr_lo; + uint32_t max_n_entries; +}; + +struct pva_cmd_deinit_resource_table { +#define PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE (1U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t resource_table_id; + uint8_t pad[3]; +}; + +struct pva_cmd_update_resource_table { +#define PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE (2U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t resource_table_id; + uint8_t pad[3]; + uint32_t resource_id; + struct pva_resource_entry entry; +}; + +struct pva_cmd_init_queue { +#define PVA_CMD_OPCODE_INIT_QUEUE (3U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t ccq_id; + uint8_t queue_id; + uint8_t queue_addr_hi; + uint8_t syncpt_addr_hi; + uint32_t queue_addr_lo; + uint32_t max_n_submits; + uint32_t syncpt_addr_lo; + uint32_t syncpt_id; +}; + +struct pva_cmd_deinit_queue { +#define PVA_CMD_OPCODE_DEINIT_QUEUE (4U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t ccq_id; + uint8_t queue_id; + uint8_t pad[2]; +}; + +struct pva_cmd_enable_fw_profiling { +#define PVA_CMD_OPCODE_ENABLE_FW_PROFILING (5U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t timestamp_type; + uint8_t pad[3]; + uint32_t filter; +}; + +struct pva_cmd_disable_fw_profiling { +#define PVA_CMD_OPCODE_DISABLE_FW_PROFILING (6U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; +}; + +struct pva_cmd_get_tegra_stats { +#define PVA_CMD_OPCODE_GET_TEGRA_STATS (7U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t buffer_offset_hi; + bool enabled; + uint8_t pad[2]; + uint32_t buffer_resource_id; + uint32_t buffer_size; + uint32_t buffer_offset_lo; +}; + +struct pva_cmd_suspend_fw { +#define PVA_CMD_OPCODE_SUSPEND_FW (8U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; +}; + +struct pva_cmd_resume_fw { +#define PVA_CMD_OPCODE_RESUME_FW (9U | 
PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; +}; + +struct pva_cmd_init_shared_dram_buffer { +#define PVA_CMD_OPCODE_INIT_SHARED_DRAM_BUFFER (10U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t interface; + uint8_t buffer_iova_hi; + uint8_t pad[2]; + uint32_t buffer_iova_lo; + uint32_t buffer_size; +}; + +struct pva_cmd_deinit_shared_dram_buffer { +#define PVA_CMD_OPCODE_DEINIT_SHARED_DRAM_BUFFER \ + (11U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t interface; + uint8_t pad[3]; +}; +struct pva_cmd_set_debug_log_level { +#define PVA_CMD_OPCODE_SET_DEBUG_LOG_LEVEL (12U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint32_t log_level; +}; + +struct pva_cmd_set_profiling_level { +#define PVA_CMD_OPCODE_SET_PROFILING_LEVEL (13U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint32_t level; +}; + +#define PVA_CMD_PRIV_OPCODE_COUNT 14U + struct pva_fw_prefence { uint8_t offset_hi; uint8_t pad0[3]; @@ -301,7 +433,8 @@ struct pva_fw_shared_buffer_header { struct pva_kmd_fw_buffer_msg_header { #define PVA_KMD_FW_BUF_MSG_TYPE_FW_EVENT 0 #define PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE 1 -#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 2 +#define PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE 2 +#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 3 uint32_t type : 8; // Size of payload in bytes. Includes the size of the header. uint32_t size : 24; @@ -334,6 +467,27 @@ struct pva_kmd_fw_msg_vpu_trace { uint64_t submit_id; }; +struct pva_kmd_fw_msg_fence_trace { + uint64_t submit_id; + uint64_t timestamp; + // For syncpt fences, fence_id is the syncpt index + // For semaphore fences, fence_id is the serial ID of the semaphore NvRM memory + uint64_t fence_id; + // 'offset' is the offset into the semaphore memory where the value is stored + // This is only valid for semaphore fences + // Note: Trace APIs in KMD only support 32-bit offset + uint32_t offset; + uint32_t value; + uint8_t ccq_id; + uint8_t queue_id; +#define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT 0U +#define PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL 1U + uint8_t action; +#define PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT 0U +#define PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE 1U + uint8_t type; +}; + // Resource unregister message struct pva_kmd_fw_msg_res_unreg { uint32_t resource_id; @@ -345,4 +499,11 @@ struct pva_kmd_fw_tegrastats { uint64_t total_utilization[PVA_NUM_PVE]; }; +#define PVA_MAX_CMDBUF_CHUNK_LEN 1024 +#define PVA_MAX_CMDBUF_CHUNK_SIZE (sizeof(uint32_t) * PVA_MAX_CMDBUF_CHUNK_LEN) + +#define PVA_TEST_MODE_MAX_CMDBUF_CHUNK_LEN 256 +#define PVA_TEST_MODE_MAX_CMDBUF_CHUNK_SIZE \ + (sizeof(uint32_t) * PVA_TEST_MODE_MAX_CMDBUF_CHUNK_LEN) + #endif // PVA_FW_H diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_fw_hyp.h b/drivers/video/tegra/host/pva/src/fw/include/pva_fw_hyp.h index 5db9072f..02edf3b0 100644 --- a/drivers/video/tegra/host/pva/src/fw/include/pva_fw_hyp.h +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_fw_hyp.h @@ -49,7 +49,9 @@ * | 23-21 | Reserved | Reserved for future use | * | 20 | CG DISABLE | To indicate the PVA R5 FW should disable the clock gating feature | * | 19 | VMEM RD WAR DISABLE | To disable the VMEM Read fail workaround feature | - * | 18-16 | Reserved | Reserved for future use | + * | 18 | TEST_MODE_ENABLE | To enter test mode. See Documentation. 
| + * | 17 | USE_XBAR_RAW | Reserved for future use | + * | 16 | Reserved | Reserved for future use | * * The table below shows the mapping which is sent by FW to KMD * @@ -72,11 +74,20 @@ #define PVA_BOOT_SEMA_CG_DISABLE PVA_BIT(20U) //! @cond DISABLE_DOCUMENTATION +/** Tell firmware to enter test mode */ +#define PVA_BOOT_SEMA_TEST_MODE_ENABLE PVA_BIT(18U) + /** Tell firmware that block linear surfaces are in XBAR_RAW format instead of * TEGRA_RAW format */ #define PVA_BOOT_SEMA_USE_XBAR_RAW PVA_BIT(17U) +/** Tell firmware to enable test mode */ +#define PVA_BOOT_SEMA_TEST_MODE PVA_BIT(16U) + #define PVA_BOOT_SEMA 0U +#define PVA_RO_SYNC_BASE_SEMA 1U +#define PVA_RW_SYNC_BASE_SEMA 2U +#define PVA_RW_SYNC_SIZE_SEMA 3U /** * @brief This macro has the value to be set by KMD in the shared semaphores diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_resource.h b/drivers/video/tegra/host/pva/src/fw/include/pva_resource.h index e5861e19..3439b278 100644 --- a/drivers/video/tegra/host/pva/src/fw/include/pva_resource.h +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_resource.h @@ -62,8 +62,10 @@ struct pva_fw_dma_slot { * to block linear surface. */ #define PVA_FW_DMA_SLOT_FLAG_CB (1u << 4u) #define PVA_FW_DMA_SLOT_FLAG_BOUND (1u << 5u) - uint8_t flags; - uint8_t pad; +#define PVA_FW_DMA_SLOT_FLAG_MASKED (1u << 6u) +#define PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB 7u +#define PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB 8u + uint16_t flags; /** Bitmask of channels that use this slot */ uint16_t ch_use_mask; diff --git a/drivers/video/tegra/host/pva/src/include/pva_api.h b/drivers/video/tegra/host/pva/src/include/pva_api.h index 7d7d4878..69b9d367 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api.h @@ -16,6 +16,7 @@ extern "C" { /* Core APIs */ +#define PVA_MAX_NUM_RESOURCES_PER_CONTEXT (16U * 1024U) /** * @brief Create a PVA context. * @@ -37,6 +38,18 @@ enum pva_error pva_context_create(uint32_t pva_index, */ void pva_context_destroy(struct pva_context *ctx); +/** + * @brief Get the value of a context attribute. + * + * @param[in] ctx Pointer to the context. + * @param[in] attr Attribute to get. + * @param[out] out_value Pointer to the value of the attribute. + * @param[in] size Size of the attribute structure. + */ +enum pva_error pva_get_attribute(struct pva_context *ctx, enum pva_attr attr, + void *out_value, uint64_t size); + +#define PVA_MAX_NUM_SUBMISSIONS_PER_QUEUE (8U * 1024U) /** * @brief Create a PVA queue. * @@ -97,7 +110,7 @@ void pva_memory_free(struct pva_memory *mem); * @param[in] ctx Pointer to the context. * @param[in] syncpiont_id Syncpoint ID to wait on. * @param[in] value Value to wait for. - * @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite. + * @param[in] timeout_us Timeout in microseconds. PVA_SUBMIT_TIMEOUT_INF for infinite. */ enum pva_error pva_syncpoint_wait(struct pva_context *ctx, uint32_t syncpiont_id, uint32_t value, @@ -109,7 +122,7 @@ enum pva_error pva_syncpoint_wait(struct pva_context *ctx, * @param[in] queue Pointer to the queue. * @param[in] submit_infos Array of submit info structures. * @param[in] count Number of submit info structures. - * @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite. + * @param[in] timeout_us Timeout in microseconds. PVA_SUBMIT_TIMEOUT_INF for infinite. * * @note Concurrent submission to the same queue needs to be serialized by the * caller. 
@@ -206,26 +219,6 @@ enum pva_error pva_memory_import_id_destroy(uint64_t import_id); /** \brief Specifies the PVA system software minor version. */ #define PVA_SYSSW_MINOR_VERSION (7U) -/** - * @brief Get PVA system software version. - * - * PVA system software version is defined as the latest version of cuPVA which is fully supported - * by this version of the PVA system software. - * - * @param[out] version version of currently running system SW, computed as: - (PVA_SYSSW_MAJOR_VERSION * 1000) + PVA_SYSSW_MINOR_VERSION - * @return PVA_SUCCESS on success, else error code indicating the failure. - */ -enum pva_error pva_get_version(uint32_t *version); - -/** - * @brief Get the hardware characteristics of the PVA. - * - * @param[out] pva_hw_char Pointer to the hardware characteristics. - */ -enum pva_error -pva_get_hw_characteristics(struct pva_characteristics *pva_hw_char); - #ifdef __cplusplus } #endif diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h b/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h index 340714df..58f1c81e 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h @@ -5,13 +5,9 @@ #define PVA_API_CMDBUF_H #include "pva_api_types.h" -//Maximum number of slots for maintaining Timestamps -#define PVA_MAX_QUERY_SLOTS_COUNT 32U - /** The common header for all commands. */ struct pva_cmd_header { -#define PVA_CMD_PRIV_OPCODE_FLAG (1U << 7U) /** Opcode for the command. MSB of opcode indicates whether this command is * privileged or not */ uint8_t opcode; @@ -35,6 +31,26 @@ struct pva_cmd_header { uint8_t len; }; +struct pva_dma_misr_config { +#define PVA_DMA_FLAG_MISR_ENABLE 1u + uint8_t enabled; + uint8_t reserved; + uint16_t channel_mask; + uint32_t seed_crc0; + uint32_t seed_crc1; + uint32_t ref_addr; + uint32_t ref_data_1; + uint32_t ref_data_2; + uint32_t misr_timeout; +}; + +struct pva_dma_misr { + uint32_t slot_mask_low0; + uint32_t slot_mask_low1; + uint32_t slot_mask_high; + struct pva_dma_misr_config misr_config; +}; + struct pva_user_dma_allowance { #define PVA_USER_DMA_ALLOWANCE_ADB_STEP_SIZE 8 uint32_t channel_idx : 4; @@ -189,11 +205,6 @@ struct pva_cmd_set_vpu_parameter_with_buffer { uint32_t src_dram_offset_lo; }; -/** For set_vpu_parameter_with_address command, set this flag in header.flags to - * indicate that the target symbol is the legacy pointer symbol type: - * pva_fw_vpu_legacy_ptr_symbol, which only supports 32bit offset and 32bit - * size. */ -#define PVA_CMD_FLAGS_USE_LEGACY_POINTER 0x1 /** Copy the address of a DRAM buffer to a VPU variable. The variable must be * laid out exactly according to pva_fw_vpu_ptr_symbol */ @@ -208,7 +219,6 @@ struct pva_cmd_set_vpu_parameter_with_address { }; #define PVA_MAX_DMA_SETS_PER_DMA_ENGINE 4 -#define PVA_DMA_CONFIG_FETCH_BUFFER_PER_DMA_ENGINE 1 /** This command first acquires the TCM scratch and then fetches DMA configuration * into the scratch. 
The command does not modify DMA @@ -291,17 +301,7 @@ struct pva_cmd_run_ppe { uint32_t entry_point_index; }; -#define PVA_BARRIER_GROUP_0 0U -#define PVA_BARRIER_GROUP_1 1U -#define PVA_BARRIER_GROUP_2 2U -#define PVA_BARRIER_GROUP_3 3U -#define PVA_BARRIER_GROUP_4 4U -#define PVA_BARRIER_GROUP_5 5U -#define PVA_BARRIER_GROUP_6 6U -#define PVA_BARRIER_GROUP_7 7U - #define PVA_MAX_BARRIER_GROUPS 8U - #define PVA_BARRIER_GROUP_INVALID 0xFFU /** @@ -464,29 +464,15 @@ struct pva_cmd_set_vpu_instance_parameter { uint32_t symbol_id; }; -struct pva_cmd_run_unit_tests { -#define PVA_CMD_OPCODE_RUN_UNIT_TESTS 30U +struct pva_cmd_set_vpu_print_buffer { +#define PVA_CMD_OPCODE_SET_VPU_PRINT_BUFFER 30U struct pva_cmd_header header; -#define PVA_FW_UTESTS_MAX_ARGC 16U - uint8_t argc; - uint8_t pad[3]; - uint32_t in_resource_id; - uint32_t in_offset; - uint32_t in_size; - uint32_t out_resource_id; - uint32_t out_offset; - uint32_t out_size; -}; - -struct pva_cmd_set_vpu_print_cb { -#define PVA_CMD_OPCODE_SET_VPU_PRINT_CB 31U - struct pva_cmd_header header; - uint32_t cb_resource_id; - uint32_t cb_offset; + uint32_t resource_id; + uint32_t offset; }; struct pva_cmd_invalidate_l2sram { -#define PVA_CMD_OPCODE_INVALIDATE_L2SRAM 32U +#define PVA_CMD_OPCODE_INVALIDATE_L2SRAM 31U struct pva_cmd_header header; uint8_t dram_offset_hi; uint8_t pad[3]; @@ -496,19 +482,18 @@ struct pva_cmd_invalidate_l2sram { }; struct pva_cmd_flush_l2sram { -#define PVA_CMD_OPCODE_FLUSH_L2SRAM 33U +#define PVA_CMD_OPCODE_FLUSH_L2SRAM 32U struct pva_cmd_header header; + uint8_t dram_offset_hi; + uint8_t pad[3]; + uint32_t dram_resource_id; + uint32_t dram_offset_lo; + uint32_t l2sram_size; struct pva_user_dma_allowance user_dma; }; -struct pva_cmd_err_inject { -#define PVA_CMD_OPCODE_ERR_INJECT 34U - struct pva_cmd_header header; - enum pva_error_inject_codes err_inject_code; -}; - struct pva_cmd_patch_l2sram_offset { -#define PVA_CMD_OPCODE_PATCH_L2SRAM_OFFSET 35U +#define PVA_CMD_OPCODE_PATCH_L2SRAM_OFFSET 33U struct pva_cmd_header header; uint8_t dma_set_id; uint8_t slot_id; @@ -520,130 +505,16 @@ struct pva_cmd_patch_l2sram_offset { * mapped to a new logical barrier group. This allows re-using barrier ids within a command buffer. */ struct pva_cmd_retire_barrier_group { -#define PVA_CMD_OPCODE_RETIRE_BARRIER_GROUP 36U +#define PVA_CMD_OPCODE_RETIRE_BARRIER_GROUP 34U struct pva_cmd_header header; }; -struct pva_cmd_gr_check { -#define PVA_CMD_OPCODE_GR_CHECK 37U +struct pva_cmd_setup_misr { +#define PVA_CMD_OPCODE_SETUP_MISR 35U struct pva_cmd_header header; + struct pva_dma_misr misr_params; }; -#define PVA_CMD_OPCODE_COUNT 38U - -struct pva_cmd_init_resource_table { -#define PVA_CMD_OPCODE_INIT_RESOURCE_TABLE (0U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - /**< Resource table id is from 0 to 7, 0 is the device's resource table, - * 1-7 are users'. 
*/ - uint8_t resource_table_id; - uint8_t resource_table_addr_hi; - uint8_t pad[2]; - uint32_t resource_table_addr_lo; - uint32_t max_n_entries; -}; - -struct pva_cmd_deinit_resource_table { -#define PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE (1U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint8_t resource_table_id; - uint8_t pad[3]; -}; - -struct pva_cmd_update_resource_table { -#define PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE (2U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint8_t resource_table_id; - uint8_t pad[3]; - uint32_t resource_id; - struct pva_resource_entry entry; -}; - -struct pva_cmd_init_queue { -#define PVA_CMD_OPCODE_INIT_QUEUE (3U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint8_t ccq_id; - uint8_t queue_id; - uint8_t queue_addr_hi; - uint8_t pad; - uint32_t queue_addr_lo; - uint32_t max_n_submits; -}; - -struct pva_cmd_deinit_queue { -#define PVA_CMD_OPCODE_DEINIT_QUEUE (4U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint8_t ccq_id; - uint8_t queue_id; - uint8_t pad[2]; -}; - -struct pva_cmd_enable_fw_profiling { -#define PVA_CMD_OPCODE_ENABLE_FW_PROFILING (5U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint8_t timestamp_type; - uint8_t pad[3]; - uint32_t filter; -}; - -struct pva_cmd_disable_fw_profiling { -#define PVA_CMD_OPCODE_DISABLE_FW_PROFILING (6U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; -}; - -struct pva_cmd_get_tegra_stats { -#define PVA_CMD_OPCODE_GET_TEGRA_STATS (7U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint8_t buffer_offset_hi; - bool enabled; - uint8_t pad[2]; - uint32_t buffer_resource_id; - uint32_t buffer_size; - uint32_t buffer_offset_lo; -}; - -struct pva_cmd_suspend_fw { -#define PVA_CMD_OPCODE_SUSPEND_FW (8U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; -}; - -struct pva_cmd_resume_fw { -#define PVA_CMD_OPCODE_RESUME_FW (9U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; -}; - -struct pva_cmd_init_shared_dram_buffer { -#define PVA_CMD_OPCODE_INIT_SHARED_DRAM_BUFFER (10U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint8_t interface; - uint8_t buffer_iova_hi; - uint8_t pad[2]; - uint32_t buffer_iova_lo; - uint32_t buffer_size; -}; - -struct pva_cmd_deinit_shared_dram_buffer { -#define PVA_CMD_OPCODE_DEINIT_SHARED_DRAM_BUFFER \ - (11U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint8_t interface; - uint8_t pad[3]; -}; -struct pva_cmd_set_debug_log_level { -#define PVA_CMD_OPCODE_SET_DEBUG_LOG_LEVEL (12U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint32_t log_level; -}; - -struct pva_cmd_set_profiling_level { -#define PVA_CMD_OPCODE_SET_PROFILING_LEVEL (13U | PVA_CMD_PRIV_OPCODE_FLAG) - struct pva_cmd_header header; - uint32_t level; -}; - -#define PVA_CMD_PRIV_OPCODE_COUNT 14U - -#define PVA_MAX_CMDBUF_CHUNK_LEN 1024 -#define PVA_MAX_CMDBUF_CHUNK_SIZE (sizeof(uint32_t) * PVA_MAX_CMDBUF_CHUNK_LEN) +#define PVA_CMD_OPCODE_MAX 36U #endif // PVA_API_CMDBUF_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_cuda.h b/drivers/video/tegra/host/pva/src/include/pva_api_cuda.h index 3c4b19eb..74f4ad63 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_cuda.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_cuda.h @@ -11,14 +11,14 @@ extern "C" { #include "cuda.h" #include "pva_api_types.h" -/** - * @brief Structure for cuExtend queue data needed for command submission. - */ -struct pva_cuextend_queue_data { - /*! 
Holds a pointer to pva queue object */ - struct pva_queue *queue; - /*! Holds engine affinity for command submission*/ - uint32_t affinity; +#define PVA_CUEXTEND_MAX_NUM_PREFENCES 16 +#define PVA_CUEXTEND_MAX_NUM_POSTFENCES 16 + +struct pva_cuextend_submit_events { + struct pva_fence prefences[PVA_CUEXTEND_MAX_NUM_PREFENCES]; + struct pva_fence postfences[PVA_CUEXTEND_MAX_NUM_POSTFENCES]; + uint32_t num_prefences; + uint32_t num_postfences; }; /** @@ -71,27 +71,16 @@ typedef enum pva_error (*pva_cuextend_stream_unregister)(void *callback_args, uint64_t flags); /** - * @brief Function type for cuExtend acquire queue callback. + * @brief Function type for submitting a batch of command buffers via a CUDA stream. * * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. * @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register. - * @param[out] queue_data Output pointer to a pva_cuextend_queue_data object. - * @return \ref pva_error The completion status of acquire queue operation. + * @param[in] submit_payload Pointer to the submit payload. + * @return \ref pva_error The completion status of the submit operation. */ -typedef enum pva_error (*pva_cuextend_queue_acquire)( - void *callback_args, void *stream_payload, - struct pva_cuextend_queue_data **queue_data); - -/** - * @brief Function type for cuExtend release queue callback. - * - * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. - * @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register. - * @return \ref pva_error The completion status of release queue operation. - */ -typedef enum pva_error (*pva_cuextend_queue_release)(void *callback_args, - void *stream_payload, - void *queue_data); +typedef enum pva_error (*pva_cuextend_stream_submit)( + void *callback_args, void *stream_payload, void *submit_payload, + struct pva_cuextend_submit_events *submit_events); /** * @brief Function type for retrieving error code from cuExtend. @@ -128,12 +117,10 @@ struct pva_cuextend_callbacks { pva_cuextend_stream_register stream_reg; /*! Holds the unregister stream callback */ pva_cuextend_stream_unregister stream_unreg; - /*! Holds the acquire queue callback */ - pva_cuextend_queue_acquire queue_acquire; - /*! Holds the release queue callback */ - pva_cuextend_queue_release queue_release; /*! Holds the teardown callback */ pva_cuextend_teardown teardown; + /*! Holds the stream submit callback */ + pva_cuextend_stream_submit stream_submit; /*! Pointer to the callback arguments provided by client during cuExtend initialization */ void *args; }; @@ -188,22 +175,32 @@ enum pva_error pva_cuextend_memory_import(struct pva_context *ctx, /** * @brief Submit a batch of command buffers via a CUDA stream. * - * @param[in] queue Pointer to the queue. If queue is not NULL, this API will try to submit the client tasks to this queue directly. - * Otherwise, it will call queue_acquire callback to query a pva_queue object from stream payload, and then submit - * the tasks to the queried queue. - * @param[in] stream A CUDA stream. - * @param[in] submit_infos Array of submit info structures. - * @param[in] count Number of submit info structures. - * @param[in] timeout_ms Timeout in milliseconds. PVA_TIMEOUT_INF for infinite. + * @param[in] ctx Pointer to the PVA context. + * @param[in] cuStream A CUDA stream. + * @param[in] client_stream A client stream. 
+ * @param[in] submit_payload Pointer to the submit payload. * @return \ref pva_error The completion status of the submit operation. - * - * @note Concurrent submission to the same queue needs to be serialized by the - * caller. */ -enum pva_error -pva_cuextend_cmdbuf_batch_submit(struct pva_queue *queue, CUstream stream, - struct pva_cmdbuf_submit_info *submit_infos, - uint32_t count, uint64_t timeout_ms); +enum pva_error pva_cuextend_cmdbuf_batch_submit(struct pva_context *ctx, + CUstream cuStream, + void *client_stream, + void *submit_payload); + +/** + * @brief Get the payload associated with a CUDA stream. + * + * Returns the payload which was associated with the CUDA stream during registration callback. + * + * @param[in] ctx Pointer to the PVA context. + * @param[in] cuStream A CUDA stream. + * @param[out] stream_payload Pointer to the stream payload. + * @return PVA_SUCCESS if the stream payload is successfully retrieved + * PVA_BAD_PARAMETER_ERROR if any of the parameters are NULL + * PVA_CUDA_INIT_FAILED if the cuExtend was not initialized for the context + */ +enum pva_error pva_cuextend_get_stream_payload(struct pva_context *ctx, + CUstream cuStream, + void **stream_payload); #ifdef __cplusplus } diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_dma.h b/drivers/video/tegra/host/pva/src/include/pva_api_dma.h index 18b4864a..23d353e4 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_dma.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_dma.h @@ -24,73 +24,8 @@ enum pva_gpio_bit { GPIO_WRITE6_BIT = 29U }; -enum pva_dma_descriptor_id { - PVA_DMA_DESC_NONE = 0, - PVA_DMA_DESC0 = 1, - PVA_DMA_DESC1 = 2, - PVA_DMA_DESC2 = 3, - PVA_DMA_DESC3 = 4, - PVA_DMA_DESC4 = 5, - PVA_DMA_DESC5 = 6, - PVA_DMA_DESC6 = 7, - PVA_DMA_DESC7 = 8, - PVA_DMA_DESC8 = 9, - PVA_DMA_DESC9 = 10, - PVA_DMA_DESC10 = 11, - PVA_DMA_DESC11 = 12, - PVA_DMA_DESC12 = 13, - PVA_DMA_DESC13 = 14, - PVA_DMA_DESC14 = 15, - PVA_DMA_DESC15 = 16, - PVA_DMA_DESC16 = 17, - PVA_DMA_DESC17 = 18, - PVA_DMA_DESC18 = 19, - PVA_DMA_DESC19 = 20, - PVA_DMA_DESC20 = 21, - PVA_DMA_DESC21 = 22, - PVA_DMA_DESC22 = 23, - PVA_DMA_DESC23 = 24, - PVA_DMA_DESC24 = 25, - PVA_DMA_DESC25 = 26, - PVA_DMA_DESC26 = 27, - PVA_DMA_DESC27 = 28, - PVA_DMA_DESC28 = 29, - PVA_DMA_DESC29 = 30, - PVA_DMA_DESC30 = 31, - PVA_DMA_DESC31 = 32, - PVA_DMA_DESC32 = 33, - PVA_DMA_DESC33 = 34, - PVA_DMA_DESC34 = 35, - PVA_DMA_DESC35 = 36, - PVA_DMA_DESC36 = 37, - PVA_DMA_DESC37 = 38, - PVA_DMA_DESC38 = 39, - PVA_DMA_DESC39 = 40, - PVA_DMA_DESC40 = 41, - PVA_DMA_DESC41 = 42, - PVA_DMA_DESC42 = 43, - PVA_DMA_DESC43 = 44, - PVA_DMA_DESC44 = 45, - PVA_DMA_DESC45 = 46, - PVA_DMA_DESC46 = 47, - PVA_DMA_DESC47 = 48, - PVA_DMA_DESC48 = 49, - PVA_DMA_DESC49 = 50, - PVA_DMA_DESC50 = 51, - PVA_DMA_DESC51 = 52, - PVA_DMA_DESC52 = 53, - PVA_DMA_DESC53 = 54, - PVA_DMA_DESC54 = 55, - PVA_DMA_DESC55 = 56, - PVA_DMA_DESC56 = 57, - PVA_DMA_DESC57 = 58, - PVA_DMA_DESC58 = 59, - PVA_DMA_DESC59 = 60, - PVA_DMA_DESC60 = 61, - PVA_DMA_DESC61 = 62, - PVA_DMA_DESC62 = 63, - PVA_DMA_DESC63 = 64 -}; +#define PVA_DMA_DESC_ID_NULL 0 +#define PVA_DMA_DESC_ID_BASE 1 /** * The values of the enum members conform to the definitions of DMA descriptors' @@ -266,8 +201,6 @@ struct pva_dma_config_header { * means that every allocation of descriptors will start at an alignment of 4. The following * macros control the alignment/grouping requirement of DMA resources. 
*/ -// TODO: Add compile time asserts to ensure the following alignment requirments don't result -// in fractional resource partitions? #define PVA_DMA_CHANNEL_ALIGNMENT 1 #define PVA_DMA_DESCRIPTOR_ALIGNMENT 4 #define PVA_DMA_ADB_ALIGNMENT 16 diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_ops.h b/drivers/video/tegra/host/pva/src/include/pva_api_ops.h index 217e0f8c..8bc5f9fd 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_ops.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_ops.h @@ -11,7 +11,7 @@ */ struct pva_ops_memory { uint32_t handle; /**< Memory handle */ - uint32_t size; /**< Size of memory */ + uint64_t size; /**< Size of memory */ void *va; /**< Virtual address */ }; @@ -27,8 +27,8 @@ struct pva_ops_memory { */ struct pva_ops_buffer { struct pva_ops_memory *memory; /**< Pointer to buffer memory */ - uint32_t start_offset; /**< Start offset in buffer memory */ - uint32_t end_offset; /**< End offset (exclusive) in buffer memory */ + uint64_t start_offset; /**< Start offset in buffer memory */ + uint64_t end_offset; /**< End offset (exclusive) in buffer memory */ }; /** @@ -45,9 +45,9 @@ struct pva_ops_buffer { * @brief Header structure for PVA operations. */ struct pva_ops_header { - uint32_t opcode; /**< Operation code identifying the operation type */ + uint64_t opcode; /**< Operation code identifying the operation type */ /** Size of the operation in bytes. This size must be a multiple of 8 bytes. */ - uint32_t size; + uint64_t size; }; /** @@ -56,8 +56,7 @@ struct pva_ops_header { struct pva_ops_executable_register { #define PVA_OPS_OPCODE_EXECUTABLE_REGISTER 1U struct pva_ops_header header; /**< Operation header */ - uint32_t exec_size; /**< Size of executable data */ - uint32_t pad; /**< Padding for 8 bytes alignment */ + uint64_t exec_size; /**< Size of executable data */ //followed by executable data }; @@ -144,7 +143,7 @@ enum pva_error pva_ops_parse_unregister_resp(struct pva_ops_buffer *resp_buf); * * @return PVA_SUCCESS on success, appropriate error code otherwise. */ -enum pva_error pva_ops_memory_alloc(struct pva_context *ctx, uint32_t size, +enum pva_error pva_ops_memory_alloc(struct pva_context *ctx, uint64_t size, struct pva_ops_memory *ops_buf); /** diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_types.h b/drivers/video/tegra/host/pva/src/include/pva_api_types.h index 8d7522d0..4f496698 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_types.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_types.h @@ -118,6 +118,16 @@ ACT(PVA_ERR_MATH_OP) \ ACT(PVA_ERR_HWSEQ_INVALID) \ ACT(PVA_ERR_FW_ABORTED) \ + ACT(PVA_ERR_PPE_DIVIDE_BY_0) \ + ACT(PVA_ERR_PPE_FP_NAN) \ + ACT(PVA_ERR_INVALID_ACCESS_MODE_COMBINATION) \ + ACT(PVA_ERR_CMD_TCM_BUF_OUT_OF_RANGE) \ + ACT(PVA_ERR_MISR_NOT_RUN) \ + ACT(PVA_ERR_MISR_DATA) \ + ACT(PVA_ERR_MISR_ADDR) \ + ACT(PVA_ERR_MISR_NOT_DONE) \ + ACT(PVA_ERR_MISR_ADDR_DATA) \ + ACT(PVA_ERR_MISR_TIMEOUT) \ ACT(PVA_ERR_CODE_COUNT) enum pva_error { @@ -207,12 +217,6 @@ struct pva_fw_vpu_ptr_symbol { uint64_t size; }; -struct pva_fw_vpu_legacy_ptr_symbol { - uint64_t base; - uint32_t offset; - uint32_t size; -}; - enum pva_surface_format { PVA_SURF_FMT_PITCH_LINEAR = 0, PVA_SURF_FMT_BLOCK_LINEAR @@ -243,25 +247,6 @@ enum pva_symbol_type { PVA_SYM_TYPE_MAX, }; -/** - * \brief Holds PVA Sync Client Type. - * Currently NvSciSync supports NvSciSyncFences with syncpoint primitive type only. - */ -enum pva_sync_client_type { - /*! For a given SyncObj PVA acts as a signaler. 
This type corresponds to - * postfences from PVA. */ - PVA_SYNC_CLIENT_TYPE_SIGNALER, - /*! For a given SyncObj PVA acts as a waiter. This type corresponds to - * prefences to PVA. */ - PVA_SYNC_CLIENT_TYPE_WAITER, - /*! For a given SyncObj PVA acts as both signaler and waiter. */ - PVA_SYNC_CLIENT_TYPE_SIGNALER_WAITER, - /*! Specifies the non inclusive upper bound of valid values. */ - PVA_SYNC_CLIENT_TYPE_MAX, - /*! Reserved bound of valid values. */ - PVA_SYNC_CLIENT_TYPE_RESERVED = 0x7FFFFFFF, -}; - #define PVA_SYMBOL_ID_INVALID 0U #define PVA_SYMBOL_ID_BASE 1U #define PVA_MAX_SYMBOL_NAME_LEN 64U @@ -275,19 +260,6 @@ struct pva_symbol_info { }; #define PVA_RESOURCE_ID_INVALID 0U -#define PVA_RESOURCE_ID_BASE 1U -struct pva_resource_entry { -#define PVA_RESOURCE_TYPE_INVALID 0U -#define PVA_RESOURCE_TYPE_DRAM 1U -#define PVA_RESOURCE_TYPE_EXEC_BIN 2U -#define PVA_RESOURCE_TYPE_DMA_CONFIG 3U - uint8_t type; - uint8_t smmu_context_id; - uint8_t addr_hi; - uint8_t size_hi; - uint32_t addr_lo; - uint32_t size_lo; -}; /** \brief Maximum number of queues per context */ #define PVA_MAX_QUEUES_PER_CONTEXT (8) @@ -300,7 +272,8 @@ struct pva_resource_entry { #define PVA_ACCESS_RW \ (PVA_ACCESS_RO | PVA_ACCESS_WO) /**< Read and write access */ -#define PVA_TIMEOUT_INF UINT64_MAX /**< Infinite timeout */ +// unify timeout to uint64_t, in microseconds +#define PVA_SUBMIT_TIMEOUT_INF UINT64_MAX /**< Infinite timeout */ #define PVA_MAX_NUM_INPUT_STATUS 2 /**< Maximum number of input statuses */ #define PVA_MAX_NUM_OUTPUT_STATUS 2 /**< Maximum number of output statuses */ @@ -329,8 +302,9 @@ struct pva_cmdbuf_submit_info { uint64_t submit_id; /** Offset of the first chunk within the resource */ uint64_t first_chunk_offset; -#define PVA_EXEC_TIMEOUT_REUSE 0xFFFFFFFFU -#define PVA_EXEC_TIMEOUT_INF 0U +/** Execution timeout is in ms */ +#define PVA_EXEC_TIMEOUT_INF UINT32_MAX +#define PVA_EXEC_TIMEOUT_REUSE (UINT32_MAX - 1) /** Execution Timeout */ uint32_t execution_timeout_ms; struct pva_fence prefences[PVA_MAX_NUM_PREFENCES]; @@ -351,13 +325,13 @@ struct pva_cmdbuf_status { uint16_t status; }; -/** \brief Holds the PVA capabilities. */ +/** @brief Holds the PVA capabilities. */ struct pva_characteristics { - /*! Holds the number of PVA engines. */ + /** Holds the number of PVA engines. */ uint32_t pva_engine_count; - /*! Holds the number of VPUs per PVA engine. */ + /** Holds the number of VPUs per PVA engine. */ uint32_t pva_pve_count; - /*! Holds the PVA generation information */ + /** Holds the PVA generation information */ enum pva_hw_gen hw_version; uint16_t max_desc_count; uint16_t max_ch_count; @@ -370,11 +344,6 @@ struct pva_characteristics { uint16_t reserved_adb_count; }; -enum pva_error_inject_codes { - PVA_ERR_INJECT_WDT_HW_ERR, // watchdog Hardware error - PVA_ERR_INJECT_WDT_TIMEOUT, // watchdog Timeout error -}; - /* * !!!! DO NOT MODIFY !!!!!! * These values are defined as per DriveOS guidelines @@ -382,4 +351,20 @@ enum pva_error_inject_codes { #define PVA_INPUT_STATUS_SUCCESS (0) #define PVA_INPUT_STATUS_INVALID (0xFFFF) +/** + * @brief Context attribute keys. + */ +enum pva_attr { + PVA_CONTEXT_ATTR_MAX_CMDBUF_CHUNK_SIZE, + PVA_ATTR_HW_CHARACTERISTICS, + PVA_ATTR_VERSION +}; + +/** + * @brief Maximum size of a command buffer chunk. 
+ */ +struct pva_ctx_attr_max_cmdbuf_chunk_size { + uint16_t max_size; +}; + #endif // PVA_API_TYPES_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_vpu.h b/drivers/video/tegra/host/pva/src/include/pva_api_vpu.h index 073de57d..ce5389bf 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_vpu.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_vpu.h @@ -23,113 +23,10 @@ struct pva_vpu_instance_data { }; /** - * @defgroup PVA_VPU_SYSCALL - * - * @brief PVA VPU SYS call IDs for each type of - * SYS call. - * @{ - */ - -//! @cond DISABLE_DOCUMENTATION - -/** - * @brief VPU Syscall id for vpu printf write. - */ -#define PVA_FW_PE_SYSCALL_ID_WRITE (1U) -//! @endcond -/** - * @brief VPU Syscall id for Icache prefetch. - */ -#define PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH (2U) - -/** - * @brief VPU Syscall id for masking exceptions. - */ -#define PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION (3U) - -/** - * @brief VPU Syscall id for unmasking exceptions. - */ -#define PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION (4U) -//! @cond DISABLE_DOCUMENTATION -/** - * @brief VPU Syscall id for sampling VPU performance counters - */ -#define PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE (5U) -//! @endcond -/** @} */ - -/** - * @defgroup PVA_PPE_SYSCALL - * - * @brief PVA PPE SYS call IDs for each type of - * SYS call. - * @{ - */ - -//! @cond DISABLE_DOCUMENTATION - -/** - * @brief PPE Syscall id for ppe printf write. - */ -#define PVA_FW_PPE_SYSCALL_ID_WRITE (1U) - -/** - * @brief PPE Syscall id for masking exceptions. - */ -#define PVA_FW_PPE_SYSCALL_ID_MASK_EXCEPTION (2U) - -/** - * @brief PPE Syscall id for unmasking exceptions. - */ -#define PVA_FW_PPE_SYSCALL_ID_UNMASK_EXCEPTION (3U) - -/** - * @brief VPU Syscall id for sampling VPU performance counters - */ -#define PVA_FW_PPE_SYSCALL_ID_PERFMON_SAMPLE (4U) -/** - * @brief PPE Syscall id for Icache prefetch. - */ -#define PVA_FW_PPE_SYSCALL_ID_ICACHE_PREFETCH (5U) - -//! @endcond -/** @} */ - -/** - * @brief Lookup table to convert PPE syscall IDs to VPU syscall IDs - * Index is PPE syscall ID, value is corresponding VPU syscall ID - */ -#define PVA_FW_PPE_TO_VPU_SYSCALL_LUT \ - { \ - 0U, /* Index 0: Invalid */ \ - PVA_FW_PE_SYSCALL_ID_WRITE, /* Index 1: Write */ \ - PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION, /* Index 2: Mask Exception */ \ - PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION, /* Index 3: Unmask Exception */ \ - PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE, /* Index 4: Perfmon Sample */ \ - PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH /* Index 5: ICache Prefetch */ \ - } - -/** - * @brief Maximum valid PPE syscall ID - */ -#define PVA_FW_PPE_SYSCALL_ID_MAX PVA_FW_PPE_SYSCALL_ID_ICACHE_PREFETCH - -/** - * @defgroup PVA_VPU_SYSCALL_WRITE_PARAM_GROUP - * - * @brief Parameter specification for syscall write - */ - -/** - * @defgroup PVA_VPU_SYSCALL_COMMAND_FIELDS_GROUP - * - * @brief The command format to be used while issuing vpu syscall command from VPU kernel to R5. - * The fields mentioned in this group is used for submitting the command - * through the Signal_R5 interface from VPU kernel. - * - * @{ + * @brief Type used to store VPU syscall IDs exchanged between + * FW and the VPU kernel. 
*/ +typedef uint32_t pva_vpu_syscall_id_t; /** * @brief The most significant bit of the vpu syscall ID field in @@ -154,17 +51,56 @@ struct pva_vpu_instance_data { * the vpu syscall command interface */ #define PVA_FW_PE_SYSCALL_PARAM_LSB (0U) -/** @} */ /** - * @defgroup PVA_VPU_SYSCALL_ICACHE_PREFETCH_PARAM_FIELDS_GROUP - * - * @brief The parameter format to be used while issuing vpu syscall command from VPU kernel to R5 for syscall icache prefetch. - * The fields mentioned in this group is used for submitting the icache prefetch command - * through the Signal_R5 interface from VPU kernel. - * - * @{ + * @brief VPU Syscall id for vpu printf write. */ +#define PVA_FW_PE_SYSCALL_ID_WRITE (1U) + +/** + * @brief VPU Syscall id for Icache prefetch. + */ +#define PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH (2U) + +/** + * @brief VPU Syscall id for masking exceptions. + */ +#define PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION (3U) + +/** + * @brief VPU Syscall id for unmasking exceptions. + */ +#define PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION (4U) + +/** + * @brief VPU Syscall id for sampling VPU performance counters + */ +#define PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE (5U) + +/** + * @brief PPE Syscall id for ppe printf write. + */ +#define PVA_FW_PPE_SYSCALL_ID_WRITE (1U) + +/** + * @brief PPE Syscall id for Icache prefetch. + */ +#define PVA_FW_PPE_SYSCALL_ID_ICACHE_PREFETCH (2U) + +/** + * @brief PPE Syscall id for masking exceptions. + */ +#define PVA_FW_PPE_SYSCALL_ID_MASK_EXCEPTION (3U) + +/** + * @brief PPE Syscall id for unmasking exceptions. + */ +#define PVA_FW_PPE_SYSCALL_ID_UNMASK_EXCEPTION (4U) + +/** + * @brief PPE Syscall id for sampling PPE performance counters + */ +#define PVA_FW_PPE_SYSCALL_ID_PERFMON_SAMPLE (5U) /** * @brief The most significant bit of the prefetch cache line count field in @@ -189,23 +125,146 @@ struct pva_vpu_instance_data { * the vpu syscall command interface */ #define PVA_FW_PE_SYSCALL_PREFETCH_ADDR_LSB (0U) -/** @} */ - -/** - * @defgroup PVA_VPU_SYSCALL_MASK_UNMASK_PARAM_FIELDS_GROUP - * - * @brief The parameter format to be used while issuing vpu syscall command from VPU kernel - * to R5 for masking or unmasking FP NaN Exception. - * The fields mentioned in this group is used for submitting the mask and unmask FP NaN eception command - * through the Signal_R5 interface from VPU kernel. 
- * - * @{ - */ /** * @brief Parameter specification for syscall mask/unmask exceptions */ +#define PVA_FW_PE_MASK_DIV_BY_0 (1U << 1U) #define PVA_FW_PE_MASK_FP_INV_NAN (1U << 2U) -/** @} */ + +/** + * @brief Write syscall parameter will be a pointer to this struct + */ +union pva_fw_pe_syscall_write { + struct { + uint32_t addr; + uint32_t size; + } in; + struct { + uint32_t written_size; + } out; +}; + +/** + * @brief Perfmon sample syscall parameter will be a pointer to this struct + */ +struct pva_fw_pe_syscall_perfmon_sample { + /** counter_mask[0] is for ID: 0-31; counter_mask[1] is for ID: 32-63 */ + uint32_t counter_mask[2]; + uint32_t output_addr; +}; + +/** + * @brief Index for t26x performance counters for VPU + */ +#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T26X (0U) +#define PERFMON_COUNTER_ID_VPS_ID_VALID_T26X (1U) +#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T26X (2U) +#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T26X (3U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T26X (4U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T26X (5U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T26X (6U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T26X (7U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T26X (8U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T26X (9U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T26X (10U) +#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T26X (11U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T26X (12U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T26X (13U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T26X (14U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T26X (15U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T26X (16U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T26X (17U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T26X (18U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T26X (19U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_FETCH_REQ_T26X (20U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X (21U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_PREEMPT_T26X (22U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_LINES_T26X (23U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_DUR_T26X (24U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_DUR_T26X (25U) +#define PERFMON_COUNTER_ID_DLUT_BUSY_T26X (26U) +#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T26X (27U) +#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T26X (28U) +#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T26X (29U) +#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T26X (30U) +#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T26X (31U) +#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T26X (32U) +#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T26X (33U) +#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T26X (34U) +#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T26X (35U) +#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T26X (36U) + +/** + * @brief Index for t23x performance counters + */ +#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T23X (0U) +#define PERFMON_COUNTER_ID_VPS_ID_VALID_T23X (1U) +#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T23X (2U) +#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T23X (3U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T23X (4U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T23X (5U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T23X (6U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T23X (7U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T23X (8U) +#define
PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T23X (9U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T23X (10U) +#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T23X (11U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T23X (12U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T23X (13U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T23X (14U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T23X (15U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T23X (16U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T23X (17U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T23X (18U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T23X (19U) +#define PERFMON_COUNTER_ID_ICACHE_FETCH_REQ_T23X (20U) +#define PERFMON_COUNTER_ID_ICACHE_MISS_T23X (21U) +#define PERFMON_COUNTER_ID_ICACHE_PREEMP_T23X (22U) +#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_LINES_T23X (23U) +#define PERFMON_COUNTER_ID_ICACHE_MISS_DUR_T23X (24U) +#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_DUR_T23X (25U) +#define PERFMON_COUNTER_ID_DLUT_BUSY_T23X (26U) +#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T23X (27U) +#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T23X (28U) +#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T23X (29U) +#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T23X (30U) +#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T23X (31U) +#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T23X (32U) +#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T23X (33U) +#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T23X (34U) +#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T23X (35U) +#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T23X (36U) + +/** + * @brief Index for t26x performance counters for PPE + */ +#define PERFMON_COUNTER_ID_PPS_STALL_ID_NO_VAL_INSTR_T26X (0U) +#define PERFMON_COUNTER_ID_PPS_ID_VALID_T26X (1U) +#define PERFMON_COUNTER_ID_PPS_STALL_ID_REG_DEPEND_T26X (2U) +#define PERFMON_COUNTER_ID_PPS_STALL_ID_ONLY_T26X (3U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX1_ONLY_T26X (4U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_LD_DEPENDENCY_T26X (5U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_ST_DEPENDENCY_T26X (6U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_DEPENDENCY_T26X (7U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STRM_STORE_FLUSH_T26X (8U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_STORE_FLUSH_T26X (9U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STORE_FLUSH_T26X (10U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_LD_T26X (11U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_ST_T26X (12U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_T26X (13U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LD_T26X (14U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_ST_T26X (15U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LDST_T26X (16U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_PUSHBACK_T26X (17U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STQ_PUSHBACK_T26X (18U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_FLUSH_T26X (19U) +#define PERFMON_COUNTER_ID_PPS_WFE_GPI_EX_STATE_T26X (20U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_FETCH_REQ_T26X (21U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_T26X (22U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_PREEMPT_T26X (23U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_LINES_T26X (24U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_DUR_T26X (25U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_DUR_T26X (26U) #endif // PVA_API_VPU_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_abort.c 
b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_abort.c index 5344b899..d78a1ba4 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_abort.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_abort.c @@ -2,17 +2,17 @@ // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. #include "pva_kmd_abort.h" -#include "pva_kmd_shim_init.h" +#include "pva_kmd_device.h" +#include "pva_kmd_regs.h" +#include "pva_kmd_silicon_utils.h" -void pva_kmd_abort(struct pva_kmd_device *pva) +void pva_kmd_abort_fw(struct pva_kmd_device *pva) { - //TODO: Report to FSI first about the SW error code. - pva_kmd_log_err("Abort: FW Reset Assert"); - /* Put the FW in reset ASSERT so the user space - cannot access the CCQ and thus force them to - destroy the contexts. On destroy all the contexts. - KMD poweroff the FW whereas on first new contexts creation, - KMD will load the firmware image & poweron device */ - pva_kmd_fw_reset_assert(pva); + // HW watchdog may fire repeatedly if PVA is hung. Therefore, disable all + // interrupts to protect KMD from potential interrupt floods. + pva_kmd_disable_all_interrupts_nosync(pva); + + // We will handle firmware reboot after all contexts are closed and a + // new one is opened. pva->recovery = true; -} \ No newline at end of file +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_abort.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_abort.h index 98ebb7cf..1881b959 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_abort.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_abort.h @@ -5,6 +5,6 @@ #include "pva_kmd_device.h" #include "pva_kmd_utils.h" -void pva_kmd_abort(struct pva_kmd_device *pva); +void pva_kmd_abort_fw(struct pva_kmd_device *pva); -#endif //PVA_KMD_ABORT_H \ No newline at end of file +#endif //PVA_KMD_ABORT_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.c index 2f9c5bde..43306f67 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.c @@ -53,13 +53,12 @@ static inline uint32_t next_slot(struct pva_kmd_block_allocator *allocator, return *next; } -void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator, - uint32_t *out_id) +void *pva_kmd_alloc_block_unsafe(struct pva_kmd_block_allocator *allocator, + uint32_t *out_id) { void *block = NULL; uint32_t slot = INVALID_ID; - pva_kmd_mutex_lock(&allocator->allocator_lock); if (allocator->free_slot_head != INVALID_ID) { slot = allocator->free_slot_head; allocator->free_slot_head = @@ -69,18 +68,24 @@ void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator, slot = allocator->next_free_slot; allocator->next_free_slot++; } else { - goto unlock; + return NULL; } } allocator->slot_in_use[slot] = true; - pva_kmd_mutex_unlock(&allocator->allocator_lock); - *out_id = slot + allocator->base_id; block = get_block(allocator, slot); return block; -unlock: +} + +void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator, + uint32_t *out_id) +{ + void *block = NULL; + + pva_kmd_mutex_lock(&allocator->allocator_lock); + block = pva_kmd_alloc_block_unsafe(allocator, out_id); pva_kmd_mutex_unlock(&allocator->allocator_lock); - return NULL; + return block; } static bool is_slot_valid(struct pva_kmd_block_allocator *allocator, @@ -103,16 +108,15 @@ void
*pva_kmd_get_block_unsafe(struct pva_kmd_block_allocator *allocator, return get_block(allocator, slot); } -enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator, - uint32_t id) +enum pva_error +pva_kmd_free_block_unsafe(struct pva_kmd_block_allocator *allocator, + uint32_t id) { uint32_t slot = id - allocator->base_id; uint32_t *next; - enum pva_error err = PVA_SUCCESS; - pva_kmd_mutex_lock(&allocator->allocator_lock); + if (!is_slot_valid(allocator, slot)) { - err = PVA_INVAL; - goto unlock; + return PVA_INVAL; } allocator->slot_in_use[slot] = false; @@ -120,7 +124,16 @@ enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator, *next = allocator->free_slot_head; allocator->free_slot_head = slot; -unlock: + return PVA_SUCCESS; +} + +enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator, + uint32_t id) +{ + enum pva_error err = PVA_SUCCESS; + + pva_kmd_mutex_lock(&allocator->allocator_lock); + err = pva_kmd_free_block_unsafe(allocator, id); pva_kmd_mutex_unlock(&allocator->allocator_lock); return err; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.h index 16ca93e2..1343a546 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.h @@ -24,6 +24,8 @@ pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator, void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator, uint32_t *out_id); +void *pva_kmd_alloc_block_unsafe(struct pva_kmd_block_allocator *allocator, + uint32_t *out_id); static inline void * pva_kmd_zalloc_block(struct pva_kmd_block_allocator *allocator, uint32_t *out_id) @@ -47,6 +49,9 @@ void *pva_kmd_get_block_unsafe(struct pva_kmd_block_allocator *allocator, uint32_t id); enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator, uint32_t id); +enum pva_error +pva_kmd_free_block_unsafe(struct pva_kmd_block_allocator *allocator, + uint32_t id); void pva_kmd_block_allocator_deinit(struct pva_kmd_block_allocator *allocator); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h index 2e8d3d59..91963639 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h @@ -143,6 +143,7 @@ static inline void pva_kmd_set_cmd_init_resource_table( struct pva_cmd_init_resource_table *cmd, uint8_t resource_table_id, uint64_t iova_addr, uint32_t max_num_entries) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_INIT_RESOURCE_TABLE; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->resource_table_id = resource_table_id; @@ -155,6 +156,7 @@ static inline void pva_kmd_set_cmd_deinit_resource_table(struct pva_cmd_deinit_resource_table *cmd, uint8_t resource_table_id) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->resource_table_id = resource_table_id; @@ -162,22 +164,29 @@ pva_kmd_set_cmd_deinit_resource_table(struct pva_cmd_deinit_resource_table *cmd, static inline void pva_kmd_set_cmd_init_queue(struct pva_cmd_init_queue *cmd, uint8_t ccq_id, uint8_t queue_id, - uint64_t iova_addr, - uint32_t max_num_submit) + uint64_t queue_addr, + uint32_t max_num_submit, + uint32_t syncpt_id, + uint64_t syncpt_addr) { 
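+ /* Zero-initialize the whole command first, as every setter here does, so reserved and padding words reach the FW as zeroes rather than stale stack data. */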
+ memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_INIT_QUEUE; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->ccq_id = ccq_id; cmd->queue_id = queue_id; - cmd->queue_addr_lo = iova_lo(iova_addr); - cmd->queue_addr_hi = iova_hi(iova_addr); + cmd->queue_addr_lo = iova_lo(queue_addr); + cmd->queue_addr_hi = iova_hi(queue_addr); cmd->max_n_submits = max_num_submit; + cmd->syncpt_id = syncpt_id; + cmd->syncpt_addr_lo = iova_lo(syncpt_addr); + cmd->syncpt_addr_hi = iova_hi(syncpt_addr); } static inline void pva_kmd_set_cmd_deinit_queue(struct pva_cmd_deinit_queue *cmd, uint8_t ccq_id, uint8_t queue_id) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_QUEUE; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->ccq_id = ccq_id; @@ -188,6 +197,7 @@ static inline void pva_kmd_set_cmd_update_resource_table( struct pva_cmd_update_resource_table *cmd, uint32_t resource_table_id, uint32_t resource_id, struct pva_resource_entry const *entry) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->resource_table_id = resource_table_id; @@ -199,6 +209,7 @@ static inline void pva_kmd_set_cmd_unregister_resource(struct pva_cmd_unregister_resource *cmd, uint32_t resource_id) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_UNREGISTER_RESOURCE; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->resource_id = resource_id; @@ -208,6 +219,7 @@ static inline void pva_kmd_set_cmd_enable_fw_profiling(struct pva_cmd_enable_fw_profiling *cmd, uint32_t filter, uint8_t timestamp_type) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_ENABLE_FW_PROFILING; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->filter = filter; @@ -217,6 +229,7 @@ pva_kmd_set_cmd_enable_fw_profiling(struct pva_cmd_enable_fw_profiling *cmd, static inline void pva_kmd_set_cmd_disable_fw_profiling(struct pva_cmd_disable_fw_profiling *cmd) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_DISABLE_FW_PROFILING; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); } @@ -225,6 +238,7 @@ static inline void pva_kmd_set_cmd_get_tegra_stats( struct pva_cmd_get_tegra_stats *cmd, uint32_t buffer_resource_id, uint32_t buffer_size, uint64_t offset, bool enabled) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_GET_TEGRA_STATS; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->buffer_resource_id = buffer_resource_id; @@ -238,6 +252,7 @@ static inline void pva_kmd_set_cmd_set_debug_log_level(struct pva_cmd_set_debug_log_level *cmd, uint32_t log_level) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_SET_DEBUG_LOG_LEVEL; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->log_level = log_level; @@ -245,24 +260,23 @@ pva_kmd_set_cmd_set_debug_log_level(struct pva_cmd_set_debug_log_level *cmd, static inline void pva_kmd_set_cmd_suspend_fw(struct pva_cmd_suspend_fw *cmd) { - uint64_t len = (sizeof(*cmd) / sizeof(uint32_t)); + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_SUSPEND_FW; - ASSERT(len <= 255u); - cmd->header.len = (uint8_t)(len); + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); } static inline void pva_kmd_set_cmd_resume_fw(struct pva_cmd_resume_fw *cmd) { - uint64_t len = (sizeof(*cmd) / sizeof(uint32_t)); + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_RESUME_FW; - ASSERT(len <= 255u); - cmd->header.len = (uint8_t)(len); + cmd->header.len = 
sizeof(*cmd) / sizeof(uint32_t); } static inline void pva_kmd_set_cmd_init_shared_dram_buffer( struct pva_cmd_init_shared_dram_buffer *cmd, uint8_t interface, uint32_t buffer_iova, uint32_t buffer_size) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_INIT_SHARED_DRAM_BUFFER; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->buffer_iova_hi = iova_hi(buffer_iova); @@ -274,6 +288,7 @@ static inline void pva_kmd_set_cmd_init_shared_dram_buffer( static inline void pva_kmd_set_cmd_deinit_shared_dram_buffer( struct pva_cmd_deinit_shared_dram_buffer *cmd, uint8_t interface) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_SHARED_DRAM_BUFFER; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->interface = interface; @@ -283,8 +298,12 @@ static inline void pva_kmd_set_cmd_set_profiling_level(struct pva_cmd_set_profiling_level *cmd, uint32_t level) { + memset(cmd, 0, sizeof(*cmd)); cmd->header.opcode = PVA_CMD_OPCODE_SET_PROFILING_LEVEL; cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->level = level; } + +#define CMD_LEN(cmd_type) (sizeof(cmd_type) / sizeof(uint32_t)) + #endif // PVA_KMD_CMDBUF_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_constants.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_constants.h index dd62df31..13e9a1c7 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_constants.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_constants.h @@ -23,7 +23,7 @@ // clang-format off #if PVA_BUILD_MODE == PVA_BUILD_MODE_SIM #define PVA_KMD_TIMEOUT_FACTOR 100 -#elif (PVA_BUILD_MODE == PVA_BUILD_MODE_NATIVE) && (PVA_IS_DEBUG == 1) +#elif (PVA_BUILD_MODE == PVA_BUILD_MODE_NATIVE) // On native builds, the FW calls the KMD's shared buffer handler in its // own thread. 
In debug builds, if there are a large number of messages // (prints, unregister, etc.), this handler might take a while to execute, @@ -42,22 +42,16 @@ #define PVA_KMD_WAIT_FW_POLL_INTERVAL_US PVA_KMD_TIMEOUT(100) /*< 100 us*/ #define PVA_KMD_FW_BOOT_TIMEOUT_MS PVA_KMD_TIMEOUT(1000) /*< 1 seconds */ -#define PVA_NUM_RW_SYNCPTS 56 +#define PVA_NUM_RW_SYNCPTS (PVA_MAX_NUM_CCQ * PVA_NUM_RW_SYNCPTS_PER_CONTEXT) // clang-format off #if PVA_DEV_MAIN_COMPATIBLE == 1 #define PVA_KMD_LOAD_FROM_GSC_DEFAULT true - #if PVA_SAFETY == 1 - #define PVA_KMD_APP_AUTH_DEFAULT true - #else - #define PVA_KMD_APP_AUTH_DEFAULT false - #endif #else #define PVA_KMD_LOAD_FROM_GSC_DEFAULT false - #define PVA_KMD_APP_AUTH_DEFAULT false #endif // clang-format on -#define PVA_KMD_MAX_NUM_USER_DMA_CONFIG 1024 +#define PVA_KMD_DMA_CONFIG_POOL_INCR 256 #endif // PVA_KMD_CONSTANTS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c index 3d2ac145..bd64cadd 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c @@ -67,118 +67,86 @@ err_out: static enum pva_error notify_fw_context_init(struct pva_kmd_context *ctx) { - struct pva_kmd_cmdbuf_builder builder; struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter; struct pva_cmd_init_resource_table *res_cmd; struct pva_cmd_init_queue *queue_cmd; struct pva_cmd_update_resource_table *update_cmd; struct pva_resource_entry entry = { 0 }; - uint32_t fence_val; + const struct pva_syncpt_rw_info *syncpt_info; enum pva_error err; + uint32_t current_offset = 0; + uint32_t cmd_scratch[CMD_LEN(struct pva_cmd_init_resource_table) + + CMD_LEN(struct pva_cmd_init_queue) + + CMD_LEN(struct pva_cmd_update_resource_table)]; - err = pva_kmd_submitter_prepare(dev_submitter, &builder); - if (err != PVA_SUCCESS) { - goto err_out; - } - res_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*res_cmd)); - ASSERT(res_cmd != NULL); + res_cmd = (struct pva_cmd_init_resource_table *)pva_offset_pointer( + &cmd_scratch[0], current_offset); + current_offset += sizeof(*res_cmd); + + queue_cmd = (struct pva_cmd_init_queue *)pva_offset_pointer( + &cmd_scratch[0], current_offset); + current_offset += sizeof(*queue_cmd); + + update_cmd = (struct pva_cmd_update_resource_table *)pva_offset_pointer( + &cmd_scratch[0], current_offset); + current_offset += sizeof(*update_cmd); pva_kmd_set_cmd_init_resource_table( res_cmd, ctx->resource_table_id, ctx->ctx_resource_table.table_mem->iova, ctx->ctx_resource_table.n_entries); - queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd)); - ASSERT(queue_cmd != NULL); - + syncpt_info = pva_kmd_queue_get_rw_syncpt_info(ctx, ctx->ccq_id); pva_kmd_set_cmd_init_queue( queue_cmd, PVA_PRIV_CCQ_ID, ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/ ctx->ctx_queue.queue_memory->iova, - ctx->ctx_queue.max_num_submit); - - update_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*update_cmd)); - ASSERT(update_cmd != NULL); + ctx->ctx_queue.max_num_submit, syncpt_info->syncpt_id, + syncpt_info->syncpt_iova); err = pva_kmd_make_resource_entry(&ctx->pva->dev_resource_table, ctx->submit_memory_resource_id, &entry); ASSERT(err == PVA_SUCCESS); - pva_kmd_set_cmd_update_resource_table(update_cmd, 0, /* KMD's resource table ID */ ctx->submit_memory_resource_id, &entry); - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); - if (err != PVA_SUCCESS) { - // Error is either 
QUEUE_FULL or TIMEDOUT - goto cancel_builder; - } - - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out when initializing context"); - goto err_out; - } - - return PVA_SUCCESS; - -cancel_builder: - pva_kmd_cmdbuf_builder_cancel(&builder); -err_out: + err = pva_kmd_submit_cmd_sync(dev_submitter, cmd_scratch, + sizeof(cmd_scratch), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); return err; } static enum pva_error notify_fw_context_deinit(struct pva_kmd_context *ctx) { - struct pva_kmd_cmdbuf_builder builder; struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter; struct pva_cmd_deinit_resource_table *deinit_table_cmd; struct pva_cmd_deinit_queue *deinit_queue_cmd; - uint32_t fence_val; + uint32_t cmd_scratch[CMD_LEN(struct pva_cmd_deinit_queue) + + CMD_LEN(struct pva_cmd_deinit_resource_table)]; enum pva_error err; - err = pva_kmd_submitter_prepare(dev_submitter, &builder); - if (err != PVA_SUCCESS) { - goto err_out; - } + deinit_queue_cmd = (struct pva_cmd_deinit_queue *)pva_offset_pointer( + &cmd_scratch[0], 0); + deinit_table_cmd = + (struct pva_cmd_deinit_resource_table *)pva_offset_pointer( + &cmd_scratch[0], sizeof(struct pva_cmd_deinit_queue)); - deinit_queue_cmd = - pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_queue_cmd)); - ASSERT(deinit_queue_cmd != NULL); pva_kmd_set_cmd_deinit_queue( deinit_queue_cmd, PVA_PRIV_CCQ_ID, ctx->ccq_id /* For privileged queues, queue ID == user CCQ ID*/ ); - deinit_table_cmd = - pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_table_cmd)); - ASSERT(deinit_table_cmd != NULL); pva_kmd_set_cmd_deinit_resource_table(deinit_table_cmd, ctx->resource_table_id); - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); - if (err != PVA_SUCCESS) { - goto cancel_builder; - } - - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out when deinitializing context"); - goto err_out; - } - - return PVA_SUCCESS; -cancel_builder: - pva_kmd_cmdbuf_builder_cancel(&builder); -err_out: + err = pva_kmd_submit_cmd_sync(dev_submitter, cmd_scratch, + sizeof(cmd_scratch), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); return err; } @@ -189,20 +157,24 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx, uint32_t queue_mem_size; uint64_t chunk_mem_size; struct pva_fw_postfence post_fence = { 0 }; - struct pva_syncpt_rw_info *syncpts; uint64_t size; - /* Allocate RW syncpoints for this context */ - syncpts = (struct pva_syncpt_rw_info *)pva_kmd_alloc_block( - &ctx->pva->syncpt_allocator, &ctx->syncpt_block_index); - ASSERT(syncpts != NULL); + if (ctx->inited) { + err = PVA_INVAL; + goto err_out; + } + + if (res_table_capacity == 0u) { + pva_kmd_log_err("Invalid resource capacity"); + err = PVA_BAD_PARAMETER_ERROR; + goto err_out; + } /* Init resource table for this context */ err = pva_kmd_resource_table_init(&ctx->ctx_resource_table, ctx->pva, - ctx->smmu_ctx_id, res_table_capacity, - PVA_KMD_MAX_NUM_USER_DMA_CONFIG); + ctx->smmu_ctx_id, res_table_capacity); if (err != PVA_SUCCESS) { - goto drop_device; + goto err_out; } /* Init privileged queue for this context */ @@ -225,7 +197,8 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx, /* Allocate memory for submission */ chunk_mem_size = 
pva_kmd_cmdbuf_pool_get_required_mem_size( - PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_PRIV_CHUNKS); + pva_kmd_get_max_cmdbuf_chunk_size(ctx->pva), + PVA_KMD_MAX_NUM_PRIV_CHUNKS); /* Allocate one post fence at the end. This memory will be added to * KMD's own resource table. We don't need to explicitly free it. It * will be freed after we drop the resource. */ @@ -242,6 +215,8 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx, ctx->submit_memory, &ctx->submit_memory_resource_id); if (err != PVA_SUCCESS) { + // On success, ownership of the submit memory transfers to KMD's + // resource table; if adding it to the table fails, we still own it + // and must free it here. pva_kmd_device_memory_free(ctx->submit_memory); goto queue_deinit; } @@ -249,7 +224,8 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx, /* Init chunk pool */ err = pva_kmd_cmdbuf_chunk_pool_init( &ctx->chunk_pool, ctx->submit_memory_resource_id, - 0 /* offset */, chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE, + 0 /* offset */, chunk_mem_size, + pva_kmd_get_max_cmdbuf_chunk_size(ctx->pva), PVA_KMD_MAX_NUM_PRIV_CHUNKS, ctx->submit_memory->va); if (err != PVA_SUCCESS) { goto free_dram_buffer_resource; @@ -283,13 +259,15 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx, pva_kmd_resource_table_lock, pva_kmd_resource_table_unlock); if (err != PVA_SUCCESS) { - goto deinit_submitter; + goto deinit_fw_context; } ctx->inited = true; return PVA_SUCCESS; +deinit_fw_context: + notify_fw_context_deinit(ctx); deinit_submitter: pva_kmd_mutex_deinit(&ctx->chunk_pool_lock); pva_kmd_mutex_deinit(&ctx->submit_lock); @@ -298,12 +276,10 @@ free_dram_buffer_resource: pva_kmd_drop_resource(&ctx->pva->dev_resource_table, ctx->submit_memory_resource_id); queue_deinit: - pva_kmd_queue_deinit(&ctx->ctx_queue); pva_kmd_device_memory_free(ctx->ctx_queue_mem); deinit_table: pva_kmd_resource_table_deinit(&ctx->ctx_resource_table); -drop_device: - pva_kmd_device_idle(ctx->pva); +err_out: return err; } @@ -312,25 +288,24 @@ void pva_kmd_context_deinit(struct pva_kmd_context *ctx) { enum pva_error err; if (ctx->inited) { - if (!ctx->pva->recovery) { - err = notify_fw_context_deinit(ctx); - ASSERT(err == PVA_SUCCESS); + err = notify_fw_context_deinit(ctx); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Failed to notify FW of context deinit"); } err = pva_kmd_shared_buffer_deinit(ctx->pva, ctx->ccq_id); - ASSERT(err == PVA_SUCCESS); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Failed to deinit FW buffer"); + } - pva_kmd_device_idle(ctx->pva); pva_kmd_mutex_deinit(&ctx->submit_lock); pva_kmd_mutex_deinit(&ctx->chunk_pool_lock); pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool); pva_kmd_drop_resource(&ctx->pva->dev_resource_table, ctx->submit_memory_resource_id); - pva_kmd_queue_deinit(&ctx->ctx_queue); pva_kmd_device_memory_free(ctx->ctx_queue_mem); pva_kmd_resource_table_deinit(&ctx->ctx_resource_table); - pva_kmd_free_block(&ctx->pva->syncpt_allocator, - ctx->syncpt_block_index); ctx->inited = false; } } @@ -345,14 +320,13 @@ static void pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx) pva_kmd_mutex_lock(&ctx->queue_allocator.allocator_lock); queue = pva_kmd_get_block_unsafe(&ctx->queue_allocator, queue_id); + pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock); if (queue != NULL) { - pva_kmd_mutex_unlock( - &ctx->queue_allocator.allocator_lock); err = pva_kmd_queue_destroy(ctx, queue_id); - ASSERT(err == PVA_SUCCESS); - } else { - pva_kmd_mutex_unlock( - &ctx->queue_allocator.allocator_lock); + if (err != PVA_SUCCESS) {
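+ /* Log and keep iterating so the remaining queues are still destroyed. */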
+ pva_kmd_log_err_u64( + "Failed to destroy queue ", queue_id); + } } } } @@ -363,11 +337,12 @@ void pva_kmd_context_destroy(struct pva_kmd_context *ctx) pva_kmd_destroy_all_queues(ctx); pva_kmd_context_deinit(ctx); + pva_kmd_device_idle(ctx->pva); pva_kmd_block_allocator_deinit(&ctx->queue_allocator); pva_kmd_free(ctx->queue_allocator_mem); pva_kmd_mutex_deinit(&ctx->ccq_lock); - err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id); pva_kmd_mutex_deinit(&ctx->ocb_lock); + err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id); ASSERT(err == PVA_SUCCESS); } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.h index d4a23eb2..03b0abc9 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.h @@ -63,9 +63,6 @@ struct pva_kmd_context { void *plat_data; uint64_t ccq_shm_handle; - /** Index of block of syncpoints allocated for this context */ - uint32_t syncpt_block_index; - uint32_t syncpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT]; pva_kmd_mutex_t ocb_lock; }; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c index 3fdbd3c9..f9066016 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c @@ -27,45 +27,6 @@ static uint64_t read_from_buffer_to_user(void *to, uint64_t count, return count; } -static enum pva_error -pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva, - uint32_t level) -{ - struct pva_kmd_cmdbuf_builder builder; - struct pva_kmd_submitter *dev_submitter = &pva->submitter; - struct pva_cmd_set_profiling_level *cmd; - uint32_t fence_val; - enum pva_error err; - - err = pva_kmd_submitter_prepare(dev_submitter, &builder); - if (err != PVA_SUCCESS) { - goto err_out; - } - - cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); - ASSERT(cmd != NULL); - pva_kmd_set_cmd_set_profiling_level(cmd, level); - - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); - if (err != PVA_SUCCESS) { - goto err_out; - } - - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out when setting profiling level"); - goto err_out; - } - - return PVA_SUCCESS; - -err_out: - return err; -} - static int64_t profiling_level_read(struct pva_kmd_device *dev, void *file_data, uint8_t *out_buffer, uint64_t offset, uint64_t size) @@ -118,92 +79,20 @@ static int64_t profiling_level_write(struct pva_kmd_device *dev, "pva_kmd_device_busy failed when submitting set profiling level cmd"); return 0; } + err = pva_kmd_notify_fw_set_profiling_level(dev, value); + pva_kmd_device_idle(dev); + if (err != PVA_SUCCESS) { pva_kmd_log_err( "Failed to notify FW about profiling level change"); return 0; } - pva_kmd_device_idle(dev); } + return size; } -void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva) -{ - static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0_v3", - "ocd_vpu1_v3" }; - struct pva_kmd_file_ops *profiling_fops; - - pva_kmd_debugfs_create_bool(pva, "stats_enabled", - &pva->debugfs_context.stats_enable); - pva_kmd_debugfs_create_bool(pva, "vpu_debug", - &pva->debugfs_context.vpu_debug); - - // Create profiling_level file operations - profiling_fops =
&pva->debugfs_context.profiling_level_fops; - profiling_fops->read = profiling_level_read; - profiling_fops->write = profiling_level_write; - profiling_fops->open = NULL; - profiling_fops->release = NULL; - profiling_fops->pdev = pva; - pva_kmd_debugfs_create_file(pva, "profiling_level", profiling_fops); - - pva->debugfs_context.vpu_fops.read = &get_vpu_stats; - pva->debugfs_context.vpu_fops.write = NULL; - pva->debugfs_context.vpu_fops.pdev = pva; - pva_kmd_debugfs_create_file(pva, "vpu_stats", - &pva->debugfs_context.vpu_fops); - for (uint32_t i = 0; i < NUM_VPU_BLOCKS; i++) { - pva->debugfs_context.vpu_ocd_fops[i].open = - &pva_kmd_vpu_ocd_open; - pva->debugfs_context.vpu_ocd_fops[i].release = - &pva_kmd_vpu_ocd_release; - pva->debugfs_context.vpu_ocd_fops[i].read = - &pva_kmd_vpu_ocd_read; - pva->debugfs_context.vpu_ocd_fops[i].write = - &pva_kmd_vpu_ocd_write; - pva->debugfs_context.vpu_ocd_fops[i].pdev = pva; - pva->debugfs_context.vpu_ocd_fops[i].file_data = - (void *)&pva->regspec.vpu_dbg_instr_reg_offset[i]; - pva_kmd_debugfs_create_file( - pva, vpu_ocd_names[i], - &pva->debugfs_context.vpu_ocd_fops[i]); - } - - pva->debugfs_context.allowlist_ena_fops.read = - &get_vpu_allowlist_enabled; - pva->debugfs_context.allowlist_ena_fops.write = &update_vpu_allowlist; - pva->debugfs_context.allowlist_ena_fops.pdev = pva; - pva_kmd_debugfs_create_file(pva, "vpu_app_authentication", - &pva->debugfs_context.allowlist_ena_fops); - - pva->debugfs_context.allowlist_path_fops.read = &get_vpu_allowlist_path; - pva->debugfs_context.allowlist_path_fops.write = - &update_vpu_allowlist_path; - pva->debugfs_context.allowlist_path_fops.pdev = pva; - pva_kmd_debugfs_create_file(pva, "allowlist_path", - &pva->debugfs_context.allowlist_path_fops); - - pva->debugfs_context.fw_debug_log_level_fops.write = - &update_fw_debug_log_level; - pva->debugfs_context.fw_debug_log_level_fops.read = NULL; - pva->debugfs_context.fw_debug_log_level_fops.pdev = pva; - pva_kmd_debugfs_create_file( - pva, "fw_debug_log_level", - &pva->debugfs_context.fw_debug_log_level_fops); - - pva_kmd_device_init_profiler(pva); - pva_kmd_device_init_tegra_stats(pva); -} - -void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *pva) -{ - pva_kmd_device_deinit_tegra_stats(pva); - pva_kmd_device_deinit_profiler(pva); - pva_kmd_debugfs_remove_nodes(pva); -} - static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats, uint8_t *out_buffer, uint64_t offset, uint64_t len) @@ -236,8 +125,9 @@ static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats, formatted_len); } -int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data, - uint8_t *out_buffer, uint64_t offset, uint64_t size) +static int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data, + uint8_t *out_buffer, uint64_t offset, + uint64_t size) { struct pva_kmd_tegrastats kmd_tegra_stats; @@ -251,9 +141,9 @@ int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data, return print_vpu_stats(&kmd_tegra_stats, out_buffer, offset, size); } -int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva, void *file_data, - uint8_t *out_buffer, uint64_t offset, - uint64_t size) +static int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva, + void *file_data, uint8_t *out_buffer, + uint64_t offset, uint64_t size) { // 1 byte for '0' or '1' and another 1 byte for the Null character char out_str[2]; @@ -267,9 +157,9 @@ int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva, void *file_data, sizeof(out_str)); } -int64_t 
update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, - const uint8_t *in_buffer, uint64_t offset, - uint64_t size) +static int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, + const uint8_t *in_buffer, uint64_t offset, + uint64_t size) { char strbuf[2]; // 1 byte for '0' or '1' and another 1 byte for the Null character uint32_t base = 10; @@ -302,9 +192,9 @@ int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, return size; } -int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, - uint8_t *out_buffer, uint64_t offset, - uint64_t size) +static int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, + void *file_data, uint8_t *out_buffer, + uint64_t offset, uint64_t size) { uint64_t len; pva_kmd_mutex_lock(&(pva->pva_auth->allow_list_lock)); @@ -317,13 +207,18 @@ int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, return len; } -int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, - const uint8_t *in_buffer, uint64_t offset, - uint64_t size) +static int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, + void *file_data, + const uint8_t *in_buffer, + uint64_t offset, uint64_t size) { char buffer[ALLOWLIST_FILE_LEN]; unsigned long retval; + if (size == 0) { + return 0; + } + if (size > sizeof(buffer)) { pva_kmd_log_err_u64( "Length of allowlist path is too long. It must be less than ", @@ -338,7 +233,7 @@ int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, } //Replacing last character from new-line to null terminator - buffer[safe_subu64(size, 1u)] = '\0'; + buffer[size - 1u] = '\0'; pva_kmd_mutex_lock(&(pva->pva_auth->allow_list_lock)); pva_kmd_update_allowlist_path(pva, buffer); @@ -347,9 +242,10 @@ int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, return size; } -int64_t update_fw_debug_log_level(struct pva_kmd_device *pva, void *file_data, - const uint8_t *in_buffer, uint64_t offset, - uint64_t size) +static int64_t update_fw_debug_log_level(struct pva_kmd_device *pva, + void *file_data, + const uint8_t *in_buffer, + uint64_t offset, uint64_t size) { uint32_t log_level; unsigned long retval; @@ -387,10 +283,143 @@ int64_t update_fw_debug_log_level(struct pva_kmd_device *pva, void *file_data, goto err_end; } - pva_kmd_notify_fw_set_debug_log_level(pva, log_level); - + err = pva_kmd_notify_fw_set_debug_log_level(pva, log_level); pva_kmd_device_idle(pva); + + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Failed to notify FW about debug log level change"); + } } err_end: return copy_size; } + +static int64_t get_fw_debug_log_level(struct pva_kmd_device *dev, + void *file_data, uint8_t *out_buffer, + uint64_t offset, uint64_t size) +{ + char print_buffer[64]; + int formatted_len; + + formatted_len = snprintf(print_buffer, sizeof(print_buffer), "%u\n", + dev->fw_debug_log_level); + + if (formatted_len <= 0) { + return -1; + } + + return read_from_buffer_to_user(out_buffer, size, offset, print_buffer, + (uint64_t)formatted_len); +} + +enum pva_error pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva) +{ + static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0_v3", + "ocd_vpu1_v3" }; + struct pva_kmd_file_ops *profiling_fops; + enum pva_error err; + + pva_kmd_debugfs_create_bool(pva, "stats_enabled", + &pva->debugfs_context.stats_enable); + pva_kmd_debugfs_create_bool(pva, "vpu_debug", + &pva->debugfs_context.vpu_debug); + + // Create profiling_level file operations + profiling_fops = 
&pva->debugfs_context.profiling_level_fops; + profiling_fops->read = profiling_level_read; + profiling_fops->write = profiling_level_write; + profiling_fops->open = NULL; + profiling_fops->release = NULL; + profiling_fops->pdev = pva; + err = pva_kmd_debugfs_create_file(pva, "profiling_level", + profiling_fops); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Failed to create profiling_level debugfs file"); + return err; + } + + pva->debugfs_context.vpu_fops.read = &get_vpu_stats; + pva->debugfs_context.vpu_fops.write = NULL; + pva->debugfs_context.vpu_fops.pdev = pva; + err = pva_kmd_debugfs_create_file(pva, "vpu_stats", + &pva->debugfs_context.vpu_fops); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Failed to create vpu_stats debugfs file"); + return err; + } + + for (uint32_t i = 0; i < NUM_VPU_BLOCKS; i++) { + pva->debugfs_context.vpu_ocd_fops[i].open = + &pva_kmd_vpu_ocd_open; + pva->debugfs_context.vpu_ocd_fops[i].release = + &pva_kmd_vpu_ocd_release; + pva->debugfs_context.vpu_ocd_fops[i].read = + &pva_kmd_vpu_ocd_read; + pva->debugfs_context.vpu_ocd_fops[i].write = + &pva_kmd_vpu_ocd_write; + pva->debugfs_context.vpu_ocd_fops[i].pdev = pva; + pva->debugfs_context.vpu_ocd_fops[i].file_data = + (void *)&pva->regspec.vpu_dbg_instr_reg_offset[i]; + err = pva_kmd_debugfs_create_file( + pva, vpu_ocd_names[i], + &pva->debugfs_context.vpu_ocd_fops[i]); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Failed to create vpu_ocd debugfs file"); + return err; + } + } + + pva->debugfs_context.allowlist_ena_fops.read = + &get_vpu_allowlist_enabled; + pva->debugfs_context.allowlist_ena_fops.write = &update_vpu_allowlist; + pva->debugfs_context.allowlist_ena_fops.pdev = pva; + err = pva_kmd_debugfs_create_file( + pva, "vpu_app_authentication", + &pva->debugfs_context.allowlist_ena_fops); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Failed to create vpu_app_authentication debugfs file"); + return err; + } + + pva->debugfs_context.allowlist_path_fops.read = &get_vpu_allowlist_path; + pva->debugfs_context.allowlist_path_fops.write = + &update_vpu_allowlist_path; + pva->debugfs_context.allowlist_path_fops.pdev = pva; + err = pva_kmd_debugfs_create_file( + pva, "allowlist_path", + &pva->debugfs_context.allowlist_path_fops); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Failed to create allowlist_path debugfs file"); + return err; + } + + pva->debugfs_context.fw_debug_log_level_fops.write = + &update_fw_debug_log_level; + pva->debugfs_context.fw_debug_log_level_fops.read = + &get_fw_debug_log_level; + pva->debugfs_context.fw_debug_log_level_fops.pdev = pva; + err = pva_kmd_debugfs_create_file( + pva, "fw_debug_log_level", + &pva->debugfs_context.fw_debug_log_level_fops); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Failed to create fw_debug_log_level debugfs file"); + return err; + } + + pva_kmd_device_init_profiler(pva); + pva_kmd_device_init_tegra_stats(pva); + + return PVA_SUCCESS; +} + +void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *pva) +{ + pva_kmd_device_deinit_tegra_stats(pva); + pva_kmd_device_deinit_profiler(pva); + pva_kmd_debugfs_remove_nodes(pva); +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h index 6e8b4269..e9e0e0c4 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h @@ -37,24 +37,7 @@ struct pva_kmd_debugfs_context { struct pva_kmd_file_ops fw_debug_log_level_fops; }; -void 
pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev); +enum pva_error pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev); void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *dev); -int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data, - uint8_t *out_buffer, uint64_t offset, uint64_t size); -int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, - const uint8_t *in_buffer, uint64_t offset, - uint64_t size); -int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva, void *file_data, - uint8_t *out_buffer, uint64_t offset, - uint64_t size); -int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, - const uint8_t *in_buffer, uint64_t offset, - uint64_t size); -int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, - uint8_t *out_buffer, uint64_t offset, - uint64_t size); -int64_t update_fw_debug_log_level(struct pva_kmd_device *dev, void *file_data, - const uint8_t *in_buffer, uint64_t offset, - uint64_t size); #endif //PVA_KMD_DEBUGFS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c index d31c3551..3d3158c8 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c @@ -33,10 +33,11 @@ * Initialization through CCQ is only intended for KMD's own resource table (the * first resource table created). */ -void pva_kmd_send_resource_table_info_by_ccq( +static enum pva_error pva_kmd_send_resource_table_info_by_ccq( struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table) { enum pva_error err; + uint64_t addr = res_table->table_mem->iova; uint32_t n_entries = res_table->n_entries; uint64_t ccq_entry = @@ -51,8 +52,9 @@ void pva_kmd_send_resource_table_info_by_ccq( err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry, PVA_KMD_WAIT_FW_POLL_INTERVAL_US, PVA_KMD_WAIT_FW_TIMEOUT_US); - ASSERT(err == PVA_SUCCESS); pva_kmd_mutex_unlock(&pva->ccq0_lock); + + return err; } /** @@ -61,8 +63,9 @@ void pva_kmd_send_resource_table_info_by_ccq( * Initialization through CCQ is only intended for KMD's own queue (the first * queue created). */ -void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva, - struct pva_kmd_queue *queue) +static enum pva_error +pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva, + struct pva_kmd_queue *queue) { enum pva_error err; uint64_t addr = queue->queue_memory->iova; @@ -78,8 +81,9 @@ void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva, err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry, PVA_KMD_WAIT_FW_POLL_INTERVAL_US, PVA_KMD_WAIT_FW_TIMEOUT_US); - ASSERT(err == PVA_SUCCESS); pva_kmd_mutex_unlock(&pva->ccq0_lock); + + return err; } /** @@ -113,13 +117,13 @@ static void pva_kmd_device_init_submission(struct pva_kmd_device *pva) /* Init KMD's resource table */ err = pva_kmd_resource_table_init(&pva->dev_resource_table, pva, PVA_R5_SMMU_CONTEXT_ID, - PVA_KMD_MAX_NUM_KMD_RESOURCES, - PVA_KMD_MAX_NUM_KMD_DMA_CONFIGS); + PVA_KMD_MAX_NUM_KMD_RESOURCES); ASSERT(err == PVA_SUCCESS); /* Allocate memory for submission*/ chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size( - PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_KMD_CHUNKS); + pva_kmd_get_max_cmdbuf_chunk_size(pva), + PVA_KMD_MAX_NUM_KMD_CHUNKS); size = safe_addu64(chunk_mem_size, (uint64_t)sizeof(uint32_t)); /* Allocate one post fence at the end. 
We don't need to free this memory @@ -138,7 +142,7 @@ static void pva_kmd_device_init_submission(struct pva_kmd_device *pva) /* Init chunk pool */ pva_kmd_cmdbuf_chunk_pool_init( &pva->chunk_pool, pva->submit_memory_resource_id, 0, - chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE, + chunk_mem_size, pva_kmd_get_max_cmdbuf_chunk_size(pva), PVA_KMD_MAX_NUM_KMD_CHUNKS, pva->submit_memory->va); /* Init fence */ @@ -167,21 +171,25 @@ static void pva_kmd_device_deinit_submission(struct pva_kmd_device *pva) pva_kmd_drop_resource(&pva->dev_resource_table, pva->submit_memory_resource_id); pva_kmd_resource_table_deinit(&pva->dev_resource_table); - pva_kmd_queue_deinit(&pva->dev_queue); pva_kmd_device_memory_free(pva->queue_memory); } struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id, uint32_t device_index, - bool app_authenticate) + bool app_authenticate, + bool test_mode) { struct pva_kmd_device *pva; enum pva_error err; - uint32_t chunk_size; uint32_t size; + if (test_mode) { + pva_kmd_log_err("Test mode is enabled"); + } + pva = pva_kmd_zalloc_nofail(sizeof(*pva)); + pva->test_mode = test_mode; pva->device_index = device_index; pva->load_from_gsc = false; pva->is_hv_mode = true; @@ -211,13 +219,6 @@ struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id, pva_kmd_device_plat_init(pva); - chunk_size = safe_mulu32((uint32_t)sizeof(struct pva_syncpt_rw_info), - (uint32_t)PVA_NUM_RW_SYNCPTS_PER_CONTEXT); - err = pva_kmd_block_allocator_init(&pva->syncpt_allocator, - pva->syncpt_rw, 0, chunk_size, - PVA_MAX_NUM_USER_CONTEXTS); - ASSERT(err == PVA_SUCCESS); - pva_kmd_device_init_submission(pva); err = pva_kmd_init_vpu_app_auth(pva, app_authenticate); @@ -257,7 +258,6 @@ void pva_kmd_device_destroy(struct pva_kmd_device *pva) pva_kmd_wait_for_active_contexts(pva); pva_kmd_device_deinit_submission(pva); pva_kmd_device_plat_deinit(pva); - pva_kmd_block_allocator_deinit(&pva->syncpt_allocator); pva_kmd_block_allocator_deinit(&pva->context_allocator); pva_kmd_free(pva->context_mem); pva_kmd_mutex_deinit(&pva->ccq0_lock); @@ -266,44 +266,71 @@ void pva_kmd_device_destroy(struct pva_kmd_device *pva) pva_kmd_free(pva); } -static enum pva_error -pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva, - uint32_t level) +static enum pva_error config_fw_by_cmds(struct pva_kmd_device *pva) { - struct pva_kmd_cmdbuf_builder builder; - struct pva_kmd_submitter *dev_submitter = &pva->submitter; - struct pva_cmd_set_profiling_level *cmd; - uint32_t fence_val; - enum pva_error err; + enum pva_error err = PVA_SUCCESS; - err = pva_kmd_submitter_prepare(dev_submitter, &builder); + err = pva_kmd_notify_fw_enable_profiling(pva); if (err != PVA_SUCCESS) { goto err_out; } - cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); - ASSERT(cmd != NULL); - pva_kmd_set_cmd_set_profiling_level(cmd, level); - - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + /* Set FW debug log level */ + err = pva_kmd_notify_fw_set_debug_log_level(pva, + pva->fw_debug_log_level); if (err != PVA_SUCCESS) { goto err_out; } - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); + // If the user had set profiling level before power-on, send the update to FW + err = pva_kmd_notify_fw_set_profiling_level( + pva, pva->debugfs_context.profiling_level); if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out when setting profiling level"); goto err_out; } - return PVA_SUCCESS; - err_out: return err; } + +enum 
pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva) +{ + enum pva_error err = PVA_SUCCESS; + + /* Reset KMD queue */ + pva->dev_queue.queue_header->cb_head = 0; + pva->dev_queue.queue_header->cb_tail = 0; + + err = pva_kmd_send_resource_table_info_by_ccq(pva, + &pva->dev_resource_table); + if (err != PVA_SUCCESS) { + goto err_out; + } + err = pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = pva_kmd_shared_buffer_init(pva, PVA_PRIV_CCQ_ID, + PVA_KMD_FW_BUF_ELEMENT_SIZE, + PVA_KMD_FW_PROFILING_BUF_NUM_ELEMENTS, + NULL, NULL); + if (err != PVA_SUCCESS) { + pva_kmd_log_err_u64( + "pva kmd buffer initialization failed for interface ", + PVA_PRIV_CCQ_ID); + goto err_out; + } + + err = config_fw_by_cmds(pva); + if (err != PVA_SUCCESS) { + goto err_out; + } + +err_out: + return err; +} + enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva) { enum pva_error err = PVA_SUCCESS; @@ -321,36 +348,26 @@ enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva) if (err != PVA_SUCCESS) { goto poweroff; } - /* Reset KMD queue */ - pva->dev_queue.queue_header->cb_head = 0; - pva->dev_queue.queue_header->cb_tail = 0; - pva_kmd_send_resource_table_info_by_ccq( - pva, &pva->dev_resource_table); - pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue); - - // TODO: need better error handling here - err = pva_kmd_shared_buffer_init( - pva, PVA_PRIV_CCQ_ID, PVA_KMD_FW_BUF_ELEMENT_SIZE, - PVA_KMD_FW_PROFILING_BUF_NUM_ELEMENTS, NULL, NULL); + err = pva_kmd_config_fw_after_boot(pva); if (err != PVA_SUCCESS) { - pva_kmd_log_err_u64( - "pva kmd buffer initialization failed for interface ", - PVA_PRIV_CCQ_ID); goto deinit_fw; } - pva_kmd_notify_fw_enable_profiling(pva); - - /* Set FW debug log level */ - pva_kmd_notify_fw_set_debug_log_level(pva, - pva->fw_debug_log_level); - - // If the user had set profiling level before power-on, send the update to FW - pva_kmd_notify_fw_set_profiling_level( - pva, pva->debugfs_context.profiling_level); + } else { + // Once firmware is aborted, we no longer allow incrementing the PVA + // refcount. This makes sure the refcount eventually reaches 0, + // allowing the device to be powered off. + if (pva->recovery) { + pva_kmd_log_err_u64( + "PVA firmware aborted. " + "Waiting for active PVA uses to finish.
Remaining", + pva->refcount); + err = PVA_ERR_FW_ABORTED; + goto unlock; + } } - pva->refcount = safe_addu32(pva->refcount, 1U); + pva->refcount = safe_addu32(pva->refcount, 1U); pva_kmd_mutex_unlock(&pva->powercycle_lock); return PVA_SUCCESS; @@ -371,15 +388,15 @@ void pva_kmd_device_idle(struct pva_kmd_device *pva) ASSERT(pva->refcount > 0); pva->refcount--; if (pva->refcount == 0) { - if (!pva->recovery) { - /* Disable FW profiling */ - /* TODO: once debugfs is up, move these calls */ - pva_kmd_notify_fw_disable_profiling(pva); + err = pva_kmd_notify_fw_disable_profiling(pva); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "pva_kmd_notify_fw_disable_profiling failed during device idle"); } - // TOOD: need better error handling here err = pva_kmd_shared_buffer_deinit(pva, PVA_PRIV_CCQ_ID); if (err != PVA_SUCCESS) { - pva_kmd_log_err("pva_kmd_shared_buffer_deinit failed"); + pva_kmd_log_err( + "pva_kmd_shared_buffer_deinit failed during device idle"); } pva_kmd_deinit_fw(pva); pva_kmd_power_off(pva); @@ -397,9 +414,12 @@ enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva, if (timeout_us == 0) { pva_kmd_log_err( "pva_kmd_ccq_push_with_timeout Timed out"); - pva_kmd_abort(pva); + pva_kmd_abort_fw(pva); return PVA_TIMEDOUT; } + if (pva->recovery) { + return PVA_ERR_FW_ABORTED; + } pva_kmd_sleep_us(sleep_interval_us); timeout_us = sat_sub64(timeout_us, sleep_interval_us); } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h index dedcab7c..42fcdc7d 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h @@ -4,6 +4,7 @@ #ifndef PVA_KMD_DEVICE_H #define PVA_KMD_DEVICE_H #include "pva_constants.h" +#include "pva_fw.h" #include "pva_kmd_cmdbuf.h" #include "pva_kmd_utils.h" #include "pva_kmd_mutex.h" @@ -26,9 +27,6 @@ #define NV_PVA1_CLASS_ID 0xF2 struct pva_syncpt_rw_info { - /** Dont switch order since syncpt_id and syncpt_iova is prefilled during kmd boot - * and first field gets updated by pva_kmd_allocator everytime its freed */ - uint32_t syncpt_value; uint32_t syncpt_id; uint64_t syncpt_iova; }; @@ -127,12 +125,13 @@ struct pva_kmd_device { uint8_t bl_sector_pack_format; /** Offset between 2 syncpoints */ - uint32_t syncpt_offset; - uint64_t syncpt_ro_iova; - uint64_t syncpt_rw_iova; - uint32_t num_syncpts; - struct pva_syncpt_rw_info syncpt_rw[PVA_NUM_RW_SYNCPTS]; - struct pva_kmd_block_allocator syncpt_allocator; + uint32_t syncpt_page_size; + uint64_t ro_syncpt_base_iova; + uint32_t num_ro_syncpts; + + uint64_t rw_syncpt_base_iova; + uint32_t rw_syncpt_region_size; + struct pva_syncpt_rw_info rw_syncpts[PVA_NUM_RW_SYNCPTS]; struct vmem_region *vmem_regions_tab; bool support_hwseq_frame_linking; @@ -145,11 +144,14 @@ struct pva_kmd_device { /** Carveout info for FW */ struct pva_co_info fw_carveout; + + bool test_mode; }; struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id, uint32_t device_index, - bool app_authenticate); + bool app_authenticate, + bool test_mode); void pva_kmd_device_destroy(struct pva_kmd_device *pva); @@ -161,11 +163,7 @@ enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva, uint64_t sleep_interval_us, uint64_t timeout_us); -void pva_kmd_send_resource_table_info_by_ccq( - struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table); - -void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva, - struct pva_kmd_queue *queue); 
+enum pva_error pva_kmd_config_fw_after_boot(struct pva_kmd_device *pva); bool pva_kmd_device_maybe_on(struct pva_kmd_device *pva); @@ -177,4 +175,14 @@ static inline uint32_t pva_kmd_get_device_class_id(struct pva_kmd_device *pva) return NV_PVA1_CLASS_ID; } } + +static inline uint16_t +pva_kmd_get_max_cmdbuf_chunk_size(struct pva_kmd_device *pva) +{ + if (pva->test_mode) { + return PVA_TEST_MODE_MAX_CMDBUF_CHUNK_SIZE; + } else { + return PVA_MAX_CMDBUF_CHUNK_SIZE; + } +} #endif // PVA_KMD_DEVICE_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_devmem_pool.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_devmem_pool.c new file mode 100644 index 00000000..7f2d8159 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_devmem_pool.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +#include "pva_kmd_devmem_pool.h" +#include "pva_kmd_utils.h" +#include "pva_api.h" +#include "pva_utils.h" + +static uint64_t get_devmem_offset(struct pva_kmd_devmem_element const *devmem) +{ + return (uint64_t)safe_mulu32(devmem->ele_idx, + devmem->segment->owner_pool->element_size); +} + +uint64_t pva_kmd_get_devmem_iova(struct pva_kmd_devmem_element const *devmem) +{ + return safe_addu64(devmem->segment->mem->iova, + get_devmem_offset(devmem)); +} + +void *pva_kmd_get_devmem_va(struct pva_kmd_devmem_element const *devmem) +{ + return pva_offset_pointer(devmem->segment->mem->va, + get_devmem_offset(devmem)); +} + +static struct pva_kmd_devmem_pool_segment * +allocate_segment(struct pva_kmd_devmem_pool *pool) +{ + struct pva_kmd_devmem_pool_segment *segment; + struct pva_kmd_device_memory *mem = NULL; + uint64_t segment_size = safe_mulu64((uint64_t)pool->element_size, + (uint64_t)pool->n_element_incr); + void *va; + enum pva_error err; + + /* Allocate the segment structure */ + segment = pva_kmd_zalloc(sizeof(*segment)); + if (segment == NULL) { + goto err_out; + } + + /* Allocate device memory */ + mem = pva_kmd_device_memory_alloc_map( + segment_size, pool->pva, PVA_ACCESS_RW, pool->smmu_ctx_idx); + if (mem == NULL) { + goto free_segment; + } + + segment->mem = mem; + segment->owner_pool = pool; + segment->n_free_ele = + pool->n_element_incr; /* Initialize all elements as free */ + va = mem->va; + + /* Initialize the segment allocator */ + err = pva_kmd_block_allocator_init(&segment->elem_allocator, va, 0, + pool->element_size, + pool->n_element_incr); + if (err != PVA_SUCCESS) { + goto free_mem; + } + + /* Add segment to the pool */ + segment->next = pool->segment_list_head; + pool->segment_list_head = segment; + pool->n_free_element = + safe_addu32(pool->n_free_element, pool->n_element_incr); + + return segment; + +free_mem: + pva_kmd_device_memory_free(mem); +free_segment: + pva_kmd_free(segment); +err_out: + return NULL; +} + +enum pva_error pva_kmd_devmem_pool_init(struct pva_kmd_devmem_pool *pool, + struct pva_kmd_device *pva, + uint8_t smmu_ctx_idx, + uint32_t element_size, + uint32_t ele_incr_count) +{ + struct pva_kmd_devmem_pool_segment *segment; + enum pva_error err = PVA_SUCCESS; + + /* Initialize the pool structure */ + memset(pool, 0, sizeof(*pool)); + pool->smmu_ctx_idx = smmu_ctx_idx; + pool->element_size = + safe_pow2_roundup_u32(element_size, sizeof(uint64_t)); + pool->n_element_incr = ele_incr_count; + pool->n_free_element = 0; + pool->segment_list_head = NULL; + pool->pva = pva; + + err = pva_kmd_mutex_init(&pool->pool_lock); + if (err != 
PVA_SUCCESS) { + goto err_out; + } + + /* Allocate the first segment */ + segment = allocate_segment(pool); + if (segment == NULL) { + err = PVA_NOMEM; + goto deinit_mutex; + } + + return PVA_SUCCESS; + +deinit_mutex: + pva_kmd_mutex_deinit(&pool->pool_lock); +err_out: + return err; +} + +static enum pva_error +pva_kmd_devmem_pool_alloc(struct pva_kmd_devmem_pool *pool, + struct pva_kmd_devmem_element *devmem) +{ + struct pva_kmd_devmem_pool_segment *segment = NULL; + struct pva_kmd_devmem_pool_segment *new_segment = NULL; + uint32_t ele_idx = (uint32_t)-1; + enum pva_error err = PVA_SUCCESS; + + pva_kmd_mutex_lock(&pool->pool_lock); + + /* Check if we have any free elements */ + if (pool->n_free_element == 0) { + /* Need to allocate a new segment */ + new_segment = allocate_segment(pool); + if (new_segment == NULL) { + err = PVA_NOMEM; + goto unlock; + } + } + + /* Try to find a free element in the pool */ + segment = pool->segment_list_head; + while (segment != NULL) { + void *va = NULL; + va = pva_kmd_alloc_block_unsafe(&segment->elem_allocator, + &ele_idx); + if (va != NULL) { + /* Found a free element */ + break; + } + segment = segment->next; + } + + ASSERT(segment != NULL); + + devmem->segment = segment; + devmem->ele_idx = ele_idx; + pool->n_free_element = safe_subu32(pool->n_free_element, 1); + segment->n_free_ele = safe_subu32(segment->n_free_ele, 1); + +unlock: + pva_kmd_mutex_unlock(&pool->pool_lock); + return err; +} + +enum pva_error pva_kmd_devmem_pool_zalloc(struct pva_kmd_devmem_pool *pool, + struct pva_kmd_devmem_element *devmem) +{ + enum pva_error err = pva_kmd_devmem_pool_alloc(pool, devmem); + if (err != PVA_SUCCESS) { + return err; + } + + memset(pva_kmd_get_devmem_va(devmem), 0, pool->element_size); + return PVA_SUCCESS; +} + +static void free_segment(struct pva_kmd_devmem_pool *pool, + struct pva_kmd_devmem_pool_segment *target_segment) +{ + struct pva_kmd_devmem_pool_segment *segment; + struct pva_kmd_devmem_pool_segment *prev_segment = NULL; + + /* Find previous segment to update the linked list */ + segment = pool->segment_list_head; + while (segment != NULL && segment != target_segment) { + prev_segment = segment; + segment = segment->next; + } + + /* Segment not found in the list */ + ASSERT(segment != NULL); + + /* Remove this segment from the list */ + if (prev_segment == NULL) { + /* This is the head segment */ + pool->segment_list_head = target_segment->next; + } else { + prev_segment->next = target_segment->next; + } + + /* Free the segment allocator */ + pva_kmd_block_allocator_deinit(&target_segment->elem_allocator); + + /* Free the device memory */ + pva_kmd_device_memory_free(target_segment->mem); + + /* Free the segment structure */ + pva_kmd_free(target_segment); + + /* Update the free element count */ + pool->n_free_element = + safe_subu32(pool->n_free_element, pool->n_element_incr); +} + +void pva_kmd_devmem_pool_free(struct pva_kmd_devmem_element *devmem) +{ + struct pva_kmd_devmem_pool *pool = devmem->segment->owner_pool; + struct pva_kmd_devmem_pool_segment *current_segment = devmem->segment; + uint32_t threshold; + + pva_kmd_mutex_lock(&pool->pool_lock); + + /* Free the element */ + pva_kmd_free_block_unsafe(&current_segment->elem_allocator, + devmem->ele_idx); + pool->n_free_element = safe_addu32(pool->n_free_element, 1); + current_segment->n_free_ele = + safe_addu32(current_segment->n_free_ele, 1); + + /* Check if the current segment is now empty using n_free_ele counter */ + if (current_segment->n_free_ele == 
current_segment->elem_allocator.max_num_blocks) { + /* We only free the segment if we still have n_element_incr free elements + after the free */ + threshold = safe_mulu32(pool->n_element_incr, 2); + if (pool->n_free_element >= threshold) { + free_segment(pool, current_segment); + } + } + + pva_kmd_mutex_unlock(&pool->pool_lock); +} + +void pva_kmd_devmem_pool_deinit(struct pva_kmd_devmem_pool *pool) +{ + struct pva_kmd_devmem_pool_segment *segment = pool->segment_list_head; + struct pva_kmd_devmem_pool_segment *next; + + /* Free all segments */ + while (segment != NULL) { + next = segment->next; + + /* Free the segment allocator */ + pva_kmd_block_allocator_deinit(&segment->elem_allocator); + + /* Free the device memory */ + pva_kmd_device_memory_free(segment->mem); + + /* Free the segment structure */ + pva_kmd_free(segment); + + segment = next; + } + + pool->segment_list_head = NULL; + pva_kmd_mutex_deinit(&pool->pool_lock); +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_devmem_pool.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_devmem_pool.h new file mode 100644 index 00000000..70ff7dcc --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_devmem_pool.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ +#ifndef PVA_KMD_DEVMEM_POOL_H +#define PVA_KMD_DEVMEM_POOL_H +#include "pva_api.h" +#include "pva_kmd_block_allocator.h" +#include "pva_kmd_device_memory.h" + +/** @brief A segment of a device memory pool. + * + * It holds a fixed-size array of device memory blocks. A pool is a linked list + * of segments. + */ +struct pva_kmd_devmem_pool_segment { + /** The owner pool. */ + struct pva_kmd_devmem_pool *owner_pool; + /** The next segment in the pool. */ + struct pva_kmd_devmem_pool_segment *next; + /** The device memory for the segment. */ + struct pva_kmd_device_memory *mem; + /** The allocator for the elements in the segment. */ + struct pva_kmd_block_allocator elem_allocator; + /** The number of free elements in the segment. */ + uint32_t n_free_ele; +}; + +/** @brief A device memory pool that holds fixed-size elements. + * + * It allocates memory in segments; each segment contains n_element_incr + * elements. + * - element_size will be rounded up to the nearest 8 bytes for alignment. + * - The pool is initialized with element_size * n_element_incr capacity. + * - Once exhausted, the pool will allocate a new segment of memory and increase + * the capacity by n_element_incr. + * - When an element is freed, the pool does not immediately release the whole + * segment even if the whole segment is empty. However, if there are 2 * + * n_element_incr free elements, the pool will release a whole segment, so + * that at least n_element_incr free elements remain. + * - The pool is thread-safe. + */ +struct pva_kmd_devmem_pool { + /** The SMMU context index for the pool. */ + uint8_t smmu_ctx_idx; + /** The size of each element in the pool. */ + uint32_t element_size; + /** The number of elements to allocate in each segment. */ + uint32_t n_element_incr; + /** The total number of free elements in the pool, across all segments. */ + uint32_t n_free_element; + /** The head of the segment list. */ + struct pva_kmd_devmem_pool_segment *segment_list_head; + /** The PVA device. */ + struct pva_kmd_device *pva; + /** The mutex for the pool. */ + pva_kmd_mutex_t pool_lock; +}; + +/** @brief Device memory from a pool. 
+ * + * It is an element in a segment of a pool. + */ +struct pva_kmd_devmem_element { + /** The segment that contains the element. */ + struct pva_kmd_devmem_pool_segment *segment; + /** The index of the element in the segment. */ + uint32_t ele_idx; +}; + +/** @brief Get the IOVA of a device memory element. */ +uint64_t pva_kmd_get_devmem_iova(struct pva_kmd_devmem_element const *devmem); + +/** @brief Get the virtual address of a device memory element. */ +void *pva_kmd_get_devmem_va(struct pva_kmd_devmem_element const *devmem); + +/** @brief Initialize a device memory pool. + * + * @param pool The device memory pool to initialize. + * @param pva The PVA device. + * @param smmu_ctx_idx The SMMU context index for the pool. + * @param element_size The size of each element in the pool. + * @param ele_incr_count The number of elements to allocate in each segment. + */ +enum pva_error pva_kmd_devmem_pool_init(struct pva_kmd_devmem_pool *pool, + struct pva_kmd_device *pva, + uint8_t smmu_ctx_idx, + uint32_t element_size, + uint32_t ele_incr_count); + +/** @brief Allocate a device memory element from a pool and zero-initialize it. */ +enum pva_error +pva_kmd_devmem_pool_zalloc(struct pva_kmd_devmem_pool *pool, + struct pva_kmd_devmem_element *devmem); + +/** @brief Free a device memory element from a pool. */ +void pva_kmd_devmem_pool_free(struct pva_kmd_devmem_element *devmem); + +/** @brief Deinitialize a device memory pool. */ +void pva_kmd_devmem_pool_deinit(struct pva_kmd_devmem_pool *pool); + +#endif diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c index ebc79965..8726df9b 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c @@ -62,42 +62,41 @@ pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table, struct pva_dma_config dma_config; struct pva_fw_dma_slot *dyn_slots; struct pva_fw_dma_reloc *dyn_relocs; - struct pva_fw_dma_slot *static_slots = dma_aux->static_slots; - struct pva_fw_dma_reloc *static_relocs = dma_aux->static_relocs; - struct pva_kmd_dma_access *access_sizes = dma_aux->access_sizes; + struct pva_kmd_dma_scratch_buffer *scratch_buf; // Mapping descriptor index to channel index uint8_t desc_to_ch[PVA_MAX_NUM_DMA_DESC]; + scratch_buf = pva_kmd_zalloc(sizeof(*scratch_buf)); + if (scratch_buf == NULL) { + err = PVA_NOMEM; + goto err_out; + } + for (uint32_t i = 0; i < PVA_MAX_NUM_DMA_DESC; i++) { desc_to_ch[i] = PVA_KMD_INVALID_CH_IDX; } - //set access_sizes to 0 by default - (void)memset( - access_sizes, 0, - (PVA_MAX_NUM_DMA_DESC * sizeof(struct pva_kmd_dma_access))); - err = pva_kmd_parse_dma_config(dma_cfg_hdr, dma_config_size, &dma_config, &resource_table->pva->hw_consts); if (err != PVA_SUCCESS) { - goto err_out; + goto free_scratch_buf; } err = pva_kmd_validate_dma_config(&dma_config, &resource_table->pva->hw_consts, - access_sizes, - dma_aux->hw_dma_descs_mask); + scratch_buf->access_sizes, + scratch_buf->hw_dma_descs_mask); if (err != PVA_SUCCESS) { - goto err_out; + goto free_scratch_buf; } trace_dma_channels(&dma_config, desc_to_ch); - err = pva_kmd_compute_dma_access(&dma_config, access_sizes, - dma_aux->hw_dma_descs_mask); + err = pva_kmd_compute_dma_access(&dma_config, scratch_buf->access_sizes, + scratch_buf->hw_dma_descs_mask); if (err != PVA_SUCCESS) { - goto err_out; + goto free_scratch_buf; } dyn_slots = pva_offset_pointer(fw_dma_cfg, @@ -107,9 +106,10 @@ 
pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table, dma_config.header.num_dynamic_slots * sizeof(*dyn_slots)); - pva_kmd_collect_relocs(&dma_config, access_sizes, static_slots, + pva_kmd_collect_relocs(&dma_config, scratch_buf->access_sizes, + scratch_buf->static_slots, dma_config.header.num_static_slots, - static_relocs, dyn_slots, + scratch_buf->static_relocs, dyn_slots, dma_config.header.num_dynamic_slots, dyn_relocs, desc_to_ch); @@ -117,26 +117,27 @@ pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table, &dma_config, fw_dma_cfg, &fw_fetch_size, resource_table->pva->support_hwseq_frame_linking); - dma_aux->res_table = resource_table; err = pva_kmd_dma_use_resources(&dma_config, dma_aux); if (err != PVA_SUCCESS) { - goto err_out; + goto free_scratch_buf; } - err = pva_kmd_bind_static_buffers(fw_dma_cfg, dma_aux, static_slots, - dma_config.header.num_static_slots, - static_relocs, - dma_config.static_bindings, - dma_config.header.num_static_slots); + err = pva_kmd_bind_static_buffers( + fw_dma_cfg, dma_aux, scratch_buf->static_slots, + dma_config.header.num_static_slots, scratch_buf->static_relocs, + dma_config.static_bindings, dma_config.header.num_static_slots); if (err != PVA_SUCCESS) { goto drop_res; } *out_fw_fetch_size = fw_fetch_size; + pva_kmd_free(scratch_buf); return PVA_SUCCESS; drop_res: pva_kmd_unload_dma_config_unsafe(dma_aux); +free_scratch_buf: + pva_kmd_free(scratch_buf); err_out: return err; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h index d349bf30..abdfee32 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h @@ -50,9 +50,10 @@ struct pva_kmd_dma_resource_aux { uint32_t dram_res_count; /** DRAM buffers statically referenced by the DMA configuration */ uint32_t static_dram_res_ids[PVA_KMD_MAX_NUM_DMA_DRAM_SLOTS]; +}; - /* Below are work buffers need during DMA configuration loading. They - * don't fit on stack. */ +/* Scratch buffers needed during DMA configuration loading. They don't fit on stack. 
*/ +struct pva_kmd_dma_scratch_buffer { struct pva_fw_dma_slot static_slots[PVA_KMD_MAX_NUM_DMA_SLOTS]; struct pva_fw_dma_reloc static_relocs[PVA_KMD_MAX_NUM_DMA_SLOTS]; struct pva_kmd_dma_access access_sizes[PVA_MAX_NUM_DMA_DESC]; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c index 75dd3370..533f7cc8 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c @@ -94,6 +94,9 @@ bind_static_dram_slot(struct pva_dma_config_resource *dma_config, int64_t slot_access_end_addr = 0LL; uint64_t slot_surface_combined_offset = 0ULL; pva_math_error math_error = MATH_OP_SUCCESS; + uint8_t slot_access_flags = + PVA_EXTRACT16(slot->flags, PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB, + PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB, uint8_t); if ((slot->flags & PVA_FW_DMA_SLOT_FLAG_DRAM) == 0) { pva_kmd_log_err("Binding DRAM buffer to incompatible slot"); @@ -101,6 +104,14 @@ bind_static_dram_slot(struct pva_dma_config_resource *dma_config, goto out; } + if ((slot_access_flags & dram_res->mem->iova_access_flags) != + slot_access_flags) { + pva_kmd_log_err( + "DRAM buffer does not have the required access permissions"); + err = PVA_INVALID_BINDING; + goto out; + } + if (is_block_linear) { if (slot->flags & PVA_FW_DMA_SLOT_FLAG_CB) { pva_kmd_log_err( diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c index a7f0e71c..6139b9c7 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c @@ -218,7 +218,7 @@ validate_descriptor(const struct pva_dma_descriptor *desc, /* DMA_DESC_LDID */ if ((desc->link_desc_id > cfg_hdr->num_descriptors) || ((desc->link_desc_id != 0) && - pva_is_reserved_desc(desc->link_desc_id - PVA_DMA_DESC0))) { + pva_is_reserved_desc(desc->link_desc_id - PVA_DMA_DESC_ID_BASE))) { pva_kmd_log_err("ERR: Invalid linker Desc ID"); return PVA_INVAL; } @@ -423,6 +423,8 @@ pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg, err = PVA_INVAL; goto err_out; } + dma_aux->vpu_bin_res_id = dma_cfg->header.vpu_exec_resource_id; + if (vpu_bin_rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) { pva_kmd_log_err( "Invalid VPU exec resource id used by DMA config"); @@ -432,9 +434,6 @@ pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg, vpu_bin = &vpu_bin_rec->vpu_bin; } - dma_aux->vpu_bin_res_id = dma_cfg->header.vpu_exec_resource_id; - - dma_aux->dram_res_count = 0; /* Increment reference count for all static DRAM buffers; For static * VMEM buffers, check that symbol ID is valid. 
*/ for (i = 0; i < dma_cfg->header.num_static_slots; i++) { @@ -455,7 +454,8 @@ pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg, dma_aux->static_dram_res_ids[dma_aux->dram_res_count] = slot_buf->dram.resource_id; - dma_aux->dram_res_count += 1; + dma_aux->dram_res_count = + safe_addu32(dma_aux->dram_res_count, 1U); if (rec->type != PVA_RESOURCE_TYPE_DRAM) { pva_kmd_log_err( @@ -505,9 +505,10 @@ static uint16_t get_slot_id(uint16_t slot) return slot & PVA_DMA_SLOT_ID_MASK; } -static uint8_t get_slot_flag(uint8_t transfer_mode, bool cb_enable) +static uint16_t get_slot_flag(uint8_t transfer_mode, bool cb_enable, + bool is_dst) { - uint8_t flags = 0; + uint16_t flags = 0; if (transfer_mode == PVA_DMA_TRANS_MODE_VMEM) { flags |= PVA_FW_DMA_SLOT_FLAG_VMEM_DATA; } else if (transfer_mode == PVA_DMA_TRANS_MODE_L2SRAM) { @@ -521,6 +522,15 @@ static uint8_t get_slot_flag(uint8_t transfer_mode, bool cb_enable) if (cb_enable) { flags |= PVA_FW_DMA_SLOT_FLAG_CB; } + if (is_dst) { + flags |= PVA_INSERT(PVA_ACCESS_WO, + PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB, + PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB); + } else { + flags |= PVA_INSERT(PVA_ACCESS_RO, + PVA_FW_DMA_SLOT_FLAG_ACCESS_MSB, + PVA_FW_DMA_SLOT_FLAG_ACCESS_LSB); + } return flags; } @@ -529,7 +539,7 @@ static void update_reloc_count(uint16_t slot, uint8_t transfer_mode, struct pva_fw_dma_slot *out_static_slots, uint16_t num_static_slots, struct pva_fw_dma_slot *out_dyn_slots, - uint16_t num_dyn_slots) + uint16_t num_dyn_slots, bool is_dst) { uint8_t slot_id = get_slot_id(slot); @@ -537,13 +547,12 @@ static void update_reloc_count(uint16_t slot, uint8_t transfer_mode, out_dyn_slots[slot_id].reloc_count = safe_addu16(out_dyn_slots[slot_id].reloc_count, 1U); out_dyn_slots[slot_id].flags |= - get_slot_flag(transfer_mode, cb_enable); + get_slot_flag(transfer_mode, cb_enable, is_dst); } else if (slot & PVA_DMA_STATIC_SLOT) { out_static_slots[slot_id].reloc_count = safe_addu16(out_static_slots[slot_id].reloc_count, 1U); - ; out_static_slots[slot_id].flags |= - get_slot_flag(transfer_mode, cb_enable); + get_slot_flag(transfer_mode, cb_enable, is_dst); } } @@ -567,17 +576,17 @@ static void count_relocs(struct pva_dma_config const *dma_cfg, update_reloc_count(desc->src.slot, desc->src.transfer_mode, desc->src.cb_enable, out_static_slots, num_static_slots, out_dyn_slots, - num_dyn_slots); + num_dyn_slots, false); update_reloc_count(desc->dst.slot, desc->dst.transfer_mode, desc->dst.cb_enable, out_static_slots, num_static_slots, out_dyn_slots, - num_dyn_slots); + num_dyn_slots, true); update_reloc_count(desc->dst2_slot, desc->dst.transfer_mode, desc->dst.cb_enable, out_static_slots, num_static_slots, out_dyn_slots, - num_dyn_slots); + num_dyn_slots, true); } } @@ -867,10 +876,6 @@ void pva_kmd_collect_relocs(struct pva_dma_config const *dma_cfg, uint8_t static_reloc_off[PVA_MAX_NUM_DMA_DESC * 3]; uint8_t dyn_reloc_off[PVA_MAX_NUM_DMA_DESC * 3]; - memset(out_static_slots, 0, - num_static_slots * sizeof(*out_static_slots)); - memset(out_dyn_slots, 0, num_dyn_slots * sizeof(*out_dyn_slots)); - /* First pass: count the number of relocates for each slot */ count_relocs(dma_cfg, out_static_slots, num_static_slots, out_dyn_slots, num_dyn_slots); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c index 0fa3bea5..5b9e6bce 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c @@ -16,42 +16,23 
@@ enum pva_error pva_kmd_notify_fw_set_debug_log_level(struct pva_kmd_device *pva, uint32_t log_level) { - struct pva_kmd_submitter *submitter = &pva->submitter; - struct pva_kmd_cmdbuf_builder builder; - struct pva_cmd_set_debug_log_level *cmd; - uint32_t fence_val; - enum pva_error err; + struct pva_cmd_set_debug_log_level cmd = { 0 }; + pva_kmd_set_cmd_set_debug_log_level(&cmd, log_level); - err = pva_kmd_submitter_prepare(submitter, &builder); - if (err != PVA_SUCCESS) { - goto err_out; - } + return pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); +} - cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); - ASSERT(cmd != NULL); +enum pva_error pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva, + uint32_t level) +{ + struct pva_cmd_set_profiling_level cmd = { 0 }; + pva_kmd_set_cmd_set_profiling_level(&cmd, level); - pva_kmd_set_cmd_set_debug_log_level(cmd, log_level); - - err = pva_kmd_submitter_submit(submitter, &builder, &fence_val); - if (err != PVA_SUCCESS) { - pva_kmd_log_err("set debug log level cmd submission failed"); - goto cancel_builder; - } - - err = pva_kmd_submitter_wait(submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out when setting debug log level"); - goto err_out; - } - -cancel_builder: - pva_kmd_cmdbuf_builder_cancel(&builder); - -err_out: - return err; + return pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); } void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer) diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.h index b6089a24..1dc3fc1e 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.h @@ -15,6 +15,9 @@ struct pva_kmd_fw_print_buffer { enum pva_error pva_kmd_notify_fw_set_debug_log_level(struct pva_kmd_device *pva, uint32_t log_level); +enum pva_error pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva, + uint32_t level); + void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer); #endif // PVA_KMD_FW_DEBUG_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c index 7327a291..23156b97 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c @@ -10,6 +10,7 @@ #include "pva_utils.h" #include "pva_kmd_fw_profiler.h" #include "pva_kmd_shared_buffer.h" +#include "pva_api_private.h" // TODO: This is here temporarily just for testing. 
Should be moved to a common header #define CMD_ID(x) PVA_EXTRACT(x, 6, 0, uint8_t) @@ -101,13 +102,11 @@ void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva) enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva) { - struct pva_kmd_cmdbuf_builder builder; struct pva_kmd_submitter *dev_submitter = &pva->submitter; - struct pva_cmd_enable_fw_profiling *cmd; + struct pva_cmd_enable_fw_profiling cmd = { 0 }; uint32_t filter = 0U; uint8_t timestamp_type = TIMESTAMP_TYPE_CYCLE_COUNT; - uint32_t fence_val; - enum pva_error err; + enum pva_error err = PVA_SUCCESS; struct pva_kmd_shared_buffer *profiling_buffer = &pva->kmd_fw_buffers[PVA_PRIV_CCQ_ID]; @@ -123,26 +122,14 @@ enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva) return PVA_SUCCESS; } - err = pva_kmd_submitter_prepare(dev_submitter, &builder); - if (err != PVA_SUCCESS) { - goto err_out; - } - cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); - ASSERT(cmd != NULL); - pva_kmd_set_cmd_enable_fw_profiling(cmd, filter, timestamp_type); + pva_kmd_set_cmd_enable_fw_profiling(&cmd, filter, timestamp_type); - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + err = pva_kmd_submit_cmd_sync(dev_submitter, &cmd, sizeof(cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); if (err != PVA_SUCCESS) { - goto err_out; - } - - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out when initializing context"); - goto err_out; + pva_kmd_log_err("Failed to submit command"); + goto out; } pva->debugfs_context.g_fw_profiling_config.enabled = true; @@ -155,38 +142,22 @@ enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva) 8 : 4; - return PVA_SUCCESS; -err_out: +out: return err; } enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva) { - struct pva_kmd_cmdbuf_builder builder; - struct pva_kmd_submitter *dev_submitter = &pva->submitter; - struct pva_cmd_disable_fw_profiling *cmd; - uint32_t fence_val; + struct pva_cmd_disable_fw_profiling cmd = { 0 }; enum pva_error err; - err = pva_kmd_submitter_prepare(dev_submitter, &builder); - if (err != PVA_SUCCESS) { - goto err_out; - } - cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); - ASSERT(cmd != NULL); - pva_kmd_set_cmd_disable_fw_profiling(cmd); + pva_kmd_set_cmd_disable_fw_profiling(&cmd); - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + err = pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); if (err != PVA_SUCCESS) { - goto err_out; - } - - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out when initializing context"); + pva_kmd_log_err("Failed to submit command"); goto err_out; } @@ -194,6 +165,7 @@ enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva) pva->debugfs_context.g_fw_profiling_config.filter = 0x0; return PVA_SUCCESS; + err_out: return err; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c index d4047b90..780cfd2f 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c @@ 
-50,7 +50,7 @@ void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len) memcpy(abort_msg + 2, &data[1], size); abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN] = '\0'; pva_kmd_log_err(abort_msg); - pva_kmd_abort(pva); + pva_kmd_abort_fw(pva); } break; case PVA_FW_MSG_TYPE_FLUSH_PRINT: pva_kmd_drain_fw_print(&pva->fw_print_buffer); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c index 8f881e28..30f20479 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c @@ -86,7 +86,6 @@ pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx, err = PVA_NOMEM; goto err_out; } - if (args->segment == PVA_MEMORY_SEGMENT_R5) { smmu_ctx_id = PVA_R5_SMMU_CONTEXT_ID; } else { @@ -168,8 +167,8 @@ static enum pva_error pva_kmd_op_executable_register_async( } args = (struct pva_ops_executable_register *)input_buffer; - if (args->exec_size + sizeof(struct pva_ops_executable_register) > - size) { + if (args->exec_size > + (size - sizeof(struct pva_ops_executable_register))) { pva_kmd_log_err("Executable register payload size too small"); return PVA_INVAL; } @@ -404,8 +403,10 @@ exit_loop: post_fence->flags |= PVA_FW_POSTFENCE_FLAGS_USER_FENCE; submit_error = pva_kmd_submitter_submit_with_fence( &ctx->submitter, &cmdbuf_builder, post_fence); - ASSERT(submit_error == PVA_SUCCESS); + if (err == PVA_SUCCESS) { + err = submit_error; + } out: return err; } @@ -434,97 +435,14 @@ pva_kmd_op_context_init(struct pva_kmd_context *ctx, const void *input_buffer, err = pva_kmd_context_init(ctx, ctx_init_args->resource_table_capacity); ctx_init_out.error = err; ctx_init_out.ccq_shm_hdl = (uint64_t)ctx->ccq_shm_handle; + ctx_init_out.max_cmdbuf_chunk_size = + pva_kmd_get_max_cmdbuf_chunk_size(ctx->pva); produce_data(out_buffer, &ctx_init_out, sizeof(ctx_init_out)); return PVA_SUCCESS; } -static enum pva_error pva_kmd_op_syncpt_register_async( - struct pva_kmd_context *ctx, const void *input_buffer, - uint32_t input_buffer_size, struct pva_kmd_ops_buffer *out_buffer, - struct pva_kmd_cmdbuf_builder *cmdbuf_builder) -{ - enum pva_error err; - struct pva_syncpt_rw_info *syncpts; - struct pva_kmd_device_memory dev_mem; - uint32_t resource_id = 0; - struct pva_cmd_update_resource_table *update_cmd; - struct pva_resource_entry entry = { 0 }; - struct pva_ops_response_syncpt_register syncpt_register_out = { 0 }; - - if (input_buffer_size != sizeof(struct pva_ops_syncpt_register)) { - pva_kmd_log_err("Syncpt register size is not correct"); - return PVA_INVAL; - } - - if (!access_ok(out_buffer, - sizeof(struct pva_ops_response_syncpt_register))) { - return PVA_INVAL; - } - - /* Register RO syncpts */ - dev_mem.iova = ctx->pva->syncpt_ro_iova; - dev_mem.va = 0; - dev_mem.size = ctx->pva->syncpt_offset * ctx->pva->num_syncpts; - dev_mem.pva = ctx->pva; - dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID; - err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem, - &resource_id); - if (err != PVA_SUCCESS) { - goto err_out; - } - syncpt_register_out.syncpt_ro_res_id = resource_id; - syncpt_register_out.num_ro_syncpoints = ctx->pva->num_syncpts; - update_cmd = - pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd)); - ASSERT(update_cmd != NULL); - err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id, - &entry); - ASSERT(err == PVA_SUCCESS); - pva_kmd_set_cmd_update_resource_table( - update_cmd, 
ctx->resource_table_id, resource_id, &entry); - - /* Register RW syncpts */ - pva_kmd_mutex_lock(&ctx->pva->syncpt_allocator.allocator_lock); - syncpts = (struct pva_syncpt_rw_info *)pva_kmd_get_block_unsafe( - &ctx->pva->syncpt_allocator, ctx->syncpt_block_index); - ASSERT(syncpts != NULL); - - for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS_PER_CONTEXT; i++) { - ctx->syncpt_ids[i] = syncpts[i].syncpt_id; - syncpt_register_out.synpt_ids[i] = syncpts[i].syncpt_id; - } - - dev_mem.iova = syncpts[0].syncpt_iova; - pva_kmd_mutex_unlock(&ctx->pva->syncpt_allocator.allocator_lock); - dev_mem.va = 0; - dev_mem.size = ctx->pva->syncpt_offset * PVA_NUM_RW_SYNCPTS_PER_CONTEXT; - dev_mem.pva = ctx->pva; - dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID; - err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem, - &resource_id); - if (err != PVA_SUCCESS) { - goto err_out; - } - syncpt_register_out.syncpt_rw_res_id = resource_id; - syncpt_register_out.synpt_size = ctx->pva->syncpt_offset; - update_cmd = - pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd)); - ASSERT(update_cmd != NULL); - err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id, - &entry); - ASSERT(err == PVA_SUCCESS); - pva_kmd_set_cmd_update_resource_table( - update_cmd, ctx->resource_table_id, resource_id, &entry); - -err_out: - syncpt_register_out.error = err; - produce_data(out_buffer, &syncpt_register_out, - sizeof(syncpt_register_out)); - return PVA_SUCCESS; -} - static enum pva_error pva_kmd_op_queue_create(struct pva_kmd_context *ctx, const void *input_buffer, uint32_t input_buffer_size, @@ -532,6 +450,7 @@ pva_kmd_op_queue_create(struct pva_kmd_context *ctx, const void *input_buffer, { const struct pva_ops_queue_create *queue_create_args; struct pva_ops_response_queue_create queue_out_args = { 0 }; + const struct pva_syncpt_rw_info *syncpt_info; uint32_t queue_id = PVA_INVALID_QUEUE_ID; enum pva_error err = PVA_SUCCESS; @@ -553,10 +472,12 @@ pva_kmd_op_queue_create(struct pva_kmd_context *ctx, const void *input_buffer, goto out; } + syncpt_info = pva_kmd_queue_get_rw_syncpt_info(ctx, queue_id); queue_out_args.error = err; queue_out_args.queue_id = queue_id; - pva_kmd_read_syncpt_val(ctx->pva, ctx->syncpt_ids[queue_id], - &queue_out_args.syncpt_fence_counter); + queue_out_args.syncpt_id = syncpt_info->syncpt_id; + pva_kmd_read_syncpt_val(ctx->pva, syncpt_info->syncpt_id, + &queue_out_args.syncpt_current_value); out: produce_data(out_buffer, &queue_out_args, @@ -687,15 +608,16 @@ pva_kmd_op_synced_submit(struct pva_kmd_context *ctx, const void *input_buffer, err = pva_kmd_submitter_submit(&ctx->submitter, &cmdbuf_builder, &fence_val); - /* TODO: handle this error */ - ASSERT(err == PVA_SUCCESS); + if (err != PVA_SUCCESS) { + goto cancel_submit; + } err = pva_kmd_submitter_wait(&ctx->submitter, fence_val, PVA_KMD_WAIT_FW_POLL_INTERVAL_US, PVA_KMD_WAIT_FW_TIMEOUT_US); if (err != PVA_SUCCESS) { - goto err_out; + goto cancel_submit; } return PVA_SUCCESS; @@ -758,11 +680,6 @@ pva_kmd_sync_ops_handler(struct pva_kmd_context *ctx, ctx, input_buffer, input_buffer_size, out_arg, pva_kmd_op_memory_register_async); break; - case PVA_OPS_OPCODE_SYNCPT_REGISTER: - err = pva_kmd_op_synced_submit( - ctx, input_buffer, input_buffer_size, out_arg, - pva_kmd_op_syncpt_register_async); - break; case PVA_OPS_OPCODE_EXECUTABLE_REGISTER: err = pva_kmd_op_synced_submit( ctx, input_buffer, input_buffer_size, out_arg, @@ -798,11 +715,6 @@ enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx, struct 
pva_kmd_ops_buffer in_buffer = { 0 }, out_buffer = { 0 }; enum pva_error err = PVA_SUCCESS; - if (ctx->pva->recovery) { - pva_kmd_log_err("PVA firmware aborted. No KMD ops allowed."); - return PVA_ERR_FW_ABORTED; - } - in_buffer.base = ops_buffer; in_buffer.size = ops_size; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.c index 4f769e63..008b2917 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + #include "pva_kmd_utils.h" #include "pva_fw.h" #include "pva_kmd_device_memory.h" @@ -14,11 +15,8 @@ enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva) { - struct pva_kmd_cmdbuf_builder builder; - struct pva_kmd_submitter *dev_submitter = &pva->submitter; enum pva_error err = PVA_SUCCESS; - struct pva_cmd_suspend_fw *fw_suspend; - uint32_t fence_val; + struct pva_cmd_suspend_fw cmd = { 0 }; pva_kmd_mutex_lock(&pva->powercycle_lock); if (pva->refcount == 0u) { @@ -27,44 +25,16 @@ enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva) goto err_out; } - err = pva_kmd_submitter_prepare(dev_submitter, &builder); + pva_kmd_set_cmd_suspend_fw(&cmd); + + err = pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "PVA: Prepare submitter for FW suspend command failed\n"); + pva_kmd_log_err("PVA: Failed to submit FW suspend command\n"); goto err_out; } - //Build args - fw_suspend = pva_kmd_reserve_cmd_space(&builder, sizeof(*fw_suspend)); - if (fw_suspend == NULL) { - pva_kmd_log_err( - "PVA: Memory alloc for FW suspend command failed\n"); - err = PVA_NOMEM; - goto cancel_submit; - } - - pva_kmd_set_cmd_suspend_fw(fw_suspend); - - //Submit - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "PVA: Submission for FW suspend command failed\n"); - goto cancel_submit; - } - - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "PVA: Waiting for FW timed out when preparing for suspend state\n"); - goto err_out; - } - -cancel_submit: - pva_kmd_cmdbuf_builder_cancel(&builder); - err_out: pva_kmd_mutex_unlock(&pva->powercycle_lock); return err; @@ -77,9 +47,11 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva) struct pva_cmd_init_resource_table *res_cmd; struct pva_cmd_init_queue *queue_cmd; struct pva_cmd_resume_fw *fw_resume; + struct pva_cmd_init_shared_dram_buffer *shared_buf_cmd; enum pva_error err; uint32_t fence_val; struct pva_kmd_queue *queue; + const struct pva_syncpt_rw_info *syncpt_info; pva_kmd_mutex_lock(&pva->powercycle_lock); if (pva->refcount == 0u) { @@ -89,8 +61,10 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva) goto err_out; } - pva_kmd_send_resource_table_info_by_ccq(pva, &pva->dev_resource_table); - pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue); + err = pva_kmd_config_fw_after_boot(pva); + if (err != PVA_SUCCESS) { + goto err_out; + } err = pva_kmd_submitter_prepare(dev_submitter, &builder); if (err != PVA_SUCCESS) { @@ -140,14 +114,38 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device 
*pva) goto cancel_builder; } + + /* Initialize shared buffer */ + shared_buf_cmd = pva_kmd_reserve_cmd_space( + &builder, sizeof(*shared_buf_cmd)); + if (shared_buf_cmd == NULL) { + pva_kmd_log_err( + "PVA: Memory alloc for shared buffer registration in FW resume command failed\n"); + err = PVA_NOMEM; + goto cancel_builder; + } + + pva_dbg_printf( + "PVA: Resume shared buffer for context %d\n", + ctx->ccq_id); + pva_kmd_set_cmd_init_shared_dram_buffer( + shared_buf_cmd, ctx->ccq_id, + pva->kmd_fw_buffers[ctx->ccq_id] + .resource_memory->iova, + pva->kmd_fw_buffers[ctx->ccq_id] + .resource_memory->size); + pva_dbg_printf( "PVA: Resume priv queue for context %d\n", ctx->ccq_id); + syncpt_info = pva_kmd_queue_get_rw_syncpt_info( + ctx, ctx->ccq_id); pva_kmd_set_cmd_init_queue( queue_cmd, PVA_PRIV_CCQ_ID, ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/ ctx->ctx_queue.queue_memory->iova, - ctx->ctx_queue.max_num_submit); + ctx->ctx_queue.max_num_submit, + syncpt_info->syncpt_id, + syncpt_info->syncpt_iova); /**Initialize resource table */ for (uint32_t j = 0; j < ctx->max_n_queues; j++) { @@ -168,11 +166,16 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva) goto cancel_builder; } + syncpt_info = + pva_kmd_queue_get_rw_syncpt_info( + ctx, queue->queue_id); pva_kmd_set_cmd_init_queue( queue_cmd, queue->ccq_id, queue->queue_id, queue->queue_memory->iova, - queue->max_num_submit); + queue->max_num_submit, + syncpt_info->syncpt_id, + syncpt_info->syncpt_iova); } pva_kmd_mutex_unlock( &ctx->queue_allocator.allocator_lock); @@ -194,9 +197,12 @@ enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva) if (err != PVA_SUCCESS) { pva_kmd_log_err( "Waiting for FW timed out when resuming from suspend state"); - goto err_out; + goto cancel_builder; } + pva_kmd_mutex_unlock(&pva->powercycle_lock); + return PVA_SUCCESS; + cancel_builder: pva_kmd_cmdbuf_builder_cancel(&builder); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c index d579acb5..c7ce374a 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
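The queue code below is one of several call sites in this patch that collapse the open-coded prepare/reserve/submit/wait sequence into pva_kmd_submit_cmd_sync(). The helper's definition is outside this excerpt; reconstructed from the deleted call sites, it plausibly amounts to the sketch below (the memcpy of the caller-built command into the reserved slot is an assumption):

enum pva_error pva_kmd_submit_cmd_sync(struct pva_kmd_submitter *submitter,
				       const void *cmd, uint32_t cmd_size,
				       uint64_t poll_interval_us,
				       uint64_t timeout_us)
{
	struct pva_kmd_cmdbuf_builder builder;
	uint32_t fence_val;
	void *space;
	enum pva_error err;

	err = pva_kmd_submitter_prepare(submitter, &builder);
	if (err != PVA_SUCCESS) {
		return err;
	}

	space = pva_kmd_reserve_cmd_space(&builder, cmd_size);
	if (space == NULL) {
		pva_kmd_cmdbuf_builder_cancel(&builder);
		return PVA_NOMEM;
	}
	/* The caller builds the command via pva_kmd_set_cmd_*() into a
	 * stack buffer; copy it into the reserved chunk space. */
	memcpy(space, cmd, cmd_size);

	err = pva_kmd_submitter_submit(submitter, &builder, &fence_val);
	if (err != PVA_SUCCESS) {
		pva_kmd_cmdbuf_builder_cancel(&builder);
		return err;
	}

	return pva_kmd_submitter_wait(submitter, fence_val,
				      poll_interval_us, timeout_us);
}

Centralizing this also gives every caller the same cancel-on-error behavior, which several of the deleted sequences handled inconsistently.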
+#include "pva_constants.h" #include "pva_kmd.h" #include "pva_kmd_utils.h" #include "pva_fw.h" @@ -74,48 +75,23 @@ pva_kmd_queue_submit(struct pva_kmd_queue *queue, return err; } - -void pva_kmd_queue_deinit(struct pva_kmd_queue *queue) -{ - queue->queue_memory = NULL; - queue->ccq_id = PVA_INVALID_QUEUE_ID; - queue->max_num_submit = 0; -} - static enum pva_error notify_fw_queue_deinit(struct pva_kmd_context *ctx, struct pva_kmd_queue *queue) { - enum pva_error err = PVA_SUCCESS; - struct pva_kmd_cmdbuf_builder builder; - struct pva_cmd_deinit_queue *queue_cmd; - uint32_t fence_val; + struct pva_cmd_deinit_queue cmd = { 0 }; + enum pva_error err; - err = pva_kmd_submitter_prepare(&ctx->submitter, &builder); + pva_kmd_set_cmd_deinit_queue(&cmd, queue->ccq_id, queue->queue_id); + + err = pva_kmd_submit_cmd_sync(&ctx->submitter, &cmd, sizeof(cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); if (err != PVA_SUCCESS) { goto end; } - queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd)); - if (queue_cmd == NULL) { - err = PVA_NOMEM; - goto cancel_submitter; - } - pva_kmd_set_cmd_deinit_queue(queue_cmd, queue->ccq_id, queue->queue_id); - - err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val); - if (err != PVA_SUCCESS) { - goto cancel_submitter; - } - - err = pva_kmd_submitter_wait(&ctx->submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - goto end; - } return PVA_SUCCESS; -cancel_submitter: - pva_kmd_cmdbuf_builder_cancel(&builder); + end: return err; } @@ -126,10 +102,9 @@ enum pva_error pva_kmd_queue_create(struct pva_kmd_context *ctx, { struct pva_kmd_device_memory *submission_mem_kmd = NULL; struct pva_kmd_queue *queue = NULL; - struct pva_kmd_cmdbuf_builder builder; - struct pva_cmd_init_queue *queue_cmd; - uint32_t fence_val; + struct pva_cmd_init_queue cmd = { 0 }; enum pva_error err, tmperr; + const struct pva_syncpt_rw_info *syncpt_info; queue = pva_kmd_zalloc_block(&ctx->queue_allocator, queue_id); if (queue == NULL) { @@ -160,42 +135,26 @@ enum pva_error pva_kmd_queue_create(struct pva_kmd_context *ctx, goto err_free_kmd_memory; } - err = pva_kmd_submitter_prepare(&ctx->submitter, &builder); + syncpt_info = pva_kmd_queue_get_rw_syncpt_info(ctx, queue->queue_id); + pva_kmd_set_cmd_init_queue(&cmd, queue->ccq_id, queue->queue_id, + queue->queue_memory->iova, + queue->max_num_submit, + syncpt_info->syncpt_id, + syncpt_info->syncpt_iova); + + err = pva_kmd_submit_cmd_sync(&ctx->submitter, &cmd, sizeof(cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); if (err != PVA_SUCCESS) { goto unmap_iova; } - queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd)); - if (queue_cmd == NULL) { - err = PVA_NOMEM; - goto cancel_submitter; - } - ASSERT(queue_cmd != NULL); - pva_kmd_set_cmd_init_queue(queue_cmd, queue->ccq_id, queue->queue_id, - queue->queue_memory->iova, - queue->max_num_submit); - - err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val); - if (err != PVA_SUCCESS) { - goto cancel_submitter; - } - - err = pva_kmd_submitter_wait(&ctx->submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - goto cancel_submitter; - } - return PVA_SUCCESS; -cancel_submitter: - pva_kmd_cmdbuf_builder_cancel(&builder); unmap_iova: pva_kmd_device_memory_iova_unmap(submission_mem_kmd); err_free_kmd_memory: pva_kmd_device_memory_free(queue->queue_memory); - pva_kmd_queue_deinit(queue); 
err_free_queue: tmperr = pva_kmd_free_block(&ctx->queue_allocator, *queue_id); ASSERT(tmperr == PVA_SUCCESS); @@ -210,35 +169,40 @@ enum pva_error pva_kmd_queue_destroy(struct pva_kmd_context *ctx, { struct pva_kmd_queue *queue; enum pva_error err = PVA_SUCCESS; + enum pva_error tmp_err; - /* - * TODO : - * Send command to FW to stop queue usage. Wait for ack. - * This call needs to be added after syncpoint and ccq functions are ready. - */ pva_kmd_mutex_lock(&ctx->queue_allocator.allocator_lock); queue = pva_kmd_get_block_unsafe(&ctx->queue_allocator, queue_id); if (queue == NULL) { - pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock); - return PVA_INVAL; + pva_kmd_log_err("Destroying non-existent queue"); + err = PVA_INVAL; + goto unlock; } - if (!ctx->pva->recovery) { - err = notify_fw_queue_deinit(ctx, queue); - if (err != PVA_SUCCESS) { - pva_kmd_mutex_unlock( - &ctx->queue_allocator.allocator_lock); - return err; - } + + err = notify_fw_queue_deinit(ctx, queue); + if (err != PVA_SUCCESS) { + // Might happen if FW is aborted; it's safe to keep going. + pva_kmd_log_err("Failed to notify FW to destroy queue"); } pva_kmd_device_memory_iova_unmap(queue->queue_memory); - pva_kmd_device_memory_free(queue->queue_memory); - - pva_kmd_queue_deinit(queue); + tmp_err = pva_kmd_free_block_unsafe(&ctx->queue_allocator, queue_id); + // This cannot fail as we have already checked for queue existence and we + // are still holding the lock. + ASSERT(tmp_err == PVA_SUCCESS); +unlock: pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock); - - err = pva_kmd_free_block(&ctx->queue_allocator, queue_id); - ASSERT(err == PVA_SUCCESS); - return PVA_SUCCESS; + return err; +} + +const struct pva_syncpt_rw_info * +pva_kmd_queue_get_rw_syncpt_info(struct pva_kmd_context *ctx, uint8_t queue_id) +{ + uint32_t ctx_offset = + safe_mulu32(ctx->ccq_id, PVA_NUM_RW_SYNCPTS_PER_CONTEXT); + uint32_t syncpt_index = safe_addu32(ctx_offset, queue_id); + + ASSERT(syncpt_index < PVA_NUM_RW_SYNCPTS); + return &ctx->pva->rw_syncpts[syncpt_index]; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h index 0861a557..dbcdac32 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h @@ -33,6 +33,8 @@ enum pva_error pva_kmd_queue_submit(struct pva_kmd_queue *queue, struct pva_fw_cmdbuf_submit_info const *submit_info); uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue); -void pva_kmd_queue_deinit(struct pva_kmd_queue *queue); + +const struct pva_syncpt_rw_info * +pva_kmd_queue_get_rw_syncpt_info(struct pva_kmd_context *ctx, uint8_t queue_id); #endif // PVA_KMD_QUEUE_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c index 83d97010..3ab30a93 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c @@ -46,8 +46,7 @@ static uint32_t get_max_dma_config_size(struct pva_kmd_device *pva) enum pva_error pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table, struct pva_kmd_device *pva, - uint8_t user_smmu_ctx_id, uint32_t n_entries, - uint32_t max_num_dma_configs) + uint8_t user_smmu_ctx_id, uint32_t n_entries) { uint32_t max_dma_config_size = get_max_dma_config_size(pva); enum pva_error err; @@ -56,45 +55,55 @@ pva_kmd_resource_table_init(struct 
pva_kmd_resource_table *res_table, res_table->pva = pva; res_table->n_entries = n_entries; res_table->user_smmu_ctx_id = user_smmu_ctx_id; + pva_kmd_sema_init(&res_table->resource_semaphore, n_entries); + pva_kmd_mutex_init(&res_table->resource_table_lock); size = (uint64_t)safe_mulu32( n_entries, (uint32_t)sizeof(struct pva_resource_entry)); res_table->table_mem = pva_kmd_device_memory_alloc_map( size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); - ASSERT(res_table->table_mem != NULL); - - pva_kmd_sema_init(&res_table->resource_semaphore, n_entries); - pva_kmd_mutex_init(&res_table->resource_table_lock); + if (res_table->table_mem == NULL) { + err = PVA_NOMEM; + goto deinit_locks; + } size = (uint64_t)safe_mulu32(sizeof(struct pva_kmd_resource_record), n_entries); res_table->records_mem = pva_kmd_zalloc(size); - ASSERT(res_table->records_mem != NULL); + if (res_table->records_mem == NULL) { + err = PVA_NOMEM; + goto free_table_mem; + } err = pva_kmd_block_allocator_init( &res_table->resource_record_allocator, res_table->records_mem, PVA_RESOURCE_ID_BASE, sizeof(struct pva_kmd_resource_record), n_entries); - ASSERT(err == PVA_SUCCESS); + if (err != PVA_SUCCESS) { + goto free_records_mem; + } - size = (uint64_t)safe_mulu32(max_num_dma_configs, max_dma_config_size); - res_table->dma_config_mem = pva_kmd_device_memory_alloc_map( - size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); - ASSERT(res_table->dma_config_mem != NULL); - - err = pva_kmd_block_allocator_init(&res_table->dma_config_allocator, - res_table->dma_config_mem->va, 0, - max_dma_config_size, - max_num_dma_configs); - ASSERT(err == PVA_SUCCESS); - - res_table->dma_aux = pva_kmd_zalloc( - safe_mulu32((uint32_t)sizeof(struct pva_kmd_dma_resource_aux), - max_num_dma_configs)); - ASSERT(res_table->dma_aux != NULL); + err = pva_kmd_devmem_pool_init(&res_table->dma_config_pool, pva, + PVA_R5_SMMU_CONTEXT_ID, + max_dma_config_size, + PVA_KMD_DMA_CONFIG_POOL_INCR); + if (err != PVA_SUCCESS) { + goto free_resource_record_allocator; + } return PVA_SUCCESS; + +free_resource_record_allocator: + pva_kmd_block_allocator_deinit(&res_table->resource_record_allocator); +free_records_mem: + pva_kmd_free(res_table->records_mem); +free_table_mem: + pva_kmd_device_memory_free(res_table->table_mem); +deinit_locks: + pva_kmd_mutex_deinit(&res_table->resource_table_lock); + pva_kmd_sema_deinit(&res_table->resource_semaphore); + return err; } static struct pva_kmd_resource_record * @@ -118,7 +127,7 @@ pva_kmd_alloc_resource_id(struct pva_kmd_resource_table *resource_table, goto out; } - rec = (struct pva_kmd_resource_record *)pva_kmd_alloc_block( + rec = (struct pva_kmd_resource_record *)pva_kmd_zalloc_block( &resource_table->resource_record_allocator, out_resource_id); ASSERT(rec != NULL); @@ -141,9 +150,8 @@ pva_kmd_free_resource_id(struct pva_kmd_resource_table *resource_table, static void pva_kmd_release_resource(struct pva_kmd_resource_table *resource_table, - uint32_t resource_id) + uint32_t resource_id, bool drop_dma_reference) { - enum pva_error err; struct pva_kmd_resource_record *rec = pva_kmd_get_block_unsafe( &resource_table->resource_record_allocator, resource_id); @@ -151,9 +159,7 @@ pva_kmd_release_resource(struct pva_kmd_resource_table *resource_table, switch (rec->type) { case PVA_RESOURCE_TYPE_DRAM: - if (rec->dram.syncpt != true) { - pva_kmd_device_memory_free(rec->dram.mem); - } + pva_kmd_device_memory_free(rec->dram.mem); break; case PVA_RESOURCE_TYPE_EXEC_BIN: pva_kmd_unload_executable(&rec->vpu_bin.symbol_table, @@ -161,12 +167,12 
@@ pva_kmd_release_resource(struct pva_kmd_resource_table *resource_table, rec->vpu_bin.sections_mem); break; case PVA_RESOURCE_TYPE_DMA_CONFIG: { - struct pva_kmd_dma_resource_aux *dma_aux; - dma_aux = &resource_table->dma_aux[rec->dma_config.block_index]; - pva_kmd_unload_dma_config_unsafe(dma_aux); - err = pva_kmd_free_block(&resource_table->dma_config_allocator, - rec->dma_config.block_index); - ASSERT(err == PVA_SUCCESS); + if (drop_dma_reference) { + pva_kmd_unload_dma_config_unsafe( + rec->dma_config.aux_mem); + } + pva_kmd_free(rec->dma_config.aux_mem); + pva_kmd_devmem_pool_free(&rec->dma_config.devmem); break; } @@ -177,33 +183,6 @@ pva_kmd_release_resource(struct pva_kmd_resource_table *resource_table, pva_kmd_free_resource_id(resource_table, resource_id); } -enum pva_error -pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table, - struct pva_kmd_device_memory *dev_mem, - uint32_t *out_resource_id) -{ - struct pva_kmd_resource_record *rec = - pva_kmd_alloc_resource_id(resource_table, out_resource_id); - - if (rec == NULL) { - pva_kmd_log_err("No more resource id"); - return PVA_NO_RESOURCE_ID; - } - - pva_kmd_mutex_lock(&resource_table->resource_table_lock); - if (*out_resource_id > resource_table->curr_max_resource_id) { - resource_table->curr_max_resource_id = *out_resource_id; - } - pva_kmd_mutex_unlock(&resource_table->resource_table_lock); - - rec->type = PVA_RESOURCE_TYPE_DRAM; - rec->dram.mem = dev_mem; - rec->dram.syncpt = true; - rec->ref_count = 1; - - return PVA_SUCCESS; -} - enum pva_error pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table, struct pva_kmd_device_memory *dev_mem, @@ -225,7 +204,6 @@ pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table, rec->type = PVA_RESOURCE_TYPE_DRAM; rec->dram.mem = dev_mem; - rec->dram.syncpt = false; rec->ref_count = 1; return PVA_SUCCESS; @@ -271,6 +249,7 @@ void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table) entry->size_lo = iova_lo(rec->dram.mem->size); entry->size_hi = iova_hi(rec->dram.mem->size); entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx; + entry->access_flags = rec->dram.mem->iova_access_flags; break; case PVA_RESOURCE_TYPE_INVALID: break; @@ -349,7 +328,7 @@ void pva_kmd_drop_resource_unsafe(struct pva_kmd_resource_table *resource_table, rec->ref_count = safe_subu32(rec->ref_count, 1U); if (rec->ref_count == 0) { - pva_kmd_release_resource(resource_table, resource_id); + pva_kmd_release_resource(resource_table, resource_id, true); } } @@ -414,6 +393,7 @@ pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table, entry->size_lo = iova_lo(rec->dram.mem->size); entry->size_hi = iova_hi(rec->dram.mem->size); entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx; + entry->access_flags = rec->dram.mem->iova_access_flags; break; case PVA_RESOURCE_TYPE_EXEC_BIN: entry->type = rec->type; @@ -423,6 +403,7 @@ pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table, entry->size_hi = iova_hi(rec->vpu_bin.metainfo_mem->size); entry->smmu_context_id = rec->vpu_bin.metainfo_mem->smmu_ctx_idx; + entry->access_flags = PVA_ACCESS_RO; break; case PVA_RESOURCE_TYPE_DMA_CONFIG: entry->type = rec->type; @@ -431,6 +412,7 @@ pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table, entry->size_lo = iova_lo(rec->dma_config.size); entry->size_hi = iova_hi(rec->dma_config.size); entry->smmu_context_id = PVA_R5_SMMU_CONTEXT_ID; + entry->access_flags = PVA_ACCESS_RO; break; default: 
pva_kmd_log_err("Unsupported resource type"); @@ -447,24 +429,30 @@ enum pva_error pva_kmd_add_dma_config_resource( uint32_t dma_config_size, uint32_t *out_resource_id) { enum pva_error err = PVA_SUCCESS; - uint32_t block_idx, fw_fetch_size; + uint32_t fw_fetch_size; void *fw_dma_cfg; struct pva_kmd_dma_resource_aux *dma_aux; struct pva_kmd_resource_record *rec; uint32_t res_id; + struct pva_kmd_devmem_element dma_cfg_mem = { 0 }; - fw_dma_cfg = pva_kmd_zalloc_block(&resource_table->dma_config_allocator, - &block_idx); - if (fw_dma_cfg == NULL) { - err = PVA_NOMEM; + err = pva_kmd_devmem_pool_zalloc(&resource_table->dma_config_pool, + &dma_cfg_mem); + if (err != PVA_SUCCESS) { goto err_out; } + fw_dma_cfg = pva_kmd_get_devmem_va(&dma_cfg_mem); // Must satisfy alignment requirement for converting to struct // pva_dma_config_resource* ASSERT(((uintptr_t)fw_dma_cfg) % sizeof(uint64_t) == 0); - dma_aux = &resource_table->dma_aux[block_idx]; + dma_aux = pva_kmd_zalloc(sizeof(struct pva_kmd_dma_resource_aux)); + if (dma_aux == NULL) { + err = PVA_NOMEM; + goto free_dma_cfg_mem; + } + dma_aux->res_table = resource_table; pva_kmd_mutex_lock(&resource_table->resource_table_lock); err = pva_kmd_load_dma_config(resource_table, dma_cfg_hdr, @@ -472,7 +460,7 @@ enum pva_error pva_kmd_add_dma_config_resource( &fw_fetch_size); pva_kmd_mutex_unlock(&resource_table->resource_table_lock); if (err != PVA_SUCCESS) { - goto free_block; + goto free_dma_aux; } rec = pva_kmd_alloc_resource_id(resource_table, &res_id); @@ -489,12 +477,9 @@ enum pva_error pva_kmd_add_dma_config_resource( rec->type = PVA_RESOURCE_TYPE_DMA_CONFIG; rec->ref_count = 1; - rec->dma_config.block_index = block_idx; - rec->dma_config.iova_addr = safe_addu64( - resource_table->dma_config_mem->iova, - (uint64_t)safe_mulu32( - block_idx, - resource_table->dma_config_allocator.block_size)); + rec->dma_config.devmem = dma_cfg_mem; + rec->dma_config.aux_mem = dma_aux; + rec->dma_config.iova_addr = pva_kmd_get_devmem_iova(&dma_cfg_mem); rec->dma_config.size = fw_fetch_size; *out_resource_id = res_id; @@ -504,8 +489,10 @@ unload_dma: pva_kmd_mutex_lock(&resource_table->resource_table_lock); pva_kmd_unload_dma_config_unsafe(dma_aux); pva_kmd_mutex_unlock(&resource_table->resource_table_lock); -free_block: - pva_kmd_free_block(&resource_table->dma_config_allocator, block_idx); +free_dma_aux: + pva_kmd_free(dma_aux); +free_dma_cfg_mem: + pva_kmd_devmem_pool_free(&dma_cfg_mem); err_out: return err; } @@ -523,7 +510,7 @@ pva_kmd_release_all_resources(struct pva_kmd_resource_table *res_table) struct pva_kmd_resource_record *rec = pva_kmd_peek_resource(res_table, id); if (rec != NULL) { - pva_kmd_release_resource(res_table, id); + pva_kmd_release_resource(res_table, id, false); } } pva_kmd_mutex_unlock(&res_table->resource_table_lock); @@ -533,11 +520,9 @@ pva_kmd_release_all_resources(struct pva_kmd_resource_table *res_table) void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table) { pva_kmd_release_all_resources(res_table); - pva_kmd_free(res_table->dma_aux); - pva_kmd_block_allocator_deinit(&res_table->dma_config_allocator); - pva_kmd_device_memory_free(res_table->dma_config_mem); pva_kmd_block_allocator_deinit(&res_table->resource_record_allocator); pva_kmd_free(res_table->records_mem); + pva_kmd_devmem_pool_deinit(&res_table->dma_config_pool); pva_kmd_mutex_deinit(&res_table->resource_table_lock); pva_kmd_sema_deinit(&res_table->resource_semaphore); pva_kmd_device_memory_free(res_table->table_mem); diff --git 
a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h index 957257a3..86511019 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h @@ -14,12 +14,12 @@ #include "pva_kmd_dma_cfg.h" #include "pva_kmd_mutex.h" #include "pva_kmd_thread_sema.h" +#include "pva_kmd_devmem_pool.h" struct pva_kmd_device; struct pva_kmd_dram_resource { struct pva_kmd_device_memory *mem; - bool syncpt; }; struct pva_kmd_vpu_bin_resource { @@ -29,7 +29,8 @@ struct pva_kmd_vpu_bin_resource { }; struct pva_kmd_dma_config_resource { - uint32_t block_index; + struct pva_kmd_devmem_element devmem; + struct pva_kmd_dma_resource_aux *aux_mem; uint64_t size; uint64_t iova_addr; }; @@ -70,13 +71,8 @@ struct pva_kmd_resource_table { /** Memory for resource table entries, in R5 segment */ struct pva_kmd_device_memory *table_mem; - /** Memory for fw dma configs, in DMA segment */ - struct pva_kmd_device_memory *dma_config_mem; - struct pva_kmd_block_allocator dma_config_allocator; - - /** Memory for tracking resources used by DMA configuration. Single - * allocation shared by all DMA configs */ - struct pva_kmd_dma_resource_aux *dma_aux; + /** Pool for FW DMA configurations */ + struct pva_kmd_devmem_pool dma_config_pool; /** Memory for resource records */ void *records_mem; @@ -88,8 +84,7 @@ struct pva_kmd_resource_table { enum pva_error pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table, struct pva_kmd_device *pva, - uint8_t user_smmu_ctx_id, uint32_t n_entries, - uint32_t max_num_dma_configs); + uint8_t user_smmu_ctx_id, uint32_t n_entries); void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table); /** KMD only writes to FW resource table during init time. Once the address of @@ -97,11 +92,6 @@ void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table); */ void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table); -enum pva_error -pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table, - struct pva_kmd_device_memory *dev_mem, - uint32_t *out_resource_id); - enum pva_error pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table, struct pva_kmd_device_memory *memory, diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.c index f0e91ac5..57264b97 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.c @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
#include "pva_kmd_sha256.h" +#include "pva_math_utils.h" #define ROTLEFT(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) #define ROTRIGHT(a, b) (((a) >> (b)) | ((a) << (32 - (b)))) @@ -58,9 +59,11 @@ static void sha256_transform(struct sha256_ctx *ctx, const void *data_in) m[i] = SWAP32(data[i]); } for (i = 0; i < U32(64) - U32(16); ++i) { - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - m[i + U32(16)] = SIG1(m[U32(14) + i]) + m[U32(9) + i] + - SIG0(m[U32(1) + i]) + m[i]; + m[i + U32(16)] = safe_wrap_add_u32( + safe_wrap_add_u32(safe_wrap_add_u32(SIG1(m[U32(14) + i]), + m[U32(9) + i]), + SIG0(m[U32(1) + i])), + m[i]); } a = ctx->state[0]; @@ -73,38 +76,32 @@ static void sha256_transform(struct sha256_ctx *ctx, const void *data_in) h = ctx->state[7]; for (i = 0; i < U32(64); ++i) { - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - t1 = h + SHA_EP1(e) + CH(e, f, g) + k[i] + m[i]; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - t2 = SHA_EP0(a) + MAJ(a, b, c); + t1 = safe_wrap_add_u32( + safe_wrap_add_u32( + safe_wrap_add_u32(safe_wrap_add_u32(h, + SHA_EP1(e)), + CH(e, f, g)), + k[i]), + m[i]); + t2 = safe_wrap_add_u32(SHA_EP0(a), MAJ(a, b, c)); h = g; g = f; f = e; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - e = d + t1; + e = safe_wrap_add_u32(d, t1); d = c; c = b; b = a; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - a = t1 + t2; + a = safe_wrap_add_u32(t1, t2); } - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - ctx->state[0] += a; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - ctx->state[1] += b; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - ctx->state[2] += c; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - ctx->state[3] += d; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - ctx->state[4] += e; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - ctx->state[5] += f; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - ctx->state[6] += g; - /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ - ctx->state[7] += h; + ctx->state[0] = safe_wrap_add_u32(ctx->state[0], a); + ctx->state[1] = safe_wrap_add_u32(ctx->state[1], b); + ctx->state[2] = safe_wrap_add_u32(ctx->state[2], c); + ctx->state[3] = safe_wrap_add_u32(ctx->state[3], d); + ctx->state[4] = safe_wrap_add_u32(ctx->state[4], e); + ctx->state[5] = safe_wrap_add_u32(ctx->state[5], f); + ctx->state[6] = safe_wrap_add_u32(ctx->state[6], g); + ctx->state[7] = safe_wrap_add_u32(ctx->state[7], h); } void sha256_init(struct sha256_ctx *ctx) @@ -127,7 +124,8 @@ void sha256_update(struct sha256_ctx *ctx, const void *data, size_t len) for (i = 0; i < len; i += U32(64)) { ctx->bitlen &= U32(0xffffffff); sha256_transform(ctx, ((const uint8_t *)data) + i); - ctx->bitlen += U32(512); + ctx->bitlen = + safe_wrap_add_u32((uint32_t)ctx->bitlen, U32(512)); } } @@ -148,7 +146,9 @@ void sha256_finalize(struct sha256_ctx *ctx, const void *input, /* the false of this condition is illegal for this API agreement */ /* this check is here only for Coverity INT30-C */ - ctx->bitlen += input_size * U32(8); + ctx->bitlen = safe_wrap_add_u32((uint32_t)ctx->bitlen, + safe_wrap_mul_u32((uint32_t)input_size, + U32(8))); (void)memcpy(p, input, input_size); data[input_size] = 0x80; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c 
b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c index 5a5e03ab..25ef40f4 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c @@ -7,82 +7,6 @@ #include "pva_kmd_shim_trace_event.h" #include "pva_kmd_shared_buffer.h" -static void -setup_cmd_init_shared_dram_buffer(void *cmd, uint8_t interface, - struct pva_kmd_shared_buffer *fw_buffer) -{ - struct pva_cmd_init_shared_dram_buffer *init_cmd = - (struct pva_cmd_init_shared_dram_buffer *)cmd; - - pva_kmd_set_cmd_init_shared_dram_buffer( - init_cmd, interface, fw_buffer->resource_memory->iova, - fw_buffer->resource_memory->size); -} - -static void -setup_cmd_deinit_shared_dram_buffer(void *cmd, uint8_t interface, - struct pva_kmd_shared_buffer *fw_buffer) -{ - struct pva_cmd_deinit_shared_dram_buffer *deinit_cmd = - (struct pva_cmd_deinit_shared_dram_buffer *)cmd; - - pva_kmd_set_cmd_deinit_shared_dram_buffer(deinit_cmd, interface); -} - -static enum pva_error -notify_fw(struct pva_kmd_device *pva, uint8_t interface, - void (*setup_cmd_cb)(void *cmd, uint8_t interface, - struct pva_kmd_shared_buffer *fw_buffer), - size_t cmd_size) -{ - enum pva_error err; - struct pva_kmd_cmdbuf_builder builder; - struct pva_kmd_submitter *dev_submitter = &pva->submitter; - struct pva_kmd_shared_buffer *fw_buffer; - void *cmd_space; - uint32_t fence_val; - - ASSERT(interface < PVA_MAX_NUM_CCQ); - - fw_buffer = &pva->kmd_fw_buffers[interface]; - - err = pva_kmd_submitter_prepare(dev_submitter, &builder); - if (err != PVA_SUCCESS) { - goto err_out; - } - - // Make sure FW buffer was allocated - ASSERT(fw_buffer->header != NULL); - - cmd_space = pva_kmd_reserve_cmd_space(&builder, cmd_size); - ASSERT(cmd_space != NULL); - - // Let the setup callback configure the specific command - setup_cmd_cb(cmd_space, interface, fw_buffer); - - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); - if (err != PVA_SUCCESS) { - // Error is either QUEUE_FULL or TIMEDOUT - goto cancel_builder; - } - - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out while processing buffer command"); - goto err_out; - } - - return PVA_SUCCESS; - -cancel_builder: - pva_kmd_cmdbuf_builder_cancel(&builder); -err_out: - return err; -} - enum pva_error pva_kmd_shared_buffer_init(struct pva_kmd_device *pva, uint8_t interface, uint32_t element_size, @@ -95,42 +19,61 @@ enum pva_error pva_kmd_shared_buffer_init(struct pva_kmd_device *pva, struct pva_kmd_device_memory *device_memory; struct pva_kmd_shared_buffer *buffer; uint64_t buffer_size; + struct pva_cmd_init_shared_dram_buffer init_cmd = { 0 }; ASSERT(interface < PVA_MAX_NUM_CCQ); buffer = &pva->kmd_fw_buffers[interface]; - // Ensure that the buffer body is a multiple of 'element size' - buffer_size = safe_mulu64(num_entries, element_size); - buffer_size = safe_addu64(buffer_size, - sizeof(struct pva_fw_shared_buffer_header)); + // If the buffer is already initialized, skip buffer allocation and just notify FW. + // This is needed to support suspend/resume. 
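A note on the pattern the next few lines implement: the backing allocation happens only on the first init call, while the FW init command is re-sent on every call, so the same buffer is re-registered with FW after a resume. A minimal standalone sketch of that idempotent-init pattern (all example_* names are hypothetical, not this driver's API):

#include <stdio.h>
#include <stdlib.h>

struct example_buf {
	void *mem;   /* NULL until the first successful init */
	size_t size;
};

/* Stand-in for the FW notification; always succeeds in this sketch. */
static int example_notify_fw(void *mem, size_t size)
{
	printf("registering %zu bytes at %p with FW\n", size, mem);
	return 0;
}

/* Allocate on first call only; re-notify on every call (resume path). */
static int example_buf_init(struct example_buf *b, size_t size)
{
	if (b->mem == NULL) {
		b->mem = calloc(1, size);
		if (b->mem == NULL)
			return -1;
		b->size = size;
	}
	return example_notify_fw(b->mem, b->size);
}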
+ if (buffer->header == NULL) { + // Ensure that the buffer body is a multiple of 'element size' + buffer_size = safe_mulu64(num_entries, element_size); + buffer_size = + safe_addu64(buffer_size, + sizeof(struct pva_fw_shared_buffer_header)); - device_memory = pva_kmd_device_memory_alloc_map( - buffer_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); - if (device_memory == NULL) { - return PVA_NOMEM; + device_memory = + pva_kmd_device_memory_alloc_map(buffer_size, pva, + PVA_ACCESS_RW, + PVA_R5_SMMU_CONTEXT_ID); + if (device_memory == NULL) { + return PVA_NOMEM; + } + + buffer->header = + (struct pva_fw_shared_buffer_header *)device_memory->va; + buffer->header->flags = 0U; + buffer->header->element_size = element_size; + buffer->header->head = 0U; + buffer->header->tail = 0U; + buffer->body = (pva_offset_pointer(buffer->header, + sizeof(*buffer->header))); + buffer->lock_cb = lock_cb; + buffer->unlock_cb = unlock_cb; + buffer->resource_offset = 0U; + buffer->resource_memory = device_memory; + + err = pva_kmd_bind_shared_buffer_handler(pva, interface, pva); + if (err != PVA_SUCCESS) { + pva_kmd_log_err_u64( + "Failed to bind shared buffer handler for interface", + interface); + goto free_buffer_memory; + } + } else { + device_memory = buffer->resource_memory; } - buffer->header = - (struct pva_fw_shared_buffer_header *)device_memory->va; - buffer->header->flags = 0U; - buffer->header->element_size = element_size; - buffer->header->head = 0U; - buffer->header->tail = 0U; - buffer->body = - (pva_offset_pointer(buffer->header, sizeof(*buffer->header))); - buffer->lock_cb = lock_cb; - buffer->unlock_cb = unlock_cb; - buffer->resource_offset = 0U; - buffer->resource_memory = device_memory; + pva_kmd_set_cmd_init_shared_dram_buffer( + &init_cmd, interface, device_memory->iova, device_memory->size); - err = pva_kmd_bind_shared_buffer_handler(pva, interface, pva); - if (err != PVA_SUCCESS) { - goto free_buffer_memory; - } - - err = notify_fw(pva, interface, setup_cmd_init_shared_dram_buffer, - sizeof(struct pva_cmd_init_shared_dram_buffer)); + err = pva_kmd_submit_cmd_sync(&pva->submitter, &init_cmd, + sizeof(init_cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); if (err != PVA_SUCCESS) { + pva_kmd_log_err("Failed to submit command"); goto release_handler; } @@ -140,6 +83,8 @@ release_handler: pva_kmd_release_shared_buffer_handler(pva, interface); free_buffer_memory: pva_kmd_device_memory_free(device_memory); + buffer->header = NULL; + buffer->resource_memory = NULL; return err; } @@ -148,22 +93,26 @@ enum pva_error pva_kmd_shared_buffer_deinit(struct pva_kmd_device *pva, { enum pva_error err = PVA_SUCCESS; struct pva_kmd_shared_buffer *buffer; + struct pva_cmd_deinit_shared_dram_buffer deinit_cmd = { 0 }; ASSERT(interface < PVA_MAX_NUM_CCQ); buffer = &pva->kmd_fw_buffers[interface]; - if (!pva->recovery) { - err = notify_fw( - pva, interface, setup_cmd_deinit_shared_dram_buffer, - sizeof(struct pva_cmd_deinit_shared_dram_buffer)); - if (err != PVA_SUCCESS) { - pva_kmd_log_err("Failed to deinit FW buffer"); - } + pva_kmd_set_cmd_deinit_shared_dram_buffer(&deinit_cmd, interface); + + err = pva_kmd_submit_cmd_sync(&pva->submitter, &deinit_cmd, + sizeof(deinit_cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + // This might happen if FW is aborted. It's safe to keep going. 
+ pva_kmd_log_err("Failed to notify FW of buffer deinit"); } pva_kmd_release_shared_buffer_handler(pva, interface); pva_kmd_shared_buffer_process(pva, interface); + buffer->header = NULL; pva_kmd_device_memory_free(buffer->resource_memory); buffer->resource_memory = NULL; @@ -176,6 +125,7 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva, enum pva_error err = PVA_SUCCESS; struct pva_kmd_fw_buffer_msg_header header; struct pva_kmd_fw_msg_vpu_trace vpu_trace; + struct pva_kmd_fw_msg_fence_trace fence_trace; struct pva_kmd_fw_msg_res_unreg unreg_data; struct pva_kmd_context *ctx = NULL; void *msg_body; @@ -214,6 +164,12 @@ static void shared_buffer_process_msg(struct pva_kmd_device *pva, pva_kmd_shim_add_trace_vpu_exec(pva, &vpu_trace); break; } + case PVA_KMD_FW_BUF_MSG_TYPE_FENCE_TRACE: { + ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_fence_trace)); + memcpy(&fence_trace, msg_body, sizeof(fence_trace)); + pva_kmd_shim_add_trace_fence(pva, &fence_trace); + break; + } case PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG: { ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_res_unreg)); memcpy(&unreg_data, msg_body, sizeof(unreg_data)); @@ -281,7 +237,7 @@ void pva_kmd_shared_buffer_process(void *pva_dev, uint8_t interface) // Note that ideally this should never happen as the buffer is expected to be // the same size as the resource table. // TODO: abort only the user context, not the device. - pva_kmd_abort(pva); + pva_kmd_abort_fw(pva); } // Buffer corresponding to CCQ 0 is used for sending messages common to a VM. diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c index 1fca1632..c28c0ec6 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c @@ -4,6 +4,7 @@ #include "pva_kmd_device.h" #include "pva_fw_address_map.h" #include "pva_fw_hyp.h" +#include "pva_kmd_shim_init.h" #include "pva_kmd_thread_sema.h" #include "pva_kmd_constants.h" #include "pva_kmd_silicon_isr.h" @@ -153,27 +154,12 @@ void pva_kmd_config_sid(struct pva_kmd_device *pva) } } -static uint32_t pva_kmd_get_syncpt_ro_offset(struct pva_kmd_device *pva) +static uint32_t get_syncpt_offset(struct pva_kmd_device *pva, + uint64_t syncpt_iova) { - if (pva->num_syncpts > 0U) { + if (pva->num_ro_syncpts > 0U) { uint64_t offset; - offset = safe_subu64(pva->syncpt_ro_iova, - pva_kmd_get_r5_iova_start()); - - ASSERT(offset <= UINT32_MAX); - return (uint32_t)offset; - } else { - // This is only for SIM mode where syncpoints are not supported. 
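The unified get_syncpt_offset() helper that replaces the separate RO/RW variants here computes a syncpoint region's offset from the R5 IOVA base and requires the result to fit in 32 bits, because FW receives it through a 32-bit boot semaphore. A minimal sketch of that computation (simplified names, not the driver's API):

#include <assert.h>
#include <stdint.h>

/* Offset of a syncpoint region from the R5 IOVA base. The caller
 * guarantees iova >= base; the result must fit the 32-bit semaphore
 * register it is written to. */
static uint32_t syncpt_offset_from_base(uint64_t iova, uint64_t base)
{
	uint64_t offset = iova - base;

	assert(offset <= UINT32_MAX);
	return (uint32_t)offset;
}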
- return PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET; - } -} - -static uint32_t pva_kmd_get_syncpt_rw_offset(struct pva_kmd_device *pva) -{ - if (pva->num_syncpts > 0U) { - uint64_t offset; - offset = safe_subu64(pva->syncpt_rw_iova, - pva_kmd_get_r5_iova_start()); + offset = safe_subu64(syncpt_iova, pva_kmd_get_r5_iova_start()); ASSERT(offset <= UINT32_MAX); return (uint32_t)offset; @@ -249,12 +235,17 @@ enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva) if (pva->bl_sector_pack_format == PVA_BL_XBAR_RAW) { boot_sema = PVA_BOOT_SEMA_USE_XBAR_RAW; } + if (pva->test_mode) { + boot_sema |= PVA_BOOT_SEMA_TEST_MODE; + } pva_kmd_set_sema(pva, PVA_BOOT_SEMA, boot_sema); - pva_kmd_write(pva, PVA_REG_HSP_SS2_SET_ADDR, - pva_kmd_get_syncpt_ro_offset(pva)); - pva_kmd_write(pva, PVA_REG_HSP_SS3_SET_ADDR, - pva_kmd_get_syncpt_rw_offset(pva)); + pva_kmd_set_sema(pva, PVA_RO_SYNC_BASE_SEMA, + get_syncpt_offset(pva, pva->ro_syncpt_base_iova)); + pva_kmd_set_sema(pva, PVA_RW_SYNC_BASE_SEMA, + get_syncpt_offset(pva, pva->rw_syncpt_base_iova)); + pva_kmd_set_sema(pva, PVA_RW_SYNC_SIZE_SEMA, + pva->rw_syncpt_region_size); pva_kmd_config_sid_regs(pva); @@ -290,6 +281,7 @@ free_sec_lic: pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC); free_fw_debug_mem: pva_kmd_drain_fw_print(&pva->fw_print_buffer); + pva_kmd_freeze_fw(pva); pva_kmd_device_memory_free(pva->fw_debug_mem); free_fw_mem: if (!pva->load_from_gsc) { @@ -299,17 +291,14 @@ out: return err; } -void pva_kmd_deinit_fw(struct pva_kmd_device *pva) +void pva_kmd_freeze_fw(struct pva_kmd_device *pva) { - pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC); - pva_kmd_drain_fw_print(&pva->fw_print_buffer); - /* - * Before powering off PVA, disable SEC error reporting. - * While powering off, PVA might generate (unexplained) error interrupts - * This causes HSM to read some PVA SEC registers. However, since PVA might - * already be powergated by this time, access to PVA SEC registers from HSM - * fails. This was discussed in Bug 3785498. + * Before freezing PVA, disable SEC error reporting. + * While setting the reset line, PVA might generate (unexplained) error + * interrupts. This causes HSM to read some PVA SEC registers. However, + * since PVA might already be powergated by this time, access to PVA SEC + * registers from HSM fails. This was discussed in Bug 3785498.
* * Note: we do not explicitly enable these errors during power on since * 'enable' is their reset value @@ -317,6 +306,17 @@ disable_sec_mission_error_reporting(pva); disable_sec_latent_error_reporting(pva); + pva_kmd_set_reset_line(pva); +} + +void pva_kmd_deinit_fw(struct pva_kmd_device *pva) +{ + pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC); + pva_kmd_drain_fw_print(&pva->fw_print_buffer); + + // Freeze FW so that we can free memory + pva_kmd_freeze_fw(pva); + pva_kmd_device_memory_free(pva->fw_debug_mem); if (!pva->load_from_gsc) { pva_kmd_device_memory_free(pva->fw_bin_mem); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c index 19fdbd64..7209477f 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c @@ -748,9 +748,11 @@ load_metainfo(struct pva_kmd_device *pva, uint64_t section_iova, metainfo->num_vmem_buffers = n_symbols; data_sections_mem = pva_offset_pointer(metainfo, sizeof(*metainfo)); - memcpy(data_sections_mem, section_infos, - mulu32(n_data_sections, (uint32_t)sizeof(*section_infos), - &math_err)); + if (n_data_sections > 0U && section_infos != NULL) { + memcpy(data_sections_mem, section_infos, + mulu32(n_data_sections, (uint32_t)sizeof(*section_infos), + &math_err)); + } vmem_buffers_mem = pva_offset_pointer( data_sections_mem, diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.c index d4f6433f..5fcd56e7 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.c @@ -42,6 +42,7 @@ int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable) { struct pva_kmd_device *dev = ip_dev; enum pva_error err = PVA_SUCCESS; + int ret = 0; if (disable) { err = pva_kmd_device_busy(dev); @@ -51,5 +52,10 @@ int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable) } else { pva_kmd_device_idle(dev); } - return err; + + if (err != PVA_SUCCESS) { + ret = -1; + } + + return ret; } \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.c index ef10f5a5..1978841b 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.c @@ -45,22 +45,16 @@ void pva_kmd_hyp_isr(void *data, enum pva_kmd_intr_line intr_line) if (wdt_val != 0) { /* Clear interrupt status */ - pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, - intr_status & - PVA_MASK(PVA_REG_SEC_LIC_INTR_WDT_MSB, - PVA_REG_SEC_LIC_INTR_WDT_LSB)); + pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, wdt_val); pva_kmd_log_err("PVA watchdog timeout!"); - pva_kmd_abort(pva); + pva_kmd_abort_fw(pva); } if (h1x_val != 0) { pva_kmd_log_err_u64("Host1x errors", h1x_val); /* Clear interrupt status */ - pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, - intr_status & - PVA_MASK(PVA_REG_SEC_LIC_INTR_H1X_MSB, - PVA_REG_SEC_LIC_INTR_H1X_LSB)); - pva_kmd_abort(pva); + pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, h1x_val); + pva_kmd_abort_fw(pva); } if (hsp_val != 0) { diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_misc.c
b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_misc.c index 699e6b12..7c4da142 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_misc.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_misc.c @@ -23,3 +23,10 @@ uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id) PVA_REG_CCQ_STATUS2_NUM_ENTRIES_LSB, uint32_t); return safe_subu32((uint32_t)PVA_CCQ_DEPTH, len) / 2U; } + +void pva_kmd_disable_all_interrupts_nosync(struct pva_kmd_device *pva) +{ + for (int i = 0; i < PVA_KMD_INTR_LINE_COUNT; i++) { + pva_kmd_disable_intr_nosync(pva, (enum pva_kmd_intr_line)i); + } +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.c index 17c93ba7..75c09287 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.c @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. #include "pva_kmd_submitter.h" +#include "pva_api_types.h" #include "pva_kmd_utils.h" #include "pva_kmd_abort.h" @@ -70,6 +71,7 @@ pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter, submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset); submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset); submit_info.first_chunk_size = first_chunk_size; + submit_info.execution_timeout_ms = PVA_EXEC_TIMEOUT_INF; pva_kmd_mutex_lock(submitter->submit_lock); err = pva_kmd_queue_submit(submitter->queue, &submit_info); @@ -108,6 +110,7 @@ enum pva_error pva_kmd_submitter_submit(struct pva_kmd_submitter *submitter, submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset); submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset); submit_info.first_chunk_size = first_chunk_size; + submit_info.execution_timeout_ms = PVA_EXEC_TIMEOUT_INF; /* TODO: remove these flags after FW execute command buffer with no engines. 
*/ submit_info.flags = PVA_INSERT8(0x3, PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_MSB, @@ -137,16 +140,63 @@ enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter, { uint32_t volatile *fence_addr = submitter->post_fence_va; uint32_t time_spent = 0; + struct pva_kmd_device *pva = submitter->queue->pva; while (*fence_addr < fence_val) { + if (pva->recovery) { + return PVA_ERR_FW_ABORTED; + } pva_kmd_sleep_us(poll_interval_us); time_spent = safe_addu32(time_spent, poll_interval_us); if (time_spent >= timeout_us) { pva_kmd_log_err("pva_kmd_submitter_wait Timed out"); - pva_kmd_abort(submitter->queue->pva); + pva_kmd_abort_fw(submitter->queue->pva); return PVA_TIMEDOUT; } } return PVA_SUCCESS; } + +enum pva_error pva_kmd_submit_cmd_sync(struct pva_kmd_submitter *submitter, + void *cmds, uint32_t size, + uint32_t poll_interval_us, + uint32_t timeout_us) +{ + struct pva_kmd_cmdbuf_builder builder = { 0 }; + enum pva_error err; + void *cmd_dst = NULL; + uint32_t fence_val = 0; + + err = pva_kmd_submitter_prepare(submitter, &builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + + cmd_dst = pva_kmd_reserve_cmd_space(&builder, size); + if (cmd_dst == NULL) { + err = PVA_INVAL; + pva_kmd_log_err( + "Trying to submit too many commands using pva_kmd_submit_cmd_sync."); + goto cancel_builder; + } + + memcpy(cmd_dst, cmds, size); + err = pva_kmd_submitter_submit(submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + goto cancel_builder; + } + + err = pva_kmd_submitter_wait(submitter, fence_val, poll_interval_us, + timeout_us); + if (err != PVA_SUCCESS) { + goto cancel_builder; + } + + return err; + +cancel_builder: + pva_kmd_cmdbuf_builder_cancel(&builder); +err_out: + return err; +} \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.h index 73762d99..d85c0d75 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.h @@ -57,4 +57,11 @@ pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter, /* add cmd */ /* do submit with fence (provide a fence) */ +/* Helper function to submit several commands and wait for them to complete. +Total size must be smaller than a chunk. 
*/ +enum pva_error pva_kmd_submit_cmd_sync(struct pva_kmd_submitter *submitter, + void *cmds, uint32_t size, + uint32_t poll_interval_us, + uint32_t timeout_us); + #endif // PVA_KMD_SUBMITTER_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c index 4e4f6030..1952f828 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c @@ -59,11 +59,8 @@ enum pva_error pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva, struct pva_kmd_tegrastats *kmd_tegra_stats) { - struct pva_kmd_cmdbuf_builder builder; - struct pva_kmd_submitter *dev_submitter = &pva->submitter; - struct pva_cmd_get_tegra_stats *cmd; + struct pva_cmd_get_tegra_stats cmd = { 0 }; uint64_t buffer_offset = 0U; - uint32_t fence_val; enum pva_error err = PVA_SUCCESS; struct pva_kmd_fw_tegrastats fw_tegra_stats = { 0 }; bool stats_enabled = pva->debugfs_context.stats_enable; @@ -86,29 +83,15 @@ pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva, goto err_out; } - err = pva_kmd_submitter_prepare(dev_submitter, &builder); - if (err != PVA_SUCCESS) { - goto dev_idle; - } - cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); - ASSERT(cmd != NULL); - - pva_kmd_set_cmd_get_tegra_stats(cmd, pva->tegra_stats_resource_id, + pva_kmd_set_cmd_get_tegra_stats(&cmd, pva->tegra_stats_resource_id, pva->tegra_stats_buf_size, buffer_offset, stats_enabled); - err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + err = pva_kmd_submit_cmd_sync(&pva->submitter, &cmd, sizeof(cmd), + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); if (err != PVA_SUCCESS) { pva_kmd_log_err("tegra stats cmd submission failed"); - goto cancel_builder; - } - - err = pva_kmd_submitter_wait(dev_submitter, fence_val, - PVA_KMD_WAIT_FW_POLL_INTERVAL_US, - PVA_KMD_WAIT_FW_TIMEOUT_US); - if (err != PVA_SUCCESS) { - pva_kmd_log_err( - "Waiting for FW timed out when getting tegra stats"); goto dev_idle; } @@ -129,8 +112,7 @@ out: kmd_tegra_stats->window_end_time = fw_tegra_stats.window_end_time; return PVA_SUCCESS; -cancel_builder: - pva_kmd_cmdbuf_builder_cancel(&builder); + dev_idle: pva_kmd_device_idle(pva); err_out: diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_device_memory.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_device_memory.h index 90e8cbb0..18d73029 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_device_memory.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_device_memory.h @@ -21,6 +21,7 @@ struct pva_kmd_device_memory { uint64_t size; /**< Size of the mapping. */ struct pva_kmd_device *pva; /**< The PVA this memory is mapped to. */ uint32_t smmu_ctx_idx; /**< The SMMU context this memory is mapped to. */ + uint32_t iova_access_flags; /**< Access flags for the memory. 
RO = 1, WO = 2, RW = 3 */ }; /** diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_debugfs.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_debugfs.h index 556034ba..2279e46e 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_debugfs.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_debugfs.h @@ -9,8 +9,9 @@ void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name, bool *val); void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name, uint32_t *val); -void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name, - struct pva_kmd_file_ops *fops); +enum pva_error pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, + const char *name, + struct pva_kmd_file_ops *fops); void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva); unsigned long pva_kmd_copy_data_from_user(void *dst, const void *src, uint64_t size); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_init.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_init.h index 07b320f6..9d6e6fc6 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_init.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_init.h @@ -14,9 +14,6 @@ void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva); void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id, uint32_t *syncpt_value); -void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id, - uint64_t *syncpt_iova); - void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva); /** @@ -34,7 +31,7 @@ void pva_kmd_power_off(struct pva_kmd_device *pva); * user submission halted. This is required for host1x * watchdog, or kmd submission timeout failures. */ -void pva_kmd_fw_reset_assert(struct pva_kmd_device *pva); +void pva_kmd_freeze_fw(struct pva_kmd_device *pva); /** * @brief Initialize firmware. @@ -60,4 +57,18 @@ enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva); * @param pva pointer to the PVA device to de-initialize */ void pva_kmd_deinit_fw(struct pva_kmd_device *pva); + +/** + * @brief Disable all interrupts without waiting for running interrupt handlers + * to complete. + * + * We don't wait for running interrupt handlers to complete because we want to + * be able to call this function from interrupt handlers themselves. + * + * This function is to be called when PVA enters a bad state and we want to + * protect KMD from potential interrupt floods from PVA (particularly the watchdog + * interrupt, which HW will trigger repeatedly). + */ +void pva_kmd_disable_all_interrupts_nosync(struct pva_kmd_device *pva); + #endif // PVA_KMD_SHIM_INIT_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_silicon.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_silicon.h index d86915c1..b22953bd 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_silicon.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_silicon.h @@ -81,10 +81,10 @@ void pva_kmd_enable_intr(struct pva_kmd_device *pva, enum pva_kmd_intr_line intr_line); /** - * @brief Disable an interrupt line. + * @brief Disable an interrupt line without waiting for running interrupt handlers to complete.
*/ -void pva_kmd_disable_intr(struct pva_kmd_device *pva, - enum pva_kmd_intr_line intr_line); +void pva_kmd_disable_intr_nosync(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line); /** * @brief Free an interrupt line. @@ -104,13 +104,6 @@ void pva_kmd_free_intr(struct pva_kmd_device *pva, */ enum pva_error pva_kmd_read_fw_bin(struct pva_kmd_device *pva); -/** - * @brief Reset assert FW so it can be in recovery and - * user submission halted. This is requied for host1x - * watchdog, or kmd submission timeout failures. - */ -void pva_kmd_fw_reset_assert(struct pva_kmd_device *pva); - /** * @brief Get starting IOVA of the memory shared by R5 and KMD. * @@ -141,4 +134,9 @@ void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva); */ void pva_kmd_config_sid_regs(struct pva_kmd_device *pva); +/** + * @brief Set the PVA HW reset line. + */ +void pva_kmd_set_reset_line(struct pva_kmd_device *pva); + #endif // PVA_KMD_SHIM_SILICON_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h index dd64532c..72ed37a8 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h @@ -9,4 +9,8 @@ void pva_kmd_shim_add_trace_vpu_exec( struct pva_kmd_device *pva, struct pva_kmd_fw_msg_vpu_trace const *trace_info); +void pva_kmd_shim_add_trace_fence( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_fence_trace const *trace_info); + #endif // PVA_KMD_SHIM_TRACE_EVENT_H diff --git a/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h b/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h index d8d73cfc..6a590386 100644 --- a/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h +++ b/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h @@ -20,24 +20,10 @@ struct pva_ops_context_init { struct pva_ops_response_context_init { enum pva_error error; + uint16_t max_cmdbuf_chunk_size; uint64_t ccq_shm_hdl; }; -struct pva_ops_syncpt_register { -#define PVA_OPS_OPCODE_SYNCPT_REGISTER (2U | PVA_OPS_PRIVATE_OPCODE_FLAG) - struct pva_ops_header header; -}; - -struct pva_ops_response_syncpt_register { - enum pva_error error; - uint32_t syncpt_ro_res_id; - uint32_t syncpt_rw_res_id; - uint32_t synpt_size; - uint32_t synpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT]; - uint32_t num_ro_syncpoints; - uint32_t pad; -}; - /** * Calculates the total memory size required for a PVA submission queue. * This includes the size of the queue header and the combined size of all command buffer submission info structures. 
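Returning to the pva_kmd_submit_cmd_sync() helper added in pva_kmd_submitter.c above: it wraps the prepare/reserve/submit/wait sequence, and its wait half is a fence-polling loop with an early-out when FW recovery is flagged. A self-contained sketch of that loop (generic names; usleep() stands in for pva_kmd_sleep_us(), which is not shown in this patch):

#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>

enum wait_status { WAIT_OK, WAIT_TIMEOUT, WAIT_ABORTED };

/* Spin on *fence until it reaches target; give up after timeout_us and
 * bail out early if the recovery flag is raised (FW aborted). */
static enum wait_status wait_fence(volatile const uint32_t *fence,
				   uint32_t target,
				   const volatile bool *recovery,
				   uint32_t poll_interval_us,
				   uint32_t timeout_us)
{
	uint32_t spent = 0;

	while (*fence < target) {
		if (*recovery)
			return WAIT_ABORTED;
		usleep(poll_interval_us);
		spent += poll_interval_us;
		if (spent >= timeout_us)
			return WAIT_TIMEOUT;
	}
	return WAIT_OK;
}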
@@ -67,7 +53,8 @@ struct pva_ops_queue_create { struct pva_ops_response_queue_create { enum pva_error error; uint32_t queue_id; - uint32_t syncpt_fence_counter; + uint32_t syncpt_id; + uint32_t syncpt_current_value; }; /* KMD API: queue destroy */ diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c index d284a2af..b6ed0e6b 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c @@ -130,8 +130,9 @@ void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name, debugfs_create_u32(name, 0644, de, pdata); } -void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name, - struct pva_kmd_file_ops *pvafops) +enum pva_error pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, + const char *name, + struct pva_kmd_file_ops *pvafops) { struct pva_kmd_linux_device_data *device_data = pva_kmd_linux_device_get_data(pva); @@ -142,7 +143,12 @@ void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name, struct dentry *file; file = debugfs_create_file(name, 0644, de, pvafops, fops); - ASSERT(file != NULL); + if (file == NULL) { + pva_kmd_log_err("Failed to create debugfs file"); + return PVA_INVAL; + } + + return PVA_SUCCESS; } void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva) diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c index e4b3348f..b9e7c97d 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c @@ -53,15 +53,6 @@ void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id, } } -void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id, - uint64_t *syncpt_iova) -{ - uint32_t offset = 0; - - offset = nvpva_syncpt_unit_interface_get_byte_offset_ext(syncpt_id); - *syncpt_iova = safe_addu64(pva->syncpt_ro_iova, (uint64_t)offset); -} - void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva) { phys_addr_t base; @@ -69,7 +60,6 @@ void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva) int err = 0; uint32_t stride, num_syncpts; uint32_t syncpt_page_size; - uint32_t syncpt_offset[PVA_NUM_RW_SYNCPTS]; dma_addr_t sp_start; struct device *dev; struct pva_kmd_linux_device_data *device_data = @@ -92,53 +82,38 @@ void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva) syncpt_page_size = nvpva_syncpt_unit_interface_get_byte_offset_ext(1); dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev; if (iommu_get_domain_for_dev(dev)) { - sp_start = dma_map_resource(dev, base, size, DMA_TO_DEVICE, + sp_start = dma_map_resource(dev, base, size, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(dev, sp_start)) { - FAULT("Failed to pin RO syncpoints\n"); + FAULT("Failed to pin syncpoints\n"); } } else { - FAULT("Failed to pin RO syncpoints\n"); + FAULT("Failed to pin syncpoints\n"); } - pva->syncpt_ro_iova = sp_start; - pva->syncpt_offset = syncpt_page_size; - pva->num_syncpts = (size / syncpt_page_size); + pva->ro_syncpt_base_iova = sp_start; + pva->syncpt_page_size = syncpt_page_size; + pva->num_ro_syncpts = num_syncpts; + + // The same region is also used for RW syncpts... 
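In the loop just below, each RW syncpoint's IOVA is derived by adding its byte offset inside the host1x syncpoint aperture to the mapped region base, so no second DMA mapping is needed. Schematically (hypothetical helper, assuming the aperture places syncpoint n at byte offset n * page_size, which is what the byte-offset query effectively returns):

#include <stdint.h>

/* Sketch: IOVA of one syncpoint inside a mapped aperture, assuming one
 * page per syncpoint. */
static uint64_t syncpt_iova_in_region(uint64_t region_base,
				      uint32_t syncpt_id,
				      uint32_t page_size)
{
	return region_base + (uint64_t)syncpt_id * (uint64_t)page_size;
}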
+ pva->rw_syncpt_base_iova = sp_start; + pva->rw_syncpt_region_size = size; for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) { - pva->syncpt_rw[i].syncpt_id = nvpva_get_syncpt_client_managed( - props->pdev, "pva_syncpt"); - if (pva->syncpt_rw[i].syncpt_id == 0) { + uint32_t syncpt_id; + uint64_t syncpt_iova; + + syncpt_id = nvpva_get_syncpt_client_managed(props->pdev, + "pva_syncpt"); + if (syncpt_id == 0) { FAULT("Failed to get syncpt\n"); } - syncpt_offset[i] = + syncpt_iova = safe_addu64( + sp_start, nvpva_syncpt_unit_interface_get_byte_offset_ext( - pva->syncpt_rw[i].syncpt_id); - err = nvpva_syncpt_read_ext_check( - props->pdev, pva->syncpt_rw[i].syncpt_id, - &pva->syncpt_rw[i].syncpt_value); - if (err < 0) { - FAULT("Failed to read syncpoint value\n"); - } - } + syncpt_id)); - pva->syncpt_rw_iova = - dma_map_resource(dev, - safe_addu64(base, (uint64_t)syncpt_offset[0]), - safe_mulu64((uint64_t)pva->syncpt_offset, - (uint64_t)PVA_NUM_RW_SYNCPTS), - DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, pva->syncpt_rw_iova)) { - FAULT("Failed to pin RW syncpoints\n"); - } - pva->syncpt_rw[0].syncpt_iova = pva->syncpt_rw_iova; - for (uint32_t i = 1; i < PVA_NUM_RW_SYNCPTS; i++) { - if (safe_addu32(syncpt_offset[i - 1], pva->syncpt_offset) != - syncpt_offset[i]) { - FAULT("RW syncpts are not contiguous\n"); - } - pva->syncpt_rw[i].syncpt_iova = safe_addu64( - pva->syncpt_rw_iova, - safe_mulu64((uint64_t)pva->syncpt_offset, (uint64_t)i)); + pva->rw_syncpts[i].syncpt_iova = syncpt_iova; + pva->rw_syncpts[i].syncpt_id = syncpt_id; } } @@ -166,25 +141,19 @@ void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva) dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev; if (iommu_get_domain_for_dev(dev)) { - dma_unmap_resource(dev, pva->syncpt_ro_iova, size, - DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - dma_unmap_resource(dev, pva->syncpt_rw_iova, - safe_mulu64((uint64_t)pva->syncpt_offset, - (uint64_t)PVA_NUM_RW_SYNCPTS), + dma_unmap_resource(dev, pva->ro_syncpt_base_iova, size, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); } else { FAULT("Failed to unmap syncpts\n"); } for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) { nvpva_syncpt_put_ref_ext(props->pdev, - pva->syncpt_rw[i].syncpt_id); - pva->syncpt_rw[i].syncpt_id = 0; - pva->syncpt_rw[i].syncpt_iova = 0; - pva->syncpt_rw[i].syncpt_value = 0; + pva->rw_syncpts[i].syncpt_id); + pva->rw_syncpts[i].syncpt_id = 0; + pva->rw_syncpts[i].syncpt_iova = 0; } - pva->syncpt_ro_iova = 0; - pva->syncpt_rw_iova = 0; - pva->syncpt_offset = 0; + pva->ro_syncpt_base_iova = 0; + pva->syncpt_page_size = 0; nvpva_syncpt_unit_interface_deinit(props->pdev); } @@ -235,21 +204,11 @@ void pva_kmd_power_off(struct pva_kmd_device *pva) pva_kmd_linux_device_get_data(pva); struct nvpva_device_data *props = device_data->pva_device_properties; - // Set reset line before cutting off power - - /* Power management operation is asynchronous. We don't control when PVA - * will really be powered down. However, we need to free memories after - * this call. Therefore, we assert the reset line to stop PVA from any - * further activity. 
*/ - reset_control_acquire(props->reset_control); - reset_control_assert(props->reset_control); - reset_control_release(props->reset_control); - pm_runtime_mark_last_busy(&props->pdev->dev); pm_runtime_put(&props->pdev->dev); } -void pva_kmd_fw_reset_assert(struct pva_kmd_device *pva) +void pva_kmd_set_reset_line(struct pva_kmd_device *pva) { struct pva_kmd_linux_device_data *device_data = pva_kmd_linux_device_get_data(pva); diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c index c2faea27..1703e2bf 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c @@ -23,7 +23,7 @@ struct pva_kmd_device_memory_impl { struct pva_kmd_device_memory dev_mem; struct dma_buf *dmabuf; struct iosys_map iosysmap; - struct dma_buf_attachment *dmabuf_attch; + struct dma_buf_attachment *dmabuf_attach; struct sg_table *sgt; uint64_t offset; }; @@ -36,11 +36,20 @@ pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva, struct device *dev = get_context_device(pva, smmu_ctx_idx); dma_addr_t pa = 0U; void *va = NULL; + struct pva_kmd_device_memory_impl *mem_impl; + + mem_impl = pva_kmd_zalloc(sizeof(struct pva_kmd_device_memory_impl)); + if (mem_impl == NULL) { + goto err_out; + } + + if (size == 0u) { + pva_kmd_log_err("Invalid allocation size"); + goto free_mem; + } - struct pva_kmd_device_memory_impl *mem_impl = - pva_kmd_zalloc(sizeof(struct pva_kmd_device_memory_impl)); va = dma_alloc_coherent(dev, size, &pa, GFP_KERNEL); - if (va == NULL) { + if (IS_ERR_OR_NULL(va)) { pva_kmd_log_err("dma_alloc_coherent failed"); goto free_mem; } @@ -49,12 +58,13 @@ pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva, mem_impl->dev_mem.size = size; mem_impl->dev_mem.pva = pva; mem_impl->dev_mem.smmu_ctx_idx = smmu_ctx_idx; + mem_impl->dev_mem.iova_access_flags = iova_access_flags; mem_impl->dmabuf = NULL; return &mem_impl->dev_mem; - free_mem: pva_kmd_free(mem_impl); +err_out: return NULL; } @@ -66,13 +76,16 @@ struct pva_kmd_device_memory * pva_kmd_device_memory_acquire(uint64_t memory_handle, uint64_t offset, uint64_t size, struct pva_kmd_context *ctx) { - struct pva_kmd_device_memory_impl *mem_impl = - (struct pva_kmd_device_memory_impl *)pva_kmd_zalloc( - sizeof(struct pva_kmd_device_memory_impl)); - struct dma_buf *dma_buf; + struct pva_kmd_device_memory_impl *mem_impl; + + mem_impl = pva_kmd_zalloc(sizeof(struct pva_kmd_device_memory_impl)); + if (mem_impl == NULL) { + goto err_out; + } + dma_buf = dma_buf_get(memory_handle); - if (dma_buf == NULL) { + if (IS_ERR_OR_NULL(dma_buf)) { pva_kmd_log_err("Failed to acquire memory"); goto free_mem; } @@ -92,6 +105,7 @@ put_dmabuf: dma_buf_put(dma_buf); free_mem: pva_kmd_free(mem_impl); +err_out: return NULL; } @@ -103,7 +117,7 @@ void pva_kmd_device_memory_free(struct pva_kmd_device_memory *mem) if (mem_impl->dmabuf != NULL) { /* This memory comes from dma_buf_get */ - if (mem->iova != 0U) { + if (mem_impl->dmabuf_attach != NULL) { pva_kmd_device_memory_iova_unmap(mem); } @@ -160,14 +174,28 @@ pva_kmd_device_memory_iova_map(struct pva_kmd_device_memory *memory, pva_math_error math_err = MATH_OP_SUCCESS; struct pva_kmd_device_memory_impl *mem_impl = container_of( memory, struct pva_kmd_device_memory_impl, dev_mem); - - // struct pva_kmd_linux_device_plat_data *plat_data = - // pva_kmd_linux_device_get_plat_data(pva); - // 
struct device *dev = plat_data->dev[smmu_ctx_idx]; struct device *dev = get_context_device(pva, smmu_ctx_idx); struct dma_buf_attachment *attach; struct sg_table *sgt; enum pva_error err = PVA_SUCCESS; + enum dma_data_direction dma_direction; + uint64_t iova; + + switch (access_flags) { + case PVA_ACCESS_RO: // Read-Only + dma_direction = DMA_TO_DEVICE; + break; + case PVA_ACCESS_WO: // Write-Only + dma_direction = DMA_FROM_DEVICE; + break; + case PVA_ACCESS_RW: // Read-Write + dma_direction = DMA_BIDIRECTIONAL; + break; + default: + pva_kmd_log_err("Invalid access flags\n"); + err = PVA_INVAL; + goto err_out; + } attach = dma_buf_attach(mem_impl->dmabuf, dev); if (IS_ERR_OR_NULL(attach)) { @@ -176,28 +204,32 @@ pva_kmd_device_memory_iova_map(struct pva_kmd_device_memory *memory, goto err_out; } - mem_impl->dmabuf_attch = attach; - sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + sgt = dma_buf_map_attachment(attach, dma_direction); if (IS_ERR_OR_NULL(sgt)) { err = PVA_INVAL; pva_kmd_log_err("Failed to map attachment\n"); goto detach; } - mem_impl->sgt = sgt; - mem_impl->dev_mem.iova = - addu64(sg_dma_address(sgt->sgl), mem_impl->offset, &math_err); + iova = addu64(sg_dma_address(sgt->sgl), mem_impl->offset, &math_err); if (math_err != MATH_OP_SUCCESS) { err = PVA_INVAL; pva_kmd_log_err( "pva_kmd_device_memory_iova_map Invalid DMA address\n"); - goto detach; + goto unmap; } + + mem_impl->sgt = sgt; + mem_impl->dmabuf_attach = attach; + mem_impl->dev_mem.iova = iova; mem_impl->dev_mem.pva = pva; mem_impl->dev_mem.smmu_ctx_idx = smmu_ctx_idx; + mem_impl->dev_mem.iova_access_flags = access_flags; return PVA_SUCCESS; +unmap: + dma_buf_unmap_attachment(attach, sgt, dma_direction); detach: - dma_buf_detach(mem_impl->dmabuf, mem_impl->dmabuf_attch); + dma_buf_detach(mem_impl->dmabuf, attach); err_out: return err; } @@ -209,13 +241,14 @@ void pva_kmd_device_memory_iova_unmap(struct pva_kmd_device_memory *memory) ASSERT(mem_impl->dmabuf != NULL); - dma_buf_unmap_attachment(mem_impl->dmabuf_attch, mem_impl->sgt, + dma_buf_unmap_attachment(mem_impl->dmabuf_attach, mem_impl->sgt, DMA_BIDIRECTIONAL); - dma_buf_detach(mem_impl->dmabuf, mem_impl->dmabuf_attch); - memory->iova = 0; + dma_buf_detach(mem_impl->dmabuf, mem_impl->dmabuf_attach); + mem_impl->sgt = NULL; + mem_impl->dmabuf_attach = NULL; } uint64_t pva_kmd_get_r5_iova_start(void) { return 0; -} \ No newline at end of file +} diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c index b58c74c6..c819d02e 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c @@ -50,13 +50,13 @@ extern struct platform_driver pva_kmd_linux_smmu_context_driver; extern atomic_t g_num_smmu_ctxs; static bool load_from_gsc = PVA_KMD_LOAD_FROM_GSC_DEFAULT; -static bool app_authenticate = PVA_KMD_APP_AUTH_DEFAULT; +static bool pva_test_mode; //false by default module_param(load_from_gsc, bool, 0); MODULE_PARM_DESC(load_from_gsc, "Load V3 FW from GSC"); -module_param(app_authenticate, bool, 0); -MODULE_PARM_DESC(app_authenticate, "Enable app authentication"); +module_param(pva_test_mode, bool, 0); +MODULE_PARM_DESC(pva_test_mode, "Enable test mode"); struct nvpva_device_data t23x_pva0_props = { .version = PVA_CHIP_T23X, @@ -112,11 +112,15 @@ static int pva_get_gsc_priv_hwid(struct platform_device *pdev) return fwspec->ids[0] & 0xffff; } -static void 
pva_kmd_linux_register_hwpm(struct pva_kmd_device *pva) +static int pva_kmd_linux_register_hwpm(struct pva_kmd_device *pva) { struct tegra_soc_hwpm_ip_ops *hwpm_ip_ops = pva_kmd_zalloc(sizeof(*hwpm_ip_ops)); + if (hwpm_ip_ops == NULL) { + return -ENOMEM; + } + hwpm_ip_ops->ip_dev = pva; hwpm_ip_ops->ip_base_address = safe_addu64( pva->reg_phy_base[0], (uint64_t)pva->regspec.cfg_perf_mon); @@ -125,6 +129,7 @@ static void pva_kmd_linux_register_hwpm(struct pva_kmd_device *pva) hwpm_ip_ops->hwpm_ip_reg_op = &pva_kmd_hwpm_ip_reg_op; tegra_soc_hwpm_ip_register(hwpm_ip_ops); pva->debugfs_context.data_hwpm = hwpm_ip_ops; + return 0; } static void pva_kmd_linux_unregister_hwpm(struct pva_kmd_device *pva) @@ -256,10 +261,57 @@ static void pva_kmd_free_co_mem(struct platform_device *pdev) } } +static bool pva_kmd_in_test_mode(struct device *dev, bool param_test_mode) +{ + const char *dt_test_mode = NULL; + + if (of_property_read_string(dev->of_node, "nvidia,test_mode_enable", + &dt_test_mode)) { + return param_test_mode; + } + + if (strcmp(dt_test_mode, "true")) { + return param_test_mode; + } + + return true; +} + static struct kobj_type nvpva_kobj_ktype = { .sysfs_ops = &kobj_sysfs_ops, }; +/** + * Read VPU authentication property from device tree + * + * @param dev Pointer to the device structure + * @return true if authentication should be enabled, false otherwise + */ +static bool pva_kmd_linux_read_vpu_auth(const struct device *dev) +{ + bool auth_enabled = false; + int len; + const __be32 *val; + + val = of_get_property(dev->of_node, "nvidia,vpu-auth", &len); + if ((val != NULL) && (len >= (int)sizeof(__be32))) { + u32 value = (u32)be32_to_cpu(*val); + if (value != 0U) { + auth_enabled = true; + dev_dbg(dev, "VPU authentication enabled\n"); + } else { + auth_enabled = false; + dev_dbg(dev, "VPU authentication disabled\n"); + } + } else { + dev_dbg(dev, + "No VPU authentication property found, using default: %d\n", + auth_enabled); + } + + return auth_enabled; +} + static int pva_probe(struct platform_device *pdev) { int err = 0U; @@ -273,6 +325,9 @@ static int pva_probe(struct platform_device *pdev) struct clk_bulk_data *clks; struct clk *c; + bool pva_enter_test_mode = false; + bool app_authenticate; + device_id = of_match_device(tegra_pva_of_match, dev); if (!device_id) { dev_err(dev, "no match for pva dev\n"); @@ -286,6 +341,8 @@ static int pva_probe(struct platform_device *pdev) return -ENODATA; } + app_authenticate = pva_kmd_linux_read_vpu_auth(dev); + /* Create devices for child nodes of this device */ of_platform_default_populate(dev->of_node, NULL, dev); @@ -300,17 +357,12 @@ static int pva_probe(struct platform_device *pdev) pva_props->pdev = pdev; mutex_init(&pva_props->lock); - pva_device = - pva_kmd_device_create(pva_props->version, 0, app_authenticate); + pva_enter_test_mode = pva_kmd_in_test_mode(dev, pva_test_mode); + pva_device = pva_kmd_device_create( + pva_props->version, 0, app_authenticate, pva_enter_test_mode); pva_device->is_hv_mode = is_tegra_hypervisor_mode(); - /* On L4T, forcing boot from file */ - /* If needed to load from GSC, remove the below block */ - if (!pva_device->is_hv_mode) { - load_from_gsc = false; - } - pva_device->load_from_gsc = load_from_gsc; pva_device->stream_ids[pva_device->r5_image_smmu_context_id] = pva_get_gsc_priv_hwid(pdev); @@ -352,8 +404,17 @@ static int pva_probe(struct platform_device *pdev) pva_kmd_linux_host1x_init(pva_device); - pva_kmd_debugfs_create_nodes(pva_device); - pva_kmd_linux_register_hwpm(pva_device); + err = 
pva_kmd_debugfs_create_nodes(pva_device); + if (err != PVA_SUCCESS) { + dev_err(dev, "debugfs creation failed\n"); + goto err_cdev_init; + } + + err = pva_kmd_linux_register_hwpm(pva_device); + if (err != PVA_SUCCESS) { + dev_err(dev, "pva_kmd_linux_register_hwpm failed\n"); + goto err_cdev_init; + } if (!pva_device->is_hv_mode && pva_device->load_from_gsc) { err = pva_kmd_get_co_info(pdev); diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c index 149cbabb..a55ede5a 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c @@ -6,6 +6,11 @@ #include "trace/events/nvpva_ftrace.h" #include +static uint32_t get_job_id(uint32_t queue_id, uint64_t submit_id) +{ + return (queue_id & 0x000000FF) << 24 | (submit_id & 0xFFFFFFU); +} + void pva_kmd_shim_add_trace_vpu_exec( struct pva_kmd_device *pva, struct pva_kmd_fw_msg_vpu_trace const *trace_info) @@ -38,7 +43,8 @@ void pva_kmd_shim_add_trace_vpu_exec( // In V2, Job ID is a 32-bit value with the top 8 bits being the queue ID // and the bottom 24 bits being a per-task counter. In V3, we only use the // queue ID. - uint32_t job_id = (trace_info->queue_id & 0x000000FF) << 24; + uint32_t job_id = + get_job_id(trace_info->queue_id, trace_info->submit_id); trace_pva_job_ext_event(job_id, trace_info->ccq_id, 0, // syncpt_thresh, @@ -50,3 +56,42 @@ void pva_kmd_shim_add_trace_vpu_exec( trace_info->num_prefences, trace_info->prog_id, trace_info->submit_id, vpu_start); } + +void pva_kmd_shim_add_trace_fence( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_fence_trace const *trace_info) +{ + uint32_t job_id; + + // We want to log events only for user workloads + if (trace_info->ccq_id == PVA_PRIV_CCQ_ID) { + return; + } + + job_id = get_job_id(trace_info->queue_id, trace_info->submit_id); + + if (trace_info->action == PVA_KMD_FW_BUF_MSG_FENCE_ACTION_WAIT) { + if (trace_info->type == PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT) { + trace_job_prefence(job_id, trace_info->fence_id, + trace_info->value); + } else if (trace_info->type == + PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) { + trace_job_prefence_semaphore(job_id, + trace_info->fence_id, + trace_info->offset, + trace_info->value); + } + } else if (trace_info->action == + PVA_KMD_FW_BUF_MSG_FENCE_ACTION_SIGNAL) { + if (trace_info->type == PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SYNCPT) { + trace_job_postfence(job_id, trace_info->fence_id, + trace_info->value); + } else if (trace_info->type == + PVA_KMD_FW_BUF_MSG_FENCE_TYPE_SEMAPHORE) { + trace_job_postfence_semaphore(job_id, + trace_info->fence_id, + trace_info->offset, + trace_info->value); + } + } +} diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.c index ba1ffa78..3203e31f 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.c @@ -14,9 +14,13 @@ static struct pva_kmd_isr_data *get_isr(struct pva_kmd_device *pva, struct pva_kmd_isr_data *isr_data; ASSERT(intr_line < PVA_KMD_INTR_LINE_COUNT); isr_data = &plat_data->isr[intr_line]; - ASSERT(isr_data->binded); + if (!isr_data->binded) { + return NULL; + } + return isr_data; } + static irqreturn_t pva_isr(int irq, void *dev_id) { struct pva_kmd_isr_data *isr_data = (struct pva_kmd_isr_data *)dev_id; @@ -35,40 +39,60 @@ enum pva_error 
pva_kmd_bind_intr_handler(struct pva_kmd_device *pva, pva_kmd_linux_device_get_data(pva); struct pva_kmd_isr_data *isr_data = &plat_data->isr[intr_line]; struct nvpva_device_data *props = plat_data->pva_device_properties; + enum pva_error pva_err = PVA_SUCCESS; + int irq; - isr_data->irq = platform_get_irq(props->pdev, intr_line); + ASSERT(isr_data->binded == false); + irq = platform_get_irq(props->pdev, intr_line); + if (irq < 0) { + pva_kmd_log_err("Failed to get irq number"); + pva_err = kernel_err2pva_err(irq); + goto err_out; + } + + isr_data->irq = irq; isr_data->handler = handler; isr_data->handler_data = data; - isr_data->binded = true; isr_data->intr_line = intr_line; err = request_threaded_irq(isr_data->irq, NULL, pva_isr, IRQF_ONESHOT, "pva-isr", isr_data); - if (err != 0) { pva_kmd_log_err("Failed to bind interrupt handler"); + pva_err = kernel_err2pva_err(err); + goto err_out; } - return kernel_err2pva_err(err); + isr_data->binded = true; + + return PVA_SUCCESS; +err_out: + return pva_err; } void pva_kmd_enable_intr(struct pva_kmd_device *pva, enum pva_kmd_intr_line intr_line) { struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line); - enable_irq(isr_data->irq); + if (isr_data != NULL) { + enable_irq(isr_data->irq); + } } -void pva_kmd_disable_intr(struct pva_kmd_device *pva, - enum pva_kmd_intr_line intr_line) +void pva_kmd_disable_intr_nosync(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line) { struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line); - disable_irq(isr_data->irq); + if (isr_data != NULL) { + disable_irq_nosync(isr_data->irq); + } } void pva_kmd_free_intr(struct pva_kmd_device *pva, enum pva_kmd_intr_line intr_line) { struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line); - free_irq(isr_data->irq, isr_data); + ASSERT(isr_data != NULL); + + (void)free_irq(isr_data->irq, isr_data); isr_data->binded = false; } diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c index 8117a83e..5b46ed76 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c @@ -11,7 +11,12 @@ void *pva_kmd_zalloc(uint64_t size) { - return kvzalloc(size, GFP_KERNEL); + void *ptr = kvzalloc(size, GFP_KERNEL); + + if (IS_ERR_OR_NULL(ptr)) { + return NULL; + } + return ptr; } void pva_kmd_free(void *ptr) diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c index 1bd1cca5..5b944141 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c @@ -132,12 +132,16 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device) sid_idx < safe_subu32(pva_device->hw_consts.n_smmu_contexts, 2U); sid_idx++) { uint32_t smmu_ctx_idx = safe_addu32(sid_idx, 1U); - pva_device->stream_ids[smmu_ctx_idx] = g_smmu_ctxs[sid_idx].sid; - device_data->smmu_contexts[smmu_ctx_idx] = - g_smmu_ctxs[sid_idx].pdev; - dma_set_mask_and_coherent( - &device_data->smmu_contexts[smmu_ctx_idx]->dev, - DMA_BIT_MASK(39)); + struct pva_kmd_linux_smmu_ctx *smmu_ctx = &g_smmu_ctxs[sid_idx]; + + pva_device->stream_ids[smmu_ctx_idx] = smmu_ctx->sid; + device_data->smmu_contexts[smmu_ctx_idx] = smmu_ctx->pdev; + dma_set_mask_and_coherent(&smmu_ctx->pdev->dev, + DMA_BIT_MASK(39)); + //set max segment size to UINT_MAX to avoid creating scatterlist 
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c
index 1bd1cca5..5b944141 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c
@@ -132,12 +132,16 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device)
 	     sid_idx < safe_subu32(pva_device->hw_consts.n_smmu_contexts, 2U);
 	     sid_idx++) {
 		uint32_t smmu_ctx_idx = safe_addu32(sid_idx, 1U);
-		pva_device->stream_ids[smmu_ctx_idx] = g_smmu_ctxs[sid_idx].sid;
-		device_data->smmu_contexts[smmu_ctx_idx] =
-			g_smmu_ctxs[sid_idx].pdev;
-		dma_set_mask_and_coherent(
-			&device_data->smmu_contexts[smmu_ctx_idx]->dev,
-			DMA_BIT_MASK(39));
+		struct pva_kmd_linux_smmu_ctx *smmu_ctx = &g_smmu_ctxs[sid_idx];
+
+		pva_device->stream_ids[smmu_ctx_idx] = smmu_ctx->sid;
+		device_data->smmu_contexts[smmu_ctx_idx] = smmu_ctx->pdev;
+		dma_set_mask_and_coherent(&smmu_ctx->pdev->dev,
+					  DMA_BIT_MASK(39));
+		// Set the max segment size to UINT_MAX to avoid creating a scatterlist
+		// >= 4GB during IOVA mapping, which would overflow the scatterlist
+		// length field and cause an IOVA leak.
+		dma_set_max_seg_size(&smmu_ctx->pdev->dev, UINT_MAX);
 	}
 
 	/* Configure SMMU contexts for privileged operations */
diff --git a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_constants.h b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_constants.h
index 9b6e3d33..cd99f95c 100644
--- a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_constants.h
+++ b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_constants.h
@@ -6,7 +6,7 @@
 
 #define PVA_NUM_ENGINES 2U
 #define PVA_MAX_NUM_CCQ 8
-#define PVA_CCQ_DEPTH 8U
+#define PVA_CCQ_DEPTH 14U
 #define PVA_USER_CCQ_BASE 1
 #define PVA_INVALID_CCQ_ID 0xFF
 #define PVA_INVALID_ENGINE_ID 0xFFU
@@ -138,4 +138,6 @@
 #define PVA_KMD_CHIP_ID_T26X "GEN3"
 #define PVA_KMD_CHIP_ID_DEFAULT PVA_KMD_CHIP_ID_T23X
 
+#define PVA_KMD_TEST_MODE_ENV_VAR "PVA_TEST_MODE"
+
 #endif // PVA_CONSTANTS_H
diff --git a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h
index e9eaa58b..716a2d04 100644
--- a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h
+++ b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h
@@ -689,6 +689,21 @@ static inline uint32_t safe_wraparound_dec_u32(uint32_t counter)
 	return result;
 }
 
+static inline uint32_t safe_wrap_add_u32(uint32_t a, uint32_t b)
+{
+	return (uint32_t)(((uint64_t)a + (uint64_t)b) & 0xFFFFFFFFU);
+}
+
+static inline uint32_t safe_wrap_sub_u32(uint32_t a, uint32_t b)
+{
+	return (uint32_t)(((uint64_t)a - (uint64_t)b) & 0xFFFFFFFFU);
+}
+
+static inline uint32_t safe_wrap_mul_u32(uint32_t a, uint32_t b)
+{
+	return (uint32_t)(((uint64_t)a * (uint64_t)b) & 0xFFFFFFFFU);
+}
+
 #define SAT_ADD_DEFINE(a, b, name, type)                                       \
 	static inline type sat_add##name(type a, type b)                       \
 	{                                                                      \
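
/*
 * Editor's illustrative sketch (not part of the patch): the safe_wrap_*_u32
 * helpers above make modulo-2^32 wraparound explicit by widening to uint64_t
 * and masking, rather than relying on implicit unsigned overflow. A
 * hypothetical self-test of the wrap semantics:
 */
#include <assert.h>

static void example_wrap_semantics(void)
{
	assert(safe_wrap_add_u32(0xFFFFFFFFU, 2U) == 1U);  /* wraps past 2^32 */
	assert(safe_wrap_sub_u32(0U, 1U) == 0xFFFFFFFFU);  /* borrow wraps */
	assert(safe_wrap_mul_u32(0x80000000U, 2U) == 0U);  /* 2^32 masks to 0 */
}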
diff --git a/drivers/video/tegra/host/pva/src/private_api/pva_api_private.h b/drivers/video/tegra/host/pva/src/private_api/pva_api_private.h
new file mode 100644
index 00000000..ec8ddaea
--- /dev/null
+++ b/drivers/video/tegra/host/pva/src/private_api/pva_api_private.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+#ifndef PVA_API_PRIVATE_H
+#define PVA_API_PRIVATE_H
+
+#include "pva_api.h"
+
+// Legacy pointer support; not exposed through the public API.
+#define PVA_CMD_FLAGS_USE_LEGACY_POINTER 0x1
+struct pva_fw_vpu_legacy_ptr_symbol {
+	uint64_t base;
+	uint32_t offset;
+	uint32_t size;
+};
+
+enum pva_error_inject_codes {
+	PVA_ERR_INJECT_WDT_HW_ERR, // watchdog hardware error
+	PVA_ERR_INJECT_WDT_TIMEOUT, // watchdog timeout error
+	PVA_ERR_INJECT_VMEM_CLEAR, // VMEM clear
+	PVA_ERR_INJECT_ASSERT_CHECK, // assert check
+	PVA_ERR_INJECT_ARMV7_EXCEPTION, // ARMv7 exception
+};
+
+struct pva_cmd_run_unit_tests {
+#define PVA_CMD_OPCODE_RUN_UNIT_TESTS (PVA_CMD_OPCODE_MAX + 0U)
+	struct pva_cmd_header header;
+#define PVA_FW_UTESTS_MAX_ARGC 16U
+	uint8_t argc;
+	uint8_t pad[3];
+	uint32_t in_resource_id;
+	uint32_t in_offset;
+	uint32_t in_size;
+	uint32_t out_resource_id;
+	uint32_t out_offset;
+	uint32_t out_size;
+};
+
+struct pva_cmd_err_inject {
+#define PVA_CMD_OPCODE_ERR_INJECT (PVA_CMD_OPCODE_MAX + 1U)
+	struct pva_cmd_header header;
+	uint32_t err_inject_code; // enum pva_error_inject_codes
+};
+
+struct pva_cmd_gr_check {
+#define PVA_CMD_OPCODE_GR_CHECK (PVA_CMD_OPCODE_MAX + 2U)
+	struct pva_cmd_header header;
+};
+
+#define PVA_CMD_OPCODE_COUNT (PVA_CMD_OPCODE_MAX + 3U)
+
+#endif // PVA_API_PRIVATE_H
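
/*
 * Editor's illustrative sketch (not part of the patch): because the private
 * opcodes extend the public space contiguously from PVA_CMD_OPCODE_MAX,
 * PVA_CMD_OPCODE_COUNT can bound a single dispatch table covering both. The
 * handler type, table layout, and -EINVAL mapping below are hypothetical.
 */
#include "pva_api_private.h"

typedef enum pva_error (*example_cmd_handler)(const struct pva_cmd_header *hdr);

static enum pva_error
example_dispatch(const example_cmd_handler table[PVA_CMD_OPCODE_COUNT],
		 uint32_t opcode, const struct pva_cmd_header *hdr)
{
	/* Reject opcodes outside the combined public + private range. */
	if ((opcode >= PVA_CMD_OPCODE_COUNT) || (table[opcode] == NULL)) {
		return kernel_err2pva_err(-EINVAL);
	}
	return table[opcode](hdr);
}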